Explanation and steps for logistic regression

Probabilities are utilized instead of specific values in this approach which is not the case for linear regression. Instead of mean square error, cross-entropy is employed.

The Gradient Descent method is applied for LogisticRegression as well.

Weight calculation involves subtracting the gradient from the current weight.

Steps: (i) Training - Initialize weight and bias as zero. (ii) Given a data point - predict result, calculate error, use gradient descent to determine new weight and bias, repeat n times. (iii) Testing - input values into the equation, select label based on probability.

The same equation as in linear regression is utilized, integrated into the sigmoid function.

model building

# creating a class LogisticRegression
import numpy as np

# Creating a sigmoid function as we'll be using it
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class LogisticRegression:
    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None


    # always start by adding fit and predict funciton 
    def fit(self, X, y):
        # Initializing weights and bias
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)  # assigning zeros as weights
        self.bias = 0

        # Gradient Descent
        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_pred)
            
            # Gradient calculation
            dw = (1 / n_samples) * np.dot(X.T, (predictions - y))
            db = (1 / n_samples) * np.sum(predictions - y)

            # Update weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_pred)
        class_pred = [0 if i <= 0.5 else 1 for i in y_pred]
        return class_pred

testing

# testing how accurate it is with breast_cancer dataset from scikit_learn

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load data
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and fit the logistic regression model
clf = LogisticRegression(lr=0.01, n_iters=1000)
clf.fit(X_train, y_train)

# Predict on test data
y_pred = clf.predict(X_test)

# Accuracy function
def accuracy(y_pred, y_test):
    accuracy = np.sum(y_pred == y_test) / len(y_test)
    return accuracy

# Calculate accuracy
acc = accuracy(y_pred, y_test)
print(f'Accuracy: {acc:.2f}')

Accuracy: 0.94