# creating a class LogisticRegression
import numpy as np
# Creating a sigmoid function as we'll be using it
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

    Works element-wise on NumPy arrays and on plain numeric scalars; the
    result lies between 0 and 1 and equals 0.5 at ``x == 0``.
    """
    return 1 / (1 + np.exp(-x))
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Attributes:
        lr: Learning rate (step size) applied to every gradient update.
        n_iters: Number of gradient-descent iterations run by ``fit``.
        weights: Per-feature coefficients; ``None`` until ``fit`` is called.
        bias: Intercept term; ``None`` until ``fit`` is called.
    """

    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    # always start by adding fit and predict function
    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples labels encoded as 0 / 1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Initializing weights and bias
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)  # assigning zeros as weights
        self.bias = 0.0

        # Gradient Descent
        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_pred)

            # Gradient calculation: both gradients share the same
            # (prediction - label) error term, so compute it once.
            error = predictions - y
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Update weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return a list of predicted class labels (0 or 1) for *X*.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5, and 0 otherwise.
        """
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_pred)
        class_pred = np.where(y_pred > 0.5, 1, 0).tolist()
        return class_pred
Explanation and steps for logistic regression
Unlike linear regression, which predicts continuous values, logistic regression predicts probabilities. Accordingly, cross-entropy is used as the loss function instead of mean squared error.
The Gradient Descent method is applied for LogisticRegression as well.
Each weight is updated by subtracting the gradient, scaled by the learning rate, from its current value.
Steps: (i) Training - initialize the weights and bias to zero. (ii) Given the data points - predict the result, calculate the error, use gradient descent to determine the new weights and bias, and repeat n times. (iii) Testing - plug the input values into the equation and select the label based on the predicted probability.
The same linear equation as in linear regression is used, but its output is passed through the sigmoid function.
model building
testing
# testing how accurate it is with breast_cancer dataset from scikit_learn
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# Load data
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Normalize features: fit the scaler on the training split only, then
# reuse those statistics to transform the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and fit the logistic regression model
clf = LogisticRegression(lr=0.01, n_iters=1000)
clf.fit(X_train, y_train)

# Predict on test data
y_pred = clf.predict(X_test)
# Accuracy function
def accuracy(y_pred, y_test):
    """Return the fraction of predictions in *y_pred* that match *y_test*.

    Both arguments may be lists or NumPy arrays; they are compared
    element-wise, so plain Python lists are scored correctly too.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    predicted = np.asarray(y_pred)
    actual = np.asarray(y_test)
    if predicted.shape != actual.shape:
        raise ValueError(
            f"shape mismatch: y_pred {predicted.shape} vs y_test {actual.shape}"
        )
    return np.mean(predicted == actual)
# Calculate accuracy
acc = accuracy(y_pred, y_test)
print(f'Accuracy: {acc:.2f}')
Accuracy: 0.94