0% found this document useful (0 votes)
10 views

Logisticregression

Uploaded by

Dev Khatanhar
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views

Logisticregression

Uploaded by

Dev Khatanhar
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

logisticregression

October 23, 2024

[1]: # Let's load and inspect the dataset to understand its structure and see if any␣
↪cleanup is needed.

import pandas as pd

# Load the dataset

data = pd.read_csv('/content/Iris.csv')

# Clean the dataset


iris_df_cleaned = data.drop(columns=['Id'])

# Check for any missing values to ensure dataset cleanliness


iris_df_cleaned.isnull().sum()

# Show the cleaned dataset


iris_df_cleaned.head()

[1]: SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species


0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa

[5]: import numpy as np


import pandas as pd

# One-hot encode the categorical variables


X = iris_df_cleaned.drop(columns=['Species'])
y = iris_df_cleaned['Species'].map({'Iris-setosa': 0, 'Iris-versicolor': 1,␣
↪'Iris-virginica': 2})

# Manually split the data into training and testing sets


def train_test_split_manual(X, y, test_size=0.2, random_state=None):
if random_state is not None:
np.random.seed(random_state)
indices = np.random.permutation(len(X))

1
test_set_size = int(len(X) * test_size)
test_indices = indices[:test_set_size]
train_indices = indices[test_set_size:]
return X.iloc[train_indices], X.iloc[test_indices], y.iloc[train_indices],␣
↪y.iloc[test_indices]

X_train, X_test, y_train, y_test = train_test_split_manual(X, y, test_size=0.2,␣


↪random_state=42)

# Logistic regression implementation


class LogisticRegressionFromScratch:
def __init__(self, learning_rate=0.01, num_iterations=1000):
self.learning_rate = learning_rate
self.num_iterations = num_iterations
self.weights = None
self.bias = None

def sigmoid(self, z):


return 1 / (1 + np.exp(-z))

def fit(self, X, y):


# Initialize parameters
n_samples, n_features = X.shape
self.weights = np.zeros(n_features)
self.bias = 0

# Gradient descent
for _ in range(self.num_iterations):
# Linear model
linear_model = np.dot(X, self.weights) + self.bias
# Apply sigmoid function
y_predicted = self.sigmoid(linear_model)

# Compute gradients
dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
db = (1 / n_samples) * np.sum(y_predicted - y)

# Update weights and bias


self.weights -= self.learning_rate * dw
self.bias -= self.learning_rate * db

def predict(self, X):


# Linear model
linear_model = np.dot(X, self.weights) + self.bias
# Apply sigmoid function
y_predicted = self.sigmoid(linear_model)
# Convert probabilities to binary output

2
return [1 if i > 0.5 else 0 for i in y_predicted]

# Train the logistic regression model


model = LogisticRegressionFromScratch(learning_rate=0.01, num_iterations=1000)
model.fit(X_train.values, y_train.values.astype(float)) # Ensure y_train is␣
↪numeric

# Make predictions
y_pred = model.predict(X_test.values)

# Add predictions to the test set to show the classification results


X_test_with_predictions = X_test.copy()
X_test_with_predictions['Actual'] = y_test
X_test_with_predictions['Predicted'] = y_pred
X_test_with_predictions['Species (Predicted)'] =␣
↪X_test_with_predictions['Predicted'].map({0: 'Iris-setosa', 1:␣

↪'Iris-versicolor', 2: 'Iris-virginica'})

X_test_with_predictions['Species (Actual)'] = X_test_with_predictions['Actual'].


↪map({0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'})

# Display the classification results


X_test_with_predictions[['Species (Actual)', 'Species (Predicted)']].head()

[5]: Species (Actual) Species (Predicted)


73 Iris-versicolor Iris-versicolor
18 Iris-setosa Iris-versicolor
118 Iris-virginica Iris-versicolor
78 Iris-versicolor Iris-versicolor
76 Iris-versicolor Iris-versicolor

[7]: accuracy = np.mean(y_pred == y_test.values)


print(f'Accuracy: {accuracy * 100:.2f}%')
def calculate_metrics(y_true, y_pred):
# Initialize variables to hold counts
TP = FP = FN = TN = 0
# Calculate TP, FP, FN, TN
for actual, predicted in zip(y_true, y_pred):
if actual == 1 and predicted == 1:
TP += 1
elif actual == 0 and predicted == 1:
FP += 1
elif actual == 1 and predicted == 0:
FN += 1
elif actual == 0 and predicted == 0:
TN += 1
# Calculate precision, recall, and F1-score
precision = TP / (TP + FP) if (TP + FP) > 0 else 0

3
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision +␣
↪recall) > 0 else 0

return precision, recall, f1_score


# Calculate precision, recall, and F1-score using the predictions and true␣
↪values

precision, recall, f1_score = calculate_metrics(y_test.values, y_pred)


# Print the results
print(f'Precision: {precision*100:.2f}')
print(f'Recall: {recall*100:.2f}')
print(f'F1 Score: {f1_score*100:.2f}')

Accuracy: 30.00%
Precision: 47.37
Recall: 100.00
F1 Score: 64.29

You might also like