# ASSIGNMENT 1
# SET A
# 1
# Simple linear regression: predict Sales from TV advertising spend on a
# synthetic advertising dataset, then report the fit and plot the line.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Create dataset. Seeding NumPy makes the random columns reproducible.
np.random.seed(42)
data_size = 500
# NOTE(review): the generator calls were lost from this listing; a uniform
# draw over each (low, high) range is assumed -- confirm against the
# original notebook.
df = pd.DataFrame({
    'ID': np.arange(1, data_size + 1),
    'TV': np.random.uniform(0, 300, data_size),
    'Radio': np.random.uniform(0, 100, data_size),
    'Newspaper': np.random.uniform(0, 50, data_size),
    'Sales': np.random.uniform(5, 25, data_size),
})
print(df)

# Split into training and testing data (70% / 30%).
# x stays a one-column DataFrame because scikit-learn expects 2-D features.
x = df[['TV']]
y = df['Sales']
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Train linear regression model
model = LinearRegression()
model.fit(X_train, Y_train)

# Make predictions
Y_pred = model.predict(X_test)

# Evaluate model performance; LinearRegression.score returns R^2.
mse = mean_squared_error(Y_test, Y_pred)
r2_score = model.score(X_test, Y_test)
print("Model Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared error:", mse)
print("R2 score:", r2_score)

# Training points with the fitted regression line on top.
plt.scatter(X_train, Y_train)
plt.plot(X_train, model.predict(X_train), color="red")
plt.show()
#2
# Simple linear regression: predict Purchases from the Houses column of a
# synthetic real-estate dataset, then report the fit and plot the line.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Create dataset. Seeding NumPy makes the random columns reproducible.
np.random.seed(42)
data_size = 500
# NOTE(review): the generator calls were lost from this listing; integer
# draws via np.random.randint are assumed -- confirm against the original
# notebook.
df = pd.DataFrame({
    'ID': np.arange(1, data_size + 1),
    'Flat': np.random.randint(500, 5000, data_size),
    'Houses': np.random.randint(1000, 10000, data_size),
    'Purchases': np.random.randint(1, 100, data_size),
})
print(df)

# Split into training and testing data (70% / 30%).
# x stays a one-column DataFrame because scikit-learn expects 2-D features.
x = df[['Houses']]
y = df['Purchases']
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Train linear regression model
model = LinearRegression()
model.fit(X_train, Y_train)

# Make predictions
Y_pred = model.predict(X_test)

# Evaluate model performance; LinearRegression.score returns R^2.
mse = mean_squared_error(Y_test, Y_pred)
r2_score = model.score(X_test, Y_test)
print("Model Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared error:", mse)
print("R2 score:", r2_score)

# Training points (red) with the fitted regression line on top.
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, model.predict(X_train))
plt.show()
#3
# Logistic regression: predict the binary Purchased flag from the encoded
# Gender column of a synthetic user dataset, then print accuracy, the
# confusion matrix and the classification report.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # used by the plot at the end of this script
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create dataset. Seeding NumPy makes the random columns reproducible.
np.random.seed(42)
data_size = 500
# NOTE(review): the generator calls were lost from this listing;
# np.random.choice for the categorical columns and np.random.randint for
# the numeric ones are assumed -- confirm against the original notebook.
df = pd.DataFrame({
    'UserID': np.arange(1, data_size + 1),
    'Gender': np.random.choice(['Male', 'Female'], data_size),
    'Age': np.random.randint(18, 70, data_size),
    'EstimatedSalary': np.random.randint(20000, 150000, data_size),
    'Purchased': np.random.choice([0, 1], data_size),
})
print(df)

# Convert categorical data into numeric form
encoder = LabelEncoder()
df['Gender'] = encoder.fit_transform(df['Gender'])
print(df)

# Split into training and testing datasets (70% / 30%).
# The target is selected as a 1-D Series: a one-column DataFrame makes
# scikit-learn emit a DataConversionWarning when fitting.
x = df[['Gender']]
y = df['Purchased']
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, Y_train)

# Make predictions
Y_pred = model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
class_report = classification_report(Y_test, Y_pred)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Class Report:", class_report)

# Training labels against the encoded feature, with the predicted class
# for each training row drawn in red.
plt.scatter(X_train, Y_train)
plt.plot(X_train, model.predict(X_train), color="red")
plt.show()
# SET B
#1
# Multiple linear regression: predict fish Weight from Species and the
# body measurements, then plot actual vs. predicted weight.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
# In a Jupyter notebook, "%matplotlib inline" may be run in its own cell;
# it is an IPython magic, not Python, so it must not appear in a script.

# NOTE(review): the CSV filename was lost from this listing; 'Fish.csv'
# is assumed from the column names used below -- confirm the actual path.
data = pd.read_csv('Fish.csv')
print(data)

# Work on a copy so encoding Species does not write into a slice of `data`
# (which would raise pandas' SettingWithCopyWarning).
x = data[['Species', 'Length1', 'Length2', 'Length3', 'Height', 'Width']].copy()
y = data['Weight']

# Encode the categorical Species column of the fish data itself.
encoder = LabelEncoder()
x['Species'] = encoder.fit_transform(x['Species'])

# Features first, target second: train_test_split returns the splits in
# the same order as its arguments.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

model = LinearRegression()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Evaluate model performance; LinearRegression.score returns R^2.
mse = mean_squared_error(Y_test, Y_pred)
r2_score = model.score(X_test, Y_test)
print("Model Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared error:", mse)
print("R2 score:", r2_score)

# Points close to the diagonal indicate accurate predictions.
plt.scatter(Y_test, Y_pred)
plt.xlabel("actual Weight")
plt.ylabel("Predicted Weight")
plt.title('Actual vs Predicted Fish Weight')
plt.show()
#2
# Iris classification: summarise the dataset per species, fit a logistic
# regression on the four measurements and show the confusion matrix as a
# heatmap.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
d = pd.DataFrame(data=iris.data, columns=iris.feature_names)
d['species'] = iris.target
# Replace the integer class codes with readable species labels.
d['species'] = d['species'].map({0:'Iris-setosa', 1:'Iris-Versicolor', 2:'Iris-virginica'})
print(d.head(5))

# Per-species descriptive statistics for every measurement column.
stats = d.groupby('species').describe()
print(stats)

# Every column except the last ('species') is a feature.
x = d.iloc[:, :-1]
y = d['species']
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# max_iter is raised above the default of 100 so the solver has room to
# converge on these unscaled features without a ConvergenceWarning.
model = LogisticRegression(max_iter=200)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Cross-tabulate actual vs. predicted labels and draw it as a heatmap.
confusion_matrix = pd.crosstab(Y_test, Y_pred, rownames=['Actual'], colnames=['Predicted'])
sns.heatmap(confusion_matrix, annot=True)
plt.show()
# ASSIGNMENT 2
#1
# Read the data, encode the data, then mine frequent itemsets and
# association rules with Apriori.
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Raw data: [transaction id, comma-separated item string].
raw_transactions = [
    ['1', 'Bread, Milk'],
    ['2', 'Bread, Diaper, Beer, Eggs'],
    ['3', 'Milk, Diaper, Beer, Coke'],
    ['4', 'Bread, Milk, Diaper, Beer'],
    ['5', 'Bread, Milk, Diaper, Coke'],
]

# TransactionEncoder expects one list of individual items per transaction.
# Feeding it the raw rows would treat the id and the whole item string as
# two "items", so drop the id and split the string into separate items.
transactions = [
    [item.strip() for item in items.split(',')]
    for _transaction_id, items in raw_transactions
]

# One-hot encode: one boolean column per distinct item.
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)
print(df)

# Itemsets present in at least 20% of the transactions.
freq_items = apriori(df, min_support=0.2, use_colnames=True)
print(freq_items)

# Rules filtered by support, strongest (support, then confidence) first.
rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules = rules.sort_values(['support', 'confidence'], ascending=[False, False])
print(rules)
#2
# Mine frequent itemsets and association rules with Apriori from a small
# list of grocery transactions.
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Each inner list is one transaction's items.
transactions = [
    ['eggs', 'milk', 'bread'],
    ['eggs', 'apple'],
    ['milk', 'bread'],
    ['apple', 'milk'],
    ['milk', 'apple', 'bread'],
]

# One-hot encode: one boolean column per distinct item.
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)
print(df)

# Itemsets present in at least 50% of the transactions.
freq_items = apriori(df, min_support=0.5, use_colnames=True)
print(freq_items)

# Rules filtered by support, strongest (support, then confidence) first.
rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules = rules.sort_values(['support', 'confidence'], ascending=[False, False])
print(rules)