0% found this document useful (0 votes)
40 views8 pages

MSE Evaluation in Econometrics with Pandas

The document consists of two assignments involving data analysis using Python libraries such as pandas, scikit-learn, and mlxtend. It covers tasks including linear regression, logistic regression, and association rule mining with datasets related to sales, purchases, and transactions. The assignments demonstrate model training, evaluation, and visualization techniques for predictive analytics.

Uploaded by

psb18039
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
40 views8 pages

MSE Evaluation in Econometrics with Pandas

The document consists of two assignments involving data analysis using Python libraries such as pandas, scikit-learn, and mlxtend. It covers tasks including linear regression, logistic regression, and association rule mining with datasets related to sales, purchases, and transactions. The assignments demonstrate model training, evaluation, and visualization techniques for predictive analytics.

Uploaded by

psb18039
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

ASSIGNMENT 1

SET A

# Set A, #1: simple linear regression of Sales on TV advertising spend,
# trained and evaluated on a synthetic 500-row dataset.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

#create Dataset
# Fixed seed so the synthetic data (and therefore the metrics) are reproducible.
np.random.seed(42)
data_size = 500

# NOTE(review): the generator names were lost when this text was extracted;
# np.arange for the ID column and a uniform draw over each (low, high) range
# are assumed here -- confirm against the original notebook.
df = pd.DataFrame({
    'ID': np.arange(1, data_size + 1),
    'TV': np.random.uniform(0, 300, data_size),
    'Radio': np.random.uniform(0, 100, data_size),
    'Newspaper': np.random.uniform(0, 50, data_size),
    'Sales': np.random.uniform(5, 25, data_size),
})
df  # notebook-style display; a no-op when run as a plain script

#split into training and testing data
x = df[['TV']]   # 2-D feature frame, as scikit-learn estimators expect
y = df['Sales']  # 1-D target
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

#Train Linear regression Model
model = LinearRegression()
model.fit(X_train, Y_train)

#Make Predictions
Y_pred = model.predict(X_test)

#Evaluate Model Performance
mse = mean_squared_error(Y_test, Y_pred)
# LinearRegression.score returns the coefficient of determination (R^2).
r2_score = model.score(X_test, Y_test)

print("Model Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared error:", mse)
print("R2 score:", r2_score)

# Training points with the fitted regression line drawn over them.
plt.scatter(X_train, Y_train)
plt.plot(X_train, model.predict(X_train), color = "red")
plt.show()

#2
# Set A, #2: simple linear regression of Purchases on Houses, trained and
# evaluated on a synthetic 500-row dataset.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

#create Dataset
# Fixed seed so the synthetic data (and therefore the metrics) are reproducible.
np.random.seed(42)
data_size = 500

# NOTE(review): the generator names were lost when this text was extracted;
# np.arange for the ID column and integer draws (np.random.randint) over each
# (low, high) range are assumed here -- confirm against the original notebook.
df = pd.DataFrame({
    'ID': np.arange(1, data_size + 1),
    'Flat': np.random.randint(500, 5000, data_size),
    'Houses': np.random.randint(1000, 10000, data_size),
    'Purchases': np.random.randint(1, 100, data_size),
})
df  # notebook-style display; a no-op when run as a plain script

#split into training and testing data
x = df[['Houses']]   # 2-D feature frame, as scikit-learn estimators expect
y = df['Purchases']  # 1-D target
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

#Train Linear regression Model
model = LinearRegression()
model.fit(X_train, Y_train)

#Make Predictions
Y_pred = model.predict(X_test)

#Evaluate Model Performance
mse = mean_squared_error(Y_test, Y_pred)
# LinearRegression.score returns the coefficient of determination (R^2).
r2_score = model.score(X_test, Y_test)

print("Model Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared error:", mse)
print("R2 score:", r2_score)

# Training points (red) with the fitted regression line drawn over them.
plt.scatter(X_train, Y_train, color = 'red')
plt.plot(X_train, model.predict(X_train))
plt.show()

#3
# Set A, #3: logistic regression predicting Purchased from Gender on a
# synthetic 500-row user dataset.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# The plots at the end need pyplot; import it here so this section does not
# depend on an earlier section having been run first.
import matplotlib.pyplot as plt

#create Dataset
# Fixed seed so the synthetic data (and therefore the metrics) are reproducible.
np.random.seed(42)
data_size = 500

# NOTE(review): the generator names were lost when this text was extracted;
# np.arange, np.random.choice (for the list-valued arguments) and
# np.random.randint (for the numeric ranges) are assumed -- confirm.
df = pd.DataFrame({
    'UserID': np.arange(1, data_size + 1),
    'Gender': np.random.choice(['Male', 'Female'], data_size),
    'Age': np.random.randint(18, 70, data_size),
    'EstimatedSalary': np.random.randint(20000, 150000, data_size),
    'Purchased': np.random.choice([0, 1], data_size),
})
df  # was `Df`, which raises NameError; notebook-style display

#convert Categorical Data into numeric form
encoder = LabelEncoder()
df['Gender'] = encoder.fit_transform(df['Gender'])

#split into training and testing datasets
x = df[['Gender']]
# 1-D target: a single-column DataFrame makes scikit-learn warn that a
# column vector was passed where a 1-D array was expected.
y = df['Purchased']
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

#Train a Logistic Regression Model
df  # notebook-style display of the encoded frame
model = LogisticRegression()
model.fit(X_train, Y_train)

#make Predictions
Y_pred = model.predict(X_test)

#Evaluate Model Performance
accuracy = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
class_report = classification_report(Y_test, Y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Class Report:", class_report)

# Training labels against the encoded feature, with the predicted class in red.
plt.scatter(X_train, Y_train)
plt.plot(X_train, model.predict(X_train), color = "red")
plt.show()


SET B

#1
# Set B, #1: multiple linear regression predicting fish Weight from species
# and body measurements, followed by an actual-vs-predicted scatter plot.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# `%matplotlib inline` is an IPython magic and a syntax error in a plain .py
# file; re-enable it only when running inside a notebook.
# %matplotlib inline

# NOTE(review): the file name was lost when this text was extracted;
# 'Fish.csv' is assumed -- point this at the actual dataset path.
data = pd.read_csv('Fish.csv')
data  # notebook-style display; a no-op when run as a plain script

# .copy() so the Species assignment below writes to an independent frame
# instead of a slice of `data` (avoids SettingWithCopyWarning).
x = data[['Species','Length1', 'Length2', 'Length3', 'Height', 'Width']].copy()
y = data['Weight']  # 1-D target keeps Y_pred 1-D for the scatter plot

encoder = LabelEncoder()
# Encode from `data` (this section's frame); the original read `df`, which
# belongs to a different section and has no 'Species' column.
x['Species'] = encoder.fit_transform(data['Species'])

# Features first, target second. The original passed (y, x), which trained
# the model to predict the measurements from the weight.
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

model = LinearRegression()
model.fit(X_train, Y_train)

Y_pred = model.predict(X_test)

mse = mean_squared_error(Y_test, Y_pred)
# LinearRegression.score returns the coefficient of determination (R^2).
r2_score = model.score(X_test, Y_test)

print("Model Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared error:", mse)
print("R2 score:", r2_score)

# Points on the diagonal are perfect predictions.
plt.scatter(Y_test, Y_pred)
plt.xlabel("actual Weight")
plt.ylabel("Predicted Weight")
plt.title('Actual vs Predicted Fish Weight')
plt.show()

#2
# Set B, #2: per-species summary statistics for the iris dataset, then a
# logistic-regression classifier with a confusion-matrix heatmap.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# Imported here so this section runs without an earlier section's imports.
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

# Feature columns from the bundled dataset, plus a readable species label.
d = pd.DataFrame(data = iris.data, columns = iris.feature_names)
d['species'] = iris.target
d['species'] = d['species'].map({0:'Iris-setosa', 1:'Iris-Versicolor', 2:'Iris-virginica'})
d.head(5)  # notebook-style display; a no-op when run as a plain script

# count/mean/std/min/quartiles/max of every feature, per species.
stats = d.groupby('species').describe()
print(stats)

x = d.iloc[:, :-1]  # every column except the trailing 'species' label
y = d['species']
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

# The default max_iter (100) can stop the solver before it converges on the
# unscaled iris features; allow more iterations.
model = LogisticRegression(max_iter = 200)
model.fit(X_train, Y_train)

Y_pred = model.predict(X_test)

# Named conf_matrix so it does not shadow sklearn.metrics.confusion_matrix,
# which an earlier section of this file imports by that name.
conf_matrix = pd.crosstab(Y_test, Y_pred, rownames = ['Actual'], colnames = ['Predicted'])
sns.heatmap(conf_matrix, annot = True)
plt.show()


ASSIGNMENT 2

#1
# Assignment 2, #1: frequent itemsets and association rules (Apriori) for a
# five-transaction market-basket dataset.

#Read the data, Encode the data
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Raw rows are [transaction id, comma-separated items].
raw_transactions = [['1', 'Bread, Milk'],
                    ['2', 'Bread, Diaper, Beer, Eggs'],
                    ['3', 'Milk, Diaper, Beer, Coke'],
                    ['4', 'Bread, Milk, Diaper, Beer'],
                    ['5', 'Bread, Milk, Diaper, Coke']]

# TransactionEncoder expects one list of items per transaction. Feeding it the
# raw rows would treat the id and the whole joined string as two "items", so
# drop the id and split the basket into individual item names.
transactions = [[item.strip() for item in basket.split(',')]
                for _, basket in raw_transactions]

# One-hot encode: one boolean column per distinct item, one row per transaction.
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns = te.columns_)
df  # notebook-style display; a no-op when run as a plain script

# Itemsets that occur in at least 20% of the transactions.
freq_items = apriori(df, min_support = 0.2, use_colnames = True)
print(freq_items)

# Rules with support >= 0.05, strongest (support, then confidence) first.
rules = association_rules(freq_items, metric = 'support', min_threshold = 0.05)
rules = rules.sort_values(['support', 'confidence'], ascending = [False, False])
print(rules)

#2
# Assignment 2, #2: frequent itemsets and association rules (Apriori) for a
# five-transaction grocery dataset.
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# One list of item names per transaction.
transactions = [['eggs', 'milk', 'bread'],
                ['eggs', 'apple'],
                ['milk', 'bread'],
                ['apple', 'milk'],
                ['milk', 'apple', 'bread']]

# One-hot encode: one boolean column per distinct item, one row per transaction.
te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns = te.columns_)
df  # notebook-style display; a no-op when run as a plain script

# Itemsets that occur in at least half of the transactions.
freq_items = apriori(df, min_support = 0.5, use_colnames = True)
print(freq_items)

# Rules with support >= 0.05, strongest (support, then confidence) first.
rules = association_rules(freq_items, metric = 'support', min_threshold = 0.05)
rules = rules.sort_values(['support', 'confidence'], ascending = [False, False])
print(rules)

You might also like