0% found this document useful (0 votes)
14 views

ML Lab File Final.docx - Google Docs

Uploaded by

akhil gharu
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views

ML Lab File Final.docx - Google Docs

Uploaded by

akhil gharu
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 17

‭EXPERIMENT – 2‬

AIM: Use PCA on a high-dimensional dataset to reduce its dimensionality while retaining most of the variance, and visualize the data.

‭CODE‬‭:‬

# Experiment 2: project the standardized housing features onto their first
# two principal components and plot them, coloured by house price.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

df = pd.read_csv('USA_Housing.csv')
print(df.isnull().sum())  # no null values
df.drop(['Address'], axis=1, inplace=True)

# Putting feature variables to X
X = df[['Avg. Area Income', 'Avg. Area House Age',
        'Avg. Area Number of Rooms', 'Avg. Area Number of Bedrooms',
        'Area Population']]
# Output variable (used only to colour the scatter plot)
y = df['Price']

# Standardize the features: PCA is variance-based, so columns measured on
# larger scales would otherwise dominate the components.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA
pca = PCA(n_components=2)  # Reduce to 2 dimensions for visualization
X_pca = pca.fit_transform(X_scaled)

# Explained variance ratio (fraction of total variance per component)
explained_variance = pca.explained_variance_ratio_

# Visualize the PCA results
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
plt.colorbar(label='Price')  # without this the point colours cannot be read
plt.title('PCA of USA Pricing dataset')
plt.xlabel(f'Principal Component 1 (Explained Variance: {explained_variance[0] * 100:.2f}%)')
plt.ylabel(f'Principal Component 2 (Explained Variance: {explained_variance[1] * 100:.2f}%)')
plt.show()

‭OUTPUT‬‭:‬

‭Scatter Plot‬

‭Before PCA‬

‭After PCA‬

‭RESULT‬‭: Hence, we have reduced the dimension of the‬‭dataset.‬

‭3‬
‭EXPERIMENT – 3‬

AIM: Perform a linear regression analysis on a dataset to predict a continuous target variable based on one or more predictor variables. Evaluate the model's performance using metrics like RMSE and R-squared.

‭CODE‬‭:‬

# Experiment 3: fit a linear regression on the housing features and report
# RMSE and R-squared on a held-out 20% test split.

# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv('USA_Housing.csv')
print("Checking null values:\n", df.isnull().sum(), "\n\n")  # no null values
df.drop(['Address'], axis=1, inplace=True)

# Putting feature variables to X
X = df[['Avg. Area Income', 'Avg. Area House Age',
        'Avg. Area Number of Rooms', 'Avg. Area Number of Bedrooms',
        'Area Population']]
# Output variable
y = df['Price']

# Fixed random_state keeps the split (and the reported metrics) reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

lm = LinearRegression()
lm.fit(X_train, y_train)
y_pred = lm.predict(X_test)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Calculate R-squared
r_squared = r2_score(y_test, y_pred)

print("Linear regression model performance:\nRoot Mean Squared Error (RMSE):", rmse)
print("R-squared:", r_squared)

plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.5, label='Predicted', color='cyan')
# Plotting y_test against itself draws the ideal y = x reference
plt.scatter(y_test, y_test, alpha=0.5, label='Actual', color='blue')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.show()

‭OUTPUT‬‭:‬

‭Predicted Value vs Actual Value‬

RESULT: Hence, we have trained and evaluated the model.
Evaluation Results are:
● Root Mean Squared Error (RMSE): 100444.06055558745
● R-squared: 0.9179971706834289

‭5‬
‭EXPERIMENT – 4‬

AIM: Compare the performance of various classification algorithms (e.g., Logistic Regression, Decision Trees, Random Forest, SVM and Naïve Bayes) on a common dataset using accuracy, precision, recall and F1-Score.

‭CODE‬‭:‬

# Experiment 4: train five classifiers on the same split of the gender
# dataset and tabulate accuracy, precision, recall and F1-score.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

df = pd.read_csv('gender_classification_v7.csv')
# Encode the target: 'Male' -> 0, every other value -> 1
df['gender'] = df['gender'].apply(lambda x: 0 if x == 'Male' else 1)

plt.figure(figsize=(2, 4))
plt.title('Count of Gender', size=10)
sns.countplot(data=df, x="gender")
plt.ylabel('Count', size=12)
plt.xlabel('Gender', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

X = df.drop(columns=['gender'])
y = df['gender']

# Split first, then fit the scaler on the training rows only, so the test
# rows do not influence the scaling statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train_raw),
                       columns=X.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw),
                      columns=X.columns, index=X_test_raw.index)

# Models under comparison, in the order they appear in the result table
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(criterion="gini", random_state=100,
                                            max_depth=3, min_samples_leaf=5),
    # Seeded so repeated runs give the same scores
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(kernel='linear'),
    'Naive Bayes': GaussianNB(),
}

Name, Accuracy, Precision, Recall, F1_Score = [], [], [], [], []
for model_name, classifier in classifiers.items():
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    Name.append(model_name)
    # Accuracy is reported as a percentage; the other metrics stay in [0, 1]
    Accuracy.append(round(accuracy_score(y_test, y_pred) * 100, 2))
    Precision.append(precision_score(y_test, y_pred))
    Recall.append(recall_score(y_test, y_pred))
    F1_Score.append(f1_score(y_test, y_pred))

# Combining all models' performance
models = {'Model': Name, 'Accuracy': Accuracy, 'Precision': Precision,
          'Recall': Recall, 'F1_Score': F1_Score}
model_df = pd.DataFrame(models)
print(model_df)

‭OUTPUT‬‭:‬

‭Dataset‬

‭Count Plot‬

‭RESULT‬‭: All models are trained and evaluated. Naïve‬‭Bayes performs best for the given dataset.‬

‭8‬
‭EXPERIMENT – 5‬
AIM: Implement ensemble methods such as Bagging (e.g., Random Forest) and Boosting (e.g., AdaBoost) on a classification task and compare their performance to individual models.

CODE:
# Experiment 5: compare a bagging ensemble (Random Forest) with a boosting
# ensemble (AdaBoost) on the gender classification dataset.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

df = pd.read_csv('gender_classification_v7.csv')
# Encode the target: 'Male' -> 0, every other value -> 1
df['gender'] = df['gender'].apply(lambda x: 0 if x == 'Male' else 1)

plt.figure(figsize=(2, 4))
plt.title('Count of Gender', size=10)
sns.countplot(data=df, x="gender")
plt.ylabel('Count', size=12)
plt.xlabel('Gender', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

X = df.drop(columns=['gender'])
y = df['gender']

# Split first, then fit the scaler on the training rows only, so the test
# rows do not influence the scaling statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train_raw),
                       columns=X.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw),
                      columns=X.columns, index=X_test_raw.index)

# Ensemble methods; both are seeded so repeated runs give the same scores
ensembles = {
    # Bagging - Random Forest
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    # Boosting - AdaBoost Classifier
    'AdaBoost': AdaBoostClassifier(random_state=42),
}

Name, Accuracy, Precision, Recall, F1_Score = [], [], [], [], []
for model_name, classifier in ensembles.items():
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    Name.append(model_name)
    # Accuracy is reported as a percentage; the other metrics stay in [0, 1]
    Accuracy.append(round(accuracy_score(y_test, y_pred) * 100, 2))
    Precision.append(precision_score(y_test, y_pred))
    Recall.append(recall_score(y_test, y_pred))
    F1_Score.append(f1_score(y_test, y_pred))

# Combining all models' performance
models = {'Model': Name, 'Accuracy': Accuracy, 'Precision': Precision,
          'Recall': Recall, 'F1_Score': F1_Score}
model_df = pd.DataFrame(models)
print(model_df)

‭OUTPUT‬‭:‬

‭Dataset‬

‭Count Plot‬

RESULT: Both models are trained and evaluated. AdaBoost Classifier performs best for the given dataset.

‭10‬
‭EXPERIMENT – 6‬

AIM: Write code for feature selection techniques to reduce the number of features in a dataset while maintaining or improving the model's performance.
‭CODE‬‭:‬

# Experiment 6: keep the 4 best features according to a univariate F-test
# and check that a linear regression on them still performs well.
import math

import matplotlib.pyplot as plt  # needed for the plot at the end
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
df = pd.read_csv('USA_Housing.csv')

# Selecting features and output variable
X = df[['Avg. Area Income', 'Avg. Area House Age',
        'Avg. Area Number of Rooms', 'Avg. Area Number of Bedrooms',
        'Area Population']]
y = df['Price']

# Split before selecting features so the selector never sees the test
# targets; the same random_state keeps the split itself unchanged.
X_train_all, X_test_all, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Univariate feature selection: f_regression scores each feature with the
# F-statistic of a single-variable linear regression against the target.
selector = SelectKBest(score_func=f_regression, k=4)  # keep the top 4 features
X_train = selector.fit_transform(X_train_all, y_train)
X_test = selector.transform(X_test_all)

# Get selected feature indices and map them back to column names
selected_indices = selector.get_support(indices=True)
selected_features = X.columns[selected_indices]

# Initialize and fit a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate performance metrics
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
r_squared = r2_score(y_test, y_pred)

print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')
print(f'R-squared: {r_squared:.4f}')
print('Original Features: ', list(X.columns))
print(f'Selected Features: {list(selected_features)}\n')

plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.5, label='Predicted', color='cyan')
# Plotting y_test against itself draws the ideal y = x reference
plt.scatter(y_test, y_test, alpha=0.5, label='Actual', color='blue')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.show()

‭OUTPUT‬‭:‬

RESULT: After feature reduction using the univariate F-test (f_regression), model performance is good. Results are shown below:
● Root Mean Squared Error (RMSE): 100367.9313
● R-squared: 0.9181
● Original Features: ['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms', 'Avg. Area Number of Bedrooms', 'Area Population']
● Selected Features: ['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms', 'Area Population']

‭12‬
‭EXPERIMENT – 7‬
AIM: Write code to apply the Apriori algorithm to discover association rules in a retail transaction dataset, to identify frequently co-occurring items in customer purchases.

‭CODE‬‭:‬

# Experiment 7: mine association rules from grocery transactions with Apriori.
# In Colab/Jupyter install the dependency first:  !pip install apyori
import pandas as pd
from mlxtend.frequent_patterns import association_rules
import matplotlib.pyplot as plt
import plotly.express as px
from apyori import apriori

# Create a dataframe from the CSV file (one row per purchased item)
data = pd.read_csv('/content/Groceries_dataset.csv')

# Remember every distinct product name before the column is dropped below;
# the one-hot columns created next carry exactly these names.
products = list(data['itemDescription'].unique())

# One hot encoding the products
dummy = pd.get_dummies(data['itemDescription'])
data.drop(['itemDescription'], inplace=True, axis=1)
data = data.join(dummy)
print(data.head())

# Transaction: if a customer bought multiple products in one day, it
# will be considered as 1 transaction
data1 = data.groupby(['Member_number', 'Date'])[products].sum()
data1 = data1.reset_index()[products]
print("New Dimension", data1.shape)
print(data1.head())


# Replacing all non-zero values with the name of the product
def product_names(x):
    """Return row ``x`` with each non-zero purchase count replaced by the product name."""
    x = x.astype(object)  # the row holds counts; names are strings
    for product in products:
        if x[product] > 0:
            x[product] = product
    return x


data1 = data1.apply(product_names, axis=1)
print(data1.head())

# Removing zeros: extract the list of items bought per transaction and
# skip transactions that end up empty
transactions = []
for row in data1.values:
    basket = [entry for entry in row if entry != 0]
    if basket:
        transactions.append(basket)
print(transactions[0:10])

# apyori reads only min_support / min_confidence / min_lift / max_length
rules = apriori(transactions, min_support=0.00030, min_lift=3, max_length=2)
association_results = list(rules)
if association_results:
    print(association_results[0])

for item in association_results:
    # item.items is an unordered frozenset, so take the rule direction from
    # the ordered statistic that the confidence and lift belong to.
    first_rule = item.ordered_statistics[0]
    antecedent = ", ".join(first_rule.items_base)
    consequent = ", ".join(first_rule.items_add)
    print("Rule : ", antecedent, " -> " + consequent)
    print("Support : ", str(item.support))
    print("Confidence : ", str(first_rule.confidence))
    print("Lift : ", str(first_rule.lift))
    print("=============================")

‭OUTPUT‬‭:‬

RESULT: 8 association rules are identified in the retail (grocery) transaction dataset for frequently co-occurring items in customer purchases.

‭14‬
‭EXPERIMENT – 8‬
AIM: Implement k-fold cross-validation on a classification task to assess the model's performance, addressing the issue of overfitting.

‭CODE‬‭:‬

# Experiment 8: estimate logistic-regression accuracy on Iris with k-fold
# cross-validation for k = 2..15.
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from numpy import mean, std

data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
print(df)
print(df['species'].value_counts())

X = df.drop(['species'], axis='columns')
Y = data.target

# The classifier being evaluated; max_iter matches the value used for the
# logistic regression in Experiment 9.
model = LogisticRegression(max_iter=1000)

for i in range(2, 16):
    # Shuffle with a fixed seed so every k uses a reproducible partition
    kf = KFold(n_splits=i, random_state=1, shuffle=True)
    scores = cross_val_score(model, X, Y, scoring='accuracy', cv=kf, n_jobs=-1)
    print('n-split:', i)
    # Mean accuracy across folds, with the standard deviation in brackets
    print('Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

‭OUTPUT‬‭:‬

RESULT: K-fold cross-validation helps in obtaining a more reliable estimate of the model's performance by repeating the training and testing process k times with different subsets. It helps to identify models that generalize well to unseen data and reduces the risk of overfitting to specific patterns in the training data, leading to a more robust evaluation of model performance.

‭15‬
‭EXPERIMENT – 9‬
AIM: To implement a simple classification model to predict the species of iris flowers in the Iris dataset using basic algorithms like logistic regression or k-nearest neighbors.
‭CODE‬‭:‬

# Experiment 9: classify Iris species with k-nearest neighbours (k=5, k=1)
# and logistic regression, reporting test accuracy for each.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score  # used for every accuracy below

data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
df.info()

X = df.drop(['species'], axis='columns')
Y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

# KNN Classifier
knn5 = KNeighborsClassifier(n_neighbors=5)
knn1 = KNeighborsClassifier(n_neighbors=1)
knn5.fit(X_train, y_train)
knn1.fit(X_train, y_train)

y_pred_knn5 = knn5.predict(X_test)
y_pred_knn1 = knn1.predict(X_test)

print("Accuracy with KNN at k=5", accuracy_score(y_test, y_pred_knn5) * 100)
print("Accuracy with KNN at k=1", accuracy_score(y_test, y_pred_knn1) * 100)

# Logistic Regression
log_regr = LogisticRegression(solver='lbfgs', max_iter=1000)
log_regr.fit(X_train, y_train)

# Predict labels of unseen (test) data
y_pred_lr = log_regr.predict(X_test)

# accuracy_score returns the fraction of test labels predicted correctly
score = accuracy_score(y_test, y_pred_lr)
print("Accuracy of logistic regression ", score * 100)

‭OUTPUT‬‭:‬

RESULT: Simple classification models (K-Nearest Neighbor and Logistic Regression) are trained and evaluated.

‭16‬
‭EXPERIMENT – 10‬
AIM: Predict the quality of wine based on features like acidity, alcohol content, and pH by using either linear regression or decision trees.

‭CODE‬‭:‬

# Experiment 10: predict wine quality from its chemical measurements with
# linear regression and report RMSE and R-squared on a 20% test split.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score  # used below
import numpy as np
import matplotlib.pyplot as plt

# Load the Dataset
data = pd.read_csv('/content/winequalityN.csv')
df = pd.DataFrame(data)
print(df)

print(df.isnull().sum())
# Fill missing values with the column mean. numeric_only=True keeps mean()
# from raising if the file contains a non-numeric column.
df = df.fillna(df.mean(numeric_only=True))

# Predictor columns, shared by the design matrix and the coefficient table
feature_names = ['fixed acidity', 'volatile acidity', 'citric acid',
                 'residual sugar', 'chlorides', 'free sulfur dioxide',
                 'total sulfur dioxide', 'density', 'pH', 'sulphates',
                 'alcohol']

X = df[feature_names].values
Y = df['quality'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

# One fitted coefficient per feature, indexed by feature name
coeff_df = pd.DataFrame(regressor.coef_, feature_names, columns=['Coefficient'])
print(coeff_df)
print(regressor.intercept_)

y_pred = regressor.predict(X_test)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Calculate R-squared
r_squared = r2_score(y_test, y_pred)

print("Linear regression model performance:\nRoot Mean Squared Error (RMSE):", rmse)
print("R-squared:", r_squared)

plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, alpha=0.5, label='Predicted', color='cyan')
# Plotting y_test against itself draws the ideal y = x reference line
plt.plot(y_test, y_test, alpha=0.5, label='Actual', color='blue')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.show()

‭OUTPUT‬‭:‬

RESULT: Hence, we have trained and evaluated the model.
Evaluation Results are:
● Root Mean Squared Error (RMSE): 0.7302836974721729
● R-squared: 0.3001119515373122

‭18‬

You might also like