0% found this document useful (0 votes)
9 views

linear

Uploaded by

Belete Siyum
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
9 views

linear

Uploaded by

Belete Siyum
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

# Import necessary libraries
import numpy as np  # NumPy for numerical operations, especially array manipulation
import pandas as pd  # Pandas for data handling and manipulation

# Libraries for data visualization
import seaborn as sns  # Seaborn for enhanced data visualization
import matplotlib.pyplot as plt  # Matplotlib for creating basic plots

# Libraries for machine learning
from sklearn.model_selection import train_test_split  # Splits a dataset into training and testing sets
from sklearn.linear_model import LinearRegression  # Class to create and use linear regression models
import sklearn.metrics as sm  # Metrics module for evaluating model performance

# Load the CSV file (stored in Google Drive) into a Pandas DataFrame named 'data'.
data = pd.read_csv('/content/drive/My Drive/linearDS.csv')

# Display the first few rows of the DataFrame to understand its structure and
# values; .head() shows the first 5 rows by default.
print(data.head())

# Create a scatter plot with a linear regression line using Seaborn.
# x and y name the columns to plot and data names the DataFrame they come from.
# ci=None disables the confidence-interval shading around the regression line,
# making the plot cleaner.
sns.lmplot(x='LotArea', y='SalePrice', data=data, ci=None)
plt.show()  # Displays the plot

# Extract the 'LotArea' and 'SalePrice' columns from the dataset and reshape
# each into a 2D NumPy array (one row per sample, one column) for processing.
x = np.array(data['LotArea']).reshape(-1, 1)  # Independent variable as a 2D array
y = np.array(data['SalePrice']).reshape(-1, 1)  # Dependent variable as a 2D array

# Split the data into training and testing sets: test_size=0.25 reserves 25%
# of the rows for testing and the remaining 75% goes to training.
# random_state fixes the shuffle so the split, and therefore the scores printed
# later in the script, are the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

# Create a Linear Regression model
regr = LinearRegression()  # Initializes the linear regression model

# Fit the training data to the model (i.e., train the model): x_train and
# y_train are used to learn the relationship between feature and target.
regr.fit(x_train, y_train)

# Display how well the model fits the training data. score() returns the
# R² value, which indicates how well the model explains the variance in the data.
print(regr.score(x_train, y_train))

# Predict SalePrice values for the test dataset using the learned model
y_pred = regr.predict(x_test)

# Create a plot to compare actual vs. predicted values on the test set
plt.scatter(x_test, y_test, color='b')  # Actual values (x_test, y_test) as blue points
plt.plot(x_test, y_pred, color='r')  # Predicted values (x_test, y_pred) as a red line
plt.show()  # Displays the plot

# Calculate and display the R² score for the predictions on the test set,
# rounded to 2 decimal places
print("R2 Score = ", round(sm.r2_score(y_test, y_pred), 2))

# Get user input for LotArea to make a prediction on SalePrice
raw_value = input("Enter a value for LotArea: ")  # Prompt user for a 'LotArea' value

try:
    # input() returns text, so convert it to float for numerical operations.
    input_value = float(raw_value)
except ValueError:
    # Non-numeric text cannot be converted; report it instead of ending the
    # script with a traceback, and skip the prediction.
    print("Invalid LotArea value; please enter a number:", repr(raw_value))
else:
    # The single value is wrapped as [[value]] so the model receives 2D input.
    predicted_price = regr.predict([[input_value]])
    # Index [0][0] pulls the one predicted value out of the 2D result.
    print("Predicted SalePrice = ", predicted_price[0][0])

You might also like