# Linear regression
# Fit a simple linear regression of 'SalePrice' on 'LotArea', plot the fitted
# line against the held-out data, and report the R² scores.

# Imports for every name the script uses. Re-importing is harmless if an
# earlier notebook cell has already loaded them.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sm  # `sm` must expose r2_score, which sklearn.metrics does
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the CSV file (stored in Google Drive) into a Pandas dataframe.
# NOTE(review): the path presumably requires Google Drive to be mounted at
# /content/drive beforehand (e.g. in Colab) — confirm.
data = pd.read_csv('/content/drive/My Drive/linearDS.csv')

# Display the first few rows of the dataframe to understand its structure and
# values (.head() shows the first 5 rows by default).
print(data.head())

# Extract 'LotArea' and 'SalePrice' from the dataset and reshape each into a
# 2D (n_samples, 1) NumPy array, the layout the estimator expects.
x = np.array(data['LotArea']).reshape(-1, 1)    # independent variable
y = np.array(data['SalePrice']).reshape(-1, 1)  # dependent variable

# Split the data into training and testing sets: test_size=0.25 reserves 25%
# of the rows for testing and leaves the remaining 75% for training.
# No random_state is passed, so the split (and therefore the scores below)
# changes from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Create the model and fit it to the training data (i.e., train the model) so
# it learns the relationship between the feature and the target.
regr = LinearRegression()
regr.fit(x_train, y_train)

# Display how well the model fits the training data: the R² score indicates
# how much of the variance in the data the model explains.
print(regr.score(x_train, y_train))

# Predict the target for the held-out features. These predictions are needed
# by both the plot and the test-set R² score below.
y_pred = regr.predict(x_test)

# Plot actual vs. predicted values on the test set.
plt.scatter(x_test, y_test, color='b')  # actual values (x_test, y_test) in blue
plt.plot(x_test, y_pred, color='r')     # predicted values (x_test, y_pred) in red
plt.show()                              # display the plot

# Calculate and display the R² score for the predictions on the test set,
# rounded to 2 decimal places.
print("R2 Score = ", round(sm.r2_score(y_test, y_pred), 2))