0% found this document useful (0 votes)
25 views

COMPARISON - Jupyter Notebook

Uploaded by

Taqqadus Zahra
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
25 views

COMPARISON - Jupyter Notebook

Uploaded by

Taqqadus Zahra
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

11/24/24, 3:30 PM COMPARISON - Jupyter Notebook

In [1]: import pandas as pd


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_ma
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.ensemble import IsolationForest
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_e
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

In [2]: file= "vilidation1.xlsx"


data = pd.read_excel(file)

In [3]: data = data.dropna()

In [4]: # Encoding categorical variables if any


# Integer-encode every object-dtype column and keep the fitted encoder for each
# one, keyed by column name, so the mapping can be looked up later.
label_encoders = {}
object_columns = data.select_dtypes(include=['object']).columns
for column in object_columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

In [5]: data.columns = data.columns.str.strip()

In [6]: data.columns = data.columns.str.replace('wheat Area', 'Wheat Area', case=False

In [7]: X = data[['Q. id', 'Wheat Area', 'Wheat Seed', 'Fertilizer kg', 'Consumption']
y = data['Production']

In [8]: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random

In [336]: preprocessor = ColumnTransformer(


transformers=[('standarization',scaler,[0,1,2,3,4])
],
remainder= 'passthrough'
)

localhost:8888/notebooks/COMPARISON.ipynb 1/5
11/24/24, 3:30 PM COMPARISON - Jupyter Notebook

In [337]: preprocessor

Out[337]: ColumnTransformer(remainder='passthrough',
transformers=[('standarization', StandardScaler(),
[0, 1, 2, 3, 4])])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust
the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with
nbviewer.org.

In [338]: scaler = StandardScaler()


X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [339]: X_train_dummy = preprocessor.fit_transform(X_train)


X_test_dummy = preprocessor.transform(X_test)

In [340]: X_train_dummy

Out[340]: array([[-0.30931761, 3.54091734, 3.6737382 , 3.49363766, 0.05084911],


[-1.32647631, -0.48266025, -0.01359251, -0.25144225, -0.462223 ],
[ 0.64721574, -0.00929818, -0.6361007 , -0.19636755, -0.20568695],
...,
[ 0.02748991, -0.71934129, -0.64694579, -0.42768131, -0.84702709],
[ 1.13895471, -0.71934129, -0.64694579, -0.36159166, -0.52635702],
[-1.10418334, 2.5941932 , 2.80613097, 1.18050007, 0.4356532 ]])

In [305]: models = {
# 'Linear Regression': LinearRegression(),
# 'Decision Tree': DecisionTreeRegressor(),
# 'K-Nearest Neighbors': KNeighborsRegressor(),
'Neural Network': MLPRegressor(max_iter=1000),
'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_sta
'Gaussian Process': GaussianProcessRegressor(kernel=RBF())

}

In [306]: # Function to evaluate models


def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Train ``model`` on the training split and score it on the test split.

    ``model.fit`` is called on the object passed in, so the caller's estimator
    is the one that gets trained.

    Returns:
        A ``(mse, rmse, r2)`` tuple: the mean squared error of the test
        predictions, its square root, and the R² score.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    squared_error = mean_squared_error(y_test, predicted)
    return squared_error, np.sqrt(squared_error), r2_score(y_test, predicted)

localhost:8888/notebooks/COMPARISON.ipynb 2/5
11/24/24, 3:30 PM COMPARISON - Jupyter Notebook

In [307]: for model_name, model in models.items():


mse, rmse, r2 = evaluate_model(model, X_train, X_test, y_train, y_test)
print(f"Model: {model_name}")
print(f" Mean Squared Error (MSE): {mse}")
print(f" Root Mean Squared Error (RMSE): {rmse}")
print(f" R² Score: {r2}")

Model: Neural Network


Mean Squared Error (MSE): 0.05464342836221142
Root Mean Squared Error (RMSE): 0.2337593385561557
R² Score: 0.9993770943754456
Model: Random Forest
Mean Squared Error (MSE): 10.222415714285715
Root Mean Squared Error (RMSE): 3.1972512748118054
R² Score: 0.8834699718554807
Model: Gradient Boosting
Mean Squared Error (MSE): 7.8968282308177855
Root Mean Squared Error (RMSE): 2.81012957544982
R² Score: 0.9099804154214118
Model: Gaussian Process
Mean Squared Error (MSE): 35.679723815201044
Root Mean Squared Error (RMSE): 5.9732506908048855
R² Score: 0.5932703837740007

C:\Users\swp\anaconda3\Lib\site-packages\sklearn\gaussian_process\_gpr.py:659:
ConvergenceWarning: lbfgs failed to converge (status=2):
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
_check_optimize_result("lbfgs", opt_res)

In [308]: # Dictionary to store evaluation results


# One independent, initially empty list per output column: the model name
# followed by the three metrics collected for it.
results = {column: [] for column in ('Model', 'MSE', 'RMSE', 'R2')}

localhost:8888/notebooks/COMPARISON.ipynb 3/5
11/24/24, 3:30 PM COMPARISON - Jupyter Notebook

In [309]: # Evaluate each model and store results


# Empty the result columns first so that re-running this cell replaces the
# previous scores instead of appending a second copy of every model.
for column_values in results.values():
    column_values.clear()

# Fit and score each model, recording one entry per model in every column.
for model_name, model in models.items():
    mse, rmse, r2 = evaluate_model(model, X_train, X_test, y_train, y_test)
    results['Model'].append(model_name)
    results['MSE'].append(mse)
    results['RMSE'].append(rmse)
    results['R2'].append(r2)

C:\Users\swp\anaconda3\Lib\site-packages\sklearn\gaussian_process\_gpr.py:659:
ConvergenceWarning: lbfgs failed to converge (status=2):
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
_check_optimize_result("lbfgs", opt_res)

In [310]: # Convert results to a DataFrame for plotting


results_df = pd.DataFrame(results)

In [311]: # Plot the results


# Long form: one row per (model, metric) pair.
results_long = results_df.melt(id_vars='Model', var_name='Metric', value_name='Value')

# MSE, RMSE and R² are on very different scales (R² is at most 1, while the
# errors can be far larger), so a shared y-axis makes the R² bars unreadable.
# Draw one panel per metric, each with its own y-axis.
metric_names = [column for column in results_df.columns if column != 'Model']
fig, axes = plt.subplots(
    1, len(metric_names), figsize=(5 * len(metric_names), 6), squeeze=False
)
for ax, metric in zip(axes[0], metric_names):
    metric_rows = results_long[results_long['Metric'] == metric]
    sns.barplot(x='Model', y='Value', data=metric_rows, ax=ax)
    ax.set_title(metric)
    ax.set_ylabel('Metric Value')
    ax.tick_params(axis='x', labelrotation=45)
    plt.setp(ax.get_xticklabels(), ha='right')
fig.suptitle('Model Performance Comparison (MSE, RMSE, R²)')
fig.tight_layout()
plt.show()

localhost:8888/notebooks/COMPARISON.ipynb 4/5
11/24/24, 3:30 PM COMPARISON - Jupyter Notebook

In [9]: model = RandomForestRegressor(n_estimators=100, random_state=42)


model.fit(X_train, y_train)
Out[9]: ▾ RandomForestRegressor
RandomForestRegressor(random_state=42)

In [ ]: ​

In [10]: future_data = pd.DataFrame({
    # A single hypothetical observation to forecast: every list holds exactly
    # one value. Column names and order follow the training feature columns.
    'Q. id': [1],  # presumably a record identifier; included because it is one of the training features
    'Wheat Area': [2.0],  # wheat area for the forecast row (units not shown here, TODO confirm)
    'Wheat Seed': [20.0],  # seed amount for the forecast row
    'Fertilizer kg': [1.3],  # fertilizer amount; the column name suggests kilograms, TODO confirm
    'Consumption': [5.0]  # consumption value for the forecast row
})

In [11]: future_predictions = model.predict(future_data)

In [12]: future_years = [2024]


for year, prediction in zip(future_years, future_predictions):
print(f'Predicted Wheat Production for {year}: {prediction:.2f} kg')
Predicted Wheat Production for 2024: 5.00 kg

In [ ]: ​

In [ ]: ​

localhost:8888/notebooks/COMPARISON.ipynb 5/5

You might also like