# Car Price Prediction
# This Python 3 environment comes with many helpful analytics libraries installed
import os

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# List every file under the Kaggle input directory so the reader can see
# which datasets are available. (Indentation was lost in the PDF
# extraction; restored here.)
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
[3]: df = pd.read_csv("F:/499a/CarPrice_Assignment.csv")
1
2 3 1 alfa-romero Quadrifoglio gas std
3 4 2 audi 100 ls gas std
4 5 2 audi 100ls gas std
.. … … … … …
200 201 -1 volvo 145e (sw) gas std
201 202 -1 volvo 144ea gas turbo
202 203 -1 volvo 244dl gas std
203 204 -1 volvo 246 diesel turbo
204 205 -1 volvo 264gl gas turbo
2
204 mpfi 3.78 3.15 9.5 114 5400
# Structural overview: column names, then dtypes and non-null counts.
# (Extraction had fused the cell-execution markers "[5]:"/"[6]:" into the
# code; removed so the lines are valid Python.)
df.columns
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 car_ID 205 non-null int64
1 symboling 205 non-null int64
2 CarName 205 non-null object
3 fueltype 205 non-null object
4 aspiration 205 non-null object
5 doornumber 205 non-null object
6 carbody 205 non-null object
7 drivewheel 205 non-null object
8 enginelocation 205 non-null object
9 wheelbase 205 non-null float64
10 carlength 205 non-null float64
11 carwidth 205 non-null float64
12 carheight 205 non-null float64
3
13 curbweight 205 non-null int64
14 enginetype 205 non-null object
15 cylindernumber 205 non-null object
16 enginesize 205 non-null int64
17 fuelsystem 205 non-null object
18 boreratio 205 non-null float64
19 stroke 205 non-null float64
20 compressionratio 205 non-null float64
21 horsepower 205 non-null int64
22 peakrpm 205 non-null int64
23 citympg 205 non-null int64
24 highwaympg 205 non-null int64
25 price 205 non-null float64
dtypes: float64(8), int64(8), object(10)
memory usage: 41.8+ KB
[7]: df.isnull().sum()
[7]: car_ID 0
symboling 0
CarName 0
fueltype 0
aspiration 0
doornumber 0
carbody 0
drivewheel 0
enginelocation 0
wheelbase 0
carlength 0
carwidth 0
carheight 0
curbweight 0
enginetype 0
cylindernumber 0
enginesize 0
fuelsystem 0
boreratio 0
stroke 0
compressionratio 0
horsepower 0
peakrpm 0
citympg 0
highwaympg 0
price 0
dtype: int64
[8]: df.duplicated().sum()
4
[8]: 0
5
def plot_count(col: str, rotate: bool = False) -> None:
    """Bar chart of value counts for one categorical column of `df`.

    Replaces four copy-pasted cells with a single helper. Bars are
    ordered most-frequent first; `rotate=True` turns the x labels
    vertical for long category names (used for `carbody`).
    """
    plt.figure(figsize=(6, 5))
    sns.countplot(data=df, x=col, order=df[col].value_counts().index)
    if rotate:
        plt.xticks(rotation=90, fontsize=10)
    plt.show()

plot_count("aspiration")
plot_count("doornumber")
plot_count("carbody", rotate=True)
plot_count("drivewheel")
9
# Pairwise correlations between the numeric features.
# numeric_only=True is required: df still contains object-dtype columns
# here (CarName, fueltype, ...), and df.corr() without it raises a
# TypeError on pandas >= 2.0.
correlation_matrix = df.corr(numeric_only=True)

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Heatmap of Correlation Matrix')
plt.show()
10
[16]: df.head()
11
carheight curbweight enginetype cylindernumber enginesize fuelsystem \
0 48.8 2548 dohc four 130 mpfi
1 48.8 2548 dohc four 130 mpfi
2 52.4 2823 ohcv six 152 mpfi
3 54.3 2337 ohc four 109 mpfi
4 54.3 2824 ohc five 136 mpfi
highwaympg price
0 27 13495.0
1 27 16500.0
2 26 16500.0
3 30 13950.0
4 22 17450.0
def scatter(col: str, label: str, pos: int) -> None:
    """Scatter one numeric feature of `df` against price in subplot `pos`.

    NOTE(review): the original definition of `scatter` was lost in the PDF
    extraction. This version is reconstructed from the call sites (column
    name, axis label, subplot index 1..11) and the tall 10x20 figure — a
    6x2 subplot grid of feature-vs-price scatters. Confirm against the
    source notebook.
    """
    plt.subplot(6, 2, pos)
    plt.scatter(df[col], df["price"], alpha=0.6)
    plt.xlabel(label)
    plt.ylabel("Price")

plt.figure(figsize=(10, 20))
scatter('wheelbase', 'Wheelbase', 1)
scatter('carlength', 'Car Length', 2)
scatter('carheight', 'Car Height', 3)
scatter('curbweight', 'Curb Weight', 4)
scatter('enginesize', 'Engine Size', 5)
scatter('boreratio', 'Bore Ratio', 6)
scatter('stroke', 'Stroke', 7)
scatter('horsepower', 'Horsepower', 8)
scatter('peakrpm', 'Peak RPM', 9)
scatter('highwaympg', 'Highway MPG', 10)
scatter('citympg', 'City MPG', 11)
plt.tight_layout()
plt.show()
12
13
[18]: df.drop(columns=["car_ID","peakrpm","stroke","carheight"],inplace=True)
# Integer-encode every categorical column so the regressors can consume them.
# NOTE(review): the original catCols line was truncated mid-token by the PDF
# extraction ("...'eng") and the encoding loop itself was lost. The list is
# reconstructed from df.info()'s object-dtype columns and the loop from the
# standard LabelEncoder pattern — confirm against the source notebook.
from sklearn.preprocessing import LabelEncoder  # import cell lost in extraction

en = LabelEncoder()
catCols = ['CarName', 'fueltype', 'aspiration', 'doornumber', 'carbody',
           'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber',
           'fuelsystem']
for col in catCols:
    # fit_transform refits the encoder per column; the fitted state is not
    # reused across columns, so sharing one encoder instance is safe here.
    df[col] = en.fit_transform(df[col])
[21]: df.head()
# Separate the feature matrix from the regression target.
target_col = "price"
X = df.drop(columns=[target_col])
y = df[target_col]
14
# Sanity-check the split: first rows of features, then of the target.
X.head()
y.head()
[24]: 0 13495.0
1 16500.0
2 16500.0
3 13950.0
4 17450.0
Name: price, dtype: float64
# 90/10 train/test split with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split  # import cell lost in extraction

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=4
)

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Fit the scaler on the training data only, then apply the same
# transformation to the test data — fitting on test data would leak
# test-set statistics into the model.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# Transform the test data using the same scaler
X_test = scaler.transform(X_test)

# NOTE(review): the cell that constructed and fit `regressor` (and produced
# `y_pred`) was lost in the PDF extraction; without it the metric lines
# below raise NameError. A LinearRegression baseline is reconstructed here
# — confirm against the source notebook.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

r2 = regressor.score(X_test, y_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
16
from xgboost import XGBRegressor
import time

# NOTE(review): the imports below and large parts of this cell (the
# train_regressor body, part of the regressors dict, and the training-loop
# header) were lost in the PDF extraction. They are reconstructed from the
# surviving fragments and the printed output ("Epoch N - R-squared: ...",
# ten models trained for ten epochs each) — confirm against the source
# notebook.
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge
from sklearn.ensemble import (
    RandomForestRegressor, AdaBoostRegressor, BaggingRegressor,
    ExtraTreesRegressor, GradientBoostingRegressor,
)
from sklearn.metrics import mean_squared_error, mean_absolute_error


def train_regressor(regressor, X_train, y_train, X_test, y_test, epochs=10):
    """Fit `regressor` `epochs` times, printing test metrics and fit time.

    Returns the list of per-epoch training times in seconds. Refitting the
    same estimator repeatedly only measures timing stability; deterministic
    models produce identical metrics every epoch (visible in the output).
    """
    training_times = []
    for epoch in range(1, epochs + 1):
        start = time.time()
        regressor.fit(X_train, y_train)
        elapsed = time.time() - start
        training_times.append(elapsed)

        y_pred = regressor.predict(X_test)
        r2 = regressor.score(X_test, y_test)
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        print(f"Epoch {epoch} - R-squared: {r2:.2f}, MSE: {mse:.2f}, "
              f"MAE: {mae:.2f}, Time: {elapsed:.2f} seconds")
    return training_times


regressors = {
    "SVR": SVR(kernel='linear', C=1),
    "KNeighborsRegressor": KNeighborsRegressor(n_neighbors=5),
    "DecisionTreeRegressor": DecisionTreeRegressor(max_depth=5),
    "Ridge": Ridge(alpha=1.0),
    "RandomForestRegressor": RandomForestRegressor(n_estimators=200,
                                                   max_depth=3, random_state=2),
    # NOTE(review): hyper-parameters for the next three were lost in the
    # extraction; library defaults used — confirm against the source notebook.
    "AdaBoostRegressor": AdaBoostRegressor(),
    "BaggingRegressor": BaggingRegressor(),
    "ExtraTreesRegressor": ExtraTreesRegressor(),
    "GradientBoostingRegressor": GradientBoostingRegressor(
        learning_rate=0.2, min_samples_leaf=4, min_samples_split=5,
        n_estimators=100),
    "XGBRegressor": XGBRegressor(),
}

results = {}
for regressor_name, regressor in regressors.items():
    print(f"Training {regressor_name}…")
    training_times = train_regressor(regressor, X_train, y_train, X_test,
                                     y_test)
    results[regressor_name] = training_times

# Print the training times for each epoch for each regressor
for regressor_name, training_times in results.items():
    print(f"{regressor_name} Training Times (in seconds): {training_times}")
Training SVR…
Epoch 1 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.02 seconds
Epoch 2 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 3 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 4 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 5 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 6 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 7 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 8 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 9 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Epoch 10 - R-squared: 0.81, MSE: 9494398.64, MAE: 2307.18, Time: 0.01 seconds
Training KNeighborsRegressor…
Epoch 1 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 2 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 3 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 4 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 5 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 6 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 7 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 8 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 9 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Epoch 10 - R-squared: 0.74, MSE: 12723427.90, MAE: 2361.76, Time: 0.00 seconds
Training DecisionTreeRegressor…
Epoch 1 - R-squared: 0.88, MSE: 6163271.96, MAE: 1892.47, Time: 0.00 seconds
Epoch 2 - R-squared: 0.88, MSE: 5995462.40, MAE: 1830.23, Time: 0.00 seconds
Epoch 3 - R-squared: 0.87, MSE: 6577179.04, MAE: 1941.35, Time: 0.00 seconds
Epoch 4 - R-squared: 0.85, MSE: 7309910.35, MAE: 2018.61, Time: 0.00 seconds
Epoch 5 - R-squared: 0.88, MSE: 6163271.96, MAE: 1892.47, Time: 0.00 seconds
Epoch 6 - R-squared: 0.87, MSE: 6455411.05, MAE: 1941.35, Time: 0.00 seconds
Epoch 7 - R-squared: 0.85, MSE: 7278440.34, MAE: 2002.93, Time: 0.00 seconds
Epoch 8 - R-squared: 0.85, MSE: 7263868.78, MAE: 1956.37, Time: 0.00 seconds
Epoch 9 - R-squared: 0.88, MSE: 6163271.96, MAE: 1892.47, Time: 0.00 seconds
Epoch 10 - R-squared: 0.87, MSE: 6455411.05, MAE: 1941.35, Time: 0.00 seconds
Training Ridge…
Epoch 1 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 2 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 3 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 4 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 5 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
18
Epoch 6 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 7 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 8 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 9 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Epoch 10 - R-squared: 0.83, MSE: 8196426.81, MAE: 2170.84, Time: 0.00 seconds
Training RandomForestRegressor…
Epoch 1 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.36 seconds
Epoch 2 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.30 seconds
Epoch 3 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.30 seconds
Epoch 4 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.28 seconds
Epoch 5 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.28 seconds
Epoch 6 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.28 seconds
Epoch 7 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.29 seconds
Epoch 8 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.28 seconds
Epoch 9 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.29 seconds
Epoch 10 - R-squared: 0.87, MSE: 6582210.91, MAE: 1914.52, Time: 0.28 seconds
Training AdaBoostRegressor…
Epoch 1 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.18 seconds
Epoch 2 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.15 seconds
Epoch 3 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.17 seconds
Epoch 4 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.16 seconds
Epoch 5 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.16 seconds
Epoch 6 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.18 seconds
Epoch 7 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.16 seconds
Epoch 8 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.14 seconds
Epoch 9 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.19 seconds
Epoch 10 - R-squared: 0.89, MSE: 5259872.25, MAE: 1933.86, Time: 0.15 seconds
Training BaggingRegressor…
Epoch 1 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.38 seconds
Epoch 2 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.43 seconds
Epoch 3 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.53 seconds
Epoch 4 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.47 seconds
Epoch 5 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.39 seconds
Epoch 6 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.37 seconds
Epoch 7 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.37 seconds
Epoch 8 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.35 seconds
Epoch 9 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.35 seconds
Epoch 10 - R-squared: 0.91, MSE: 4226915.74, MAE: 1486.30, Time: 0.36 seconds
Training ExtraTreesRegressor…
Epoch 1 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.21 seconds
Epoch 2 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.23 seconds
Epoch 3 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.22 seconds
Epoch 4 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.20 seconds
Epoch 5 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.21 seconds
Epoch 6 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.21 seconds
Epoch 7 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.22 seconds
Epoch 8 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.21 seconds
Epoch 9 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.21 seconds
19
Epoch 10 - R-squared: 0.92, MSE: 4074228.75, MAE: 1627.64, Time: 0.23 seconds
Training GradientBoostingRegressor…
Epoch 1 - R-squared: 0.93, MSE: 3420535.61, MAE: 1464.40, Time: 0.12 seconds
Epoch 2 - R-squared: 0.93, MSE: 3410006.06, MAE: 1463.99, Time: 0.11 seconds
Epoch 3 - R-squared: 0.93, MSE: 3501758.93, MAE: 1482.52, Time: 0.13 seconds
Epoch 4 - R-squared: 0.93, MSE: 3415039.28, MAE: 1453.30, Time: 0.12 seconds
Epoch 5 - R-squared: 0.93, MSE: 3424994.32, MAE: 1465.81, Time: 0.13 seconds
Epoch 6 - R-squared: 0.93, MSE: 3406715.27, MAE: 1459.56, Time: 0.15 seconds
Epoch 7 - R-squared: 0.93, MSE: 3542737.97, MAE: 1495.00, Time: 0.12 seconds
Epoch 8 - R-squared: 0.93, MSE: 3427623.50, MAE: 1466.08, Time: 0.15 seconds
Epoch 9 - R-squared: 0.93, MSE: 3419481.91, MAE: 1454.70, Time: 0.12 seconds
Epoch 10 - R-squared: 0.93, MSE: 3558609.02, MAE: 1489.47, Time: 0.12 seconds
Training XGBRegressor…
Epoch 1 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.10 seconds
Epoch 2 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
Epoch 3 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.12 seconds
Epoch 4 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
Epoch 5 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
Epoch 6 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.08 seconds
Epoch 7 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
Epoch 8 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
Epoch 9 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
Epoch 10 - R-squared: 0.92, MSE: 3711713.11, MAE: 1324.63, Time: 0.09 seconds
SVR Training Times (in seconds): [0.015958786010742188, 0.013962268829345703,
0.012964725494384766, 0.013965368270874023, 0.0139617919921875,
0.013960838317871094, 0.01302337646484375, 0.012023448944091797,
0.012023448944091797, 0.013007402420043945]
KNeighborsRegressor Training Times (in seconds): [0.0020248889923095703,
0.0022466182708740234, 0.0009970664978027344, 0.0009970664978027344,
0.001994609832763672, 0.0009970664978027344, 0.000997304916381836,
0.0009968280792236328, 0.0009965896606445312, 0.0019936561584472656]
DecisionTreeRegressor Training Times (in seconds): [0.0019941329956054688,
0.001994609832763672, 0.0029840469360351562, 0.0019948482513427734,
0.001994609832763672, 0.001993894577026367, 0.001994609832763672,
0.001994609832763672, 0.002992868423461914, 0.002991914749145508]
Ridge Training Times (in seconds): [0.0019958019256591797,
0.0019943714141845703, 0.001995086669921875, 0.001994609832763672,
0.0019953250885009766, 0.001994609832763672, 0.0019943714141845703,
0.002006053924560547, 0.0019943714141845703, 0.0009975433349609375]
RandomForestRegressor Training Times (in seconds): [0.35610437393188477,
0.2971975803375244, 0.29517292976379395, 0.282275915145874, 0.2792532444000244,
0.28029561042785645, 0.28623199462890625, 0.28224754333496094,
0.29026293754577637, 0.27829551696777344]
AdaBoostRegressor Training Times (in seconds): [0.18350958824157715,
0.15259027481079102, 0.16655611991882324, 0.16161036491394043,
0.1585242748260498, 0.17547845840454102, 0.15953278541564941,
0.14162087440490723, 0.18550562858581543, 0.15255141258239746]
BaggingRegressor Training Times (in seconds): [0.3809685707092285,
20
0.43088865280151367, 0.5305945873260498, 0.474729061126709, 0.38999199867248535,
0.3660435676574707, 0.3700551986694336, 0.35400986671447754,
0.35301899909973145, 0.36305880546569824]
ExtraTreesRegressor Training Times (in seconds): [0.20939874649047852,
0.23342037200927734, 0.21642446517944336, 0.20345544815063477,
0.20647621154785156, 0.20644760131835938, 0.22042274475097656,
0.20648550987243652, 0.21342992782592773, 0.22838807106018066]
GradientBoostingRegressor Training Times (in seconds): [0.11667943000793457,
0.11170077323913574, 0.133683443069458, 0.12067842483520508,
0.12865281105041504, 0.14561057090759277, 0.12462282180786133,
0.14760732650756836, 0.12462782859802246, 0.12366008758544922]
XGBRegressor Training Times (in seconds): [0.0967404842376709,
0.08976054191589355, 0.11768627166748047, 0.09076642990112305,
0.09474587440490723, 0.0827779769897461, 0.09275102615356445,
0.09075665473937988, 0.08676767349243164, 0.09075760841369629]
# NOTE(review): these appends belong to a model-evaluation loop whose header
# (and the initialization of the four accumulator lists, plus `name`, `r2`,
# `mse`, `mae`) was lost in the PDF extraction — confirm against the source
# notebook before relying on this cell.
algorithm_names.append(name)
r2_scores.append(r2)
mse_scores.append(mse)
mae_scores.append(mae)
# Rich display of the summary table; `results_df` is presumably the frame
# built a few cells below — TODO confirm cell ordering in the source notebook.
display(results_df)
21
5 AdaBoostRegressor 0.899744 5.004332e+06
6 BaggingRegressor 0.904408 4.771540e+06
7 ExtraTreesRegressor 0.911473 4.418865e+06
8 GradientBoostingRegressor 0.941728 2.908671e+06
9 XGBRegressor 0.913827 4.301364e+06
# Define your training and testing data: X_train, y_train, X_test, y_test
# NOTE(review): the loop that defined `model_name`, `r2`, `mse`, `mae` and
# `accuracy` was lost in the PDF extraction; this print is that loop's
# per-model reporting line, rejoined here because the extraction's line
# wrap had split the f-string literal (a SyntaxError as extracted).
print(f"{model_name} - R-squared: {r2}, MSE: {mse}, MAE: {mae}, Accuracy: {accuracy}")
# Collect the per-model metrics into one summary table.
# (A stray page number from the PDF extraction had been fused into the
# middle of this dict literal; removed, and indentation restored.)
results_df = pd.DataFrame({
    'Algorithm': algorithm_names,
    'R2 Score': r2_scores,
    'Mean Squared Error': mse_scores,
    'Mean Absolute Error': mae_scores,
    'Accuracy': accuracy_scores
})
24