# NOTE: stray "Python" code-fence labels from the chat export,
# commented out so this file parses as Python.
# Visualize pairwise feature correlations as an annotated heatmap.
plt.figure(figsize=(12, 8))
correlation_matrix = df.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap')
plt.show()
# The Boston housing data contains no missing values, so we move straight
# to feature scaling.
from sklearn.preprocessing import StandardScaler

# Standardize every predictor to zero mean / unit variance; the target
# column 'PRICE' is excluded from scaling.
scaler = StandardScaler()
features = df.drop(columns='PRICE')
features_scaled = scaler.fit_transform(features)
# Split the standardized features and target into training (80%) and
# testing (20%) sets.
# BUG FIX: the original called train_test_split(X, y, ...) but X and y were
# never defined — the scaled feature matrix and target computed above were
# silently ignored.  Bind them explicitly here.
X = features_scaled   # standardized predictors produced by the scaling step
y = df['PRICE']       # regression target (median home price)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Construct the three candidate regressors.  Fixed random_state keeps the
# tree-based models reproducible across runs.
linear_model = LinearRegression()
tree_model = DecisionTreeRegressor(random_state=42)
forest_model = RandomForestRegressor(random_state=42)

# Fit every model on the training partition.  The fits are independent,
# so looping over them is equivalent to three explicit .fit() calls.
for model in (linear_model, tree_model, forest_model):
    model.fit(X_train, y_train)

# Collect held-out predictions from each fitted model for evaluation.
y_pred_linear = linear_model.predict(X_test)
y_pred_tree = tree_model.predict(X_test)
y_pred_forest = forest_model.predict(X_test)
# Step 5: Model Evaluation
# We'll evaluate the models using Mean Absolute Error (MAE) and
# Root Mean Squared Error (RMSE).
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Evaluate models
def evaluate_model(y_true, y_pred, model_name):
    """Print MAE and RMSE for one model's test-set predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Model predictions aligned with ``y_true``.
    model_name : str
        Label used in the printed report.

    Returns
    -------
    tuple of (float, float)
        ``(mae, rmse)`` so callers can tabulate the metrics as well as
        read the printed report.  (Backward-compatible: the original
        returned ``None`` and callers only relied on the printing.)
    """
    # BUG FIX: in the original, the body lines were not indented under the
    # def, which is a SyntaxError in Python.
    mae = mean_absolute_error(y_true, y_pred)
    # sklearn's mean_squared_error returns MSE; take the square root for RMSE.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"\n{model_name} Performance:")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    return mae, rmse