# AML_code_for_m2
import pandas as pd
import numpy as np
# 2. Loading CSV
# Replace 'your_dataset.csv' with the path to your actual dataset.
df = pd.read_csv('your_dataset.csv')
# The Employee_ID column is removed before the overview below is printed.
df.drop(columns=["Employee_ID"], inplace=True)

# 3. Dataset Overview
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())
# Names of all columns whose dtype is `object`, collected as a plain list.
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
# NOTE(review): `col` and `plt` are not defined in the visible lines; the
# statements below presumably sat inside a `for col in categorical_cols:` loop
# with matplotlib.pyplot imported as `plt` -- confirm against the original.
# Frequency of each distinct value in the column, drawn as a bar chart.
counts = df[col].value_counts()
plt.bar(counts.index,counts.values)
plt.title(col)
plt.show()
# NOTE(review): second counts/title/show sequence with no plotting call between
# them in the visible lines -- a plot statement appears to be missing here.
counts = df[col].value_counts()
plt.title(col)
plt.show()
# NOTE(review): `sns`, `plt`, `target_col` and `numerical_cols` are not defined
# in the visible lines -- confirm they are set up earlier in the full file.
# Histogram of the target column with a KDE curve overlaid (kde=True).
sns.histplot(df[target_col], kde=True)
plt.title(f"Distribution of {target_col}")
plt.show()
# Box plots for the columns listed in `numerical_cols`.
# NOTE(review): no plt.show() follows this call in the visible lines.
sns.boxplot(data=df[numerical_cols])
# 8. Bivariate Plots
# Pair plot of `df`; points are coloured by the target only when the target is
# one of the object-dtype (categorical) columns, otherwise no hue is used.
sns.pairplot(df, hue=target_col if target_col in categorical_cols else None)
plt.show()
# 9. Scatter Plots
# One scatter plot of each numerical column against the target, skipping the
# target column itself.
# NOTE(review): indentation of the loop and `if` bodies is missing in this
# copy of the file; the nesting below must be restored before this can run.
for col in numerical_cols:
if col != target_col:
sns.scatterplot(x=df[col], y=df[target_col])
plt.title(f"{col} vs {target_col}")
plt.show()
# NOTE(review): only the title and show() of the correlation heatmap are
# visible; the call that actually draws the heatmap appears to be missing.
plt.title("Correlation Heatmap")
plt.show()
# Prints the value frequencies of column `col`, followed by a blank line.
# NOTE(review): presumably the body of a loop over columns whose header is not
# visible -- confirm which columns are iterated.
print(df[col].value_counts())
print()
# NOTE(review): the `ordinal_columns` dict literal is opened here but its
# entries and closing brace are not visible -- presumably it maps each ordinal
# column name to its ordered category list; confirm against the original.
ordinal_columns = {
# Empty frame that receives one encoded column per ordinal feature.
ordinal_encoded_data = pd.DataFrame()
# Process each ordinal column
# NOTE(review): the loop header that binds `col` and `order` is missing, and
# `OrdinalEncoder` is not imported in the visible lines.
# Columns that are not present in `df` are skipped by the check below.
if col in df.columns:
# `order` is passed as the category ordering for the single column encoded.
encoder = OrdinalEncoder(categories=[order])
# Fit and transform the one-column frame, then flatten the result to 1-D
# before storing it under the same column name.
ordinal_encoded_data[col] = encoder.fit_transform(df[[col]]).flatten()
# NOTE(review): `ohe_cols` is not defined in the visible lines -- presumably the
# columns that were one-hot encoded in a step that is missing here.
df.drop(columns=ohe_cols, inplace=True)
# Target vector taken from the column named by `target_col`.
y = df[target_col]
# An object-dtype target is converted to integer labels with LabelEncoder.
# NOTE(review): `LabelEncoder` is not imported in the visible lines, and the
# indentation of the `if` body is missing in this copy of the file.
if y.dtype == 'object':
le = LabelEncoder()
y = le.fit_transform(y)
# The scaler is fitted on the training features only and then applied,
# without refitting, to the test features.
# NOTE(review): `scaler`, `X_train` and `X_test` are not defined in the visible
# lines -- the feature matrix and train/test split steps appear to be missing.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# 17. Model Training and Hyperparameter Tuning for Classifier
# Candidate classifiers keyed by display name.
# NOTE(review): the estimator classes are not imported in the visible lines,
# and the closing brace of `models` is missing.
models = {
"DecisionTreeClassifier": DecisionTreeClassifier(),
"RandomForestClassifier": RandomForestClassifier(),
# NOTE(review): the entries below map the same names to hyperparameter values;
# presumably they belong to a separate `param_grids = {` dict (referenced
# further down) whose opening line is missing -- confirm against the original.
"DecisionTreeClassifier": {
'min_samples_leaf': [1, 2, 4]
},
"RandomForestClassifier": {
'min_samples_leaf': [1, 2]
},
# Declared to hold tuned models; no assignment into it is visible here.
best_models = {}
# NOTE(review): `model_name` and `grid_search` are not defined in the visible
# lines -- the loop over `models` and the GridSearchCV construction appear to
# be missing.
print(f"Training {model_name}...")
param_grid = param_grids[model_name]
# Fit GridSearchCV to the data (assuming X_train and y_train are your training data)
grid_search.fit(X_train, y_train)
# Keep the best estimator found by the search and evaluate it on the test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print(classification_report(y_test, y_pred))
# Regressor counterparts of the classifier setup above.
# NOTE(review): the dict openings for these entries are not visible, and the
# two hyperparameter grids below are empty in this copy of the file.
"DecisionTreeRegressor": DecisionTreeRegressor(),
"RandomForestRegressor": RandomForestRegressor(),
"DecisionTreeRegressor": {
},
"RandomForestRegressor": {
},
# Same fit / best-estimator / predict sequence as for the classifiers.
# NOTE(review): no regression metric is computed from `y_pred` in the visible
# lines; only a blank line is printed.
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print()