import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.feature_selection import SelectKBest, chi2, mutual_info_classif

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, SimpleRNN
from tensorflow.keras.utils import to_categorical

# Load dataset
df = pd.read_csv('your_dataset.csv')  # replace 'your_dataset.csv' with your dataset path

# Assuming the last column is the target variable and the rest are features
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Encode target labels if they are categorical
le = LabelEncoder()
y = le.fit_transform(y)

# Impute missing values with the column mean (assumes all features are numeric)
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Select the top features with Chi-Square and Mutual Information (k=10; adjust as needed)
# Note: chi2 requires non-negative feature values, and fitting the selectors on the full
# dataset leaks test information; a leakage-free Pipeline alternative is sketched after
# the scaling step below
selector_chi2 = SelectKBest(chi2, k=10)
X_chi2 = selector_chi2.fit_transform(X, y)

selector_mi = SelectKBest(mutual_info_classif, k=10)
X_mi = selector_mi.fit_transform(X, y)
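
# Optional: inspect which columns each selector kept (a small sketch; it assumes the feature
# order in X matches df.columns[:-1], which holds because X was taken from those columns)
feature_names = df.columns[:-1]
print("Chi-Square selected:", list(feature_names[selector_chi2.get_support()]))
print("Mutual Information selected:", list(feature_names[selector_mi.get_support()]))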

# Split into training and testing sets; reusing the same random_state and stratify means
# both splits select the same rows, so y_train / y_test are valid for either feature set
X_train_chi2, X_test_chi2, y_train, y_test = train_test_split(X_chi2, y, test_size=0.2, random_state=42, stratify=y)
X_train_mi, X_test_mi, _, _ = train_test_split(X_mi, y, test_size=0.2, random_state=42, stratify=y)

# Standardize the features
scaler_chi2 = StandardScaler()
X_train_chi2 = scaler_chi2.fit_transform(X_train_chi2)
X_test_chi2 = scaler_chi2.transform(X_test_chi2)

scaler_mi = StandardScaler()
X_train_mi = scaler_mi.fit_transform(X_train_mi)
X_test_mi = scaler_mi.transform(X_test_mi)
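
# A leakage-free alternative (sketch only, not used below): wrapping imputation, selection,
# and scaling in a Pipeline fits them on the training split alone; the steps and
# hyperparameters here are illustrative, not a drop-in replacement for the code above
from sklearn.pipeline import Pipeline
pipe_example = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('select', SelectKBest(mutual_info_classif, k=10)),
    ('scale', StandardScaler()),
    ('clf', RandomForestClassifier(n_estimators=100, random_state=42))
])
# pipe_example.fit(X_train_raw, y_train)  # X_train_raw would be an unprocessed training split (hypothetical name)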

# Helper to compute and print the overall accuracy of a fitted model on the test set
def calculate_accuracy(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Overall accuracy: {accuracy:.4f}")
    return accuracy
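
# A class-wise breakdown can complement the overall score: the confusion-matrix diagonal
# divided by the row sums gives per-class recall (a minimal sketch, not called below)
from sklearn.metrics import confusion_matrix

def per_class_accuracy(model, X_test, y_test):
    cm = confusion_matrix(y_test, model.predict(X_test))
    return cm.diagonal() / cm.sum(axis=1)  # one value per class, in encoded-label order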

# Initialize results dictionary
results = {
    "Model": [],
    "Feature Selection": [],
    "Accuracy": []
}

# Models to train
models = {
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
}
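
# Optional sanity check (sketch): 5-fold cross-validation on the chi-square training split
# gives a less split-dependent estimate than the single hold-out score used below
from sklearn.model_selection import cross_val_score
for name, m in models.items():
    cv_scores = cross_val_score(m, X_train_chi2, y_train, cv=5)
    print(f"{name}: CV accuracy {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})")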

# Train and evaluate models with Chi-Square selected features
for model_name, model in models.items():
    model.fit(X_train_chi2, y_train)
    accuracy = calculate_accuracy(model, X_test_chi2, y_test)
    results['Model'].append(model_name)
    results['Feature Selection'].append("Chi-Square")
    results['Accuracy'].append(accuracy)

# Train and evaluate models with Mutual Information selected features
for model_name, model in models.items():
    model.fit(X_train_mi, y_train)
    accuracy = calculate_accuracy(model, X_test_mi, y_test)
    results['Model'].append(model_name)
    results['Feature Selection'].append("Mutual Information")
    results['Accuracy'].append(accuracy)

# The Keras models below expect 3-D input: (samples, features, channels)
input_shape = (X_train_chi2.shape[1], 1)  # both selectors keep the same number of features

X_train_dl_chi2 = X_train_chi2.reshape(-1, input_shape[0], 1)
X_test_dl_chi2 = X_test_chi2.reshape(-1, input_shape[0], 1)

X_train_dl_mi = X_train_mi.reshape(-1, input_shape[0], 1)
X_test_dl_mi = X_test_mi.reshape(-1, input_shape[0], 1)

# One-hot encode the targets for the Keras models; the class count is derived from the data
y_train_dl = to_categorical(y_train, num_classes=len(set(y)))
y_test_dl = to_categorical(y_test, num_classes=len(set(y)))
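
# Optional (sketch): an EarlyStopping callback could be passed to each fit() call below
# via callbacks=[early_stop] instead of relying on a fixed 10 epochs
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)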

# CNN model: 1-D convolution over the selected-feature axis (a 2-D convolution would
# require a genuine 2-D grid, which these tabular features do not provide)
cnn_model = Sequential([
    Conv1D(32, 3, activation='relu', input_shape=input_shape),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(set(y)), activation='softmax')
])
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train_dl_chi2, y_train_dl, epochs=10, validation_data=(X_test_dl_chi2, y_test_dl))
cnn_accuracies = cnn_model.evaluate(X_test_dl_chi2, y_test_dl, verbose=0)[1]
results['Model'].append("CNN")
results['Feature Selection'].append("Chi-Square")
results['Accuracy'].append(cnn_accuracies)

# RNN Model
rnn_model = Sequential([
    SimpleRNN(128, input_shape=(input_shape[0], 1)),
    Dense(128, activation='relu'),
    Dense(len(set(y)), activation='softmax')
])
rnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
rnn_model.fit(X_train_dl_chi2, y_train_dl, epochs=10, validation_data=(X_test_dl_chi2, y_test_dl))
rnn_accuracies = rnn_model.evaluate(X_test_dl_chi2, y_test_dl, verbose=0)[1]
results['Model'].append("RNN")
results['Feature Selection'].append("Chi-Square")
results['Accuracy'].append(rnn_accuracies)

# LSTM Model
lstm_model = Sequential([
    LSTM(128, input_shape=(input_shape[0], 1)),
    Dense(128, activation='relu'),
    Dense(len(set(y)), activation='softmax')
])
lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
lstm_model.fit(X_train_dl_chi2, y_train_dl, epochs=10, validation_data=(X_test_dl_chi2, y_test_dl))
lstm_accuracies = lstm_model.evaluate(X_test_dl_chi2, y_test_dl, verbose=0)[1]
results['Model'].append("LSTM")
results['Feature Selection'].append("Chi-Square")
results['Accuracy'].append(lstm_accuracies)

# ANN Model
ann_model = Sequential([
    Dense(128, activation='relu', input_shape=(input_shape[0],)),
    Dense(128, activation='relu'),
    Dense(len(set(y)), activation='softmax')
])
ann_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
ann_model.fit(X_train_chi2, y_train_dl, epochs=10, validation_data=(X_test_chi2, y_test_dl))
ann_accuracies = ann_model.evaluate(X_test_chi2, y_test_dl, verbose=0)[1]
results['Model'].append("ANN")
results['Feature Selection'].append("Chi-Square")
results['Accuracy'].append(ann_accuracies)

# Repeat for the Mutual Information features; clone each architecture so every model
# starts from freshly initialized weights instead of the chi-square-trained ones
cnn_model_mi = tf.keras.models.clone_model(cnn_model)
cnn_model_mi.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
cnn_model_mi.fit(X_train_dl_mi, y_train_dl, epochs=10, validation_data=(X_test_dl_mi, y_test_dl))
cnn_accuracies = cnn_model_mi.evaluate(X_test_dl_mi, y_test_dl, verbose=0)[1]
results['Model'].append("CNN")
results['Feature Selection'].append("Mutual Information")
results['Accuracy'].append(cnn_accuracies)

rnn_model_mi = tf.keras.models.clone_model(rnn_model)
rnn_model_mi.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
rnn_model_mi.fit(X_train_dl_mi, y_train_dl, epochs=10, validation_data=(X_test_dl_mi, y_test_dl))
rnn_accuracies = rnn_model_mi.evaluate(X_test_dl_mi, y_test_dl, verbose=0)[1]
results['Model'].append("RNN")
results['Feature Selection'].append("Mutual Information")
results['Accuracy'].append(rnn_accuracies)

lstm_model_mi = tf.keras.models.clone_model(lstm_model)
lstm_model_mi.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
lstm_model_mi.fit(X_train_dl_mi, y_train_dl, epochs=10, validation_data=(X_test_dl_mi, y_test_dl))
lstm_accuracies = lstm_model_mi.evaluate(X_test_dl_mi, y_test_dl, verbose=0)[1]
results['Model'].append("LSTM")
results['Feature Selection'].append("Mutual Information")
results['Accuracy'].append(lstm_accuracies)

ann_model_mi = tf.keras.models.clone_model(ann_model)
ann_model_mi.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
ann_model_mi.fit(X_train_mi, y_train_dl, epochs=10, validation_data=(X_test_mi, y_test_dl))
ann_accuracies = ann_model_mi.evaluate(X_test_mi, y_test_dl, verbose=0)[1]
results['Model'].append("ANN")
results['Feature Selection'].append("Mutual Information")
results['Accuracy'].append(ann_accuracies)
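
# Collect all results into a single table for comparison; the CSV filename is a placeholder
results_df = pd.DataFrame(results)
print(results_df.sort_values('Accuracy', ascending=False))
results_df.to_csv('model_comparison_results.csv', index=False)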
