"""Reconstructed machine-learning lab scripts.

NOTE(review): the original text was a column-interleaved paste of four
independent scripts (apparently notebook cells) plus notebook UI residue
("Show hidden output", "Next steps: Explain error").  The sections below
de-interleave them into four sequential, runnable parts.  Tokens shown
as '[Link]' in the paste are extraction artifacts that replaced the real
filenames/identifiers -- TODO: restore the real CSV filenames before
running sections 1 and 2.
"""

import csv
import math

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# ---------------------------------------------------------------------------
# Section 1: Candidate Elimination algorithm
# ---------------------------------------------------------------------------

# Load dataset (original CSV filename lost in extraction -- restore it).
data = pd.DataFrame(data=pd.read_csv('[Link]'))

# Extract concepts (all columns except last) and target (last column)
concepts = np.array(data.iloc[:, 0:-1])
print("Concepts:\n", concepts)

target = np.array(data.iloc[:, -1])
print("\nTarget:\n", target)


def learn(concepts, target):
    """Run Candidate Elimination and return (specific_h, general_h).

    concepts -- 2-D array of attribute values, one row per instance.
    target   -- 1-D array of 'yes'/'no' labels, one per instance.
    """
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("Specific Hypothesis:", specific_h)

    general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    print("General Hypothesis:", general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Positive instance: generalize specific_h, relax general_h.
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            # Negative instance: specialize general_h where the attribute
            # still discriminates; otherwise keep it general.
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("\nSteps of Candidate Elimination Algorithm after instance", i + 1)
        print("Specific Hypothesis:", specific_h)
        print("General Hypothesis:", general_h)

    # Remove overly general hypotheses.  The pasted original hard-coded
    # six attributes (['?', '?', '?', '?', '?', '?']); use the actual
    # attribute count so any training set width works.
    all_general = ['?'] * len(specific_h)
    indices = [i for i, val in enumerate(general_h) if val == all_general]
    for i in indices:
        general_h.remove(all_general)

    return specific_h, general_h


# Run learning algorithm
s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")

# ---------------------------------------------------------------------------
# Section 2: Find-S algorithm
# ---------------------------------------------------------------------------

a = []
# Original CSV filename lost in extraction -- restore it.
with open('[Link]', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
print(a)

# First row is assumed to be the header, hence the -1.
print("\nThe total number of training instances are:", len(a) - 1)

num_attribute = len(a[0]) - 1  # last column is the class label

print("\nThe initial hypothesis is:")
hypothesis = ['0'] * num_attribute
print(hypothesis)

for i in range(1, len(a)):
    # Find-S only learns from positive ('yes') instances.
    if a[i][num_attribute] == 'yes':
        for j in range(0, num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
                hypothesis[j] = a[i][j]
            else:
                hypothesis[j] = '?'
    print("\nThe hypothesis for the training instance {} is:\n".format(i), hypothesis)

print("\nThe Maximally specific hypothesis for the training instance is:")
print(hypothesis)

# ---------------------------------------------------------------------------
# Section 3: FOIL gain over the "Job Offer" toy table
# ---------------------------------------------------------------------------

# Step 1: Create DataFrame from the table.  The first column's name was
# lost in extraction ('[Link]'); it is never used below.
data = {
    "[Link]": [1, 2, 3, 4, 5],
    "CGPA": [">=9", "<8", ">=9", "<8", ">=8"],
    "Interactiveness": ["Yes", "Yes", "Yes", "No", "Yes"],
    "Practical Knowledge": ["Good", "Good", "Average", "Good", "Good"],
    "Job Offer": ["Yes", "Yes", "No", "No", "No"]
}
df = pd.DataFrame(data)


# Step 2: Helper function
def foil_gain(pos, neg, new_pos, new_neg):
    """Calculate FOIL gain of a candidate literal.

    pos/neg         -- positive/negative counts before adding the literal.
    new_pos/new_neg -- counts in the subset covered by the literal.
    """
    if new_pos == 0:
        # log2(0) is undefined; a literal covering no positives gains nothing.
        return 0
    gain = new_pos * (
        math.log2(new_pos / (new_pos + new_neg)) -
        math.log2(pos / (pos + neg))
    )
    return gain


# Step 3: Count total positives and negatives
total_pos = len(df[df["Job Offer"] == "Yes"])
total_neg = len(df[df["Job Offer"] == "No"])

# Step 4: Generate candidate literals and calculate FOIL Gain
attributes = ["CGPA", "Interactiveness", "Practical Knowledge"]
values = {
    "CGPA": df["CGPA"].unique(),
    "Interactiveness": df["Interactiveness"].unique(),
    "Practical Knowledge": df["Practical Knowledge"].unique()
}

gains = []
for attr in attributes:
    for val in values[attr]:
        subset = df[df[attr] == val]
        new_pos = len(subset[subset["Job Offer"] == "Yes"])
        new_neg = len(subset[subset["Job Offer"] == "No"])
        gain = foil_gain(total_pos, total_neg, new_pos, new_neg)
        gains.append((f"{attr} = {val}", gain, new_pos, new_neg))

# Step 5: Sort by FOIL Gain
gains.sort(key=lambda x: x[1], reverse=True)

# Step 6: Output top rules
print("FOIL Gain and Rule Candidates:\n")
for rule, gain, pos, neg in gains:
    # NOTE(review): the tail of this f-string was truncated in the paste;
    # "| Negatives = {neg}" is the presumed original ending -- confirm.
    print(f"Rule: IF {rule} THEN Job Offer = Yes | FOIL Gain = {gain:.4f} | Positives = {pos} | Negatives = {neg}")

# ---------------------------------------------------------------------------
# Section 4: Bagging vs. Boosting on the breast-cancer dataset
# ---------------------------------------------------------------------------

# Load dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# ---------------- Bagging ----------------
bag_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42
)
bag_model.fit(X_train, y_train)
y_pred_bag = bag_model.predict(X_test)

# Evaluation - Bagging
print("Bagging Accuracy:", accuracy_score(y_test, y_pred_bag))
print("\nClassification Report (Bagging):\n", classification_report(y_test, y_pred_bag))

# ---------------- Boosting ----------------
boost_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),  # decision stumps
    n_estimators=50,
    random_state=42
)
boost_model.fit(X_train, y_train)
y_pred_boost = boost_model.predict(X_test)

# Evaluation - Boosting
print("\nBoosting Accuracy:", accuracy_score(y_test, y_pred_boost))
print("\nClassification Report (Boosting):\n", classification_report(y_test, y_pred_boost))
# K-Means clustering on Iris, with the labels withheld to simulate
# unsupervised data.
# NOTE(review): module/attribute paths shown as '[Link]' in the paste were
# extraction artifacts; restored to the standard matplotlib/sklearn names.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# 1. Load dataset (Iris without labels to simulate unsupervised)
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
print("Dataset Head:")
print(X.head())

# 2. Scale features (important for clustering)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. Apply KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)  # 3 clusters
clusters = kmeans.fit_predict(X_scaled)

# 4. Add cluster labels to dataframe
X['Cluster'] = clusters
print("\nClustered Data:")
print(X.head())

# 5. Visualize (using first 2 features for simplicity; columns 0 and 1
# are sepal length/width in the iris feature order)
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=X['Cluster'], cmap='viridis', s=50)
plt.xlabel('Sepal length (cm)')
plt.ylabel('Sepal width (cm)')
plt.title('K-Means Clustering on Iris Dataset')
plt.show()