0% found this document useful (0 votes)
18 views · 1 page

Hypothesis Learning and Clustering Techniques

The document outlines the implementation of the Candidate Elimination Algorithm and the FOIL gain calculation for a dataset related to job offers. It includes steps for training a model using Bagging and Boosting classifiers, as well as KMeans clustering on the Iris dataset. The document also provides code snippets for data processing, hypothesis generation, and model evaluation.

Uploaded by

lavanyagongati12
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views · 1 page

Hypothesis Learning and Clustering Techniques

The document outlines the implementation of the Candidate Elimination Algorithm and the FOIL gain calculation for a dataset related to job offers. It includes steps for training a model using Bagging and Boosting classifiers, as well as KMeans clustering on the Iris dataset. The document also provides code snippets for data processing, hypothesis generation, and model evaluation.

Uploaded by

lavanyagongati12
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

# ---------------------------------------------------------------------------
# NOTE(review): this page was extracted from a two-column PDF, which
# interleaved several independent lab programs line-by-line and replaced
# some identifiers/filenames with the literal token "[Link]".  The four
# programs below are the de-interleaved reconstruction.  Anything marked
# TODO was garbled in the source and should be confirmed against the
# original document.  Notebook UI residue ("Show hidden output",
# "Next steps: Explain error") has been dropped.
# ---------------------------------------------------------------------------

# ===========================================================================
# Program 1: Find-S — maximally specific hypothesis from a CSV of training
# instances.  The last CSV column is the class label ('yes'/'no').
# ===========================================================================
import csv

a = []  # all rows of the CSV, header row included
with open('data.csv', 'r') as csvfile:  # TODO: filename garbled in source; confirm
    for row in csv.reader(csvfile):
        a.append(row)
print(a)
print("\nThe total number of training instances are:", len(a) - 1)

num_attribute = len(a[0]) - 1  # last column is the target, not an attribute

print("\nThe initial hypothesis is:")
hypothesis = ['0'] * num_attribute  # '0' = maximally specific placeholder
print(hypothesis)

# Generalize the hypothesis on positive ('yes') instances only;
# negatives are ignored by Find-S.
for i in range(1, len(a)):
    if a[i][num_attribute] == 'yes':
        for j in range(0, num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
                hypothesis[j] = a[i][j]
            else:
                hypothesis[j] = '?'  # value differs across positives
    print("\nThe hypothesis for the training instance {} is:\n".format(i), hypothesis)

print("\nThe Maximally specific hypothesis for the training instance is:")
print(hypothesis)


# ===========================================================================
# Program 2: Candidate Elimination — maintain the specific and general
# boundaries of the version space over the same kind of CSV dataset.
# ===========================================================================
import numpy as np
import pandas as pd

data = pd.DataFrame(data=pd.read_csv('data.csv'))  # TODO: filename garbled; confirm

# Extract concepts (all columns except last) and target (last column)
concepts = np.array(data.iloc[:, 0:-1])
print("Concepts:\n", concepts)

target = np.array(data.iloc[:, -1])
print("\nTarget:\n", target)


def learn(concepts, target):
    """Run Candidate Elimination and return (specific_h, general_h).

    concepts: 2-D array of attribute values, one row per instance.
    target:   1-D array of 'yes'/'no' labels aligned with concepts.
    """
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("Specific Hypothesis:", specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("General Hypothesis:", general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Positive instance: generalize specific_h where it disagrees,
            # and relax the matching general_h entry.
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            # Negative instance: specialize general_h using specific_h.
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("\nSteps of Candidate Elimination Algorithm after instance", i + 1)
        print("Specific Hypothesis:", specific_h)
        print("General Hypothesis:", general_h)

    # Remove overly general hypotheses (rows that are all '?').  The source
    # hard-coded a width of six here; building the row from the actual
    # attribute count makes the function work for any dataset width.
    unconstrained = ['?'] * len(specific_h)
    general_h = [row for row in general_h if row != unconstrained]

    return specific_h, general_h


# Run learning algorithm
s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")


# ===========================================================================
# Program 3: FOIL gain for candidate rule literals on a small job-offer table.
# ===========================================================================
import math

# Step 1: Create DataFrame from the table
table = {
    "S.No": [1, 2, 3, 4, 5],  # TODO: this column name was garbled; confirm
    "CGPA": [">=9", "<8", ">=9", "<8", ">=8"],
    "Interactiveness": ["Yes", "Yes", "Yes", "No", "Yes"],
    "Practical Knowledge": ["Good", "Good", "Average", "Good", "Good"],
    "Job Offer": ["Yes", "Yes", "No", "No", "No"]
}
df = pd.DataFrame(table)


# Step 2: Helper function
def foil_gain(pos, neg, new_pos, new_neg):
    """Calculate FOIL gain.

    pos/neg:        positive/negative counts before adding the literal.
    new_pos/new_neg: counts of examples still covered after the literal.
    """
    if new_pos == 0:
        return 0  # literal covers no positives -> no information gained
    gain = new_pos * (
        math.log2(new_pos / (new_pos + new_neg)) -
        math.log2(pos / (pos + neg))
    )
    return gain


# Step 3: Count total positives and negatives
total_pos = len(df[df["Job Offer"] == "Yes"])
total_neg = len(df[df["Job Offer"] == "No"])

# Step 4: Generate candidate literals and calculate FOIL Gain
attributes = ["CGPA", "Interactiveness", "Practical Knowledge"]
values = {
    "CGPA": df["CGPA"].unique(),
    "Interactiveness": df["Interactiveness"].unique(),
    "Practical Knowledge": df["Practical Knowledge"].unique()
}

gains = []
for attr in attributes:
    for val in values[attr]:
        subset = df[df[attr] == val]
        new_pos = len(subset[subset["Job Offer"] == "Yes"])
        new_neg = len(subset[subset["Job Offer"] == "No"])
        gain = foil_gain(total_pos, total_neg, new_pos, new_neg)
        gains.append((f"{attr} = {val}", gain, new_pos, new_neg))

# Step 5: Sort by FOIL Gain (best candidate literal first)
gains.sort(key=lambda x: x[1], reverse=True)

# Step 6: Output top rules
print("FOIL Gain and Rule Candidates:\n")
for rule, gain, pos, neg in gains:
    # The tail of this f-string was truncated in the source; the
    # "| Negatives = {neg}" part is reconstructed from the tuple layout.
    print(f"Rule: IF {rule} THEN Job Offer = Yes | FOIL Gain = {gain:.4f} | Positives = {pos} | Negatives = {neg}")


# ===========================================================================
# Program 4: Bagging vs. Boosting classifiers on the breast-cancer dataset.
# ===========================================================================
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset (renamed from `data` to avoid shadowing Program 2's DataFrame)
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Split dataset (70/30 split, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# ---------------- Bagging ----------------
bag_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42
)
bag_model.fit(X_train, y_train)
y_pred_bag = bag_model.predict(X_test)

# Evaluation - Bagging
print("Bagging Accuracy:", accuracy_score(y_test, y_pred_bag))
print("\nClassification Report (Bagging):\n", classification_report(y_test, y_pred_bag))

# ---------------- Boosting ----------------
boost_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),  # depth-1 trees = stumps
    n_estimators=50,
    random_state=42
)
boost_model.fit(X_train, y_train)
y_pred_boost = boost_model.predict(X_test)

# Evaluation - Boosting
print("\nBoosting Accuracy:", accuracy_score(y_test, y_pred_boost))
print("\nClassification Report (Boosting):\n", classification_report(y_test, y_pred_boost))

# K-Means clustering on the Iris dataset (labels ignored -> unsupervised).
# NOTE(review): the PDF extraction replaced module paths and attribute
# accesses with "[Link]"; the names below are the standard reconstruction
# for this script — confirm against the original document.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# 1. Load dataset (Iris without labels to simulate unsupervised)
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)

print("Dataset Head:")
print(X.head())

# 2. Scale features (important for clustering — KMeans uses Euclidean
#    distance, so unscaled features would dominate by magnitude)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. Apply KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)  # 3 clusters (Iris has 3 species)
clusters = kmeans.fit_predict(X_scaled)

# 4. Add cluster labels to dataframe
X['Cluster'] = clusters
print("\nClustered Data:")
print(X.head())

# 5. Visualize (using first 2 features for simplicity)
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=X['Cluster'], cmap='viridis', s=50)
plt.xlabel('Sepal length (cm)')
plt.ylabel('Sepal width (cm)')
plt.title('K-Means Clustering on Iris Dataset')
plt.show()

You might also like