ML Priyesha - 778
BACHELOR OF TECHNOLOGY
Machine Learning
(203105403)
VII SEMESTER
Computer Science & Engineering Department
Faculty of Engineering & Technology
CERTIFICATE
This is to certify that
ENROLLMENT NO: 210303105778
PRACTICAL:01
Aim: Write a program to implement logistic regression in ML.
//Coding:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
# Generate a binary classification dataset
X, y = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=2,
n_redundant=0, random_state=0)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Initialize and fit the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)
# Predict on the test set
y_pred = model.predict(X_test)
# Calculate accuracy and display the classification report
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
print('Classification Report:')
print(classification_report(y_test, y_pred))
# Plot the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 100),
np.linspace(X[:, 1].min(), X[:, 1].max(), 100))
Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, levels=[-1, 0, 1], cmap=plt.cm.Blues, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Logistic Regression')
plt.show()
Output:
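As a follow-up to the listing above (a minimal sketch reusing its model and np; the sample point is arbitrary), logistic regression can also report class probabilities rather than just hard labels:

# Class probabilities for an arbitrary new point (illustrative values only)
sample = np.array([[0.5, -0.5]])
print('P(class 0), P(class 1):', model.predict_proba(sample)[0])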
PRACTICAL:02
Aim: Write a program to demonstrate the working of a decision tree using the ID3 algorithm.
//Coding:
import math

class Node:
    def __init__(self):
        self.value = None
        self.children = {}
def entropy(data):
    """Calculate entropy for a given set of data."""
    total_records = len(data)
    if total_records == 0:
        return 0
    positive_count = sum(1 for record in data if record[-1] == 'Yes')
    negative_count = total_records - positive_count
    if positive_count == 0 or negative_count == 0:
        return 0
    positive_prob = positive_count / total_records
    negative_prob = negative_count / total_records
    return -positive_prob * math.log2(positive_prob) - negative_prob * math.log2(negative_prob)
def information_gain(data, attribute_index):
    """Calculate information gain for a given attribute (column index)."""
    total_entropy = entropy(data)
    attribute_values = set(record[attribute_index] for record in data)
    weighted_entropy = 0
    for value in attribute_values:
        subset = [record for record in data if record[attribute_index] == value]
        subset_entropy = entropy(subset)
        subset_weight = len(subset) / len(data)
        weighted_entropy += subset_weight * subset_entropy
    return total_entropy - weighted_entropy
def choose_best_attribute(data, attributes):
    """Choose the attribute with the highest information gain."""
    # Each attribute's column index in the records is its position in ALL_ATTRIBUTES,
    # so the gain computation stays correct even after attributes are removed
    gains = [information_gain(data, ALL_ATTRIBUTES.index(attr)) for attr in attributes]
    max_gain = max(gains)
    return attributes[gains.index(max_gain)]
def build_tree(data, attributes):
    """Build the decision tree using the ID3 algorithm."""
    classes = [record[-1] for record in data]
    # If all examples belong to the same class, return a leaf node
    if len(set(classes)) == 1:
        leaf_node = Node()
        leaf_node.value = classes[0]
        return leaf_node
    # If there are no attributes left to split on, return a leaf with the majority class
    if not attributes:
        majority_class = max(set(classes), key=classes.count)
        leaf_node = Node()
        leaf_node.value = majority_class
        return leaf_node
    best_attribute = choose_best_attribute(data, attributes)
    best_attribute_index = ALL_ATTRIBUTES.index(best_attribute)
    tree = Node()
    tree.value = best_attribute
    # Remove the chosen attribute from the list passed down to the children
    remaining_attributes = [attr for attr in attributes if attr != best_attribute]
    attribute_values = set(record[best_attribute_index] for record in data)
    for value in attribute_values:
        subset = [record for record in data if record[best_attribute_index] == value]
        tree.children[value] = build_tree(subset, remaining_attributes)
    return tree
def print_tree(node, indent=''):
    """Print the decision tree."""
    if not node.children:
        print(indent + 'Class:', node.value)
        return
    print(indent + 'Attribute:', node.value)
    for value, child_node in node.children.items():
        print(indent + '  Value:', value)
        print_tree(child_node, indent + '    ')
# Sample dataset (Weather, Temperature, Humidity, Wind, Play)
data = [
    ['Sunny', 'Hot', 'High', 'Weak', 'No'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'No'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Strong', 'No']
]
# Attribute names (excluding the target attribute 'Play'); order matches the record columns
ALL_ATTRIBUTES = ['Weather', 'Temperature', 'Humidity', 'Wind']
# Build and print the decision tree
root_node = build_tree(data, ALL_ATTRIBUTES)
print_tree(root_node)
Output:
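As a quick sanity check on the entropy helper (reusing entropy and data from the listing): the 14-record dataset above contains 9 'Yes' and 5 'No' labels, so the entropy at the root should come out near 0.940:

# -(9/14)*log2(9/14) - (5/14)*log2(5/14) ≈ 0.940
print('Root entropy:', round(entropy(data), 3))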
PRACTICAL:03
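Only the tail of this practical's listing survives in the source. The sketch below is a hypothetical reconstruction of the missing portion, assuming a Gaussian Naive Bayes classifier on the Iris data and a test_classifier helper consistent with the fragment that follows; the original aim and model choice are not recoverable.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB  # hypothetical model choice
from sklearn.metrics import accuracy_score

def test_classifier(model, X_test, y_test):
    """Return the accuracy of a fitted classifier on held-out data."""
    return accuracy_score(y_test, model.predict(X_test))

def main():
    # Load and split the Iris data, then fit the (assumed) classifier
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = GaussianNB()
    model.fit(X_train, y_train)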
    # Test the classifier and compute accuracy
    accuracy = test_classifier(model, X_test, y_test)
    print('Accuracy:', accuracy)

if __name__ == "__main__":
    main()
Output:
PRACTICAL:04
Aim: Write a program to implement a support vector machine in ML.
A Support Vector Machine (SVM) is a powerful and versatile supervised learning
algorithm used for both classification and regression tasks. It's particularly popular for
classification problems.
//Coding:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
# Load the iris dataset (as an example)
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Split the dataset into a training set and a testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Create an SVM instance
clf = svm.SVC(kernel='linear')
# Fit the SVM model according to the given training data
clf.fit(X_train, y_train)
# Predict the labels of the test set
y_pred = clf.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
Output:
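The kernel is the main design choice in an SVM. As a small sketch under the same train/test split as above, swapping in a non-linear RBF kernel is a one-line change (the reported accuracy will depend on the data):

# Same pipeline with an RBF kernel for comparison
clf_rbf = svm.SVC(kernel='rbf', gamma='scale')
clf_rbf.fit(X_train, y_train)
print('RBF kernel accuracy:', accuracy_score(y_test, clf_rbf.predict(X_test)))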
PRACTICAL:05
Aim: Write a program to implement the K-Nearest Neighbour algorithm to
classify the iris data set.
Implement the K-Nearest Neighbour algorithm to classify the well-known Iris dataset
based on the similarity of features, providing an effective and intuitive classification
approach.
//Coding:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize the K-Nearest Neighbors classifier
k = 3 # Set the value of k (number of neighbors)
knn = KNeighborsClassifier(n_neighbors=k)
# Fit the model to the training data
knn.fit(X_train, y_train)
# Predict the labels for the test set
y_pred = knn.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
# Display the classification report
print('Classification Report:')
print(classification_report(y_test, y_pred))
Output:
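Since k is the only hyperparameter here, a short sweep makes the choice of k=3 less arbitrary; a minimal sketch reusing the split from the listing above:

# Accuracy for a range of k values on the same train/test split
for k in range(1, 11):
    knn_k = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    print(f'k={k}: accuracy={accuracy_score(y_test, knn_k.predict(X_test)):.3f}')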
PRACTICAL:06
Aim: Write a program to compare the performance of different classification algorithms on the Iris dataset.
//Coding:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
# Step 1: Load and Explore the Dataset
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)
data['target'] = iris.target
# Display dataset information
print("Dataset Information:")
print(data.info())
# Display the first few rows of the dataset
print("\nFirst few rows of the dataset:")
print(data.head())
# Display summary statistics
print("\nSummary Statistics:")
print(data.describe())
# Check class distribution
print("\nClass Distribution:")
print(data['target'].value_counts())
# Step 2: Data Preprocessing and Splitting
X = data.drop('target', axis=1)
y = data['target']
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 3: Train and Evaluate Different Algorithms
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier()
}
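The training and evaluation loop for Step 3 does not appear in the source; a minimal sketch consistent with the dictionary above and the metrics already imported:

# Fit each classifier and report its test-set performance
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f'\n{name} - Accuracy: {accuracy_score(y_test, y_pred):.3f}')
    print(classification_report(y_test, y_pred))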
Output:
PRACTICAL:07
Aim: Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using the k-Means algorithm.
Apply the EM algorithm and the k-Means algorithm to cluster a dataset from a .CSV file for effective pattern recognition and grouping of data points (the k-Means step is sketched after the EM listing below).
//Coding:
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.impute import SimpleImputer
# Load the data from the CSV file with appropriate encoding
data = pd.read_csv('/OnlineRetail.csv', encoding='latin1')
# Assuming the CSV contains relevant numeric features; drop the non-numeric columns
data_numeric = data.select_dtypes(include=[float, int])
# Impute missing values with mean (you can choose other strategies)
imputer = SimpleImputer(strategy='mean')
data_numeric_imputed = imputer.fit_transform(data_numeric)
# Check the first few rows of the numeric data
print("Numeric Data Preview:")
print(data_numeric.head())
# Assume n_clusters is the number of clusters you want to find
n_clusters = 3 # You can change this based on your requirement
# Fit the Gaussian Mixture Model using the numeric data
em_model = GaussianMixture(n_components=n_clusters)
em_model.fit(data_numeric_imputed)
# Get the cluster assignments for each data point
em_labels = em_model.predict(data_numeric_imputed)
# Display the cluster assignments
print("\nEM Algorithm - Cluster Assignments:")
print(em_labels)
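The aim also calls for clustering the same data with k-Means, but that part of the listing is missing; a minimal sketch using scikit-learn's KMeans on the same imputed numeric features:

from sklearn.cluster import KMeans

# k-Means on the same imputed features, with the same cluster count
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(data_numeric_imputed)
print("\nk-Means - Cluster Assignments:")
print(kmeans_labels)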
Output:
PRACTICAL:08
Aim: Write a program to implement and compare the K-Means and DBSCAN clustering algorithms.
//Coding:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, DBSCAN
# Generate a sample dataset
X, y = make_blobs(n_samples=300, centers=4, cluster_std=1.0, random_state=42)
# Apply K-Means
kmeans = KMeans(n_clusters=4, random_state=42)
kmeans_labels = kmeans.fit_predict(X)
# Apply DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(X)
# Plot the results
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis')
plt.title('K-Means Clustering')
plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1], c=dbscan_labels, cmap='viridis')
plt.title('DBSCAN Clustering')
plt.show()
Output:
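One way to compare the two clusterings numerically is the silhouette score; DBSCAN's noise points (labelled -1) must be excluded first. A minimal sketch reusing the labels from the listing above:

from sklearn.metrics import silhouette_score

print('K-Means silhouette:', silhouette_score(X, kmeans_labels))
mask = dbscan_labels != -1  # drop DBSCAN noise points
if mask.any() and len(set(dbscan_labels[mask])) > 1:
    print('DBSCAN silhouette (noise excluded):', silhouette_score(X[mask], dbscan_labels[mask]))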
PRACTICAL:09
Aim: Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set.
Construct a Bayesian network using medical data to facilitate the diagnosis of heart
patients utilizing the Heart Disease Data Set.
//Coding:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
# Load the Heart Disease dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak",
"slope", "ca", "thal", "target"]
data = pd.read_csv(url, names=names, na_values='?')
# Handling missing values
data.dropna(inplace=True)
# Define the structure of the Bayesian network
model = BayesianNetwork([('age', 'trestbps'), ('age', 'thalach'), ('sex', 'trestbps'), ('sex', 'chol'),
('trestbps', 'target'), ('chol', 'target'), ('thalach', 'target'), ('target', 'restecg')])
# Learning CPDs using Maximum Likelihood Estimation
data_model = MaximumLikelihoodEstimator(model, data)
for node in model.nodes():
    cpd = data_model.estimate_cpd(node)
    model.add_cpds(cpd)
# Performing inference (diagnosis of heart patients)
inference = VariableElimination(model)
query = inference.query(variables=['target'], evidence={'age': 50, 'sex': 1})
print(query)
# Visualization of the Bayesian network structure (requires matplotlib and networkx), e.g.:
# import networkx as nx; nx.draw(model, with_labels=True)
Output:
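As a side note (a sketch, not part of the original listing): pgmpy can also learn all the CPDs in a single call, replacing the per-node loop above:

# Equivalent one-call parameter learning (instead of the estimate_cpd loop)
model.fit(data, estimator=MaximumLikelihoodEstimator)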
PRACTICAL:10
Output: