DWDM Lab All
1 Standard Scaler
1. Write a Python program to implement Standard Scaler
import numpy as np
import pandas as pd

class StandardNorm:
    def scale(self, df):
        # z-score each column: z = (x - mean) / std
        # (pandas' Series.std() uses the sample standard deviation, ddof=1)
        for i in df.columns:
            mean = df[i].mean()
            sd = df[i].std()
            df[i] = (df[i] - mean) / sd
        return df
df = pd.DataFrame(
    [[45000, 42], [32000, 26], [58000, 48], [37000, 32]],
    columns=["Salary", "Age"]
)
print("Original Data")
print(df)
s = StandardNorm()
df_scaled = s.scale(df)
print("\nScaled Data")
print(df_scaled)
Output:
Original Data
Salary Age
0 45000 42
1 32000 26
2 58000 48
3 37000 32
Scaled Data
Salary Age
0 0.176318 0.506803
1 -0.969750 -1.114967
2 1.322386 1.114967
3 -0.528954 -0.506803
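For reference, scikit-learn provides the same transform out of the box. Note
that sklearn's StandardScaler divides by the population standard deviation
(ddof=0), so its output differs slightly from the pandas sample-std version
above; a minimal sketch:

from sklearn.preprocessing import StandardScaler
import pandas as pd

df = pd.DataFrame(
    [[45000, 42], [32000, 26], [58000, 48], [37000, 32]],
    columns=["Salary", "Age"]
)
# StandardScaler standardizes with ddof=0, unlike pandas' Series.std() (ddof=1)
scaled = StandardScaler().fit_transform(df)
print(pd.DataFrame(scaled, columns=df.columns))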
2 Min-Max Scaler
2. Write a Python program to implement Min-Max Scaler

class MinMaxNorm:
    def scale(self, df):
        # rescale each column to [0, 1]: x' = (x - min) / (max - min)
        for c in df.columns:
            cmin = df[c].min()  # renamed to avoid shadowing the built-ins min/max
            cmax = df[c].max()
            df[c] = (df[c] - cmin) / (cmax - cmin)
        return df
df = pd.DataFrame(
    [[45000, 42], [32000, 26], [58000, 48], [37000, 32]],
    columns=["Salary", "Age"]
)
print("Original Data")
print(df)
s = MinMaxNorm()
df_scaled = s.scale(df)
print("\nScaled Data")
print(df_scaled)
Output:
Original Data
Salary Age
0 45000 42
1 32000 26
2 58000 48
3 37000 32
Scaled Data
Salary Age
0 0.500000 0.727273
1 0.000000 0.000000
2 1.000000 1.000000
3 0.192308 0.272727
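The equivalent scikit-learn transform is MinMaxScaler, which should reproduce
the values above exactly; a minimal sketch:

from sklearn.preprocessing import MinMaxScaler
import pandas as pd

df = pd.DataFrame(
    [[45000, 42], [32000, 26], [58000, 48], [37000, 32]],
    columns=["Salary", "Age"]
)
scaled = MinMaxScaler().fit_transform(df)  # defaults to feature_range=(0, 1)
print(pd.DataFrame(scaled, columns=df.columns))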
# Assumed setup for the clustering fragments below (the original data loading
# and imports were elided): a placeholder 2-D point set plus per-cluster styles.
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, MiniBatchKMeans
data = np.random.rand(100, 2)  # placeholder; the original dataset is not shown
colors = ["red", "green", "blue", "orange", "purple"]
markers = ["o", "^", "s", "D", "v"]

# K-Means with random centroid initialization
km = KMeans(n_clusters=3, init="random")
km.fit(data)
centers = km.cluster_centers_
labels = km.labels_
for i in range(len(data)):
    plt.plot(data[i][0], data[i][1], color=colors[labels[i]],
             marker=markers[labels[i]])
plt.scatter(centers[:, 0], centers[:, 1], marker="s", s=100, linewidths=5)
plt.show()
# K-Means with k-means++ (spread-out) centroid initialization
km = KMeans(n_clusters=4, init="k-means++")
km.fit(data)
centers = km.cluster_centers_
labels = km.labels_
for i in range(len(data)):
    plt.plot(data[i][0], data[i][1], color=colors[labels[i]],
             marker=markers[labels[i]])
plt.scatter(centers[:, 0], centers[:, 1], marker="s", s=100, linewidths=5)
plt.show()
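The k-means++ strategy spreads the initial centroids apart, which usually
lowers the final inertia (within-cluster sum of squares) compared with purely
random starts. A quick comparison, reusing the placeholder `data` array from
the setup above:

# Compare final inertia of the two initialization strategies
for init in ("random", "k-means++"):
    km = KMeans(n_clusters=4, init=init, n_init=10)
    km.fit(data)
    print(init, "inertia:", km.inertia_)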
# Time a K-Means run; time.process_time() measures CPU time
km = KMeans(n_clusters=5, init="random")
t0 = time.process_time()
km.fit(data)
t1 = time.process_time()
tt = t1 - t0
print("Total Time:", tt)
centers = km.cluster_centers_
labels = km.labels_
for i in range(len(data)):
    plt.plot(data[i][0], data[i][1], color=colors[labels[i]],
             marker=markers[labels[i]])
plt.scatter(centers[:, 0], centers[:, 1], marker="o", s=50, linewidths=5)
plt.show()
# Mini-batch K-Means for comparison; the original definition of `mbk` was
# elided, so a matching instance is assumed here. Note that time.time()
# measures wall-clock time, unlike process_time() above.
mbk = MiniBatchKMeans(n_clusters=5)
t0 = time.time()
mbk.fit(data)
t1 = time.time()
tt = t1 - t0
print("Total Time:", tt)
centers = mbk.cluster_centers_
labels = mbk.labels_
for i in range(len(data)):
    plt.plot(data[i][0], data[i][1], color=colors[labels[i]],
             marker=markers[labels[i]])
plt.scatter(centers[:, 0], centers[:, 1], marker="o", s=50, linewidths=5)
plt.show()
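The two timings above are not directly comparable: time.process_time() counts
CPU time while time.time() counts wall-clock time. A like-for-like sketch
using wall-clock time for both algorithms:

# Wall-clock comparison of full K-Means vs. Mini-batch K-Means
for algo in (KMeans(n_clusters=5, init="random"), MiniBatchKMeans(n_clusters=5)):
    t0 = time.time()
    algo.fit(data)
    print(type(algo).__name__, "took", time.time() - t0, "seconds")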
# K-Medoids clustering of the iris data. The imports, colour/marker lists and
# the 3-D axes were elided in the original; KMedoids is assumed to come from
# the scikit-learn-extra package.
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn_extra.cluster import KMedoids

iris_data = load_iris()
x = iris_data.data
y = iris_data.target
# print(x[:5])
# print(y[:5])
sc = StandardScaler().fit(x)
sx = sc.transform(x)
km = KMedoids(n_clusters=3)
py = km.fit_predict(sx)  # fit_predict fits the model and returns the labels
# print("Predicted: ", py)
colors = ["red", "green", "blue"]
markers = ["o", "^", "s"]
fig = plt.figure()
ax = fig.add_subplot(projection="3d")  # assumed 3-D axes for the scatter below
for i in range(len(sx)):
    ax.scatter(sx[i][0], sx[i][1], sx[i][2], color=colors[py[i]],
               marker=markers[py[i]])
plt.show()
ri = metrics.rand_score(y, py)
print("Rand Index:", ri)
hs = metrics.homogeneity_score(y, py)
print("Homogeneity Score:", hs)
cs = metrics.completeness_score(y, py)
print("Completeness Score:", cs)
# Agglomerative (hierarchical) clustering of the same data, evaluated with the
# same external indices.
from sklearn.cluster import AgglomerativeClustering

iris_data = load_iris()
x = iris_data.data
y = iris_data.target
# print(x[:5])
# print(y[:5])
sc = StandardScaler().fit(x)
sx = sc.transform(x)
ac = AgglomerativeClustering(n_clusters=3)
py = ac.fit_predict(sx)  # fit_predict fits the model and returns the labels
# print("Predicted: ", py)
fig = plt.figure()
ax = fig.add_subplot(projection="3d")
for i in range(len(sx)):
    ax.scatter(sx[i][0], sx[i][1], sx[i][2], color=colors[py[i]],
               marker=markers[py[i]])
plt.show()
ri = metrics.rand_score(y, py)
print("Rand Index:", ri)
hs = metrics.homogeneity_score(y, py)
print("Homogeneity Score:", hs)
cs = metrics.completeness_score(y, py)
print("Completeness Score:", cs)
sil = metrics.silhouette_score(sx, py, metric="euclidean")  # renamed: `sc` already holds the scaler
print("Silhouette Coefficient:", sil)
dataset = pd.read_csv("Diabetes.csv")
p = train["Pragnency"].values
g = train["Glucose"].values
bp = train["Blod Pressure"].values
st = train["Skin Thikness"].values
ins = train["Insulin"].values
bmi = train["BMI"].values
dpf = train["DFP"].values
a = train["Age"].values
d = train["Diabetes"].values
model = GaussianNB()
model.fit(traininput, d)
p = test["Pragnency"].values
g = test["Glucose"].values
bp = test["Blod Pressure"].values
st = test["Skin Thikness"].values
ins = test["Insulin"].values
bmi = test["BMI"].values
dpf = test["DFP"].values
a = test["Age"].values
d = test["Diabetes"].values
predicted = model.predict(testinput)
# print('Actual Class:', *d)
# print('Predicted Class:', *predicted)
print("Confusion Matrix:")
print(metrics.confusion_matrix(d, predicted))
print("\nClassification Measures:")
print("Accuracy:", metrics.accuracy_score(d, predicted))
print("Recall:", metrics.recall_score(d, predicted))
print("Precision:", metrics.precision_score(d, predicted))
print("F1-score:", metrics.f1_score(d, predicted))
Output:
Classification Measures:
Accuracy: 0.7662337662337663
Recall: 0.620253164556962
Precision: 0.6712328767123288
F1-score: 0.6447368421052632
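All four measures derive directly from the confusion matrix. A quick sanity
check on a hypothetical 2x2 matrix laid out as [[tn, fp], [fn, tp]], the
convention metrics.confusion_matrix uses for binary labels:

# Derive the measures by hand from hypothetical counts
tn, fp, fn, tp = 90, 10, 30, 70
accuracy = (tp + tn) / (tp + tn + fp + fn)
recall = tp / (tp + fn)
precision = tp / (tp + fp)
f1 = 2 * precision * recall / (precision + recall)
print(accuracy, recall, precision, f1)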
# Second classification run on the same data. The split and the classifier
# were again elided in the original; the split is assumed as before.
dataset = pd.read_csv("Diabetes.csv")
split = int(len(dataset) * 0.7)
train, test = dataset[:split], dataset[split:]
p = train["Pragnency"].values
g = train["Glucose"].values
bp = train["Blod Pressure"].values
st = train["Skin Thikness"].values
ins = train["Insulin"].values
bmi = train["BMI"].values
dpf = train["DFP"].values
a = train["Age"].values
d = train["Diabetes"].values
trainfeatures = zip(p, g, bp, st, ins, bmi, dpf, a)
traininput = list(trainfeatures)
# print(traininput)
p = test["Pragnency"].values
g = test["Glucose"].values
bp = test["Blod Pressure"].values
st = test["Skin Thikness"].values
ins = test["Insulin"].values
bmi = test["BMI"].values
dpf = test["DFP"].values
a = test["Age"].values
d = test["Diabetes"].values
predicted = model.predict(testinput)
# print('Actual Class:', *d)
# print('Predicted Class:', *predicted)
print("Confusion Matrix:")
print(metrics.confusion_matrix(d, predicted))
print("\nClassification Measures:")
print("Accuracy:", metrics.accuracy_score(d, predicted))
print("Recall:", metrics.recall_score(d, predicted))
print("Precision:", metrics.precision_score(d, predicted))
print("F1-score:", metrics.f1_score(d, predicted))
Output:
Classification Measures:
Accuracy: 0.7792207792207793
Recall: 0.7848101265822784
Precision: 0.6458333333333334
F1-score: 0.7085714285714286
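The two runs report different scores, which is expected whenever the
train/test split differs. scikit-learn's train_test_split makes the split
explicit and reproducible; a sketch:

from sklearn.model_selection import train_test_split
# Fixing random_state makes the split, and hence the scores, repeatable
train, test = train_test_split(dataset, test_size=0.3, random_state=42)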
# Breast-cancer classification. The split, classifier and prediction step were
# elided in the original, so an assumed pipeline is sketched here (the actual
# classifier used is not shown; SVC is only a placeholder).
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

cancer = datasets.load_breast_cancer()
x = cancer.data
y = cancer.target
print("Length of Data:", len(cancer.data))
trainx, testx, trainy, testy = train_test_split(x, y, test_size=0.3)
model = SVC().fit(trainx, trainy)
yp = model.predict(testx)
print("\nConfusion Matrix:")
print(metrics.confusion_matrix(testy, yp))
print("\nClassification Measures:")
print("Accuracy:", metrics.accuracy_score(testy, yp))
print("Recall:", metrics.recall_score(testy, yp))
print("Precision:", metrics.precision_score(testy, yp))
print("F1-score:", metrics.f1_score(testy, yp))
Output:
Confusion Matrix:
[[ 39 0]
[ 9 123]]
Classification Measures:
Accuracy: 0.9473684210526315
Recall: 0.9318181818181818
Precision: 1.0
F1-score: 0.9647058823529412
# Second breast-cancer run: here `yp` holds continuous scores (the model that
# produced them, e.g. a neural network, was elided in the original), so the
# predictions are rounded to integer class labels before scoring.
cancer = datasets.load_breast_cancer()
x = cancer.data
y = cancer.target
pred = []
for s in yp:  # loop variable renamed: `x` already holds the feature matrix
    pred.append(np.round(s))
pred = np.array(pred)
pred = pred.ravel()
pred = pred.astype(int)
# print("Actual Class: ", *testy)
# print("Predicted Class: ", *yp)
print("\nConfusion Matrix:")
print(metrics.confusion_matrix(testy, pred))
print("\nClassification Measures:")
print("Accuracy:", metrics.accuracy_score(testy, pred))
print("Recall:", metrics.recall_score(testy, pred))
print("Precision:", metrics.precision_score(testy, pred))
print("F1-score:", metrics.f1_score(testy, pred))
Output:
Number of features: 30
Number of classes: 2
Class Labels: ['malignant' 'benign']
Confusion Matrix:
[[ 36 3]
[ 2 130]]
Classification Measures:
Accuracy: 0.9707602339181286
Recall: 0.9848484848484849
Precision: 0.9774436090225563
F1-score: 0.981132075471698
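The round-append-convert steps above collapse into essentially one vectorized
expression; a sketch, assuming `yp` is an array of scores in [0, 1]:

import numpy as np
# Threshold continuous scores at 0.5 in a single step
pred = (np.asarray(yp).ravel() >= 0.5).astype(int)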
dataset = pd.read_csv("iris.csv")
dataset = dataset.values
dataset = shuffle(dataset)
x = dataset[:, 0:4].astype(float)
y = dataset[:, 4]
# Train/Test split
split = int(len(x) * 0.7)
trainx, testx = sx[:split], sx[split:]
trainy, testy = dy[:split], dy[split:]
a = list()
for i in range(len(testy)):
d = np.argmax(testy[i])
a.append(d)
a = np.array(a)
al = encoder.inverse_transform(a)
pl = encoder.inverse_transform(yp)
print(classification_report(al, pl))
Output (tail of the classification report):
              precision    recall  f1-score   support
accuracy                             0.91        45
macro avg         0.92      0.91    0.91        45
weighted avg      0.91      0.91    0.91        45
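For completeness, the elided `encoder` and one-hot labels `dy` are typically
produced as below; the Keras to_categorical helper is an assumption, since the
original model code is not shown:

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical  # assumed Keras-based lab
encoder = LabelEncoder()
ey = encoder.fit_transform(y)  # string labels -> integer indices
dy = to_categorical(ey)        # integer indices -> one-hot rows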
8. Apriori Algorithm
# !pip install apyori
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori
# The transactions CSV load was elided in the original; a headerless read is
# assumed (the file name here is hypothetical)
dataset = pd.read_csv("store_data.csv", header=None)

records = []
for i in range(0, 7501):
    data = dataset.iloc[i].dropna()        # drop empty cells in this transaction
    items = []
    for j in range(0, len(data)):
        items.append(str(data.values[j]))  # index the cleaned row, not the raw one
    records.append(items)
# print(records)
association_rules = apriori(
    records, min_support=0.005, min_confidence=0.2, min_lift=3,
    min_length=2  # note: apyori's own length parameter is max_length; it quietly ignores unrecognised keywords
)
association_results = list(association_rules)
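apyori yields RelationRecord namedtuples; a minimal sketch for printing each
rule's antecedent, consequent, support, confidence and lift:

# Print every discovered rule with its statistics
for record in association_results:
    for stat in record.ordered_statistics:
        print(list(stat.items_base), "->", list(stat.items_add),
              "support:", round(record.support, 4),
              "confidence:", round(stat.confidence, 4),
              "lift:", round(stat.lift, 4))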