0% found this document useful (0 votes)
3 views

D3 docs

Ok

Uploaded by

bttghlgsj
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

D3 docs

Ok

Uploaded by

bttghlgsj
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

-- Create the books table: the single table that every query in this
-- script reads from.
CREATE TABLE books (
    book_id INT          PRIMARY KEY,  -- unique identifier of the row
    title   VARCHAR(255) NOT NULL,     -- mandatory title
    edition VARCHAR(50),               -- optional edition label, e.g. '1 EDITION'
    author  VARCHAR(255)               -- optional author name
);

-- Insert data into the books table: five sample rows, one per edition label,
-- all sharing the same author. Written as one multi-row INSERT.
INSERT INTO books (book_id, title, edition, author) VALUES
    (1, 'RAM',  '1 EDITION', 'JOHN'),
    (2, 'sham', '2 EDITION', 'JOHN'),
    (3, 'pik',  '3 EDITION', 'JOHN'),
    (4, 'ton',  '4 EDITION', 'JOHN'),
    (5, 'mon',  '5 EDITION', 'JOHN');

-- SLICE operation: count the books with '1 EDITION'
-- Fixes the edition column to one value and counts the matching rows.


SELECT COUNT(book_id) AS num_books
FROM books
WHERE edition = '1 EDITION';

-- DICE operation: select books with the title 'pik'
-- NOTE(review): a dice is usually described as restricting two or more
-- dimensions at once; this query filters on title only, so it behaves like a
-- second slice. Confirm whether an additional predicate was intended.


SELECT book_id, title, edition
FROM books
WHERE title = 'pik';

-- ROLL-UP operation: group by edition and count the number of books for each
-- Returns one row per distinct edition value with its row count.
SELECT edition, COUNT(*) AS num_books
FROM books
GROUP BY edition;

-- DRILL-DOWN operation: select all books with the author 'JOHN'
-- Returns the un-aggregated rows (every column) for one author.
-- NOTE(review): every sample row inserted by this script has author 'JOHN',
-- so with that data the query returns the whole table.


SELECT *
FROM books
WHERE author = 'JOHN';

PAGE RANK
import numpy as np

def page_rank(n, links, d=0.85, max_iter=100, tol=1e-6):
    """Compute PageRank scores by power iteration.

    Args:
        n: Number of pages; must be positive.
        links: ``n x n`` link matrix (array-like). Each column is normalised
            by its own sum, so ``links[i, j]`` is treated as the weight of a
            link from page ``j`` to page ``i``.
        d: Damping factor.
        max_iter: Maximum number of power-iteration steps.
        tol: Stop once the L1 distance between successive rank vectors is
            below this value.

    Returns:
        A 1-D NumPy array of length ``n`` holding the rank of each page.

    Raises:
        ValueError: If ``n`` is not positive or ``links`` is not ``n x n``.
    """
    if n <= 0:
        raise ValueError("n must be a positive number of pages")
    links = np.asarray(links, dtype=float)
    if links.shape != (n, n):
        raise ValueError(f"links must have shape ({n}, {n}), got {links.shape}")

    col_sums = links.sum(axis=0)
    # Start from the uniform distribution so that columns with no links
    # (dangling pages) keep 1/n in every row. Dividing only where the column
    # sum is non-zero avoids evaluating 0/0 for those columns.
    transition_matrix = np.full((n, n), 1.0 / n)
    np.divide(links, col_sums, out=transition_matrix, where=col_sums != 0)

    ranks = np.full(n, 1.0 / n)
    for _ in range(max_iter):
        new_ranks = (1 - d) / n + d * (transition_matrix @ ranks)
        converged = np.linalg.norm(new_ranks - ranks, 1) < tol
        # Keep the newest iterate even when stopping, so the caller gets the
        # most refined estimate rather than the one before it.
        ranks = new_ranks
        if converged:
            break
    return ranks

def main():
    """Read a link matrix from stdin and print the PageRank of every page."""
    n = int(input("Enter the number of pages: "))

    # One whitespace-separated row of integers is read per page.
    rows = []
    for row_no in range(1, n + 1):
        cells = input(f"Row {row_no}: ").split()
        rows.append([int(cell) for cell in cells])
    links = np.array(rows)

    ranks = page_rank(n, links)

    print("\nPageRank Values:")
    for page_no, score in enumerate(ranks, start=1):
        print(f"Page {page_no}: {score:.6f}")


if __name__ == "__main__":
    main()

output:
Enter the number of pages: 3
Row 1: 0 1 1
Row 2: 1 0 1
Row 3: 1 1 0

PageRank Values:
Page 1: 0.333333
Page 2: 0.333333
Page 3: 0.333333

//DECISION TREE----------------------------------------------------------------------------------

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the Iris dataset into a DataFrame with one column per feature plus the
# integer class label in 'target'.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

print("Dataset:")
print(df.head())

# Separate the feature columns from the label column.
X = df.drop(columns=['target'])
y = df['target']

# Hold out 30% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Seed the classifier as well: the split is already seeded, and an unseeded
# tree can differ between runs, which would make the printed metrics
# unrepeatable.
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print(f"\nAccuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
APRIORI ALGO---------------------------------------------------------------------------------
class Transaction:
    """One transaction: a thin wrapper around the collection of its items."""

    def __init__(self, items):
        # Stored exactly as given (no copy); read back through ``.items``.
        self.items = items

class Itemset:
    """A candidate itemset together with its support counter."""

    def __init__(self, itemset):
        # Support starts at zero and is filled in later by whoever counts it.
        self.count = 0
        self.itemset = itemset

def get_user_input():
    """Read transactions from stdin.

    Asks for the number of transactions, then for one comma-separated line of
    integer item ids per transaction.

    Returns:
        A list of ``Transaction`` objects in the order they were entered.
    """
    total = int(input("Enter the number of transactions: "))
    transactions = []
    for _ in range(total):
        raw = input("Enter items (comma-separated): ")
        parsed = [int(token) for token in raw.split(',')]
        transactions.append(Transaction(parsed))
    return transactions

def count_items(transactions, num_items):
    """Tally how often each item id occurs across all transactions.

    Args:
        transactions: Iterable of objects exposing an ``items`` iterable of
            integer item ids.
        num_items: Largest item id to make room for.

    Returns:
        A list of length ``num_items + 1`` where position ``i`` holds the
        number of occurrences of item ``i``.
    """
    tally = [0 for _ in range(num_items + 1)]
    every_item = (item for txn in transactions for item in txn.items)
    for item in every_item:
        tally[item] += 1
    return tally

def generate_candidates(prev_candidates, k):
    """Join itemsets of size ``k - 1`` into candidate itemsets of size ``k``.

    Two earlier itemsets are joined when their first ``k - 2`` items are
    equal; the new candidate is the first itemset followed by the item at
    index ``k - 2`` of the second one.

    Args:
        prev_candidates: List of ``Itemset`` objects from the previous round.
        k: Size of the itemsets to generate.

    Returns:
        A list of new ``Itemset`` objects, one per joinable pair.
    """
    prefix_len = k - 2
    joined = []
    for i, left in enumerate(prev_candidates):
        for right in prev_candidates[i + 1:]:
            if left.itemset[:prefix_len] == right.itemset[:prefix_len]:
                joined.append(Itemset(left.itemset + [right.itemset[prefix_len]]))
    return joined

def calculate_support(candidates, transactions):
    """Set each candidate's ``count`` to the number of transactions containing it.

    Args:
        candidates: Iterable of objects with ``itemset`` and ``count``
            attributes; ``count`` is overwritten in place.
        transactions: Iterable of objects exposing an ``items`` iterable.

    Returns:
        None. The result is stored on the candidates themselves.
    """
    for candidate in candidates:
        hits = 0
        for txn in transactions:
            if set(candidate.itemset).issubset(txn.items):
                hits += 1
        candidate.count = hits

def prune_candidates(candidates, min_support):
    """Keep only the candidates whose ``count`` reaches ``min_support``.

    Args:
        candidates: Iterable of objects with a ``count`` attribute.
        min_support: Minimum count (inclusive) a candidate needs to survive.

    Returns:
        A new list with the surviving candidates in their original order.
    """
    survivors = []
    for entry in candidates:
        if entry.count < min_support:
            continue
        survivors.append(entry)
    return survivors

def apriori(transactions, min_support):
    """Print all frequent itemsets, level by level, with their support counts.

    Starts from the single items whose count reaches ``min_support`` and
    repeatedly counts support, prunes, prints the survivors and joins them
    into candidates one item larger, until no candidates remain.

    Args:
        transactions: List of ``Transaction`` objects holding integer item ids.
        min_support: Minimum number of transactions an itemset must appear in.

    Returns:
        None. Results are written to stdout.
    """
    all_items = [item for transaction in transactions for item in transaction.items]
    if not all_items:
        # Nothing to mine; max() below would fail on an empty sequence.
        return

    item_counts = count_items(transactions, max(all_items))
    candidates = [Itemset([i]) for i in range(1, len(item_counts)) if item_counts[i] >= min_support]

    # ``size`` is the length of the itemsets currently held in ``candidates``.
    # The first round reports single items, so the label starts at 1.
    size = 1
    while candidates:
        calculate_support(candidates, transactions)
        candidates = prune_candidates(candidates, min_support)
        if candidates:
            print(f"\nFrequent Itemsets of size {size}:")
            for candidate in candidates:
                print(f"{' '.join(map(str, candidate.itemset))} - Support: {candidate.count}")
        size += 1
        # Join the survivors into candidates of the next size.
        candidates = generate_candidates(candidates, size)

# Driver: read the transactions and the minimum support from stdin, then
# print every frequent itemset found.
transactions = get_user_input()
# min_support is an absolute transaction count (compared with >= against each
# candidate's count), not a fraction.
min_support = int(input("Enter the minimum support (e.g., 2): "))
apriori(transactions, min_support)

OUTPUT:-------
Enter the number of transactions: 5
Enter items (comma-separated): 1,2
Enter items (comma-separated): 1
Enter items (comma-separated): 2
Enter items (comma-separated): 1,2,3
Enter items (comma-separated): 2,3
Enter the minimum support (e.g., 2): 2

Frequent Itemsets of size 2:
1 - Support: 3
2 - Support: 4
3 - Support: 2

Frequent Itemsets of size 3:
1 2 - Support: 2
2 3 - Support: 2

AGGLOMERATIVE hierarchical clustering--------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram as dendogram

def get_user_input():
    """Read a set of 2-D points from stdin.

    Asks for the number of points, then for each point's ``x y`` coordinates
    on one line separated by whitespace. A line that does not contain exactly
    two numbers is rejected with a message and the same point is asked again.

    Returns:
        A float NumPy array of shape ``(n, 2)``, one row per point.
    """
    n = int(input("Enter the number of points in the dataset: "))
    X = []
    print("Enter the co-ordinates (x,y) for each point: ")
    for i in range(n):
        while True:
            coords = input(f"Point {i+1}: ").split()
            try:
                if len(coords) != 2:
                    # No trailing ": " here: the handler appends its own
                    # ". Please try again" to this text.
                    raise ValueError("Please enter exactly two values separated by a space")
                x, y = map(float, coords)
            except ValueError as e:
                print(f"Invalid input: {e}. Please try again")
                continue
            X.append([x, y])
            break
    # reshape keeps the result two-dimensional even when no points were read.
    return np.array(X, dtype=float).reshape(-1, 2)

def hierarchical_clustering_with_dendogram(X, method='single'):
    """Cluster the points, show the dendrogram and print the merge table.

    Args:
        X: Sequence of observations passed straight to SciPy's ``linkage``.
        method: Linkage method name forwarded to ``linkage``.

    Returns:
        None. A matplotlib figure is shown and the merge table is printed.
    """
    Z = linkage(X, method=method)
    method_title = method.capitalize()
    point_labels = [f"Point {idx}" for idx in range(1, len(X) + 1)]

    # Dendrogram figure.
    plt.figure(figsize=(8, 5))
    dendogram(Z, labels=point_labels)
    plt.title(f'Dendrogram ({method_title} Linkage)')
    plt.xlabel('Point')
    plt.ylabel('Distance')
    plt.show()

    # Merge table: one row of the linkage matrix per merge step.
    table = pd.DataFrame(Z, columns=["Cluster 1", "Cluster 2", "Distance", "New Cluster Size"])
    for column in ("Cluster 1", "Cluster 2"):
        # Shift the cluster ids by one so they line up with the 1-based
        # "Point i" labels used in the plot.
        table[column] = table[column].astype(int) + 1
    print(f"\n{method_title} Linkage Clustering Merges in Tabular Format")
    print(table)

# Driver: read the points once, then run the same report for each linkage
# method in turn.
X = get_user_input()
for _method in ('single', 'complete', 'average'):
    print(f"{_method.capitalize()} Linkage Clustering:")
    hierarchical_clustering_with_dendogram(X, method=_method)

Output------------
Enter the number of points in the dataset: 6
Enter the co-ordinates (x,y) for each point:
Point 1: 0.4 0.53
Point 2: 0.22 0.38
Point 3: 0.35 0.32
Point 4: 0.26 0.19
Point 5: 0.08 0.41
Point 6: 0.45 0.30

Single Linkage Clustering:


Single Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 7 0.143178 3.0
6 8 0.143178 4.0
4 9 0.158114 5.0
1 10 0.215870 6.0

Complete Linkage Clustering:


Complete Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 6 0.143178 2.0
4 7 0.219545 3.0
1 8 0.341760 3.0
9 10 0.386005 6.0

Average Linkage Clustering:


Average Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 6 0.143178 2.0
4 7 0.188829 3.0
8 9 0.255954 5.0
1 10 0.279001 6.0
KMEAN ALGO-----------------------------------------------------------------------------------
from sklearn.cluster import KMeans
import numpy as np

# Get user input for data points


def get_user_data():
    """Read one-dimensional data points from stdin.

    Asks for the number of points, then for one numeric value per point.

    Returns:
        A NumPy array with one single-element row per value entered.
    """
    n = int(input("Enter the number of points: "))
    rows = [
        [float(input(f"Enter value for point {point_no}: "))]
        for point_no in range(1, n + 1)
    ]
    return np.array(rows)

data = get_user_data()

# Create and fit the KMeans model.
# n_init is set explicitly because its default is not the same in every
# scikit-learn release; pinning it, together with random_state, keeps the
# fit repeatable across versions.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(data)

# Retrieve the clusters and print the results.
# labels_ holds the cluster index (0 or 1) assigned to each input row.
cluster_1 = data[kmeans.labels_ == 0]
cluster_2 = data[kmeans.labels_ == 1]

print("Cluster 1:", *cluster_1.flatten())
print("Cluster 2:", *cluster_2.flatten())

OUTPUT : -------------
Enter the number of points: 9
Enter value for point 1: 2
Enter value for point 2: 4
Enter value for point 3: 10
Enter value for point 4: 12
Enter value for point 5: 3
Enter value for point 6: 20
Enter value for point 7: 30
Enter value for point 8: 11
Enter value for point 9: 25
Cluster 1: 2.0 4.0 10.0 12.0 3.0 11.0
Cluster 2: 20.0 30.0 25.0

You might also like