0% found this document useful (0 votes)

3 views

D3 docs

Uploaded by

bttghlgsj

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

3 views

D3 docs

Uploaded by

bttghlgsj

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 6

-- Create the books table: one row per book, keyed by book_id.
-- title is the only column besides the key that must be non-NULL.

CREATE TABLE books (

book_id INT PRIMARY KEY,
title VARCHAR(255) NOT NULL,
edition VARCHAR(50),
author VARCHAR(255)
);

-- Insert data into the books table: five sample rows, each with a distinct
-- title and edition label, all sharing the author 'JOHN'.

INSERT INTO books (book_id, title, edition, author) VALUES (1, 'RAM', '1 EDITION', 'JOHN');
INSERT INTO books (book_id, title, edition, author) VALUES (2, 'sham', '2 EDITION', 'JOHN');
INSERT INTO books (book_id, title, edition, author) VALUES (3, 'pik', '3 EDITION', 'JOHN');
INSERT INTO books (book_id, title, edition, author) VALUES (4, 'ton', '4 EDITION', 'JOHN');
INSERT INTO books (book_id, title, edition, author) VALUES (5, 'mon', '5 EDITION', 'JOHN');

-- SLICE operation: count the books with '1 EDITION'
-- (restricts a single column, edition, to one value).

SELECT COUNT(book_id) AS num_books
FROM books
WHERE edition = '1 EDITION';

-- DICE operation: select books with the title 'pik'
-- NOTE(review): this query restricts one column (title) only; a dice is
-- usually described as restricting two or more dimensions -- confirm intent.

SELECT book_id, title, edition
FROM books
WHERE title = 'pik';

-- ROLL-UP operation: group by edition and count the number of books for each
-- (one output row per distinct edition value).
SELECT edition, COUNT(*) AS num_books
FROM books
GROUP BY edition;

-- DRILL-DOWN operation: select all books with the author 'JOHN'
-- Returns every column of the matching rows; with the sample data above,
-- all five rows have author 'JOHN'.

SELECT *
FROM books
WHERE author = 'JOHN';

PAGE RANK
import numpy as np

def page_rank(n, links, d=0.85, max_iter=100, tol=1e-6):
    """Compute PageRank scores by power iteration.

    Args:
        n: Number of pages; ``links`` is expected to be ``n`` x ``n``.
        links: Array-like link matrix. Columns are normalised to sum to 1,
            so column ``j`` is treated as the out-links of page ``j``.
            NOTE(review): confirm callers enter the matrix in this
            orientation; for a symmetric matrix it makes no difference.
        d: Damping factor.
        max_iter: Maximum number of power-iteration steps.
        tol: Stop once the L1 change between successive rank vectors is
            below this value.

    Returns:
        A length-``n`` NumPy array holding the rank of each page.
    """
    links = np.asarray(links, dtype=float)
    column_totals = links.sum(axis=0)
    dangling = column_totals == 0

    # Divide by 1 in dangling columns so no 0/0 is ever evaluated; those
    # columns are overwritten with the uniform value 1/n anyway.
    safe_totals = np.where(dangling, 1.0, column_totals)
    transition_matrix = np.where(dangling, 1.0 / n, links / safe_totals)

    ranks = np.ones(n) / n
    for _ in range(max_iter):
        new_ranks = (1 - d) / n + d * transition_matrix @ ranks
        converged = np.linalg.norm(new_ranks - ranks, 1) < tol
        # Keep the newest iterate even when stopping, so the returned vector
        # is the converged one rather than the step before it.
        ranks = new_ranks
        if converged:
            break
    return ranks

def main():
    """Read a link matrix from stdin, rank the pages, and print the ranks.

    Prompts for the page count, then for one whitespace-separated row of
    integers per page, and prints each rank to six decimal places.
    """
    page_count = int(input("Enter the number of pages: "))
    rows = []
    for row_idx in range(page_count):
        entries = input(f"Row {row_idx + 1}: ").split()
        rows.append([int(entry) for entry in entries])
    ranks = page_rank(page_count, np.array(rows))

    print("\nPageRank Values:")
    for page_no, rank in enumerate(ranks, start=1):
        print(f"Page {page_no}: {rank:.6f}")


if __name__ == "__main__":
    main()

output:
Enter the number of pages: 3
Row 1: 0 1 1
Row 2: 1 0 1
Row 3: 1 1 0

PageRank Values:
Page 1: 0.333333
Page 2: 0.333333
Page 3: 0.333333

//DECISION TREE----------------------------------------------------------------------------------

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the iris dataset into a DataFrame: one column per feature plus a
# 'target' column holding the class label.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

print("Dataset:")
print(df.head())

# Separate the feature columns from the label column.
X = df.drop(columns=['target'])
y = df['target']

# Hold out 30% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Seed the tree as well: with only the split seeded, the fitted tree (and
# therefore the metrics printed below) can differ from run to run.
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

# Report overall accuracy, per-class metrics, and the confusion matrix.
print(f"\nAccuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
APRIORI ALGO---------------------------------------------------------------------------------
class Transaction:
    """One transaction: a thin wrapper around the items it contains."""

    def __init__(self, items):
        # The collection is stored as given; it is neither copied nor checked.
        self.items = items

class Itemset:
    """A candidate itemset together with its support count."""

    def __init__(self, itemset):
        # The count starts at zero for every new candidate.
        self.itemset, self.count = itemset, 0

def get_user_input():
    """Read transactions from stdin.

    Prompts for the number of transactions, then for one comma-separated
    line of integer item ids per transaction.

    Returns:
        A list of Transaction objects in the order they were entered.
    """
    total = int(input("Enter the number of transactions: "))
    collected = []
    for _ in range(total):
        raw_line = input("Enter items (comma-separated): ")
        parsed = [int(token) for token in raw_line.split(',')]
        collected.append(Transaction(parsed))
    return collected

def count_items(transactions, num_items):
    """Tally how many times each item id occurs across all transactions.

    Args:
        transactions: Iterable of objects exposing an ``items`` sequence of
            integer item ids.
        num_items: Largest item id to make room for.

    Returns:
        A list of length ``num_items + 1`` where position ``i`` holds the
        number of occurrences of item ``i``.
    """
    tallies = [0 for _ in range(num_items + 1)]
    every_item = (item for txn in transactions for item in txn.items)
    for item in every_item:
        # The item id doubles as the list index.
        tallies[item] += 1
    return tallies

def generate_candidates(prev_candidates, k):
    """Join pairs of (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are joined when their first ``k - 2`` elements agree; the
    new candidate is the earlier itemset extended with the element at
    position ``k - 2`` of the later one.

    Args:
        prev_candidates: List of Itemset objects of size ``k - 1``.
        k: Size of the itemsets to generate.

    Returns:
        A list of new Itemset objects, each with a count of zero.
    """
    prefix_len = k - 2
    joined = []
    for pos, left in enumerate(prev_candidates):
        for right in prev_candidates[pos + 1:]:
            if left.itemset[:prefix_len] == right.itemset[:prefix_len]:
                joined.append(Itemset(left.itemset + [right.itemset[prefix_len]]))
    return joined

def calculate_support(candidates, transactions):
    """Set each candidate's ``count`` to the number of transactions containing it.

    A transaction contains a candidate when every element of the candidate's
    ``itemset`` appears in the transaction's ``items``. The candidates are
    updated in place and nothing is returned.
    """
    for cand in candidates:
        wanted = set(cand.itemset)
        hits = 0
        for txn in transactions:
            if wanted.issubset(txn.items):
                hits += 1
        cand.count = hits

def prune_candidates(candidates, min_support):
    """Return the candidates whose ``count`` is at least ``min_support``.

    The input order is preserved and the input list is not modified.
    """
    survivors = []
    for cand in candidates:
        if cand.count >= min_support:
            survivors.append(cand)
    return survivors

def apriori(transactions, min_support):
    """Print every frequent itemset, level by level, starting from size 1.

    Args:
        transactions: List of Transaction objects whose ``items`` are
            positive integer item ids.
        min_support: Minimum number of transactions an itemset must appear
            in to be reported.

    Prints one section per itemset size that has at least one frequent
    itemset. Returns nothing.
    """
    all_items = [item for transaction in transactions for item in transaction.items]
    if not all_items:
        # max() of an empty sequence raises ValueError; with no items there
        # is nothing to mine.
        return

    item_counts = count_items(transactions, max(all_items))
    candidates = [Itemset([i]) for i in range(1, len(item_counts)) if item_counts[i] >= min_support]

    # Size of the itemsets currently held in `candidates`, so the printed
    # heading matches the itemsets listed under it.
    size = 1
    while candidates:
        calculate_support(candidates, transactions)
        candidates = prune_candidates(candidates, min_support)
        if candidates:
            print(f"\nFrequent Itemsets of size {size}:")
            for candidate in candidates:
                print(f"{' '.join(map(str, candidate.itemset))} - Support: {candidate.count}")
        size += 1
        candidates = generate_candidates(candidates, size)

# Driver: read the transactions and the support threshold from stdin, then
# run the search, which prints the frequent itemsets it finds.
transactions = get_user_input()
# The threshold is an absolute transaction count: prune_candidates keeps a
# candidate when its count is >= this value.
min_support = int(input("Enter the minimum support (e.g., 2): "))
apriori(transactions, min_support)

OUTPUT:-------
Enter the number of transactions: 5
Enter items (comma-separated): 1,2
Enter items (comma-separated): 1
Enter items (comma-separated): 2
Enter items (comma-separated): 1,2,3
Enter items (comma-separated): 2,3
Enter the minimum support (e.g., 2): 2

Frequent Itemsets of size 2:
1 - Support: 3
2 - Support: 4
3 - Support: 2

Frequent Itemsets of size 3:
1 2 - Support: 2
2 3 - Support: 2

AGGLOMERATIVE hierarchical clustering--------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram as dendogram

def get_user_input():
    """Read 2-D points from stdin, re-prompting until each one is valid.

    Prompts for the number of points, then for each point expects two
    whitespace-separated numbers. A line with the wrong number of values,
    or with a value that is not a float, is rejected with a message and the
    same point is asked for again.

    Returns:
        A NumPy array built from the accepted ``[x, y]`` pairs.
    """
    point_total = int(input("Enter the number of points in the dataset: "))
    print("Enter the co-ordinates (x,y) for each point: ")
    points = []
    for idx in range(point_total):
        accepted = False
        while not accepted:
            try:
                tokens = input(f"Point {idx+1}: ").split()
                if len(tokens) != 2:
                    raise ValueError("Please enter exactly two values separated by a space: ")
                points.append([float(tokens[0]), float(tokens[1])])
                accepted = True
            except ValueError as err:
                print(f"Invalid input: {err}. Please try again")
    return np.array(points)

def hierarchical_clustering_with_dendogram(X, method='single'):
    """Cluster the points, show a dendrogram, and print the merge table.

    Args:
        X: Array of points, one row per point.
        method: Linkage method name passed straight to ``linkage``.

    Shows a dendrogram with leaves labelled "Point 1", "Point 2", ... and
    then prints the linkage result as a table. The two cluster-id columns
    are converted to integers and shifted by one, presumably so they line
    up with the 1-based point labels.
    """
    merges = linkage(X, method=method)
    method_title = method.capitalize()
    leaf_labels = [f"Point {i+1}" for i in range(len(X))]

    # Dendrogram plot.
    plt.figure(figsize=(8, 5))
    dendogram(merges, labels=leaf_labels)
    plt.title(f'Dendrogram ({method_title} Linkage)')
    plt.xlabel('Point')
    plt.ylabel('Distance')
    plt.show()

    # Tabular view of the same merges.
    table = pd.DataFrame(merges, columns=["Cluster 1", "Cluster 2", "Distance", "New Cluster Size"])
    for column in ("Cluster 1", "Cluster 2"):
        table[column] = table[column].astype(int) + 1
    print(f"\n{method_title} Linkage Clustering Merges in Tabular Format")
    print(table)

# Driver: read the points once, then run the same clustering routine with
# three different linkage methods on that one dataset.
X = get_user_input()
print("Single Linkage Clustering:")
hierarchical_clustering_with_dendogram(X, method='single')
print("Complete Linkage Clustering:")
hierarchical_clustering_with_dendogram(X, method='complete')
print("Average Linkage Clustering:")
hierarchical_clustering_with_dendogram(X, method='average')

Output------------
Enter the number of points in the dataset: 6
Enter the co-ordinates (x,y) for each point:
Point 1: 0.4 0.53
Point 2: 0.22 0.38
Point 3: 0.35 0.32
Point 4: 0.26 0.19
Point 5: 0.08 0.41
Point 6: 0.45 0.30

Single Linkage Clustering:

Single Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 7 0.143178 3.0
6 8 0.143178 4.0
4 9 0.158114 5.0
1 10 0.215870 6.0

Complete Linkage Clustering:

Complete Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 6 0.143178 2.0
4 7 0.219545 3.0
1 8 0.341760 3.0
9 10 0.386005 6.0

Average Linkage Clustering:

Average Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 6 0.143178 2.0
4 7 0.188829 3.0
8 9 0.255954 5.0
1 10 0.279001 6.0
KMEAN ALGO-----------------------------------------------------------------------------------
from sklearn.cluster import KMeans
import numpy as np

# Get user input for data points

def get_user_data():
    """Read one-dimensional data points from stdin.

    Prompts for the number of points and then for one float per point.

    Returns:
        A NumPy array with one single-value row per point.
    """
    count = int(input("Enter the number of points: "))
    samples = [[float(input(f"Enter value for point {k+1}: "))] for k in range(count)]
    return np.array(samples)

data = get_user_data()

# Create and fit the KMeans model
# n_init is pinned because its default differs between scikit-learn
# releases; fixing it keeps the number of restarts, and so the result,
# the same everywhere.

kmeans = KMeans(n_clusters=2, random_state=0, n_init=10).fit(data)

# Retrieve the clusters and print the results
# Boolean masks on the fitted labels pick out the rows of each cluster.

cluster_1 = data[kmeans.labels_ == 0]
cluster_2 = data[kmeans.labels_ == 1]

print("Cluster 1:", *cluster_1.flatten())

print("Cluster 2:", *cluster_2.flatten())

OUTPUT : -------------
Enter the number of points: 9
Enter value for point 1: 2
Enter value for point 2: 4
Enter value for point 3: 10
Enter value for point 4: 12
Enter value for point 5: 3
Enter value for point 6: 20
Enter value for point 7: 30
Enter value for point 8: 11
Enter value for point 9: 25
Cluster 1: 2.0 4.0 10.0 12.0 3.0 11.0
Cluster 2: 20.0 30.0 25.0

Wiley - Data Mining For Business Analytics - Concepts, Techniques and Applications in Python - 978-1-119-54984-0
0% (1)
Wiley - Data Mining For Business Analytics - Concepts, Techniques and Applications in Python - 978-1-119-54984-0
3 pages
1.1 Read The Data and Do Exploratory Data Analysis. Describe The Data Briefly
100% (19)
1.1 Read The Data and Do Exploratory Data Analysis. Describe The Data Briefly
50 pages
CH 01 Introduction
No ratings yet
CH 01 Introduction
21 pages
TOO
No ratings yet
TOO
7 pages
Prac7 8 9 10
No ratings yet
Prac7 8 9 10
12 pages
DATA MINING EX1
No ratings yet
DATA MINING EX1
10 pages
DWM Practical
No ratings yet
DWM Practical
12 pages
Practical 5
No ratings yet
Practical 5
6 pages
23CC554
No ratings yet
23CC554
10 pages
Code:: To Find Frequent Itemsets and Association Between Different Itemsets Using Apriori Algorithm
No ratings yet
Code:: To Find Frequent Itemsets and Association Between Different Itemsets Using Apriori Algorithm
28 pages
DM Lab Internal
No ratings yet
DM Lab Internal
37 pages
7 output
No ratings yet
7 output
4 pages
Mlda - Lab
No ratings yet
Mlda - Lab
35 pages
6
No ratings yet
6
4 pages
DOC-20241108-WA0003
No ratings yet
DOC-20241108-WA0003
16 pages
assg 3
No ratings yet
assg 3
31 pages
ML Shristi File
No ratings yet
ML Shristi File
49 pages
DWDM Lab All
No ratings yet
DWDM Lab All
20 pages
Data Mining Assignment No. 1
No ratings yet
Data Mining Assignment No. 1
22 pages
Python Course Cheat Sheet
No ratings yet
Python Course Cheat Sheet
30 pages
DMT Cia2
No ratings yet
DMT Cia2
11 pages
Data Mining - Project
100% (2)
Data Mining - Project
11 pages
Clustering
No ratings yet
Clustering
1 page
Data Mining Lab Manual
No ratings yet
Data Mining Lab Manual
7 pages
Python DM Lab Manual Part 2
No ratings yet
Python DM Lab Manual Part 2
8 pages
Mlext
No ratings yet
Mlext
1 page
ML assignment
No ratings yet
ML assignment
11 pages
ML Exp5 C36
No ratings yet
ML Exp5 C36
18 pages
Final ML File
No ratings yet
Final ML File
34 pages
PRAC9_23BME053
No ratings yet
PRAC9_23BME053
4 pages
MLLabManual
No ratings yet
MLLabManual
24 pages
Project Data Mining (AMAN YADAV)
No ratings yet
Project Data Mining (AMAN YADAV)
12 pages
Week 6 (PCA, SVD, LDA)
No ratings yet
Week 6 (PCA, SVD, LDA)
14 pages
AIML_LAB
No ratings yet
AIML_LAB
37 pages
5
No ratings yet
5
2 pages
Clustering Documentation Python Code
No ratings yet
Clustering Documentation Python Code
8 pages
DWDM Lab Report
No ratings yet
DWDM Lab Report
26 pages
Week 8 DS Practical (1)
No ratings yet
Week 8 DS Practical (1)
13 pages
Advanced Database
No ratings yet
Advanced Database
23 pages
Machine Learning Lab
No ratings yet
Machine Learning Lab
33 pages
Lab
No ratings yet
Lab
13 pages
Shiva Teja
No ratings yet
Shiva Teja
19 pages
K Means Algorithm
No ratings yet
K Means Algorithm
6 pages
Ai ML Programs
No ratings yet
Ai ML Programs
34 pages
Presentation 1
No ratings yet
Presentation 1
2 pages
Reading Data: #Importing Required Libraries
No ratings yet
Reading Data: #Importing Required Libraries
16 pages
Aiml Lab
No ratings yet
Aiml Lab
14 pages
MS6711 Data Mining Homework 1: 1.1 Implement K-Means Manually (8 PTS)
No ratings yet
MS6711 Data Mining Homework 1: 1.1 Implement K-Means Manually (8 PTS)
6 pages
ml labs
No ratings yet
ml labs
14 pages
ML Journal
No ratings yet
ML Journal
58 pages
DM ML Practical
No ratings yet
DM ML Practical
13 pages
Experiment1111
No ratings yet
Experiment1111
25 pages
Slip Clustering
No ratings yet
Slip Clustering
2 pages
Https Raw - Githubusercontent.com Joelgrus Data-Science-From-Scratch Master Code Working With Data
No ratings yet
Https Raw - Githubusercontent.com Joelgrus Data-Science-From-Scratch Master Code Working With Data
7 pages
DATASCIENCE_INTERNSHIP[1]
No ratings yet
DATASCIENCE_INTERNSHIP[1]
43 pages
K++
No ratings yet
K++
5 pages
05 E RandomForest LoanData
No ratings yet
05 E RandomForest LoanData
8 pages
Machine Learning Lab
No ratings yet
Machine Learning Lab
43 pages
ML Lab
No ratings yet
ML Lab
7 pages
ML Minors Exp7
No ratings yet
ML Minors Exp7
6 pages
ML Lab Manual
No ratings yet
ML Lab Manual
24 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
(Cybernetics and Information Technologies) Predicting Student Performance by Using Data Mining Methods For Classification
No ratings yet
(Cybernetics and Information Technologies) Predicting Student Performance by Using Data Mining Methods For Classification
12 pages
1.1 Overview: Data Mining Based Risk Estimation of Road Accidents
No ratings yet
1.1 Overview: Data Mining Based Risk Estimation of Road Accidents
61 pages
HEART DISEASE PREDICTION REPORT Op Edited
No ratings yet
HEART DISEASE PREDICTION REPORT Op Edited
29 pages
Data Mining in Banking and Its Applications - A Rev
No ratings yet
Data Mining in Banking and Its Applications - A Rev
9 pages
3 Big-Data
No ratings yet
3 Big-Data
14 pages
Chapter Four
No ratings yet
Chapter Four
10 pages
Information Technology
No ratings yet
Information Technology
23 pages
Case Study On Text Mining
No ratings yet
Case Study On Text Mining
8 pages
Project Report
No ratings yet
Project Report
7 pages
1 Pengenalan Penambangan Data-IMD
No ratings yet
1 Pengenalan Penambangan Data-IMD
34 pages
(Download PDF) Data Science and Analytics With Python 1St Edition Jesus Rogel Salazar Online Ebook All Chapter PDF
100% (16)
(Download PDF) Data Science and Analytics With Python 1St Edition Jesus Rogel Salazar Online Ebook All Chapter PDF
42 pages
NTU Master of Science (Information System) Curriculum
No ratings yet
NTU Master of Science (Information System) Curriculum
4 pages
MSC Hanif e 2019
No ratings yet
MSC Hanif e 2019
97 pages
Introduction To Course: Advanced Data Mining
No ratings yet
Introduction To Course: Advanced Data Mining
7 pages
Datasist: A Python-Based Library For Easy Data Analysis, Visualization and Modeling
No ratings yet
Datasist: A Python-Based Library For Easy Data Analysis, Visualization and Modeling
17 pages
Student Performance Prediction by Using Data Mining Classification Algorithms
No ratings yet
Student Performance Prediction by Using Data Mining Classification Algorithms
6 pages
Evaluation of Customer Ratings On Restaurant by Clustering Techniques Using R
No ratings yet
Evaluation of Customer Ratings On Restaurant by Clustering Techniques Using R
8 pages
A Comparative Analysis of Predictive Modeling, Data Mining, and Machine Learning
No ratings yet
A Comparative Analysis of Predictive Modeling, Data Mining, and Machine Learning
11 pages
A.I Seminal
No ratings yet
A.I Seminal
27 pages
Machine Learning File
No ratings yet
Machine Learning File
7 pages
Hierarchical clustering
No ratings yet
Hierarchical clustering
23 pages
Predictions in Heart Disease Using Techniques of Data Mining
No ratings yet
Predictions in Heart Disease Using Techniques of Data Mining
6 pages
Lec 1 Data Mining Introduction For Exam
No ratings yet
Lec 1 Data Mining Introduction For Exam
48 pages
AAM Summer 2024 Question Paper
No ratings yet
AAM Summer 2024 Question Paper
4 pages
Quiz 2 - Dimensionality reduction_ Machine Learning 3 - Ravi
No ratings yet
Quiz 2 - Dimensionality reduction_ Machine Learning 3 - Ravi
5 pages
Data Mining - Ensemble Methods
No ratings yet
Data Mining - Ensemble Methods
12 pages
Lecture-2 the Building Blocks
No ratings yet
Lecture-2 the Building Blocks
36 pages
Anatella Quick Guide
No ratings yet
Anatella Quick Guide
159 pages

D3 docs

Uploaded by

D3 docs

Uploaded by

-- Create the books table

CREATE TABLE books (

-- Insert data into the books table

-- SLICE operation: count the books with '1 EDITION'

-- DICE operation: select books with the title 'pik'

-- DRILL-DOWN operation: select all books with the author 'JOHN'

def page_rank(n, links, d=0.85, max_iter=100, tol=1e-6):

output: Enter the number of pages: 3 PageRank Values:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

accuracy = accuracy_score(y_test, y_pred)

def count_items(transactions, num_items):

def generate_candidates(prev_candidates, k):

def calculate_support(candidates, transactions):

def prune_candidates(candidates, min_support):

def apriori(transactions, min_support):

Frequent Itemsets of size 2:

AGGLOMERATIVE hierarchical clustering--------------------------------------------------

def hierarchical_clustering_with_dendogram(X, method='single'):

Single Linkage Clustering:

Complete Linkage Clustering:

Average Linkage Clustering:

# Get user input for data points

# Create and fit the KMeans model

# Retrieve the clusters and print the results

print("Cluster 1:", *cluster_1.flatten())

You might also like