WORD COUNT/FREQUENCY
PROGRAM :
import string
def word_count(file_path):
    word_counts = {}
    try:
        with open(file_path, 'r') as file:
            text = file.read()
        text = text.translate(str.maketrans('', '', string.punctuation)).lower()
        words = text.split()
        for word in words:
            if word in word_counts:
                word_counts[word] += 1
            else:
                word_counts[word] = 1
        return word_counts
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None

file_path = 'example.txt'
word_counts = word_count(file_path)
if word_counts is not None:
    for word, count in word_counts.items():
        print(f"{word}: {count}")
OUTPUT :
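NOTE :
A shorter variant of the same logic, sketched with the standard library's collections.Counter (not part of the recorded program; the file name is reused from above):

import string
from collections import Counter

def word_count_counter(file_path):
    # Same cleanup as above: strip punctuation, lowercase, split on whitespace.
    try:
        with open(file_path, 'r') as file:
            text = file.read()
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None
    text = text.translate(str.maketrans('', '', string.punctuation)).lower()
    return Counter(text.split())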
WEATHER DATASET
PROGRAM :
# Step 1: Import necessary libraries
import pandas as pd
# Step 2: Define the file path for the weather dataset
file_path = 'weather_data.csv' # Replace with the actual path to your CSV file
# Step 3: Read the CSV file into a DataFrame
# The read_csv function loads the CSV data into a structured format (pandas DataFrame)
df = pd.read_csv(file_path)
# Step 4: Display the first 15 rows from the dataset
# We use the iloc method, where 0:15 means rows from index 0 to index 14 (1-15 in human-readable format)
print("Displaying lines 1 to 15 from the dataset:\n")
print(df.iloc[0:15]) # Prints rows 0 to 14 (lines 1-15)
# Step 5: Optionally, display some basic information about the dataset
print("\nBasic Info about the dataset:")
print(df.info()) # Displays the structure of the dataset, column types, and non-null counts
# Step 6: Check if there are any missing values in the dataset
print("\nChecking for missing values in the dataset:")
print(df.isnull().sum()) # Shows the count of missing values per column
# Step 7: Optional - Show a summary of statistics for numerical columns
print("\nSummary Statistics for numerical columns:")
print(df.describe())  # Provides summary statistics (e.g., mean, std, min, max) for numerical data
OUTPUT :
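NOTE :
If the missing-value check in Step 6 reports gaps, one common follow-up is to fill numeric gaps before analysis. A minimal sketch, assuming the same weather_data.csv (not part of the recorded program):

import pandas as pd

df = pd.read_csv('weather_data.csv')
# Fill each numeric column's missing entries with that column's mean.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
print(df.isnull().sum())  # numeric columns should now report zero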
SUPPORT VECTOR MACHINE
PROGRAM :
import matplotlib.pyplot as plt
from sklearn import datasets, svm
from sklearn.inspection import DecisionBoundaryDisplay
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
C = 1  # SVM regularisation parameter shared by all four models
models = (
    svm.SVC(kernel="linear", C=C),
    svm.LinearSVC(C=C, max_iter=10000),
    svm.SVC(kernel="rbf", gamma=0.7, C=C),
    svm.SVC(kernel="poly", degree=3, gamma="auto", C=C),
)
models = (clf.fit(X, y) for clf in models)
titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 3) kernel",
)
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)
X0, X1 = X[:, 0], X[:, 1]
for clf, title, ax in zip(models, titles, sub.flatten()):
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=iris.feature_names[0],
        ylabel=iris.feature_names[1],
    )
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
plt.show()
OUTPUT :
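NOTE :
The models tuple above is converted into a generator and consumed by the plotting loop, so the fitted classifiers are not kept afterwards. A sketch that refits two of them and reports mean training accuracy via score() (a variation, not part of the recorded program):

from sklearn import datasets, svm

iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target
for clf in (svm.SVC(kernel="linear", C=1), svm.SVC(kernel="rbf", gamma=0.7, C=1)):
    clf.fit(X, y)
    # score() returns mean accuracy on the given data (here, the training set).
    print(clf.kernel, "kernel, training accuracy:", clf.score(X, y))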
DECISION TREE
PROGRAM :
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
data = load_iris()
X = data.data # Features
y = data.target # Labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
clf = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    random_state=42
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
OUTPUT :
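NOTE :
The fitted tree can also be inspected as indented text with sklearn's export_text, which complements the plot_tree figure; this sketch reuses clf and data from the program above:

from sklearn.tree import export_text

# Prints each split as an indented rule, e.g. "petal width (cm) <= 0.80".
print(export_text(clf, feature_names=list(data.feature_names)))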
VISUALIZATION
PROGRAM :
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import pandas as pd
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
sns.pairplot(df, hue='species', diag_kind='kde', corner=True)
plt.show()
plt.figure(figsize=(8, 6))
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()
plt.figure(figsize=(10, 6))
sns.boxplot(data=df.iloc[:, :-1], orient='h', palette='Set2')
plt.title("Boxplot of Features")
plt.show()
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    style='species',
    palette='deep',
    s=100
)
plt.title("Scatterplot of Sepal Length vs Petal Length")
plt.show()
OUTPUT :
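NOTE :
One further view of the same DataFrame (a sketch reusing df from above, not part of the recorded program): a violin plot of one feature's distribution per species.

import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
sns.violinplot(data=df, x='species', y='petal length (cm)')
plt.title("Petal Length Distribution by Species")
plt.show()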
FASTA SEQUENCE OF GABA TRANSAMINASE
PROGRAM :
import requests
def fetch_fasta_sequence(uniprot_id):
    url = f'https://www.uniprot.org/uniprot/{uniprot_id}.fasta'
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to retrieve sequence for {uniprot_id}"

uniprot_ids = {
    'human': 'P49815',
    'mouse': 'P28752',
    'pig': 'P40938',
    'chick': 'Q09293',
}
for organism, uniprot_id in uniprot_ids.items():
    print(f"FASTA sequence for GABA transaminase in {organism.capitalize()}:\n")
    print(fetch_fasta_sequence(uniprot_id))
    print("\n" + "-"*80 + "\n")
OUTPUT :
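NOTE :
The www.uniprot.org/uniprot/{id}.fasta path used above is UniProt's legacy address and currently redirects; a sketch of the same fetch against the current REST endpoint (same accessions assumed):

import requests

def fetch_fasta_rest(uniprot_id):
    # Current UniProt REST endpoint for FASTA downloads.
    url = f'https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta'
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # raise for 4xx/5xx instead of returning a message
    return response.text

print(fetch_fasta_rest('Q09293')[:200])  # first 200 characters of the chick entry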
SWISSPROT FOR SERINE KINASE IN PIG
PROGRAM :
import requests
def get_serine_kinase_pig_count():
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        # organism_id:9823 restricts hits to pig (Sus scrofa); the REST API
        # takes the organism filter inside the query string.
        'query': 'serine kinase AND organism_id:9823',
        'format': 'json',
        'fields': 'accession'
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return len(data['results'])
    else:
        print(f"Error: Unable to fetch data (status code: {response.status_code})")
        return 0

entries_count = get_serine_kinase_pig_count()
print(f"Number of entries for Serine kinase in PIG: {entries_count}")
OUTPUT :
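NOTE :
len(data['results']) only counts the first page of hits (the API returns 25 per request by default). The total across all pages is exposed in the x-total-results response header; a minimal sketch:

import requests

response = requests.get(
    "https://rest.uniprot.org/uniprotkb/search",
    params={'query': 'serine kinase AND organism_id:9823',
            'format': 'json', 'fields': 'accession', 'size': 1},
)
print(response.headers.get('x-total-results'))  # total match count across all pages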
GENE SEQUENCE
PROGRAM :
from Bio import Entrez, SeqIO
from Bio.Blast import NCBIWWW, NCBIXML
def fetch_sequence(seq_id):
    Entrez.email = "[email protected]"  # NCBI requires a contact address
    handle = Entrez.efetch(db="nucleotide", id=seq_id, rettype="gb", retmode="text")
    record = SeqIO.read(handle, "genbank")
    handle.close()
    return record.seq

def perform_blast(query_sequence):
    result_handle = NCBIWWW.qblast("blastn", "nt", query_sequence)
    return result_handle

def parse_blast_results(blast_handle):
    blast_records = NCBIXML.parse(blast_handle)
    for blast_record in blast_records:
        print(f"Query: {blast_record.query_id}")
        print(f"Query Length: {blast_record.query_length}")
        for alignment in blast_record.alignments:
            print("\n** Alignment **")
            print(f"Sequence: {alignment.title}")
            print(f"Length: {alignment.length}")
            print(f"E-value: {alignment.hsps[0].expect}")
            print(f"Similarity: {alignment.hsps[0].identities} out of {alignment.hsps[0].align_length}")
            print(f"Alignment Length: {alignment.hsps[0].align_length}")
            print(f"Seq: {alignment.hsps[0].sbjct}")
            print("-" * 80)

def main():
    seq_id = "U80226.1"
    sequence = fetch_sequence(seq_id)
    print(f"Fetched sequence for {seq_id}:")
    print(sequence[:100])
    print("\nPerforming BLAST search...")
    blast_handle = perform_blast(sequence)
    print("\nParsing BLAST results...")
    parse_blast_results(blast_handle)

if __name__ == "__main__":
    main()
OUTPUT :
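NOTE :
NCBIWWW.qblast can take several minutes and its result handle can only be read once. A common precaution (a sketch, not part of the recorded program) is to save the raw XML to disk and parse from the file, so the search need not be rerun:

from Bio.Blast import NCBIXML

# blast_handle as returned by perform_blast() in the program above.
with open("blast_result.xml", "w") as out:
    out.write(blast_handle.read())
blast_handle.close()

with open("blast_result.xml") as saved:
    for record in NCBIXML.parse(saved):
        print(record.query_id, len(record.alignments), "hits")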
PPE PROTEIN
PROGRAM :
from Bio import Entrez
def search_ppe_diseases():
    Entrez.email = "[email protected]"  # NCBI requires a contact address
    term = "PPE protein disease"
    handle = Entrez.esearch(db="pubmed", term=term, retmax=10)
    record = Entrez.read(handle)
    handle.close()
    id_list = record["IdList"]
    if id_list:
        print(f"Found {len(id_list)} articles related to PPE protein and diseases:")
        for pmid in id_list:
            handle = Entrez.esummary(db="pubmed", id=pmid)
            summary = Entrez.read(handle)
            handle.close()
            for article in summary:
                title = article.get("Title", "No Title Available")
                source = article.get("Source", "No Source Available")
                print(f"Title: {title}")
                print(f"Source: {source}")
                print("-" * 60)
    else:
        print("No articles found for PPE protein and disease.")

search_ppe_diseases()
OUTPUT :
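NOTE :
Beyond the esummary titles, the abstracts themselves can be pulled with Entrez.efetch. A sketch using the same search term (email is a placeholder, as above):

from Bio import Entrez

Entrez.email = "[email protected]"  # NCBI requires a valid contact address
handle = Entrez.esearch(db="pubmed", term="PPE protein disease", retmax=3)
id_list = Entrez.read(handle)["IdList"]
handle.close()
for pmid in id_list:
    fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
    print(fetch.read()[:300])  # first few lines of each abstract
    fetch.close()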