WORD COUNT/FREQUENCY
PROGRAM :
import string
def word_count(file_path):
    word_counts = {}
    try:
        with open(file_path, 'r') as file:
            text = file.read()
        text = text.translate(str.maketrans('', '', string.punctuation)).lower()
        words = text.split()
        for word in words:
            if word in word_counts:
                word_counts[word] += 1
            else:
                word_counts[word] = 1
        return word_counts
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None

file_path = 'example.txt'
word_counts = word_count(file_path)
if word_counts is not None:
    for word, count in word_counts.items():
        print(f"{word}: {count}")
OUTPUT :
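NOTE :
A shorter variant of the same logic, sketched with the standard library's collections.Counter (not part of the recorded program; the file name is reused from above):

import string
from collections import Counter

def word_count_counter(file_path):
    # Same cleanup as above: strip punctuation, lowercase, split on whitespace.
    try:
        with open(file_path, 'r') as file:
            text = file.read()
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None
    text = text.translate(str.maketrans('', '', string.punctuation)).lower()
    return Counter(text.split())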
WEATHER DATASET
PROGRAM :
# Step 1: Import necessary libraries
import pandas as pd
# Step 2: Define the file path for the weather dataset
file_path = 'weather_data.csv' # Replace with the actual path to your CSV file
# Step 3: Read the CSV file into a DataFrame
# The read_csv function loads the CSV data into a structured format (pandas DataFrame)
df = pd.read_csv(file_path)
# Step 4: Display the first 15 rows from the dataset
# We use the iloc method, where 0:15 means rows from index 0 to index 14 (1-15 in human-readable format)
print("Displaying lines 1 to 15 from the dataset:\n")
print(df.iloc[0:15]) # Prints rows 0 to 14 (lines 1-15)
# Step 5: Optionally, display some basic information about the dataset
print("\nBasic Info about the dataset:")
print(df.info()) # Displays the structure of the dataset, column types, and non-null counts
# Step 6: Check if there are any missing values in the dataset
print("\nChecking for missing values in the dataset:")
print(df.isnull().sum()) # Shows the count of missing values per column
# Step 7: Optional - Show a summary of statistics for numerical columns
print("\nSummary Statistics for numerical columns:")
print(df.describe())  # Provides summary statistics (e.g., mean, std, min, max) for numerical data
OUTPUT :
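NOTE :
If the missing-value check in Step 6 reports gaps, one common follow-up is to fill numeric gaps before analysis. A minimal sketch, assuming the same weather_data.csv (not part of the recorded program):

import pandas as pd

df = pd.read_csv('weather_data.csv')
# Fill each numeric column's missing entries with that column's mean.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
print(df.isnull().sum())  # numeric columns should now report zero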
SUPPORT VECTOR MACHINE
PROGRAM :
import matplotlib.pyplot as plt
from sklearn import datasets, svm
from sklearn.inspection import DecisionBoundaryDisplay
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
C = 1  # SVM regularisation parameter shared by all four models
models = (
    svm.SVC(kernel="linear", C=C),
    svm.LinearSVC(C=C, max_iter=10000),
    svm.SVC(kernel="rbf", gamma=0.7, C=C),
    svm.SVC(kernel="poly", degree=3, gamma="auto", C=C),
)
models = (clf.fit(X, y) for clf in models)
titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 3) kernel",
)
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)
X0, X1 = X[:, 0], X[:, 1]
for clf, title, ax in zip(models, titles, sub.flatten()):
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=iris.feature_names[0],
        ylabel=iris.feature_names[1],
    )
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
plt.show()
OUTPUT :
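NOTE :
The models tuple above is converted into a generator and consumed by the plotting loop, so the fitted classifiers are not kept afterwards. A sketch that refits two of them and reports mean training accuracy via score() (a variation, not part of the recorded program):

from sklearn import datasets, svm

iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target
for clf in (svm.SVC(kernel="linear", C=1), svm.SVC(kernel="rbf", gamma=0.7, C=1)):
    clf.fit(X, y)
    # score() returns mean accuracy on the given data (here, the training set).
    print(clf.kernel, "kernel, training accuracy:", clf.score(X, y))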
DECISION TREE
PROGRAM :
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
data = load_iris()
X = data.data # Features
y = data.target # Labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
clf = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    random_state=42
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
OUTPUT :
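NOTE :
The fitted tree can also be inspected as indented text with sklearn's export_text, which complements the plot_tree figure; this sketch reuses clf and data from the program above:

from sklearn.tree import export_text

# Prints each split as an indented rule, e.g. "petal width (cm) <= 0.80".
print(export_text(clf, feature_names=list(data.feature_names)))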
VISUALIZATION
PROGRAM :
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import pandas as pd
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
sns.pairplot(df, hue='species', diag_kind='kde', corner=True)
plt.show()
plt.figure(figsize=(8, 6))
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()
plt.figure(figsize=(10, 6))
sns.boxplot(data=df.iloc[:, :-1], orient='h', palette='Set2')
plt.title("Boxplot of Features")
plt.show()
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    style='species',
    palette='deep',
    s=100
)
plt.title("Scatterplot of Sepal Length vs Petal Length")
plt.show()
OUTPUT :
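NOTE :
One further view of the same DataFrame (a sketch reusing df from above, not part of the recorded program): a violin plot of one feature's distribution per species.

import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
sns.violinplot(data=df, x='species', y='petal length (cm)')
plt.title("Petal Length Distribution by Species")
plt.show()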
FASTA SEQUENCE OF GABA TRANSAMINASE
PROGRAM :
import requests
def fetch_fasta_sequence(uniprot_id):
    url = f'https://www.uniprot.org/uniprot/{uniprot_id}.fasta'
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to retrieve sequence for {uniprot_id}"

uniprot_ids = {
    'human': 'P49815',
    'mouse': 'P28752',
    'pig': 'P40938',
    'chick': 'Q09293',
}
for organism, uniprot_id in uniprot_ids.items():
    print(f"FASTA sequence for GABA transaminase in {organism.capitalize()}:\n")
    print(fetch_fasta_sequence(uniprot_id))
    print("\n" + "-"*80 + "\n")
OUTPUT :
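NOTE :
The www.uniprot.org/uniprot/{id}.fasta path used above is UniProt's legacy address and currently redirects; a sketch of the same fetch against the current REST endpoint (same accessions assumed):

import requests

def fetch_fasta_rest(uniprot_id):
    # Current UniProt REST endpoint for FASTA downloads.
    url = f'https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta'
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # raise for 4xx/5xx instead of returning a message
    return response.text

print(fetch_fasta_rest('Q09293')[:200])  # first 200 characters of the chick entry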
SWISSPROT FOR SERINE KINASE IN PIG
PROGRAM :
import requests
def get_serine_kinase_pig_count():
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        # organism_id:9823 restricts hits to pig (Sus scrofa); the REST API
        # takes the organism filter inside the query string.
        'query': 'serine kinase AND organism_id:9823',
        'format': 'json',
        'fields': 'accession'
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return len(data['results'])
    else:
        print(f"Error: Unable to fetch data (status code: {response.status_code})")
        return 0

entries_count = get_serine_kinase_pig_count()
print(f"Number of entries for Serine kinase in PIG: {entries_count}")
OUTPUT :
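NOTE :
len(data['results']) only counts the first page of hits (the API returns 25 per request by default). The total across all pages is exposed in the x-total-results response header; a minimal sketch:

import requests

response = requests.get(
    "https://rest.uniprot.org/uniprotkb/search",
    params={'query': 'serine kinase AND organism_id:9823',
            'format': 'json', 'fields': 'accession', 'size': 1},
)
print(response.headers.get('x-total-results'))  # total match count across all pages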
GENE SEQUENCE
PROGRAM :
from Bio import Entrez, SeqIO
from Bio.Blast import NCBIWWW, NCBIXML
def fetch_sequence(seq_id):
    Entrez.email = "[email protected]"  # NCBI requires a contact address
    handle = Entrez.efetch(db="nucleotide", id=seq_id, rettype="gb", retmode="text")
    record = SeqIO.read(handle, "genbank")
    handle.close()
    return record.seq

def perform_blast(query_sequence):
    result_handle = NCBIWWW.qblast("blastn", "nt", query_sequence)
    return result_handle

def parse_blast_results(blast_handle):
    blast_records = NCBIXML.parse(blast_handle)
    for blast_record in blast_records:
        print(f"Query: {blast_record.query_id}")
        print(f"Query Length: {blast_record.query_length}")
        for alignment in blast_record.alignments:
            print("\n** Alignment **")
            print(f"Sequence: {alignment.title}")
            print(f"Length: {alignment.length}")
            print(f"E-value: {alignment.hsps[0].expect}")
            print(f"Similarity: {alignment.hsps[0].identities} out of {alignment.hsps[0].align_length}")
            print(f"Alignment Length: {alignment.hsps[0].align_length}")
            print(f"Seq: {alignment.hsps[0].sbjct}")
            print("-" * 80)

def main():
    seq_id = "U80226.1"
    sequence = fetch_sequence(seq_id)
    print(f"Fetched sequence for {seq_id}:")
    print(sequence[:100])
    print("\nPerforming BLAST search...")
    blast_handle = perform_blast(sequence)
    print("\nParsing BLAST results...")
    parse_blast_results(blast_handle)

if __name__ == "__main__":
    main()
OUTPUT :
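NOTE :
NCBIWWW.qblast can take several minutes and its result handle can only be read once. A common precaution (a sketch, not part of the recorded program) is to save the raw XML to disk and parse from the file, so the search need not be rerun:

from Bio.Blast import NCBIXML

# blast_handle as returned by perform_blast() in the program above.
with open("blast_result.xml", "w") as out:
    out.write(blast_handle.read())
blast_handle.close()

with open("blast_result.xml") as saved:
    for record in NCBIXML.parse(saved):
        print(record.query_id, len(record.alignments), "hits")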
PPE PROTEIN
PROGRAM :
from Bio import Entrez
def search_ppe_diseases():
    Entrez.email = "[email protected]"  # NCBI requires a contact address
    term = "PPE protein disease"
    handle = Entrez.esearch(db="pubmed", term=term, retmax=10)
    record = Entrez.read(handle)
    handle.close()
    id_list = record["IdList"]
    if id_list:
        print(f"Found {len(id_list)} articles related to PPE protein and diseases:")
        for pmid in id_list:
            handle = Entrez.esummary(db="pubmed", id=pmid)
            summary = Entrez.read(handle)
            handle.close()
            for article in summary:
                title = article.get("Title", "No Title Available")
                source = article.get("Source", "No Source Available")
                print(f"Title: {title}")
                print(f"Source: {source}")
                print("-" * 60)
    else:
        print("No articles found for PPE protein and disease.")

search_ppe_diseases()
OUTPUT :
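NOTE :
Beyond the esummary titles, the abstracts themselves can be pulled with Entrez.efetch. A sketch using the same search term (email is a placeholder, as above):

from Bio import Entrez

Entrez.email = "[email protected]"  # NCBI requires a valid contact address
handle = Entrez.esearch(db="pubmed", term="PPE protein disease", retmax=3)
id_list = Entrez.read(handle)["IdList"]
handle.close()
for pmid in id_list:
    fetch = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="text")
    print(fetch.read()[:300])  # first few lines of each abstract
    fetch.close()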