The document contains multiple Python programs covering word count, weather data analysis, support vector machines, decision trees, data visualization, and bioinformatics tasks such as fetching FASTA sequences and searching for disease-related articles. Each program demonstrates a specific capability, such as reading files, performing machine learning tasks, or retrieving data from online databases; the overall theme is data analysis and bioinformatics with Python.


WORD COUNT/FREQUENCY

PROGRAM :
import string

def word_count(file_path):
    word_counts = {}
    try:
        with open(file_path, 'r') as file:
            text = file.read()
        text = text.translate(str.maketrans('', '', string.punctuation)).lower()
        words = text.split()
        for word in words:
            if word in word_counts:
                word_counts[word] += 1
            else:
                word_counts[word] = 1
        return word_counts
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None

file_path = 'example.txt'
word_counts = word_count(file_path)
if word_counts is not None:
    for word, count in word_counts.items():
        print(f"{word}: {count}")
OUTPUT :
WEATHER DATASET

PROGRAM :

# Step 1: Import necessary libraries
import pandas as pd

# Step 2: Define the file path for the weather dataset
file_path = 'weather_data.csv'  # Replace with the actual path to your CSV file

# Step 3: Read the CSV file into a DataFrame
# The read_csv function loads the CSV data into a structured format (pandas DataFrame)
df = pd.read_csv(file_path)

# Step 4: Display the first 15 rows from the dataset
# We use the iloc method, where 0:15 means rows from index 0 to index 14 (1-15 in human-readable format)
print("Displaying lines 1 to 15 from the dataset:\n")
print(df.iloc[0:15])  # Prints rows 0 to 14 (lines 1-15)

# Step 5: Optionally, display some basic information about the dataset
print("\nBasic Info about the dataset:")
df.info()  # Prints the structure of the dataset directly (it returns None, so it is not wrapped in print)

# Step 6: Check if there are any missing values in the dataset
print("\nChecking for missing values in the dataset:")
print(df.isnull().sum())  # Shows the count of missing values per column

# Step 7: Optional - Show a summary of statistics for numerical columns
print("\nSummary Statistics for numerical columns:")
print(df.describe())  # Provides summary statistics (e.g., mean, std, min, max) for numerical data
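Step 6 only reports missing values; a common follow-up is to handle them before further analysis. A minimal sketch (dropping rows and mean-filling are just two of several possible strategies):

# Option 1: drop rows that contain any missing value.
df_clean = df.dropna()
# Option 2: fill gaps in numerical columns with the column mean.
df_filled = df.fillna(df.mean(numeric_only=True))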
OUTPUT :
SUPPORT VECTOR MACHINE

PROGRAM :
import matplotlib.pyplot as plt
from sklearn import datasets, svm
from sklearn.inspection import DecisionBoundaryDisplay

iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target

C = 1
models = (
    svm.SVC(kernel="linear", C=C),
    svm.LinearSVC(C=C, max_iter=10000),
    svm.SVC(kernel="rbf", gamma=0.7, C=C),
    svm.SVC(kernel="poly", degree=3, gamma="auto", C=C),
)
models = (clf.fit(X, y) for clf in models)

titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 3) kernel",
)

fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

X0, X1 = X[:, 0], X[:, 1]
for clf, title, ax in zip(models, titles, sub.flatten()):
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=iris.feature_names[0],
        ylabel=iris.feature_names[1],
    )
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

plt.show()
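Note that models is rebuilt as a generator expression, so each classifier is fitted lazily as the plotting loop consumes it, and the generator is exhausted after one pass. To also see how well each model separates the two chosen features, the title line inside the loop can report training accuracy instead (a minimal sketch using the fitted clf):

ax.set_title(f"{title}\n(train acc: {clf.score(X, y):.2f})")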
OUTPUT :
DECISION TREE

PROGRAM :
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = load_iris()
X = data.data  # Features
y = data.target  # Labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    random_state=42
)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
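Besides the graphical tree, scikit-learn can print the learned rules as plain text, which is convenient for reports; a minimal sketch using the same fitted clf:

from sklearn.tree import export_text
print(export_text(clf, feature_names=list(data.feature_names)))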
OUTPUT :
VISUALIZATION

PROGRAM :
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import pandas as pd
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
sns.pairplot(df, hue='species', diag_kind='kde', corner=True)
plt.show()
plt.figure(figsize=(8, 6))
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()
plt.figure(figsize=(10, 6))
sns.boxplot(data=df.iloc[:, :-1], orient='h', palette='Set2')
plt.title("Boxplot of Features")
plt.show()
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    style='species',
    palette='deep',
    s=100
)
plt.title("Scatterplot of Sepal Length vs Petal Length")
plt.show()
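To keep the figures for a report instead of only viewing them interactively, each one can be written to disk just before the corresponding plt.show() call (a minimal sketch; the file name and resolution are arbitrary):

plt.savefig('scatter_sepal_vs_petal.png', dpi=150, bbox_inches='tight')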
OUTPUT :
FASTA SEQUENCE OF GABA TRANSAMINASE

PROGRAM :
import requests

def fetch_fasta_sequence(uniprot_id):
    url = f'https://www.uniprot.org/uniprot/{uniprot_id}.fasta'
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to retrieve sequence for {uniprot_id}"

uniprot_ids = {
    'human': 'P49815',
    'mouse': 'P28752',
    'pig': 'P40938',
    'chick': 'Q09293',
}

for organism, uniprot_id in uniprot_ids.items():
    print(f"FASTA sequence for GABA transaminase in {organism.capitalize()}:\n")
    print(fetch_fasta_sequence(uniprot_id))
    print("\n" + "-"*80 + "\n")
OUTPUT :
SWISS-PROT FOR SERINE KINASE IN PIG

PROGRAM :

import requests

def get_serine_kinase_pig_count():
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        # The REST API takes the organism filter inside the query string;
        # 9823 is the NCBI taxonomy ID for pig (Sus scrofa).
        'query': 'serine kinase AND organism_id:9823',
        'format': 'json',
        'fields': 'accession'
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return len(data['results'])
    else:
        print(f"Error: Unable to fetch data (status code: {response.status_code})")
        return 0

entries_count = get_serine_kinase_pig_count()
print(f"Number of entries for Serine kinase in PIG: {entries_count}")


OUTPUT :
GENE SEQUENCE

PROGRAM :
from Bio import Entrez, SeqIO
from Bio.Blast import NCBIWWW, NCBIXML

def fetch_sequence(seq_id):
    Entrez.email = "[email protected]"  # NCBI requires a contact email address
    handle = Entrez.efetch(db="nucleotide", id=seq_id, rettype="gb", retmode="text")
    record = SeqIO.read(handle, "genbank")
    handle.close()
    return record.seq

def perform_blast(query_sequence):
    result_handle = NCBIWWW.qblast("blastn", "nt", query_sequence)
    return result_handle

def parse_blast_results(blast_handle):
    blast_records = NCBIXML.parse(blast_handle)
    for blast_record in blast_records:
        print(f"Query: {blast_record.query_id}")
        print(f"Query Length: {blast_record.query_length}")
        for alignment in blast_record.alignments:
            print("\n** Alignment **")
            print(f"Sequence: {alignment.title}")
            print(f"Length: {alignment.length}")
            print(f"E-value: {alignment.hsps[0].expect}")  # NCBIXML stores the e-value in .expect
            print(f"Similarity: {alignment.hsps[0].identities} out of {alignment.hsps[0].align_length}")
            print(f"Alignment length: {alignment.hsps[0].align_length}")
            print(f"Seq: {alignment.hsps[0].sbjct}")  # subject (hit) sequence of the first HSP
            print("-" * 80)

def main():
    seq_id = "U80226.1"
    sequence = fetch_sequence(seq_id)
    print(f"Fetched sequence for {seq_id}:")
    print(sequence[:100])
    print("\nPerforming BLAST search...")
    blast_handle = perform_blast(sequence)
    print("\nParsing BLAST results...")
    parse_blast_results(blast_handle)

if __name__ == "__main__":
    main()
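NCBI BLAST searches can take several minutes, and the result handle returned by qblast can only be read once. It is therefore common to save the raw XML to disk before parsing, so a failed parse does not force a re-run; a minimal sketch for main(), in place of passing blast_handle straight to parse_blast_results (the file name is arbitrary):

with open('blast_results.xml', 'w') as out:
    out.write(blast_handle.read())
blast_handle.close()
with open('blast_results.xml') as saved:
    parse_blast_results(saved)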
OUTPUT :
PPE PROTEIN

PROGRAM :

from Bio import Entrez

def search_ppe_diseases():
    Entrez.email = "[email protected]"  # NCBI requires a contact email address
    term = "PPE protein disease"
    handle = Entrez.esearch(db="pubmed", term=term, retmax=10)
    record = Entrez.read(handle)
    handle.close()
    id_list = record["IdList"]
    if id_list:
        print(f"Found {len(id_list)} articles related to PPE protein and diseases:")
        for pmid in id_list:
            handle = Entrez.esummary(db="pubmed", id=pmid)
            summary = Entrez.read(handle)
            handle.close()
            for article in summary:
                title = article.get("Title", "No Title Available")
                source = article.get("Source", "No Source Available")
                print(f"Title: {title}")
                print(f"Source: {source}")
                print("-" * 60)
    else:
        print("No articles found for PPE protein and disease.")

search_ppe_diseases()
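retmax=10 caps how many PubMed IDs esearch returns, but the full hit count is still available in record["Count"]; printing it inside the function gives useful context (a one-line sketch):

print(f"Total matching articles in PubMed: {record['Count']}")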
OUTPUT :