
Dev Lab Practicals

Ques 1.
import numpy as np

# Part A
print("=== Part A ===")
arr_A = np.arange(50, 100, 5).reshape(5, 2)
print("Original 5x2 Array:\n", arr_A)

reshaped_arr_A = arr_A.reshape(10, 1)
print("Reshaped 10x1 Array:\n", reshaped_arr_A)

# Part B
print("\n=== Part B ===")
arr_B = np.random.randint(1, 101, size=30)
print("Random Array:\n", arr_B)

min_val = np.min(arr_B)
max_val = np.max(arr_B)
mean_val = np.mean(arr_B)
median_val = np.median(arr_B)
std_dev = np.std(arr_B)
unique_vals, counts = np.unique(arr_B, return_counts=True)
most_frequent = unique_vals[np.argmax(counts)]

print(f"Min: {min_val}")
print(f"Max: {max_val}")
print(f"Mean: {mean_val}")
print(f"Median: {median_val}")
print(f"Standard Deviation: {std_dev}")
print(f"Number of Unique Values: {len(unique_vals)}")
unique_counts = {int(k): int(v) for k, v in zip(unique_vals, counts)}
print(f"Count of Unique Values: {unique_counts}")
print(f"Most Frequent Value: {most_frequent}")

# Part C
print("\n=== Part C ===")
identity_matrix = np.identity(5) * 5
print("5x5 Identity Matrix with Diagonal = 5:\n",
identity_matrix)

# Part D
print("\n=== Part D ===")

# a.
heights = np.array([165, 170, 175, 168, 172, 180, 160, 169, 176, 171,
                    174, 182, 158, 167, 173, 179, 163, 166, 177, 181])
weights = np.array([60, 65, 70, 75, 80, 85, 55, 58, 63, 68,
                    72, 77, 50, 62, 67, 74, 52, 57, 69, 73])

# b.
combined = np.column_stack((heights, weights))
print("Combined Array (20x2):\n", combined)

# c.
mean_height = np.mean(heights)
mean_weight = np.mean(weights)
print(f"Mean Height: {mean_height}")
print(f"Mean Weight: {mean_weight}")

# d.
shortest_idx = np.argmin(heights)
tallest_idx = np.argmax(heights)
print(f"Index of Shortest Individual: {shortest_idx}")
print(f"Index of Tallest Individual: {tallest_idx}")

# e.
sorted_by_height = combined[combined[:, 0].argsort()]
print("Sorted by Height:\n", sorted_by_height)

# f.
swapped = combined[:, ::-1]
print("Swapped Columns (Weight, Height):\n",
swapped)

# g.
below_70 = combined[combined[:, 1] < 70]
print("Individuals with Weight < 70kg:\n", below_70)

Ques 2.
import pandas as pd
import seaborn as sns

print("=== Penguins Dataset Analysis ===")

# A.
df = sns.load_dataset("penguins")

# B.
num_rows, num_cols = df.shape
print(f"\nNumber of Observations (Rows):
{num_rows}")
print(f"Number of Attributes (Columns): {num_cols}")

# C.
print("\nAttribute Names:", df.columns.tolist())
print(f"Row Index starts at {df.index.start} and ends at
{df.index.stop - 1}")
'''We could also use:
df.index.tolist() --> returns every index value, which gets too long to print.
df.index --> works, but the raw RangeIndex repr is harder to read.
'''

print("\nData Types of Each Attribute:\n", df.dtypes)

# D.
print("\nFirst 5 Records:\n", df.head())
print("\nLast 5 Records:\n", df.tail())

# E.
second_col = df.columns[1]
values_3rd_4th = df.iloc[[2, 3], 1]
print(f"\nValues in 2nd Column ({second_col}) for 3rd &
4th records:\n", values_3rd_4th)

# F.
print("\nSummary of Data Distribution:\n",
df.describe(include="all"))

# G.
print("\nPairwise Correlation:\n",
df.corr(numeric_only=True))
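
One caveat worth knowing: the penguins dataset ships with a few missing values, and describe() and corr() silently skip them. A quick check (not part of the original question) makes that visible:

# How many values are missing per column (penguins has a handful of NaNs)
print("\nMissing Values per Attribute:\n", df.isnull().sum())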

Ques 3.
import seaborn as sns
import pandas as pd

#A
df = sns.load_dataset('titanic')

#B.
Dup_count = df.duplicated().sum()
print(f"Total duplicate values : {Dup_count}")
New_df = df.drop_duplicates()
print(f'Records before removing duplicates : {df.shape}')
print(f'Records after removing duplicates : {New_df.shape}')
Miss_count = df.isnull().sum()
print(f"Total missing values : \n {Miss_count}")
New_df1 = New_df.dropna()
print(f'Records after removing missing values : {New_df1.shape}\n')
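
dropna() removes a large share of rows here, mostly because titanic's 'deck' column is largely missing. If keeping rows mattered, one common alternative (a sketch, not what the exercise asks for) drops only the sparse column and imputes age:

# Alternative to dropna(): drop the mostly-empty 'deck' column and fill missing ages
New_df2 = New_df.drop(columns=['deck'])
New_df2['age'] = New_df2['age'].fillna(New_df2['age'].median())
print(f'Records after imputation instead of dropping : {New_df2.shape}')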

#C.
df_C1 = df.loc[df['survived'] == 1]
print(f'Total Number of passengers who survived : {len(df_C1)}')

df_C2 = df.loc[df['survived'] == 0]
print(f'Total Number of passengers who did not survive : {len(df_C2)} \n')

#D.
df_D = df.loc[df['age'] < 18]
print(f'Passengers with age under 18 : {len(df_D)} \n')

#E.
print("Average age of Passengers from each Class : ")
df_E = df.groupby('pclass')['age'].mean()
print(df_E)
print()

#F.
print("New DataFrame inluding Famaily Size : ")
df['Family Size'] = df['sibsp'] + df['parch'] + 1
print(df)

#G.
print(f"Correlation b/w age and fare : {df['age'].corr(df['fare'])} \n")

#H.
print("Contingency table : ")
DataTab = pd.crosstab(df['class'], df['survived'])
print(DataTab)
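
The raw contingency table shows counts; normalizing by row turns it into survival rates per class, which is often the more readable view (an optional extension to Part H):

# Optional: survival rate per class (each row sums to 1)
print(pd.crosstab(df['class'], df['survived'], normalize='index'))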

Ques 4.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target
df['species'] = df['species'].map(dict(zip(range(3), iris.target_names)))

# A.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='petal length (cm)', y='petal width (cm)',
                hue='species', palette='Set2')
plt.title("Petal Length vs Petal Width")
plt.xlabel("Petal Length (cm)")
plt.ylabel("Petal Width (cm)")
plt.legend(title='Species')
plt.grid(True)
plt.tight_layout()
plt.show()

# B.
df.drop('species', axis=1).hist(bins=15, figsize=(10, 6),
                                edgecolor='black', color='skyblue')
plt.suptitle("Histograms of Iris Features", fontsize=14)
plt.tight_layout()
plt.show()

# C.
species_counts = df['species'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(species_counts, labels=species_counts.index, autopct='%1.1f%%',
        startangle=140, colors=['#66c2a5', '#fc8d62', '#8da0cb'])
plt.title("Distribution of Iris Species")
plt.axis('equal')
plt.tight_layout()
plt.show()

# D.
sns.pairplot(df, hue='species', palette='husl', diag_kind='hist')
plt.suptitle("Pair Plot of Iris Dataset", y=1.02)
plt.show()
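
A small detail on Part D: sns.pairplot builds its own figure and returns a PairGrid, so titling through the returned grid is slightly more explicit than plt.suptitle on the current figure (both work here):

# Equivalent, using the PairGrid that pairplot returns
g = sns.pairplot(df, hue='species', palette='husl', diag_kind='hist')
g.fig.suptitle("Pair Plot of Iris Dataset", y=1.02)
plt.show()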

Ques 5.
import pandas as pd
import plotly.express as px
from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target
df['species'] = df['species'].map(dict(zip(range(3), iris.target_names)))

# A.
fig_scatter = px.scatter(
    df,
    x='petal length (cm)',
    y='petal width (cm)',
    color='species',
    title='Petal Length vs Petal Width (Iris Dataset)',
    labels={'petal length (cm)': 'Petal Length (cm)',
            'petal width (cm)': 'Petal Width (cm)'},
    symbol='species'
)
fig_scatter.show()

# C.
fig_pie = px.pie(
    df,
    names='species',
    title='Distribution of Iris Species',
    color_discrete_sequence=px.colors.qualitative.Set3
)
fig_pie.update_traces(textposition='inside', textinfo='percent+label')
fig_pie.show()
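
Ques 5 jumps from Part A to Part C; assuming the missing Part B mirrors Ques 4's Part B (feature histograms), a minimal Plotly sketch might look like this (an assumption, since the original Part B is absent):

# Assumed Part B: histogram of one feature, split by species
fig_hist = px.histogram(
    df,
    x='petal length (cm)',
    color='species',
    nbins=15,
    title='Histogram of Petal Length (Iris Dataset)'
)
fig_hist.show()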
