# Mayank Chaudhary — DEV Practicals
# Ques 1.
# Ques 1 — NumPy warm-up: array creation, descriptive statistics,
# sorting, and boolean filtering.
import numpy as np

# Part A: 5x2 array of multiples of 5 in [50, 100), then reshape to a column.
print("=== Part A ===")
arr_A = np.reshape(np.arange(50, 100, 5), (5, 2))
print("Original 5x2 Array:\n", arr_A)
reshaped_arr_A = np.reshape(arr_A, (10, 1))
print("Reshaped 10x1 Array:\n", reshaped_arr_A)

# Part B: descriptive statistics of 30 random integers in [1, 100].
print("\n=== Part B ===")
arr_B = np.random.randint(1, 101, size=30)
print("Random Array:\n", arr_B)
min_val = arr_B.min()
max_val = arr_B.max()
mean_val = arr_B.mean()
median_val = np.median(arr_B)
std_dev = arr_B.std()
unique_vals, counts = np.unique(arr_B, return_counts=True)
most_frequent = unique_vals[counts.argmax()]
print(f"Min: {min_val}")
print(f"Max: {max_val}")
print(f"Mean: {mean_val}")
print(f"Median: {median_val}")
print(f"Standard Deviation: {std_dev}")
print(f"Number of Unique Values: {len(unique_vals)}")
# Plain-int dict so the printed mapping has no numpy scalar wrappers.
unique_counts = dict(zip((int(v) for v in unique_vals),
                         (int(c) for c in counts)))
print(f"Count of Unique Values: {unique_counts}")
print(f"Most Frequent Value: {most_frequent}")

# Part C: diagonal matrix with 5 on the main diagonal.
print("\n=== Part C ===")
identity_matrix = np.eye(5) * 5
print("5x5 Identity Matrix with Diagonal = 5:\n", identity_matrix)

# Part D: paired height (cm) / weight (kg) analysis for 20 individuals.
print("\n=== Part D ===")
# a. Raw measurements.
heights = np.array([165, 170, 175, 168, 172, 180, 160, 169, 176, 171,
                    174, 182, 158, 167, 173, 179, 163, 166, 177, 181])
weights = np.array([60, 65, 70, 75, 80, 85, 55, 58, 63, 68,
                    72, 77, 50, 62, 67, 74, 52, 57, 69, 73])
# b. One row per person: (height, weight).
combined = np.stack((heights, weights), axis=1)
print("Combined Array (20x2):\n", combined)
# c. Averages of each measurement.
mean_height = heights.mean()
mean_weight = weights.mean()
print(f"Mean Height: {mean_height}")
print(f"Mean Weight: {mean_weight}")
# d. Positions of the extremes.
shortest_idx = heights.argmin()
tallest_idx = heights.argmax()
print(f"Index of Shortest Individual: {shortest_idx}")
print(f"Index of Tallest Individual: {tallest_idx}")
# e. Rows ordered by the height column.
sorted_by_height = combined[np.argsort(combined[:, 0])]
print("Sorted by Height:\n", sorted_by_height)
# f. Column order flipped to (weight, height).
swapped = combined[:, [1, 0]]
print("Swapped Columns (Weight, Height):\n", swapped)
# g. Keep only rows whose weight entry is under 70 kg.
light_mask = combined[:, 1] < 70
below_70 = combined[light_mask]
print("Individuals with Weight < 70kg:\n", below_70)
# Ques 2.
# Exploratory look at the seaborn "penguins" dataset.
# Fix: several f-strings were split mid-literal across physical lines
# (unterminated string = SyntaxError); they are rejoined onto one line each.
import pandas as pd
import seaborn as sns

# A. Load the penguins dataset (fetched and cached by seaborn;
# needs network access on first run).
df = sns.load_dataset("penguins")

# B. Dataset dimensions.
num_rows, num_cols = df.shape
print(f"\nNumber of Observations (Rows): {num_rows}")
print(f"Number of Attributes (Columns): {num_cols}")

# C. Column names and row-index range.
print("\nAttribute Names:", df.columns.tolist())
# NOTE: .start/.stop exist only on the default RangeIndex, which is what
# load_dataset returns for this dataset.
print(f"Row Index starts at {df.index.start} and ends at {df.index.stop - 1}")
'''we can also use
df.index.tolist() --> But it returns all the indexes
which becomes too long.
df.index --> Looks a bit ugly.
'''

# D. Peek at the first and last records.
print("\nFirst 5 Records:\n", df.head())
print("\nLast 5 Records:\n", df.tail())

# E. Values of the 2nd column for the 3rd and 4th records
# (iloc = purely positional indexing).
second_col = df.columns[1]
values_3rd_4th = df.iloc[[2, 3], 1]
print(f"\nValues in 2nd Column ({second_col}) for 3rd & 4th records:\n", values_3rd_4th)

# F. Summary statistics for every column (numeric and categorical).
print("\nSummary of Data Distribution:\n", df.describe(include="all"))

# G. Pairwise correlation of the numeric columns only.
print("\nPairwise Correlation:\n", df.corr(numeric_only=True))
# Ques 3.
# Cleaning and analysing the seaborn "titanic" dataset.
# Fixes: f-strings split mid-literal across lines (SyntaxError); the part-G
# f-string reused single quotes inside a single-quoted literal (SyntaxError
# before Python 3.12) and its message text was garbled; typos in two
# printed messages ("inluding Famaily", "did not survived").
import seaborn as sns
import pandas as pd

# A. Load the titanic dataset.
df = sns.load_dataset('titanic')

# B. Duplicates and missing values.
Dup_count = df.duplicated().sum()
print(f"Total duplicate values : {Dup_count}")
New_df = df.drop_duplicates()
print(f'Record before removing duplicates : {df.shape}')
print(f'Record after removing duplicates : {New_df.shape}')
Miss_count = df.isnull().sum()
print(f"Total missing values : \n {Miss_count}")
New_df1 = New_df.dropna()
print(f'Records after removing missing values : {New_df1.shape}\n')

# C. Survivor / non-survivor counts.
df_C1 = df.loc[df['survived'] == 1]
print(f'Total Number of passengers who survived : {len(df_C1)}')
df_C2 = df.loc[df['survived'] == 0]
print(f'Total Number of passengers who did not survive : {len(df_C2)} \n')

# D. Passengers under 18 (rows with NaN age are excluded by the comparison).
df_D = df.loc[df['age'] < 18]
print(f'Passengers with age under 18 : {len(df_D)} \n')

# E. Average age per passenger class (mean() skips NaN ages).
print("Average age of Passengers from each Class : ")
df_E = df.groupby('pclass')['age'].mean()
print(df_E)
print()

# F. Family size = siblings/spouses + parents/children + self.
print("New DataFrame including Family Size : ")
df['Family Size'] = df['sibsp'] + df['parch'] + 1
print(df)

# G. Correlation between age and fare. Double quotes outside the f-string
# so the single-quoted column keys inside don't terminate the literal.
print(f"Correlation b/w age and fare : {df['age'].corr(df['fare'])} \n")

# H. Class vs survival contingency table.
print("Contingency table : ")
DataTab = pd.crosstab(df['class'], df['survived'])
print(DataTab)
# Ques 4.
# Matplotlib/seaborn visualisations of the Iris dataset.
# Fix: the y-axis column name in part A was split mid-literal across two
# physical lines ('petal / width (cm)'), an unterminated string SyntaxError.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Build a DataFrame with a readable species column (codes 0/1/2 -> names).
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target
df['species'] = df['species'].map(dict(zip(range(3), iris.target_names)))

# A. Scatter plot: petal length vs petal width, coloured by species.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='petal length (cm)', y='petal width (cm)',
                hue='species', palette='Set2')
plt.title("Petal Length vs Petal Width")
plt.xlabel("Petal Length (cm)")
plt.ylabel("Petal Width (cm)")
plt.legend(title='Species')
plt.grid(True)
plt.tight_layout()
plt.show()

# B. Histograms of the four numeric features (species column excluded).
df.drop('species', axis=1).hist(bins=15, figsize=(10, 6),
                                edgecolor='black', color='skyblue')
plt.suptitle("Histograms of Iris Features", fontsize=14)
plt.tight_layout()
plt.show()

# C. Pie chart of species proportions (equal thirds for Iris).
species_counts = df['species'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(species_counts, labels=species_counts.index,
        autopct='%1.1f%%', startangle=140,
        colors=['#66c2a5', '#fc8d62', '#8da0cb'])
plt.title("Distribution of Iris Species")
plt.axis('equal')
plt.tight_layout()
plt.show()

# D. Pair plot of every feature pair, split by species.
sns.pairplot(df, hue='species', palette='husl', diag_kind='hist')
plt.suptitle("Pair Plot of Iris Dataset", y=1.02)
plt.show()
# Ques 5.
# Interactive Plotly Express charts of the Iris dataset.
# Fix: a dict key in the part-A `labels` mapping was split mid-literal
# across two physical lines ('petal / width (cm)'), an unterminated
# string SyntaxError.
import pandas as pd
import plotly.express as px
from sklearn.datasets import load_iris

# Build a DataFrame with a readable species column (codes 0/1/2 -> names).
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target
df['species'] = df['species'].map(dict(zip(range(3), iris.target_names)))

# A. Interactive scatter: petal length vs width, colour + symbol per species.
fig_scatter = px.scatter(
    df,
    x='petal length (cm)',
    y='petal width (cm)',
    color='species',
    title='Petal Length vs Petal Width (Iris Dataset)',
    labels={'petal length (cm)': 'Petal Length (cm)',
            'petal width (cm)': 'Petal Width (cm)'},
    symbol='species'
)
fig_scatter.show()

# C. Pie chart of species proportions with percentage labels inside slices.
# (NOTE(review): there is no part B in the original source.)
fig_pie = px.pie(
    df,
    names='species',
    title='Distribution of Iris Species',
    color_discrete_sequence=px.colors.qualitative.Set3
)
fig_pie.update_traces(textposition='inside', textinfo='percent+label')
fig_pie.show()