Ip HHW
Ip HHW
import pandas as pd
# Program: build a Series from a dictionary; the keys become the index labels.
dict1 = dict(zip('abcd', (10, 20, 30, 40)))
s1 = pd.Series(data=dict1)
print(s1)
import pandas as pd
import numpy as np
# Program: build a Series from a NumPy array (default 0..n-1 integer index).
array1 = np.array(list('abcd'))
s1 = pd.Series(data=array1)
print(s1)
def series(series, read=input):
    """Build a float Series from values entered interactively.

    Args:
        series: Label shown in the prompts (for example "Series 1").
        read: Callable that takes a prompt string and returns the reply;
            defaults to the built-in ``input``.

    Returns:
        A ``pd.Series`` holding the entered values as float64, also when
        zero elements are requested.

    Raises:
        ValueError: If a reply cannot be parsed as a number.
    """
    n = int(read(f"Enter the number of elements in {series}: "))
    values = [
        float(read(f"Enter element {i + 1} of {series}: "))
        for i in range(n)
    ]
    # An explicit dtype keeps an empty result numeric instead of object.
    return pd.Series(values, dtype=float)
# Read both operands from the user: s2 is not defined anywhere before this
# point, and s1 still holds the series left over from the previous program.
# NOTE(review): the prompt labels below are an assumption -- confirm wording.
s1 = series("Series 1")
s2 = series("Series 2")

# Element-wise arithmetic; pandas aligns the operands on their index labels,
# so a position present in only one series yields NaN.
addition = s1 + s2
subtraction = s1 - s2
multiplication = s1 * s2
division = s1 / s2

print("\nAddition:\n", addition)
print("Subtraction:\n", subtraction)
print("Multiplication:\n", multiplication)
print("Division:\n", division)
# Initial series
l1 = list(range(10, 50, 10))
s1 = pd.Series(data=l1)
print(s1)

# Additional data
l2 = list(range(50, 80, 10))
s2 = pd.Series(data=l2)
print(s2)
# Program: list the students who scored more than 70 percent.
data = {
    'Name': ['Vishal', 'Ram', 'Shyam', 'Ravi'],
    'Percentage': [90, 85, 45, 75],
}
df = pd.DataFrame(data)
print("Original Dataframe\n", df)
# Boolean mask selects only the rows whose percentage exceeds 70.
print("Required Dataframe\n", df.loc[df['Percentage'].gt(70)])
5) Write a Pandas program to select the rows where the percentage is
between 70 and 90 (inclusive).
import pandas as pd
data = {
    'Name': ['Vishal', 'Ram', 'Shyam', 'Ravi'],
    'Percentage': [92, 85, 45, 75],
}
df = pd.DataFrame(data)
print("Original Dataframe\n", df)
# The selection step was missing. Series.between includes both end points
# by default, which gives the inclusive 70..90 range.
required = df[df['Percentage'].between(70, 90)]
print("Required Dataframe\n", required)
# Sample marks sheet used by the update-percentage program below.
data = dict(
    Name=['Vishal', 'Ram', 'Shyam', 'Ravi'],
    Percentage=[92, 85, 45, 75],
)
df = pd.DataFrame.from_dict(data)
print("Original Dataframe\n", df)
def change_percentage(df, name, new_percentage):
    """Set the percentage of every row whose 'Name' equals ``name``.

    Args:
        df: DataFrame with 'Name' and 'Percentage' columns.
        name: Value to look for in the 'Name' column.
        new_percentage: Value written to 'Percentage' for matching rows.

    Returns:
        The same DataFrame object, which is modified in place. Rows are left
        untouched when no name matches.
    """
    df.loc[df['Name'] == name, 'Percentage'] = new_percentage
    return df
# `name` and `new_percentage` were used without being defined (NameError).
# NOTE(review): reading them from the user is an assumption about the missing
# step -- confirm. int() keeps the value compatible with the integer column.
name = input("Enter the name of the student: ")
new_percentage = int(input("Enter the new percentage: "))
df = change_percentage(df, name, new_percentage)
print("Updated Dataframe\n", df)
# Program: stack two DataFrames that share the same columns.
data1 = {'Name': ['Vishal', 'Ram', 'Shyam'],
         'Percentage': [90, 85, 75]}
# df1, df2 and df3 were printed without ever being built from the data.
df1 = pd.DataFrame(data1)  # 1st DataFrame
print("DataFrame 1:\n", df1)
data2 = {'Name': ['Ravi', 'Atul', 'Amir'],
         'Percentage': [90, 85, 75]}
df2 = pd.DataFrame(data2)  # 2nd DataFrame
print("DataFrame 2:\n", df2)
# Identical columns, so join row-wise; ignore_index renumbers the rows 0..n-1.
df3 = pd.concat([df1, df2], ignore_index=True)
print("Joined DataFrame:\n", df3)
# Program: place two DataFrames with different columns side by side.
data1 = {'Name': ['Vishal', 'Ram', 'Shyam'],
         'Percentage': [90, 85, 75]}
df1 = pd.DataFrame(data1)  # 1st DataFrame
print("DataFrame 1:\n", df1)
data2 = {'Age': [18, 17, 19],
         'Grade': ['A', 'B', 'C']}
# df2 and df3 were printed without ever being built from data2.
df2 = pd.DataFrame(data2)  # 2nd DataFrame
print("DataFrame 2:\n", df2)
# The frames share the default row index but no columns, so join column-wise.
df3 = pd.concat([df1, df2], axis=1)
print("Joined DataFrame:\n", df3)
data = {'Name': ['Vishal', 'Ram', 'Shyam'],
        'Percentage': [90, 85, 75]}
df = pd.DataFrame(data)
print("Original DataFrame\n", df)  # Original DataFrame

new_dictionaries = [{'Name': 'Atul', 'Percentage': 78},  # List of dictionaries
                    {'Name': 'Ravi', 'Percentage': 74}]
# DataFrame._append is private API (the public append was removed in
# pandas 2.0); pd.concat is the supported way to add rows.
df = pd.concat([df, pd.DataFrame(new_dictionaries)], ignore_index=True)
print("DataFrame with appended list of dictionaries\n", df)

# `new_series` was used without being defined (NameError).
# NOTE(review): the two sample rows below are placeholders -- confirm values.
new_series = [pd.Series(['Anil', 82], index=df.columns),  # List of Series
              pd.Series(['Raju', 88], index=df.columns)]
df = pd.concat([df, pd.DataFrame(new_series)], ignore_index=True)
print("DataFrame with appended list of series\n", df)
# Five-student sample sheet with name, age and percentage.
data = dict(
    Name=['Vishal', 'Ram', 'Shyam', 'Anil', 'Raju'],
    Age=[18, 17, 18, 15, 16],
    Percentage=[90, 75, 84, 74, 85],
)
df = pd.DataFrame.from_dict(data)
def duplicate(df):
    """Print every row of ``df`` that repeats an earlier row.

    A row counts as a duplicate when all of its column values equal those of
    a row that appears before it; the first occurrence is not reported and
    each later occurrence is reported exactly once.

    Args:
        df: DataFrame to scan.

    Returns:
        None. A "Duplicate Values:" heading followed by one
        ``Index: <label>, Values: {column: value}`` line per duplicate is
        written to standard output.
    """
    # The pairwise scan iterated over a list that was never filled, so no
    # duplicate was ever reported; DataFrame.duplicated flags them directly.
    repeated = df[df.duplicated(keep='first')]
    print("\nDuplicate Values:")
    records = repeated.to_dict(orient='records')
    for idx, row_dict in zip(repeated.index, records):
        print(f"Index: {idx}, Values: {row_dict}")
# Sample sheet in which the rows for Vishal and Anil each appear twice.
data = dict(
    Name=['Vishal', 'Ram', 'Shyam', 'Anil', 'Raju', 'Vishal', 'Anil'],
    Age=[18, 17, 14, 15, 16, 18, 15],
    Percentage=[90, 75, 84, 74, 85, 90, 74],
)
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Report the repeated rows, then keep only the first occurrence of each.
duplicate(df)
df = df.drop_duplicates(keep='first')
print("\nAfter dropping duplicate values:\n", df)
12) Importing and exporting data between pandas and a CSV file:
# To create and open a data frame from the ‘Student_result.csv’ file using
# Pandas.
import pandas as pd
# Load the result sheet into a DataFrame and display it.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result.csv",
)
print(df)
# To display the row labels, column labels, data types of each column and
# the dimensions.
# To display the shape (number of rows and columns) of the CSV file.
import pandas as pd
df = pd.read_csv(r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result.csv")

# Each heading is printed on its own line, followed by the matching attribute.
report = (
    ("Row labels:", df.index),
    ("\nColumn labels:", df.columns),
    ("\nData types:", df.dtypes),
    ("\nDimension:", df.shape),
)
for heading, value in report:
    print(heading)
    print(value)
13) Read the ‘Student_result.csv’ file to create a data frame and do the
following operations:
# To display Adm_No, Gender and Percentage from the ‘Student_result.csv’
# file.
import pandas as pd
# The file was read twice in a row; one read is enough.
df = pd.read_csv(r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result1.csv")

# Show only the three requested columns (this step was missing).
# NOTE(review): assumes the CSV header names are exactly 'Adm_No', 'Gender'
# and 'Percentage' -- confirm against the file.
print(df[['Adm_No', 'Gender', 'Percentage']])

print("First 5 Records\n", df.head())
print("\nLast 5 Records\n", df.tail())
14) Read the ‘Student_result.csv’ file to create a data frame and do the
following operations:
# To display the Student_result file with new column names.
import pandas as pd
# header=0 discards the file's own header row so `names` replaces it.
new_column_names = ['Admission no.', 'Student Name', 'Gender', 'Age', 'Percentage']
df = pd.read_csv(
    r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result1.csv",
    header=0,
    names=new_column_names,
)
print(df)
df = pd.read_csv(r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result1.csv")
# Blank out every column of the rows whose percentage is below 40.
# np.NaN was removed in NumPy 2.0; np.nan works on every NumPy version.
df[df['Percentage'] < 40] = np.nan
print("Required Datafram:\n", df)
import pandas as pd
# Read only three columns and write them out to a copy of the file.
wanted_columns = ['Adm_No', 'Name', 'Percentage']
df = pd.read_csv(
    r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result1.csv",
    usecols=wanted_columns,
)
df.to_csv(path_or_buf=r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result1_Copy.csv")
# Write the statement in Pandas to find the highest percentage and
# also print the student’s name and percentage.
import pandas as pd
df = pd.read_csv(r"C:\Users\Vishal Shrivastawa\Desktop\Student_Result1.csv")
# Keep every row that ties for the top percentage.
top_score = df['Percentage'].max()
print("Maximum Percentage\n", df[df['Percentage'].eq(top_score)])
# NOTE(review): create_engine is presumably sqlalchemy.create_engine; no
# import for it is visible in this part of the file -- confirm.
# NOTE(review): the connection URL embeds the database password in plain text.
engine = create_engine('mysql+mysqldb://root:vishal@localhost/pandas_sql')

data = dict(
    Adm_No=[101, 102, 103],
    Name=['Vishal', 'Ram', 'Shyam'],
    Percentage=[90, 75, 84],
)
df = pd.DataFrame(data)

# NOTE(review): the engine is created a second time and the frame is only
# printed; a step that uses the engine appears to be missing.
engine = create_engine('mysql+mysqldb://root:vishal@localhost/pandas_sql')
print(df)
17) Find the sum of each column, or find the column with the
lowest mean.
import pandas as pd
data = dict(A=[24, 10, 15], B=[1, 25, 18], C=[7, 48, 12])
df = pd.DataFrame(data)

# Per-column totals.
print("Sum of columns")
print(df.sum())

# Per-column means, then the label of the column with the smallest mean.
column_means = df.mean()
print("\nMean of columns")
print(column_means)
print("\nLowest Mean")
print(column_means.idxmin())
data = {
    'A': [10, 5, 8, 12],
    'B': [20, 15, 25, 18],
    'C': [30, 25, 35, 22],
}
df = pd.DataFrame(data)
# Flatten the frame row by row and sort every value in ascending order.
# This replaces the nested index loops, whose indentation had been lost.
rows = sorted(df.to_numpy().ravel().tolist())
data = {
    'A': [10, 5, 8, 12],
    'B': [20, 15, 25, 18],
    'C': [30, 25, 35, 22],
}
df = pd.DataFrame(data)
# Subtract each column's mean from its values. df.mean() is a Series keyed by
# column label, so the subtraction lines up column by column without a loop.
df = df - df.mean()
print(df)
data = {
    'A': [1.25, -3.75, -0.75, 3.25],
    'B': [0.5, -4.5, 5.5, -1.5],
    'C': [2.0, -3.0, 7.0, -6.0],
}
df = pd.DataFrame(data)
# Replace every negative value with 0; clip(lower=0) does this for the whole
# frame at once instead of visiting each cell with df.iat.
df = df.clip(lower=0)
print("Required Dataframe")
print(df)
21) Replace all missing values in a data frame with a 999.
import pandas as pd
import numpy as np
# np.NaN was removed in NumPy 2.0; np.nan works on every NumPy version.
data = {
    'A': [10, np.nan, 8, 12],
    'B': [np.nan, 15, 25, np.nan],
    'C': [30, 25, 35, np.nan],
}
df = pd.DataFrame(data)
# fillna replaces every missing cell in one call, so no cell-by-cell loop is
# needed.
df = df.fillna(999)
print("Required DataFrame")
print(df)
22) Given a Series, print all the elements that are above the 75th
percentile.
import pandas as pd
data = [74, 21, 78, 89, 45, 81, 92, 63, 49, 84]
s1 = pd.Series(data)
# Compare against the 75th percentile of the data itself, not the literal
# number 75 (which selected 78 and 81 as well).
threshold = s1.quantile(0.75)
above_75th = s1[s1 > threshold]
print("Above 75 Percentile:")
print(above_75th)
23) Create a Data Frame of quarterly sales where each row contains
the item category, item name, and expenditure. Group the
rows by the category, and print the total expenditure per
category.
import pandas as pd
data = {
    'Category': ['Electronics', 'Electronics', 'Electronics', 'Furniture', 'Furniture',
                 'Grocery', 'Grocery', 'Grocery'],
    'Item': ['Laptop', 'Smartphone', 'Tablet', 'Chair', 'Table', 'Milk', 'Bread', 'Eggs'],
    'Expenditure': [1200, 800, 300, 150, 200, 50, 20, 30],
}
df = pd.DataFrame(data)
# The grouping step was missing: total the expenditure within each category.
total_expenditure = df.groupby('Category')['Expenditure'].sum()
print("Total expenditure per category")
print(total_expenditure)
data = {
    'OrderID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'CustomerID': [101, 102, 103, 104, 105, 101, 102, 103, 104, 105],
    'Category': ['Electronics', 'Electronics', 'Clothing', 'Clothing', 'Books',
                 'Electronics', 'Books', 'Clothing', 'Books', 'Electronics'],
    'Sales': [250, 150, 50, 200, 300, 400, 100, 150, 200, 350],
    'Quantity': [1, 2, 1, 4, 3, 1, 2, 3, 1, 2],
}
df = pd.DataFrame(data)

# Descriptive statistics of the 'Sales' column.
mean_sales = df['Sales'].mean()
median_sales = df['Sales'].median()
# `stats` was never imported. Series.mode returns the modes in ascending
# order, so taking the first one picks the smallest mode when several tie.
mode_sales = df['Sales'].mode().iloc[0]
quartiles_sales = df['Sales'].quantile([0.25, 0.5, 0.75])
variance_sales = df['Sales'].var()  # sample variance (pandas default ddof=1)
data = dict(
    StudentID=list(range(1, 11)),
    Name=['Vishal', 'Anil', 'Shyam', 'Ram', 'Raju',
          'Deepa', 'Sita', 'Gita', 'Krishna', 'Meena'],
    Class=['12th', '11th', '11th', '11th', '11th',
           '12th', '12th', '12th', '12th', '11th'],
    Math=[95, 78, 92, 88, 76, 89, 90, 85, 87, 85],
    Computer=[92, 82, 94, 89, 78, 85, 91, 88, 86, 90],
    English=[93, 80, 85, 90, 82, 91, 87, 89, 88, 83],
)
df = pd.DataFrame(data)

# Average score per subject, overall and broken down by class.
subject_columns = ['Math', 'Computer', 'English']
Mean_Subject = df[subject_columns].mean()
Mean_Class = df.groupby('Class')[subject_columns].mean()
# pyplot is used here before any import of it appears in the file.
import matplotlib.pyplot as plt

# Bar chart of the average score in each subject. Series.plot draws on the
# current axes, so it uses the figure created just before it.
plt.figure(figsize=(10, 5))
Mean_Subject.plot(kind='bar', color=['blue', 'green', 'orange'])
plt.title('Average Score Subject Wise')
plt.xlabel('Subjects')
plt.ylabel('Average Scores')
plt.show()

# Grouped bar chart of every student's score in each subject.
# DataFrame.plot opens its own figure when no ax is passed, so the separate
# plt.figure() call that preceded it only left an empty window; figsize is
# already given to plot().
df.plot(x='Name', y=['Math', 'Computer', 'English'], kind='bar', figsize=(10, 5))
plt.title('Individual Performance in Each Subject')
plt.xlabel('Name')
plt.ylabel('Scores')
plt.show()
26) Write a program to plot a bar chart in python to display the
result of a school for five consecutive years.
import matplotlib.pyplot as plt
import pandas as pd
data = {
    'Years': [2019, 2020, 2021, 2022, 2023],
    'Maths': [85, 88, 90, 87, 92],
    'Computer': [82, 84, 86, 83, 89],
    'English': [78, 80, 82, 79, 85],
}
df = pd.DataFrame(data)

# DataFrame.plot opens its own figure when no ax is passed, so the separate
# plt.figure() call that preceded it only left an empty window; figsize is
# already given to plot().
df.plot(x='Years', y=['Maths', 'Computer', 'English'], kind='bar', figsize=(10, 5))
plt.title('Individual Performance in Each Subject')
plt.xlabel('Years')
plt.ylabel('Scores')
plt.show()
27) For the Data frames created above, analyze, and plot
appropriate charts with title and legend: Number of Students
against Scores in all the 7 subjects. Show the Highest score of
each subject.
import pandas as pd
import matplotlib.pyplot as plt
data = {
    'Student ID': range(101, 111),
    'Name': ['Vishal', 'Anil', 'Shyam', 'Ram', 'Raju', 'Deepa', 'Sita', 'Gita',
             'Krishna', 'Meena'],
    'Class': ['12th', '11th', '11th', '11th', '11th', '12th', '12th', '12th', '12th',
              '11th'],
    'Math': [85, 78, 92, 88, 76, 89, 90, 85, 87, 95],
    'Science': [90, 82, 94, 89, 78, 85, 91, 88, 86, 92],
    'English': [88, 80, 85, 90, 82, 91, 87, 89, 88, 93],
    'History': [70, 75, 68, 74, 69, 77, 72, 78, 74, 80],
    'Geography': [72, 78, 74, 76, 71, 79, 75, 80, 77, 81],
    'Hindi': [82, 85, 84, 83, 80, 86, 88, 87, 85, 89],
    'Sanskrit': [78, 76, 80, 79, 75, 81, 77, 82, 80, 84],
}
df = pd.DataFrame(data)

# `subjects` and `highest_scores` were used below without being defined.
subjects = ['Math', 'Science', 'English', 'History', 'Geography', 'Hindi', 'Sanskrit']
highest_scores = df[subjects].max()

# One histogram per subject on a 3x3 grid, with the top score marked.
plt.figure(figsize=(15, 10))
for i, subject in enumerate(subjects):
    plt.subplot(3, 3, i + 1)
    df[subject].plot(kind='hist', bins=10, alpha=0.7)
    top_line = plt.axvline(highest_scores[subject], color='red',
                           linestyle='dashed', linewidth=2)
    plt.title(subject)
    plt.xlabel('Scores')
    plt.ylabel('Number of Students')
    # Pass the line handle explicitly so the label cannot end up attached to
    # the histogram bars instead.
    plt.legend([top_line], ['Highest Score'])
plt.tight_layout()  # keep subplot titles and axis labels from overlapping

# Bar chart of the highest score in each subject.
plt.figure(figsize=(10, 5))
highest_scores.plot(kind='bar', color='skyblue')
plt.title('Highest Scores in Each Subject')
plt.xlabel('Subjects')
plt.ylabel('Scores')
plt.xticks(rotation=45)
plt.show()
28) For the Data frames created above, analyze, and plot
appropriate charts with title and legend. Show the Average
score of each subject.
import pandas as pd
import matplotlib.pyplot as plt
data = {
    'Name': ['Vishal', 'Anil', 'Rajesh', 'Sunita', 'Amit'],
    'Math': [88, 92, 85, 78, 90],
    'Physics': [84, 89, 80, 75, 95],
    'English': [90, 85, 87, 92, 88],
    'Chemistry': [76, 80, 83, 85, 78],
    'Hindi': [82, 87, 88, 90, 85],
    'Computer': [95, 98, 94, 93, 97],
}
df = pd.DataFrame(data)

# numeric_only skips the 'Name' column, leaving one mean per subject.
average_marks = df.mean(numeric_only=True)

plt.figure(figsize=(10, 6))
average_marks.plot(kind='bar', color='skyblue', alpha=0.7, label='Subject Average')
plt.axhline(y=average_marks.mean(), color='red', linestyle='dashed', linewidth=2,
            label='Overall Average')
# The chart had no title or axis labels, and the line's label was never
# displayed because plt.legend() was not called.
plt.title('Average Score of Each Subject')
plt.xlabel('Subjects')
plt.ylabel('Average Score')
plt.legend()
plt.show()
29) For the Data frames created above, analyze, and plot
appropriate charts with title and legend: Number of Females
and Males; Average Percentage of Females and Males.
import pandas as pd
import matplotlib.pyplot as plt
data = {
    'Name': ['Vishal', 'Anil', 'Rajesh', 'Sunita', 'Amit', 'Deepa', 'Kiran', 'Nita',
             'Rahul', 'Meena'],
    'Gender': ['M', 'M', 'M', 'F', 'M', 'F', 'F', 'F', 'M', 'F'],
    'Percentage': [88, 92, 85, 78, 90, 95, 88, 82, 87, 91],
}
df = pd.DataFrame(data)

gender_counts = df['Gender'].value_counts()
average_percentage_by_gender = df.groupby('Gender')['Percentage'].mean()

# value_counts orders by frequency while groupby orders by label, so a plain
# positional colour list can give the same gender different colours in the
# two charts. Look the colour up by gender instead.
palette = {'M': 'skyblue', 'F': 'pink'}

plt.figure(figsize=(14, 6))

# Left: number of students of each gender.
plt.subplot(1, 2, 1)
gender_counts.plot(kind='bar', alpha=0.7,
                   color=[palette[g] for g in gender_counts.index])
plt.title('Number of Females and Males')
plt.xlabel('Gender')
plt.ylabel('Number of Students')
plt.xticks(rotation=0)

# Right: mean percentage of each gender.
plt.subplot(1, 2, 2)
average_percentage_by_gender.plot(
    kind='bar', alpha=0.7,
    color=[palette[g] for g in average_percentage_by_gender.index])
plt.title('Average Percentage of Females and Males')
plt.xlabel('Gender')
plt.ylabel('Average Percentage')
plt.xticks(rotation=0)

plt.tight_layout()
plt.show()
30) Take data of your interest from an open source (e.g.
data.gov.in), aggregate and summarize it. Then plot it using
different plotting functions of the Matplotlib library.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv(r"C:\Users\Vishal Shrivastawa\Desktop\datafile.csv")
# Every column from the third onward is plotted as one point on the x-axis.
# NOTE(review): presumably these are the per-year columns -- confirm.
year_columns = df.columns[2:]

# Line chart: R&D expenditure of each sector.
# NOTE(review): the activity label was split across two lines in the source
# (a syntax error); it is rejoined with a single space -- confirm against the
# CSV.
plt.figure(figsize=(15, 10))
for sector in df["SECTORS"].unique():
    is_rnd_row = (df["SECTORS"] == sector) & (
        df["Economic Activity"] == "R&D expenditure Rs. Crores")
    sector_df = df[is_rnd_row]
    if not sector_df.empty:
        # Plot the first matching row for this sector.
        plt.plot(year_columns, sector_df.iloc[0, 2:], label=sector)
plt.xlabel('Year')
plt.ylabel('R&D Expenditure Rs. Crores')
plt.title('R&D Expenditure by Sector Over the Years')
plt.legend()
plt.show()

# Line chart: GDP by industry of origin for each sector.
plt.figure(figsize=(15, 10))
for sector in df["SECTORS"].unique():
    # regex=False matches the phrase literally; na=False treats missing
    # activity names as non-matching so the mask stays boolean.
    is_gdp_row = (df["SECTORS"] == sector) & df["Economic Activity"].str.contains(
        "GDP by industry of origin", regex=False, na=False)
    sector_df = df[is_gdp_row]
    if not sector_df.empty:
        plt.plot(year_columns, sector_df.iloc[0, 2:], label=sector)
plt.xlabel('Year')
plt.ylabel('GDP by Industry of origin (at factor cost Rs. Crores)')
plt.title('GDP by Sector Over the Years')
plt.legend()
plt.show()