Some Exercises
Some Exercises
# Exploratory look at auto_data: summary statistics, pairwise scatter plots
# for a few selected columns, and a correlation heatmap.

# Summary statistics
print(auto_data.describe())

# Visualizations: pairwise relationships between MPG and three other columns
sns.pairplot(auto_data[['MPG', 'Cylinders', 'Displacement', 'Weight']])
plt.show()

# Correlation matrix
# Keep only numeric columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if auto_data contains any (e.g. a text column).
numeric_columns = auto_data.select_dtypes(include='number')
correlation_matrix = numeric_columns.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.show()
# Sample records with Name, Age and Salary columns; None marks a missing
# entry in Age or Salary.
data = {
    'Name': [
        'Alice', 'Bob', 'Charlie', 'David', 'Emma',
        'Frank', 'Grace', 'Harry', 'Ivy', 'Jack',
    ],
    'Age': [
        25, 28, None, 22, 30,
        35, 28, None, 24, 29,
    ],
    'Salary': [
        50000, 60000, 75000, 48000, None,
        90000, 80000, 75000, 52000, 60000,
    ],
}
df = pd.DataFrame(data=data)
Tasks:
1. Handle missing values in the 'Age' and 'Salary' columns.
2. Drop any rows with missing values.
3. Display the cleaned dataset.
# Handle missing values
# Assign the filled column back instead of calling fillna(inplace=True) on
# df['Age']: that chained in-place form operates on an intermediate object,
# is deprecated in pandas 2.x, and leaves df unchanged under copy-on-write.
df['Age'] = df['Age'].fillna(df['Age'].mean())  # missing ages -> mean age
df['Salary'] = df['Salary'].fillna(df['Salary'].median())  # -> median salary

# Drop any rows that still contain a missing value
df = df.dropna()

# Display the cleaned dataset
print(df)
# Per-student scores in three subjects, keyed by Student_ID.
data = {
    'Student_ID': [1, 2, 3, 4, 5],
    'Math_Score': [85, 90, 78, 92, 88],
    'English_Score': [75, 80, 85, 88, 92],
    'Science_Score': [90, 85, 88, 80, 95],
}
df = pd.DataFrame(data=data)
Tasks:
1. Calculate the mean, median, and standard deviation for each subject.
2. Plot a bar chart to visualize the average scores for each subject.
# Calculate mean, median, and standard deviation for each subject.
# Student_ID is an identifier rather than a subject, so it is left out;
# otherwise describe() would report statistics for the IDs as well.
score_columns = [column for column in df.columns if column != 'Student_ID']
# In describe() output the '50%' row is the median.
subject_stats = df[score_columns].describe().loc[['mean', '50%', 'std']].transpose()
print(subject_stats)

# Bar chart of the average score for each subject
plt.bar(subject_stats.index, subject_stats['mean'])
plt.title('Average Score by Subject')
plt.xlabel('Subject')
plt.ylabel('Average Score')
plt.show()
# Paired observations: hours studied and the corresponding exam score.
data = {
    'Hours_Studied': [2, 3, 5, 1, 4, 6, 7, 3, 2, 5],
    'Exam_Score': [50, 65, 80, 40, 75, 90, 95, 60, 55, 85],
}
df = pd.DataFrame(data=data)
Tasks:
1. Create a scatter plot to visualize the relationship between hours studied and exam scores.
2. Add labels and a title to the plot.
import matplotlib.pyplot as plt
# Scatter plot of exam score against hours studied, with axis labels and a
# title.
hours_studied = df['Hours_Studied']
exam_scores = df['Exam_Score']
plt.scatter(x=hours_studied, y=exam_scores)
plt.xlabel('Hours Studied')
plt.ylabel('Exam Score')
plt.title('Relationship Between Hours Studied and Exam Score')
plt.show()