import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
# Read the quartet data into a DataFrame.
# NOTE(review): "path of dataset" looks like a placeholder; point it at the
# real CSV file before running.
df = pd.read_csv("path of dataset")

# Map each dataset label to its (x column, y column) pair in the CSV so the
# later steps can iterate over all four datasets uniformly.
datasets = {
    "I": ("x1", "y1"),
    "II": ("x2", "y2"),
    "III": ("x3", "y3"),
    "IV": ("x4", "y4"),
}
# Report the Pearson correlation coefficient of each (x, y) column pair.
for name, (x_col, y_col) in datasets.items():
    # pearsonr returns (coefficient, p-value); only the coefficient is printed.
    corr, _ = pearsonr(df[x_col], df[y_col])
    print(f"Dataset {name}: Pearson correlation = {corr:.3f}")
# Draw every dataset as a scatter plot on its own panel of a 2x2 grid.
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
fig.suptitle('Anscombe-like Quartet Plots', fontsize=16)

# axs.flat walks the 2x2 array row by row, so the datasets fill the grid
# left-to-right, top-to-bottom (same order as indexing by i // 2, i % 2).
for ax, (name, (x_col, y_col)) in zip(axs.flat, datasets.items()):
    ax.scatter(df[x_col], df[y_col])
    ax.set_title(f"Dataset {name}")
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)

# Fit the panels inside rect=(left, bottom, right, top) in figure coordinates,
# leaving the strip above 0.95 free for the suptitle.
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()