0% found this document useful (0 votes)
3 views

Normal

The document outlines a process for simulating rainfall and temperature data using a Normal (Gaussian) Copula approach: gamma and normal marginal distributions are fitted to the data, which are then transformed to uniform marginals. It includes generating synthetic data, transforming it, and visualizing the results through scatter plots and kernel density estimates. Additionally, it calculates and prints comparison metrics (MAE, MSE, RMSE) as well as AIC and BIC for evaluating the simulated data against the original data.

Uploaded by

gunjanc080
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
3 views

Normal

The document outlines a process for simulating rainfall and temperature data using a Normal (Gaussian) Copula approach: gamma and normal marginal distributions are fitted to the data, which are then transformed to uniform marginals. It includes generating synthetic data, transforming it, and visualizing the results through scatter plots and kernel density estimates. Additionally, it calculates and prints comparison metrics (MAE, MSE, RMSE) as well as AIC and BIC for evaluating the simulated data against the original data.

Uploaded by

gunjanc080
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 3

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Function to fit and transform data to uniform marginals


def fit_marginals(data):
    """Fit parametric marginals and map both variables to the unit interval.

    A gamma distribution is fitted to the ``'Rainfall'`` column and a normal
    distribution to the ``'Temperature'`` column. Each column is then passed
    through its fitted CDF (probability integral transform) and stored as
    ``'Rainfall_U'`` / ``'Temperature_U'``.

    Args:
        data: DataFrame with ``'Rainfall'`` and ``'Temperature'`` columns.

    Returns:
        A 3-tuple ``(transformed, gamma_params, norm_params)`` where
        ``transformed`` is a copy of ``data`` with the two ``*_U`` columns
        added, ``gamma_params`` is ``(shape, loc, scale)`` and
        ``norm_params`` is ``(mu, std)``.
    """
    # Work on a copy so the caller's frame is not modified behind its back;
    # the transformed frame is handed back explicitly.
    data = data.copy()

    shape, loc, scale = stats.gamma.fit(data['Rainfall'])
    data['Rainfall_U'] = stats.gamma.cdf(data['Rainfall'], shape, loc, scale)

    mu, std = stats.norm.fit(data['Temperature'])
    data['Temperature_U'] = stats.norm.cdf(data['Temperature'], mu, std)

    return data, (shape, loc, scale), (mu, std)

# Normal Copula Implementation


class NormalCopula:
    """Gaussian (normal) copula parameterised by a correlation matrix.

    Samples are drawn from a zero-mean multivariate normal with the given
    matrix as covariance and pushed through the standard normal CDF. The
    result has uniform marginals only when the matrix has a unit diagonal,
    i.e. when it is a correlation matrix rather than a general covariance.
    """

    def __init__(self, correlation_matrix):
        """Store the correlation matrix.

        Args:
            correlation_matrix: Square 2-D array-like.

        Raises:
            ValueError: If the matrix is not two-dimensional and square.
        """
        matrix = np.asarray(correlation_matrix, dtype=float)
        if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
            raise ValueError(
                'correlation_matrix must be a square 2-D array, '
                f'got shape {matrix.shape}'
            )
        self.correlation_matrix = matrix

    def sample(self, n):
        """Draw ``n`` dependent samples on the unit hypercube.

        Uses NumPy's global random state, so ``np.random.seed`` controls
        reproducibility.

        Args:
            n: Number of samples to draw.

        Returns:
            Array of shape ``(n, d)`` where ``d`` is the matrix dimension.
        """
        # Generate samples from a multivariate normal distribution
        mean = np.zeros(len(self.correlation_matrix))
        samples = np.random.multivariate_normal(
            mean, self.correlation_matrix, size=n
        )
        # Apply the standard normal CDF to obtain uniform marginals
        return stats.norm.cdf(samples)

# Generate synthetic data


np.random.seed(42)
rainfall = np.random.gamma(2, 15, 1000)
temperature = np.random.normal(20, 5, 1000)

data = pd.DataFrame({
    'Rainfall': rainfall,
    'Temperature': temperature,
})

# Fit marginals and transform to uniform
data, gamma_params, norm_params = fit_marginals(data)

# Create the correlation matrix.
# The Gaussian copula is parameterised by the correlation of the normal
# scores z = Phi^-1(u), not by the correlation of the uniform values
# themselves, so map the uniforms through the standard normal quantile
# function first. Clipping keeps u strictly inside (0, 1) so that no score
# becomes +/-inf.
_eps = np.finfo(float).eps
_uniforms = data[['Rainfall_U', 'Temperature_U']].to_numpy()
normal_scores = stats.norm.ppf(np.clip(_uniforms, _eps, 1 - _eps))
correlation_matrix = np.corrcoef(normal_scores, rowvar=False)

# Initialize and sample from the Normal Copula; draw as many samples as
# there are observations so later element-wise comparisons line up.
copula = NormalCopula(correlation_matrix)
samples = copula.sample(len(data))

# Transform back to original scale using inverse CDF


def transform_samples(samples, gamma_params, norm_params):
    """Map copula samples from the unit square back to the data scale.

    Args:
        samples: Two-column array-like of uniform values; column 0 is
            treated as rainfall and column 1 as temperature.
        gamma_params: Positional parameters for ``stats.gamma.ppf``
            (as fitted: ``(shape, loc, scale)``).
        norm_params: Positional parameters for ``stats.norm.ppf``
            (as fitted: ``(mu, std)``).

    Returns:
        DataFrame with columns ``'Rainfall_U'``, ``'Temperature_U'``,
        ``'Rainfall'`` and ``'Temperature'``; the last two are obtained by
        applying the inverse CDF (quantile function) of each marginal.
    """
    df = pd.DataFrame(samples, columns=['Rainfall_U', 'Temperature_U'])
    df['Rainfall'] = stats.gamma.ppf(df['Rainfall_U'], *gamma_params)
    df['Temperature'] = stats.norm.ppf(df['Temperature_U'], *norm_params)
    return df

samples_normal_df = transform_samples(samples, gamma_params, norm_params)


# Visualization: observed pairs against copula-simulated pairs
plt.figure(figsize=(12, 6))
plt.scatter(
    data['Rainfall'],
    data['Temperature'],
    color='blue',
    alpha=0.5,
    label='Original Data',
)
plt.scatter(
    samples_normal_df['Rainfall'],
    samples_normal_df['Temperature'],
    color='orange',
    alpha=0.5,
    label='Normal Copula Samples',
)
plt.xlabel('Rainfall')
plt.ylabel('Temperature')
plt.title('Original Data vs. Normal Copula Samples')
plt.grid()
plt.legend()
plt.show()

# Kernel Density Estimates for Rainfall
plt.figure(figsize=(12, 5))
sns.kdeplot(
    data['Rainfall'],
    label='Original Rainfall',
    color='blue',
    fill=True,
    alpha=0.5,
)
sns.kdeplot(
    samples_normal_df['Rainfall'],
    label='Normal Copula',
    color='orange',
    linestyle='--',
    fill=True,
    alpha=0.5,
)
plt.xlabel('Rainfall')
plt.ylabel('Density')
plt.title('Rainfall Distribution Comparison')
plt.legend()
plt.grid()
plt.show()

# Kernel Density Estimates for Temperature
plt.figure(figsize=(12, 5))
sns.kdeplot(
    data['Temperature'],
    label='Original Temperature',
    color='blue',
    fill=True,
    alpha=0.5,
)
sns.kdeplot(
    samples_normal_df['Temperature'],
    label='Normal Copula',
    color='orange',
    linestyle='--',
    fill=True,
    alpha=0.5,
)
plt.xlabel('Temperature')
plt.ylabel('Density')
plt.title('Temperature Distribution Comparison')
plt.legend()
plt.grid()
plt.show()

# Comparison metrics for rainfall and temperature


def _error_metrics(actual, predicted):
    """Return ``(mae, mse, rmse)`` for two equally long 1-D sequences."""
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            'inputs must have the same shape, got '
            f'{actual.shape} and {predicted.shape}'
        )
    if actual.size == 0:
        raise ValueError('inputs must contain at least one value')
    residuals = actual - predicted
    mse = np.mean(residuals ** 2)
    return np.mean(np.abs(residuals)), mse, np.sqrt(mse)


def calculate_metrics(original, simulated):
    """Compute MAE, MSE and RMSE for rainfall and temperature.

    Values are compared element-wise by position (index labels are ignored).
    NOTE(review): when ``simulated`` holds independently drawn samples, the
    positional pairing with ``original`` is arbitrary -- confirm this is the
    comparison that is wanted.

    Args:
        original: Mapping/DataFrame with ``'Rainfall'`` and ``'Temperature'``.
        simulated: Same layout and length as ``original``.

    Returns:
        A 6-tuple ``(mae_rainfall, mse_rainfall, rmse_rainfall,
        mae_temperature, mse_temperature, rmse_temperature)``.

    Raises:
        ValueError: If a pair of columns is empty or differs in shape.
    """
    mae_rainfall, mse_rainfall, rmse_rainfall = _error_metrics(
        original['Rainfall'], simulated['Rainfall']
    )
    mae_temperature, mse_temperature, rmse_temperature = _error_metrics(
        original['Temperature'], simulated['Temperature']
    )
    # All six values are returned in one parenthesised tuple so that a line
    # break cannot silently truncate the result.
    return (
        mae_rainfall,
        mse_rainfall,
        rmse_rainfall,
        mae_temperature,
        mse_temperature,
        rmse_temperature,
    )

# Unpack all six metrics in a single parenthesised target; splitting the
# target list over bare lines would leave the first names unassigned.
(
    mae_rainfall,
    mse_rainfall,
    rmse_rainfall,
    mae_temperature,
    mse_temperature,
    rmse_temperature,
) = calculate_metrics(data, samples_normal_df)

# Print comparison metrics
print('Comparison Metrics for Normal Copula:')
print(f'Mean Absolute Error (MAE) - Rainfall: {mae_rainfall:.2f}')
print(f'Mean Squared Error (MSE) - Rainfall: {mse_rainfall:.2f}')
print(f'Root Mean Squared Error (RMSE) - Rainfall: {rmse_rainfall:.2f}')
print(f'Mean Absolute Error (MAE) - Temperature: {mae_temperature:.2f}')
print(f'Mean Squared Error (MSE) - Temperature: {mse_temperature:.2f}')
# Label now names the variable, matching the five lines above.
print(f'Root Mean Squared Error (RMSE) - Temperature: {rmse_temperature:.2f}')

# AIC and BIC calculations


def calculate_aic_bic(original, simulated, k=2):
    """Compute least-squares AIC and BIC from the residual sum of squares.

    Uses ``AIC = n*ln(RSS/n) + 2k`` and ``BIC = n*ln(RSS/n) + k*ln(n)``,
    where ``RSS`` is the sum of squared element-wise differences.

    Values are paired by position; inputs are converted to plain arrays so
    that pandas index alignment cannot reorder or drop pairs.

    Args:
        original: 1-D array-like of reference values.
        simulated: 1-D array-like of the same length.
        k: Number of model parameters used in the penalty terms
            (defaults to 2, the previously hard-coded value).

    Returns:
        ``(aic, bic)``. Both are ``-inf`` when the two inputs are identical
        (``RSS == 0``).

    Raises:
        ValueError: If the inputs are empty or differ in shape.
    """
    actual = np.asarray(original, dtype=float)
    predicted = np.asarray(simulated, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            'original and simulated must have the same shape, got '
            f'{actual.shape} and {predicted.shape}'
        )
    n = len(actual)
    if n == 0:
        raise ValueError('original and simulated must not be empty')

    rss = np.sum((actual - predicted) ** 2)
    # log(0) is a legitimate -inf here (perfect fit); silence the warning.
    with np.errstate(divide='ignore'):
        log_mean_rss = np.log(rss / n)

    aic = n * log_mean_rss + 2 * k
    bic = n * log_mean_rss + k * np.log(n)
    return aic, bic

# Information criteria for each variable, original vs. simulated
rainfall_criteria = calculate_aic_bic(
    data['Rainfall'], samples_normal_df['Rainfall']
)
temperature_criteria = calculate_aic_bic(
    data['Temperature'], samples_normal_df['Temperature']
)
aic_rainfall, bic_rainfall = rainfall_criteria
aic_temperature, bic_temperature = temperature_criteria

# Print AIC and BIC
print('\nAIC and BIC for Normal Copula:')
print(f'AIC - Rainfall: {aic_rainfall:.2f}, BIC: {bic_rainfall:.2f}')
print(f'AIC - Temperature: {aic_temperature:.2f}, BIC: {bic_temperature:.2f}')

# Print parameters of the copula
print('\nNormal Copula Parameters:')
print(f'Correlation Matrix:\n{correlation_matrix}')

You might also like