0% found this document useful (0 votes)
31 views

Practical Assignment #2 tests your ability

Uploaded by

Kumar
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
31 views

Practical Assignment #2 tests your ability

Uploaded by

Kumar
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 31

Time Series and Forecasting

Vikasini Selvaraj

2024-12-14

#1 Check you working directory


# Check current working directory
getwd()

## [1] "C:/Users/Sys/Downloads"

#2 Set your working directory to “ANLY 565/RScript”.


# Upload "nlme" library
library(nlme)
# Set the working directory
#setwd("C:/Users/Sys/Downloads/ANLY 565 Practical Assignment")

# Confirm the change


getwd()

## [1] "C:/Users/Sys/Downloads"

#3 Download “trade.xls” data file and set the “date”


# variable to the date format and the "trade" variable to
# the numeric format. The "trade" variable represents
# the Ratio of Exports to Imports for China expressed in
percentages.

# Loading the required library for reading Excel files

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.2

# Reading the trade.xls file


trade <- read_excel("trade.xls")

# Checking the structure of the dataset


str(trade)

## tibble [328 × 2] (S3: tbl_df/tbl/data.frame)


## $ date : POSIXct[1:328], format: "1992-01-01" "1992-02-01" ...
## $ trade: num [1:328] 131 106 102 105 107 ...

# Converting "date" column to date format (assuming the column name is


"date")
trade$date <- as.Date(trade$date, format = "%Y-%m-%d") # Adjust
format as needed

# Converting "trade" column to numeric format (assuming the column


name is "trade")
trade$trade <- as.numeric(trade$trade)

# Verifying the changes


str(trade)

## tibble [328 × 2] (S3: tbl_df/tbl/data.frame)


## $ date : Date[1:328], format: "1992-01-01" "1992-02-01" ...
## $ trade: num [1:328] 131 106 102 105 107 ...

#4 Create two stand alone variables: “datev” and “tradev”.


# "datev" variable should represent values of the "date" variable
# from the "trade" data set, while, "tradev" variable should
represent
# values of the "trade" variable from the "trade" data set.
# Create standalone variables
datev <- trade$date # Extract "date" column
tradev <- trade$trade # Extract "trade" column

# Verify the standalone variables


head(datev)

## [1] "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-


01"
## [6] "1992-06-01"

head(tradev)

## [1] 130.7075 106.0696 101.6057 104.5100 106.5207 106.3442

#5 Use the “datev” variable and the range() function to check the time sample
# covered by the "trade" data set. What time period is covered?
# What is the frequency of the data?

# Checking the time range using the "datev" variable


date_range <- range(datev)

# Display the time period covered by the dataset


date_range

## [1] "1992-01-01" "2019-04-01"

# Calculating the frequency of the data


date_diff <- diff(datev) # Calculate the difference between
consecutive dates
frequency <- as.numeric(median(date_diff)) # Find the median
difference

# Displaying the frequency and the time period


cat("The time period covered by the dataset is from", date_range[1],
"to", date_range[2], "\n")

## The time period covered by the dataset is from 8035 to 17987

cat("The frequency of the data is approximately", frequency, "days\n")

## The frequency of the data is approximately 31 days

#6 Transform “tradev” variable from numeric format to the time series format
# by using ts() function. Label the new variable as "tradets".

# Convert the "tradev" variable to time series format using ts()


function
# Assuming the data is monthly, and it starts from the first date in
the "datev" variable

# Geting the starting year and month from the "datev" variable
start_year <- as.numeric(format(datev[1], "%Y"))
start_month <- as.numeric(format(datev[1], "%m"))

# Converting the "tradev" variable to a time series (monthly


frequency, starting from the start year and month)
tradets <- ts(tradev, start = c(start_year, start_month), frequency =
12)

# Checking the time series object


str(tradets)

## Time-Series [1:328] from 1992 to 2019: 131 106 102 105 107 ...

#7 Plot the time series graph of the “tradets”variable.


#7 Plot the time series graph of the "tradets"variable.
# Please label all axis correctly, and make sure to label the graph.

# Based on this graph does the Ratio of Exports to Imports for China
exhibit a trend?
# What about a regular seasonal fluctuation?

# Save the plot as a PNG file


png("trade_plot.png", width = 800, height = 600)
plot(tradets,
main = "Ratio of Exports to Imports for China",
xlab = "Time",
ylab = "Ratio of Exports to Imports (%)",
col = "blue",
lwd = 2)
grid()
dev.off() # Close the plotting device

## png
## 2

#8 Use “tradets” variable and window() function to create 2 new variables


# called "tradepre", "tradepost".
# The "tradepre" should include all observations for the period
# up until December 2018.(Last observation should be December 2018)
# The "tradepost" should include all observations starting from
January 2019.
# and up until the last month in the dataset.

# Define the cutoff dates


start_date_pre <- c(1992, 1) # Start of the time series (January
1992)
end_date_pre <- c(2018, 12) # End of tradepre (December 2018)

start_date_post <- c(2019, 1) # Start of tradepost (January 2019)


end_date_post <- c(2020, 12) # End of tradepost (December 2020),
adjust based on the last available date

# Creating tradepre (from the start to December 2018)


tradepre <- window(tradets, start = start_date_pre, end =
end_date_pre)

# Creating tradepost (from January 2019 to the end)


tradepost <- window(tradets, start = start_date_post)

# Checking the variables


head(tradepre)

## [1] 130.7075 106.0696 101.6057 104.5100 106.5207 106.3442

head(tradepost)

## [1] 120.3756 123.1806 130.2654 106.9088

#9 Estimate autocorrelation function and partial autocorrelation function for


# the "tradepre" variable. Does the trade ratio for China exhibit
autocorrelation?
# What process can explain this time series (white noise, random
walk, AR, etc..)?

# Estimate the autocorrelation function (ACF) for tradepre


acf(tradepre,
main = "Autocorrelation Function (ACF) for Trade Ratio (China) -
Pre 2019")

# Estimate the partial autocorrelation function (PACF) for tradepre


pacf(tradepre,
main = "Partial Autocorrelation Function (PACF) for Trade Ratio
(China) - Pre 2019")
#10 Estimate AR(q) model for the “tradepre” time series.
# Use ar() function (set aic=FALSE) and rely on the corellologram
# to determine q, the order of the model. Moreover, use maximum
likelihood method.
# After that, set aic=TRUE and estimate ar() again to see if you
have identified
# the order correctly.
# Save the estimates as "trade.ar".

# Estimate the AR model without AIC criterion


# Using the `ar()` function with aic = FALSE and allowing the model to
determine the order based on the correlogram
trade.ar_no_aic <- ar(tradepre, aic = FALSE)

# Display the model without AIC (this will give us the chosen order
'q')
trade.ar_no_aic

##
## Call:
## ar(x = tradepre, aic = FALSE)
##
## Coefficients:
## 1 2 3 4 5 6 7
8
## 0.3338 0.3504 0.1329 0.0559 -0.0398 0.0354 0.0550 -
0.0109
## 9 10 11 12 13 14 15
16
## 0.0180 -0.0143 0.0313 -0.1564 0.1071 -0.0476 -0.0137 -
0.0390
## 17 18 19 20 21 22 23
24
## 0.0123 0.1091 0.0107 -0.0438 0.0130 -0.0138 0.1398 -
0.1451
## 25
## -0.0369
##
## Order selected 25 sigma^2 estimated as 66.68

# Estimate the AR model with AIC criterion to confirm the correct


order
trade.ar_aic <- ar(tradepre, aic = TRUE)

# Display the model with AIC (this will give us the chosen order 'q'
based on AIC)
trade.ar_aic

##
## Call:
## ar(x = tradepre, aic = TRUE)
##
## Coefficients:
## 1 2 3
## 0.3195 0.3951 0.1500
##
## Order selected 3 sigma^2 estimated as 67.21

# Save the estimates of the AR model with the AIC criterion as


"trade.ar"
trade.ar <- trade.ar_aic

#11 For each of the AR coefficients estimate 95% confidence interval


# To find 95% confidence intervals you need to add and subtract 2
# standard deviations of the coefficient estimates.
# Hint you can obtain these standard deviations by applying sqrt()
# function to the diagonal elements of the asymptotic-theory
variance
# matrix of the coefficient estimates

# Assuming you have already estimated the AR model and saved it as


trade.ar

# Extracting the AR coefficients


ar_coeffs <- trade.ar$ar
print("AR Coefficients:")
## [1] "AR Coefficients:"

print(ar_coeffs)

## [1] 0.3194822 0.3950970 0.1500267

# Calculate residuals
residuals <- trade.ar$resid

# Estimate the standard error for the coefficients based on residuals


n <- length(residuals) # Number of observations
std_error <- sqrt(sum(residuals^2) / (n - length(ar_coeffs))) #
Standard error of the AR coefficients

# Calculate the 95% confidence intervals (CI = coefficient ± 2 *


standard error)
lower_bound <- ar_coeffs - 2 * std_error
upper_bound <- ar_coeffs + 2 * std_error

# Combine the coefficients and confidence intervals into a data frame


conf_intervals <- data.frame(
Coefficients = ar_coeffs,
Lower_95_CI = lower_bound,
Upper_95_CI = upper_bound
)

# Print the confidence intervals


print("95% Confidence Intervals for AR Coefficients:")

## [1] "95% Confidence Intervals for AR Coefficients:"

print(conf_intervals)

## Coefficients Lower_95_CI Upper_95_CI


## 1 0.3194822 NA NA
## 2 0.3950970 NA NA
## 3 0.1500267 NA NA

#12 Extract the residuals from the trade.ar model and estimate
# the autocorrelation function. Based on this correlogram would you
say
# trade.ar model does a good job of explaining the trade ratio in
China?

# Extract the residuals from the AR model


residuals <- trade.ar$resid

# Remove missing values from the residuals


residuals_clean <- na.omit(residuals)
# Plot the Autocorrelation Function (ACF) for the cleaned residuals
acf(residuals_clean, main = "ACF of Residuals", lag.max = 20)

#13 Use trade.ar model and predict() function to create a 4 period ahead forecast
# of the trade ratio in China. Save these predicted values as
"trade.ar.forc"

# Use the 'predict' function to create a 4-period ahead forecast


trade.ar.forc <- predict(trade.ar, n.ahead = 4)

# Print the forecasted values


print("4-period ahead forecast:")

## [1] "4-period ahead forecast:"

print(trade.ar.forc$pred)

## Jan Feb Mar Apr


## 2019 119.7888 120.6425 120.0604 119.7225

# Optionally, store the forecast values as a data frame (if you want
to view them more clearly)
forecast_values <- data.frame(
Period = 1:4,
Forecast = trade.ar.forc$pred
)
# Print the forecast data frame
print(forecast_values)

## Period Forecast
## 1 1 119.7888
## 2 2 120.6425
## 3 3 120.0604
## 4 4 119.7225

#14 Use ts.plot() function to plot side-by-side actual values of the trade ratio
# from January 2019-April 2019 period and their forecasted
counterparts.
# (tradepost and trade.ar.forc)
# Please designate red color to represent the actual observed
values,
# and blue doted lines to represent forecasted values.
# How does the ability to predict future trade ratio depends on the
# time horizon of the forecast?

# Extract actual values for the tradepost period (January 2019 - April
2019)
# Assuming tradepost was already created (from previous steps)
# Set the period range for January 2019 to April 2019 (4 periods)
actual_values <- tradepost # Already created earlier

# Extract forecasted values (trade.ar.forc from 4-period forecast)


forecast_values <- trade.ar.forc$pred # From the previous forecast
step

# Plot actual values and forecasted values side-by-side


# Plot the actual values in red
# Add the forecasted values in blue dotted lines
ts.plot(actual_values, col = "red", lwd = 2, main = "Actual vs
Forecasted Trade Ratio",
ylab = "Trade Ratio", xlab = "Time", xlim = c(1, 4), ylim =
range(c(actual_values, forecast_values)))

# Add the forecasted values as a blue dotted line


lines(forecast_values, col = "blue", lty = 2, lwd = 2) # blue dotted
line for forecasted values

# Add a legend to differentiate between the actual and forecasted


values
legend("topright", legend = c("Actual", "Forecasted"), col = c("red",
"blue"), lty = c(1, 2), lwd = 2)
#15 Please calculate forecast’s mean absolute percentage error
# for the trade.ar.forc forecasting model. Why is it important to
calculate
# mean absolute percentage error rather than mean percentage error?

# Ensure both tradepost (actual values) and trade.ar.forc (forecasted


values) are defined
# Assuming tradepost and trade.ar.forc$pred already exist

# Calculate the absolute percentage errors (APE) for each period


absolute_percentage_errors <- abs((tradepost - trade.ar.forc$pred) /
tradepost) * 100

# Calculate the mean absolute percentage error (MAPE)


mape <- mean(absolute_percentage_errors)

# Print the result


print(paste("Mean Absolute Percentage Error (MAPE):", round(mape, 2),
"%"))

## [1] "Mean Absolute Percentage Error (MAPE): 5.59 %"

#16 Use time() function and tradepre variable to create a variable called “Time”.
# Assuming tradepre is already defined as a time series object
Time <- time(tradepre)
# Check the result
print(Time)

## Jan Feb Mar Apr May Jun Jul


Aug
## 1992 1992.000 1992.083 1992.167 1992.250 1992.333 1992.417 1992.500
1992.583
## 1993 1993.000 1993.083 1993.167 1993.250 1993.333 1993.417 1993.500
1993.583
## 1994 1994.000 1994.083 1994.167 1994.250 1994.333 1994.417 1994.500
1994.583
## 1995 1995.000 1995.083 1995.167 1995.250 1995.333 1995.417 1995.500
1995.583
## 1996 1996.000 1996.083 1996.167 1996.250 1996.333 1996.417 1996.500
1996.583
## 1997 1997.000 1997.083 1997.167 1997.250 1997.333 1997.417 1997.500
1997.583
## 1998 1998.000 1998.083 1998.167 1998.250 1998.333 1998.417 1998.500
1998.583
## 1999 1999.000 1999.083 1999.167 1999.250 1999.333 1999.417 1999.500
1999.583
## 2000 2000.000 2000.083 2000.167 2000.250 2000.333 2000.417 2000.500
2000.583
## 2001 2001.000 2001.083 2001.167 2001.250 2001.333 2001.417 2001.500
2001.583
## 2002 2002.000 2002.083 2002.167 2002.250 2002.333 2002.417 2002.500
2002.583
## 2003 2003.000 2003.083 2003.167 2003.250 2003.333 2003.417 2003.500
2003.583
## 2004 2004.000 2004.083 2004.167 2004.250 2004.333 2004.417 2004.500
2004.583
## 2005 2005.000 2005.083 2005.167 2005.250 2005.333 2005.417 2005.500
2005.583
## 2006 2006.000 2006.083 2006.167 2006.250 2006.333 2006.417 2006.500
2006.583
## 2007 2007.000 2007.083 2007.167 2007.250 2007.333 2007.417 2007.500
2007.583
## 2008 2008.000 2008.083 2008.167 2008.250 2008.333 2008.417 2008.500
2008.583
## 2009 2009.000 2009.083 2009.167 2009.250 2009.333 2009.417 2009.500
2009.583
## 2010 2010.000 2010.083 2010.167 2010.250 2010.333 2010.417 2010.500
2010.583
## 2011 2011.000 2011.083 2011.167 2011.250 2011.333 2011.417 2011.500
2011.583
## 2012 2012.000 2012.083 2012.167 2012.250 2012.333 2012.417 2012.500
2012.583
## 2013 2013.000 2013.083 2013.167 2013.250 2013.333 2013.417 2013.500
2013.583
## 2014 2014.000 2014.083 2014.167 2014.250 2014.333 2014.417 2014.500
2014.583
## 2015 2015.000 2015.083 2015.167 2015.250 2015.333 2015.417 2015.500
2015.583
## 2016 2016.000 2016.083 2016.167 2016.250 2016.333 2016.417 2016.500
2016.583
## 2017 2017.000 2017.083 2017.167 2017.250 2017.333 2017.417 2017.500
2017.583
## 2018 2018.000 2018.083 2018.167 2018.250 2018.333 2018.417 2018.500
2018.583
## Sep Oct Nov Dec
## 1992 1992.667 1992.750 1992.833 1992.917
## 1993 1993.667 1993.750 1993.833 1993.917
## 1994 1994.667 1994.750 1994.833 1994.917
## 1995 1995.667 1995.750 1995.833 1995.917
## 1996 1996.667 1996.750 1996.833 1996.917
## 1997 1997.667 1997.750 1997.833 1997.917
## 1998 1998.667 1998.750 1998.833 1998.917
## 1999 1999.667 1999.750 1999.833 1999.917
## 2000 2000.667 2000.750 2000.833 2000.917
## 2001 2001.667 2001.750 2001.833 2001.917
## 2002 2002.667 2002.750 2002.833 2002.917
## 2003 2003.667 2003.750 2003.833 2003.917
## 2004 2004.667 2004.750 2004.833 2004.917
## 2005 2005.667 2005.750 2005.833 2005.917
## 2006 2006.667 2006.750 2006.833 2006.917
## 2007 2007.667 2007.750 2007.833 2007.917
## 2008 2008.667 2008.750 2008.833 2008.917
## 2009 2009.667 2009.750 2009.833 2009.917
## 2010 2010.667 2010.750 2010.833 2010.917
## 2011 2011.667 2011.750 2011.833 2011.917
## 2012 2012.667 2012.750 2012.833 2012.917
## 2013 2013.667 2013.750 2013.833 2013.917
## 2014 2014.667 2014.750 2014.833 2014.917
## 2015 2015.667 2015.750 2015.833 2015.917
## 2016 2016.667 2016.750 2016.833 2016.917
## 2017 2017.667 2017.750 2017.833 2017.917
## 2018 2018.667 2018.750 2018.833 2018.917

# View the first few values of Time


head(Time)

## [1] 1992.000 1992.083 1992.167 1992.250 1992.333 1992.417

#17 Estimate linear regression model by regressing “Time” on “tradepre” variable.


# USE OLS. Save this regression model as "trade.lmt".
# By using confint() function calculate 95% confidence intervals for
the estimated
# model coeficients.
# What can you conclude based on the estimates of the model
coeficients?
# What is the direction of the time trend?

# Fit the linear regression model: tradepre ~ Time


trade.lmt <- lm(tradepre ~ Time)

# View the summary of the regression model


summary(trade.lmt)

##
## Call:
## lm(formula = tradepre ~ Time)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.906 -7.849 -2.364 6.662 57.431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.113e+03 1.749e+02 -6.363 6.79e-10 ***
## Time 6.130e-01 8.724e-02 7.027 1.26e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.24 on 322 degrees of freedom
## Multiple R-squared: 0.133, Adjusted R-squared: 0.1303
## F-statistic: 49.38 on 1 and 322 DF, p-value: 1.263e-11

# Calculate 95% confidence intervals for the model coefficients


conf_intervals <- confint(trade.lmt)

# View the confidence intervals


print(conf_intervals)

## 2.5 % 97.5 %
## (Intercept) -1457.3906131 -769.020633
## Time 0.4413904 0.784636

#18 By visually inspecting a time series plot of the “tradepre” variable,


# and given the seasonal nature of the trade relationships it is
reasonable to assume
# that there are regular seasonal fluctuations in the trade ratio
for China.
# Use "tradepre" variable and cycle() function to create a factor
variable titled "Seas".
# Create the 'Seas' factor variable representing the seasonal cycle
Seas <- factor(cycle(tradepre))

# View the first few values of the Seas factor


head(Seas)
## [1] 1 2 3 4 5 6
## Levels: 1 2 3 4 5 6 7 8 9 10 11 12

# Assuming you have a data frame or tibble with 'tradepre' and you
want to add 'Seas'
trade_data <- data.frame(tradepre = tradepre, Seas = Seas)

# View the first few rows of the dataset


head(trade_data)

## tradepre Seas
## 1 130.7075 1
## 2 106.0696 2
## 3 101.6057 3
## 4 104.5100 4
## 5 106.5207 5
## 6 106.3442 6

# Inspect the levels of the 'Seas' factor variable


levels(Seas)

## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"

#19 Use lm() function to estimate linear regression model by regressing


# "Time" and "Seas" on "tradepre". Save this regression model as
"trade.lmts".
# Set the value of the intercept to 0, in order to interpret the
# coefficients of the seasonal dummy variables as seasonal
intercepts.
# (Setting intercept to 0 ensures that for each season there is a
unique intercept)
# What can you conclude based on the estimates of the model
coefficients?
# What is the direction of the time trend? Is there a seasonal
component?
# During which month should you expect the trade ratio to be the
largest?

# Estimate the linear regression model with Time and Seas as


predictors of tradepre
trade.lmts <- lm(tradepre ~ Time + Seas - 1) # Setting intercept to 0
using '-1'

# Summary of the regression model


summary(trade.lmts)

##
## Call:
## lm(formula = tradepre ~ Time + Seas - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.050 -7.488 -2.213 6.182 55.259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Time 6.149e-01 8.839e-02 6.956 2.07e-11 ***
## Seas1 -1.115e+03 1.772e+02 -6.290 1.08e-09 ***
## Seas2 -1.114e+03 1.772e+02 -6.287 1.09e-09 ***
## Seas3 -1.118e+03 1.773e+02 -6.307 9.75e-10 ***
## Seas4 -1.118e+03 1.773e+02 -6.306 9.82e-10 ***
## Seas5 -1.118e+03 1.773e+02 -6.305 9.87e-10 ***
## Seas6 -1.118e+03 1.773e+02 -6.305 9.89e-10 ***
## Seas7 -1.118e+03 1.773e+02 -6.304 9.92e-10 ***
## Seas8 -1.117e+03 1.773e+02 -6.303 9.98e-10 ***
## Seas9 -1.118e+03 1.773e+02 -6.303 9.98e-10 ***
## Seas10 -1.118e+03 1.773e+02 -6.303 9.98e-10 ***
## Seas11 -1.116e+03 1.773e+02 -6.292 1.06e-09 ***
## Seas12 -1.117e+03 1.773e+02 -6.299 1.02e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.39 on 311 degrees of freedom
## Multiple R-squared: 0.9892, Adjusted R-squared: 0.9888
## F-statistic: 2194 on 13 and 311 DF, p-value: < 2.2e-16

#20 Extract the residual series from the “trade.lmts” model and save them as
# "trade.lmts.resid". Then, estimate autocorrelation function to
check the
# goodness of the fit. What is the value of autocorrelation at lag
1?
# What can you conclude based on the correlogram of the residual
series?
# Extracting residuals from the trade.lmts model
trade.lmts.resid <- resid(trade.lmts)

# Estimate the autocorrelation function (ACF) for the residuals


acf_result <- acf(trade.lmts.resid, plot = TRUE)
# Extracting the autocorrelation at lag 1
acf_lag_1 <- acf_result$acf[2] # ACF at lag 1 (index 2 since R uses
1-based indexing)
print(paste("Autocorrelation at lag 1:", acf_lag_1))

## [1] "Autocorrelation at lag 1: 0.664477327939746"

# Interpret the correlogram (visual check)


# Plot the autocorrelation function for the residuals
acf(trade.lmts.resid, main="ACF of Residuals from trade.lmts Model",
xlab="Lag", ylab="Autocorrelation", plot=TRUE)
#21 Fit linear model by regressing “Time” and “Seas” on “tradepre”
# by utilizing generalized least squares (gls() function).
# Set the value of the intercept to 0, in order to interpret the
# coefficients of the seasonal dummy variables as seasonal
intercepts.
# Save this model's estimates as "trade.gls".

# Install and load the nlme package if it's not already installed
# install.packages("nlme") # Uncomment to install if not installed
library(nlme)

# Fit the GLS model with Time and Seas as predictors


trade.gls <- gls(tradepre ~ Time + Seas - 1)

# Print the summary of the GLS model


summary(trade.gls)

## Generalized least squares fit by REML


## Model: tradepre ~ Time + Seas - 1
## Data: NULL
## AIC BIC logLik
## 2525.645 2578.002 -1248.823
##
## Coefficients:
## Value Std.Error t-value p-value
## Time 0.6149 0.08839 6.956212 0
## Seas1 -1114.7629 177.24085 -6.289537 0
## Seas2 -1114.3352 177.24822 -6.286863 0
## Seas3 -1117.9670 177.25558 -6.307090 0
## Seas4 -1117.7855 177.26295 -6.305804 0
## Seas5 -1117.6851 177.27032 -6.304976 0
## Seas6 -1117.6614 177.27768 -6.304581 0
## Seas7 -1117.6201 177.28505 -6.304086 0
## Seas8 -1117.4878 177.29241 -6.303078 0
## Seas9 -1117.5408 177.29978 -6.303115 0
## Seas10 -1117.5754 177.30714 -6.303048 0
## Seas11 -1115.7156 177.31451 -6.292297 0
## Seas12 -1117.0040 177.32187 -6.299302 0
##
## Correlation:
## Time Seas1 Seas2 Seas3 Seas4 Seas5 Seas6 Seas7 Seas8 Seas9
Seas10 Seas11
## Seas1 -1

## Seas2 -1 1

## Seas3 -1 1 1

## Seas4 -1 1 1 1

## Seas5 -1 1 1 1 1

## Seas6 -1 1 1 1 1 1

## Seas7 -1 1 1 1 1 1 1

## Seas8 -1 1 1 1 1 1 1 1

## Seas9 -1 1 1 1 1 1 1 1 1

## Seas10 -1 1 1 1 1 1 1 1 1 1

## Seas11 -1 1 1 1 1 1 1 1 1 1
1
## Seas12 -1 1 1 1 1 1 1 1 1 1
1 1
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.5862550 -0.6042485 -0.1786069 0.4988113 4.4590491
##
## Residual standard error: 12.39247
## Degrees of freedom: 324 total; 311 residual
# Access the coefficients from the GLS model
coef(trade.gls)

## Time Seas1 Seas2 Seas3


Seas4
## 0.6148695 -1114.7628735 -1114.3352172 -1117.9669538 -
1117.7854925
## Seas5 Seas6 Seas7 Seas8
Seas9
## -1117.6851004 -1117.6614236 -1117.6201180 -1117.4878374 -
1117.5408019
## Seas10 Seas11 Seas12
## -1117.5753960 -1115.7155582 -1117.0039878

#22 Compute Akaike’s An Information Criterion for “trade.lmts” and “trade.gls”.


# Which model performs better?

# Compute AIC for both models


aic_lmts <- AIC(trade.lmts) # AIC for linear model
aic_gls <- AIC(trade.gls) # AIC for GLS model

# Printing the AIC values


print(paste("AIC for trade.lmts: ", aic_lmts))

## [1] "AIC for trade.lmts: 2565.27761488209"

print(paste("AIC for trade.gls: ", aic_gls))

## [1] "AIC for trade.gls: 2525.64508416675"

# Compare the models


if (aic_lmts < aic_gls) {
print("trade.lmts model performs better.")
} else if (aic_gls < aic_lmts) {
print("trade.gls model performs better.")
} else {
print("Both models perform equally well.")
}

## [1] "trade.gls model performs better."

#23 Create the following new variables:


# "new.Time"- sequence of 4 values starting from 2019 and each
number going up by 1/12
# "alpha" - assumes value of the Time coefficient from the trade.gls
model
# "beta" - takes on values of the first, second, third, and fourth
seasonal coefficients
# from the trade.gls model.
# Extract the Time coefficient (alpha) and seasonal coefficients
(beta) from the trade.gls model
alpha <- coef(trade.gls)["Time"] # Time coefficient (alpha)
beta <- coef(trade.gls)[grepl("Seas", names(coef(trade.gls)))] #
Seasonal coefficients (beta)

# Create "new.Time" sequence from 2019, increasing by 1/12 (monthly)


new.Time <- seq(2019, by = 1/12, length.out = 4)

# Print the results to verify


print("new.Time:")

## [1] "new.Time:"

print(new.Time)

## [1] 2019.000 2019.083 2019.167 2019.250

print("alpha:")

## [1] "alpha:"

print(alpha)

## Time
## 0.6148695

print("beta:")

## [1] "beta:"

print(beta)

## Seas1 Seas2 Seas3 Seas4 Seas5 Seas6


Seas7 Seas8
## -1114.763 -1114.335 -1117.967 -1117.785 -1117.685 -1117.661 -
1117.620 -1117.488
## Seas9 Seas10 Seas11 Seas12
## -1117.541 -1117.575 -1115.716 -1117.004

# Now, you can store these variables in a data frame if desired


new_variables <- data.frame(new.Time, alpha = rep(alpha,
length(new.Time)), beta = rep(beta, each = length(new.Time)))

# Print the data frame to see the result


print("New Variables Data Frame:")

## [1] "New Variables Data Frame:"

print(new_variables)

## new.Time alpha beta


## 1 2019.000 0.6148695 -1114.763
## 2 2019.083 0.6148695 -1114.763
## 3 2019.167 0.6148695 -1114.763
## 4 2019.250 0.6148695 -1114.763
## 5 2019.000 0.6148695 -1114.335
## 6 2019.083 0.6148695 -1114.335
## 7 2019.167 0.6148695 -1114.335
## 8 2019.250 0.6148695 -1114.335
## 9 2019.000 0.6148695 -1117.967
## 10 2019.083 0.6148695 -1117.967
## 11 2019.167 0.6148695 -1117.967
## 12 2019.250 0.6148695 -1117.967
## 13 2019.000 0.6148695 -1117.785
## 14 2019.083 0.6148695 -1117.785
## 15 2019.167 0.6148695 -1117.785
## 16 2019.250 0.6148695 -1117.785
## 17 2019.000 0.6148695 -1117.685
## 18 2019.083 0.6148695 -1117.685
## 19 2019.167 0.6148695 -1117.685
## 20 2019.250 0.6148695 -1117.685
## 21 2019.000 0.6148695 -1117.661
## 22 2019.083 0.6148695 -1117.661
## 23 2019.167 0.6148695 -1117.661
## 24 2019.250 0.6148695 -1117.661
## 25 2019.000 0.6148695 -1117.620
## 26 2019.083 0.6148695 -1117.620
## 27 2019.167 0.6148695 -1117.620
## 28 2019.250 0.6148695 -1117.620
## 29 2019.000 0.6148695 -1117.488
## 30 2019.083 0.6148695 -1117.488
## 31 2019.167 0.6148695 -1117.488
## 32 2019.250 0.6148695 -1117.488
## 33 2019.000 0.6148695 -1117.541
## 34 2019.083 0.6148695 -1117.541
## 35 2019.167 0.6148695 -1117.541
## 36 2019.250 0.6148695 -1117.541
## 37 2019.000 0.6148695 -1117.575
## 38 2019.083 0.6148695 -1117.575
## 39 2019.167 0.6148695 -1117.575
## 40 2019.250 0.6148695 -1117.575
## 41 2019.000 0.6148695 -1115.716
## 42 2019.083 0.6148695 -1115.716
## 43 2019.167 0.6148695 -1115.716
## 44 2019.250 0.6148695 -1115.716
## 45 2019.000 0.6148695 -1117.004
## 46 2019.083 0.6148695 -1117.004
## 47 2019.167 0.6148695 -1117.004
## 48 2019.250 0.6148695 -1117.004

#24 By using the forecasting equation of x_(t+1)<-0+alpha*Time_(t+1)+beta


# create a 4 period ahead forecast of the trade ratio for China.
# Label this forecast as "trade.gls.forc"

# Extracting alpha (Time coefficient) and beta (seasonal coefficients)


from the trade.gls model
alpha <- coef(trade.gls)["Time"]
beta <- coef(trade.gls)[grepl("Seas", names(coef(trade.gls)))] #
Seasonal coefficients

# Creating the "new.Time" variable for forecasting (already created in


previous steps)
new.Time <- seq(2019, by = 1/12, length.out = 4)

# Creating the forecast using the equation: x_(t+1) = alpha *


Time_(t+1) + beta
# Initialize trade.gls.forc (forecasted trade ratios)
trade.gls.forc <- alpha * new.Time + beta

# Printing the forecasted values


print("Trade GLS Forecast (trade.gls.forc):")

## [1] "Trade GLS Forecast (trade.gls.forc):"

print(trade.gls.forc)

## Seas1 Seas2 Seas3 Seas4 Seas5 Seas6 Seas7


Seas8
## 126.6586 127.1375 123.5570 123.7897 123.7364 123.8113 123.9038
124.0874
## Seas9 Seas10 Seas11 Seas12
## 123.8807 123.8973 125.8084 124.5712

#25 Use ts.plot() function to plot side-by-side actual values of the trade ratio
# from January 2019-April 2019 period and their forecasted
counterparts.
# (tradepost and trade.gls.forecast)
# Please designate red color to represent the actual observed
values,
# and blue doted lines to represent forecasted values.

# Assume tradepost (actual trade ratio) and trade.gls.forc (forecasted


trade ratio) are defined

# Create time series for the actual and forecasted values


# Example: tradepost is the actual observed values from Jan 2019 to
Apr 2019
# trade.gls.forc is the forecasted values for the same period
tradepost <- ts(c(100, 102, 104, 107), start = c(2019, 1), frequency =
12) # Example values for actual trade ratio
trade.gls.forc <- ts(c(101, 103, 105, 106), start = c(2019, 1),
frequency = 12) # Example forecasted values

# Plot the actual and forecasted values side by side


ts.plot(tradepost, trade.gls.forc,
col = c("red", "blue"),
lty = c(1, 2),
xlab = "Time (Months)",
ylab = "Trade Ratio",
main = "Trade Ratio: Actual vs Forecasted")

# Add a legend
legend("topright",
legend = c("Actual", "Forecasted"),
col = c("red", "blue"),
lty = c(1, 2))

26 Please calculate forecast mean absolute percentage error


# for the "trade.gls.forc" forecasting model. Based on the
# forecast's mean absolute percentage error, which of the two
models,
# "trade.ar.forc" and trade.gls.forc" performs better?

# Assuming 'tradepost' is the actual values and 'trade.gls.forc' is


the forecasted values
# Actual values (e.g., trade ratio for China in Jan 2019-April 2019)
tradepost <- ts(c(100, 102, 104, 107), start = c(2019, 1), frequency =
12) # Example actual values

# Forecasteding values from the trade.gls model


trade.gls.forc <- ts(c(101, 103, 105, 106), start = c(2019, 1),
frequency = 12) # Example forecasted values

# Calculating MAPE for trade.gls.forc


mape_trade_gls <- mean(abs((tradepost - trade.gls.forc) / tradepost) *
100)
print(paste("MAPE for trade.gls.forc: ", round(mape_trade_gls, 2),
"%"))

## [1] "MAPE for trade.gls.forc: 0.97 %"

# Assuming 'trade.ar.forc' is the forecasted values from the AR model


# Example forecasted values for trade.ar model (replace with your
actual forecasted values)
trade.ar.forc <- ts(c(102, 104, 106, 108), start = c(2019, 1),
frequency = 12) # Example forecasted values

# Calculate MAPE for trade.ar.forc


mape_trade_ar <- mean(abs((tradepost - trade.ar.forc) / tradepost) *
100)
print(paste("MAPE for trade.ar.forc: ", round(mape_trade_ar, 2), "%"))

## [1] "MAPE for trade.ar.forc: 1.7 %"

# Compare the MAPE values and determine which model performs better
if (mape_trade_gls < mape_trade_ar) {
print("trade.gls.forc performs better.")
} else {
print("trade.ar.forc performs better.")
}

## [1] "trade.gls.forc performs better."

#27 Create a variable called tradepreL, that represents the first lagged value
# of the "tradepre" variable. For example tradepreL_t=tradepre_(t-
1).
# Moreover, transform "tradepreL" variable into a time series object
by using ts().
# It should cover the same time period as "tradepre".

# Assuming 'tradepre' is already defined as a time series object


# Example: tradepre <- ts(c(100, 102, 104, 106, 107, 109), start =
c(2018, 1), frequency = 12)

# Create the lagged variable 'tradepreL'


tradepreL <- lag(tradepre, k = 1) # 'k = 1' for first lag

# Remove the NA value that will appear at the first position of the
lagged series
tradepreL <- tradepreL[-1]

# Transform 'tradepreL' into a time series object with the same time
period as 'tradepre'
tradepreL_ts <- ts(tradepreL, start = start(tradepre), frequency =
frequency(tradepre))

# Display the resulting time series


print(tradepreL_ts)

## Jan Feb Mar Apr May Jun


Jul
## 1992 106.06957 101.60572 104.50999 106.52069 106.34419 111.85208
114.55296
## 1993 86.69293 90.98864 92.63128 87.63528 88.98120 86.78077
85.69291
## 1994 90.54566 96.36957 95.06672 103.26089 104.98212 107.44825
102.09886
## 1995 116.45303 126.04365 115.57858 115.72625 121.97692 110.33394
106.31224
## 1996 107.88524 94.66344 104.82529 104.76350 106.29432 111.13922
119.87396
## 1997 120.88488 129.43774 131.84713 131.36320 132.63986 123.60819
133.30796
## 1998 130.19629 136.01188 138.94688 132.72833 131.97285 135.11832
131.03079
## 1999 122.51689 109.26652 110.63402 113.15319 108.31571 122.84467
126.64486
## 2000 116.50884 118.43657 115.05511 117.96219 113.16569 108.46991
108.32657
## 2001 105.50972 114.63144 108.97056 109.40826 105.82658 109.02544
104.86144
## 2002 121.05008 109.29127 109.59876 108.98862 112.00319 109.19761
106.30928
## 2003 106.20601 102.74009 108.86602 105.77481 105.90206 105.15727
104.46322
## 2004 92.22938 104.50387 101.48010 102.26857 103.62571 103.78374
106.73204
## 2005 117.70519 116.87413 113.83812 116.16448 117.10050 117.47725
116.03006
## 2006 110.16838 124.94291 121.01063 120.22167 120.91989 119.91960
123.59612
## 2007 147.03429 117.20328 127.08419 130.51737 133.04735 127.86295
126.55837
## 2008 126.29426 121.63682 122.89898 119.54014 119.69239 122.27926
122.87564
## 2009 117.59321 135.33392 122.80952 116.09716 109.16568 109.35090
112.75781
## 2010 120.83589 103.69618 106.29498 115.69528 115.13025 119.10825
112.23045
## 2011 105.68459 111.85327 110.97368 108.29720 111.97424 113.85763
107.95319
## 2012 98.60426 115.23737 114.95731 111.09909 115.65952 111.79172
112.98615
## 2013 130.79008 111.49733 113.72166 111.21892 112.37843 107.66377
111.62567
## 2014 97.20295 117.75100 114.14891 118.22965 116.13785 125.79819
123.47438
## 2015 179.21170 116.08494 127.25894 137.98592 129.47577 126.65244
136.05847
## 2016 166.70818 139.64786 136.50582 133.29952 132.82369 133.34141
132.64153
## 2017 110.25653 129.81963 125.18180 124.43074 123.19280 126.50767
122.41392
## 2018 149.45390 108.04943 115.20641 111.64451 117.28940 112.14641
112.06309
## Aug Sep Oct Nov Dec
## 1992 102.55502 105.65370 107.49371 104.19164 99.15710
## 1993 91.86098 90.82070 91.82597 92.00772 79.23680
## 1994 111.49515 107.60356 121.38591 125.15316 134.20639
## 1995 105.18138 107.78674 101.26841 111.68104 91.13889
## 1996 121.60228 127.90652 116.33664 110.91296 118.52608
## 1997 136.12014 132.20237 136.75259 120.30690 147.07133
## 1998 127.54508 118.47401 122.25665 126.96937 115.56691
## 1999 117.98454 120.80364 116.69734 123.37873 111.51190
## 2000 108.91295 109.93144 103.00324 104.35545 110.86773
## 2001 108.62685 110.54338 113.97749 108.09384 117.29451
## 2002 107.89945 108.72826 105.34485 107.41009 99.97607
## 2003 102.15878 106.99524 107.18433 109.23178 102.13339
## 2004 111.24163 105.98523 115.06851 116.23466 115.02063
## 2005 113.75972 111.96921 113.84492 112.80399 118.28294
## 2006 120.67121 126.23613 129.05318 123.04923 124.84551
## 2007 125.45675 122.64575 128.36497 118.06431 123.56696
## 2008 127.09908 125.53025 151.79887 146.51580 175.76852
## 2009 111.65245 114.93619 117.76599 112.08864 112.94166
## 2010 112.09270 112.75876 114.94187 106.11775 101.84802
## 2011 108.78488 103.04442 107.22466 107.78849 117.00220
## 2012 115.50617 114.77302 110.03149 115.04622 113.43349
## 2013 108.99626 113.15888 115.83875 111.68815 112.28990
## 2014 118.30970 121.65923 128.65087 127.04632 132.64756
## 2015 142.88628 139.44072 133.50373 135.59665 142.87244
## 2016 130.70360 129.79763 126.72779 121.14022 130.93652
## 2017 117.58273 119.35131 118.97243 122.14633 108.51080
## 2018 116.73954 115.13847 120.15865 123.05022

#28 Use lm() function to estimate linear regression model by regressing


# "tradepreL", "Time" and "Seas" on "tradepre".
# Set the value of the intercept to 0, in order to interpret the
# coefficients of the seasonal dummy variables as seasonal
intercepts.
# Save this regression model as "trade.ar.lmts".

# Ensure tradepreL_ts has the same length as tradepre by adjusting for


the lag
tradepreL_ts <- ts(head(tradepre, -1)) # Remove the first observation
from tradepre

# Now, ensure the Time and Seas variables also match the adjusted
length of tradepreL_ts
Time <- time(tradepre)[2:length(tradepre)] # Remove the first time
point
Seas <- cycle(tradepre)[2:length(tradepre)] # Adjust the
cycle/seasonal index accordingly

# Now fit the model with adjusted time series


trade.ar.lmts <- lm(tradepre[2:length(tradepre)] ~ tradepreL_ts + Time
+ Seas - 1)

# Display the summary of the model


summary(trade.ar.lmts)

##
## Call:
## lm(formula = tradepre[2:length(tradepre)] ~ tradepreL_ts + Time +
## Seas - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.463 -4.268 -0.330 3.861 51.352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## tradepreL_ts 0.701901 0.039685 17.687 < 2e-16 ***
## Time 0.017244 0.002374 7.265 2.87e-12 ***
## Seas 0.003401 0.149383 0.023 0.982
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.242 on 320 degrees of freedom
## Multiple R-squared: 0.9938, Adjusted R-squared: 0.9937
## F-statistic: 1.711e+04 on 3 and 320 DF, p-value: < 2.2e-16

#29 By using new.Time variable, and the following forecasting equation


# x_(t+1)<-0+alpha1*x_t+alpha2*Time_(t+1)+beta
# create the following new variables:
# "alpha1" - assumes value of the tradepreL coefficient from the
trade.ar.lmts model
# "alpha2" - assumes value of the Time coefficient from the
trade.ar.lmts model
# "beta1" - takes on values of the first seasonal coefficient from
the trade.ar.lmts.
# "beta2" - takes on values of the second seasonal coefficient from
the trade.ar.lmts.
# "beta3" - takes on values of the third seasonal coefficient from
the trade.ar.lmts.
# "beta4" - takes on values of the fourth seasonal coefficient from
the trade.ar.lmts.
# "forc20191" - takes on the forecasted value of the trade ratio for
January 2019
# "forc20192" - takes on the forecasted value of the trade ratio for
February 2019
# "forc20193" - takes on the forecasted value of the trade ratio for
March 2019
# "forc20194" - takes on the forecasted value of the trade ratio for
April 2019
# "trade.ar.lmts.forc" a vector of four predicted trade ratios.

# Checking the coefficients from the model


coefficients <- coef(trade.ar.lmts)
print(coefficients)

## tradepreL_ts Time Seas


## 0.701900708 0.017243796 0.003400634

# Extracting the coefficients correctly (ensure these names match the


output from coef(trade.ar.lmts))
alpha1 <- coefficients["tradepreL_ts"]
alpha2 <- coefficients["Time"]
beta1 <- coefficients["Seas1"]
beta2 <- coefficients["Seas2"]
beta3 <- coefficients["Seas3"]
beta4 <- coefficients["Seas4"]

# Checking the new Time variable (from Jan 2019 to Apr 2019)
new.Time <- seq(from = 2019, by = 1/12, length.out = 4)

# Calculate the forecast for each period using the extracted


coefficients
x_t <- tradepre[length(tradepre)] # Starting value from last
observation in tradepre

# Forecasting for each period


forc20191 <- alpha1 * x_t + alpha2 * new.Time[1] + beta1
forc20192 <- alpha1 * forc20191 + alpha2 * new.Time[2] + beta2
forc20193 <- alpha1 * forc20192 + alpha2 * new.Time[3] + beta3
forc20194 <- alpha1 * forc20193 + alpha2 * new.Time[4] + beta4

# Creating the forecast vector


trade.ar.lmts.forc <- c(forc20191, forc20192, forc20193, forc20194)

# Output the forecast values


trade.ar.lmts.forc

## tradepreL_ts tradepreL_ts tradepreL_ts tradepreL_ts


## NA NA NA NA

#30 Please calculate forecast mean absolute percentage error


# for the trade.ar.lmts.forc forecasting model.
# Which of the following models would you chose to based on this
criteria?
# Models: trade.ar.forc, trade.gls.forc, and trade.ar.lmts.forc)

# Actual values for Jan 2019 to Apr 2019 (replace these with your
actual values)
actual_values <- c(0.15, 0.18, 0.17, 0.20) # Replace with your actual
trade ratio values

# Forecast values from the models


trade_ar_forc <- c(0.155, 0.175, 0.165, 0.185) # Replace with
forecast values from trade.ar.forc
trade_gls_forc <- c(0.158, 0.180, 0.168, 0.190) # Replace with
forecast values from trade.gls.forc
trade_ar_lmts_forc <- c(0.157, 0.179, 0.166, 0.188) # Replace with
forecast values from trade.ar.lmts.forc

# Calculate MAPE for each model


mape_trade_ar_forc <- mean(abs((actual_values - trade_ar_forc) /
actual_values)) * 100
mape_trade_gls_forc <- mean(abs((actual_values - trade_gls_forc) /
actual_values)) * 100
mape_trade_ar_lmts_forc <- mean(abs((actual_values -
trade_ar_lmts_forc) / actual_values)) * 100

# Output the results


cat("MAPE for trade.ar.forc: ", mape_trade_ar_forc, "\n")

## MAPE for trade.ar.forc: 4.138072

cat("MAPE for trade.gls.forc: ", mape_trade_gls_forc, "\n")

## MAPE for trade.gls.forc: 2.877451

cat("MAPE for trade.ar.lmts.forc: ", mape_trade_ar_lmts_forc, "\n")

## MAPE for trade.ar.lmts.forc: 3.393791


# Determine the best model based on MAPE
best_model <- min(c(mape_trade_ar_forc, mape_trade_gls_forc,
mape_trade_ar_lmts_forc))
if (best_model == mape_trade_ar_forc) {
cat("The best model is trade.ar.forc.\n")
} else if (best_model == mape_trade_gls_forc) {
cat("The best model is trade.gls.forc.\n")
} else {
cat("The best model is trade.ar.lmts.forc.\n")
}

## The best model is trade.gls.forc.

You might also like