### Example for TSA Lecture 2
# 1- Linear regression with matrix notation
# 2- forecast (prediction)
# 3- Global trend model notation
# 4- Global trend model iterative updates
# 5- WLS - example with correlated epsilons

# load some packages:
library(fpp2)
library(dplyr)
library(tidyverse)


# load data:
# The "austa" data = Total international visitors to Australia (in millions) 
# from 1980 to 2015. Yearly data (ie one datapoint per year). In total 36 datapoints.

head(austa)
# austa is a "time-series" object, but we will convert to a dataframe

austa_data <- data.frame(int_visit=as.matrix(austa), year=as.matrix(time(austa)))
rownames(austa_data) <- NULL
head(austa_data)

# Now we split the data into test and train:
austa_train <- austa_data %>% filter(., year <= 2005)
austa_test <- austa_data %>% filter(., year > 2005)


### Plot train and test data
ggplot(austa_train, aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_point(data=austa_test, col="blue")


# 1- Linear regression with matrix notation

# we will fit a linear model: Y_i = beta_0 + beta_1 * time_i + epsilon_i
# so we have two parameters: beta_0 and beta_1
nparams <- 2

# we also save the number of observations:
n <- length(austa_train$year)


# X is the "design matrix"
X <- cbind(1, austa_train$year)
print(X)

# y is vector with observations:
y <- cbind(austa_train$int_visit)
print(y)

# to estimate parameters we solve the "normal equations":
(OLS <- solve(t(X)%*%X)%*%t(X)%*%y)
# these are the parameter estimates!
theta_0 <- OLS[1]
theta_1 <- OLS[2]

# now we compute y_hat values for all observations:
yhat_ols <- X%*%OLS
print(yhat_ols)

# plot:
ggplot(austa_train, aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_ols), col="red", size=.5) 


# we will now calculate the standard errors on the parameters beta_0 and beta_1:

# first compute residuals:
e_ols <- y - yhat_ols
print(e_ols)

# calculate sum of squared residuals:
RSS_ols <- t(e_ols)%*%e_ols
print(RSS_ols)

# calculate sigma^2:
sigma2_ols <- as.numeric(RSS_ols/(n - nparams)) # = "Residual variance"
sqrt(sigma2_ols) # = "Residual standard error"

# calculate variance-covariance matrix of _parameters_:
V_ols <- sigma2_ols * solve(t(X) %*% X)
print(V_ols)

# the variances of the parameters are the values in the diagonal:
diag(V_ols)
# and the standard errors are given by:
(sqrt(diag(V_ols))) 

se_theta_0 <- (sqrt(diag(V_ols)))[1]
se_theta_1 <- (sqrt(diag(V_ols)))[2]

# now we have both point estimates and standard errors:
# intercept:
theta_0
se_theta_0
# slope:
theta_1
se_theta_1


# 2- forecast (prediction)

# now we use the model for predictions on future timepoints
# we use the timepoints from the testdata:
Xtest <- cbind(1, austa_test$year)
print(Xtest)

# compute predictions 
y_pred <- Xtest%*%OLS
print(y_pred)

# compute prediction variance-covariance matrix:
Vmatrix_pred <- sigma2_ols*(1+(Xtest%*%solve(t(X)%*%X))%*%t(Xtest))

# the variances of individual predictions are in the diagonal of the matrix above

# compute "prediction intervals"
y_pred_lwr <- y_pred - 1.96*sqrt(diag(Vmatrix_pred))
y_pred_upr <- y_pred + 1.96*sqrt(diag(Vmatrix_pred))


# plot forecast:
ggplot(austa_train, aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_ols), col="red", size=.5) +
  geom_point(data=austa_test, aes(x=year,y=y_pred), col="red", size=.5) +
  geom_ribbon(data=austa_test, aes(x=year,ymin=y_pred_lwr, ymax=y_pred_upr), inherit.aes=FALSE, alpha=0.2, fill="red")

# plot WITH test data:
ggplot(austa_train, aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_ols), col="red", size=.5) +
  geom_point(data=austa_test, aes(x=year,y=y_pred), col="red", size=.5) +
  geom_ribbon(data=austa_test, aes(x=year,ymin=y_pred_lwr, ymax=y_pred_upr), inherit.aes=FALSE, alpha=0.2, fill="red") +
  geom_point(data=austa_test, aes(x=year,y=int_visit), col="blue", size=.5)


# 3- Global trend model notation

# total number of observations (in train data) is 26:
print(n)

# we will now make a global trend model from the reference point at timepoint 26

# instead of our original x-values, which were:
austa_train$year

# we will now make the new x-values:
austa_train$x_26 <- seq(-(n-1), 0, 1)
print(austa_train$x_26)

# make design matrix for the trend model 
X_N <- cbind(1, austa_train$x_26)
print(X_N)

# compute F_N (= F_26)
F_N <- t(X_N)%*%X_N

# compute h_N (= h_26)
h_N <- t(X_N)%*%y

# estimate of parameters at timepoint 26:
theta_N <- solve(F_N)%*%h_N
print(theta_N)
# [1,] 5.115330
# [2,] 0.188649

# these estimates give the intercept at timepoint 26 and slope (in units of 1 per timepoint, i.e., one per year)

# lets compute yhat values:
yhat_N <- X_N%*%theta_N

# plot:
ggplot(austa_train, aes(x=x_26, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_N), col="red", size=.5) 

# its the same model - but scale of x-axis is changed, so parameters correspond to latest timepoint

# predictions (forecast) with trend model notation:
f <- function(j) rbind(1, j)
f(0)
f(1)
f(2)

# now make predictions for next 10 timepoints:
y_pred_N <- t(f(1:10)) %*% theta_N
 
# also calculate prediction intervals:
e_N <- y - yhat_N
RSS <-  t(e_N)%*%e_N
sigma_2 <- as.numeric(RSS/(n-nparams))

y_pred_N_lwr <- y_pred_N - 1.96*sqrt(sigma_2)*sqrt(1+diag(t(f(1:10))%*%solve(F_N)%*%f(1:10)))
y_pred_N_upr <- y_pred_N + 1.96*sqrt(sigma_2)*sqrt(1+diag(t(f(1:10))%*%solve(F_N)%*%f(1:10)))

# save predictions in dataframe in order to plot:
austa_test$x_26 <- 1:10
austa_test$y_pred_N <- y_pred_N

# plot:
ggplot(austa_train, aes(x=x_26, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_N), col="red", size=.5) +
  geom_point(data=austa_test, aes(x=x_26, y=y_pred_N), col="red", size=.5) + 
  geom_ribbon(data=austa_test, aes(x=x_26,ymin=y_pred_N_lwr, ymax=y_pred_N_upr), inherit.aes=FALSE, alpha=0.2, fill="red") +
  geom_point(data=austa_test, aes(x=x_26,y=int_visit), col="blue", size=.5)


# 4- Global trend model iterative updates

L <- matrix(c(1.,0., 1.,1.),
            byrow=TRUE, nrow=2)
print(L)
Linv <- solve(L)

# now we get the next observation (number 27) and update the data
austa_train_27 <- rbind(austa_train, austa_test[1,1:3])
austa_train_27$x_27 <- seq(-26,0, 1)

austa_test_27 <- austa_test[2:10, 1:3]
austa_test_27$x_27 <- seq(1,9, 1)
# add an extra line (with no value in int_visit)
austa_test_27 <- rbind(austa_test_27, c(NA, 2016,11,10))


# we UPDATE our model
ynew <- austa_train_27$int_visit[27]

F_N_27 <- F_N + f(-n) %*% t(f(-n))  
h_N_27 <- Linv %*% h_N + f(0)*ynew
theta_N_27 <- solve(F_N_27)%*%h_N_27

print(theta_N_27)
# 5.2702522
# 0.1867399
# slight change in parameters

# now make predictions for next 10 timepoints:
y_pred_N_27 <- t(f(1:10)) %*% theta_N_27


# also calculate prediction intervals:
X_N_27 <- cbind(1, austa_train_27$x_27)
y_hat_27 <- X_N_27 %*% theta_N_27
e_N_27 <- austa_train_27$int_visit - y_hat_27
RSS_27 <-  t(e_N_27)%*%e_N_27
sigma_2_27 <- as.numeric(RSS_27/(27-nparams))

y_pred_27_lwr <- y_pred_N_27 - 1.96*sqrt(sigma_2_27)*sqrt(1+diag(t(f(1:10))%*%solve(F_N)%*%f(1:10)))
y_pred_27_upr <- y_pred_N_27 + 1.96*sqrt(sigma_2_27)*sqrt(1+diag(t(f(1:10))%*%solve(F_N)%*%f(1:10)))

# save predictions in dataframe in order to plot:
austa_test_27$y_pred_N_27 <- y_pred_N_27

# plot the updated model WITH the old model:
ggplot(austa_train_27, aes(x=x_27, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=y_hat_27), col="red", size=.5) +
  geom_point(data=austa_test_27, aes(x=x_27, y=y_pred_N_27), col="red", size=.5) + 
  geom_ribbon(data=austa_test_27, aes(x=x_27,ymin=y_pred_27_lwr, ymax=y_pred_27_upr), inherit.aes=FALSE, alpha=0.2, fill="red") +
  geom_point(data=austa_test_27, aes(x=x_27,y=int_visit), col="blue", size=.5)+
  geom_line(data=austa_train, aes(x=(x_26-1), y=yhat_N), col="magenta", size=.5, linetype=2) +
  geom_point(data=austa_test, aes(x=(x_26-1), y=y_pred_N), col="magenta", size=.2)


# 5- WLS - example with correlated epsilons

# going back to the OLS model we made first:
ggplot(austa_train, aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_ols), col="red", size=.5) 
  
# lets inspect the residuals:
plot(e_ols)
# does it look like white noise?
# or is there a pattern?

qqnorm(e_ols)
qqline(e_ols)

acf(e_ols)
gglagplot(austa)


# define SIGMA matrix
SIGMA <- diag(n)
rho <- 0.7
SIGMA[1:5,1:5]
for (row in 1:n) {
  for (col in 1:n) {
    if(row==col) {
      SIGMA[row,col] <- 1
    } else {
      SIGMA[row,col] <- rho^abs(row-col)
    }
  }
}
print(SIGMA[1:5,1:5]) 


# estimate parameters with WLS
WLS <- solve(t(X)%*%solve(SIGMA)%*%X)%*%(t(X)%*%solve(SIGMA)%*%y)

print(WLS)
# [1,] -356.7172120
# [2,]    0.1804264

# compute standard errors on parameters
yhat_wls <- X%*%WLS
e_wls <- y - yhat_wls
RSS_wls <- t(e_wls)%*%solve(SIGMA)%*%e_wls
sigma2_wls <- as.numeric(RSS_wls/(n - 2))
sqrt(sigma2_wls)
V_wls <- sigma2_wls *  solve(t(X)%*%solve(SIGMA)%*%X)
(sqrt(diag(V_wls))) 
# 24.17891705  0.01213485


# compare in plot with OLS:
ggplot(austa_train, aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_wls), col="blue") +
  geom_line(aes(y=yhat_ols), col="red")

# blue is WLS
# red is OLS from before