### Example for TSA Lecture 2
# 1- Fit OLS to most recent data (partial data-set)
# 2- Fit WLS with "lambda-weights"
# 3- Local Trend Model - iterative updates
# 4- Local Trend Model - onestep predictions

# load some packages:
library(fpp2)
library(dplyr)
library(tidyverse)


# load data:
# The "austa" data = Total international visitors to Australia (in millions) 
# from 1980 to 2015. Yearly data (ie one datapoint per year). In total 36 datapoints.

head(austa)
# austa is a "time-series" object, but we will convert to a dataframe

austa_data <- data.frame(int_visit=as.matrix(austa), year=as.matrix(time(austa)))
rownames(austa_data) <- NULL
head(austa_data)


# 1- Fit OLS to most recent data (partial data-set)

nparams <- 2

X <- cbind(1, austa_data$year)
y <- cbind(austa_data$int_visit)


fit_austa <- function(start, stop) {
  
  n <- stop - start + 1
  
  # start=21
  # stop=30
  OLS <- solve(t(X[start:stop,])%*%X[start:stop,])%*%t(X[start:stop,])%*%y[start:stop]
  
  austa_data$yhat <- X%*%OLS
  Xpred <- cbind(1, seq(1980+stop, 1980+stop+9, 1))
  ypred <- Xpred%*%OLS
  pred <- data.frame(Xpred, ypred)
  
  RSS_ols <- sqrt(sum((austa_data$yhat[start:stop] - y[start:stop])^2))
  sigma2_ols <- as.numeric(RSS_ols/(n - nparams))
  Vmatrix_pred <- sigma2_ols*(1+(Xpred%*%solve(t(X[start:stop,])%*%X[start:stop,]))%*%t(Xpred))
  pred$ypred_lwr <- pred$ypred - 1.96*sqrt(diag(Vmatrix_pred))
  pred$ypred_upr <- pred$ypred + 1.96*sqrt(diag(Vmatrix_pred))
  
  myplot <- ggplot(austa_data, aes(x=year, y=int_visit)) +
    geom_point() + 
    geom_point(data=austa_data[start:stop,], col="red") + 
    geom_line(data=austa_data[1:(stop),], aes(y=yhat), col="red", size=.5) + 
    geom_point(data=pred, aes(x=Xpred[,2], y=ypred), col="red", size=.5) + 
    geom_ribbon(data=pred, aes(x=Xpred[,2],ymin=ypred_lwr, ymax=ypred_upr), inherit.aes=FALSE, alpha=0.2, fill="red") +
    ylim(0,9.5) + xlim(1980, 2026)
  
  print(myplot)
}

fit_austa(1,26)
fit_austa(17,26)
fit_austa(21,26)
fit_austa(25,26)


# 2- Fit WLS with "lambda-weights"

n <- 26
lambda = 0.6
weights <- lambda^((n-1):0)
# plot the weights:
barplot(weights, names=1:26)


SIGMA <- diag(n)
for (i in 1:n) {
  SIGMA[i,i] <- 1/lambda^(n-i)
}
print(SIGMA[20:26,20:26])

# estimate parameters with WLS (using only first 26 observations)
WLS <- solve(t(X[1:26,])%*%solve(SIGMA)%*%X[1:26,])%*%(t(X[1:26,])%*%solve(SIGMA)%*%y[1:26])
yhat_wls <- X[1:26,]%*%WLS

# estimate parameters with unweighted OLS (for comparison)
OLS <- solve(t(X[1:26,])%*%X[1:26,])%*%(t(X[1:26,])%*%y[1:26])
yhat_ols <- X[1:26,]%*%OLS


# plot
ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_line(aes(y=yhat_wls), col="blue") +
  geom_line(aes(y=yhat_ols), col="red", linetype=2)


# 3- Local Trend Model - iterative updates


f <- function(j) rbind(1, j)
L <- matrix(c(1.,0., 1.,1.),
            byrow=TRUE, nrow=2)
Linv <- solve(L) 


i <- 1
(F_N <-  (lambda^0) * f(0)%*%t(f(0)))
(h_N <-  (lambda^0) * f(0) * y[i])

theta_N <- solve(F_N)%*%h_N
# does not work - F_N is not invertible!

i <- 2
F_N <- F_N + lambda^(i-1) * f(-(i-1)) %*% t(f(-(i-1)))  
h_N <- lambda * Linv %*% h_N + f(0)*y[i]

theta_N <- solve(F_N)%*%h_N
# now it works (we used two observations and we are estimating two parameters...)

# plot
yhat_N <- t(f(-(i-1):(26-i)))%*%theta_N
ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
  geom_point() + 
  geom_point(data=austa_data[1:i,], col="blue") + 
  geom_line(data=austa_data[1:i,], aes(y=yhat_N[1:i]), col="blue")+
  geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
  xlim(1980, 2005) + ylim(0,5.5) + 
  ggtitle(paste0("N = ", i))

# update iteratively:
for (i in 3:26){
  F_N <- F_N + lambda^(i-1) * f(-(i-1)) %*% t(f(-(i-1)))  
  h_N <- lambda * Linv %*% h_N + f(0)*y[i]
  theta_N <- solve(F_N)%*%h_N
  
  yhat_N <- t(f(-(i-1):(26-i)))%*%theta_N
  plot_N <- ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
    geom_point() + 
    geom_point(data=austa_data[1:i,], col="blue") + 
    geom_line(data=austa_data[1:i,], aes(y=yhat_N[1:i]), col="blue")+
    geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
    xlim(1980, 2005) + ylim(0,5.5) + 
    ggtitle(paste0("N = ", i))
  
  print(plot_N)
}


# 4- Local Trend Model - onestep predictions

i <- 1
(F_N <-  (lambda^0) * f(0)%*%t(f(0)))
(h_N <-  (lambda^0) * f(0) * y[i])

# want to save onestep predictions in dataframe (for lambda = 0.6):
austa_data$onestep_lamb_06  <- NA

# update iteratively:
for (i in 2:25){
  F_N <- F_N + lambda^(i-1) * f(-(i-1)) %*% t(f(-(i-1)))  
  h_N <- lambda * Linv %*% h_N + f(0)*y[i]
  theta_N <- solve(F_N)%*%h_N
  
  yhat_N <- t(f(-(i-1):(26-i)))%*%theta_N
  austa_data$onestep_lamb_06[i+1] <- yhat_N[i+1]
  plot_N <- ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
    geom_point() + 
    geom_point(data=austa_data[1:i,], col="blue") + 
    geom_line(data=austa_data[1:i,], aes(y=yhat_N[1:i]), col="blue")+
    geom_line(data=austa_data[1:(i+1),], aes(y=yhat_N[1:(i+1)]), col="blue", linetype=2)+
    geom_point(data=austa_data[i+1,], aes(y=yhat_N[i+1]), col="blue", size=4)+
    geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
    xlim(1980, 2005) + ylim(0,5.5) + 
    ggtitle(paste0("N = ", i))
  
  print(plot_N)
}


ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
  geom_point(data=austa_data, aes(y=onestep_lamb_06), col="blue", size=4)+
  geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
  xlim(1980, 2005) + ylim(0,5.5) + 
  ggtitle(paste0("N = ", i)) +
  geom_point() 
  

# try another lambda
lambda <- 0.9
i <- 1
(F_N <-  (lambda^0) * f(0)%*%t(f(0)))
(h_N <-  (lambda^0) * f(0) * y[i])

# want to save onestep predictions in dataframe (for lambda = 0.6):
austa_data$onestep_lamb_09  <- NA

# update iteratively:
for (i in 2:25){
  F_N <- F_N + lambda^(i-1) * f(-(i-1)) %*% t(f(-(i-1)))  
  h_N <- lambda * Linv %*% h_N + f(0)*y[i]
  theta_N <- solve(F_N)%*%h_N
  
  yhat_N <- t(f(-(i-1):(26-i)))%*%theta_N
  austa_data$onestep_lamb_09[i+1] <- yhat_N[i+1]
  plot_N <- ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
    geom_point() + 
    geom_point(data=austa_data[1:i,], col="blue") + 
    geom_line(data=austa_data[1:i,], aes(y=yhat_N[1:i]), col="blue")+
    geom_line(data=austa_data[1:(i+1),], aes(y=yhat_N[1:(i+1)]), col="blue", linetype=2)+
    geom_point(data=austa_data[i+1,], aes(y=yhat_N[i+1]), col="blue", size=4)+
    geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
    xlim(1980, 2005) + ylim(0,5.5) + 
    ggtitle(paste0("N = ", i))
  
  print(plot_N)
}


ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
  geom_point(data=austa_data, aes(y=onestep_lamb_06), col="blue", size=4)+
  geom_point(data=austa_data, aes(y=onestep_lamb_09), col="green", size=4)+
  geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
  xlim(1980, 2005) + ylim(0,5.5) + 
  ggtitle(paste0("N = ", i)) +
  geom_point() 


lambda <- 0.3
i <- 1
(F_N <-  (lambda^0) * f(0)%*%t(f(0)))
(h_N <-  (lambda^0) * f(0) * y[i])

# want to save onestep predictions in dataframe (for lambda = 0.6):
austa_data$onestep_lamb_03  <- NA

# update iteratively:
for (i in 2:25){
  F_N <- F_N + lambda^(i-1) * f(-(i-1)) %*% t(f(-(i-1)))  
  h_N <- lambda * Linv %*% h_N + f(0)*y[i]
  theta_N <- solve(F_N)%*%h_N
  
  yhat_N <- t(f(-(i-1):(26-i)))%*%theta_N
  austa_data$onestep_lamb_03[i+1] <- yhat_N[i+1]
  plot_N <- ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
    geom_point() + 
    geom_point(data=austa_data[1:i,], col="blue") + 
    geom_line(data=austa_data[1:i,], aes(y=yhat_N[1:i]), col="blue")+
    geom_line(data=austa_data[1:(i+1),], aes(y=yhat_N[1:(i+1)]), col="blue", linetype=2)+
    geom_point(data=austa_data[i+1,], aes(y=yhat_N[i+1]), col="blue", size=4)+
    geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
    xlim(1980, 2005) + ylim(0,5.5) + 
    ggtitle(paste0("N = ", i))
  
  print(plot_N)
}


ggplot(austa_data[1:26,], aes(x=year, y=int_visit)) +
  geom_point(data=austa_data, aes(y=onestep_lamb_03), col="magenta", size=4)+
  geom_point(data=austa_data, aes(y=onestep_lamb_06), col="blue", size=4)+
  geom_point(data=austa_data, aes(y=onestep_lamb_09), col="green", size=4)+
  geom_line(aes(y=yhat_ols), col="red", linetype=2) + 
  xlim(1980, 2005) + ylim(0,5.5) + 
  ggtitle(paste0("N = ", i)) +
  geom_point()