rm(list=ls())

# Use for handling time series
#install.packages("data.table")
#install.packages("lubridate")
library(data.table)
library(lubridate)

# Install phyphox app on your phone and take "Acceleration with g", record a stable walk with your phone in the pocket
# The "dots" and Export data -> CSV (Comma, decimal point), and send it in a mail, download and unzip
# Read the walking data
D <- read.table("data/Acceleration_without_g-walking1/Raw Data.csv", header=TRUE, sep=",")
names(D) <- c("t","x","y","z","absacc")
# Sampling period, equi-distant?
plot(diff(D$t))
unique(diff(D$t))
median(diff(D$t))
# It's seconds
D$t[nrow(D)] - D$t[1]
# Convert
D$t <- as.POSIXct("2024-03-05 10:00", tz="UTC") + D$t

# As data.table
D <- setDT(D)

# Resample by averaging
nrow(D)
D <- D[ ,lapply(.SD,mean), by=ceiling_date(t,".05s")]
nrow(D)

# Plot them
plot.ts(D)

# Subset
i <- 300:680
plot.ts(D[i])

# Take one of them
x <- D$absacc[i]

# Plot diagnosis function
plotdiag <- function(x){
  layout(rbind(1,2:3))
  par(mar=c(3,3,1,1), mgp=c(2, 0.7,0))
  plot(x, ylab="X", type="l")
  acf(x, lag.max, lwd=2)
  if(!is.na(seasonalperiod)){ abline(v=(1:4)*seasonalperiod, col=2, lty=3) }
  pacf(x, lag.max, lwd=2)
  if(!is.na(seasonalperiod)){ abline(v=(1:4)*seasonalperiod, col=2, lty=3) }
}

# Let's plot the original time series
seasonalperiod <- 21
lag.max <- 100
plotdiag(x)

fit1 <- arima(x, order=c(0,0,0), seasonal = list(order = c(1, 0, 0), period = seasonalperiod), include.mean = TRUE)
fit1
plotdiag(fit1$residuals)

# Go on extending the model from here
fit2 <- arima(x, order=c(1,0,0), seasonal = list(order = c(1, 0, 0), period = seasonalperiod), include.mean = TRUE)
fit2
plotdiag(fit2$residuals)

# Go on extending the model from here
fit3 <- arima(x, order=c(2,0,0), seasonal = list(order = c(1, 0, 0), period = seasonalperiod), include.mean = TRUE)
fit3
plotdiag(fit3$residuals)


# ...

# See AIC (AIC isn't fair as you don't use the same observations!)
AIC(fit1)
AIC(fit2)
AIC(fit3)

# Take the best fit for use in rest of code
fit <- fit2

# Use it for prediction
lower <- val$pred - qnorm(0.975) * val$se
upper <- val$pred + qnorm(0.975) * val$se
val <- predict(fit, n.ahead=200)
plot(val$pred, ylim=range(lower,upper))
lines(lower, lty=2)
lines(upper, lty=2)

# Further residual analysis
# standardized residuals, ACF, Ljung-Box test
tsdiag(fit)
# cumulative periodogram
cpgram(fit$res)
# sign test
sum(fit$res[-n]-fit$res[-1]>0) # number of sign changes
(n-1)/2 - 2*sqrt((n-1)/4) # lower bound
(n-1)/2 + 2*sqrt((n-1)/4) # uper bound
# QQ-normal plot
qqnorm(val$pred)
qqline(val$pred)
# Histogram with normal density
hist(fit$res, probability = TRUE)
curve(dnorm(x,sd=sqrt(fit$sigma2)), col=2, lwd=2, add = TRUE)


################################################################
# Aternatively a "trend model" with periodicy modelled with Fourier series
xt <- 2*pi * 1:length(x) / seasonalperiod
# Base harmonic
plot(sin( xt ), type="l")
# First and second harmonic
plot(sin( 2*xt ), type="l")
plot(sin( 3*xt ), type="l")

fs <- function(n){
  sins <- sapply(1:n, function(i){
    sin( i*xt )
  })
  coss <- sapply(1:n, function(i){
    cos( i*xt )
  })
  val <- cbind(sins, coss)
  colnames(val) <- c(paste0("sin",1:n), paste0("cos",1:n))
  return(val)
}

fit <- lm(x ~ fs(3))
summary(fit)
plot(fit$residuals)
acf(fit$residuals)

# This is the estimated "smoothed" mean pattern
nper <- floor(length(x)/seasonalperiod)
tmp <- matrix(x[1:(nper*seasonalperiod)], nrow=seasonalperiod)
plot(0, xlim=c(1,seasonalperiod), ylim=range(tmp))
for(i in 1:nper){
  lines(tmp[ ,i], col=i)
}
lines(fit$fitted.values[1:seasonalperiod], lwd=4)

# Clearly something is needed, which aligns the periods!