## Using marima on Mink-Muskrat data ## ## Sample solution as follow up on lecture mm <- read.table("example_mink_muskrat.csv",header=TRUE,sep="\t") str(mm) ## 3 columns with integers :-) ## Years and number of skins traded that year. ## Plotting par(mar=c(3,3,1.5,1), mgp=c(2,0.7,0)) matplot(mm[,1], mm[,-1],type="l", lty=1,col=1:2,xlab="Years") ## Different scales so in two plots instead: par(mfrow=c(2,1)) plot(mink~years, mm, type="l", lty=1,col=1:2) plot(muskrat~years, mm, type="l", lty=1,col=1:2) summary(mm) ## Seems like variance is increasing with the value ... trying a log transform X <- data.frame(years=mm$years, lmink=log(mm$mink), lmuskrat=log(mm$muskrat)) ## Plots of transformed variables plot(lmink~years, X, type="l", lty=1,col=1:2) plot(lmuskrat~years, X, type="l", lty=1,col=1:2) ## Looks better. There still seem to be an upward trend for the lmuskrat. ## Plotting ACF and CCF acf(X[,-1]) ## Damping sine osc. are seen. For a bivariate model this can be handled with VAR(1). ## Starting marima library(marima2) ## Let us start with a marima(1,0,1) par(mfrow=c(1,1)) #struct11 <- define.model(kvar=3, ar=1, ma=1, rem.var=1, indep=NULL) # rem.var is to ignore the years M1 <- marima("lmink ~ AR(1) + lmuskrat(1) + MA(1)", "lmuskrat ~ AR(1)+ lmink(1) + MA(1)", data=X, means=1, Check=FALSE, Plot="log.det", penalty=0) ## See the estimates summary(M1) ## Looking at acf and ccf of residuals par(mfrow=c(2,1)) plot(M1$residuals[1, ], type="l") plot(M1$residuals[2, ], type="l") par(mfrow=c(1,3)) acf(M1$residuals[1, ], na.action=na.pass, main="LogMink residuals") acf(M1$residuals[2, ], na.action=na.pass, main="LogMuskrat residuals") ccf(M1$residuals[1, ], M1$residuals[2, ], na.action=na.pass, main="CCF()") ## Looks reasonable good ... there is a small correlation left in CCF for ## lag zero and some random ones at higher lags ## however, we'll try an marima(2,0,2) model M2 <- marima("lmink ~ AR(1:2) + lmuskrat(1:2) + MA(1:2)", "lmuskrat ~ AR(1:2) + lmink(1:2) + MA(1:2)", data=X, means=1, Check=FALSE, Plot="log.det", penalty=0) ## Again: Nice convergence. summary(M2) ## Just to illustrate what happens when using a penalty greater than zero: M2p <- marima("lmink ~ AR(1:2) + lmuskrat(1:2) + MA(1:2)", "lmuskrat ~ AR(1:2) + lmink(1:2) + MA(1:2)", data=X, means=1, Check=FALSE, Plot="log.det", penalty=2) ## Again: Nice convergence. summary(M2p) par(mfrow=c(1,3)) acf(M2p$residuals[1, ], na.action=na.pass, main="LogMink residuals") acf(M2p$residuals[2, ], na.action=na.pass, main="LogMuskrat residuals") ccf(M2p$residuals[1, ], M2p$residuals[2, ], na.action=na.pass, main="CCF()") ## One next step could be to see if year is significant as an explanatory variable. ## Predicting 5 years ahead newdata <- rbind(X, NA, NA, NA, NA, NA) #pred <- arma.forecast(newdata, nstart=nrow(X), nstep=5, marima=M2p3) pred <- predict(M2, newdata, nstart=nrow(X), nstep=5) ## Plot str(pred) par(mfrow=c(2,1)) i <- 1 plot(lmink~years, X, xlim=c(1850,1915)) lines(1850+0:66, pred$forecasts[i,]) pred.int <- pred$forecasts[i,63:67] + cbind(rep(0, 5), -1, 1)*qnorm(0.975)*sqrt(pred$pred.var[i,i,]) matlines(1850+62:66, pred.int, lty=c(1,2,2), col=2, lwd=2 ) i <- 2 plot(lmuskrat~years, X, xlim=c(1850,1915)) lines(1850+0:66, pred$forecasts[i,]) pred.int <- pred$forecasts[i,63:67] + cbind(rep(0, 5), -1, 1)*qnorm(0.975)*sqrt(pred$pred.var[i,i,]) matlines(1850+62:66, pred.int, lty=c(1,2,2), col=2, lwd=2 ) ## We should also plot in the original domain i <- 1 plot(exp(lmink)~years, X, xlim=c(1850,1915), ylab="Mink") lines(1850+0:66, exp(pred$forecasts[i,])) pred.int <- pred$forecasts[i,63:67] + cbind(rep(0, 5), -1, 1)*qnorm(0.975)*sqrt(pred$pred.var[i,i,]) matlines(1850+62:66, exp(pred.int), lty=c(1,2,2), col=2, lwd=2 ) i <- 2 plot(exp(lmuskrat)~years, X, xlim=c(1850,1915), ylab="Muskrat") lines(1850+0:66, exp(pred$forecasts[i,])) pred.int <- pred$forecasts[i,63:67] + cbind(rep(0, 5), -1, 1)*qnorm(0.975)*sqrt(pred$pred.var[i,i,]) matlines(1850+62:66, exp(pred.int), lty=c(1,2,2), col=2, lwd=2 )