天天看點

關于ACF以及PACF在時間序列分析中的應用---ARIMA滞後項階數确定(附代碼)

原理背景

在時間序列模組化中,我們常用到的一個模型就是ARIMA,但是在使用該模型時,一個問題就是如何确定AR,MA中的p和q,即滞後項的級數。這時,一般我們會采用ACF(auto-correlation function) 以及PACF(partial auto-correlation function)來确定。

關于ACF以及PACF的了解,這裡推薦一篇非常棒的英文部落格:

Significance of ACF and PACF Plots In Time Series Analysis

然後這裡我就文章的内容做一些說明。

0.假設我們已經通過差分(difference)得到了一個平穩(stationary)時間序列。

1.首先,我們先來看一下AR(auto regressive process)部分級數p的确定。我們會使用PACF,為什麼不使用ACF來确定呢?原因就是在ACF中即使是非常滞後的項也可能顯示很高的相關性,因為該滞後項可以通過影響靠前的滞後項間接影響目前時間節點的數值。為了排除這種影響,我們引入PACF,對于滞後項xt-m,其與xt的PACF相關系數為剔除前面所有滞後項xt-1,xt-2,xt-3,…xt-(m-1)之後的值。也就是說,該值能更加精準的反映出該滞後項自身對目前時間節點的影響,是以用PACF來确定AR滞後項級數會比ACF更有說服力。

2.然後我們來考慮MA(moving average process)。也就是說我們假定我們的目标序列是一個由随機擾動滞後項形成的序列。

關于ACF以及PACF在時間序列分析中的應用---ARIMA滞後項階數确定(附代碼)

這時,我們不應該選擇使用PACF來确定滞後項的級數。為什麼呢?因為對于這樣的序列,不存在說某個單獨的滞後項(随機擾動)會直接影響目前時間節點的值,而這恰恰是PACF能得到的,是以在這裡PACF就失去了它的價值。是以在這裡我們會選擇使用ACF作為級數的确定标準。

執行個體

下面是一個基于R語言的應用執行個體。

library(forecast)
year=trunc(time(AirPassengers))
month=cycle(AirPassengers)
dataset=cbind(year, month, AirPassengers)
data=data.frame(dataset)

Tseries=ts(data$AirPassengers,frequency=12,start=c(1949,1))

#train/test sets
train=window(Tseries,start=c(1949,1),end=c(1959,12))
test=window(Tseries,start=c(1960,1),end=c(1960,12))

#acf to original time series
Acf(train,lag.max=36)
#pacf to original time series
Pacf(train,lag.max=36)

#從acf的拖尾結果可以看出,該時間序列非平穩,是以我們需要對其差分處理
#這裡同時考慮差分以及季節差分(消除季節的周期性)

#我們需要先确定差分次數
N_DIFF=ndiffs(train)
N_SEAS_DIFF=nsdiffs(train)
DIFFS=data.frame(N_DIFF,N_SEAS_DIFF)

#差分後序列的Acf&Pacf
#根據前面的最佳差分次數的結果,普通差分(外層,lag=1)與季節差分(内層,lag=12)各進行一次
#這裡對train做log的意義在于将Multiplicative model 轉化為 addative model,友善差分
Acf(diff(diff(log(train),12),1),lag.max=36)
Pacf(diff(diff(log(train), 12),1), lag.max=36)


#接下來,嘗試不同的模型
air_arima_1=Arima(train, lambda=0, order=c(0,1,0), seasonal=c(0,1,0))
air_arima_2=Arima(train, lambda=0, order=c(1,1,0), seasonal=c(0,1,0))
air_arima_3=Arima(train, lambda=0, order=c(0,1,0), seasonal=c(1,1,0))
air_arima_4=Arima(train, lambda=0, order=c(0,1,1), seasonal=c(0,1,0))
air_arima_5=Arima(train, lambda=0, order=c(0,1,0), seasonal=c(0,1,1))
air_arima_6=Arima(train, lambda=0, order=c(0,1,1), seasonal=c(0,1,1))
air_arima_7=Arima(train, lambda=0, order=c(1,1,1), seasonal=c(0,1,1))
air_arima_8=Arima(train, lambda=0, order=c(0,1,1), seasonal=c(1,1,1))
air_arima_9=Arima(train, lambda=0, order=c(1,1,1), seasonal=c(1,1,1))
air_arima_auto=auto.arima(train, lambda=-0)

#aic
aic_corr=rbind(air_arima_1$aicc , air_arima_2$aicc , air_arima_3$aicc , air_arima_4$aicc , air_arima_5$aicc , air_arima_6$aicc, air_arima_7$aicc, air_arima_8$aicc, air_arima_9$aicc, air_arima_auto$aicc )
rownames(aic_corr)=list("arima_1", "arima_2", "arima_3", "arima_4", "arima_5", "arima_6",  "arima_7", "arima_8", "arima_9", "arima_auto")


#order info
arima_orders=rbind(air_arima_1$arma , air_arima_2$arma , air_arima_3$arma , air_arima_4$arma , air_arima_5$arma , air_arima_6$arma, air_arima_7$arma, air_arima_8$arma, air_arima_9$arma, air_arima_auto$arma)
arima_orders=arima_orders[,c(1,6,2,3,7,4,5)]

results=data.frame(aic_corr,arima_orders)
results

#forecast
air_arima_1_fc=forecast(air_arima_1, h=12)
air_arima_2_fc=forecast(air_arima_2, h=12)
air_arima_3_fc=forecast(air_arima_3, h=12)
air_arima_4_fc=forecast(air_arima_4, h=12)
air_arima_5_fc=forecast(air_arima_5, h=12)
air_arima_6_fc=forecast(air_arima_6, h=12)
air_arima_7_fc=forecast(air_arima_7, h=12)
air_arima_8_fc=forecast(air_arima_8, h=12)
air_arima_9_fc=forecast(air_arima_9, h=12)
air_arima_auto_fc=forecast(air_arima_auto, h=12)

#compute accuracy
acc_arima_1=accuracy(air_arima_1_fc ,test)
acc_arima_2=accuracy(air_arima_2_fc ,test)
acc_arima_3=accuracy(air_arima_3_fc ,test)
acc_arima_4=accuracy(air_arima_4_fc ,test)
acc_arima_5=accuracy(air_arima_5_fc ,test)
acc_arima_6=accuracy(air_arima_6_fc ,test)
acc_arima_7=accuracy(air_arima_4_fc ,test)
acc_arima_8=accuracy(air_arima_5_fc ,test)
acc_arima_9=accuracy(air_arima_6_fc ,test)
acc_arima_auto=accuracy(air_arima_auto_fc ,test)
performance=rbind(acc_arima_1 , acc_arima_2 , acc_arima_3 , acc_arima_4 , acc_arima_5 , acc_arima_6, acc_arima_7 , acc_arima_8 , acc_arima_9, acc_arima_auto )

rownames(performance)=list("Arima_1 Train","Arima_1 Test", "Arima_2 Train","Arima_2 Test", "Arima_3 Train","Arima_3 Test", "Arima_4 Train","Arima_4 Test", "Arima_5 Train", "Arima_5 Test", "Arima_6 Train", "Arima_6 Test",
                              "Arima_7 Train","Arima_7 Test", "Arima_8 Train", "Arima_8 Test", "Arima_9 Train", "Arima_9 Test","Arima Auto Train", "Arima Auto Test")

performance=performance[,1:6]
           

下面是不同order次數下的模型在訓練集以及測試集上的表現。

關于ACF以及PACF在時間序列分析中的應用---ARIMA滞後項階數确定(附代碼)

繼續閱讀