天天看點

用R中prophet包做時序預測

      最近又接到一個預測項目需求,主要是預測每天投資使用者會投資不同産品多少金額,屬于每天即時預測,需要拿最近一年的資料做測試集,來預測每天不同期限産品分别被投資多少金額,然後通過這些金額每天找借款端比對借款需求,借款端營運通過資金端需求來動态調整營銷活動,通過資金需求多少來有效營運借款需求,形成與資金端的良性互動,節約閑置資金成本。

      就這樣一個需求,如何實作?我首先想到通過業務經驗的方法來實作,需要從N多産品中抽絲剝繭,找到産品、推廣,促銷,節日之間的關系,最終确定一個期限的産品當天成交預測值;但是從成交資料來看,這些資料都是每天在變化的,不能明顯看出兩者之間有明顯的相關關系,用這些現有的資料關系能預測将來的成交嗎?

      既然是預測問題,那我首先想到的是時序預測,那麼問題就來了,如何程式化這些現有的資料關系?

      在網上找了一些相關模型,并沒有模型來實作我所說的推廣,促銷,節日,加息,降息等業務時點與預測資料的關系,仔細查找,首先(注意還有後續,我會在下篇博文做出解釋)發現了一個名叫prophet的時序預測包,裡面可以用不同類型的holiday值來對模型進行業務時點的比對和修正,非常适合目前的項目需求。不管三七二十一,先安裝了再說。

      安裝了之後需要初始化資料,初始化節假日,初始化模型,調整參數,衡量結果,具體代碼如下:

library(prophet)

library(dplyr) #初始化資料 all<-read.csv('d:/Rdata/zjd/ts/all.csv',na.string='NA',header=T)

alln<-read.csv('d:/Rdata/zjd/ts/alln.csv',na.string='NA',header=T)

qb30<-read.csv('d:/Rdata/zjd/ts/qb30.csv',na.string='NA',header=T)

qb45<-read.csv('d:/Rdata/zjd/ts/qb45.csv',na.string='NA',header=T)

qb60<-read.csv('d:/Rdata/zjd/ts/qb60.csv',na.string='NA',header=T)

qb90<-read.csv('d:/Rdata/zjd/ts/qb90.csv',na.string='NA',header=T)

qb180<-read.csv('d:/Rdata/zjd/ts/qb180.csv',na.string='NA',header=T)

qb270<-read.csv('d:/Rdata/zjd/ts/qb270.csv',na.string='NA',header=T)

qb365<-read.csv('d:/Rdata/zjd/ts/qb365.csv',na.string='NA',header=T)

qb730<-read.csv('d:/Rdata/zjd/ts/qb730.csv',na.string='NA',header=T)

qb1095<-read.csv('d:/Rdata/zjd/ts/qb1095.csv',na.string='NA',header=T)

qb1095m<-read.csv('d:/Rdata/zjd/ts/qb1095+.csv',na.string='NA',header=T)

qbother<-read.csv('d:/Rdata/zjd/ts/qbother.csv',na.string='NA',header=T)

#holiday<-read.csv('d:/Rdata/zjd/ts/holidays.csv',na.string='NA',header=T)

historyalln <- data.frame(ds = seq(as.Date('2017-01-01'), as.Date('2017-09-17'), by = 'd'), y = alln$yn)

historyalln <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = all$yn)

historyallo <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = all$yo)

history30n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb30$yn)

plot(history30n$ds,history30n$y)

history45n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb45$yn)

history60n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb60$yn)

history90n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb90$yn)

history180n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb180$yn)

history270n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb270$yn)

history365n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb365$yn)

history730n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb730$yn)

history1095n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095$yn)

history1095m <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095m$yn)

historyothern <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qbother$yn)

history30o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb30$yo)

history45o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb45$yo)

history60o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb60$yo)

history90o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb90$yo)

history180o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb180$yo)

history270o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb270$yo)

history365o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb365$yo)

history730o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb730$yo)

history1095o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095$yo)

history1095p <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095m$yo)

historyothero <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qbother$yo) #初始化節假日

holiday <- data_frame(

  holiday = 'holiday',

  ds = as.Date(c(  '2017-01-01','2017-01-27',  '2017-04-02','2017-04-29',

                   '2017-05-28')),

  lower_window = 0,

  upper_window = 2

)

payday <- data_frame(

  holiday = 'payday',

  ds = as.Date(c(  '2016-01-26',

    '2016-02-26','2016-03-26',  '2016-04-26','2016-05-26',

    '2016-06-26','2016-07-26',  '2016-08-26','2016-09-26',

    '2016-10-26','2016-11-26',  '2016-12-26','2017-01-26',

    '2017-02-26','2017-03-26',  '2017-04-26','2017-05-26',

    '2017-06-26','2017-07-26',  '2017-08-26','2017-09-26')),

  lower_window = 0,

  upper_window = 1

)

VIP <- data_frame(

  holiday = 'VIP',

  ds = as.Date(c(

    '2016-06-09','2016-07-09',  '2016-08-09','2016-09-09',

    '2016-10-09','2016-11-09',  '2016-12-09','2017-01-09',

    '2017-02-09','2017-03-09',  '2017-04-09','2017-05-09',

    '2017-06-09', '2017-07-09', '2017-08-09',  '2017-09-09')),

  lower_window = 0,

  upper_window = 1

)

raiserate <- data_frame(

  holiday = 'raiserate',

  ds = as.Date(c( '2017-05-08','2017-06-19',  '2017-07-24')),

  lower_window = 0,

  upper_window = 7

)

reducerate <- data_frame(

  holiday = 'reducerate',

  ds = as.Date(c('2016-06-20','2017-02-22', '2017-03-27', '2017-04-05')),

  lower_window = 0,

  upper_window = 7

)

newcustomer <- data_frame(

  holiday = 'newcustomer',

  ds = as.Date(c('2016-01-01','2016-01-15','2016-07-18','2016-08-24',

                 '2016-09-07','2016-12-09','2016-12-22','2017-01-06','2017-04-12')),

  lower_window = 0,

  upper_window = 7

)

promotion <- data_frame(

  holiday = 'promotion',

  ds = as.Date(c('2016-07-07','2016-11-03','2017-01-10', '2017-01-17', '2017-02-10')),

  lower_window = 0,

  upper_window = 7

)

holidays<-bind_rows(holiday,payday,VIP, raiserate,reducerate,newcustomer,promotion) #初始化模型 nall <- prophet(historyalln,

                holidays = holidays,

                seasonality.prior.scale=5,

                changepoint.prior.scale=0.9,

                holidays.prior.scale = 20,

                mcmc.samples = 500,

                interval.width=0.9,

                uncertainty.samples = 30)

oall <- prophet(historyallo,

                holidays = holidays,

                seasonality.prior.scale=5,

                changepoint.prior.scale=0.1,

                holidays.prior.scale = 1,

                mcmc.samples = 100,

                interval.width=0.9,

                uncertainty.samples = 100)

n30 <- prophet(history30n,

               holidays = holidays,

               seasonality.prior.scale=5,

               changepoint.prior.scale=0.1,

               holidays.prior.scale = 1,

               mcmc.samples = 100,

               interval.width=0.9,

               uncertainty.samples = 100)

n45 <- prophet(history45n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n60 <- prophet(history60n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n90 <- prophet(history90n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n180 <- prophet(history180n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n270 <- prophet(history270n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n365 <- prophet(history365n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n730 <- prophet(history730n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n1095 <- prophet(history1095n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n1095m <- prophet(history1095m,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

nother <- prophet(historyothern,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o30 <- prophet(history30o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o45 <- prophet(history45o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o60 <- prophet(history60o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o90 <- prophet(history90o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o180 <- prophet(history180o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o270 <- prophet(history270o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o365 <- prophet(history365o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o730 <- prophet(history730o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o1095 <- prophet(history1095o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o1095p <- prophet(history1095p,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

oother <- prophet(historyothero,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500) #設定預測時間 futurealln <- make_future_dataframe(nall, periods = 7)

futureallo <- make_future_dataframe(oall, periods = 7)

future30n <- make_future_dataframe(n30, periods = 7)

future45n <- make_future_dataframe(n45, periods = 7)

future60n <- make_future_dataframe(n60, periods = 7)

future90n <- make_future_dataframe(n90, periods = 7)

future180n <- make_future_dataframe(n180, periods = 7)

future270n <- make_future_dataframe(n270, periods = 7)

future365n <- make_future_dataframe(n365, periods = 7)

future730n <- make_future_dataframe(n730, periods = 7)

future1095n <- make_future_dataframe(n1095, periods = 7)

future1095m <- make_future_dataframe(n1095m, periods = 7)

futureothern <- make_future_dataframe(nother, periods = 7)

future30o <- make_future_dataframe(o30, periods = 7)

future45o <- make_future_dataframe(o45, periods = 7)

future60o <- make_future_dataframe(o60, periods = 7)

future90o <- make_future_dataframe(o90, periods = 7)

future180o <- make_future_dataframe(o180, periods = 7)

future270o <- make_future_dataframe(o270, periods = 7)

future365o <- make_future_dataframe(o365, periods = 7)

future730o <- make_future_dataframe(o730, periods = 7)

future1095o <- make_future_dataframe(o1095, periods = 7)

future1095p <- make_future_dataframe(o1095p, periods = 7)

futureothero <- make_future_dataframe(oother, periods = 7)

tail(future30n)

#預測

forecastalln <- predict(nall, futurealln)

forecastallo <- predict(oall, futureallo)

forecast30n <- predict(n30, future30n)

forecast45n <- predict(n45, future45n)

forecast60n <- predict(n60, future60n)

forecast90n <- predict(n90, future90n)

forecast180n <- predict(n180, future180n)

forecast270n <- predict(n270, future270n)

forecast365n <- predict(n365, future365n)

forecast730n <- predict(n730, future730n)

forecast1095n <- predict(n1095, future1095n)

forecast1095m <- predict(n1095m, future1095m)

forecastothern <- predict(nother, futureothern)

forecast30o <- predict(o30, future30o)

forecast45o <- predict(o45, future45o)

forecast60o <- predict(o60, future60o)

forecast90o <- predict(o90, future90o)

forecast180o <- predict(o180, future180o)

forecast270o <- predict(o270, future270o)

forecast365o <- predict(o365, future365o)

forecast730o <- predict(o730, future730o)

forecast1095o <- predict(o1095, future1095o)

forecast1095p <- predict(o1095p, future1095p)

forecastothero <- predict(oother, futureothero)

# tail(forecast90n[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')]) forecastalln %>%

  select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%

  filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%

  tail(10) forecastallo %>%

  select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%

  filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%

  tail(10) #直線預測

plot(nall, forecastalln)

plot(n30, forecast30n)

plot(oall, forecastallo)

#趨勢分解

prophet_plot_components(nall, forecastalln)

prophet_plot_components(oall, forecastallo) a<-cbind(futurealln[1],forecastalln$yhat,forecast30n$yhat,forecast45n$yhat,forecast60n$yhat,forecast90n$yhat,forecast180n$yhat,forecast270n$yhat,forecast365n$yhat,forecast730n$yhat,forecast1095n$yhat,forecast1095m$yhat,forecastothern$yhat)

b<-cbind(futureallo[1],forecastallo$yhat,forecast30o$yhat,forecast45o$yhat,forecast60o$yhat,forecast90o$yhat,forecast180o$yhat,forecast270o$yhat,forecast365o$yhat,forecast730o$yhat,forecast1095o$yhat,forecast1095p$yhat,forecastothero$yhat)

write.csv(a,file = "d:/Rdata/zjd/ts/qballn.csv")

write.csv(b,file = "d:/Rdata/zjd/ts/qballo.csv")

目前預測準确率總金額能較為準确的預測,調整好參數後總成交金額內插補點約為1%左右;不同産品期限成交金額預測會有10%上下的波動。 參考文獻:http://blog.csdn.net/sinat_26917383/article/details/57419862 http://blog.csdn.net/justinjia91/article/details/62470076

繼續閱讀