最近又接到一個預測項目需求,主要是預測每天投資使用者會投資不同産品多少金額,屬于每天即時預測,需要拿最近一年的資料做測試集,來預測每天不同期限産品分别被投資多少金額,然後通過這些金額每天找借款端比對借款需求,借款端營運通過資金端需求來動态調整營銷活動,通過資金需求多少來有效營運借款需求,形成與資金端的良性互動,節約閑置資金成本。
就這樣一個需求,如何實作?我首先想到通過業務經驗的方法來實作,需要從N多産品中抽絲剝繭,找到産品、推廣,促銷,節日之間的關系,最終确定一個期限的産品當天成交預測值;但是從成交資料來看,這些資料都是每天在變化的,不能明顯看出兩者之間有明顯的相關關系,用這些現有的資料關系能預測将來的成交嗎?
既然是預測問題,那我首先想到的是時序預測,那麼問題就來了,如何程式化這些現有的資料關系?
在網上找了一些相關模型,并沒有模型來實作我所說的推廣,促銷,節日,加息,降息等業務時點與預測資料的關系,仔細查找,首先(注意還有後續,我會在下篇博文做出解釋)發現了一個名叫prophet的時序預測包,裡面可以用不同類型的holiday值來對模型進行業務時點的比對和修正,非常适合目前的項目需求。不管三七二十一,先安裝了再說。
安裝了之後需要初始化資料,初始化節假日,初始化模型,調整參數,衡量結果,具體代碼如下:
library(prophet)
library(dplyr) #初始化資料 all<-read.csv('d:/Rdata/zjd/ts/all.csv',na.string='NA',header=T)
alln<-read.csv('d:/Rdata/zjd/ts/alln.csv',na.string='NA',header=T)
qb30<-read.csv('d:/Rdata/zjd/ts/qb30.csv',na.string='NA',header=T)
qb45<-read.csv('d:/Rdata/zjd/ts/qb45.csv',na.string='NA',header=T)
qb60<-read.csv('d:/Rdata/zjd/ts/qb60.csv',na.string='NA',header=T)
qb90<-read.csv('d:/Rdata/zjd/ts/qb90.csv',na.string='NA',header=T)
qb180<-read.csv('d:/Rdata/zjd/ts/qb180.csv',na.string='NA',header=T)
qb270<-read.csv('d:/Rdata/zjd/ts/qb270.csv',na.string='NA',header=T)
qb365<-read.csv('d:/Rdata/zjd/ts/qb365.csv',na.string='NA',header=T)
qb730<-read.csv('d:/Rdata/zjd/ts/qb730.csv',na.string='NA',header=T)
qb1095<-read.csv('d:/Rdata/zjd/ts/qb1095.csv',na.string='NA',header=T)
qb1095m<-read.csv('d:/Rdata/zjd/ts/qb1095+.csv',na.string='NA',header=T)
qbother<-read.csv('d:/Rdata/zjd/ts/qbother.csv',na.string='NA',header=T)
#holiday<-read.csv('d:/Rdata/zjd/ts/holidays.csv',na.string='NA',header=T)
historyalln <- data.frame(ds = seq(as.Date('2017-01-01'), as.Date('2017-09-17'), by = 'd'), y = alln$yn)
historyalln <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = all$yn)
historyallo <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = all$yo)
history30n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb30$yn)
plot(history30n$ds,history30n$y)
history45n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb45$yn)
history60n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb60$yn)
history90n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb90$yn)
history180n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb180$yn)
history270n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb270$yn)
history365n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb365$yn)
history730n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb730$yn)
history1095n <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095$yn)
history1095m <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095m$yn)
historyothern <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qbother$yn)
history30o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb30$yo)
history45o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb45$yo)
history60o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb60$yo)
history90o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb90$yo)
history180o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb180$yo)
history270o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb270$yo)
history365o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb365$yo)
history730o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb730$yo)
history1095o <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095$yo)
history1095p <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qb1095m$yo)
historyothero <- data.frame(ds = seq(as.Date('2016-01-01'), as.Date('2017-09-11'), by = 'd'), y = qbother$yo) #初始化節假日
holiday <- data_frame(
holiday = 'holiday',
ds = as.Date(c( '2017-01-01','2017-01-27', '2017-04-02','2017-04-29',
'2017-05-28')),
lower_window = 0,
upper_window = 2
)
payday <- data_frame(
holiday = 'payday',
ds = as.Date(c( '2016-01-26',
'2016-02-26','2016-03-26', '2016-04-26','2016-05-26',
'2016-06-26','2016-07-26', '2016-08-26','2016-09-26',
'2016-10-26','2016-11-26', '2016-12-26','2017-01-26',
'2017-02-26','2017-03-26', '2017-04-26','2017-05-26',
'2017-06-26','2017-07-26', '2017-08-26','2017-09-26')),
lower_window = 0,
upper_window = 1
)
VIP <- data_frame(
holiday = 'VIP',
ds = as.Date(c(
'2016-06-09','2016-07-09', '2016-08-09','2016-09-09',
'2016-10-09','2016-11-09', '2016-12-09','2017-01-09',
'2017-02-09','2017-03-09', '2017-04-09','2017-05-09',
'2017-06-09', '2017-07-09', '2017-08-09', '2017-09-09')),
lower_window = 0,
upper_window = 1
)
raiserate <- data_frame(
holiday = 'raiserate',
ds = as.Date(c( '2017-05-08','2017-06-19', '2017-07-24')),
lower_window = 0,
upper_window = 7
)
reducerate <- data_frame(
holiday = 'reducerate',
ds = as.Date(c('2016-06-20','2017-02-22', '2017-03-27', '2017-04-05')),
lower_window = 0,
upper_window = 7
)
newcustomer <- data_frame(
holiday = 'newcustomer',
ds = as.Date(c('2016-01-01','2016-01-15','2016-07-18','2016-08-24',
'2016-09-07','2016-12-09','2016-12-22','2017-01-06','2017-04-12')),
lower_window = 0,
upper_window = 7
)
promotion <- data_frame(
holiday = 'promotion',
ds = as.Date(c('2016-07-07','2016-11-03','2017-01-10', '2017-01-17', '2017-02-10')),
lower_window = 0,
upper_window = 7
)
holidays<-bind_rows(holiday,payday,VIP, raiserate,reducerate,newcustomer,promotion) #初始化模型 nall <- prophet(historyalln,
holidays = holidays,
seasonality.prior.scale=5,
changepoint.prior.scale=0.9,
holidays.prior.scale = 20,
mcmc.samples = 500,
interval.width=0.9,
uncertainty.samples = 30)
oall <- prophet(historyallo,
holidays = holidays,
seasonality.prior.scale=5,
changepoint.prior.scale=0.1,
holidays.prior.scale = 1,
mcmc.samples = 100,
interval.width=0.9,
uncertainty.samples = 100)
n30 <- prophet(history30n,
holidays = holidays,
seasonality.prior.scale=5,
changepoint.prior.scale=0.1,
holidays.prior.scale = 1,
mcmc.samples = 100,
interval.width=0.9,
uncertainty.samples = 100)
n45 <- prophet(history45n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n60 <- prophet(history60n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n90 <- prophet(history90n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n180 <- prophet(history180n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n270 <- prophet(history270n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n365 <- prophet(history365n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n730 <- prophet(history730n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n1095 <- prophet(history1095n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
n1095m <- prophet(history1095m,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
nother <- prophet(historyothern,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o30 <- prophet(history30o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o45 <- prophet(history45o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o60 <- prophet(history60o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o90 <- prophet(history90o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o180 <- prophet(history180o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o270 <- prophet(history270o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o365 <- prophet(history365o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o730 <- prophet(history730o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o1095 <- prophet(history1095o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
o1095p <- prophet(history1095p,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)
oother <- prophet(historyothero,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500) #設定預測時間 futurealln <- make_future_dataframe(nall, periods = 7)
futureallo <- make_future_dataframe(oall, periods = 7)
future30n <- make_future_dataframe(n30, periods = 7)
future45n <- make_future_dataframe(n45, periods = 7)
future60n <- make_future_dataframe(n60, periods = 7)
future90n <- make_future_dataframe(n90, periods = 7)
future180n <- make_future_dataframe(n180, periods = 7)
future270n <- make_future_dataframe(n270, periods = 7)
future365n <- make_future_dataframe(n365, periods = 7)
future730n <- make_future_dataframe(n730, periods = 7)
future1095n <- make_future_dataframe(n1095, periods = 7)
future1095m <- make_future_dataframe(n1095m, periods = 7)
futureothern <- make_future_dataframe(nother, periods = 7)
future30o <- make_future_dataframe(o30, periods = 7)
future45o <- make_future_dataframe(o45, periods = 7)
future60o <- make_future_dataframe(o60, periods = 7)
future90o <- make_future_dataframe(o90, periods = 7)
future180o <- make_future_dataframe(o180, periods = 7)
future270o <- make_future_dataframe(o270, periods = 7)
future365o <- make_future_dataframe(o365, periods = 7)
future730o <- make_future_dataframe(o730, periods = 7)
future1095o <- make_future_dataframe(o1095, periods = 7)
future1095p <- make_future_dataframe(o1095p, periods = 7)
futureothero <- make_future_dataframe(oother, periods = 7)
tail(future30n)
#預測
forecastalln <- predict(nall, futurealln)
forecastallo <- predict(oall, futureallo)
forecast30n <- predict(n30, future30n)
forecast45n <- predict(n45, future45n)
forecast60n <- predict(n60, future60n)
forecast90n <- predict(n90, future90n)
forecast180n <- predict(n180, future180n)
forecast270n <- predict(n270, future270n)
forecast365n <- predict(n365, future365n)
forecast730n <- predict(n730, future730n)
forecast1095n <- predict(n1095, future1095n)
forecast1095m <- predict(n1095m, future1095m)
forecastothern <- predict(nother, futureothern)
forecast30o <- predict(o30, future30o)
forecast45o <- predict(o45, future45o)
forecast60o <- predict(o60, future60o)
forecast90o <- predict(o90, future90o)
forecast180o <- predict(o180, future180o)
forecast270o <- predict(o270, future270o)
forecast365o <- predict(o365, future365o)
forecast730o <- predict(o730, future730o)
forecast1095o <- predict(o1095, future1095o)
forecast1095p <- predict(o1095p, future1095p)
forecastothero <- predict(oother, futureothero)
# tail(forecast90n[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')]) forecastalln %>%
select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%
filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%
tail(10) forecastallo %>%
select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%
filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%
tail(10) #直線預測
plot(nall, forecastalln)
plot(n30, forecast30n)
plot(oall, forecastallo)
#趨勢分解
prophet_plot_components(nall, forecastalln)
prophet_plot_components(oall, forecastallo) a<-cbind(futurealln[1],forecastalln$yhat,forecast30n$yhat,forecast45n$yhat,forecast60n$yhat,forecast90n$yhat,forecast180n$yhat,forecast270n$yhat,forecast365n$yhat,forecast730n$yhat,forecast1095n$yhat,forecast1095m$yhat,forecastothern$yhat)
b<-cbind(futureallo[1],forecastallo$yhat,forecast30o$yhat,forecast45o$yhat,forecast60o$yhat,forecast90o$yhat,forecast180o$yhat,forecast270o$yhat,forecast365o$yhat,forecast730o$yhat,forecast1095o$yhat,forecast1095p$yhat,forecastothero$yhat)
write.csv(a,file = "d:/Rdata/zjd/ts/qballn.csv")
write.csv(b,file = "d:/Rdata/zjd/ts/qballo.csv")
目前預測準确率總金額能較為準确的預測,調整好參數後總成交金額內插補點約為1%左右;不同産品期限成交金額預測會有10%上下的波動。 參考文獻:http://blog.csdn.net/sinat_26917383/article/details/57419862 http://blog.csdn.net/justinjia91/article/details/62470076