離線數倉-8-資料倉庫開發DWD層-工具域&互動域&流量域&使用者域相關事實表 離線數倉-8-資料倉庫開發DWD層設計要點-工具域&互動域&流量域&使用者域相關事實表 一、工具域相關事實表 1.工具域優惠券領取事務事實表&使用(下單)事務事實表&使用(支付)事務事實表 1.事務事實表 前期梳理 2.事務事實表 DDL表設計分析 1.優惠券領取事務事實表 DDL 2.優惠券使用(下單)事務事實表 DDL 3.優惠券使用(支付)事務事實表 DDL 3.事務事實表 加載資料分析 1.優惠券領取事務事實表 加載資料 2.優惠券使用(下單)事務事實表 加載資料 3.優惠券使用(支付)事務事實表 加載資料 二、互動域相關事實表 1.互動域收藏商品事務事實表&評價事務事實表 1.事務事實表 前期梳理 2.事務事實表 DDL表設計分析 3.事務事實表 加載資料分析 2.互動域評價事務事實表 1.事務事實表 前期梳理 2.事務事實表 DDL表設計分析 3.事務事實表 加載資料分析 三、流量域相關事實表 1.流量域頁面浏覽事務事實表 1.事務事實表 前期梳理 2.事務事實表 DDL表設計分析 3.事務事實表 加載資料分析 2.流量域啟動事務事實表 1.啟動事務事實表 前期梳理 2.啟動事務事實表 DDL表設計分析 3.啟動事務事實表 加載資料分析 3.流量域動作事務事實表 1.動作事務事實表 前期梳理 2.動作事務事實表 DDL表設計分析 3.動作事務事實表 加載資料分析 4.流量域錯誤事務事實表 1.錯誤事務事實表 前期梳理 2.錯誤事務事實表 DDL表設計分析 3.錯誤事務事實表 加載資料分析 四、使用者域相關事實表 1.使用者域使用者注冊事務事實表 1.使用者注冊事務事實表 前期梳理 2.使用者注冊事務事實表 DDL表設計分析 3.使用者注冊事務事實表 加載資料分析 2.使用者域使用者登入事務事實表 1.使用者登入事務事實表 前期梳理 2.使用者登入事務事實表 DDL表設計分析 3.使用者登入事務事實表 加載資料分析 五、建表語句腳本 六、首日裝載腳本 七、每日裝載腳本 離線數倉-8-資料倉庫開發DWD層設計要點-工具域&互動域&流量域&使用者域相關事實表 一、工具域相關事實表 1.工具域優惠券領取事務事實表&使用(下單)事務事實表&使用(支付)事務事實表 1.事務事實表 前期梳理 關聯的表格coupon_info 領取後,會新增一條資料 下單後,會更新狀态,以及更新時間 支付後,會更新狀态,以及更新時間 有的業務過程沒有明顯的路徑成本,是以就不需要在建立表格的時候硬要展現出來 2.事務事實表 DDL表設計分析 1.優惠券領取事務事實表 DDL DROP TABLE IF EXISTS dwd_tool_coupon_get_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_get_inc
(
`id` STRING COMMENT '編号',
`coupon_id` STRING COMMENT '優惠券ID',
`user_id` STRING COMMENT 'userid',
`date_id` STRING COMMENT '日期ID',
`get_time` STRING COMMENT '領取時間'
) COMMENT '優惠券領取事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_get_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
2.優惠券使用(下單)事務事實表 DDL DROP TABLE IF EXISTS dwd_tool_coupon_order_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_order_inc
(
`id` STRING COMMENT '編号',
`coupon_id` STRING COMMENT '優惠券ID',
`user_id` STRING COMMENT 'user_id',
`order_id` STRING COMMENT 'order_id',
`date_id` STRING COMMENT '日期ID',
`order_time` STRING COMMENT '使用下單時間'
) COMMENT '優惠券使用下單事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_order_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
3.優惠券使用(支付)事務事實表 DDL DROP TABLE IF EXISTS dwd_tool_coupon_pay_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_pay_inc
(
`id` STRING COMMENT '編号',
`coupon_id` STRING COMMENT '優惠券ID',
`user_id` STRING COMMENT 'user_id',
`order_id` STRING COMMENT 'order_id',
`date_id` STRING COMMENT '日期ID',
`payment_time` STRING COMMENT '使用下單時間'
) COMMENT '優惠券使用支付事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_pay_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
3.事務事實表 加載資料分析 1.優惠券領取事務事實表 加載資料 insert overwrite table dwd_tool_coupon_get_inc partition(dt)
select
data.id,
data.coupon_id,
data.user_id,
date_format(data.get_time,'yyyy-MM-dd') date_id,
data.get_time,
date_format(data.get_time,'yyyy-MM-dd')
from ods_coupon_use_inc
where dt='2020-06-14'
and type='bootstrap-insert';
insert overwrite table dwd_tool_coupon_get_inc partition (dt='2020-06-15')
select
data.id,
data.coupon_id,
data.user_id,
date_format(data.get_time,'yyyy-MM-dd') date_id,
data.get_time
from ods_coupon_use_inc
where dt='2020-06-15'
and type='insert';
2.優惠券使用(下單)事務事實表 加載資料 insert overwrite table dwd_tool_coupon_order_inc partition(dt)
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.using_time,'yyyy-MM-dd') date_id,
data.using_time,
date_format(data.using_time,'yyyy-MM-dd')
from ods_coupon_use_inc
where dt='2020-06-14'
and type='bootstrap-insert'
and data.using_time is not null;
insert overwrite table dwd_tool_coupon_order_inc partition(dt='2020-06-15')
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.using_time,'yyyy-MM-dd') date_id,
data.using_time
from ods_coupon_use_inc
where dt='2020-06-15'
and type='update'
and array_contains(map_keys(old),'using_time');
3.優惠券使用(支付)事務事實表 加載資料 insert overwrite table dwd_tool_coupon_pay_inc partition(dt)
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.used_time,'yyyy-MM-dd') date_id,
data.used_time,
date_format(data.used_time,'yyyy-MM-dd')
from ods_coupon_use_inc
where dt='2020-06-14'
and type='bootstrap-insert'
and data.used_time is not null;
insert overwrite table dwd_tool_coupon_pay_inc partition(dt='2020-06-15')
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.used_time,'yyyy-MM-dd') date_id,
data.used_time
from ods_coupon_use_inc
where dt='2020-06-15'
and type='update'
and array_contains(map_keys(old),'used_time');
二、互動域相關事實表 1.互動域收藏商品事務事實表&評價事務事實表 1.事務事實表 前期梳理 關聯的表格:favor_info 字段分析:xx使用者收藏xx商品,xx時間收藏的。 收藏商品事務事實表 一條記錄代表 使用者收藏一個商品 收藏商品這個業務過程對表格影響:收藏商品:業務表新增一條記錄,取消收藏:業務表中更新業務狀态為取消,并且取消時間更新上。 2.事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_interaction_favor_add_inc;
CREATE EXTERNAL TABLE dwd_interaction_favor_add_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT 'sku_id',
`date_id` STRING COMMENT '日期id',
`create_time` STRING COMMENT '收藏時間'
) COMMENT '收藏事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_interaction_favor_add_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
3.事務事實表 加載資料分析 set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_interaction_favor_add_inc partition(dt)
select
data.id,
data.user_id,
data.sku_id,
date_format(data.create_time,'yyyy-MM-dd') date_id,
data.create_time,
date_format(data.create_time,'yyyy-MM-dd')
from ods_favor_info_inc
where dt='2020-06-14'
and type = 'bootstrap-insert';
insert overwrite table dwd_interaction_favor_add_inc partition(dt='2020-06-15')
select
data.id,
data.user_id,
data.sku_id,
date_format(data.create_time,'yyyy-MM-dd') date_id,
data.create_time
from ods_favor_info_inc
where dt='2020-06-15'
and type = 'insert';
2.互動域評價事務事實表 1.事務事實表 前期梳理 關聯的表格:comment_info 字段分析:xx使用者xx時間評價了xx訂單中的xx商品。 在某些條件下,某些表格中 度量 和 次元 之間的界限比較模糊,既可以是度量又可以是次元,根據業務需求來進行判斷使用。 2.事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_interaction_comment_inc;
CREATE EXTERNAL TABLE dwd_interaction_comment_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者ID',
`sku_id` STRING COMMENT 'sku_id',
`order_id` STRING COMMENT '訂單ID',
`date_id` STRING COMMENT '日期ID',
`create_time` STRING COMMENT '評價時間',
`appraise_code` STRING COMMENT '評價編碼',
`appraise_name` STRING COMMENT '評價名稱'
) COMMENT '評價事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_interaction_comment_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
3.事務事實表 加載資料分析 insert overwrite table dwd_interaction_comment_inc partition(dt)
select
id,
user_id,
sku_id,
order_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
appraise,
dic_name,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id,
data.user_id,
data.sku_id,
data.order_id,
data.create_time,
data.appraise
from ods_comment_info_inc
where dt='2020-06-14'
and type='bootstrap-insert'
)ci
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-14'
and parent_code='12'
)dic
on ci.appraise=dic.dic_code;
insert overwrite table dwd_interaction_comment_inc partition(dt='2020-06-15')
select
id,
user_id,
sku_id,
order_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
appraise,
dic_name
from
(
select
data.id,
data.user_id,
data.sku_id,
data.order_id,
data.create_time,
data.appraise
from ods_comment_info_inc
where dt='2020-06-15'
and type='insert'
)ci
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-15'
and parent_code='12'
)dic
on ci.appraise=dic.dic_code;
三、流量域相關事實表 流量域事務事實表資料通常都來自于使用者行為日志資料 1.流量域頁面浏覽事務事實表 1.事務事實表 前期梳理 關聯的表格:使用者行為日志資料-頁面日志資料 ods_log_inc 行分析:一行代表一條浏覽記錄,xx使用者在xx時間浏覽了xx頁面。 xx使用者:使用裝置id作為使用者辨別 xx時間:日期時間 xx頁面:page_id 流量相關的日志資料,最後對應dwd層的事務事實表中,将次元資訊全部退化到了事實表中,這樣做的目的:因為使用者行為日志資料都是将次元資訊和業務過程資料全部封裝到一起,使用flume采集上來,存放到ods層,與業務庫中表格資料不同,業務庫中已經把各次元資訊存放到不同業務庫了,采集後存放到ods層不同表格中,使用的時候直接根據關聯關系擷取即可,但是使用者行為日志中,沒有維護這些關聯關系,原始日志資料中的子彈直接落地到dwd層作為次元資料即可。 總之:業務庫采集上來的資料,建立對應的dim層次元表 ;埋點采集上來的資料,不需要建立對應的dim層,因為次元資料和事實一并存放在一條記錄上報,如果拆開,後面dwd層以及dws層使用的時候,還需要再join關聯,這樣損耗了大量時間,是以直接次元退化,放在一條記錄中即可。 2.事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_traffic_page_view_inc;
CREATE EXTERNAL TABLE dwd_traffic_page_view_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面ID ',
`source_type` STRING COMMENT '來源類型',
`date_id` STRING COMMENT '日期id',
`view_time` STRING COMMENT '跳入時間',
`session_id` STRING COMMENT '所屬會話id',
`during_time` BIGINT COMMENT '持續時間毫秒'
) COMMENT '頁面日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_page_view_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
3.事務事實表 加載資料分析 使用者行為日志資料,隻存在當天的資料,不存在曆史資料,不需要處理跟之前的首日裝載資料的腳本 以2020-06-14資料為準,實作每日裝載 hive中sql語句: struct is not null 問題:這個是hive的bug,需要注意, 結構體中的字段,然後書寫sql判斷裡面字段 is not null ,最終執行計劃的時候,不執行,失效。 問題的原因:CBO導緻的,基于性能開銷的優化政策,選擇最小性能開銷的優化政策去執行。 處理方式: 1.設定hive中cbo為關閉狀态,set hive.cbo.enable = false; 2.使用struct結構體中任意字段進行過濾,不使用結構體本身,即可實作過濾操作。 hive中開窗函數的複習: FIRST_VALUE:按照限制條件開窗後,取第一個值,如果參數裡面設定了true,就跳過null值,否則不跳過null值。 LAST_VALUE:按照限制條件開窗後,取最後一個值,如果參數裡面設定了true,就跳過null值,否則不跳過null值。 字段中的session_id如何處理: 下圖可以看出,紅框标注的是一個會話id 【離線數倉-8-資料倉庫開發DWD層設計要點-工具域&互動域&流量域&使用者域相關事實表】離線數倉-8-資料倉庫開發DWD層設計要點-工具域&互動域&流量域&使用者域相關事實表 怎樣設計sessionId 涉及到跨行操作,使用Hive中開窗函數 ,hive官網上開窗函數介紹文檔:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics sessionId的設定,需要關注三點: 1.sessionId是什麼:使用者通路一個網站,從開始到最後浏覽關閉網站的一個會話。 2.由于使用者行為日志中沒有相關字段,需要自己去設定,這裡就使用:使用者+會話開始時間作為一次會話的sessionId 3.怎樣去确認開始時間,使用開窗函數,last_value(時間字段,true) ,時間字段需要處理一下,設定一個新的字段,隻使用目前使用者登入第一個頁面的那個時間,然後下面記錄中此字段都是null,下一個會話開始的時候,又是一個新的時間,登入其他頁面這個字段的時間也都是null。這樣使用last_value開窗函數,并且跳過null值的話,就完全可以實作 sessionid設定的第二種方案,新開一個字段,判斷last_page_id是否為空,為空則值為1,否則值為0,然後執行該字段求和即可,使用者登入幾次會話,後面的值就變成了幾,這樣就可實作不同會話,但是後面需要添加天次元,因為如果第二天的話,。 最終流量域頁面浏覽事務事實表裝載資料的sql set hive.cbo.enable=false;
insert overwrite table dwd_traffic_page_view_inc partition (dt='2020-06-14')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') view_time,
concat(mid_id,'-',last_value(session_start_point,true) over (partition by mid_id order by ts)) session_id,
during_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ods_log_inc
where dt='2020-06-14'
and page is not null
)log
left join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-14'
)bp
on log.area_code=bp.area_code;
2.流量域啟動事務事實表 1.啟動事務事實表 前期梳理 關聯的表格:使用者行為日志資料-啟動日志資料 ods_log_inc 行分析:一行代表一條啟動日志,xx使用者在xx時間啟動了xx程式。 列分析:具體字段來源于ods_log_inc裡面的啟動日志 2.啟動事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_traffic_start_inc;
CREATE EXTERNAL TABLE dwd_traffic_start_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`entry` STRING COMMENT 'icon手機圖示 notice 通知',
`open_ad_id` STRING COMMENT '廣告頁ID ',
`date_id` STRING COMMENT '日期id',
`start_time` STRING COMMENT '啟動時間',
`loading_time_ms` BIGINT COMMENT '啟動加載時間',
`open_ad_ms` BIGINT COMMENT '廣告總共播放時間',
`open_ad_skip_ms` BIGINT COMMENT '使用者跳過廣告時點'
) COMMENT '啟動日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_start_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
3.啟動事務事實表 加載資料分析 hive的sql處理: start 在hive中是一個關鍵字,需要使用``start`來進行處理 set hive.cbo.enable=false;
insert overwrite table dwd_traffic_start_inc partition(dt='2020-06-14')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
entry,
open_ad_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time,
loading_time,
open_ad_ms,
open_ad_skip_ms
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
`start`.entry,
`start`.loading_time,
`start`.open_ad_id,
`start`.open_ad_ms,
`start`.open_ad_skip_ms,
ts
from ods_log_inc
where dt='2020-06-14'
and `start` is not null
)log
left join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-14'
)bp
on log.area_code=bp.area_code;
3.流量域動作事務事實表 1.動作事務事實表 前期梳理 關聯的表格:使用者行為日志資料-使用者行為日志資料 ods_log_inc 行分析:一行代表一條動作資訊,xx使用者在xx時間在xx頁面觸發了xx動作。 列分析:具體字段來源于ods_log_inc裡面的使用者行為 2.動作事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_traffic_action_inc;
CREATE EXTERNAL TABLE dwd_traffic_action_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`during_time` BIGINT COMMENT '持續時間毫秒',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面id ',
`source_type` STRING COMMENT '來源類型',
`action_id` STRING COMMENT '動作id',
`action_item` STRING COMMENT '目标id ',
`action_item_type` STRING COMMENT '目标類型',
`date_id` STRING COMMENT '日期id',
`action_time` STRING COMMENT '動作發生時間'
) COMMENT '動作日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_action_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
3.動作事務事實表 加載資料分析 因為使用者行為日志中,動作actions字段對應的數組結構,但是最終裝載到動作事務事實表中的時候,需要展開為一個一個的動作,這裡牽扯到了一進多出的sql處理方式。 hive中sql實作:一進多出的邏輯 udf:一進一出 udaf:多進一出 udtf: 一進多出 udtf中**炸裂函數 ** 炸裂數組 :explode(arr) tmp as item :其中tmp 是炸裂出來的item字段組成的表,可以使用tmp.item,來擷取item的字段資訊 文法:select * from table lateral view explode(arr) tmp as item 最後炸裂完畢,多出來一列為item,其他兩列跟原來資料保持一緻。 炸裂Map集合: explode(map) tmp as key,value 最終炸裂效果如下圖: 【離線數倉-8-資料倉庫開發DWD層設計要點-工具域&互動域&流量域&使用者域相關事實表】離線數倉-8-資料倉庫開發DWD層設計要點-工具域&互動域&流量域&使用者域相關事實表 最終裝載資料如下: set hive.cbo.enable=false;
insert overwrite table dwd_traffic_action_inc partition(dt='2020-06-14')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
action_id,
action_item,
action_item_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
action.action_id,
action.item action_item,
action.item_type action_item_type,
action.ts
from ods_log_inc lateral view explode(actions) tmp as action
where dt='2020-06-14'
and actions is not null
)log
left join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-14'
)bp
on log.area_code=bp.area_code;
4.流量域錯誤事務事實表 1.錯誤事務事實表 前期梳理 關聯的表格:使用者行為日志資料-使用者行為日志資料+啟動日志 ods_log_inc,這兩部分都有可能産生錯誤日志 行分析:一行代表一條錯誤資訊。 列分析:錯誤表格中需要包含:common資訊、page資訊、start資訊、actions資訊、曝光資訊和最終的錯誤資訊,前面的幾個都作為次元資訊,友善定位錯誤問題 2.錯誤事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_traffic_error_inc;
CREATE EXTERNAL TABLE dwd_traffic_error_inc
(
`province_id` STRING COMMENT '地區編碼',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面ID ',
`source_type` STRING COMMENT '來源類型',
`entry` STRING COMMENT 'icon手機圖示 notice 通知',
`loading_time` STRING COMMENT '啟動加載時間',
`open_ad_id` STRING COMMENT '廣告頁ID ',
`open_ad_ms` STRING COMMENT '廣告總共播放時間',
`open_ad_skip_ms` STRING COMMENT '使用者跳過廣告時點',
`actions` ARRAY<STRUCT<action_id:STRING,item:STRING,item_type:STRING,ts:BIGINT>> COMMENT '動作資訊',
`displays` ARRAY<STRUCT<display_type :STRING,item :STRING,item_type :STRING,`order` :STRING,pos_id
:STRING>> COMMENT '曝光資訊',
`date_id` STRING COMMENT '日期id',
`error_time` STRING COMMENT '錯誤時間',
`error_code` STRING COMMENT '錯誤碼',
`error_msg` STRING COMMENT '錯誤資訊'
) COMMENT '錯誤日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_error_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
3.錯誤事務事實表 加載資料分析 執行導入資料的時候,會出現資料導入失敗的問題: 是由于查詢的資料時候存在數組類型,在使用了hive on spark引擎的時候,不支援這樣的操作,切換為hive on MR,指令如下:set hive.execution.engine=mr; 使用hive on spark的時候,有時候sql正常,但是運作時一直報錯,切換為mr引擎以後,檢視是否能正常運作,正常運作的話,就代表hive on spark有bug。可以臨時切換執行引擎mr,然後在程式結尾在切換為spark即可,具體sql如下。 set hive.cbo.enable=false;
set hive.execution.engine=mr;
insert overwrite table dwd_traffic_error_inc partition(dt='2020-06-14')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
entry,
loading_time,
open_ad_id,
open_ad_ms,
open_ad_skip_ms,
actions,
displays,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') error_time,
error_code,
error_msg
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
`start`.entry,
`start`.loading_time,
`start`.open_ad_id,
`start`.open_ad_ms,
`start`.open_ad_skip_ms,
actions,
displays,
err.error_code,
err.msg error_msg,
ts
from ods_log_inc
where dt='2020-06-14'
and err is not null
)log
join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-14'
)bp
on log.area_code=bp.area_code;
四、使用者域相關事實表 1.使用者域使用者注冊事務事實表 1.使用者注冊事務事實表 前期梳理 使用者注冊 特别之處:一個使用者隻對應表格中一條注冊資訊,不存在多條的情況。 關聯的表格:業務過程:注冊成功,會對哪些表格産生影響:使用者表 use_info,注冊成功,插入資料。 行分析:一條代表一個使用者注冊成功的資訊。 列分析: 隻從業務系統使用者系統資料庫擷取次元資訊的話,此張事實表的次元很少,不符合多元的的原則,需要再添加一些其他次元; 還需要從使用者行為日志表中擷取使用者注冊的資訊以及使用者注冊成功後生成的使用者id,然後添加到使用者注冊事務事實表中。 兩邊資料,以業務系統使用者系統資料庫資料為主。 2.使用者注冊事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_user_register_inc;
CREATE EXTERNAL TABLE dwd_user_register_inc
(
`user_id` STRING COMMENT '使用者ID',
`date_id` STRING COMMENT '日期ID',
`create_time` STRING COMMENT '注冊時間',
`channel` STRING COMMENT '應用下載下傳管道',
`province_id` STRING COMMENT '省份id',
`version_code` STRING COMMENT '應用版本',
`mid_id` STRING COMMENT '裝置id',
`brand` STRING COMMENT '裝置品牌',
`model` STRING COMMENT '裝置型号',
`operate_system` STRING COMMENT '裝置作業系統'
) COMMENT '使用者域使用者注冊事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_user_register_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
3.使用者注冊事務事實表 加載資料分析 1.首日全量裝載資料 set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_user_register_inc partition(dt)
select
ui.user_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id user_id,
data.create_time
from ods_user_info_inc
where dt='2020-06-14'
and type='bootstrap-insert'
)ui
left join
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code
from ods_log_inc
where dt='2020-06-14'
and page.page_id='register'
and common.uid is not null
)log
on ui.user_id=log.user_id
left join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-14'
)bp
on log.area_code=bp.area_code;
2.每日增量裝載資料 insert overwrite table dwd_user_register_inc partition(dt='2020-06-15')
select
ui.user_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
data.id user_id,
data.create_time
from ods_user_info_inc
where dt='2020-06-15'
and type='insert'
)ui
left join
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code
from ods_log_inc
where dt='2020-06-15'
and page.page_id='register'
and common.uid is not null
)log
on ui.user_id=log.user_id
left join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-15'
)bp
on log.area_code=bp.area_code;
2.使用者域使用者登入事務事實表 1.使用者登入事務事實表 前期梳理 關聯的表格:業務過程:登入成功,會對哪些表格産生影響:使用者行為日志表,使用者登入會産生登入日志。 行分析:一條代表一個使用者登入資訊。 列分析: 從使用者行為日志中 啟動日志 和 頁面日志擷取相關列字段 使用者登入情況分為三種情況: 1.前期浏覽頁面未登入,後面登入後再浏覽 2.登入後再浏覽頁面 3.隻浏覽頁面,不登入 2.使用者登入事務事實表 DDL表設計分析 DROP TABLE IF EXISTS dwd_user_login_inc;
CREATE EXTERNAL TABLE dwd_user_login_inc
(
`user_id` STRING COMMENT '使用者ID',
`date_id` STRING COMMENT '日期ID',
`login_time` STRING COMMENT '登入時間',
`channel` STRING COMMENT '應用下載下傳管道',
`province_id` STRING COMMENT '省份id',
`version_code` STRING COMMENT '應用版本',
`mid_id` STRING COMMENT '裝置id',
`brand` STRING COMMENT '裝置品牌',
`model` STRING COMMENT '裝置型号',
`operate_system` STRING COMMENT '裝置作業系統'
) COMMENT '使用者域使用者登入事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_user_login_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
3.使用者登入事務事實表 加載資料分析 怎樣擷取登入資訊,在一次會話中的登入資訊。 1.前期浏覽頁面未登入,後面登入後再浏覽 2.登入後再浏覽頁面 1.首先繪制sessionId,使用開窗函數 2.擷取每個會話的第一個頁面,分組取TopN insert overwrite table dwd_user_login_inc partition(dt='2020-06-14')
select
user_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') login_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
row_number() over (partition by session_id order by ts) rn
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
concat(mid_id,'-',last_value(session_start_point,true) over(partition by mid_id order by ts)) session_id
from
(
select
common.uid user_id,
common.ch channel,
common.ar area_code,
common.vc version_code,
common.mid mid_id,
common.ba brand,
common.md model,
common.os operate_system,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ods_log_inc
where dt='2020-06-14'
and page is not null
)t1
)t2
where user_id is not null
)t3
where rn=1
)t4
left join
(
select
id province_id,
area_code
from ods_base_province_full
where dt='2020-06-14'
)bp
on t4.area_code=bp.area_code;
總結: 事實表,每天有一個分區,對應資料寫入到對應分區上面,首日全量同步的時候,也是寫到對應分區上,每天分區裡面放的是當天的操作記錄,在進行查詢資料的時候,查詢對應分區資料即可。 次元表,大多數是每日全量快照表,每天一個分區,此分區内部是全量資料,查詢時候,直接查詢最新分區即可擷取全量資料,如果查詢曆史資料,可對應查詢曆史某天的次元資料。 五、建表語句腳本 DROP TABLE IF EXISTS dwd_trade_cart_add_inc;
CREATE EXTERNAL TABLE dwd_trade_cart_add_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT '商品id',
`date_id` STRING COMMENT '時間id',
`create_time` STRING COMMENT '加購時間',
`source_id` STRING COMMENT '來源類型ID',
`source_type_code` STRING COMMENT '來源類型編碼',
`source_type_name` STRING COMMENT '來源類型名稱',
`sku_num` BIGINT COMMENT '加購物車件數'
) COMMENT '交易域加購物車事務事實表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_cart_add_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_trade_order_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_order_detail_inc
(
`id` STRING COMMENT '編号',
`order_id` STRING COMMENT '訂單id',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT '商品id',
`province_id` STRING COMMENT '省份id',
`activity_id` STRING COMMENT '參與活動規則id',
`activity_rule_id` STRING COMMENT '參與活動規則id',
`coupon_id` STRING COMMENT '使用優惠券id',
`date_id` STRING COMMENT '下單日期id',
`create_time` STRING COMMENT '下單時間',
`source_id` STRING COMMENT '來源編号',
`source_type_code` STRING COMMENT '來源類型編碼',
`source_type_name` STRING COMMENT '來源類型名稱',
`sku_num` BIGINT COMMENT '商品數量',
`split_original_amount` DECIMAL(16, 2) COMMENT '原始價格',
`split_activity_amount` DECIMAL(16, 2) COMMENT '活動優惠分攤',
`split_coupon_amount` DECIMAL(16, 2) COMMENT '優惠券優惠分攤',
`split_total_amount` DECIMAL(16, 2) COMMENT '最終價格分攤'
) COMMENT '交易域下單明細事務事實表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_order_detail_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_trade_cancel_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_cancel_detail_inc
(
`id` STRING COMMENT '編号',
`order_id` STRING COMMENT '訂單id',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT '商品id',
`province_id` STRING COMMENT '省份id',
`activity_id` STRING COMMENT '參與活動規則id',
`activity_rule_id` STRING COMMENT '參與活動規則id',
`coupon_id` STRING COMMENT '使用優惠券id',
`date_id` STRING COMMENT '取消訂單日期id',
`cancel_time` STRING COMMENT '取消訂單時間',
`source_id` STRING COMMENT '來源編号',
`source_type_code` STRING COMMENT '來源類型編碼',
`source_type_name` STRING COMMENT '來源類型名稱',
`sku_num` BIGINT COMMENT '商品數量',
`split_original_amount` DECIMAL(16, 2) COMMENT '原始價格',
`split_activity_amount` DECIMAL(16, 2) COMMENT '活動優惠分攤',
`split_coupon_amount` DECIMAL(16, 2) COMMENT '優惠券優惠分攤',
`split_total_amount` DECIMAL(16, 2) COMMENT '最終價格分攤'
) COMMENT '交易域取消訂單明細事務事實表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_cancel_detail_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_trade_pay_detail_suc_inc;
CREATE EXTERNAL TABLE dwd_trade_pay_detail_suc_inc
(
`id` STRING COMMENT '編号',
`order_id` STRING COMMENT '訂單id',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT '商品id',
`province_id` STRING COMMENT '省份id',
`activity_id` STRING COMMENT '參與活動規則id',
`activity_rule_id` STRING COMMENT '參與活動規則id',
`coupon_id` STRING COMMENT '使用優惠券id',
`payment_type_code` STRING COMMENT '支付類型編碼',
`payment_type_name` STRING COMMENT '支付類型名稱',
`date_id` STRING COMMENT '支付日期id',
`callback_time` STRING COMMENT '支付成功時間',
`source_id` STRING COMMENT '來源編号',
`source_type_code` STRING COMMENT '來源類型編碼',
`source_type_name` STRING COMMENT '來源類型名稱',
`sku_num` BIGINT COMMENT '商品數量',
`split_original_amount` DECIMAL(16, 2) COMMENT '應支付原始金額',
`split_activity_amount` DECIMAL(16, 2) COMMENT '支付活動優惠分攤',
`split_coupon_amount` DECIMAL(16, 2) COMMENT '支付優惠券優惠分攤',
`split_payment_amount` DECIMAL(16, 2) COMMENT '支付金額'
) COMMENT '交易域成功支付事務事實表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_pay_detail_suc_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_trade_order_refund_inc;
CREATE EXTERNAL TABLE dwd_trade_order_refund_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者ID',
`order_id` STRING COMMENT '訂單ID',
`sku_id` STRING COMMENT '商品ID',
`province_id` STRING COMMENT '地區ID',
`date_id` STRING COMMENT '日期ID',
`create_time` STRING COMMENT '退單時間',
`refund_type_code` STRING COMMENT '退單類型編碼',
`refund_type_name` STRING COMMENT '退單類型名稱',
`refund_reason_type_code` STRING COMMENT '退單原因類型編碼',
`refund_reason_type_name` STRING COMMENT '退單原因類型名稱',
`refund_reason_txt` STRING COMMENT '退單原因描述',
`refund_num` BIGINT COMMENT '退單件數',
`refund_amount` DECIMAL(16, 2) COMMENT '退單金額'
) COMMENT '交易域退單事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_order_refund_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_trade_refund_pay_suc_inc;
CREATE EXTERNAL TABLE dwd_trade_refund_pay_suc_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者ID',
`order_id` STRING COMMENT '訂單編号',
`sku_id` STRING COMMENT 'SKU編号',
`province_id` STRING COMMENT '地區ID',
`payment_type_code` STRING COMMENT '支付類型編碼',
`payment_type_name` STRING COMMENT '支付類型名稱',
`date_id` STRING COMMENT '日期ID',
`callback_time` STRING COMMENT '支付成功時間',
`refund_num` DECIMAL(16, 2) COMMENT '退款件數',
`refund_amount` DECIMAL(16, 2) COMMENT '退款金額'
) COMMENT '交易域送出退款成功事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_refund_pay_suc_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_trade_cart_full;
CREATE EXTERNAL TABLE dwd_trade_cart_full
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT '商品id',
`sku_name` STRING COMMENT '商品名稱',
`sku_num` BIGINT COMMENT '加購物車件數'
) COMMENT '交易域購物車周期快照事實表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_cart_full/'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_tool_coupon_get_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_get_inc
(
`id` STRING COMMENT '編号',
`coupon_id` STRING COMMENT '優惠券ID',
`user_id` STRING COMMENT 'userid',
`date_id` STRING COMMENT '日期ID',
`get_time` STRING COMMENT '領取時間'
) COMMENT '優惠券領取事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_get_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_tool_coupon_order_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_order_inc
(
`id` STRING COMMENT '編号',
`coupon_id` STRING COMMENT '優惠券ID',
`user_id` STRING COMMENT 'user_id',
`order_id` STRING COMMENT 'order_id',
`date_id` STRING COMMENT '日期ID',
`order_time` STRING COMMENT '使用下單時間'
) COMMENT '優惠券使用下單事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_order_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_tool_coupon_pay_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_pay_inc
(
`id` STRING COMMENT '編号',
`coupon_id` STRING COMMENT '優惠券ID',
`user_id` STRING COMMENT 'user_id',
`order_id` STRING COMMENT 'order_id',
`date_id` STRING COMMENT '日期ID',
`payment_time` STRING COMMENT '使用下單時間'
) COMMENT '優惠券使用支付事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_pay_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_interaction_favor_add_inc;
CREATE EXTERNAL TABLE dwd_interaction_favor_add_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者id',
`sku_id` STRING COMMENT 'sku_id',
`date_id` STRING COMMENT '日期id',
`create_time` STRING COMMENT '收藏時間'
) COMMENT '收藏事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_interaction_favor_add_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_interaction_comment_inc;
CREATE EXTERNAL TABLE dwd_interaction_comment_inc
(
`id` STRING COMMENT '編号',
`user_id` STRING COMMENT '使用者ID',
`sku_id` STRING COMMENT 'sku_id',
`order_id` STRING COMMENT '訂單ID',
`date_id` STRING COMMENT '日期ID',
`create_time` STRING COMMENT '評價時間',
`appraise_code` STRING COMMENT '評價編碼',
`appraise_name` STRING COMMENT '評價名稱'
) COMMENT '評價事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_interaction_comment_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_traffic_page_view_inc;
CREATE EXTERNAL TABLE dwd_traffic_page_view_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面ID ',
`source_type` STRING COMMENT '來源類型',
`date_id` STRING COMMENT '日期id',
`view_time` STRING COMMENT '跳入時間',
`session_id` STRING COMMENT '所屬會話id',
`during_time` BIGINT COMMENT '持續時間毫秒'
) COMMENT '頁面日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_page_view_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_traffic_start_inc;
CREATE EXTERNAL TABLE dwd_traffic_start_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`entry` STRING COMMENT 'icon手機圖示 notice 通知',
`open_ad_id` STRING COMMENT '廣告頁ID ',
`date_id` STRING COMMENT '日期id',
`start_time` STRING COMMENT '啟動時間',
`loading_time_ms` BIGINT COMMENT '啟動加載時間',
`open_ad_ms` BIGINT COMMENT '廣告總共播放時間',
`open_ad_skip_ms` BIGINT COMMENT '使用者跳過廣告時點'
) COMMENT '啟動日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_start_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_traffic_action_inc;
CREATE EXTERNAL TABLE dwd_traffic_action_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`during_time` BIGINT COMMENT '持續時間毫秒',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面id ',
`source_type` STRING COMMENT '來源類型',
`action_id` STRING COMMENT '動作id',
`action_item` STRING COMMENT '目标id ',
`action_item_type` STRING COMMENT '目标類型',
`date_id` STRING COMMENT '日期id',
`action_time` STRING COMMENT '動作發生時間'
) COMMENT '動作日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_action_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_traffic_display_inc;
CREATE EXTERNAL TABLE dwd_traffic_display_inc
(
`province_id` STRING COMMENT '省份id',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`during_time` BIGINT COMMENT 'app版本号',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面ID ',
`source_type` STRING COMMENT '來源類型',
`date_id` STRING COMMENT '日期id',
`display_time` STRING COMMENT '曝光時間',
`display_type` STRING COMMENT '曝光類型',
`display_item` STRING COMMENT '曝光對象id ',
`display_item_type` STRING COMMENT 'app版本号',
`display_order` BIGINT COMMENT '曝光順序',
`display_pos_id` BIGINT COMMENT '曝光位置'
) COMMENT '曝光日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_display_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_traffic_error_inc;
CREATE EXTERNAL TABLE dwd_traffic_error_inc
(
`province_id` STRING COMMENT '地區編碼',
`brand` STRING COMMENT '手機品牌',
`channel` STRING COMMENT '管道',
`is_new` STRING COMMENT '是否首次啟動',
`model` STRING COMMENT '手機型号',
`mid_id` STRING COMMENT '裝置id',
`operate_system` STRING COMMENT '作業系統',
`user_id` STRING COMMENT '會員id',
`version_code` STRING COMMENT 'app版本号',
`page_item` STRING COMMENT '目标id ',
`page_item_type` STRING COMMENT '目标類型',
`last_page_id` STRING COMMENT '上頁類型',
`page_id` STRING COMMENT '頁面ID ',
`source_type` STRING COMMENT '來源類型',
`entry` STRING COMMENT 'icon手機圖示 notice 通知',
`loading_time` STRING COMMENT '啟動加載時間',
`open_ad_id` STRING COMMENT '廣告頁ID ',
`open_ad_ms` STRING COMMENT '廣告總共播放時間',
`open_ad_skip_ms` STRING COMMENT '使用者跳過廣告時點',
`actions` ARRAY<STRUCT<action_id:STRING,item:STRING,item_type:STRING,ts:BIGINT>> COMMENT '動作資訊',
`displays` ARRAY<STRUCT<display_type :STRING,item :STRING,item_type :STRING,`order` :STRING,pos_id
:STRING>> COMMENT '曝光資訊',
`date_id` STRING COMMENT '日期id',
`error_time` STRING COMMENT '錯誤時間',
`error_code` STRING COMMENT '錯誤碼',
`error_msg` STRING COMMENT '錯誤資訊'
) COMMENT '錯誤日志表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_traffic_error_inc'
TBLPROPERTIES ('orc.compress' = 'snappy');
DROP TABLE IF EXISTS dwd_user_register_inc;
CREATE EXTERNAL TABLE dwd_user_register_inc
(
`user_id` STRING COMMENT '使用者ID',
`date_id` STRING COMMENT '日期ID',
`create_time` STRING COMMENT '注冊時間',
`channel` STRING COMMENT '應用下載下傳管道',
`province_id` STRING COMMENT '省份id',
`version_code` STRING COMMENT '應用版本',
`mid_id` STRING COMMENT '裝置id',
`brand` STRING COMMENT '裝置品牌',
`model` STRING COMMENT '裝置型号',
`operate_system` STRING COMMENT '裝置作業系統'
) COMMENT '使用者域使用者注冊事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_user_register_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
DROP TABLE IF EXISTS dwd_user_login_inc;
CREATE EXTERNAL TABLE dwd_user_login_inc
(
`user_id` STRING COMMENT '使用者ID',
`date_id` STRING COMMENT '日期ID',
`login_time` STRING COMMENT '登入時間',
`channel` STRING COMMENT '應用下載下傳管道',
`province_id` STRING COMMENT '省份id',
`version_code` STRING COMMENT '應用版本',
`mid_id` STRING COMMENT '裝置id',
`brand` STRING COMMENT '裝置品牌',
`model` STRING COMMENT '裝置型号',
`operate_system` STRING COMMENT '裝置作業系統'
) COMMENT '使用者域使用者登入事務事實表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_user_login_inc/'
TBLPROPERTIES ("orc.compress" = "snappy");
六、首日裝載腳本 #!/bin/bash
APP=gmall
if [ -n "$2" ] ;then
do_date=$2
else
echo "請傳入日期參數"
exit
fi
dwd_interaction_comment_inc="
insert overwrite table ${APP}.dwd_interaction_comment_inc partition(dt)
select
id,
user_id,
sku_id,
order_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
appraise,
dic_name,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id,
data.user_id,
data.sku_id,
data.order_id,
data.create_time,
data.appraise
from ${APP}.ods_comment_info_inc
where dt='$do_date'
and type='bootstrap-insert'
)ci
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='12'
)dic
on ci.appraise=dic.dic_code;
"
dwd_interaction_favor_add_inc="
insert overwrite table ${APP}.dwd_interaction_favor_add_inc partition(dt)
select
data.id,
data.user_id,
data.sku_id,
date_format(data.create_time,'yyyy-MM-dd') date_id,
data.create_time,
date_format(data.create_time,'yyyy-MM-dd')
from ${APP}.ods_favor_info_inc
where dt='$do_date'
and type = 'bootstrap-insert';
"
dwd_tool_coupon_get_inc="
insert overwrite table ${APP}.dwd_tool_coupon_get_inc partition(dt)
select
data.id,
data.coupon_id,
data.user_id,
date_format(data.get_time,'yyyy-MM-dd') date_id,
data.get_time,
date_format(data.get_time,'yyyy-MM-dd')
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
and type='bootstrap-insert';
"
dwd_tool_coupon_order_inc="
insert overwrite table ${APP}.dwd_tool_coupon_order_inc partition(dt)
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.using_time,'yyyy-MM-dd') date_id,
data.using_time,
date_format(data.using_time,'yyyy-MM-dd')
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
and type='bootstrap-insert'
and data.using_time is not null;
"
dwd_tool_coupon_pay_inc="
insert overwrite table ${APP}.dwd_tool_coupon_pay_inc partition(dt)
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.used_time,'yyyy-MM-dd') date_id,
data.used_time,
date_format(data.used_time,'yyyy-MM-dd')
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
and type='bootstrap-insert'
and data.used_time is not null;
"
dwd_trade_cancel_detail_inc="
insert overwrite table ${APP}.dwd_trade_cancel_detail_inc partition (dt)
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_format(canel_time,'yyyy-MM-dd') date_id,
canel_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
date_format(canel_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) od
join
(
select
data.id,
data.user_id,
data.province_id,
data.operate_time canel_time
from ${APP}.ods_order_info_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
and data.order_status='1003'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
"
dwd_trade_cart_add_inc="
insert overwrite table ${APP}.dwd_trade_cart_add_inc partition (dt)
select
id,
user_id,
sku_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
source_id,
source_type,
dic.dic_name,
sku_num,
date_format(create_time, 'yyyy-MM-dd')
from
(
select
data.id,
data.user_id,
data.sku_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num
from ${APP}.ods_cart_info_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
)ci
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)dic
on ci.source_type=dic.dic_code;
"
dwd_trade_cart_full="
insert overwrite table ${APP}.dwd_trade_cart_full partition(dt='$do_date')
select
id,
user_id,
sku_id,
sku_name,
sku_num
from ${APP}.ods_cart_info_full
where dt='$do_date'
and is_ordered='0';
"
dwd_trade_order_detail_inc="
insert overwrite table ${APP}.dwd_trade_order_detail_inc partition (dt)
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_format(create_time, 'yyyy-MM-dd') date_id,
create_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) od
left join
(
select
data.id,
data.user_id,
data.province_id
from ${APP}.ods_order_info_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
"
dwd_trade_order_refund_inc="
insert overwrite table ${APP}.dwd_trade_order_refund_inc partition(dt)
select
ri.id,
user_id,
order_id,
sku_id,
province_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
refund_type,
type_dic.dic_name,
refund_reason_type,
reason_dic.dic_name,
refund_reason_txt,
refund_num,
refund_amount,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id,
data.user_id,
data.order_id,
data.sku_id,
data.refund_type,
data.refund_num,
data.refund_amount,
data.refund_reason_type,
data.refund_reason_txt,
data.create_time
from ${APP}.ods_order_refund_info_inc
where dt='$do_date'
and type='bootstrap-insert'
)ri
left join
(
select
data.id,
data.province_id
from ${APP}.ods_order_info_inc
where dt='$do_date'
and type='bootstrap-insert'
)oi
on ri.order_id=oi.id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code = '15'
)type_dic
on ri.refund_type=type_dic.dic_code
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code = '13'
)reason_dic
on ri.refund_reason_type=reason_dic.dic_code;
"
dwd_trade_pay_detail_suc_inc="
insert overwrite table ${APP}.dwd_trade_pay_detail_suc_inc partition (dt)
select
od.id,
od.order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
payment_type,
pay_dic.dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
source_id,
source_type,
src_dic.dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
date_format(callback_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) od
join
(
select
data.user_id,
data.order_id,
data.payment_type,
data.callback_time
from ${APP}.ods_payment_info_inc
where dt='$do_date'
and type='bootstrap-insert'
and data.payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
(
select
data.id,
data.province_id
from ${APP}.ods_order_info_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where dt = '$do_date'
and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)src_dic
on od.source_type=src_dic.dic_code;
"
dwd_trade_refund_pay_suc_inc="
insert overwrite table ${APP}.dwd_trade_refund_pay_suc_inc partition(dt)
select
rp.id,
user_id,
rp.order_id,
rp.sku_id,
province_id,
payment_type,
dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
refund_num,
total_amount,
date_format(callback_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.payment_type,
data.callback_time,
data.total_amount
from ${APP}.ods_refund_payment_inc
where dt='$do_date'
and type = 'bootstrap-insert'
and data.refund_status='1602'
)rp
left join
(
select
data.id,
data.user_id,
data.province_id
from ${APP}.ods_order_info_inc
where dt='$do_date'
and type='bootstrap-insert'
)oi
on rp.order_id=oi.id
left join
(
select
data.order_id,
data.sku_id,
data.refund_num
from ${APP}.ods_order_refund_info_inc
where dt='$do_date'
and type='bootstrap-insert'
)ri
on rp.order_id=ri.order_id
and rp.sku_id=ri.sku_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='11'
)dic
on rp.payment_type=dic.dic_code;
"
dwd_traffic_action_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_action_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
action_id,
action_item,
action_item_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
action.action_id,
action.item action_item,
action.item_type action_item_type,
action.ts
from ${APP}.ods_log_inc lateral view explode(actions) tmp as action
where dt='$do_date'
and actions is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_traffic_display_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_display_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') display_time,
display_type,
display_item,
display_item_type,
display_order,
display_pos_id
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
display.display_type,
display.item display_item,
display.item_type display_item_type,
display.\`order\` display_order,
display.pos_id display_pos_id,
ts
from ${APP}.ods_log_inc lateral view explode(displays) tmp as display
where dt='$do_date'
and displays is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_traffic_error_inc="
set hive.cbo.enable=false;
set hive.execution.engine=mr;
insert overwrite table ${APP}.dwd_traffic_error_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
entry,
loading_time,
open_ad_id,
open_ad_ms,
open_ad_skip_ms,
actions,
displays,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') error_time,
error_code,
error_msg
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
\`start\`.entry,
\`start\`.loading_time,
\`start\`.open_ad_id,
\`start\`.open_ad_ms,
\`start\`.open_ad_skip_ms,
actions,
displays,
err.error_code,
err.msg error_msg,
ts
from ${APP}.ods_log_inc
where dt='$do_date'
and err is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
set hive.execution.engine=spark;
"
dwd_traffic_page_view_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_page_view_inc partition (dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') view_time,
concat(mid_id,'-',last_value(session_start_point,true) over (partition by mid_id order by ts)) session_id,
during_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ${APP}.ods_log_inc
where dt='$do_date'
and page is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_traffic_start_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_start_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
entry,
open_ad_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time,
loading_time,
open_ad_ms,
open_ad_skip_ms
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
\`start\`.entry,
\`start\`.loading_time,
\`start\`.open_ad_id,
\`start\`.open_ad_ms,
\`start\`.open_ad_skip_ms,
ts
from ${APP}.ods_log_inc
where dt='$do_date'
and \`start\` is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_user_login_inc="
insert overwrite table ${APP}.dwd_user_login_inc partition(dt='$do_date')
select
user_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') login_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
row_number() over (partition by session_id order by ts) rn
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
concat(mid_id,'-',last_value(session_start_point,true) over(partition by mid_id order by ts)) session_id
from
(
select
common.uid user_id,
common.ch channel,
common.ar area_code,
common.vc version_code,
common.mid mid_id,
common.ba brand,
common.md model,
common.os operate_system,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ${APP}.ods_log_inc
where dt='$do_date'
and page is not null
)t1
)t2
where user_id is not null
)t3
where rn=1
)t4
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on t4.area_code=bp.area_code;
"
dwd_user_register_inc="
insert overwrite table ${APP}.dwd_user_register_inc partition(dt)
select
ui.user_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id user_id,
data.create_time
from ${APP}.ods_user_info_inc
where dt='$do_date'
and type='bootstrap-insert'
)ui
left join
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code
from ${APP}.ods_log_inc
where dt='$do_date'
and page.page_id='register'
and common.uid is not null
)log
on ui.user_id=log.user_id
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
case $1 in
"dwd_interaction_comment_inc" )
hive -e "$dwd_interaction_comment_inc"
;;
"dwd_interaction_favor_add_inc" )
hive -e "$dwd_interaction_favor_add_inc"
;;
"dwd_tool_coupon_get_inc" )
hive -e "$dwd_tool_coupon_get_inc"
;;
"dwd_tool_coupon_order_inc" )
hive -e "$dwd_tool_coupon_order_inc"
;;
"dwd_tool_coupon_pay_inc" )
hive -e "$dwd_tool_coupon_pay_inc"
;;
"dwd_trade_cancel_detail_inc" )
hive -e "$dwd_trade_cancel_detail_inc"
;;
"dwd_trade_cart_add_inc" )
hive -e "$dwd_trade_cart_add_inc"
;;
"dwd_trade_cart_full" )
hive -e "$dwd_trade_cart_full"
;;
"dwd_trade_order_detail_inc" )
hive -e "$dwd_trade_order_detail_inc"
;;
"dwd_trade_order_refund_inc" )
hive -e "$dwd_trade_order_refund_inc"
;;
"dwd_trade_pay_detail_suc_inc" )
hive -e "$dwd_trade_pay_detail_suc_inc"
;;
"dwd_trade_refund_pay_suc_inc" )
hive -e "$dwd_trade_refund_pay_suc_inc"
;;
"dwd_traffic_action_inc" )
hive -e "$dwd_traffic_action_inc"
;;
"dwd_traffic_display_inc" )
hive -e "$dwd_traffic_display_inc"
;;
"dwd_traffic_error_inc" )
hive -e "$dwd_traffic_error_inc"
;;
"dwd_traffic_page_view_inc" )
hive -e "$dwd_traffic_page_view_inc"
;;
"dwd_traffic_start_inc" )
hive -e "$dwd_traffic_start_inc"
;;
"dwd_user_login_inc" )
hive -e "$dwd_user_login_inc"
;;
"dwd_user_register_inc" )
hive -e "$dwd_user_register_inc"
;;
"all" )
hive -e "$dwd_interaction_comment_inc$dwd_interaction_favor_add_inc$dwd_tool_coupon_get_inc$dwd_tool_coupon_order_inc$dwd_tool_coupon_pay_inc$dwd_trade_cancel_detail_inc$dwd_trade_cart_add_inc$dwd_trade_cart_full$dwd_trade_order_detail_inc$dwd_trade_order_refund_inc$dwd_trade_pay_detail_suc_inc$dwd_trade_refund_pay_suc_inc$dwd_traffic_action_inc$dwd_traffic_display_inc$dwd_traffic_error_inc$dwd_traffic_page_view_inc$dwd_traffic_start_inc$dwd_user_login_inc$dwd_user_register_inc"
esac
七、每日裝載腳本 #!/bin/bash
APP=gmall
# 如果是輸入的日期按照取輸入日期;如果沒輸入日期取目前時間的前一天
if [ -n "$2" ] ;then
do_date=$2
else
do_date=`date -d "-1 day" +%F`
fi
dwd_interaction_comment_inc="
insert overwrite table ${APP}.dwd_interaction_comment_inc partition(dt='$do_date')
select
id,
user_id,
sku_id,
order_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
appraise,
dic_name
from
(
select
data.id,
data.user_id,
data.sku_id,
data.order_id,
data.create_time,
data.appraise
from ${APP}.ods_comment_info_inc
where dt='$do_date'
and type='insert'
)ci
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='12'
)dic
on ci.appraise=dic.dic_code;
"
dwd_interaction_favor_add_inc="
insert overwrite table ${APP}.dwd_interaction_favor_add_inc partition(dt='$do_date')
select
data.id,
data.user_id,
data.sku_id,
date_format(data.create_time,'yyyy-MM-dd') date_id,
data.create_time
from ${APP}.ods_favor_info_inc
where dt='$do_date'
and type = 'insert';
"
dwd_tool_coupon_get_inc="
insert overwrite table ${APP}.dwd_tool_coupon_get_inc partition (dt='$do_date')
select
data.id,
data.coupon_id,
data.user_id,
date_format(data.get_time,'yyyy-MM-dd') date_id,
data.get_time
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
and type='insert';
"
dwd_tool_coupon_order_inc="
insert overwrite table ${APP}.dwd_tool_coupon_order_inc partition(dt='$do_date')
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.using_time,'yyyy-MM-dd') date_id,
data.using_time
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
and type='update'
and array_contains(map_keys(old),'using_time');
"
dwd_tool_coupon_pay_inc="
insert overwrite table ${APP}.dwd_tool_coupon_pay_inc partition(dt='$do_date')
select
data.id,
data.coupon_id,
data.user_id,
data.order_id,
date_format(data.used_time,'yyyy-MM-dd') date_id,
data.used_time
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
and type='update'
and array_contains(map_keys(old),'used_time');
"
dwd_trade_cancel_detail_inc="
insert overwrite table ${APP}.dwd_trade_cancel_detail_inc partition (dt='$do_date')
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_format(canel_time,'yyyy-MM-dd') date_id,
canel_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where (dt='$do_date' or dt=date_add('$do_date',-1))
and (type = 'insert' or type= 'bootstrap-insert')
) od
join
(
select
data.id,
data.user_id,
data.province_id,
data.operate_time canel_time
from ${APP}.ods_order_info_inc
where dt = '$do_date'
and type = 'update'
and data.order_status='1003'
and array_contains(map_keys(old),'order_status')
) oi
on order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where (dt='$do_date' or dt=date_add('$do_date',-1))
and (type = 'insert' or type= 'bootstrap-insert')
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where (dt='$do_date' or dt=date_add('$do_date',-1))
and (type = 'insert' or type= 'bootstrap-insert')
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
"
dwd_trade_cart_add_inc="
insert overwrite table ${APP}.dwd_trade_cart_add_inc partition(dt='$do_date')
select
id,
user_id,
sku_id,
date_id,
create_time,
source_id,
source_type_code,
source_type_name,
sku_num
from
(
select
data.id,
data.user_id,
data.sku_id,
date_format(from_utc_timestamp(ts*1000,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts*1000,'GMT+8'),'yyyy-MM-dd HH:mm:ss') create_time,
data.source_id,
data.source_type source_type_code,
if(type='insert',data.sku_num,data.sku_num-old['sku_num']) sku_num
from ${APP}.ods_cart_info_inc
where dt='$do_date'
and (type='insert'
or(type='update' and old['sku_num'] is not null and data.sku_num>cast(old['sku_num'] as int)))
)cart
left join
(
select
dic_code,
dic_name source_type_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)dic
on cart.source_type_code=dic.dic_code;
"
dwd_trade_cart_full="
insert overwrite table ${APP}.dwd_trade_cart_full partition(dt='$do_date')
select
id,
user_id,
sku_id,
sku_name,
sku_num
from ${APP}.ods_cart_info_full
where dt='$do_date'
and is_ordered='0';
"
dwd_trade_order_detail_inc="
insert overwrite table ${APP}.dwd_trade_order_detail_inc partition (dt='$do_date')
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_id,
create_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
date_format(data.create_time, 'yyyy-MM-dd') date_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where dt = '$do_date'
and type = 'insert'
) od
left join
(
select
data.id,
data.user_id,
data.province_id
from ${APP}.ods_order_info_inc
where dt = '$do_date'
and type = 'insert'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where dt = '$do_date'
and type = 'insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where dt = '$do_date'
and type = 'insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
"
dwd_trade_order_refund_inc="
insert overwrite table ${APP}.dwd_trade_order_refund_inc partition(dt='$do_date')
select
ri.id,
user_id,
order_id,
sku_id,
province_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
refund_type,
type_dic.dic_name,
refund_reason_type,
reason_dic.dic_name,
refund_reason_txt,
refund_num,
refund_amount
from
(
select
data.id,
data.user_id,
data.order_id,
data.sku_id,
data.refund_type,
data.refund_num,
data.refund_amount,
data.refund_reason_type,
data.refund_reason_txt,
data.create_time
from ${APP}.ods_order_refund_info_inc
where dt='$do_date'
and type='insert'
)ri
left join
(
select
data.id,
data.province_id
from ${APP}.ods_order_info_inc
where dt='$do_date'
and type='update'
and data.order_status='1005'
and array_contains(map_keys(old),'order_status')
)oi
on ri.order_id=oi.id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code = '15'
)type_dic
on ri.refund_type=type_dic.dic_code
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code = '13'
)reason_dic
on ri.refund_reason_type=reason_dic.dic_code;
"
dwd_trade_pay_detail_suc_inc="
insert overwrite table ${APP}.dwd_trade_pay_detail_suc_inc partition (dt='$do_date')
select
od.id,
od.order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
payment_type,
pay_dic.dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
source_id,
source_type,
src_dic.dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) od
join
(
select
data.user_id,
data.order_id,
data.payment_type,
data.callback_time
from ${APP}.ods_payment_info_inc
where dt='$do_date'
and type='update'
and array_contains(map_keys(old),'payment_status')
and data.payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
(
select
data.id,
data.province_id
from ${APP}.ods_order_info_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)src_dic
on od.source_type=src_dic.dic_code;
"
dwd_trade_refund_pay_suc_inc="
insert overwrite table ${APP}.dwd_trade_refund_pay_suc_inc partition(dt='$do_date')
select
rp.id,
user_id,
rp.order_id,
rp.sku_id,
province_id,
payment_type,
dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
refund_num,
total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.payment_type,
data.callback_time,
data.total_amount
from ${APP}.ods_refund_payment_inc
where dt='$do_date'
and type = 'update'
and array_contains(map_keys(old),'refund_status')
and data.refund_status='1602'
)rp
left join
(
select
data.id,
data.user_id,
data.province_id
from ${APP}.ods_order_info_inc
where dt='$do_date'
and type='update'
and data.order_status='1006'
and array_contains(map_keys(old),'order_status')
)oi
on rp.order_id=oi.id
left join
(
select
data.order_id,
data.sku_id,
data.refund_num
from ${APP}.ods_order_refund_info_inc
where dt='$do_date'
and type='update'
and data.refund_status='0705'
and array_contains(map_keys(old),'refund_status')
)ri
on rp.order_id=ri.order_id
and rp.sku_id=ri.sku_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='11'
)dic
on rp.payment_type=dic.dic_code;
"
dwd_traffic_action_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_action_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
action_id,
action_item,
action_item_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
action.action_id,
action.item action_item,
action.item_type action_item_type,
action.ts
from ${APP}.ods_log_inc lateral view explode(actions) tmp as action
where dt='$do_date'
and actions is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_traffic_display_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_display_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') display_time,
display_type,
display_item,
display_item_type,
display_order,
display_pos_id
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
display.display_type,
display.item display_item,
display.item_type display_item_type,
display.\`order\` display_order,
display.pos_id display_pos_id,
ts
from ${APP}.ods_log_inc lateral view explode(displays) tmp as display
where dt='$do_date'
and displays is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_traffic_error_inc="
set hive.cbo.enable=false;
set hive.execution.engine=mr;
insert overwrite table ${APP}.dwd_traffic_error_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
entry,
loading_time,
open_ad_id,
open_ad_ms,
open_ad_skip_ms,
actions,
displays,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') error_time,
error_code,
error_msg
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
\`start\`.entry,
\`start\`.loading_time,
\`start\`.open_ad_id,
\`start\`.open_ad_ms,
\`start\`.open_ad_skip_ms,
actions,
displays,
err.error_code,
err.msg error_msg,
ts
from ${APP}.ods_log_inc
where dt='$do_date'
and err is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
set hive.execution.engine=spark;
"
dwd_traffic_page_view_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_page_view_inc partition (dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') view_time,
concat(mid_id,'-',last_value(session_start_point,true) over (partition by mid_id order by ts)) session_id,
during_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ${APP}.ods_log_inc
where dt='$do_date'
and page is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_traffic_start_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_start_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
entry,
open_ad_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time,
loading_time,
open_ad_ms,
open_ad_skip_ms
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
\`start\`.entry,
\`start\`.loading_time,
\`start\`.open_ad_id,
\`start\`.open_ad_ms,
\`start\`.open_ad_skip_ms,
ts
from ${APP}.ods_log_inc
where dt='$do_date'
and \`start\` is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
dwd_user_login_inc="
insert overwrite table ${APP}.dwd_user_login_inc partition(dt='$do_date')
select
user_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') login_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
row_number() over (partition by session_id order by ts) rn
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
concat(mid_id,'-',last_value(session_start_point,true) over(partition by mid_id order by ts)) session_id
from
(
select
common.uid user_id,
common.ch channel,
common.ar area_code,
common.vc version_code,
common.mid mid_id,
common.ba brand,
common.md model,
common.os operate_system,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ${APP}.ods_log_inc
where dt='$do_date'
and page is not null
)t1
)t2
where user_id is not null
)t3
where rn=1
)t4
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on t4.area_code=bp.area_code;
"
dwd_user_register_inc="
insert overwrite table ${APP}.dwd_user_register_inc partition(dt='$do_date')
select
ui.user_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
data.id user_id,
data.create_time
from ${APP}.ods_user_info_inc
where dt='$do_date'
and type='insert'
)ui
left join
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code
from ${APP}.ods_log_inc
where dt='$do_date'
and page.page_id='register'
and common.uid is not null
)log
on ui.user_id=log.user_id
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
case $1 in
"dwd_interaction_comment_inc" )
hive -e "$dwd_interaction_comment_inc"
;;
"dwd_interaction_favor_add_inc" )
hive -e "$dwd_interaction_favor_add_inc"
;;
"dwd_tool_coupon_get_inc" )
hive -e "$dwd_tool_coupon_get_inc"
;;
"dwd_tool_coupon_order_inc" )
hive -e "$dwd_tool_coupon_order_inc"
;;
"dwd_tool_coupon_pay_inc" )
hive -e "$dwd_tool_coupon_pay_inc"
;;
"dwd_trade_cancel_detail_inc" )
hive -e "$dwd_trade_cancel_detail_inc"
;;
"dwd_trade_cart_add_inc" )
hive -e "$dwd_trade_cart_add_inc"
;;
"dwd_trade_cart_full" )
hive -e "$dwd_trade_cart_full"
;;
"dwd_trade_order_detail_inc" )
hive -e "$dwd_trade_order_detail_inc"
;;
"dwd_trade_order_refund_inc" )
hive -e "$dwd_trade_order_refund_inc"
;;
"dwd_trade_pay_detail_suc_inc" )
hive -e "$dwd_trade_pay_detail_suc_inc"
;;
"dwd_trade_refund_pay_suc_inc" )
hive -e "$dwd_trade_refund_pay_suc_inc"
;;
"dwd_traffic_action_inc" )
hive -e "$dwd_traffic_action_inc"
;;
"dwd_traffic_display_inc" )
hive -e "$dwd_traffic_display_inc"
;;
"dwd_traffic_error_inc" )
hive -e "$dwd_traffic_error_inc"
;;
"dwd_traffic_page_view_inc" )
hive -e "$dwd_traffic_page_view_inc"
;;
"dwd_traffic_start_inc" )
hive -e "$dwd_traffic_start_inc"
;;
"dwd_user_login_inc" )
hive -e "$dwd_user_login_inc"
;;
"dwd_user_register_inc" )
hive -e "$dwd_user_register_inc"
;;
"all" )
hive -e "$dwd_interaction_comment_inc$dwd_interaction_favor_add_inc$dwd_tool_coupon_get_inc$dwd_tool_coupon_order_inc$dwd_tool_coupon_pay_inc$dwd_trade_cancel_detail_inc$dwd_trade_cart_add_inc$dwd_trade_cart_full$dwd_trade_order_detail_inc$dwd_trade_order_refund_inc$dwd_trade_pay_detail_suc_inc$dwd_trade_refund_pay_suc_inc$dwd_traffic_action_inc$dwd_traffic_display_inc$dwd_traffic_error_inc$dwd_traffic_page_view_inc$dwd_traffic_start_inc$dwd_user_login_inc$dwd_user_register_inc"
esac