模型代碼,解析json和打标簽
一、建表語句
create table target_tab(
event_time bigint comment '事件時間',
event_timestamp bigint comment '事件時間戳',
video_time bigint comment '直播跨天時間',
video_timestamp bigint comment '直播跨天時間戳',
req_idx int comment '埋點請求次數序号',
did string comment '裝置id',
client_type int comment '端類型',
app_version string comment 'app版本号',
sdk_version string comment 'sdk版本号',
biz_type int comment '業務線',
user_agent string comment '使用者裝置UA',
user_number string comment '使用者ID',
gif string comment '資料源topic',
biz_name string comment '業務線-産品辨別 源自新版SDK公共字段name',
ngx_timestamp bigint comment '日志到達ngnix的unix時間戳',
is_video int comment '直播标簽 1直播 0非直播',
label_map map<string,string> comment '名額标簽')
partitioned by (dt int comment '分區')
二、模型代碼
set hive.exec.dynamic.partition.mode = nonstrict;
insert overwrite table bdg_inf.target_tab partition(dt)
select
event_time
,event_timestamp
,video_time
,video_timestamp
,req_idx
,case
when gid is not null then gid
when did is not null then did
end did
,client_type
,app_version
,sdk_version
,biz_type
,user_agent
,user_number
,gif
,biz_name
,ngx_timestamp
,is_video
,map(
----丢失率
--web
'label_loss_02', IF(web直播條件,'1','0'),
--ios直播
'label_loss_03', IF(ios直播條件,'1','0'),
--android直播
'label_loss_04', IF(安卓直播條件,'1','0'),
----延遲率
--web
'label_delay_02', IF(web延遲率,'1','0'),
--ios直播
'label_delay_03', IF(~~~~,'1','0'),
--android直播
'label_delay_04', IF(~~~~,'1','0')
) as label_map
,dt
from (
select
from_unixtime(cast(event_timestamp / 1000 as bigint),'yyyyMMdd') as event_time
,event_timestamp
,from_unixtime(cast(video_timestamp / 1000 as bigint),'yyyyMMdd') as video_time
,video_timestamp
,req_idx
,did
,gid
,case
--轉成和視訊一樣 3:ios 4:android
when is_video=0 and (client_type = 3 or client_type = 4) then 3
when is_video=0 and client_type = 2 then 4
else client_type --直播的特殊,已經提前轉化
end client_type
,app_version
,sdk_version
,biz_type
,user_agent
,user_number
,gif
,biz_name
,ngx_timestamp
,is_video
,cast(regexp_extract(sdk_version, '(\\d+).(\\d+).(\\d+)', 1) as BigInt) as web_v1
,cast(regexp_extract(sdk_version, '(\\d+).(\\d+).(\\d+)', 2) as BigInt) as web_v2
,cast(regexp_extract(app_version, '(\\d+).(\\d+).(\\d+)', 1) as BigInt) as app_v1
,cast(regexp_extract(app_version, '(\\d+).(\\d+).(\\d+)', 2) as BigInt) as app_v2
,dt
from(
select
event_timestamp,
0 as video_timestamp,
cast(get_json_object(params, '$.req_idx') as BigInt) req_idx,
tid as did,
get_json_object(params, '$.gid') gid,
client_type,
get_json_object(params, '$.app_version') app_version,
event_id,
user_id,
sdk_version,
biz_type,
gif,
net_type,
user_agent,
user_number,
biz_name,
ngx_timestamp,
0 as is_video,
dt
from
dw.tab1
where dt =${dt}
union all
select
event_timestamp,
0 as video_timestamp,
cast(get_json_object(params, '$.req_idx') as BigInt) req_idx,
tid as did,
get_json_object(params, '$.gid') gid,
client_type,
get_json_object(params, '$.app_version') app_version,
event_id,
user_id,
sdk_version,
biz_type,
gif,
net_type,
user_agent,
user_number,
biz_name,
ngx_timestamp,
0 as is_video,
dt
from
dw.tab2
where dt =${dt}
)as union_tab
)as out_tab
where (web_v1>2 or (web_v1 >= 2 and web_v2 >= 1))
or (app_v1>4 or (app_v1 >= 4 and app_v2 >= 23))