天天看點

埋點-埋點模型

 模型代碼,解析json和打标簽

一、建表語句

create table target_tab(
  event_time bigint comment '事件時間',
  event_timestamp bigint comment '事件時間戳',
  video_time bigint comment '直播跨天時間',
  video_timestamp bigint comment '直播跨天時間戳',
  req_idx int comment '埋點請求次數序号',
  did string comment '裝置id',
  client_type int comment '端類型',
  app_version string comment 'app版本号',
  sdk_version string comment 'sdk版本号',
  biz_type int comment '業務線',
  user_agent string comment '使用者裝置UA',
  user_number string comment '使用者ID',
  gif string comment '資料源topic',
  biz_name string comment '業務線-産品辨別 源自新版SDK公共字段name',
  ngx_timestamp bigint comment '日志到達ngnix的unix時間戳',
  is_video int comment '直播标簽 1直播 0非直播',
  label_map map<string,string> comment '名額标簽')
partitioned by (dt int comment '分區')      

二、模型代碼

set hive.exec.dynamic.partition.mode = nonstrict;
insert overwrite table bdg_inf.target_tab partition(dt)

select 
 event_time
 ,event_timestamp
 ,video_time
 ,video_timestamp
 ,req_idx
 ,case
    when gid is not null then gid
    when did is not null then did
 end did
 ,client_type
 ,app_version
 ,sdk_version
 ,biz_type
 ,user_agent
 ,user_number
 ,gif
 ,biz_name
 ,ngx_timestamp
 ,is_video
 ,map(
    ----丢失率
    --web
    'label_loss_02', IF(web直播條件,'1','0'),
    --ios直播
    'label_loss_03', IF(ios直播條件,'1','0'),
    --android直播
    'label_loss_04', IF(安卓直播條件,'1','0'),
    ----延遲率
    --web
    'label_delay_02', IF(web延遲率,'1','0'),
    --ios直播
    'label_delay_03', IF(~~~~,'1','0'),
    --android直播
    'label_delay_04', IF(~~~~,'1','0')
 ) as label_map
 ,dt
 from (
    select 
    from_unixtime(cast(event_timestamp / 1000 as bigint),'yyyyMMdd') as event_time
    ,event_timestamp
    ,from_unixtime(cast(video_timestamp / 1000 as bigint),'yyyyMMdd') as video_time
    ,video_timestamp
    ,req_idx
    ,did
    ,gid
    ,case
        --轉成和視訊一樣  3:ios  4:android
        when is_video=0 and (client_type = 3 or client_type = 4) then 3
        when is_video=0 and client_type = 2 then 4
        else client_type  --直播的特殊,已經提前轉化
    end client_type
    ,app_version
    ,sdk_version
    ,biz_type
    ,user_agent
    ,user_number
    ,gif
    ,biz_name
    ,ngx_timestamp
    ,is_video
    ,cast(regexp_extract(sdk_version, '(\\d+).(\\d+).(\\d+)', 1) as BigInt) as web_v1
    ,cast(regexp_extract(sdk_version, '(\\d+).(\\d+).(\\d+)', 2) as BigInt) as web_v2
    ,cast(regexp_extract(app_version, '(\\d+).(\\d+).(\\d+)', 1) as BigInt) as app_v1
    ,cast(regexp_extract(app_version, '(\\d+).(\\d+).(\\d+)', 2) as BigInt) as app_v2
    ,dt
    from(
        select
        event_timestamp,
        0 as video_timestamp,
        cast(get_json_object(params, '$.req_idx') as BigInt) req_idx,
        tid as did,
        get_json_object(params, '$.gid') gid,
        client_type,
        get_json_object(params, '$.app_version') app_version,
        event_id,
        user_id,
        sdk_version,
        biz_type,
        gif,
        net_type,
        user_agent,
        user_number,
        biz_name,
        ngx_timestamp,
        0 as is_video,
        dt
        from
        dw.tab1
        where dt =${dt}
        union all
        select
        event_timestamp,
        0 as video_timestamp,
        cast(get_json_object(params, '$.req_idx') as BigInt) req_idx,
        tid as did,
        get_json_object(params, '$.gid') gid,
        client_type,
        get_json_object(params, '$.app_version') app_version,
        event_id,
        user_id,
        sdk_version,
        biz_type,
        gif,
        net_type,
        user_agent,
        user_number,
        biz_name,
        ngx_timestamp,
        0 as is_video,
        dt
        from
        dw.tab2
        where dt =${dt}
    )as union_tab
)as out_tab 
where (web_v1>2 or (web_v1 >= 2 and web_v2 >= 1))
or (app_v1>4 or (app_v1 >= 4 and app_v2 >= 23))