项目场景:
通过hivesql进行指标计算
求同比:
创建hive内表
create table ccic_dev.test_yoy(
yearmonth string,
city string,
count int
)STORED AS PARQUET;
插入数据
insert into ccic_dev.test_yoy values ('2020-01-01','上海',1000);
insert into ccic_dev.test_yoy values ('2020-02-01','杭州',1200);
insert into ccic_dev.test_yoy values ('2020-03-01','深圳',1300);
insert into ccic_dev.test_yoy values ('2020-04-01','广州',1400);
insert into ccic_dev.test_yoy values ('2019-01-01','上海',900);
insert into ccic_dev.test_yoy values ('2019-02-01','杭州',1300);
insert into ccic_dev.test_yoy values ('2019-03-01','深圳',700);
insert into ccic_dev.test_yoy values ('2019-04-01','广州',600);
求同比
SELECT
t1.year_month,
t1.city,
CASE WHEN t1.now_count IS NULL
OR t1.now_count = 0 THEN
0 ELSE t1.now_count
END now_count,
CASE
WHEN t2.last_count IS NULL OR t2.last_count = 0 THEN 0 ELSE last_count
END last_count,
CASE WHEN t2.last_count IS NULL OR t2.last_count = 0 THEN 0 ELSE( t1.now_count - t2.last_count )
END count_yoy
FROM
(
SELECT
DATE_FORMAT( yearmonth, 'yyyy-MM' ) AS year_month,
sum( count ) AS now_count ,
city
FROM ccic_dev.test_yoy
GROUP BY
DATE_FORMAT( yearmonth, 'yyyy-MM' ),
city
) t1
LEFT JOIN (
SELECT
DATE_FORMAT( DATE_ADD( yearmonth, 366 ), 'yyyy-MM' ) AS year_month,
sum( count ) AS last_count ,
city
FROM ccic_dev.test_yoy
GROUP BY
DATE_FORMAT( DATE_ADD( yearmonth, 366 ), 'yyyy-MM' ),
city
) AS t2 ON t1.year_month = t2.year_month and t1.city=t2.city;
结果: