天天看点

大数据开发第6课 hive分区表

作者:anjunact
大数据开发 第6课 分区表创建
1) 分区表技术与意义
   * 避免hive全表扫描,提升查询效率
   * 减少数据冗余进而提高特定(分区)查询分析的效率
   * 在逻辑上分区表与未分区表没有区别,在物理上分区表将数据按分区键分区的列值存储在表目录的子目录中,目录名为"分区键=键值"
   * 查询时尽量利用分区字段,如果不用分区字段,会全部扫描
2) 分区表类型
   * 静态分区表
   * 动态分区表           
create database class6;
use class6;
-- 创建分区表
create table test_partition1(
    sku_id string comment '商品id',
    sku_name string comment '商品名称'
) partitioned by (sku_class string);
-- 建分区表后,些时没数据,也没分区,需要建分区
alter table test_partition1 add partition (sku_class='xiaomi');
-- 查看表现有分区
show partitions test_partition1;
--静态分区添加数据
insert into table test_partition1 partition (sku_class='xiaomi') values ('001','xiaomi');
insert into table test_partition1 partition (sku_class='xiaomi') select sku_id,sku_name from sales_info;
select * from test_partition1;
--本地文件加载
--load data  local inpath '/xxx' into table test_partition1 partition(sku_class='xiaomi');

--严格模式下,对分区表,要加where过滤分区字段
set hive.mapred.mode=strict;
select * from test_partition1 where sku_class='xiaomi';
set hive.mapred.mode=nonstrict;
select * from test_partition1;

--动态分区添加数据
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table test_partition1 partition (sku_class)values ('001','xiaomi2','xiaomi');
insert into table test_partition1 partition (sku_class)values ('002','huawei2','huawei');
select  * from test_partition1;

--多字段分区
create table test_partition_mul(
    sku_id string comment '商品id',
    sku_name string comment '商品名称')
    partitioned by (sku_class string,sku_label string);
drop table test_partition_mul;
alter table test_partition_mul add partition (sku_class='xiaomi',sku_label='dianzi');
--查看现有分区
show partitions test_partition_mul;
--静态分区添加数据
insert into table test_partition_mul partition (sku_class='xiaomi',sku_label='dianzi')
values ('001','xiaomi1');
insert into table test_partition_mul partition (sku_class='xiaomi',sku_label='dianzi')
select sku_id ,sku_name from sales_info;
select * from test_partition_mul;

-- 动态分区 添加数据
insert into table test_partition_mul partition (sku_class,sku_label)
values ('001','xiaomi1','xiaomi1','dianzi1');
select * from test_partition_mul;
-- 删除分区及数据
alter table test_partition1 drop partition (sku_class='xiaomi');
alter table test_partition_mul drop partition (sku_class='xiaomi',sku_label='dianzi');