create [external] table [if not exists] table_name(   // external: dropping a managed (internal) table deletes both the metadata and the data; dropping an external table deletes only the metadata, the data stays on HDFS
col_name data_type [comment 'column description'],
col_name data_type [comment 'column description']
)
[comment 'table description']
[partitioned by (col_name data_type, ...)]   // partitioning: a table can have multiple partitions, each stored in its own directory
[clustered by (col_name, col_name, ...)]   // bucketing: splits the data into files
[sorted by (col_name [asc|desc], ...) into num_buckets buckets]   // sort order within each bucket
[row format row_format]   // field delimiter of the table data
[stored as ...]   // storage format: plain text uses stored as textfile; with compression use stored as sequencefile
[location 'path where the table data is stored']
create table stu(id int,name string) row format delimited fields terminated by '\t';
create table stu(id int,name string) row format delimited fields terminated by '\t' location '/usr/students';   // directory of the table data in HDFS (the location should be an absolute path)
create table stu1 like students;
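create table ... like copies only the schema of the source table, not its rows. A quick check (a minimal sketch, assuming the students table above):
hive> desc stu1;   // same columns as students
hive> select count(*) from stu1;   // returns 0, no data is copied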
load data local inpath 'file path' into table students;   // load from the local filesystem
load data local inpath 'file path' overwrite into table students;   // load and overwrite the existing data
load data inpath 'file path' into table students;   // without local: the path is an HDFS path
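Without local, the source file is moved (not copied) from its HDFS location into the table's directory. A quick check, assuming the default warehouse path /user/hive/warehouse (yours may differ):
hadoop dfs -ls /user/hive/warehouse/students   // the loaded files now sit under the table directory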
Creating a partitioned table
create table score(id int,subject int,score int) partitioned by (year string,month string,day string) row format delimited fields terminated by ',';
Loading data into the partitioned table
load data local inpath '/usr/local/data/score.txt' into table score partition (year='2021',month='9',day='26');
Partitioning process and results
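Each partition gets its own subdirectory under the table directory (year=2021/month=9/day=26). To inspect the partitions and query just one of them:
hive> show partitions score;   // lists year=2021/month=9/day=26
hive> select * from score where year='2021' and month='9' and day='26';   // only that partition's directory is scanned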
The difference between union and union all: union deduplicates automatically
This triggers a MapReduce job in which the union operation appears
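For example, merging two partitions of the score table above (a sketch; the second partition day='27' is hypothetical):
select * from score where day='26' union all select * from score where day='27';   // keeps duplicates
select * from score where day='26' union select * from score where day='27';   // deduplicates, adds a MapReduce stage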
Create the directory: hadoop dfs -mkdir -p /datacource/month=8888
Upload the file: hadoop dfs -put /usr/local/data/cource.txt /datacource/month=8888
Create an external table in Hive (so the data survives a drop): hive> create external table courcepa1 (id int,subject string,score int) partitioned by (month string) row format delimited fields terminated by ',' location '/datacource';
Repair the table (sync the partition directories into the metastore): msck repair table courcepa1;
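msck repair scans the table's location on HDFS and registers partition directories such as month=8888 in the metastore. Verify with:
hive> show partitions courcepa1;   // month=8888 should now be listed
hive> select * from courcepa1 where month='8888';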
1. Enable Hive's bucketing feature
set hive.enforce.bucketing=true;
2. Set the number of reducers
set mapreduce.job.reduces=3;
3. Create the bucketed table
create table coursecl (id int,subject string,score int) clustered by (id) into 3 buckets row format delimited fields terminated by ',';
4. Load data into the bucketed table
4.1 Create a regular (non-bucketed) table
create table common_coursecl (id int,subject string,score int) row format delimited fields terminated by ',';
4.2 Load data into the regular table
load data local inpath '/usr/local/data/cource.txt' into table common_coursecl;
4.3 Load data from the regular table into the bucketed table (insert overwrite)
insert overwrite table coursecl select * from common_coursecl cluster by(id);   // launches a MapReduce job
(In this run my input had ten rows, but after bucketing into 3 buckets only nine remained; the first row was missing)
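Rows are assigned to buckets by hash(id) % 3, one file per bucket. A bucketed table can be sampled by bucket, which is a quick way to see what landed in each file (a minimal sketch):
hive> select * from coursecl tablesample(bucket 1 out of 3 on id);   // rows of the first bucket only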
Renaming a table
alter table score rename to score1;
Adding columns (add columns)
alter table score1 add columns(add1 string,add2 string);
Updating a column
alter table score1 change column add2 add2new int;   // renames add2 to add2new and changes its type to int
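The new schema can be confirmed with:
hive> desc score1;   // add1 string and add2new int should both appear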
1. Set the number of reducers
set mapreduce.job.reduces=3;
Check the number of reducers
set mapreduce.job.reduces;
2. Sort in descending order
select * from score1 sort by score desc;
3. Write the query result to a file (on the local Linux filesystem)
insert overwrite local directory '/usr/local/data/sort' select * from score1 sort by score desc;
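sort by only sorts within each reducer, so with 3 reducers the output directory contains three files, each sorted on its own (file names like 000000_0 are the usual reducer output naming, which may vary):
ls /usr/local/data/sort   // e.g. 000000_0  000001_0  000002_0, one file per reducer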
1. Set the number of reducers
set mapreduce.job.reduces=7;
2. Distribute and sort (distribute by + sort by)
insert overwrite local directory '/sort' select * from score1 distribute by id sort by score;
select * from score1 cluster by id;
is equivalent to: select * from score1 distribute by id sort by id;
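cluster by only sorts in ascending order; to sort descending, or to distribute and sort on different columns as in step 2 above, distribute by + sort by must be used:
select * from score1 distribute by id sort by id desc;   // not expressible with cluster by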
Hive parameter precedence: parameter declarations (set in the session) > command-line parameters (-hiveconf) > configuration file parameters (hive-site.xml)
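For example, with mapreduce.job.reduces: a value set in the session overrides the same parameter given on the command line, which in turn overrides hive-site.xml (a sketch):
hive -hiveconf mapreduce.job.reduces=5   // command-line parameter, overrides the config file
hive> set mapreduce.job.reduces=3;   // parameter declaration in the session, highest priority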
1. List the built-in functions
hive> show functions;
2. Show the usage of a built-in function
hive> desc function in;
3. Show the detailed usage of a built-in function
hive> desc function extended in;
4. Common functions
hive> select concat('hello','world','zyl');   // string concatenation
hive> select concat_ws(',','hello','world','zyl');   // concatenation with a given separator
hive> select cast(1.5 as int);   // type conversion
hive> select get_json_object('{"name":"zs","age":"20"}','$.name');   // JSON string parsing
URL parsing function
hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','HOST');   // the part name must be uppercase (HOST, PATH, QUERY, REF, ...)
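parse_url also accepts a third argument to extract a single key from the query string:
hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','QUERY','k1');   // returns v1
hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','PATH');   // returns /path1/p.php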