
Hive

create [external] table [if not exists] table_name(      // external: when a managed (internal) table is dropped, its metadata and data are deleted together; dropping an external table removes only the metadata, and the data stays on HDFS

col_name  data_type [comment 'column description'],

col_name  data_type [comment 'column description'])

[comment 'table description']

[partitioned by (col_name data_type, ...)]   // partitioning: a table can have multiple partitions, and each partition is stored under its own directory

[clustered by (col_name, col_name, ...)]   // bucketing: splits the data into separate files

[sorted by (col_name [asc|desc], ...) into num_buckets buckets]   // sort order within each bucket

[row format row_format]   // specify the field delimiters for the table's data

[stored as ...]   // specify the storage format; plain text: stored as textfile, compressed: stored as sequencefile

[location 'hdfs_path']   // specify the table's storage path
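A minimal sketch putting these clauses together (the table, columns, and path are made up for illustration):

create external table if not exists emp(
id int comment 'employee id',
name string comment 'employee name')
comment 'demo employee table'
partitioned by (dt string)
row format delimited fields terminated by ','
stored as textfile
location '/user/hive/warehouse/emp';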

create table stu(id int,name string) row format delimited fields terminated by '\t';

create table stu(id int,name string) row format delimited fields terminated by '\t' location '/usr/students';   // directory path in HDFS (must be absolute)

create table stu1 like students;   // copies only the table structure of students, not its data

load data local inpath 'file_path' into table students;   // load from the local filesystem

load data local inpath 'file_path' overwrite into table students;   // load and overwrite the existing data

load data inpath 'file_path' into table students;   // without local: the file is moved from its current HDFS location into the table's directory

Create a partitioned table

create table score(id int,subject int,score int) partitioned by (year string,month string,day string) row format delimited fields terminated by ',';

Load data into the partitioned table

load data local inpath '/usr/local/data/score.txt' into table score partition (year='2021',month='9',day='26');
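After the load, the partition columns can be filtered on like ordinary columns (a quick check against the load above):

select * from score where year='2021' and month='9' and day='26';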

(Screenshots: the partitioning process and its results.)

The difference between union and union all: union automatically removes duplicate rows, while union all keeps them.
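A minimal sketch against the score table from above (top-level union requires a reasonably recent Hive, 1.2+):

select id from score union all select id from score;   // every row appears twice
select id from score union select id from score;   // duplicates are removed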

At this point the query triggers a MapReduce job, and the union operation appears in it.

Create the directory: hadoop dfs -mkdir -p /datacource/month=8888

Upload the file: hadoop dfs -put /usr/local/data/cource.txt /datacource/month=8888

Create an external table in Hive (so the data is preserved if the table is dropped): hive> create external table courcepa1 (id int,subject string,score int) partitioned by (month string) row format delimited fields terminated by ',' location '/datacource';

 進行表的修複:msck repair table courcepa1

1. Enable Hive's bucketing feature

set hive.enforce.bucketing=true;

2. Set the number of reducers

set mapreduce.job.reduces=3;

3. Create a bucketed table

create table coursecl (id int,subject string,score int) clustered by (id) into 3 buckets row format delimited fields terminated by ',';

4. Load data into the bucketed table

  4.1 Create a regular table

  create table common_coursecl (id int,subject string,score int) row format delimited fields terminated by ',';

  4.2 Load data into the regular table

  load data local inpath '/usr/local/data/cource.txt' into table common_coursecl;

  4.3 Load data from the regular table into the bucketed table (insert overwrite)

  insert overwrite table coursecl select * from common_coursecl cluster by(id);   // this launches a MapReduce job
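To spot-check the distribution, tablesample can read back a single bucket (a sketch on the table above):

select * from coursecl tablesample(bucket 1 out of 3 on id);   // only rows hashed into the first bucket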

(In this experiment the input had ten rows, but after bucketing into 3 buckets only nine rows remained; the first row was missing.)

Rename a table

alter table score rename to score1;

Add columns (add columns)

alter table score1 add columns(add1 string,add2 string);


Update a column (change column)

alter table score1 change column add2 add2new int;
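desc confirms the change, with add2 now renamed to add2new and typed as int:

desc score1;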


1. Set the number of reducers

set mapreduce.job.reduces=3;

   Check the current number of reducers:

set mapreduce.job.reduces;

2. Sort in descending order

select * from score1 sort by score desc;   // sort by orders rows within each reducer, not globally

3. Write the query results to a file (on the local Linux filesystem)

insert overwrite local directory '/usr/local/data/sort' select * from score1 sort by score desc;
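With 3 reducers this produces 3 output files, each sorted on its own. For a single globally ordered result, order by can be used instead (it forces everything through one reducer):

select * from score1 order by score desc;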

1. Set the number of reducers

set mapreduce.job.reduces=7;

2. Partitioned sort (distribute by + sort by)

insert overwrite local directory '/sort' select * from score1 distribute by id sort by score;


select * from score1 cluster by id;

is equivalent to: select * from score1 distribute by id sort by id;
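cluster by only sorts in ascending order; for a descending sort on the distribution key, the two clauses have to be written out separately:

select * from score1 distribute by id sort by id desc;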

Hive parameter precedence: parameter declarations (set in the session) > command-line parameters > configuration file parameters (hive-site.xml)
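For example, a value declared with set inside the session overrides one passed when launching the client (the numbers here are arbitrary):

hive -hiveconf mapreduce.job.reduces=5   // command-line parameter, set at startup
set mapreduce.job.reduces=3;   // declaration inside the session, takes precedence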

1. List the built-in functions

hive> show functions;

2. Show a built-in function's usage

hive> desc function in;

3. Show a built-in function's usage in detail

hive> desc function extended in;

4. Common functions

hive> select concat('hello','world','zyl');    // string concatenation

hive> select concat_ws(',','hello','world','zyl');    // concatenation with a specified separator

select cast(1.5 as int);   // type conversion

hive> select get_json_object('{"name":"zs","age":"20"}','$.name');    // parse a JSON string


URL parsing function

hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','HOST');   // the part to extract must be uppercase; returns facebook.com
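parse_url also accepts a third argument to extract a single key from the query string:

hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','QUERY','k1');   // returns v1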

