create [external] table [if not exists] table_name( //external: creates an external table; dropping a managed (internal) table deletes both the metadata and the data, while dropping an external table deletes only the metadata; the data stays on HDFS
col_name data_type [comment 'column description']
col_name data_type [comment 'column description']
[comment 'table description']
[partitioned by (col_name data_type, ...)] //table partitioning; a table can have multiple partitions, each stored in its own directory
[clustered by (col_name, col_name, ...)] //bucketing: splits the data into separate files
[sorted by (col_name [asc|desc], ...) into num_buckets buckets] //sort order within each bucket
[row format row_format] //specifies the delimiters of the table's data
[stored as ...] //specifies the storage format; plain text: stored as textfile; when compression is needed: stored as sequencefile
[location 'storage path of the table']
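A sketch putting these clauses together (the table name, columns, and path are made up for illustration):
create external table if not exists emp(
id int comment 'employee id',
name string comment 'employee name')
comment 'demo external table'
partitioned by (dt string)
row format delimited fields terminated by ','
stored as textfile
location '/data/emp';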
create table stu(id int,name string) row format delimited fields terminated by '\t'
create table stu(id int,name string) row format delimited fields terminated by '\t' location '/usr/students' //directory path in HDFS (location expects a directory, given as an absolute path)
create table stu1 like students //copies only the table structure, not the data
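By contrast, create-table-as-select (CTAS) copies both the structure and the data; a minimal sketch, assuming the students table above:
create table stu2 as select * from students; //schema and data in one step, runs a job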
load data local inpath 'file path' into table students //local: the path is on the local Linux filesystem and the file is copied
load data local inpath 'file path' overwrite into table students //loads data and overwrites the existing data
load data inpath 'file path' into table students //without local, the path is an HDFS path and the file is moved into the table's directory
Create a partitioned table
create table score(id int,subject int,score int) partitioned by (year string,month string,day string) row format delimited fields terminated by ',';
Load data into the partitioned table
load data local inpath '/usr/local/data/score.txt' into table score partition (year='2021',month='9',day='26');
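To check the result, a quick sketch (filtering on the partition columns prunes the scan to one directory):
show partitions score; //should list year=2021/month=9/day=26
select * from score where year='2021' and month='9' and day='26';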
Partitioning process and results
The difference between union and union all: union automatically deduplicates, while union all keeps duplicates.
This triggers a MapReduce job, and a union operation appears in the execution plan.
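A minimal sketch of such a query against the score table above (the day='27' partition is assumed for illustration):
select * from score where day='26'
union all
select * from score where day='27'; //switch to union when duplicate rows must be removed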
Create the directory: hadoop dfs -mkdir -p /datacource/month=8888
Upload the file: hadoop dfs -put /usr/local/data/cource.txt /datacource/month=8888
Create an external table in Hive (the data is kept when the table is dropped): hive> create external table courcepa1 (id int,subject string,score int) partitioned by (month string) row format delimited fields terminated by ',' location '/datacource';
Repair the table so Hive registers the existing partition directories: msck repair table courcepa1
1. Enable Hive bucketing
set hive.enforce.bucketing=true;
2. Set the number of reducers
set mapreduce.job.reduces=3;
3. Create the bucketed table
create table coursecl (id int,subject string,score int) clustered by (id) into 3 buckets row format delimited fields terminated by ',';
4. Load data into the bucketed table
4.1 Create a regular staging table
create table common_coursecl (id int,subject string,score int) row format delimited fields terminated by ',';
4.2 Load data into the regular table
load data local inpath '/usr/local/data/cource.txt' into table common_coursecl;
4.3 Load data from the regular table into the bucketed table (insert overwrite)
insert overwrite table coursecl select * from common_coursecl cluster by(id); //launches a MapReduce job
(In this experiment my data had ten rows, but after splitting into 3 buckets only nine rows remained; the first row of data was missing)
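To inspect how rows landed in each bucket, bucket sampling reads just one bucket's file; a sketch:
select * from coursecl tablesample(bucket 1 out of 3 on id); //rows from the first of the 3 buckets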
Rename a table
alter table score rename to score1;
Add columns (add columns)
alter table score1 add columns(add1 string,add2 string);
Update a column (change column)
alter table score1 change column add2 add2new int;
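change column can also rename, re-comment, and reposition a column in one statement; a sketch against the columns added above:
alter table score1 change column add1 add1new string comment 'renamed column' after id; //moves the column to sit after id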
1. Set the number of reducers
set mapreduce.job.reduces=3;
Check the number of reducers
set mapreduce.job.reduces;
2. Sort in descending order
select * from score1 sort by score desc;
3. Write the query results to a file (on the local Linux filesystem)
insert overwrite local directory '/usr/local/data/sort' select * from score1 sort by score desc;
1. Set the number of reducers
set mapreduce.job.reduces=7;
2. Partitioned sorting (distribute by + sort by)
insert overwrite local directory '/sort' select * from score1 distribute by id sort by score;
select * from score1 cluster by id
is equivalent to: select * from score1 distribute by id sort by id
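For comparison, order by produces a single globally sorted output through one reducer, while sort by only sorts within each reducer; a sketch:
select * from score1 order by score desc; //total order, single reducer, slower on large data sets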
Hive parameter precedence: parameter declarations (set ... in the session) > command-line parameters (--hiveconf) > configuration file parameters (hive-site.xml)
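A sketch of the same parameter set at all three levels; the session-level declaration wins:
hive --hiveconf mapreduce.job.reduces=5 //command line, overrides hive-site.xml
hive> set mapreduce.job.reduces=3; //session declaration, overrides the command line
hive> set mapreduce.job.reduces; //prints the effective value: 3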
1. List the built-in functions
hive> show functions;
2. Show a built-in function's usage
hive> desc function in;
3. Show a built-in function's usage in detail
hive> desc function extended in;
4. Common functions
hive> select concat('hello','world','zyl'); //string concatenation
hive> select concat_ws(',','hello','world','zyl'); //concatenation with a specified separator
hive> select cast(1.5 as int); //type conversion
hive> select get_json_object('{"name":"zs","age":"20"}','$.name'); //parses a JSON string
URL parsing function
hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','HOST'); //the part name must be uppercase (HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO)
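parse_url can also extract other parts, including a single query parameter; a quick sketch on the same URL:
hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','PATH'); //returns /path1/p.php
hive> select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#ref1','QUERY','k1'); //returns v1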