
Impala SQL usage notes

# Change the field delimiter

alter table firstround.person_aaa_crime_criminalperson1 set SERDEPROPERTIES('field.delim'='\001'); 

# Show the CREATE TABLE statement

show CREATE TABLE firstround.banklog_decrypt;

# Change a column's type

ALTER TABLE data_market.last_process_time CHANGE hive_count_num hive_count_num string;

# Rename a table

ALTER TABLE transfer_sjdk.bigdata_blacklist RENAME TO transfer_sjdk.tbl_riskprice_it;

# Add a column

ALTER TABLE transfer_sjdk.bigdata_blacklist_history ADD COLUMNS (is_black string);

# Drop a column

ALTER TABLE transfer_sjdk.bigdata_blacklist_history DROP is_black;

# Truncate a table's data

TRUNCATE transfer_sjdk.tbl_riskprice_it;

# Refresh a table

REFRESH transfer_sjdk.tbl_riskprice_it;

# Invalidate (fully reload) table metadata

INVALIDATE METADATA transfer_sjdk.tbl_riskprice_it;

# Re-shuffle a table's data

INSERT OVERWRITE data_market.repayment_split_table SELECT * FROM data_market.repayment_split_table;

# Copy table structure and data

create table transfer_zhifu_real.sy_role as select * from transfer_zhifu.sy_role;

# Copy table structure only

create table transfer_zhifu_real.sy_role like transfer_zhifu.sy_role;

# Rename a table

ALTER TABLE dh_0016_bigdata_08 RENAME TO dh_0016_bigdata;

# Change a column's type

ALTER TABLE users CHANGE dt dt string;

# Show all functions

show functions;

# Show a function's usage (Hive)

describe function substr;

# Export query results to a file

impala-shell -q "select * from table limit 10" -B --output_delimiter="\t" -o output.txt

# Collect table statistics, so Impala can automatically optimize queries based on each table's size, the number of distinct values in each column, and so on.

compute stats firstround.person_aaa_crime_escapeperson;
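To confirm what was gathered, Impala can display the collected statistics with SHOW TABLE STATS and SHOW COLUMN STATS (shown here against the same table):

show table stats firstround.person_aaa_crime_escapeperson;

show column stats firstround.person_aaa_crime_escapeperson;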

# Export and import (Hive)

export table poc.chan_info to '/user/hive/test/chan_info';

import from '/user/hive/test/chan_info';

# Export and import a partitioned table

export table poc.cust_basic_info partition (etl_dt="2017-12-14") to '/user/hive/test/cust_basic_info14';

import from '/user/hive/test/cust_basic_info14';

# Import under a new table name

import table cust_basic_info from '/user/hive/test/cust_basic_info14';

# Export a table and import it into a partition of a partitioned table

import table cust_basic_info partition (etl_dt="2017-12-14") from '/user/hive/test/cust_basic_info';

# Specify the import location

import table cust_basic_info from '/user/hive/test/cust_basic_info' location '/user/hive/test/cust_basic_info';

# Import as an external table

import external table cust_basic_info from '/user/hive/test/cust_basic_info';

# Force-drop a database (cascade drops its tables too)

DROP DATABASE zxfinance_alpha1_tuomin CASCADE;

# Data type conversion

SELECT cast(substr(createdate,1,10) as int)/86400 created FROM frontlog;

# Ranking within groups (window functions)

row_number() over (PARTITION BY t4.extractcashbillid,t1.ze_checktime ORDER BY t4.overduedateb DESC) flag 

Besides row_number there are also rank and dense_rank:

rank() over([partition by col1] order by col2) 

dense_rank() over([partition by col1] order by col2) 

row_number() over([partition by col1] order by col2)

rank allows ties: rows with equal values receive the same rank, and the ranks that follow skip ahead, like this: 1,1,3,4,5,5,7.

row_number never produces ties: even rows with equal values get distinct, consecutive sequence numbers.

dense_rank also allows ties, but never skips: the ranks stay consecutive, like this: 1,1,2,3,4,4,5.
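A minimal side-by-side illustration of the three functions (the scores table and score column are hypothetical):

select score,
  rank() over (order by score desc) as rnk,             -- ties share a rank, then skip: 1,1,3
  dense_rank() over (order by score desc) as dense_rnk, -- ties share a rank, no skip: 1,1,2
  row_number() over (order by score desc) as rn         -- always unique: 1,2,3
from scores;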

# Register a UDF

impala-shell -q "create function AES(string) returns string location '/user/hive/udf/aesUDF.jar' symbol='com.netease.hive.udf.AESUDF';"

# Create a partitioned table

CREATE TABLE IF NOT EXISTS asset_auxiliary.dh_0016_bigdata_history (
  autoindex INT,
  vindicatorteamid STRING,
  createtime STRING
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS TEXTFILE;

# Add a partition

alter table asset_auxiliary.dh_0016_bigdata_history add partition (dt='2017-12-12');

# Drop a partition

ALTER TABLE asset_auxiliary.dh_0016_bigdata_history DROP IF EXISTS PARTITION(dt='2017-12-12');

# Load data into a partition

LOAD DATA INPATH '/user/hive/warehouse/asset_auxiliary.db/dh_0016_bigdata_history/2017-12-12/000000_0' INTO TABLE asset_auxiliary.dh_0016_bigdata_history PARTITION(dt='2017-12-12');

# Write query results to HDFS (Hive)

insert overwrite directory '/user/hive/warehouse/asset_auxiliary.db/dh_0016_bigdata_history/dt=2017-12-12'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
select * from asset_auxiliary.dh_0016_bigdata_tmp where queryday = '2017-12-12';

# Write query results to a local directory (Hive)

insert overwrite local directory '/data/2017-12-12'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
select * from asset_auxiliary.dh_0016_bigdata_tmp where queryday = '2017-12-12';

# Insert data into a table partition

insert into table asset_auxiliary.dh_0016_bigdata_history PARTITION(dt='2017-12-10') select * from asset_auxiliary.dh_0016_bigdata_tmp where queryday = '2017-12-10';

# Show a table's partitions

show partitions asset_auxiliary.dh_0016_bigdata_history;

# Refresh a single partition

REFRESH asset_auxiliary.dh_0016_bigdata PARTITION(dt='2017-12-12');

# Repair table partitions (Hive)

msck repair table <table_name>;

# Create a table in Hive

create table test_label.user_json (
  userid string,
  accountid string,
  banknumber string,
  bankname string,
  isreceiveaccount string,
  isrepayaccount string,
  birthday string
)
comment 'user wide table'
partitioned by (createtime string comment 'time partition column')
row format delimited
fields terminated by '\t'
stored as textfile
location '/user/hdfs/fico/output/1491964346/user_back_json/';

### Impala query memory limit: Memory limit exceeded

ERROR: 

Memory limit exceeded

Query did not have enough memory to get the minimum required buffers in the block manager.

The limits can be lifted by setting the following options to -1 (unlimited):

mem_limit=-1

default_pool_mem_limit=-1b
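Instead of removing the limits globally, the cap can also be raised for a single session; a minimal sketch inside impala-shell (the 2gb value is an arbitrary example):

set mem_limit=2gb;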

Note: https://blog.csdn.net/oosongoo/article/details/78435779