天天看點

Phoenix優化

hbase-site.xml:

<!-- Clock skew tolerated between the master and region servers.
     HBase documents this setting in milliseconds (45000000 ms = 12.5 h). -->
<property>
  <name>hbase.master.maxclockskew</name>
  <value>45000000</value>
</property>

<!-- Client RPC timeout.
     HBase documents this setting in milliseconds (36000000 ms = 10 h). -->
<property>
  <name>hbase.rpc.timeout</name>
  <value>36000000</value>
</property>

<!-- NOTE(review): the entries below appear without their enclosing <property> elements;
     each name/value pair must be wrapped in <property> before it is used in hbase-site.xml. -->
<!-- NOTE(review): no <value> is given for this name; supply one. -->
<name>hbase.client.scanner.timeout.period</name>

<!-- Timeout-related properties -->

<name>mapreduce.task.timeout</name>

<value>1200000</value>

<!-- NOTE(review): no <value> is given for this name; supply one. -->
<name>zookeeper.session.timeout</name>

<!-- Client write buffer size (20971520 = 20 x 1024 x 1024; presumably bytes, confirm against the HBase docs) -->

<name>hbase.client.write.buffer</name>

<value>20971520</value>

<!-- Balancer period: hbase.balancer.period -->

<name>hbase.balancer.period</name>

<value>300000</value>

<!-- Secondary indexing: WAL edit codec used by Phoenix secondary indexes.
     The value is a Java class name and is case-sensitive. -->
<property>
  <name>hbase.regionserver.wal.codec</name>
  <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
</property>

組合主鍵:

-- Table with a 15-column composite primary key; "v_1" and "v_2" are the only non-key columns.
-- All identifiers are double-quoted, so their lower-case spelling is preserved.
CREATE TABLE "test_keys2" (
    "v_1"           DECIMAL(24,8),
    "v_2"           VARCHAR,
    "year"          INTEGER NOT NULL,
    "period"        INTEGER NOT NULL,
    "account"       INTEGER NOT NULL,
    "entity"        INTEGER NOT NULL,
    "scenario"      INTEGER NOT NULL,
    "currency"      INTEGER NOT NULL,
    "version"       INTEGER NOT NULL,
    "cst_dim_02217" INTEGER NOT NULL,
    "cst_dim_30453" INTEGER NOT NULL,
    "cst_dim_47894" INTEGER NOT NULL,
    "cst_dim_61310" INTEGER NOT NULL,
    "cst_dim_81981" INTEGER NOT NULL,
    "cst_dim_01216" INTEGER NOT NULL,
    "cst_dim_25287" INTEGER NOT NULL,
    "cst_dim_41183" INTEGER NOT NULL
    -- Key column order below defines the row key order.
    CONSTRAINT pk PRIMARY KEY (
        "year",
        "period",
        "account",
        "entity",
        "scenario",
        "currency",
        "version",
        "cst_dim_02217",
        "cst_dim_30453",
        "cst_dim_47894",
        "cst_dim_61310",
        "cst_dim_81981",
        "cst_dim_01216",
        "cst_dim_25287",
        "cst_dim_41183"
    )
);

upsert插入資料有問題

二級索引:

同步建立索引

-- Covered index on "diminfo".year of c1_fact, built as part of this statement (no ASYNC keyword).
-- The INCLUDE list copies the remaining "diminfo" columns into the index.
CREATE INDEX ifact1
ON c1_fact ("diminfo".year)
INCLUDE (
    "diminfo"."v_1",
    "diminfo".v_2,
    "diminfo".period,
    "diminfo".account,
    "diminfo".entity,
    "diminfo".scenario,
    "diminfo".currency,
    "diminfo".version,
    "diminfo"."cst_dim_02217",
    "diminfo"."cst_dim_30453",
    "diminfo"."cst_dim_47894",
    "diminfo"."cst_dim_61310",
    "diminfo"."cst_dim_81981",
    "diminfo"."cst_dim_01216",
    "diminfo"."cst_dim_25287",
    "diminfo"."cst_dim_41183"
)

當執行create index的時候,索引表會直接與源資料表進行同步。但是,有時候我們的源表資料量很大,同步建立索引會抛出異常。

異步建立索引

-- Same covered index on "diminfo".year of c1_fact, declared with ASYNC so the
-- index data is populated by a separate job rather than by this statement.
CREATE INDEX ifact1
ON c1_fact ("diminfo".year)
INCLUDE (
    "diminfo"."v_1",
    "diminfo".v_2,
    "diminfo".period,
    "diminfo".account,
    "diminfo".entity,
    "diminfo".scenario,
    "diminfo".currency,
    "diminfo".version,
    "diminfo"."cst_dim_02217",
    "diminfo"."cst_dim_30453",
    "diminfo"."cst_dim_47894",
    "diminfo"."cst_dim_61310",
    "diminfo"."cst_dim_81981",
    "diminfo"."cst_dim_01216",
    "diminfo"."cst_dim_25287",
    "diminfo"."cst_dim_41183"
)
ASYNC

在 create index 語句末尾加上 async 關鍵字,即可指定異步建立索引。執行這個指令之後,索引表并不會立即與源表同步,這個時候查詢也不會使用這個索引表。索引資料的導入還需要采用 Phoenix 提供的索引同步工具類 IndexTool(org.apache.phoenix.mapreduce.index.IndexTool,類名區分大小寫),這是一個 MapReduce 工具類,使用方式如下:

# Populate an ASYNC index with Phoenix's MapReduce IndexTool.
# The environment variable and the Java class name are case-sensitive.
# Table and index names are upper-cased because unquoted Phoenix identifiers are stored upper-cased.
# NOTE(review): the index name follows the CREATE INDEX statements in this document (ifact1); confirm it matches your index.
${HBASE_HOME}/bin/hbase org.apache.phoenix.mapreduce.index.IndexTool --data-table C1_FACT --index-table IFACT1 --output-path hdfs:/phoenixindex

資料量大,報異常

解決辦法:在上述指令後加上 --direct 參數(可選);指定後會避免批量加載(bulk load)。