hbase-site.xml:
<!-- Settings noted for hbase-site.xml. -->
<!-- NOTE(review): the numeric values are presumably milliseconds, as is usual for HBase timeouts; confirm against the HBase docs. -->
<property>
<name>hbase.master.maxclockskew</name>
<value>45000000</value>
</property>
<property>
<name>hbase.rpc.timeout</name>
<value>36000000</value>
</property>
<!-- NOTE(review): from here on the entries are not wrapped in <property> elements, and this one has no <value> recorded; complete them before copying into a real config file. -->
<name>hbase.client.scanner.timeout.period</name>
<!-- Timeout-related properties -->
<name>mapreduce.task.timeout</name>
<value>1200000</value>
<!-- NOTE(review): no <value> recorded for this property either. -->
<name>zookeeper.session.timeout</name>
<!-- Write buffer size (20971520 = 20 * 1024 * 1024; presumably bytes, confirm) -->
<name>hbase.client.write.buffer</name>
<value>20971520</value>
<!-- hbase.balancer.period -->
<name>hbase.balancer.period</name>
<value>300000</value>
<!-- Secondary indexing: WAL codec that Phoenix requires on region servers for secondary indexes. -->
<!-- The value is a Java class name and is case-sensitive (IndexedWALEditCodec). -->
<property>
<name>hbase.regionserver.wal.codec</name>
<value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
</property>
组合主键:
-- Phoenix table with a 15-column composite primary key; "v_1" and "v_2" are the only non-key columns.
create table "test_keys2" (
    "v_1"           decimal(24,8),
    "v_2"           varchar,
    "year"          integer not null,
    "period"        integer not null,
    "account"       integer not null,
    "entity"        integer not null,
    "scenario"      integer not null,
    "currency"      integer not null,
    "version"       integer not null,
    "cst_dim_02217" integer not null,
    "cst_dim_30453" integer not null,
    "cst_dim_47894" integer not null,
    "cst_dim_61310" integer not null,
    "cst_dim_81981" integer not null,
    "cst_dim_01216" integer not null,
    "cst_dim_25287" integer not null,
    "cst_dim_41183" integer not null
    -- The key column order below is the row-key order.
    constraint pk primary key (
        "year",
        "period",
        "account",
        "entity",
        "scenario",
        "currency",
        "version",
        "cst_dim_02217",
        "cst_dim_30453",
        "cst_dim_47894",
        "cst_dim_61310",
        "cst_dim_81981",
        "cst_dim_01216",
        "cst_dim_25287",
        "cst_dim_41183"
    )
);
upsert插入数据有问题
二级索引:
同步创建索引
-- Synchronous covered index on c1_fact: indexed on "diminfo".year, including the listed "diminfo" columns.
-- NOTE(review): Phoenix folds unquoted names (year, v_2, period, ...) to upper case, while quoted
-- names ("v_1", "cst_dim_*") keep their case; confirm this matches how c1_fact was created.
create index ifact1 on c1_fact ("diminfo".year)
    include (
        "diminfo"."v_1",
        "diminfo".v_2,
        "diminfo".period,
        "diminfo".account,
        "diminfo".entity,
        "diminfo".scenario,
        "diminfo".currency,
        "diminfo".version,
        "diminfo"."cst_dim_02217",
        "diminfo"."cst_dim_30453",
        "diminfo"."cst_dim_47894",
        "diminfo"."cst_dim_61310",
        "diminfo"."cst_dim_81981",
        "diminfo"."cst_dim_01216",
        "diminfo"."cst_dim_25287",
        "diminfo"."cst_dim_41183"
    );
当执行create index的时候,索引表会直接与源数据表进行同步。但是,有时候我们的源表数据量很大,同步创建索引会抛出异常。
异步创建索引
-- Asynchronous variant of the covered index on c1_fact: the trailing async keyword defers
-- population of the index, so it must be built separately after this statement runs.
-- NOTE(review): Phoenix folds unquoted names (year, v_2, period, ...) to upper case, while quoted
-- names ("v_1", "cst_dim_*") keep their case; confirm this matches how c1_fact was created.
create index ifact1 on c1_fact ("diminfo".year)
    include (
        "diminfo"."v_1",
        "diminfo".v_2,
        "diminfo".period,
        "diminfo".account,
        "diminfo".entity,
        "diminfo".scenario,
        "diminfo".currency,
        "diminfo".version,
        "diminfo"."cst_dim_02217",
        "diminfo"."cst_dim_30453",
        "diminfo"."cst_dim_47894",
        "diminfo"."cst_dim_61310",
        "diminfo"."cst_dim_81981",
        "diminfo"."cst_dim_01216",
        "diminfo"."cst_dim_25287",
        "diminfo"."cst_dim_41183"
    ) async;
在 create index 语句末尾加上 async 关键字即可指定异步创建索引。执行这个命令之后,索引表不会立即与源表同步,此时查询也不会使用这个索引表。索引数据的导入还需要使用 Phoenix 提供的索引构建工具类 IndexTool,它是一个 MapReduce 作业,使用方式如下:
# Populate the asynchronously created index ifact1 with Phoenix's MapReduce IndexTool.
# $HBASE_HOME and the Java class name are case-sensitive. Table and index names are given in
# upper case because Phoenix folds the unquoted names used in "create index" to upper case.
$HBASE_HOME/bin/hbase org.apache.phoenix.mapreduce.index.IndexTool --data-table C1_FACT --index-table IFACT1 --output-path hdfs:/phoenixindex
数据量大时,上面的命令会报异常。
解决办法:加上 --direct 参数(可选);指定后不使用批量加载(bulk load)方式。