HBase
1. 安裝zookeeper
[[email protected] ~]# tar -zvxf /root/zookeeper-3.4.6.tar.gz -C /home/
[[email protected] ~]# mv /home/zookeeper-3.4.6 /home/zookeeper
[[email protected] home]# cp /home/zookeeper/conf/zoo_sample.cfg /home/zookeeper/conf/zk.cfg
[[email protected] conf]# mkdir /home/zk_data
[[email protected] conf]# vi zk.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/zk_data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
[[email protected] home]# /home/zookeeper/bin/zkServer.sh start /home/zookeeper/conf/zk.cfg
[[email protected] home]# jps
64436 QuorumPeerMain
64461 Jps
2. 安裝hbase
2.1 解壓
tar -zxf hbase-1.2.4-bin.tar.gz -C /home/hbase
#修改環境變量
2.2 修改配置檔案
/home/hbase/hbase-1.2.4/conf/hbase-site.xml
<property>
<name>hbase.rootdir</name>
<value>hdfs://hadoopnode01:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>hadoopnode01</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
2.3 添加regionservers
/home/hbase/hbase-1.2.4/conf/regionservers
hadoopnode01
3. 啟動
start-hbase.sh(啟動) / stop-hbase.sh(停止)
[[email protected] ~]# jps
64849 HMaster
1489 DataNode
65073 Jps
64436 QuorumPeerMain
1668 SecondaryNameNode
62503 ResourceManager
64968 HRegionServer
62584 NodeManager
1401 NameNode
訪問:`http://centos:16010` 這是HBase中HMaster程序提供的web-ui頁面,通過該頁面可以檢視HBase資訊
3.1 Shell連接
hbase shell
4. shell操作
4.1 常見指令
status, table_help, version, whoami
hbase(main):001:0> status
1 active master, 0 backup masters, 1 servers, 0 dead, 2.0000 average load
hbase(main):002:0> version
1.2.4, rUnknown, Wed Feb 15 18:58:00 CST 2017
hbase(main):003:0> whoami
root (auth:SIMPLE)
groups: root
4.2 關于命名空間
等同於MySQL資料庫操作,在命名空間中有四個名詞:
(1)tables(表是命名空間中的成員,命名空間可以理解為資料庫),如果沒有指定,則在default預設的命名空間中。
(2)RegionServer group:一個命名空間包含了預設的RegionServer Group
(3)Permission:權限,命名空間能夠讓我們來定義訪問控制清單ACL(Access Control List)
(4)Quota:限額,可以強制一個命名空間可包含的region的數量
alter_namespace, create_namespace, describe_namespace, drop_namespace, list_namespace, list_namespace_tables
#建立命名空間 描述
hbase(main):002:0> create_namespace 'GetLogs', {'user'=>'zhangsan'}
hbase(main):003:0> describe_namespace 'GetLogs'
DESCRIPTION
{NAME => 'GetLogs', user => 'zhangsan'}
#删除命名空間
drop_namespace 'GetLogs'
#檢視所有命名空間
hbase(main):004:0> list_namespace
NAMESPACE
GetLogs
default
hbase
#增删改
Alter namespace properties.
To add/modify a property:
hbase> alter_namespace 'ns1', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'}
To delete a property:
hbase> alter_namespace 'ns1', {METHOD => 'unset', NAME=>'PROPERTY_NAME'}
#修改命名空間 增加屬性 關鍵字set
hbase(main):009:0> alter_namespace 'GetLogs',{METHOD=>'set','user'=>'lisi'}
#删除user屬性 關鍵字unset
hbase(main):016:0> alter_namespace 'GetLogs',{METHOD=>'unset','NAME'=>'user'}
4.3 資料定義語言 DDL (表)
alter, alter_async, alter_status, create, describe, disable, disable_all, drop, drop_all, enable, enable_all, exists, get_table, is_disabled, is_enabled, list, locate_region, show_filters
4.3.1建立表
#建立表
hbase(main):005:0> create 'GetLogs:test_response_request','response','request'
#檢視表的描述
hbase(main):006:0> describe 'GetLogs:test_response_request'
Table GetLogs:test_response_request is ENABLED
GetLogs:test_response_request
COLUMN FAMILIES DESCRIPTION
{NAME => 'request', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'response', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
#檢視list
hbase(main):005:0> list
TABLE
GetLogs:test_response_request
=> ["GetLogs:test_response_request"]
hbase(main):007:0>
t = get_table 'GetLogs:test_response_request'
t.scan
4.3.2檢視指定namespace的表
hbase(main):021:0> list_namespace_tables 'GetLogs'
TABLE
test_response_request
4.4 資料操作語言DML (CRUD)
append, count, delete, deleteall, get, put, scan, truncate
4.4.1 put(插入|更新)
hbase(main):024:0> t=get_table "GetLogs:test_response_request"
# 插入資料行鍵RowKey為9989929391008468668008374520191204155654 列簇request:url 值為/member-client/member/card-info
hbase(main):002:0> t.put 9989929391008468668008374520191204155654 ,'request:url','/member-client/member/card-info'
或
hbase(main):002:0>put 'GetLogs:test_response_request', 9989929391008468668008374520191204155654,'request:url','/member-client/member/card-info'
4.4.2 get (取值)
hbase(main):005:0> t.get 9989929391008468668008374520191204155654
# 擷取行鍵為9989929391008468668008374520191204155654 列為request:url 的最近3個版本的資料
hbase(main):007:0> t.get 9989929391008468668008374520191204155654 ,{COLUMNS=>'request:url',VERSIONS=>3}
# 擷取行鍵為9989929391008468668008374520191204155654 列為request:url 時間戳在[1558973012353, 1558973281488)區間內的最多2個版本的資料(時間戳區間查詢,區間左閉右開);若只查詢單一時間戳,改用 TIMESTAMP=>1558973012353
hbase(main):020:0> t.get 9989929391008468668008374520191204155654 ,{COLUMNS=>'request:url',TIMERANGE=>[1558973012353,1558973281488],VERSIONS=>2}
4.4.3 delete/deleteall (删除)
# 删除前先檢視行鍵為1、列族為cf1的最近3個版本的資料(删除時有可能會删除多個cell)
hbase(main):034:0> t.get 1,{VERSIONS=>3,COLUMNS=>'cf1'}
COLUMN CELL
cf1:name timestamp=1547484605994, value=zs1
cf1:name timestamp=1547484378850, value=zs
# 删除該版本以及該版本以前的所有資料
hbase(main):035:0> t.delete 1 ,'cf1:name',1558973012353
0 row(s) in 0.0200 seconds
hbase(main):036:0> t.get 1,{VERSIONS=>3,COLUMNS=>'cf1'}
COLUMN CELL
0 row(s) in 0.0290 seconds
# 删除該rowkey對應的所有列
hbase(main):037:0> t.deleteall 1
0 row(s) in 0.0100 seconds
4.4.4 scan
hbase(main):058:0> scan 'GetLogs:test_response_request', {COLUMNS=>['request'],STARTROW => '3',LIMIT=>3,REVERSED=>true}
ROW COLUMN+CELL
2 column=cf1:name, timestamp=1547485439614, value=ls
1 column=cf1:age, timestamp=1547485477592, value=18
1 column=cf1:name, timestamp=1547485432058, value=zs
4.4.5 count
t.count
4.4.6 append (追加)
t.append 1,'cf1:name','110'
hbase(main):063:0> t.get 1
COLUMN CELL
cf1:age timestamp=1547485477592, value=18
cf1:name timestamp=1547486003061, value=zs110