1.没有副本的分布式引擎表
1.1 修改文件/etc/clickhouse-server/config.xml,启用如下listen_host配置:
<listen_host>::</listen_host>
<!-- Same for hosts with disabled ipv6: -->
<!-- <listen_host>0.0.0.0</listen_host> -->

<!-- Default values - try listen localhost on ipv4 and ipv6: -->
<!-- <listen_host>::1</listen_host> -->
<!-- <listen_host>127.0.0.1</listen_host> -->
1.2修改/etc/metrika.xml
<yandex>
    <!--
      ClickHouse substitutions file (/etc/metrika.xml): cluster topology,
      ZooKeeper ensemble, macros, allowed networks and a compression rule.
    -->

    <!-- Cluster "linux": three shards with one replica each.
         The cluster name is what Distributed(linux, ...) tables refer to. -->
    <clickhouse_remote_servers>
        <linux>
            <shard>
                <!-- internal_replication is a shard-level setting, so it must be
                     a direct child of <shard>, not of <replica>. -->
                <internal_replication>true</internal_replication>
                <replica>
                    <host>linux01</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>linux02</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>linux03</host>
                    <port>9000</port>
                </replica>
            </shard>
        </linux>
    </clickhouse_remote_servers>

    <!-- ZooKeeper ensemble: one node per host, client port 2181. -->
    <zookeeper-servers>
        <node index="1">
            <host>linux01</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>linux02</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>linux03</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>

    <!-- NOTE(review): this value names a single host; presumably it has to be
         changed to the local host name on each node after the file is copied. -->
    <macros>
        <replica>linux01</replica>
    </macros>

    <!-- ::/0 is the catch-all prefix, i.e. no address restriction.
         NOTE(review): presumably pulled into the user network settings; confirm. -->
    <networks>
        <ip>::/0</ip>
    </networks>

    <!-- Compression rule: lz4 for data parts matching the size thresholds below
         (minimum part size in bytes and minimum part-to-table size ratio). -->
    <clickhouse_compression>
        <case>
            <min_part_size>10000000000</min_part_size>
            <min_part_size_ratio>0.01</min_part_size_ratio>
            <method>lz4</method>
        </case>
    </clickhouse_compression>
</yandex>
1.3分发metrika.xml文件到集群各节点(分发后需将<macros>中的replica改为各节点自己的主机名)
1.4启动zookeeper
1.5重启clickhouse服务
1.6在每个clickhouse服务端建立普通表
-- Local TinyLog table that holds each node's share of the data;
-- run this statement on every ClickHouse server.
CREATE TABLE d_a
(
    id UInt16,
    name String
)
ENGINE = TinyLog;
1.7在任意clickhouse服务端建立分布式引擎表
-- Distributed table over cluster "linux": reads from and writes to the local
-- table default.d_a on each shard, using id as the sharding key.
CREATE TABLE d_x
(
    id UInt16,
    name String
)
ENGINE = Distributed(linux, default, d_a, id);
引擎参数依次为:linux为集群名(对应metrika.xml中<clickhouse_remote_servers>下的集群标签),default为数据库名,d_a为各节点上的本地表名,id为分片键
2.创建有副本的分布式引擎表
2.1在/etc/metrika.xml的<clickhouse_remote_servers>节点内添加如下集群配置,并分发到集群各节点:
<!-- Cluster "doit01": all data lives in a single shard with three replicas. -->
<!-- NOTE(review): the other cluster in these notes uses hosts linux01..linux03;
     confirm that hadoop01..hadoop03 are the intended, resolvable host names. -->
<doit01>
    <shard>
        <!-- Shard 0. With internal_replication=false the Distributed table
             itself writes each insert to every replica (per ClickHouse docs). -->
        <internal_replication>false</internal_replication>
        <replica>
            <host>hadoop01</host>
            <port>9000</port>
        </replica>
        <replica>
            <host>hadoop02</host>
            <port>9000</port>
        </replica>
        <replica>
            <host>hadoop03</host>
            <port>9000</port>
        </replica>
    </shard>
</doit01>
2.2重启clickhouse service
2.3在每个服务端创建普通表
-- Local MergeTree table behind the Distributed table of the replicated cluster;
-- create it on every server.
-- Written with the current clause syntax: the legacy positional form
-- MergeTree(date, (id, date), 8192) is deprecated and rejected by default on
-- recent ClickHouse servers.
CREATE TABLE d_aa
(
    id String,
    date Date
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(date)  -- the legacy form partitioned by month of the date column
ORDER BY (id, date)          -- sorting key, same as the legacy primary key tuple
SETTINGS index_granularity = 8192;
其中date为按月分区所用的日期字段,(id, date)为排序键(同时也是主键),8192为索引粒度
2.4创建分布式引擎表
-- Distributed table with the same column structure as d_aa, spanning cluster
-- "doit01" and targeting default.d_aa; rand() is the sharding expression.
CREATE TABLE d_all AS d_aa
ENGINE = Distributed(doit01, default, d_aa, rand());