Author: Wu Haisheng, engineer at 視野金服
First published on the Nebula Graph forum:
https://discuss.nebula-graph.com.cn/t/topic/1388
1. Preface
This article describes how to deploy a Nebula Graph cluster with Docker Swarm, and how to set up client-side load balancing and high availability.

2. Setting Up the Nebula Cluster
2.1 Environment Preparation
Machines:
| IP | Memory (GB) | CPU (cores) |
| --- | --- | --- |
| 192.168.1.166 | 16 | 4 |
| 192.168.1.167 | 16 | 4 |
| 192.168.1.168 | 16 | 4 |
Make sure Docker is already installed on all machines before starting.
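A quick sanity check is to run docker version on each machine; if the Server section prints, the daemon is installed and reachable:
# run on 192.168.1.166 / 167 / 168; the Server section confirms the daemon is up
docker version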
2.2 Initialize the Swarm Cluster
Run the following on 192.168.1.166:
$ docker swarm init --advertise-addr 192.168.1.166
Swarm initialized: current node (dxn1zf6l61qsb1josjja83ngz) is now a manager.
To add a worker to this swarm, run the following command:
docker swarm join \
--token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c \
192.168.1.166:2377
To add a manager to this swarm, run 'docker swarm join-token manager' and follow the instructions.
2.3 Add Worker Nodes
Following the output of the init command, join the worker nodes to the swarm by running the command below on 192.168.1.167 and 192.168.1.168:
docker swarm join \
--token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c \
192.168.1.166:2377
2.4 Verify the Cluster
docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
h0az2wzqetpwhl9ybu76yxaen * KF2-DATA-166 Ready Active Reachable 18.06.1-ce
q6jripaolxsl7xqv3cmv5pxji KF2-DATA-167 Ready Active Leader 18.06.1-ce
h1iql1uvm7123h3gon9so69dy KF2-DATA-168 Ready Active 18.06.1-ce
2.5 Configure the Docker Stack
vi docker-stack.yml
Add the following content:
version: '3.6'
services:
metad0:
image: vesoft/nebula-metad:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --local_ip=192.168.1.166
- --ws_ip=192.168.1.166
- --port=45500
- --data_path=/data/meta
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-166
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.166:11000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 11000
published: 11000
protocol: tcp
mode: host
- target: 11002
published: 11002
protocol: tcp
mode: host
- target: 45500
published: 45500
protocol: tcp
mode: host
volumes:
- data-metad0:/data/meta
- logs-metad0:/logs
networks:
- nebula-net
metad1:
image: vesoft/nebula-metad:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --local_ip=192.168.1.167
- --ws_ip=192.168.1.167
- --port=45500
- --data_path=/data/meta
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-167
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.167:11000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 11000
published: 11000
protocol: tcp
mode: host
- target: 11002
published: 11002
protocol: tcp
mode: host
- target: 45500
published: 45500
protocol: tcp
mode: host
volumes:
- data-metad1:/data/meta
- logs-metad1:/logs
networks:
- nebula-net
metad2:
image: vesoft/nebula-metad:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --local_ip=192.168.1.168
- --ws_ip=192.168.1.168
- --port=45500
- --data_path=/data/meta
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-168
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.168:11000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 11000
published: 11000
protocol: tcp
mode: host
- target: 11002
published: 11002
protocol: tcp
mode: host
- target: 45500
published: 45500
protocol: tcp
mode: host
volumes:
- data-metad2:/data/meta
- logs-metad2:/logs
networks:
- nebula-net
storaged0:
image: vesoft/nebula-storaged:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --local_ip=192.168.1.166
- --ws_ip=192.168.1.166
- --port=44500
- --data_path=/data/storage
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-166
depends_on:
- metad0
- metad1
- metad2
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.166:12000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 12000
published: 12000
protocol: tcp
mode: host
- target: 12002
published: 12002
protocol: tcp
mode: host
volumes:
- data-storaged0:/data/storage
- logs-storaged0:/logs
networks:
- nebula-net
storaged1:
image: vesoft/nebula-storaged:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --local_ip=192.168.1.167
- --ws_ip=192.168.1.167
- --port=44500
- --data_path=/data/storage
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-167
depends_on:
- metad0
- metad1
- metad2
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.167:12000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 12000
published: 12000
protocol: tcp
mode: host
- target: 12002
published: 12004
protocol: tcp
mode: host
volumes:
- data-storaged1:/data/storage
- logs-storaged1:/logs
networks:
- nebula-net
storaged2:
image: vesoft/nebula-storaged:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --local_ip=192.168.1.168
- --ws_ip=192.168.1.168
- --port=44500
- --data_path=/data/storage
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-168
depends_on:
- metad0
- metad1
- metad2
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.168:12000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 12000
published: 12000
protocol: tcp
mode: host
- target: 12002
published: 12006
protocol: tcp
mode: host
volumes:
- data-storaged2:/data/storage
- logs-storaged2:/logs
networks:
- nebula-net
graphd1:
image: vesoft/nebula-graphd:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --port=3699
- --ws_ip=192.168.1.166
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-166
depends_on:
- metad0
- metad1
- metad2
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.166:13000/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 3699
published: 3699
protocol: tcp
mode: host
- target: 13000
published: 13000
protocol: tcp
# mode: host
- target: 13002
published: 13002
protocol: tcp
mode: host
volumes:
- logs-graphd:/logs
networks:
- nebula-net
graphd2:
image: vesoft/nebula-graphd:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --port=3699
- --ws_ip=192.168.1.167
- --log_dir=/logs
- --v=2
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-167
depends_on:
- metad0
- metad1
- metad2
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.167:13001/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 3699
published: 3640
protocol: tcp
mode: host
- target: 13000
published: 13001
protocol: tcp
mode: host
- target: 13002
published: 13003
protocol: tcp
# mode: host
volumes:
- logs-graphd2:/logs
networks:
- nebula-net
graphd3:
image: vesoft/nebula-graphd:nightly
env_file:
- ./nebula.env
command:
- --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
- --port=3699
- --ws_ip=192.168.1.168
- --log_dir=/logs
- --v=0
- --minloglevel=2
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.hostname == KF2-DATA-168
depends_on:
- metad0
- metad1
- metad2
healthcheck:
test: ["CMD", "curl", "-f", "http://192.168.1.168:13002/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s
ports:
- target: 3699
published: 3641
protocol: tcp
mode: host
- target: 13000
published: 13002
protocol: tcp
# mode: host
- target: 13002
published: 13004
protocol: tcp
mode: host
volumes:
- logs-graphd3:/logs
networks:
- nebula-net
networks:
nebula-net:
external: true
attachable: true
name: host
volumes:
data-metad0:
logs-metad0:
data-metad1:
logs-metad1:
data-metad2:
logs-metad2:
data-storaged0:
logs-storaged0:
data-storaged1:
logs-storaged1:
data-storaged2:
logs-storaged2:
logs-graphd:
logs-graphd2:
logs-graphd3:
Edit nebula.env and add the following content:
TZ=UTC
USER=root
2.6 Start the Nebula Cluster
docker stack deploy nebula -c docker-stack.yml
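Once the stack is deployed, you can check that every replica is running and probe the HTTP status ports published in the stack file (11000 for metad, 12000 for storaged, 13000 for graphd). A minimal check from 192.168.1.166 might look like this:
# list the tasks in the stack and their current state
docker stack ps nebula
# list the services and their replica counts
docker service ls
# hit the same status endpoints the healthchecks use
curl http://192.168.1.166:11000/status
curl http://192.168.1.166:12000/status
curl http://192.168.1.166:13000/status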
3. Cluster Load Balancing and High Availability Configuration
The Nebula Graph client (as of 1.X) does not provide load balancing; it simply picks one graphd at random to connect to. For production use you therefore need to set up load balancing and high availability yourself.
Figure 3.1: three-layer deployment architecture
The overall deployment architecture is divided into three layers: the data service layer, the load balancing layer, and the high availability layer, as shown in Figure 3.1.
Data service layer: the Nebula Graph cluster deployed in Section 2.
Load balancing layer: balances client requests and distributes them to the data service layer below.
High availability layer: provides high availability for HAProxy itself, keeping the load balancing layer, and therefore the whole cluster, available.
3.1 Load Balancing Configuration
HAProxy is set up with docker-compose. Edit the following three files.
Dockerfile: add the following content
FROM haproxy:1.7
COPY haproxy.cfg /usr/local/etc/haproxy/haproxy.cfg
EXPOSE 3640
docker-compose.yml: add the following content
version: "3.2"
services:
haproxy:
container_name: haproxy
build: .
volumes:
- ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg
ports:
- 3640:3640
restart: always
networks:
- app_net
networks:
app_net:
external: true
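Note that app_net is declared as an external network, so docker-compose will not create it automatically. If it does not already exist, create it before starting the service (a default bridge network is enough here):
# create the external network referenced by docker-compose.yml
docker network create app_net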
haproxy.cfg: add the following content
global
daemon
maxconn 30000
log 127.0.0.1 local0 info
log 127.0.0.1 local1 warning
defaults
log-format %hr\ %ST\ %B\ %Ts
log global
mode http
option http-keep-alive
timeout connect 5000ms
timeout client 10000ms
timeout server 50000ms
timeout http-request 20000ms
# custom your own frontends && backends && listen conf
# CUSTOM
listen graphd-cluster
bind *:3640
mode tcp
maxconn 300
balance roundrobin
server server1 192.168.1.166:3699 maxconn 300 check
server server2 192.168.1.167:3699 maxconn 300 check
server server3 192.168.1.168:3699 maxconn 300 check
listen stats
bind *:1080
stats refresh 30s
stats uri /stats
3.2 Start HAProxy
docker-compose up -d
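After the container is up, you can confirm HAProxy is listening and watch the health of the three graphd backends through the stats page defined in the listen stats section above (port 1080, URI /stats):
# confirm the haproxy container is running
docker ps --filter name=haproxy
# fetch the stats page; it can also be opened in a browser at http://<host-ip>:1080/stats
curl http://127.0.0.1:1080/stats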
3.3 High Availability Configuration
Note: keepalived requires a VIP (virtual IP) prepared in advance; in the configuration below, 192.168.1.99 is the virtual IP.
Apply the following configuration on each of 192.168.1.166, 192.168.1.167, and 192.168.1.168.
Install keepalived:
apt-get update && apt-get upgrade && apt-get install keepalived -y
Edit the keepalived configuration file:
/etc/keepalived/keepalived.conf
(Apply the configuration below on all three machines; set priority to a different value on each to determine precedence.)
Configuration on 192.168.1.166
global_defs {
router_id lb01 # an identifier, just a name
}
vrrp_script chk_haproxy {
script "killall -0 haproxy"
interval 2
}
vrrp_instance VI_1 {
state MASTER
interface ens160
virtual_router_id 52
priority 999
# interval, in seconds, between synchronization checks between the MASTER and BACKUP load balancers
advert_int 1
# authentication type and password
authentication {
# authentication type: PASS or AH
auth_type PASS
# authentication password; within the same vrrp_instance, MASTER and BACKUP must use the same password to communicate
auth_pass amber1
}
virtual_ipaddress {
# virtual IP 192.168.1.99/24, bound to interface ens160 with label ens160:1; identical on master and backups
192.168.1.99/24 dev ens160 label ens160:1
}
track_script {
chk_haproxy
}
}
Configuration on 192.168.1.167
global_defs {
router_id lb01 # an identifier, just a name
}
vrrp_script chk_haproxy {
script "killall -0 haproxy"
interval 2
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 52
priority 888
# interval, in seconds, between synchronization checks between the MASTER and BACKUP load balancers
advert_int 1
# authentication type and password
authentication {
# authentication type: PASS or AH
auth_type PASS
# authentication password; within the same vrrp_instance, MASTER and BACKUP must use the same password to communicate
auth_pass amber1
}
virtual_ipaddress {
# virtual IP 192.168.1.99/24, bound to interface ens160 with label ens160:1; identical on master and backups
192.168.1.99/24 dev ens160 label ens160:1
}
track_script {
chk_haproxy
}
}
Configuration on 192.168.1.168
global_defs {
router_id lb01 # an identifier, just a name
}
vrrp_script chk_haproxy {
script "killall -0 haproxy"
interval 2
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 52
priority 777
# interval, in seconds, between synchronization checks between the MASTER and BACKUP load balancers
advert_int 1
# authentication type and password
authentication {
# authentication type: PASS or AH
auth_type PASS
# authentication password; within the same vrrp_instance, MASTER and BACKUP must use the same password to communicate
auth_pass amber1
}
virtual_ipaddress {
# virtual IP 192.168.1.99/24, bound to interface ens160 with label ens160:1; identical on master and backups
192.168.1.99/24 dev ens160 label ens160:1
}
track_script {
chk_haproxy
}
}
Keepalived commands:
# start keepalived
systemctl start keepalived
# enable keepalived at boot
systemctl enable keepalived
# restart keepalived
systemctl restart keepalived
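With keepalived running on all three machines, the VIP should be held by the MASTER (192.168.1.166, the node with the highest priority). A simple way to verify this, and to exercise a failover, is sketched below; stopping HAProxy makes the chk_haproxy check fail, so the VIP should move to the BACKUP with the next-highest priority:
# on 192.168.1.166: 192.168.1.99 should be attached to ens160 as ens160:1
ip addr show ens160
# clients reach graphd through the VIP and the HAProxy port
nc -zv 192.168.1.99 3640
# simulate a failure on the MASTER: stop haproxy so the chk_haproxy script fails
docker stop haproxy
# on 192.168.1.167: the VIP should now appear here
ip addr show ens160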
4. Miscellaneous
How do you deploy offline? Simply point the images at a private registry. If you run into any problems, feel free to reach out.
If you have any questions about this article, you are welcome to discuss with the original author on the forum. Original post: https://discuss.nebula-graph.com.cn/t/topic/1388