故障重現
keepalived配置如下
# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
# Global settings: notification e-mail parameters and this node's router_id.
global_defs {
# Recipient list for notification e-mail.
notification_email {
root@localhost
}
# NOTE(review): "[email protected]" looks like an obfuscated placeholder, not a
# usable sender address - replace it with a real address before deploying.
notification_email_from [email protected]
smtp_connect_timeout 3
# Mail is handed to the SMTP server on the local host.
smtp_server 127.0.0.1
router_id LVS_DEVEL
}
# Maintenance switch: the check fails (exit 1) while /etc/keepalived/down
# exists and succeeds (exit 0) when the file is absent.
vrrp_script chk_maintaince_down {
script "[[ -f /etc/keepalived/down ]] && exit 1 || exit 0"
interval 1
# Positive weight: added to the instance priority only while the check succeeds.
weight 2
}
# haproxy liveness check: "killall -0" sends signal 0, which only probes for
# a process named haproxy, so the check succeeds while haproxy is running.
vrrp_script chk_haproxy {
script "killall -0 haproxy"
interval 1
# Positive weight: added to the instance priority only while the check succeeds.
weight 2
}
# First VRRP instance: this node is configured as MASTER (priority 100) for
# virtual router 125 and carries the floating address 172.16.25.10 on eth0:0.
vrrp_instance VI_1 {
interface eth0
state MASTER
priority 100
virtual_router_id 125
garp_master_delay 1
authentication {
auth_type PASS
auth_pass 1e3459f77aba4ded
}
track_interface {
eth0
}
virtual_ipaddress {
172.16.25.10/16 dev eth0 label eth0:0
}
# Both checks are tracked; each one carries weight 2 in its vrrp_script block.
track_script {
chk_haproxy
chk_maintaince_down
}
# notify.sh receives the new state name and this instance's floating address.
notify_master "/etc/keepalived/notify.sh master 172.16.25.10"
notify_backup "/etc/keepalived/notify.sh backup 172.16.25.10"
notify_fault "/etc/keepalived/notify.sh fault 172.16.25.10"
}
# Second VRRP instance: this node is configured as BACKUP (priority 99) for
# virtual router 126, whose floating address is 172.16.25.11 on eth0:1.
vrrp_instance VI_2 {
interface eth0
state BACKUP
priority 99
virtual_router_id 126
garp_master_delay 1
authentication {
auth_type PASS
auth_pass 7615c4b7f518cede
}
track_interface {
eth0
}
virtual_ipaddress {
172.16.25.11/16 dev eth0 label eth0:1
}
# Both checks are tracked; each one carries weight 2 in its vrrp_script block.
track_script {
chk_haproxy
chk_maintaince_down
}
# notify.sh receives the new state name and this instance's floating address.
notify_master "/etc/keepalived/notify.sh master 172.16.25.11"
notify_backup "/etc/keepalived/notify.sh backup 172.16.25.11"
notify_fault "/etc/keepalived/notify.sh fault 172.16.25.11"
}
# vi /etc/keepalived/notify.sh
#!/bin/bash
# Author: Jason.Yu <[email protected]>
# description: An example of notify script
#
# Recipient of the state-change notification mails.
contact='root@localhost'

#######################################
# Mail a notice that this host changed VRRP state.
# Globals:   contact (read) - recipient address
# Arguments: $1 - new state name (the script passes master, backup or fault)
#            $2 - floating IP address the transition concerns
# Outputs:   pipes the message body to mail; prints nothing itself
# Returns:   exit status of the mail pipeline
#######################################
notify() {
  local host now mailsubject mailbody
  host=$(hostname)
  now=$(date '+%F %H:%M:%S')
  mailsubject="${host} to be $1: $2 floating"
  mailbody="${now}: vrrp transition, ${host} changed to be $1"
  # Quote every expansion so the text reaches mail exactly as built
  # (no word splitting, no accidental glob expansion).
  printf '%s\n' "$mailbody" | mail -s "$mailsubject" "$contact"
}
# Dispatch on the VRRP state given as $1; $2 (the floating IP) is forwarded
# to notify for the mail subject.
case "$1" in
  master)
    notify master "$2"
    /etc/rc.d/init.d/haproxy start
    exit 0
    ;;
  backup)
    notify backup "$2"
    # NOTE(review): stopping haproxy here also makes any health check that
    # probes the haproxy process fail on this node - keep only if intended.
    /etc/rc.d/init.d/haproxy stop
    exit 0
    ;;
  fault)
    notify fault "$2"
    /etc/rc.d/init.d/haproxy stop
    exit 0
    ;;
  *)
    # The script name must be expanded outside single quotes, otherwise the
    # literal text `basename $0` is printed. Usage is an error path: stderr.
    printf 'Usage: %s {master|backup|fault}\n' "$(basename -- "$0")" >&2
    exit 1
    ;;
esac
引發的故障1:keepalived當機恢復後VIP集體漂移故障

引發的故障2:haproxy服務停止後重新啟動,VIP集體漂移故障
原因
每次主備狀態切換時,都會觸發notify_backup,而notify.sh腳本的backup部分會執行/etc/rc.d/init.d/haproxy stop,使chk_haproxy檢測失敗,導致權重在2個節點上都改變一次,進而單一節點上對於所有instance的權重都處於最大或者最小,故VIP集體漂移也就不奇怪了;
解決方法
修改notify.sh腳本,在處理backup部分(fault部分同理),只發送通知郵件,而無需刻意停止haproxy服務;
# vi /etc/keepalived/notify.sh
#!/bin/bash
# Author: Jason.Yu <[email protected]>
# description: An example of notify script
#
# Recipient of the state-change notification mails.
contact='root@localhost'

#######################################
# Mail a notice that this host changed VRRP state.
# Globals:   contact (read) - recipient address
# Arguments: $1 - new state name (the script passes master, backup or fault)
#            $2 - floating IP address the transition concerns
# Outputs:   pipes the message body to mail; prints nothing itself
# Returns:   exit status of the mail pipeline
#######################################
notify() {
  local host now mailsubject mailbody
  host=$(hostname)
  now=$(date '+%F %H:%M:%S')
  mailsubject="${host} to be $1: $2 floating"
  mailbody="${now}: vrrp transition, ${host} changed to be $1"
  # Quote every expansion so the text reaches mail exactly as built
  # (no word splitting, no accidental glob expansion).
  printf '%s\n' "$mailbody" | mail -s "$mailsubject" "$contact"
}
# Dispatch on the VRRP state given as $1; $2 (the floating IP) is forwarded
# to notify for the mail subject.
case "$1" in
  master)
    notify master "$2"
    /etc/rc.d/init.d/haproxy start
    exit 0
    ;;
  backup)
    notify backup "$2"
    # /etc/rc.d/init.d/haproxy stop  # commented out (or delete this line)
    exit 0
    ;;
  fault)
    notify fault "$2"
    # /etc/rc.d/init.d/haproxy stop  # same as for backup
    exit 0
    ;;
  *)
    # The script name must be expanded outside single quotes, otherwise the
    # literal text `basename $0` is printed. Usage is an error path: stderr.
    printf 'Usage: %s {master|backup|fault}\n' "$(basename -- "$0")" >&2
    exit 1
    ;;
esac
調整後的正常權重改變流程
vrrp_script中節點權重改變算法
vrrp_script 裡的script傳回值為0時認為檢測成功,其它值都會當成檢測失敗;
- weight 為正時,腳本檢測成功時此weight會加到priority上,檢測失敗時不加;
  - 主失敗:
    - 主 priority < 從 priority + weight 時會切換。
  - 主成功:
    - 主 priority + weight > 從 priority + weight 時,主依然為主
- weight 為負時,腳本檢測成功時此weight不影響priority,檢測失敗時priority - abs(weight);
  - 主失敗:
    - 主 priority - abs(weight) < 從 priority 時會切換主從
  - 主成功:
    - 主 priority > 從 priority 時,主依然為主