
Replacing a Brick in GlusterFS

Recreating the Failure Environment

The GlusterFS cluster consists of 4 nodes; the cluster information is as follows.

# On each node, configure /etc/hosts, synchronize the system time, and disable the firewall and SELinux

[root@glusterfs-master-8 ~]# tail -4 /etc/hosts
192.168.0.9 glusterfs-slave-9
192.168.0.6  k8s-node-6
192.168.0.7  k8s-node-7
192.168.0.10 client
 
------------------------------------------------------------------------------------
# On each of the four nodes, create a virtual partition and build the storage directory on it
# Use dd to create the virtual partition, then format it and mount it at /data
[root@glusterfs-master-8 ~]# dd if=/dev/sda2  of=/dev/vdb1
[root@glusterfs-master-8 ~]# mkfs.xfs -f /dev/vdb1                        // Format it as xfs here; ext4 would also work.
meta-data=/dev/vdb1              isize=512    agcount=4, agsize=65536 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=0, sparse=0
data     =                       bsize=4096   blocks=262144, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0 ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
 
[root@glusterfs-master-8 ~]# mkdir /data
[root@glusterfs-master-8 ~]# mount /dev/vdb1 /data
[root@glusterfs-master-8 ~]# df -h |grep data
/dev/loop0           979M   33M  947M   4% /data
 
# Configure the mount to persist across reboots
[root@GlusterFS-master ~]# echo '/dev/loop0 /data xfs defaults 1 2' >> /etc/fstab
 
Remember: the steps above must be executed on all four nodes, i.e. set up the storage-directory environment on each of them! (A consolidated loopback sketch follows below.)
----------------------------------------------------------------------------------
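The listings above mix /dev/vdb1 and /dev/loop0, so for clarity here is a minimal, self-contained sketch of one common way to build such a throw-away brick from a loopback file; the file path /opt/gfs.img and the 1 GB size are assumptions, not taken from the original environment:

# Create a 1 GB backing file, format it with xfs, and mount it at /data (run on every node)
dd if=/dev/zero of=/opt/gfs.img bs=1M count=1024
mkfs.xfs -f /opt/gfs.img
mkdir -p /data
mount -o loop /opt/gfs.img /data                               # shows up as /dev/loop0 in df
mkdir -p /data/gluster                                         # brick directory used by the volume later
echo '/opt/gfs.img /data xfs defaults,loop 0 0' >> /etc/fstab  # persist across reboots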
 
# The intermediate steps for deploying the GlusterFS cluster are omitted here; see the GlusterFS posts under the data-storage category
 
# Build the cluster (trusted pool); run the following on the GlusterFS-master node:
[root@GlusterFS-master ~]# gluster peer probe 192.168.0.9
peer probe: success.
[root@GlusterFS-master ~]# gluster peer probe 192.168.0.7
peer probe: success.
[root@GlusterFS-master ~]# gluster peer probe 192.168.0.6
peer probe: success.
 
# Check the peer status of the cluster
[root@glusterfs-master-8 ~]# gluster peer status
Number of Peers: 3

Hostname: glusterfs-slave-9
Uuid: e8a38fe1-aa9c-4330-9508-eed66b8f3da6
State: Peer in Cluster (Connected)

Hostname: 192.168.0.6
Uuid: 853633be-32e8-44bd-ba17-6a687f86d59b
State: Peer in Cluster (Connected)

Hostname: 192.168.0.7
Uuid: ee258b68-4e58-4757-8398-1e520c49b734
State: Peer in Cluster (Connected)
 
# Create a replicated volume
[root@GlusterFS-master ~]# gluster volume info
No volumes present
 
[root@GlusterFS-master ~]# gluster volume create models replica 2 192.168.0.8:/data/gluster 192.168.0.9:/data/gluster force
 
[root@GlusterFS-master ~]# gluster volume list
models
 
[root@glusterfs-master-8 ~]# gluster volume info
 
Volume Name: models
Type: Distributed-Replicate
Volume ID: ef9816b9-8b40-4c31-b10f-c2898656e5b1
Status: Started
Number of Bricks: 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.0.8:/opt/gluster/data
Brick2: 192.168.0.9:/opt/gluster/data
Brick3: 192.168.0.6:/opt/gluster/data
Brick4: 192.168.0.7:/opt/gluster/data
Options Reconfigured:
performance.write-behind: on
performance.io-thread-count: 32
performance.flush-behind: on
performance.cache-size: 128MB
features.quota: on

# Start the models volume
[root@GlusterFS-master ~]# gluster volume start models
volume start: models: success
 
[root@glusterfs-master-8 ~]# gluster volume status models
Status of volume: models
Gluster process						Port	Online	Pid
------------------------------------------------------------------------------
Brick 192.168.0.8:/opt/gluster/data			49152	Y	40994
Brick 192.168.0.9:/opt/gluster/data			49152	Y	40666
Brick 192.168.0.6:/opt/gluster/data			49152	Y	19081
Brick 192.168.0.7:/opt/gluster/data			49152	Y	17882
NFS Server on localhost					N/A	N	N/A
Self-heal Daemon on localhost				N/A	Y	41013
Quota Daemon on localhost				N/A	Y	41020
NFS Server on glusterfs-slave-9				N/A	N	N/A
Self-heal Daemon on glusterfs-slave-9			N/A	Y	40687
Quota Daemon on glusterfs-slave-9			N/A	Y	40688
NFS Server on 192.168.0.7				2049	Y	17895
Self-heal Daemon on 192.168.0.7				N/A	Y	17901
Quota Daemon on 192.168.0.7				N/A	Y	17908
NFS Server on 192.168.0.6				2049	Y	19094
Self-heal Daemon on 192.168.0.6				N/A	Y	19100
Quota Daemon on 192.168.0.6				N/A	Y	19107
 
Task Status of Volume models
------------------------------------------------------------------------------
Task                 : Rebalance           
ID                   : 1ad408fc-7a3b-4b3a-b7f5-27ea696e592c
Status               : completed  
 
# Add the bricks on the other two nodes to the volume, i.e. expand the volume (a rebalance sketch follows after the volume info below)
[root@GlusterFS-master ~]# gluster volume add-brick models 192.168.0.6:/data/gluster 192.168.0.7:/data/gluster force
 
[root@glusterfs-master-8 ~]#  gluster volume info
 
Volume Name: models
Type: Distributed-Replicate
Volume ID: ef9816b9-8b40-4c31-b10f-c2898656e5b1
Status: Started
Number of Bricks: 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.0.8:/opt/gluster/data
Brick2: 192.168.0.9:/opt/gluster/data
Brick3: 192.168.0.6:/opt/gluster/data
Brick4: 192.168.0.7:/opt/gluster/data
Options Reconfigured:
performance.write-behind: on
performance.io-thread-count: 32
performance.flush-behind: on
performance.cache-size: 128MB
features.quota: on
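The status output earlier already lists a completed Rebalance task. As a hedged reminder of how that is typically triggered after add-brick (the rebalance commands themselves are not shown in the original steps):

# Spread existing files across the newly added replica pair, then check progress
gluster volume rebalance models start
gluster volume rebalance models status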
           

Testing the Gluster Volume

# Mount the GlusterFS volume on the client (a persistent fstab sketch follows after this block)
[root@Client ~]# mount -t glusterfs 192.168.0.8:models /opt/gfsmount
[root@Client gfsmount]# df -h
........
192.168.10.239:models    2.0G   65M  2.0G   4% /opt/gfsmount
 
[root@Client ~]# cd /opt/gfsmount/
[root@Client gfsmount]# ls
[root@Client gfsmount]#
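To make the client mount survive reboots, an fstab entry along the following lines can be used; this is a sketch, and the _netdev and backup-volfile-servers options (the option name varies slightly between GlusterFS versions) are assumptions about this environment:

# /etc/fstab on the client: mount the models volume at boot, with a fallback volfile server
192.168.0.8:models  /opt/gfsmount  glusterfs  defaults,_netdev,backup-volfile-servers=192.168.0.9  0 0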
 
# Write some test data
[root@Client gfsmount]# for i in `seq -w 1 100`; do cp -rp /var/log/messages /opt/gfsmount/copy-test-$i; done
[root@Client gfsmount]# ls /opt/gfsmount/
copy-test-001  copy-test-014  copy-test-027  copy-test-040  copy-test-053  copy-test-066  copy-test-079  copy-test-092
copy-test-002  copy-test-015  copy-test-028  copy-test-041  copy-test-054  copy-test-067  copy-test-080  copy-test-093
copy-test-003  copy-test-016  copy-test-029  copy-test-042  copy-test-055  copy-test-068  copy-test-081  copy-test-094
copy-test-004  copy-test-017  copy-test-030  copy-test-043  copy-test-056  copy-test-069  copy-test-082  copy-test-095
copy-test-005  copy-test-018  copy-test-031  copy-test-044  copy-test-057  copy-test-070  copy-test-083  copy-test-096
copy-test-006  copy-test-019  copy-test-032  copy-test-045  copy-test-058  copy-test-071  copy-test-084  copy-test-097
copy-test-007  copy-test-020  copy-test-033  copy-test-046  copy-test-059  copy-test-072  copy-test-085  copy-test-098
copy-test-008  copy-test-021  copy-test-034  copy-test-047  copy-test-060  copy-test-073  copy-test-086  copy-test-099
copy-test-009  copy-test-022  copy-test-035  copy-test-048  copy-test-061  copy-test-074  copy-test-087  copy-test-100
copy-test-010  copy-test-023  copy-test-036  copy-test-049  copy-test-062  copy-test-075  copy-test-088
copy-test-011  copy-test-024  copy-test-037  copy-test-050  copy-test-063  copy-test-076  copy-test-089
copy-test-012  copy-test-025  copy-test-038  copy-test-051  copy-test-064  copy-test-077  copy-test-090
copy-test-013  copy-test-026  copy-test-039  copy-test-052  copy-test-065  copy-test-078  copy-test-091
 
[root@Client gfsmount]# ls -lA /opt/gfsmount|wc -l
101
    

Confirm on each node as well: the 100 files were split into two sets of 50 each (well balanced), one set replicated to nodes 1-2 and the other to nodes 3-4. (A small ssh loop after these listings shows one way to verify the counts on every brick.)
[root@GlusterFS-master ~]# ls /data/gluster
copy-test-001  copy-test-016  copy-test-028  copy-test-038  copy-test-054  copy-test-078  copy-test-088  copy-test-100
copy-test-004  copy-test-017  copy-test-029  copy-test-039  copy-test-057  copy-test-079  copy-test-090
copy-test-006  copy-test-019  copy-test-030  copy-test-041  copy-test-060  copy-test-081  copy-test-093
copy-test-008  copy-test-021  copy-test-031  copy-test-046  copy-test-063  copy-test-082  copy-test-094
copy-test-011  copy-test-022  copy-test-032  copy-test-048  copy-test-065  copy-test-083  copy-test-095
copy-test-012  copy-test-023  copy-test-033  copy-test-051  copy-test-073  copy-test-086  copy-test-098
copy-test-015  copy-test-024  copy-test-034  copy-test-052  copy-test-077  copy-test-087  copy-test-099
[root@GlusterFS-master ~]# ll /data/gluster|wc -l
51
 
[root@GlusterFS-slave ~]# ls /data/gluster/
copy-test-001  copy-test-016  copy-test-028  copy-test-038  copy-test-054  copy-test-078  copy-test-088  copy-test-100
copy-test-004  copy-test-017  copy-test-029  copy-test-039  copy-test-057  copy-test-079  copy-test-090
copy-test-006  copy-test-019  copy-test-030  copy-test-041  copy-test-060  copy-test-081  copy-test-093
copy-test-008  copy-test-021  copy-test-031  copy-test-046  copy-test-063  copy-test-082  copy-test-094
copy-test-011  copy-test-022  copy-test-032  copy-test-048  copy-test-065  copy-test-083  copy-test-095
copy-test-012  copy-test-023  copy-test-033  copy-test-051  copy-test-073  copy-test-086  copy-test-098
copy-test-015  copy-test-024  copy-test-034  copy-test-052  copy-test-077  copy-test-087  copy-test-099
[root@GlusterFS-slave ~]# ll /data/gluster/|wc -l
51
 
[root@GlusterFS-slave2 ~]# ls /data/gluster/
copy-test-002  copy-test-014  copy-test-036  copy-test-047  copy-test-059  copy-test-069  copy-test-080  copy-test-097
copy-test-003  copy-test-018  copy-test-037  copy-test-049  copy-test-061  copy-test-070  copy-test-084
copy-test-005  copy-test-020  copy-test-040  copy-test-050  copy-test-062  copy-test-071  copy-test-085
copy-test-007  copy-test-025  copy-test-042  copy-test-053  copy-test-064  copy-test-072  copy-test-089
copy-test-009  copy-test-026  copy-test-043  copy-test-055  copy-test-066  copy-test-074  copy-test-091
copy-test-010  copy-test-027  copy-test-044  copy-test-056  copy-test-067  copy-test-075  copy-test-092
copy-test-013  copy-test-035  copy-test-045  copy-test-058  copy-test-068  copy-test-076  copy-test-096
[root@GlusterFS-slave2 ~]# ll /data/gluster/|wc -l
51
 
[root@GlusterFS-slave3 ~]# ls /data/gluster/
copy-test-002  copy-test-014  copy-test-036  copy-test-047  copy-test-059  copy-test-069  copy-test-080  copy-test-097
copy-test-003  copy-test-018  copy-test-037  copy-test-049  copy-test-061  copy-test-070  copy-test-084
copy-test-005  copy-test-020  copy-test-040  copy-test-050  copy-test-062  copy-test-071  copy-test-085
copy-test-007  copy-test-025  copy-test-042  copy-test-053  copy-test-064  copy-test-072  copy-test-089
copy-test-009  copy-test-026  copy-test-043  copy-test-055  copy-test-066  copy-test-074  copy-test-091
copy-test-010  copy-test-027  copy-test-044  copy-test-056  copy-test-067  copy-test-075  copy-test-092
copy-test-013  copy-test-035  copy-test-045  copy-test-058  copy-test-068  copy-test-076  copy-test-096
[root@GlusterFS-slave3 ~]# ll /data/gluster/|wc -l
51
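As referenced above, a quick way to verify the per-brick file counts from a single machine is a small ssh loop; it assumes password-less ssh between the nodes and uses the 192.168.0.x addresses from the earlier setup:

# Count the test files on every brick; each replica pair should report the same number
for node in 192.168.0.8 192.168.0.9 192.168.0.6 192.168.0.7; do
    echo -n "$node: "
    ssh $node "ls /data/gluster | grep -c copy-test"
done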
           

Simulating a Brick Failure

1)檢視目前存儲狀态
在GlusterFS-slave3節點機器上操作
[root@GlusterFS-slave3 ~]# gluster volume status
Status of volume: models
Gluster process           Port  Online  Pid
------------------------------------------------------------------------------
Brick 192.168.10.239:/data/gluster      49156 Y 16040
Brick 192.168.10.212:/data/gluster      49157 Y 5544
Brick 192.168.10.204:/data/gluster      49157 Y 12432
Brick 192.168.10.220:/data/gluster      49158 Y 17678
NFS Server on localhost         N/A N N/A
Self-heal Daemon on localhost       N/A Y 17697
NFS Server on GlusterFS-master        N/A N N/A
Self-heal Daemon on GlusterFS-master      N/A Y 16104
NFS Server on 192.168.10.204        N/A N N/A
Self-heal Daemon on 192.168.10.204      N/A Y 12451
NFS Server on 192.168.10.212        N/A N N/A
Self-heal Daemon on 192.168.10.212      N/A Y 5593
  
Task Status of Volume models
------------------------------------------------------------------------------
There are no active volume tasks
 
 
Note: the Online column shows "Y" for every brick.
     
2) Create the failure (note: this simulates a filesystem failure, assuming the physical disk is fine or the disk in the array has already been replaced)
Run on the GlusterFS-slave3 node
[root@GlusterFS-slave3 ~]# vim /etc/fstab     // comment out the following line
......
#/dev/loop0 /data xfs defaults 1 2
 
# Reboot the server
[root@GlusterFS-slave3 ~]# reboot
 
# After the reboot, /data on GlusterFS-slave3 is no longer mounted
[root@GlusterFS-slave3 ~]# df -h
 
# After the reboot, the storage directory on GlusterFS-slave3 is gone, and so is its data.
[root@GlusterFS-slave3 ~]# ls /data/
[root@GlusterFS-slave3 ~]#


# After rebooting the server, remember to start the glusterd service (see the systemd note below)
[root@GlusterFS-slave3 ~]# /usr/local/glusterfs/sbin/glusterd
[root@GlusterFS-slave3 ~]# ps -ef|grep gluster
root     11122     1  4 23:13 ?        00:00:00 /usr/local/glusterfs/sbin/glusterd
root     11269     1  2 23:13 ?        00:00:00 /usr/local/glusterfs/sbin/glusterfs -s localhost --volfile-id gluster/glustershd -p /usr/local/glusterfs/var/lib/glusterd/glustershd/run/glustershd.pid -l /usr/local/glusterfs/var/log/glusterfs/glustershd.log -S /var/run/98e3200bc6620c9d920e9dc65624dbe0.socket --xlator-option *replicate*.node-uuid=dd99743a-285b-4aed-b3d6-e860f9efd965
root     11280  5978  0 23:13 pts/0    00:00:00 grep --color=auto gluster
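Because glusterd here comes from a source build under /usr/local, it is started by hand above. On a package-based installation managed by systemd (an assumption about your environment, not shown in the original), the service can be enabled so this step is never forgotten:

# Start glusterd now and have it start automatically after every reboot
systemctl start glusterd
systemctl enable glusterd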
 
 
3)檢視目前存儲狀态
[root@GlusterFS-slave3 ~]# gluster volume status
Status of volume: models
Gluster process           Port  Online  Pid
------------------------------------------------------------------------------
Brick 192.168.10.239:/data/gluster      49156 Y 16040
Brick 192.168.10.212:/data/gluster      49157 Y 5544
Brick 192.168.10.204:/data/gluster      49157 Y 12432
Brick 192.168.10.220:/data/gluster      N/A N N/A
NFS Server on localhost         N/A N N/A
Self-heal Daemon on localhost       N/A Y 11269
NFS Server on GlusterFS-master        N/A N N/A
Self-heal Daemon on GlusterFS-master      N/A Y 16104
NFS Server on 192.168.10.212        N/A N N/A
Self-heal Daemon on 192.168.10.212      N/A Y 5593
NFS Server on 192.168.10.204        N/A N N/A
Self-heal Daemon on 192.168.10.204      N/A Y 12451
  
Task Status of Volume models
------------------------------------------------------------------------------
There are no active volume tasks
 
Note: the Online column for the GlusterFS-slave3 node (192.168.10.220) now shows "N"!
 
4)恢複故障brick方法
  
4.1) Terminate the failed brick's process
As shown above with "gluster volume status": if the GlusterFS-slave3 entry whose Online column is "N" still has a PID (rather than N/A), kill it with "kill -15 <pid>"; a small sketch for doing this follows below.
Usually, once the Online column shows "N", no PID is displayed anymore.
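As mentioned above, one way to locate a lingering brick process and stop it gracefully is to pull its PID out of the status output (a sketch; adjust the brick address/path to your node):

# Extract the PID of this node's brick for the models volume and send SIGTERM if it is still running
brick_pid=$(gluster volume status models | awk '/Brick 192.168.10.220:\/data\/gluster[[:space:]]/{print $NF}')
[ -n "$brick_pid" ] && [ "$brick_pid" != "N/A" ] && kill -15 "$brick_pid"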
     
4.2) Create a new data directory (note: it must never be the same directory as before)
[root@GlusterFS-slave3 ~]# dd if=/dev/vda1 of=/dev/vdb1
2097152+0 records in
2097152+0 records out
1073741824 bytes (1.1 GB) copied, 2.05684 s, 522 MB/s
[root@GlusterFS-slave3 ~]# du -sh /dev/vdb1
1.0G  /dev/vdb1
[root@GlusterFS-slave3 ~]# mkfs.xfs -f /dev/vdb1
meta-data=/dev/vdb1              isize=512    agcount=4, agsize=65536 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=0, sparse=0
data     =                       bsize=4096   blocks=262144, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0 ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
 
Remount it
[root@GlusterFS-slave3 ~]# mount /dev/vdb1 /data
 
 
[root@GlusterFS-slave3 ~]# vim /etc/fstab       // uncomment the line below
......
/dev/loop0 /data xfs defaults 1 2
 
4.3) Query the extended attributes of the brick directory on the failed node's replica partner (GlusterFS-slave2) (use "yum search getfattr" to find which package provides the getfattr tool)
[root@GlusterFS-slave2 ~]# yum install -y attr.x86_64
[root@GlusterFS-slave2 ~]# getfattr -d -m. -e hex /data/gluster
getfattr: Removing leading '/' from absolute path names
# file: data/gluster
trusted.gfid=0x00000000000000000000000000000001
trusted.glusterfs.dht=0x00000001000000007fffffffffffffff
trusted.glusterfs.volume-id=0x8eafb261e0d24f3b8e0905475c63dcc6
 
4.4) Mount the volume and trigger self-heal
On the client, first unmount the previous mount
[root@Client ~]# umount /opt/gfsmount
 
Then mount again via GlusterFS-slave3 (in fact, mounting through any of the nodes works)
[root@Client ~]# mount -t glusterfs 192.168.10.220:models /opt/gfsmount
[root@Client ~]# df -h
.......
192.168.10.220:models    2.0G   74M  2.0G   4% /opt/gfsmount
  

Create a directory that does not yet exist on the volume, then delete it
[root@Client ~]# cd /opt/gfsmount/
[root@Client gfsmount]# mkdir testDir001
[root@Client gfsmount]# rm -rf testDir001
 
Set and remove an extended attribute to trigger self-heal (an alternative is sketched right after these two commands)
[root@Client gfsmount]# setfattr -n trusted.non-existent-key -v abc /opt/gfsmount
[root@Client gfsmount]# setfattr -x trusted.non-existent-key /opt/gfsmount
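As referenced above, on reasonably recent GlusterFS releases the same effect can usually be obtained directly from any server node, without the setfattr trick (a hedged alternative, not part of the original procedure):

# Ask the self-heal daemon to crawl the whole volume and heal it, then list what is still pending
gluster volume heal models full
gluster volume heal models info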
 
4.5) Check whether pending xattrs have been recorded on the surviving node
Query the extended attributes of the replica partner's (GlusterFS-slave2) brick directory again
[root@GlusterFS-slave2 ~]# getfattr -d -m. -e hex /data/gluster
getfattr: Removing leading '/' from absolute path names
# file: data/gluster
trusted.afr.dirty=0x000000000000000000000000
trusted.afr.models-client-2=0x000000000000000000000000
trusted.afr.models-client-3=0x000000000000000200000002
trusted.gfid=0x00000000000000000000000000000001
trusted.glusterfs.dht=0x00000001000000007fffffffffffffff
trusted.glusterfs.volume-id=0x8eafb261e0d24f3b8e0905475c63dcc6
 
Note: look at line 5 of the output (trusted.afr.models-client-3): its non-zero value records pending changes for the failed brick GlusterFS-slave3:/data/gluster, so this surviving copy will act as the heal source.
 
4.6)檢查卷的狀态是否顯示需要替換
[root@GlusterFS-slave3 ~]# gluster volume heal models info
Brick GlusterFS-master:/data/gluster/
Number of entries: 0
 
Brick GlusterFS-slave:/data/gluster/
Number of entries: 0
 
Brick GlusterFS-slave2:/data/gluster/
/
Number of entries: 1
 
Brick 192.168.10.220:/data/gluster
Status: Transport endpoint is not connected
 
注:狀态提示傳輸端點未連接配接(最後一行)
     
4.7) Complete the operation with a forced commit
[root@GlusterFS-slave3 ~]# gluster volume replace-brick models 192.168.10.220:/data/gluster 192.168.10.220:/data/gluster1 commit force
The following message indicates the operation completed normally (a heal-monitoring sketch follows below):
volume replace-brick: success: replace-brick commit force operation successful
 
-------------------------------------------------------------------------------------
Note: the data can also be recovered onto a different server; the commands are as follows (192.168.10.230 is another, newly added GlusterFS node) (optional):
# gluster peer probe 192.168.10.230
# gluster volume replace-brick models 192.168.10.220:/data/gluster 192.168.10.230:/data/gluster commit force
-------------------------------------------------------------------------------------
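After the commit, the self-heal daemon copies the data onto the new brick in the background. A simple way to watch it finish (a sketch using the standard heal-info command):

# Re-run heal info every 10 seconds until "Number of entries" drops to 0 on all bricks
watch -n 10 'gluster volume heal models info'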
 
4.8) Check the brick's online status
[root@GlusterFS-slave3 ~]# gluster volume status
Status of volume: models
Gluster process           Port  Online  Pid
------------------------------------------------------------------------------
Brick 192.168.10.239:/data/gluster      49156 Y 16040
Brick 192.168.10.212:/data/gluster      49157 Y 5544
Brick 192.168.10.204:/data/gluster      49157 Y 12432
Brick 192.168.10.220:/data/gluster1     49159 Y 11363
NFS Server on localhost         N/A N N/A
Self-heal Daemon on localhost       N/A Y 11375
NFS Server on 192.168.10.204        N/A N N/A
Self-heal Daemon on 192.168.10.204      N/A Y 12494
NFS Server on 192.168.10.212        N/A N N/A
Self-heal Daemon on 192.168.10.212      N/A Y 5625
NFS Server on GlusterFS-master        N/A N N/A
Self-heal Daemon on GlusterFS-master      N/A Y 16161
  
Task Status of Volume models
------------------------------------------------------------------------------
There are no active volume tasks
 
 
From the output above, the Online column for the 192.168.10.220 (GlusterFS-slave3) node is back to "Y", although the brick directory is now /data/gluster1.
 
At this point, check the brick directory on GlusterFS-slave3 (now /data/gluster1) and you will find the data has been healed back
[root@GlusterFS-slave3 ~]# ls /data/gluster1/
copy-test-002  copy-test-014  copy-test-036  copy-test-047  copy-test-059  copy-test-069  copy-test-080  copy-test-097
copy-test-003  copy-test-018  copy-test-037  copy-test-049  copy-test-061  copy-test-070  copy-test-084
copy-test-005  copy-test-020  copy-test-040  copy-test-050  copy-test-062  copy-test-071  copy-test-085
copy-test-007  copy-test-025  copy-test-042  copy-test-053  copy-test-064  copy-test-072  copy-test-089
copy-test-009  copy-test-026  copy-test-043  copy-test-055  copy-test-066  copy-test-074  copy-test-091
copy-test-010  copy-test-027  copy-test-044  copy-test-056  copy-test-067  copy-test-075  copy-test-092
copy-test-013  copy-test-035  copy-test-045  copy-test-058  copy-test-068  copy-test-076  copy-test-096
[root@GlusterFS-slave3 ~]# ll /data/gluster1/|wc -l
           

The failure simulated above is the partition holding a gluster node's brick directory failing to mount, so the brick directory disappears; the steps above are the data-recovery method for that scenario. (A final checksum check is sketched at the end of this post.)

If the brick directory itself has been deleted, the data can also be recovered using the replicated-volume data-failure recovery methods described in http://www.cnblogs.com/kevingrace/p/8778123.html.
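As a final sanity check (not part of the original write-up), the replaced brick can be compared against its replica partner with a checksum pass; the hostnames and brick paths below follow the examples above, and password-less ssh between the nodes is assumed:

# Compare file checksums between the replica partner and the replaced brick
ssh GlusterFS-slave2 "cd /data/gluster  && md5sum copy-test-* | sort" > /tmp/slave2.md5
ssh GlusterFS-slave3 "cd /data/gluster1 && md5sum copy-test-* | sort" > /tmp/slave3.md5
diff /tmp/slave2.md5 /tmp/slave3.md5 && echo "bricks match"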
