最近看到關於redis故障轉移的文章,然後發現自己項目的redis並沒有應用故障轉移的機制。項目目前的情況是:如果redis所在的伺服器當機,或者redis服務異常/關閉,遊戲伺服器就獲取不到redis相關資訊。之前也有過幾次這種情況,但是當時的處理方式都是重新啟動redis服務。
我按照這篇文章 https://blog.csdn.net/guweiyu_thinker/article/details/79188281?tdsourcetag=s_pctim_aiomsg 自己部署了一下故障轉移。這篇文章很詳細,具體的原理就不贅述了。他是在同一台伺服器上開啟不同的端口來嘗試的,我是在3台不同的伺服器上部署的,每台伺服器部署1個redis服務和1個sentinel服務。其中要提到一點:sentinel的myid不需要自己去設定,服務啟動的時候會自動生成一個myid,並寫入到sentinel*.conf配置中。還有一些配置如
sentinel config-epoch mymaster 10
sentinel leader-epoch mymaster 10
sentinel known-slave mymaster 10.1.67.28 6379
sentinel known-slave mymaster 10.1.67.44 6399
sentinel known-sentinel mymaster 10.1.67.27 26379 0e86212b10f3e231376f84aa0315c7e1f9bcdb4f
sentinel known-sentinel mymaster 10.1.67.44 26379 c7d8950964042eb5ff3871f36929934ad46a8779
都是故障轉移後自動寫入到sentinel*.conf中裡面的,自己不需要配置
最後故障轉移成功,redis的master的host發生了變化,變成了某個slave的host。現在的問題是:遊戲伺服器怎麼能獲取到最新的master的host?如果通過redis用戶端redis-cli,可以直接執行 SENTINEL get-master-addr-by-name mymaster 來獲取;但是遊戲服中需要先連接到某個sentinel,然後再獲取。每個項目架構不同,連接的方法應該也不一樣。我們用的是predis,predis中有ServerSentinel.php,然後自己寫了一個檔案sentinel.lua,用來連接到sentinel的cli,然後查詢最新的master的host
sentinel.lua
-------------------------------------------
-- Create the global Sentinel class through the project's `class` helper.
-- NOTE(review): "ServerSentinel" is presumably the name the class is registered under — confirm against `class`.
_G.Sentinel = class("ServerSentinel")
--- Constructor: start a connection to one sentinel endpoint.
-- The value returned by `_connect` is kept in `self.__net`; `self.net` is only
-- set later by Sentinel:onConnect once the connection is reported as open.
-- @param host       endpoint passed straight to `_connect` (this file uses "ip:port" strings)
-- @param onConnect  optional callback, called after Sentinel:onConnect with
--                   (net, ip, port, myip, myport)
function Sentinel:__construct(host, onConnect)
  local function onconnect(net, ip, port, myip, myport)
    self:onConnect(net)
    if onConnect then
      onConnect(net, ip, port, myip, myport)
    end
  end

  -- The close handler used to declare no parameters, so `net`, `timeout`,
  -- `notconn` and `err` were looked up as globals and the real close reason
  -- was never forwarded or logged.
  -- NOTE(review): assumes `_connect` invokes the close callback with
  -- (net, timeout, notconn, err), mirroring Sentinel:onClose — confirm. If it
  -- passes nothing, these are simply nil, which matches the old behavior.
  local function onclose(net, timeout, notconn, err)
    self.__net = nil
    self:onClose(net, timeout, notconn, err)
    printi("Sentinel:onClose", timeout, notconn, err)
  end

  self.__net = _connect(host, onconnect, onclose, 30) -- 30 second timeout
end
--- Connect hook: remember the socket and schedule the keep-alive.
-- Does nothing beyond logging when the socket already reports itself closed.
-- NOTE(review): `self.keeplive` is not defined in the visible part of this
-- file, and the meaning of the `60` passed to `delayf` (presumably seconds)
-- should be confirmed against `delayf`.
-- @param net  connection object handed over by the connect callback
function Sentinel:onConnect(net)
  printi("Sentinel:onConnect", net:closed())
  if not net:closed() then
    self.net = net
    delayf("Sentinel.keeplive", self.keeplive, 60, self)
  end
end
--- Close hook: drop the socket reference so Sentinel:command refuses to send.
-- The arguments are accepted for the callback signature but not used here.
-- @param net      connection that was closed
-- @param timeout  unused
-- @param notconn  unused
-- @param err      unused
function Sentinel:onClose(net, timeout, notconn, err)
  self.net = nil
end
-- Most recent command issued through Sentinel:command (shared by all
-- instances); Sentinel.finalerr prints it when a reply fails.
Sentinel.lastcommand = {}

--- Send one command to the sentinel and read back its reply.
-- Returns nothing when the socket is missing/closed or when `cmd` has no
-- entry in Sentinel.commands. Otherwise the request is encoded (default:
-- Sentinel.multibulk, or the entry's `request` function), sent, and the reply
-- is read with Sentinel.response. A table reply flagged `queued` is returned
-- with the parser attached as `reply.parser`; any other reply goes through
-- the entry's `response` function when one is configured.
-- @param cmd  command name, used as the key into Sentinel.commands
-- @param ...  command arguments
function Sentinel:command(cmd, ...)
  local last = Sentinel.lastcommand
  last.cmd = cmd
  last.args = {...}
  print("-------------->Sentinel:command", cmd)

  if not self.net or self.net:closed() then
    print("INFO", "Sentinel:command net is nil", cmd, self.net, self.net and self.net:closed())
    return
  end

  local spec = Sentinel.commands[cmd]
  if not spec then
    return
  end

  -- Defaults: plain multibulk encoding, no reply post-processing.
  local build, parse = Sentinel.multibulk, nil
  if type(spec) == "table" then
    build = spec.request or build
    parse = spec.response or nil
  end

  self.net:send(build(cmd, ...))
  local reply = Sentinel.response(self.net)

  if type(reply) == 'table' and reply.queued then
    reply.parser = parse
    return reply
  end
  if parse then
    return parse(reply, cmd, ...)
  end
  return reply
end
--- Build a request encoder for commands that take `key, {field = value, ...}`.
-- The returned function has the shape `(command, ...)`. With exactly two
-- arguments, the first is kept as-is and every pair of the second (iterated
-- with `pairs`, so order is unspecified) is handed to `builder_callback`,
-- which appends to the flat argument list. With any other argument count the
-- arguments are passed through unchanged.
-- @param builder_callback  function(arguments, k, v) that appends to `arguments`
-- @return encoder function producing the Sentinel.multibulk request string
function Sentinel.hash_multi_request_builder(builder_callback)
  return function(command, ...)
    local given = {...}
    if #given ~= 2 then
      return Sentinel.multibulk(command, given)
    end

    local flat = { given[1] }
    for field, value in pairs(given[2]) do
      builder_callback(flat, field, value)
    end
    return Sentinel.multibulk(command, flat)
  end
end
--- Encode a command and its arguments as one multibulk request string.
-- Layout: "*<argc+1>\r\n" followed by one "$<len>\r\n<bytes>\r\n" entry for the
-- command and for each argument. Arguments may be given as varargs or as a
-- single table; each is converted with `tostring`, and nil/false become "".
-- @param cmd  command name (string)
-- @param ...  arguments, or one table holding them
-- @return the encoded request
function Sentinel.multibulk(cmd, ...)
  local CRLF = "\r\n"

  local params = {...}
  local count = #params
  if count == 1 and type(params[1]) == 'table' then
    -- A lone table argument is treated as the argument list itself.
    params = params[1]
    count = #params
  end

  local parts = {
    '*' .. tostring(count + 1) .. CRLF,
    '$' .. #cmd .. CRLF .. cmd .. CRLF,
  }
  for i = 1, count do
    local text = tostring(params[i] or '')
    parts[#parts + 1] = '$' .. #text .. CRLF .. text .. CRLF
  end
  return table.concat(parts)
end
-- Registry of the commands Sentinel:command will send, keyed by command name;
-- a name without an entry here is silently ignored by Sentinel:command.
-- A value of `true` selects the default Sentinel.multibulk encoder and returns
-- the reply unparsed; a table value may provide `request` and/or `response`
-- functions to override encoding and reply parsing.
Sentinel.commands = {
-- Disabled alternative kept for reference: encode the request through
-- Sentinel.hash_multi_request_builder instead of the default encoder.
-- Sentinel = {-- >= 2.0
-- request = Sentinel.hash_multi_request_builder(function(args, k, v)
-- print('=====kkk', k, v)
-- table.insert(args, k)
-- table.insert(args, v)
-- end),
-- },
Sentinel = true, -- >= 2.0
}
--- Abort on a fatal protocol error.
-- Closes the connection, prints the last command recorded by
-- Sentinel:command, then raises `msg`; this function never returns.
-- @param net  connection to close
-- @param msg  error message to raise
function Sentinel.finalerr(net, msg)
  net:close()
  local last = Sentinel.lastcommand
  print("ERROR", "Sentinel lastcommand", last.cmd or 'nocmd', serialize(last.args))
  error(msg)
end
--- Read one reply header line and split it into type prefix and remainder.
-- Raises through Sentinel.finalerr when the read throws or yields something
-- without a `tostr` method.
-- NOTE(review): the meaning of the `32` passed to `net.receiving`, and whether
-- the returned text still carries the "\r\n" delimiter, are not visible here.
-- @param net  connection to read from
-- @return first character of the line, and everything after it
function Sentinel.getdata(net)
  local ok, payload = pcall(net.receiving, net, "\r\n", 32, 10) -- 10 second timeout
  if not ok then
    Sentinel.finalerr(net, "Sentinel err, receiving something wrong.")
  end
  if not (payload and payload.tostr) then
    Sentinel.finalerr(net, "redis err, receiving payload wrong, "..type(payload))
  end

  local line = payload:tostr()
  -- The first byte is the reply-type marker; the rest is its argument.
  return line:sub(1, 1), line:sub(2)
end
--- Read and decode one complete reply from the connection.
-- Fetches the header via Sentinel.getdata, logs it, and hands it to
-- Sentinel.response_data. Returns nothing if no prefix was obtained.
-- @param net  connection to read from
-- @return the decoded reply
function Sentinel.response(net)
  local kind, body = Sentinel.getdata(net)
  print("------------------>Sentinel.response", kind, body)
  if kind then
    return Sentinel.response_data(net, kind, body)
  end
end
--- Decode a reply whose header line has already been split by Sentinel.getdata.
-- Reply kinds, selected by `prefix`:
--   '+' status    -> true for "OK", {queued = true} for "QUEUED", else the text
--   '-' error     -> raised through Sentinel.finalerr (closes the connection)
--   ':' integer   -> number (the literal text "nil" yields nil)
--   '$' bulk      -> string payload, or nil when the length is -1
--   '*' multibulk -> list of decoded replies, or nil when the count is -1
-- Any other prefix raises an error without closing the connection.
-- @param net     connection, used to read bulk payloads and nested replies
-- @param prefix  one-character reply type marker
-- @param data    remainder of the header line
-- @return the decoded value as described above
function Sentinel.response_data(net, prefix, data)
  if prefix == '+' then
    -- status reply
    if data == 'OK' then
      return true
    elseif data == 'QUEUED' then
      return {queued = true}
    end
    return data

  elseif prefix == '-' then
    -- error reply
    return Sentinel.finalerr(net, "Sentinel err, -" .. data)

  elseif prefix == ':' then
    -- integer reply
    local number = tonumber(data)
    if not number then
      if data == 'nil' then
        return nil
      end
      return Sentinel.finalerr(net, "Sentinel err, isnot number")
    end
    return number

  elseif prefix == '$' then
    -- bulk reply
    local length = tonumber(data)
    if not length then
      -- Report the raw header text: `length` is nil on this path, so
      -- concatenating it (as the code used to) crashed the error handler.
      return Sentinel.finalerr(net, "redis err, cannot parse " .. tostring(data) .. " as data length")
    end
    if length == -1 then
      return nil
    end
    -- Read the payload plus 2 extra bytes, then drop the final two bytes
    -- (the bulk terminator). NOTE(review): the trailing `2` is presumably a
    -- timeout in seconds — confirm against `net.receiving`.
    local res, nextchunk = pcall(net.receiving, net, length + 2, 2)
    if not res then
      return Sentinel.finalerr(net, "redis err, something wrong.")
    end
    return nextchunk:tostr():sub(1, -3)

  elseif prefix == '*' then
    -- multibulk reply
    local count = tonumber(data)
    if not count then
      -- A malformed count used to fall through to `count > 0` and raise a
      -- raw comparison error while leaving the socket open.
      return Sentinel.finalerr(net, "Sentinel err, cannot parse " .. tostring(data) .. " as multibulk count")
    end
    if count == -1 then
      return nil
    end
    local list = {}
    for i = 1, count do
      -- Nested replies are read with this class's own reader; the code used
      -- to call Redis.response, a different class not defined in this file.
      list[i] = Sentinel.response(net)
    end
    return list

  else
    -- unknown type of reply
    error('Sentinel error: ' .. 'unknown response prefix: ' .. prefix)
  end
end
--- Ask the connected sentinel for the current master of "mymaster".
-- Uses the global `sentinel` instance. The reply is expected to be a
-- two-element list {host, port}.
-- @return "host:port" string
-- Raises an error when the reply is missing or does not have that shape,
-- e.g. the sentinel is not connected (Sentinel:command returns nothing) or
-- the master name is unknown.
function Sentinel.getMasterHost()
  local info = sentinel:command('Sentinel', 'get-master-addr-by-name', 'mymaster')
  -- Validate the shape before formatting: a non-table or short reply used to
  -- crash on indexing or produce "nil:nil" instead of reporting "no master".
  if type(info) == 'table' and info[1] and info[2] then
    return string.format('%s:%s', info[1], info[2])
  end
  error('Sentinel error: ' .. 'unknown errorr no master')
end
-- TODO: should iterate over the sentinels and pick one that is available,
-- rather than relying on this single hard-coded address.
_G.sentinel = new(Sentinel, '10.1.67.28:26379')
-----------------------------------------------------------------------------------