最近看到一篇关于redis故障转移的文章,发现自己项目的redis并没有启用故障转移机制。项目当前的情况是:一旦redis所在的服务器宕机,或者redis服务异常、被关闭,游戏服务器就获取不到redis中的相关信息。之前也出现过几次这种情况,当时的处理方式都是重启redis服务。
我按照这篇帖子 https://blog.csdn.net/guweiyu_thinker/article/details/79188281?tdsourcetag=s_pctim_aiomsg 自己部署了一下故障转移。这篇帖子写得很详细,具体的原理这里就不赘述了。帖子作者是在同一台服务器上开启不同的端口来尝试的,而我是在3台不同的服务器上部署的,每台服务器部署1个redis服务和1个sentinel服务。其中要提到一点:sentinel的myid不需要自己去设置,服务启动的时候会自动生成一个myid并写入到sentinel*.conf配置中。还有一些配置,例如:
sentinel config-epoch mymaster 10
sentinel leader-epoch mymaster 10
sentinel known-slave mymaster 10.1.67.28 6379
sentinel known-slave mymaster 10.1.67.44 6399
sentinel known-sentinel mymaster 10.1.67.27 26379 0e86212b10f3e231376f84aa0315c7e1f9bcdb4f
sentinel known-sentinel mymaster 10.1.67.44 26379 c7d8950964042eb5ff3871f36929934ad46a8779
这些都是故障转移后自动写入到sentinel*.conf中的,不需要自己配置。
最后故障转移成功,redis的master的host发生了变化,变成了某个slave的host。现在的问题是:游戏服务器怎样才能获取到最新的master的host?如果通过redis客户端redis-cli,可以直接执行 SENTINEL get-master-addr-by-name mymaster 来获取;但是在游戏服中,需要先连接到某个sentinel,然后再进行查询。每个项目的框架不同,连接的方法应该也不一样。我们用的是predis,predis中有ServerSentinel.php,然后自己写了一个文件sentinel.lua,用来连接到sentinel并查询最新的master的host。
sentinel.lua
-------------------------------------------
-- Global Sentinel class table, created with class() under the name "ServerSentinel".
-- NOTE(review): class() is not defined in this file; it presumably comes from the project framework.
_G.Sentinel = class("ServerSentinel")
--- Constructor: opens a connection to a sentinel through _connect and wires up
-- the connect/close callbacks.
-- @param host       address passed unchanged to _connect (this file uses 'ip:port')
-- @param onConnect  optional function; called after Sentinel:onConnect with
--                   (net, ip, port, myip, myport)
function Sentinel:__construct(host, onConnect)
    local parent = self

    local onconnect = function(net, ip, port, myip, myport)
        parent:onConnect(net)
        if onConnect then
            onConnect(net, ip, port, myip, myport)
        end
    end

    -- The close callback now declares the arguments it forwards. Previously the
    -- closure took no parameters, so net/timeout/notconn/err were looked up as
    -- globals and the close reason was never logged or passed on.
    -- NOTE(review): assumes _connect invokes the close callback with
    -- (net, timeout, notconn, err) -- confirm against the framework. If it
    -- passes fewer values the missing ones are simply nil, as before.
    local onclose = function(net, timeout, notconn, err)
        parent.__net = nil
        parent:onClose(net, timeout, notconn, err)
        printi("Sentinel:onClose", timeout, notconn, err)
    end

    self.__net = _connect(host, onconnect, onclose, 30) -- 30 second timeout
end
--- Connect hook, invoked from the constructor's connect callback.
-- Logs whether the connection is already closed; if it is still open, stores
-- it in self.net and registers self.keeplive with delayf.
-- NOTE(review): self.keeplive is not defined in the visible part of this file,
-- and the meaning of the 60 passed to delayf is not shown here -- confirm both.
-- @param net  connection object handed over by the connect callback
function Sentinel:onConnect(net)
    printi("Sentinel:onConnect", net:closed())
    if not net:closed() then
        self.net = net
        delayf("Sentinel.keeplive", self.keeplive, 60, self)
    end
end
--- Close hook, invoked from the constructor's close callback.
-- Drops the connection reference stored by Sentinel:onConnect so that
-- Sentinel:command sees the instance as disconnected.
-- The net, timeout, notconn and err arguments are accepted but not used here.
function Sentinel:onClose(net, timeout, notconn, err)
self.net = nil
end
-- Name and arguments of the most recent command. Stored on the class table, so
-- it is shared by all instances. Written by Sentinel:command and printed by
-- Sentinel.finalerr when a reply cannot be processed.
Sentinel.lastcommand = {}
--- Sends one command to the sentinel and returns its (optionally parsed) reply.
-- The command must have an entry in Sentinel.commands. A table entry may
-- override the request builder (`request`) and/or supply a reply parser
-- (`response`); any other truthy entry uses Sentinel.multibulk and no parser.
-- @param cmd  command name, used as the key into Sentinel.commands
-- @param ...  command arguments, forwarded to the request builder and parser
-- @return nothing when there is no open connection or the command is unknown;
--         the reply table (with `parser` attached) when the reply is flagged
--         as queued; otherwise the parser's result, or the raw reply if there
--         is no parser
function Sentinel:command(cmd, ...)
    -- Remember the command so Sentinel.finalerr can report it on failure.
    local last = Sentinel.lastcommand
    last.cmd = cmd
    last.args = {...}
    print("-------------->Sentinel:command", cmd)

    if not self.net or self.net:closed() then
        print("INFO", "Sentinel:command net is nil", cmd, self.net, self.net and self.net:closed())
        return
    end

    local entry = Sentinel.commands[cmd]
    if not entry then
        return
    end

    -- Default: plain multibulk request, no reply parser.
    local build, parse = Sentinel.multibulk, nil
    if type(entry) == "table" then
        build = entry.request or build
        parse = entry.response or parse
    end

    self.net:send(build(cmd, ...))
    local reply = Sentinel.response(self.net)

    if type(reply) == 'table' and reply.queued then
        reply.parser = parse
        return reply
    end
    if parse then
        return parse(reply, cmd, ...)
    end
    return reply
end
--- Creates a request builder for commands that take a key plus a hash of values.
-- The returned function behaves as follows:
--   * called with exactly two arguments after the command name, it starts the
--     argument list with the first one and lets builder_callback append to it
--     once per (k, v) pair of the second one (iterated with pairs, so the
--     order is unspecified);
--   * otherwise the arguments are passed through unchanged.
-- In both cases the argument list is serialized with Sentinel.multibulk.
-- @param builder_callback  function(arguments, k, v) that appends to `arguments`
-- @return function(command, ...) returning the serialized request
function Sentinel.hash_multi_request_builder(builder_callback)
    return function(command, ...)
        local packed = {...}
        if #packed ~= 2 then
            return Sentinel.multibulk(command, packed)
        end

        local flat = {packed[1]}
        for field, value in pairs(packed[2]) do
            builder_callback(flat, field, value)
        end
        return Sentinel.multibulk(command, flat)
    end
end
--- Serializes a command and its arguments as a multibulk request string:
-- "*<count>\r\n" followed by one "$<length>\r\n<value>\r\n" entry for the
-- command name and for each argument.
-- Arguments may be given as varargs or as one single table argument.
-- Each argument goes through tostring(); nil/false become the empty string.
-- @param cmd  command name (string)
-- @param ...  arguments, or a single table holding them
-- @return the serialized request
function Sentinel.multibulk(cmd, ...)
    local params = {...}
    local total = #params
    -- A lone table argument is treated as the argument list itself.
    if total == 1 and type(params[1]) == 'table' then
        params = params[1]
        total = #params
    end

    local parts = {
        '*' .. tostring(total + 1) .. "\r\n",
        '$' .. #cmd .. "\r\n" .. cmd .. "\r\n",
    }
    for idx = 1, total do
        local text = tostring(params[idx] or '')
        parts[idx + 2] = '$' .. #text .. "\r\n" .. text .. "\r\n"
    end
    return table.concat(parts)
end
-- Commands accepted by Sentinel:command, keyed by command name; a command
-- without an entry here is silently ignored by Sentinel:command.
-- A table value may carry `request` (custom request builder) and/or `response`
-- (reply parser); any other truthy value selects the defaults
-- (Sentinel.multibulk request, reply returned unparsed).
-- The commented-out entry below is a disabled variant that built the request
-- with Sentinel.hash_multi_request_builder.
Sentinel.commands = {
-- Sentinel = {-- >= 2.0
-- request = Sentinel.hash_multi_request_builder(function(args, k, v)
-- print('=====kkk', k, v)
-- table.insert(args, k)
-- table.insert(args, v)
-- end),
-- },
Sentinel = true, -- >= 2.0
}
--- Fatal-error path: closes the connection, prints the last command that was
-- issued (from Sentinel.lastcommand) and raises `msg`. Never returns normally.
-- @param net  connection to close
-- @param msg  error message to raise
function Sentinel.finalerr(net, msg)
    net:close()
    local last = Sentinel.lastcommand
    print("ERROR", "Sentinel lastcommand", last.cmd or 'nocmd', serialize(last.args))
    error(msg)
end
--- Reads one reply line from the connection and splits it into the reply-type
-- prefix (first character) and the remainder of the line.
-- Goes through Sentinel.finalerr (close connection + raise) when receiving
-- fails or the received payload has no `tostr` member.
-- NOTE(review): the meaning of the 32 passed to net.receiving is not visible
-- in this file; the original comment only identifies the 10 as the timeout.
-- @param net  connection to read from
-- @return prefix, data
function Sentinel.getdata(net)
    local ok, payload = pcall(net.receiving, net, "\r\n", 32, 10) -- 10 second timeout
    if not ok then
        Sentinel.finalerr(net, "Sentinel err, receiving something wrong.")
    end
    if not (payload and payload.tostr) then
        Sentinel.finalerr(net, "redis err, receiving payload wrong, "..type(payload))
    end

    local line = payload:tostr()
    local prefix = line:sub(1, 1)
    return prefix, line:sub(2)
end
--- Reads and decodes one complete reply from the connection.
-- Fetches the reply line with Sentinel.getdata, logs it, and hands it to
-- Sentinel.response_data for decoding.
-- @param net  connection to read from
-- @return the decoded reply, or nothing when no prefix was obtained
function Sentinel.response(net)
    local marker, rest = Sentinel.getdata(net)
    print("------------------>Sentinel.response", marker, rest)
    if marker then
        return Sentinel.response_data(net, marker, rest)
    end
end
--- Decodes one reply whose first line has already been split into `prefix`
-- (reply type character) and `data` (rest of the line).
--   '+' status    -> true for "OK", {queued = true} for "QUEUED", else the text
--   '-' error     -> closes the connection and raises (Sentinel.finalerr)
--   ':' integer   -> number (nil when the text is "nil")
--   '$' bulk      -> string payload, or nil for length -1
--   '*' multibulk -> table of decoded elements, or nil for count -1
-- Malformed replies close the connection and raise through Sentinel.finalerr;
-- an unknown prefix raises without closing.
-- @param net     connection, used to read bulk payloads and nested replies
-- @param prefix  reply type character
-- @param data    remainder of the reply line
-- @return the decoded value as described above
function Sentinel.response_data(net, prefix, data)
    -- status reply
    if prefix == '+' then
        if data == 'OK' then
            return true
        elseif data == 'QUEUED' then
            return {queued = true}
        else
            return data
        end
    -- error reply
    elseif prefix == '-' then
        return Sentinel.finalerr(net, "Sentinel err, -" .. data)
    -- integer reply
    elseif prefix == ':' then
        local number = tonumber(data)
        if not number then
            if data == 'nil' then
                return nil
            end
            return Sentinel.finalerr(net, "Sentinel err, isnot number")
        end
        return number
    -- bulk reply
    elseif prefix == '$' then
        local length = tonumber(data)
        if not length then
            -- Report the unparsable text. The original concatenated `length`,
            -- which is nil here and raised a concat error instead.
            return Sentinel.finalerr(net, "redis err, cannot parse " .. tostring(data) .. " as data length")
        end
        if length == -1 then
            return nil
        end
        -- Payload is followed by "\r\n", hence length + 2 bytes.
        local res, nextchunk = pcall(net.receiving, net, length + 2, 2)
        if not res then
            return Sentinel.finalerr(net, "redis err, something wrong.")
        end
        -- Same payload sanity check as Sentinel.getdata, so a missing chunk
        -- fails with a clear message instead of an index error.
        if not nextchunk or not nextchunk.tostr then
            return Sentinel.finalerr(net, "redis err, receiving payload wrong, " .. type(nextchunk))
        end
        return nextchunk:tostr():sub(1, -3)
    -- multibulk reply
    elseif prefix == '*' then
        local count = tonumber(data)
        if not count then
            -- A non-numeric count would otherwise fail on the comparison below.
            return Sentinel.finalerr(net, "Sentinel err, cannot parse " .. tostring(data) .. " as multibulk count")
        end
        if count == -1 then
            return nil
        end
        local list = {}
        for i = 1, count do
            -- Nested elements are decoded by this module's own reader (the
            -- original called Redis.response, a leftover from the redis client).
            list[i] = Sentinel.response(net)
        end
        return list
    -- unknown type of reply
    else
        error('Sentinel error: ' .. 'unknown response prefix: ' .. prefix)
    end
end
--- Asks the sentinel for the current master address of a monitored master.
-- Uses the global `sentinel` instance and sends
-- `get-master-addr-by-name <name>` through Sentinel:command.
-- @param name  optional master name; defaults to 'mymaster'
-- @return string of the form "host:port"
-- Raises an error when the reply is missing or is not a two-element list
-- (no connection, unknown master name, unexpected reply type).
function Sentinel.getMasterHost(name)
    local info = sentinel:command('Sentinel', 'get-master-addr-by-name', name or 'mymaster')
    -- Only a two-element table reply is a valid address. Any other truthy
    -- reply used to fail with an opaque index/format error.
    if type(info) == 'table' and info[1] and info[2] then
        return string.format('%s:%s', info[1], info[2])
    end
    error('Sentinel error: ' .. 'unknown errorr no master')
end
-- TODO: should iterate (presumably over the available sentinel addresses) and
-- pick one that is usable, instead of this single hard-coded address.
_G.sentinel = new(Sentinel, '10.1.67.28:26379')
-----------------------------------------------------------------------------------