天天看点

Postgres-xl GTM(全局事务管理器 Global Transaction Manager)GTM Master & Standby Failover

Postgres-xl GTM(全局事务管理器 Global Transaction Manager)GTM Master & Standby Failover

GTM slave 提升

当GTM master宕机而GTM slave仍在运行时,可以使用pgxc_ctl提升GTM slave(postgres-xc)。

如果没有配置GTM Proxy:pgxc_ctl没有提供提升功能,需手动处理。在gtm slave上运行gtm_ctl promote,编辑postgresql.conf使其能够连接到新的gtm master,然后重启所有的协调节点和数据节点。

如果配置GTM Proxy:如果GTM master挂了,正在运行的事务会abort。第一步提升:failover gtm,该步骤会在gtm slave上运行gtm_ctl promote,并配置被提升的gtm的gtm.conf,以使其以master角色运行。第二步将所有gtm_proxy重新连接到新GTM master:reconnect gtm_proxy all。

PGXC$ failover gtm
PGXC$ reconnect gtm_proxy all      

pgxc_Concept_Implementation_and_Achievements Chapter 24 Node failover

gtm_ctl

gtm_ctl工具处于gtm_ctl.c中,如下为其支持的选项,用于启停(start、stop、restart)GTM server、GTM standby和GTM proxy:

// Common options
-D GTMDATA  //location of the database storage area
-i nodename //set gtm_proxy nodename registered on GTM (option ignored if used with GTM)
-t SECS     //seconds to wait when using -w option
-w          //wait until operation completes
-W          //do not wait until operation completes
//Options for start or restart
-l log_file            //write (or append) server log to FILENAME
-o OPTIONS             //command line options to pass to gtm(GTM server executable)
-p PATH-TO-GTM/PROXY   //path to gtm/gtm_proxy executables
-Z STARTUP-MODE        //can be \"gtm\", \"gtm_standby\" or \"gtm_proxy\"
//Options for stop or restart
-m SHUTDOWN-MODE //smart/fast/immediate/s/f/i
//Options for      

gtm_ctl支持如下功能:启动、停止、提升、重启、状态、重连接。

gtm_ctl start   -Z STARTUP_MODE [-w] [-t SECS] [-D DATADIR] [-l FILENAME] [-o \"OPTIONS\"]       --> do_start()->start_gtm()
gtm_ctl stop    -Z STARTUP_MODE [-W] [-t SECS] [-D DATADIR] [-m SHUTDOWN-MODE]                   --> do_stop()函数->kill()
gtm_ctl promote -Z STARTUP_MODE [-w] [-t SECS] [-D DATADIR]                                      --> do_promote()函数->kill((pid_t) pid, SIGUSR1)
gtm_ctl restart -Z STARTUP_MODE [-w] [-t SECS] [-D DATADIR] [-m SHUTDOWN-MODE] [-o \"OPTIONS\"]  --> do_restart()函数
gtm_ctl status  -Z STARTUP_MODE [-w] [-t SECS] [-D DATADIR]                                      --> do_status()函数
gtm_ctl reconnect -Z STARTUP_MODE [-D DATADIR] -o \"OPTIONS\"]                                   --> do_reconnect()函数      
gtm gtm_proxy gtm_standby
pid_file $GTMDATA/gtm.pid $GTMDATA/gtm_proxy.pid $GTMDATA/gtm.pid
gtmopts_file $GTMDATA/gtm.opts $GTMDATA/gtm_proxy.opts $GTMDATA/gtm.opts
conf_file $GTMDATA/gtm.conf $GTMDATA/gtm_proxy.conf $GTMDATA/gtm.conf

start_gtm函数用于启动gtm/gtm_proxy进程:先拼接可执行文件路径和命令行,如果指定了log_file则调用RunAsDaemon函数,否则直接调用system函数执行命令。

/*
 * start_gtm
 *
 * Assemble the shell command line that launches the selected binary
 * (gtm_app, optionally prefixed with the gtm_path directory) and run it.
 *
 * With a log file configured, the command carries "-l <log_file>" and is
 * handed to RunAsDaemon(); without one, stdin is redirected from DEVNULL,
 * stderr is merged into stdout, and the command goes straight to system().
 * Everything is passed through a shell because the option strings may
 * contain quotes.
 *
 * Exits with status 1 when the binary path or the finished command does
 * not fit in MAXPGPATH bytes.  Returns whatever RunAsDaemon()/system()
 * returns.
 */
static int start_gtm(void) {
  char gtm_app_path[MAXPGPATH];
  char cmd[MAXPGPATH];
  int  used = 0;      /* bytes of gtm_app_path already filled in */
  int  cmd_len;

  memset(cmd, 0, MAXPGPATH);
  memset(gtm_app_path, 0, MAXPGPATH);

  /* Optional directory prefix; one byte is always kept for the '\0'. */
  if (gtm_path != NULL) {
    strncpy(gtm_app_path, gtm_path, MAXPGPATH - 1);
    used = strlen(gtm_app_path);
    strncat(gtm_app_path, "/", MAXPGPATH - used - 1);
    used = strlen(gtm_app_path);
  }

  /* Refuse to continue if the executable name cannot be appended whole. */
  if (strlen(gtm_app) >= (MAXPGPATH - used - 1)) {
    write_stderr("gtm command exceeds max size");
    exit(1);
  }
  strncat(gtm_app_path, gtm_app, MAXPGPATH - used - 1);

  if (log_file == NULL)
    cmd_len = snprintf(cmd, MAXPGPATH - 1, "\"%s\" %s%s < \"%s\" 2>&1 &" ,gtm_app_path, gtmdata_opt, gtm_opts, DEVNULL);
  else
    cmd_len = snprintf(cmd, MAXPGPATH - 1, "\"%s\" %s%s -l %s &" ,gtm_app_path, gtmdata_opt, gtm_opts, log_file);

  /* snprintf reports the untruncated length, so this detects overflow. */
  if (cmd_len >= MAXPGPATH - 1) {
    write_stderr("gtm command exceeds max size");
    exit(1);
  }

  return (log_file != NULL) ? RunAsDaemon(cmd) : system(cmd);
}

只有gtm_proxy能够执行reconnect命令,用于连接到新提升的GTM Master。gtm_ctl将命令行参数写入$GTMDATA/newgtm文件,并向gtm_proxy进程发送SIGUSR1信号通知其重连(kill((pid_t) pid, SIGUSR1))。

/*
 * do_reconnect
 *
 * Ask a running gtm_proxy to reconnect.  The option string (gtm_opts) is
 * written to "<gtm_data>/newgtm", after which SIGUSR1 is sent to the
 * process whose PID get_pgpid() reports.
 *
 * Only valid when the target application is "gtm_proxy".  Every failure
 * (wrong target, missing PID file, single-user server, out of memory,
 * file open/write error, kill() error) prints a message and exits with
 * status 1.
 *
 * Changes from the previous version:
 *  - the result of writing and closing the newgtm file is checked, so the
 *    proxy is never signalled to read a missing or partial file;
 *  - the two messages copied from the promote path now say "reconnect".
 */
static void do_reconnect(void) {
  pgpid_t  pid;
  char *reconnect_point_file_nam;
  size_t reconnect_point_file_nam_size;
  FILE *reconnect_point_file;
  int write_failed;

  /* Target must be "gtm_proxy" */
  if (strcmp(gtm_app, "gtm_proxy") != 0){
    write_stderr(_("%s: only gtm_proxy can accept reconnect command\n"), progname);
    exit(1);
  }

  pid = get_pgpid();
  if (pid == 0){
    /* no pid file */
    write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
    write_stderr(_("Is server running?\n"));
    exit(1);
  }else if (pid < 0){
    /* standalone backend, not gtm */
    pid = -pid;
    write_stderr(_("%s: cannot reconnect server; single-user server is running (PID: %ld)\n"), progname, pid);
    exit(1);
  }

  read_gtm_opts();

  /*
   * Pass reconnect info to GTM-Proxy. Option arguments are written to the
   * newgtm file under the -D directory.  sizeof("/newgtm") already counts
   * the terminating '\0'.
   */
  reconnect_point_file_nam_size = strlen(gtm_data) + sizeof("/newgtm");
  reconnect_point_file_nam = malloc(reconnect_point_file_nam_size);
  if (reconnect_point_file_nam == NULL){
    write_stderr(_("%s: No memory available.\n"), progname);
    exit(1);
  }
  snprintf(reconnect_point_file_nam, reconnect_point_file_nam_size, "%s/newgtm", gtm_data);

  reconnect_point_file = fopen(reconnect_point_file_nam, "w");
  if (reconnect_point_file == NULL){
    write_stderr(_("%s: Cannot open reconnect point file %s\n"), progname, reconnect_point_file_nam);
    exit(1);
  }

  /* fclose() flushes, so a write error may only surface there. */
  write_failed = (fprintf(reconnect_point_file, "%s\n", gtm_opts) < 0);
  if (fclose(reconnect_point_file) != 0)
    write_failed = 1;
  if (write_failed){
    write_stderr(_("%s: Cannot write reconnect point file %s\n"), progname, reconnect_point_file_nam);
    exit(1);
  }
  free(reconnect_point_file_nam);

  if (kill((pid_t) pid, SIGUSR1) != 0){
    write_stderr(_("%s: could not send reconnect signal (PID: %ld): %s\n"), progname, pid, strerror(errno));
    exit(1);
  }
}

GTM对SIGUSR1信号的处理:

pqsignal(SIGUSR1, GTM_SigleHandler);

/*
 * GTM_SigleHandler
 *
 * Signal handler.  Logs every received signal number to stderr, then:
 *  - SIGUSR1: calls PromoteToActive() when Recovery_IsStandby() is true,
 *    and returns without doing anything else;
 *  - SIGKILL/SIGTERM/SIGQUIT/SIGINT/SIGHUP: saves the register
 *    information, removes the PID file, blocks signals via
 *    PG_SETMASK(&BlockSig) and sets GTMAbortPending;
 *  - anything else: logs "Unknown signal" and returns.
 */
static void GTM_SigleHandler(int signal) {
  fprintf(stderr, "Received signal %d\n", signal);

  /* Promotion request: handled on its own, never triggers shutdown. */
  if (signal == SIGUSR1) {
    if (Recovery_IsStandby())
      PromoteToActive();
    return;
  }

  /* Anything outside the shutdown set is only reported. */
  if (signal != SIGKILL && signal != SIGTERM && signal != SIGQUIT &&
      signal != SIGINT && signal != SIGHUP) {
    fprintf(stderr, "Unknown signal %d\n", signal);
    return;
  }

  /* XXX We should do a clean shutdown here. */

  /* Rewrite the register information (drops unregister records). */
  Recovery_SaveRegisterInfo();

  /* The pid file has to go before the process shuts down. */
  DeleteLockFile(GTM_PID_FILE);

  PG_SETMASK(&BlockSig);
  GTMAbortPending = true;
}

对于SIGUSR1信号,如果当前是GTM Slave,则调用PromoteToActive()函数进行提升,否则不做任何操作,处理函数随即返回。只有SIGTERM、SIGQUIT、SIGINT、SIGHUP等信号才会继续执行后面的关闭前清理逻辑(保存注册信息、删除pid文件、设置GTMAbortPending)。

/*
 * PromoteToActive
 *
 * Switch this GTM out of standby mode.  In order:
 *  1. set the starting/next client identifiers,
 *  2. clear the standby flag via Recovery_StandbySetStandby(false),
 *  3. create the data-directory lock file,
 *  4. add a timestamped "startup = ACT" entry to the file named by the
 *     "config_file" option, so the next restart uses it
 *     (opened with PG_BINARY_A, presumably append mode; confirm),
 *  5. flag that a backup is needed and write a restore point.
 *
 * Failure to open or close the configuration file is reported at FATAL.
 */
static void PromoteToActive(void) {
  const char *conf_file;
  FILE *conf_fp;

  elog(LOG, "Promote signal received. Becoming an active...");

  /* Client identifiers must be settled before promotion completes. */
  GTM_SetInitialAndNextClientIdentifierAtPromote();

  /* The actual role change. */
  Recovery_StandbySetStandby(false);
  CreateDataDirLockFile();

  /* Record the new role in the GTM config file for the next restart. */
  conf_file = GetConfigOption("config_file", true);
  elog(LOG, "Config file is %s...", conf_file);

  conf_fp = fopen(conf_file, PG_BINARY_A);
  if (conf_fp != NULL) {
    char timestamp[128];
    time_t now = (time_t) time(NULL);
    struct tm *now_tm = localtime(&now);

    strftime(timestamp, sizeof(timestamp), "%Y-%m-%d %H:%M:%S %Z", now_tm);
    fprintf(conf_fp,
        "#===================================================\n"
        "# Updated due to GTM promote request\n"
        "# %s\nstartup = ACT\n"
        "#===================================================\n", timestamp);
    if (fclose(conf_fp) != 0)
      ereport(FATAL,(EINVAL,errmsg("could not close GTM configuration file \"%s\": %m",conf_file)));
  } else {
    ereport(FATAL,(EINVAL,errmsg("could not open GTM configuration file \"%s\": %m",conf_file)));
  }

  GTM_SetNeedBackup();
  GTM_WriteRestorePoint();
}
Postgres-xl GTM(全局事务管理器 Global Transaction Manager)GTM Master & Standby Failover

GTM Standby启动

GTM Standby启动流程:

main()
 –> InitializeGTMOptions() --> InitGTMProcess() --> Parse command line options and load configuration file --> BaseInit()
 –Recovery_IsStandby()–> Recovery_StandbySetConnInfo(active_addr, active_port) –Recovery_IsStandby()–> gtm_standby_start_startup() –Recovery_IsStandby()–> gtm_standby_begin_backup() --> gtm_standby_restore_next_gxid() --> gtm_standby_restore_gxid() --> gtm_standby_restore_sequence()
 –> GTM_SetNeedBackup() --> GTM_WriteRestorePoint()
 –Recovery_IsStandby()–> gtm_standby_register_self(NodeName, GTMPortNumber, GTMDataDir) –Recovery_IsStandby()–> gtm_standby_restore_node()
 –> Establish input sockets --> Setup signal handlers
 –Recovery_IsStandby()–> gtm_standby_activate_self() --> gtm_standby_end_backup() --> gtm_standby_finish_startup()
 –> ServerLoop()      

GTM Standby和Master在启动过程中的交互过程:

GTM Standby连接Master:GTM Standby --gtm_standby_connectToActiveGTM 连接--> GTM Master
GTM Standby向GTM Master发送开始备份命令:GTM Standby --set_begin_end_backup(GTM_ActiveConn, true)--> GTM Master
GTM Standby向GTM Master发送获取next全局事务号命令:GTM Standby --get_next_gxid(GTM_ActiveConn)--> GTM Master
GTM Standby恢复TxnInfo:GTM_RestoreTxnInfo(NULL, next_gxid, NULL, true)
GTM Standby从GTM Master恢复gxid:GTM Standby <--gtm_standby_restore_gxid-- GTM Master
GTM Standby从GTM Master恢复sequence:GTM Standby <--gtm_standby_restore_sequence-- GTM Master
GTM Standby备份恢复点:GTM_WriteRestorePoint向control file中写入PointVersion、PointXid和PointSeq
GTM Standby向GTM Master将自己注册为disconnected节点:GTM Standby --gtm_standby_register_self--> GTM Master
GTM Standby从GTM Master上获取Node节点信息:GTM Standby <--gtm_standby_restore_node-- GTM Master
GTM Standby将standby状态从disconnected更新为connected:GTM Standby --gtm_standby_activate_self--> GTM Master
GTM Standby向GTM Master发送结束备份命令:GTM Standby --gtm_standby_end_backup--> GTM Master

GTM Standby连接master

GTM Standby连接Master,利用Recovery_StandbySetConnInfo(active_addr, active_port) 函数设置master的GTM_ActiveAddress和GTM_ActivePort,利用gtm_standby_start_startup()建立standby和master之间的连接(利用PQconnectGTM函数)。

GTM Standby复制同步Master

gtm/recovery/replication.c

GTM Standby向GTM Master发送开始备份命令(利用gtm_standby_begin_backup函数),

/*
 * gtm_standby_begin_backup
 *
 * Send the "begin backup" request over GTM_ActiveConn.
 *
 * Returns 1 when set_begin_end_backup() reports 0, and 0 for any other
 * result.
 */
int gtm_standby_begin_backup(void) {
  if (set_begin_end_backup(GTM_ActiveConn, true) != 0)
    return 0;
  return 1;
}
/*
 * set_begin_end_backup
 *
 * Send a MSG_BEGIN_BACKUP (begin == true) or MSG_END_BACKUP message on
 * conn and wait up to CLIENT_GTM_TIMEOUT seconds for the reply.
 *
 * Returns the reply's gr_status on success.  If sending, waiting, reading
 * or fetching the result fails, conn->result is made non-NULL, its
 * gr_status is set to GTM_RESULT_COMM_ERROR, and -1 is returned.
 */
int set_begin_end_backup(GTM_Conn *conn, bool begin) {
  GTM_Result *reply = NULL;
  time_t deadline;
  const int msg_type = begin ? MSG_BEGIN_BACKUP : MSG_END_BACKUP;

  /* Build and flush the request; the first failing step aborts the rest. */
  if (gtmpqPutMsgStart('C', true, conn) ||
      gtmpqPutInt(msg_type, sizeof(GTM_MessageType), conn) ||
      gtmpqPutMsgEnd(conn) ||
      gtmpqFlush(conn))
    goto comm_failed;

  /* Wait for the reply, bounded by the client timeout. */
  deadline = time(NULL) + CLIENT_GTM_TIMEOUT;
  if (gtmpqWaitTimed(true, false, conn, deadline) || gtmpqReadData(conn) < 0)
    goto comm_failed;

  reply = GTMPQgetResult(conn);
  if (reply == NULL)
    goto comm_failed;

  return reply->gr_status;

comm_failed:
  conn->result = makeEmptyResultIfIsNull(conn->result);
  conn->result->gr_status = GTM_RESULT_COMM_ERROR;
  return -1;
}

节点注册信息同步

子进程信息结构体同步

Txn/Seq同步

GTM Master Standby 切换

GTM Master检测Standby状态

从GTM Standby的初始化过程,可以看出GTM Master和Standby之间只有一个连接。那么如何检查GTM Standby的状态呢?从GTM Master产生的子线程中对Standby连接的处理可以看到:如果全局标志GTMThreads->gt_standby_ready为true,且子线程信息结构体中的standby连接为空(子线程创建时的线程信息结构体standby字段会被赋值),说明目前该线程与standby未连接,需要重新尝试连接;如果连接失败,则将gt_standby_ready置为false,使其他线程也断开与standby的连接。如果全局标志GTMThreads->gt_standby_ready为false而线程仍持有standby连接,则需要将GTM Master和Standby之间断链。

/*
 * Excerpt of a per-thread loop (elided parts are marked "..."): on each
 * pass the thread's standby connection is brought in line with the global
 * GTMThreads->gt_standby_ready flag.
 */
for (;;) {
        ...
    /* Flag is set, this thread has no standby connection, and it is not a backup thread. */
    if (GTMThreads->gt_standby_ready && thrinfo->thr_conn->standby == NULL && thrinfo->thr_status != GTM_THREAD_BACKUP){
      /* Connect to GTM-Standby */
      thrinfo->thr_conn->standby = gtm_standby_connect_to_standby();
      /* Connection attempt failed: clear the global flag. */
      if (thrinfo->thr_conn->standby == NULL)
        GTMThreads->gt_standby_ready = false;  /* This makes other threads disconnect from the standby, if needed. */
    /* Flag is cleared but this thread still holds a standby connection. */
    }else if (GTMThreads->gt_standby_ready == false && thrinfo->thr_conn->standby){
      /* Disconnect from GTM-Standby */
      gtm_standby_disconnect_from_standby(thrinfo->thr_conn->standby);
      thrinfo->thr_conn->standby = NULL;
    }      

GTM Standby角色切换Master

Standby切换Master的过程如下:收到SIGUSR1信号,GTM Standby调用信号处理函数,执行PromoteToActive函数,调用GTM_SetInitialAndNextClientIdentifierAtPromote()函数,调用Recovery_StandbySetStandby(false)函数,创建数据目录锁,更新GTM配置文件,调用GTM_SetNeedBackup()函数,调用GTM_WriteRestorePoint()函数。

/*
 * PromoteToActive
 *
 * Switch this GTM out of standby mode.  In order:
 *  1. set the starting/next client identifiers,
 *  2. clear the standby flag via Recovery_StandbySetStandby(false),
 *  3. create the data-directory lock file,
 *  4. add a timestamped "startup = ACT" entry to the file named by the
 *     "config_file" option, so the next restart uses it
 *     (opened with PG_BINARY_A, presumably append mode; confirm),
 *  5. flag that a backup is needed and write a restore point.
 *
 * Failure to open or close the configuration file is reported at FATAL.
 */
static void PromoteToActive(void) {
  const char *conf_file;
  FILE *conf_fp;

  elog(LOG, "Promote signal received. Becoming an active...");

  /* Client identifiers must be settled before promotion completes. */
  GTM_SetInitialAndNextClientIdentifierAtPromote();

  /* The actual role change. */
  Recovery_StandbySetStandby(false);
  CreateDataDirLockFile();

  /* Record the new role in the GTM config file for the next restart. */
  conf_file = GetConfigOption("config_file", true);
  elog(LOG, "Config file is %s...", conf_file);

  conf_fp = fopen(conf_file, PG_BINARY_A);
  if (conf_fp != NULL) {
    char timestamp[128];
    time_t now = (time_t) time(NULL);
    struct tm *now_tm = localtime(&now);

    strftime(timestamp, sizeof(timestamp), "%Y-%m-%d %H:%M:%S %Z", now_tm);
    fprintf(conf_fp,
        "#===================================================\n"
        "# Updated due to GTM promote request\n"
        "# %s\nstartup = ACT\n"
        "#===================================================\n", timestamp);
    if (fclose(conf_fp) != 0)
      ereport(FATAL,(EINVAL,errmsg("could not close GTM configuration file \"%s\": %m",conf_file)));
  } else {
    ereport(FATAL,(EINVAL,errmsg("could not open GTM configuration file \"%s\": %m",conf_file)));
  }

  GTM_SetNeedBackup();
  GTM_WriteRestorePoint();
}

GTM_SetInitialAndNextClientIdentifierAtPromote()函数会将全局线程信息GTMThreads中的gt_starting_client_id更新为open transactions列表中最新的客户标识符,将gt_next_client_id更新为下一个客户标识符。

Recovery_StandbySetStandby(false)函数将全局standby标识设置为false,也就是标识自己为master。

GTM_SetNeedBackup()函数将需要backup标识设置为True。

GTM_WriteRestorePoint()函数向控制文件(GTMControlFile)写入恢复点(GTM_WriteRestorePointVersion(f)、GTM_WriteRestorePointXid(f)、GTM_WriteRestorePointSeq(f);)

/*
 * GTM_WriteRestorePoint
 *
 * If a backup has been requested (gtm_need_bkup), rewrite GTMControlFile
 * with the current restore point: version, xid and sequence records, in
 * that order.  Does nothing when no backup is pending.
 *
 * The control file is opened only after gtm_need_bkup has been checked.
 * Opening with "w" truncates the file, so opening first (as the previous
 * version did) emptied the existing control file on every call that had
 * nothing to write.
 *
 * If the file cannot be opened, the failure is logged and gtm_need_bkup
 * is left set, so a later call can retry.
 */
void GTM_WriteRestorePoint(void){
  FILE *f;

  GTM_RWLockAcquire(&gtm_bkup_lock, GTM_LOCKMODE_WRITE);
  if (!gtm_need_bkup){
    /* Nothing pending: leave the existing control file untouched. */
    GTM_RWLockRelease(&gtm_bkup_lock);
    return;
  }

  f = fopen(GTMControlFile, "w");
  if (f == NULL){
    /* Releasing the lock may clobber errno, so keep fopen's value for the report. */
    int saved_errno = errno;

    GTM_RWLockRelease(&gtm_bkup_lock);
    errno = saved_errno;
    ereport(LOG, (errno,errmsg("Cannot open control file"),errhint("%s", strerror(errno))));
    return;
  }

  /* The file is open: the request is now being served. */
  gtm_need_bkup = FALSE;
  GTM_RWLockRelease(&gtm_bkup_lock);

  GTM_WriteRestorePointVersion(f);
  GTM_WriteRestorePointXid(f);
  GTM_WriteRestorePointSeq(f);
  fclose(f);
}

GTM Proxy重连接Standby