
PostgreSQL Database Statistics: Statistics Collection by ANALYZE

ANALYZE statistics collection consists of two parts: updating the statistics system catalogs pg_statistic and pg_class, and sending statistics to the PgStat process. The call stack is shown below.

standard_ProcessUtility
 | -- ExecVacuum(pstate, stmt, isTopLevel)
       | -- parameter handling
       | -- vacuum(vacstmt->rels, &params, NULL, isTopLevel)
             | -- pgstat_vacuum_stat # send information about dead objects to the PgStat process
             | -- foreach(cur, relations)
               | -- analyze_rel(vrel->oid, vrel->relation, params, vrel->va_cols, in_outer_xact, vac_strategy)
                      | -- determine the sampling function acquirefunc
                     | -- do_analyze_rel
                            | -- update_attstats # update pg_statistic
                            | -- vac_update_relstats # update pg_class
                            | -- pgstat_report_analyze # report the ANALYZE results to the PgStat process

update_attstats

The update_attstats function updates the statistics of the relevant columns in the pg_statistic system catalog. The surrounding flow has two parts: first, update the statistics of the table's columns; second, update the statistics of the columns of each index. If extended statistics need to be built, BuildRelationExtStatistics is called; when analyzing a parent table (inherited statistics), no extended statistics are built, because extended statistics are only built for individual tables.

/* Emit the completed stats rows into pg_statistic, replacing any previous statistics for the target columns.  (If there are stats in pg_statistic for columns we didn't process, we leave them alone.) */
    update_attstats(RelationGetRelid(onerel), inh, attr_cnt, vacattrstats); 
    for (ind = 0; ind < nindexes; ind++){
      AnlIndexData *thisdata = &indexdata[ind];
      update_attstats(RelationGetRelid(Irel[ind]), false, thisdata->attr_cnt, thisdata->vacattrstats);
    }
    /* Build extended statistics (if there are any). For now we only build extended statistics on individual relations, not for relations representing inheritance trees. */
    if (!inh) BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt, vacattrstats);

In pg_statistic, a row whose stainherit field is true holds the statistics of a column gathered over all inheritance children, while a row with stainherit = false holds the statistics of column staattnum of the single table starelid; the inh parameter of update_attstats marks which of these cases is being written. The function iterates over the columns whose statistics need updating; each column's statistics are stored in vacattrstats[attno], and the flow is as follows:

  • Initialize the values of the corresponding pg_statistic columns, using nulls[i] to mark a column with no data and replaces[i] to mark a column to be overwritten (a minimal mock of this pattern follows the function below)
  • Fill values with the simple fields and metrics: the owning table's OID, the column number, stainherit, the NULL fraction stanullfrac, and so on
  • Fill values with the stakindN, staopN and stacollN entries from vacattrstats[attno]. For each of the STATISTIC_NUM_SLOTS slots, nnum = numnumbers[k] is the number of entries stored in that slot; construct_array builds vacattrstats[attno]->stanumbers[k] (nnum float4 values) into an array stored in stanumbersN, and likewise builds stavalues[k] into an array stored in the corresponding stavaluesN
  • Check whether the catalog already contains a matching tuple, and accordingly take the insert or the update path
static void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) {
  int      attno;
  if (natts <= 0) return;          /* nothing to do */
  Relation sd = table_open(StatisticRelationId, RowExclusiveLock);
  for (attno = 0; attno < natts; attno++) {
      int      i,k,n;
    VacAttrStats *stats = vacattrstats[attno];
    HeapTuple  stup,oldtup;
        
    Datum    values[Natts_pg_statistic];  // values for the corresponding pg_statistic columns
    if (!stats->stats_valid) continue; /* Ignore attr if we weren't able to collect stats */
    bool    nulls[Natts_pg_statistic], replaces[Natts_pg_statistic];
    for (i = 0; i < Natts_pg_statistic; ++i){ /* Construct a new pg_statistic tuple */ // nulls[i] marks a column with no data, replaces[i] marks a column to be overwritten
      nulls[i] = false; replaces[i] = true;
    }
    values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
    values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
    values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
    values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
    values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
    values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
    
    i = Anum_pg_statistic_stakind1 - 1;
    for (k = 0; k < STATISTIC_NUM_SLOTS; k++){ // STATISTIC_NUM_SLOTS is 5, i.e. stakind1 .. stakind5
      values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
    }
    i = Anum_pg_statistic_staop1 - 1;
    for (k = 0; k < STATISTIC_NUM_SLOTS; k++){
      values[i++] = ObjectIdGetDatum(stats->staop[k]);  /* staopN */
    }
    i = Anum_pg_statistic_stacoll1 - 1;
    for (k = 0; k < STATISTIC_NUM_SLOTS; k++){
      values[i++] = ObjectIdGetDatum(stats->stacoll[k]);  /* stacollN */
    }
    i = Anum_pg_statistic_stanumbers1 - 1;
    for (k = 0; k < STATISTIC_NUM_SLOTS; k++){
      int      nnum = stats->numnumbers[k];
      if (nnum > 0){
        Datum     *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
        ArrayType  *arry;
        for (n = 0; n < nnum; n++) numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
        /* XXX knows more than it should about type float4: */
        arry = construct_array(numdatums, nnum, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT);
        values[i++] = PointerGetDatum(arry);  /* stanumbersN */
      }else{ nulls[i] = true; values[i++] = (Datum) 0; }
    }
    i = Anum_pg_statistic_stavalues1 - 1;
    for (k = 0; k < STATISTIC_NUM_SLOTS; k++){
      if (stats->numvalues[k] > 0){
        ArrayType  *arry = construct_array(stats->stavalues[k], stats->numvalues[k], stats->statypid[k], stats->statyplen[k], stats->statypbyval[k], stats->statypalign[k]);
        values[i++] = PointerGetDatum(arry);  /* stavaluesN */
      }else{
        nulls[i] = true;
        values[i++] = (Datum) 0;
      }
    }

    /* Is there already a pg_statistic tuple for this attribute? */
    oldtup = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(stats->attr->attnum), BoolGetDatum(inh));
    if (HeapTupleIsValid(oldtup)){/* Yes, replace it */
      stup = heap_modify_tuple(oldtup, RelationGetDescr(sd), values, nulls, replaces);
      ReleaseSysCache(oldtup);CatalogTupleUpdate(sd, &stup->t_self, stup);
    }else{/* No, insert new tuple */
      stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);CatalogTupleInsert(sd, stup);
    }
    heap_freetuple(stup);
  }
  table_close(sd, RowExclusiveLock);
}      
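
The values/nulls/replaces arrays built above are consumed by heap_modify_tuple to produce the new catalog tuple. A minimal standalone mock of that convention, only to illustrate the pattern (modify_row and the fixed four-column layout are hypothetical, not PostgreSQL code):

#include <stdbool.h>
#include <stdio.h>

#define NATTS 4

/* Hypothetical miniature of the values/nulls/replaces pattern:
 * replaces[i] says "overwrite column i", nulls[i] says "the new value is NULL".
 * update_attstats sets replaces[i] = true for every pg_statistic column and
 * nulls[i] = true for unused stanumbersN/stavaluesN slots. */
static void modify_row(long *row, bool *rownull,
                       const long *values, const bool *nulls, const bool *replaces)
{
  for (int i = 0; i < NATTS; i++) {
    if (!replaces[i]) continue;       /* keep the old value */
    rownull[i] = nulls[i];
    row[i] = nulls[i] ? 0 : values[i];
  }
}

int main(void)
{
  long row[NATTS]      = {11, 22, 33, 44};
  bool rownull[NATTS]  = {false, false, false, false};
  long values[NATTS]   = {100, 0, 300, 0};
  bool nulls[NATTS]    = {false, true, false, true};    /* slots with no data */
  bool replaces[NATTS] = {true, true, true, true};      /* overwrite every column */

  modify_row(row, rownull, values, nulls, replaces);
  for (int i = 0; i < NATTS; i++) {
    if (rownull[i]) printf("col%d = NULL\n", i);
    else            printf("col%d = %ld\n", i, row[i]);
  }
  return 0;
}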

vac_update_relstats

The vac_update_relstats function updates the pages/tuples statistics in pg_class.

/* Update pages/tuples stats in pg_class ... but not if we're doing inherited stats. */
  if (!inh) {
    BlockNumber relallvisible;
    visibilitymap_count(onerel, &relallvisible, NULL);
    vac_update_relstats(onerel,relpages,totalrows,relallvisible,hasindex,InvalidTransactionId,InvalidMultiXactId,in_outer_xact);
  }
  /* Same for indexes. Vacuum always scans all indexes, so if we're part of VACUUM ANALYZE, don't overwrite the accurate count already inserted by VACUUM. */
  if (!inh && !(params->options & VACOPT_VACUUM)){
    for (ind = 0; ind < nindexes; ind++){
      AnlIndexData *thisdata = &indexdata[ind];
      double    totalindexrows;
      totalindexrows = ceil(thisdata->tupleFract * totalrows);
      vac_update_relstats(Irel[ind],RelationGetNumberOfBlocks(Irel[ind]),totalindexrows,0,false,InvalidTransactionId,InvalidMultiXactId,in_outer_xact);
    }
  }      

vac_update_relstats first fetches a copy of the relation's pg_class tuple and checks whether each field it would update has actually changed; if so, it sets dirty to true. relfrozenxid is updated unless the caller passed InvalidTransactionId, indicating it has no new data. Ordinarily relfrozenxid is not allowed to go backwards: if things are working correctly, the only way the new frozen XID could be older is that a previous VACUUM was done with a tighter freeze_min_age, in which case we do not want to forget the work it has already done. However, if the stored relfrozenxid is "in the future", it must be corrupt, and it seems best to overwrite it with the cutoff used this time; this matches what vac_update_datfrozenxid() considers to be "in the future". Finally, if dirty is true, heap_inplace_update is called to write the modified tuple in place.

void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples,
          BlockNumber num_all_visible_pages,
          bool hasindex, TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact){
  Oid      relid = RelationGetRelid(relation);
  Relation rd = table_open(RelationRelationId, RowExclusiveLock);  
  HeapTuple ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); /* Fetch a copy of the tuple to scribble on */
  if (!HeapTupleIsValid(ctup)) elog(ERROR, "pg_class entry for relid %u vanished during vacuuming", relid);
  Form_pg_class pgcform = (Form_pg_class) GETSTRUCT(ctup);

  /* Apply statistical updates, if any, to copied tuple */
  bool dirty = false;
  if (pgcform->relpages != (int32) num_pages){ pgcform->relpages = (int32) num_pages; dirty = true; }
  if (pgcform->reltuples != (float4) num_tuples){ pgcform->reltuples = (float4) num_tuples; dirty = true; }
  if (pgcform->relallvisible != (int32) num_all_visible_pages){ pgcform->relallvisible = (int32) num_all_visible_pages; dirty = true; }
  /* Apply DDL updates, but not inside an outer transaction (see above) */
  if (!in_outer_xact){/* If we didn't find any indexes, reset relhasindex. */
    if (pgcform->relhasindex && !hasindex){ pgcform->relhasindex = false; dirty = true; }    
    if (pgcform->relhasrules && relation->rd_rules == NULL){ /* We also clear relhasrules and relhastriggers if needed */    
      pgcform->relhasrules = false; dirty = true;
    }
    if (pgcform->relhastriggers && relation->trigdesc == NULL){ pgcform->relhastriggers = false;dirty = true; }
  }
  if (TransactionIdIsNormal(frozenxid) && pgcform->relfrozenxid != frozenxid && (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) || TransactionIdPrecedes(ReadNextTransactionId(), pgcform->relfrozenxid))){ pgcform->relfrozenxid = frozenxid; dirty = true; }

  /* Similarly for relminmxid */
  if (MultiXactIdIsValid(minmulti) && pgcform->relminmxid != minmulti && (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) || MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid))) { pgcform->relminmxid = minmulti; dirty = true; }

  /* If anything changed, write out the tuple. */
  if (dirty) heap_inplace_update(rd, ctup);

  table_close(rd, RowExclusiveLock);
}      
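
The relfrozenxid/relminmxid guards above rely on wraparound-aware comparisons (TransactionIdPrecedes / MultiXactIdPrecedes), which are not shown here. Below is a minimal standalone sketch of the decision rule only, assuming ordinary 32-bit XIDs; xid_precedes and maybe_advance_relfrozenxid are hypothetical stand-ins, not the PostgreSQL implementation:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

typedef uint32_t TransactionId;

/* Wraparound-aware comparison modelled on TransactionIdPrecedes():
 * a precedes b if the signed 32-bit difference is negative
 * (assumption: both values are normal XIDs). */
static bool xid_precedes(TransactionId a, TransactionId b)
{
  return (int32_t)(a - b) < 0;
}

/* Sketch of the relfrozenxid update rule described above: advance only if
 * the stored value is older than the new cutoff, or if the stored value is
 * "in the future" (later than the next XID), which indicates corruption. */
static bool maybe_advance_relfrozenxid(TransactionId *stored,
                                       TransactionId newfrozen,
                                       TransactionId next_xid)
{
  if (*stored != newfrozen &&
      (xid_precedes(*stored, newfrozen) || xid_precedes(next_xid, *stored)))
  {
    *stored = newfrozen;
    return true;              /* caller would set dirty = true */
  }
  return false;
}

int main(void)
{
  TransactionId relfrozenxid = 1000, next_xid = 5000;

  /* Normal case: the cutoff advances. */
  printf("%d\n", maybe_advance_relfrozenxid(&relfrozenxid, 2000, next_xid)); /* 1 */
  /* Older cutoff: no backwards move. */
  printf("%d\n", maybe_advance_relfrozenxid(&relfrozenxid, 1500, next_xid)); /* 0 */
  /* Corrupt "future" value gets overwritten. */
  relfrozenxid = 6000;
  printf("%d\n", maybe_advance_relfrozenxid(&relfrozenxid, 2000, next_xid)); /* 1 */
  return 0;
}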

pgstat_report_analyze

pgstat_report_analyze sends the totalrows and totaldeadrows obtained by this ANALYZE to the PgStat process. If va_cols is NIL, i.e. no explicit column list was given and therefore all columns were analyzed, resetcounter is passed as true so that the changes_since_analyze counter is reset.

/* Report ANALYZE to the stats collector, too.  However, if doing inherited stats we shouldn't report, because the stats collector only tracks per-table stats.  Reset the changes_since_analyze counter only if we analyzed all columns; otherwise, there is still work for auto-analyze to do. */
  if (!inh) pgstat_report_analyze(onerel, totalrows, totaldeadrows, (va_cols == NIL));

void pgstat_report_analyze(Relation rel,  PgStat_Counter livetuples, PgStat_Counter deadtuples, bool resetcounter){
  PgStat_MsgAnalyze msg;
  if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) return;

  /* Unlike VACUUM, ANALYZE might be running inside a transaction that has already inserted and/or deleted rows in the target table. ANALYZE will have counted such rows as live or dead respectively. Because we will report our counts of such rows at transaction end, we should subtract off these counts from what we send to the collector now, else they'll be double-counted after commit.  (This approach also ensures that the collector ends up with the right numbers if we abort instead of committing.) Waste no time on partitioned tables, though. */
  if (rel->pgstat_info != NULL && rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) {
    for (PgStat_TableXactStatus *trans = rel->pgstat_info->trans; trans; trans = trans->upper){
      livetuples -= trans->tuples_inserted - trans->tuples_deleted;
      deadtuples -= trans->tuples_updated + trans->tuples_deleted;
    }
    /* count stuff inserted by already-aborted subxacts, too */
    deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
    /* Since ANALYZE's counts are estimates, we could have underflowed */
    livetuples = Max(livetuples, 0);
    deadtuples = Max(deadtuples, 0);
  }

  pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
  msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
  msg.m_tableoid = RelationGetRelid(rel);
  msg.m_autovacuum = IsAutoVacuumWorkerProcess();
  msg.m_resetcounter = resetcounter;
  msg.m_analyzetime = GetCurrentTimestamp();
  msg.m_live_tuples = livetuples;  // live tuples
  msg.m_dead_tuples = deadtuples;  // dead tuples
  pgstat_send(&msg, sizeof(msg));
}
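
The adjustment loop above can be isolated into a small standalone sketch. XactCounts and adjust_counts below are hypothetical stand-ins for PgStat_TableXactStatus and the in-function logic, assuming the same innermost-to-outermost chaining via upper:

#include <stdio.h>

/* Hypothetical stand-in for PgStat_TableXactStatus: per-(sub)transaction
 * counters linked from innermost to outermost via "upper". */
typedef struct XactCounts {
  long tuples_inserted;
  long tuples_updated;
  long tuples_deleted;
  struct XactCounts *upper;
} XactCounts;

/* Sketch of the adjustment in pgstat_report_analyze: subtract rows that the
 * current transaction already inserted/updated/deleted (they will be reported
 * again at commit), then clamp at zero because ANALYZE's counts are estimates. */
static void adjust_counts(long *livetuples, long *deadtuples,
                          const XactCounts *trans, long delta_dead_tuples)
{
  for (; trans != NULL; trans = trans->upper)
  {
    *livetuples -= trans->tuples_inserted - trans->tuples_deleted;
    *deadtuples -= trans->tuples_updated + trans->tuples_deleted;
  }
  *deadtuples -= delta_dead_tuples;   /* stuff inserted by already-aborted subxacts */
  if (*livetuples < 0) *livetuples = 0;
  if (*deadtuples < 0) *deadtuples = 0;
}

int main(void)
{
  XactCounts outer = {100, 0, 0, NULL};      /* inserted 100 rows */
  XactCounts inner = {0, 20, 10, &outer};    /* updated 20, deleted 10 */
  long live = 1000, dead = 50;

  adjust_counts(&live, &dead, &inner, 5);
  printf("live=%ld dead=%ld\n", live, dead); /* live=910 dead=15 */
  return 0;
}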