Move attribute statistics functions to stat_utils.c master github/master
author Michael Paquier <[email protected]>
Thu, 25 Dec 2025 06:13:39 +0000 (15:13 +0900)
committer Michael Paquier <[email protected]>
Thu, 25 Dec 2025 06:13:39 +0000 (15:13 +0900)
Many of the operations done for attribute stats in attribute_stats.c
share the same logic as extended stats, as done by a patch under
discussion to add support for extended stats import and export.  All the
pieces necessary for extended statistics are moved to stat_utils.c,
which is the file where common facilities are shared for stats files.

The following renames are done:
* get_attr_stat_type() -> statatt_get_type()
* init_empty_stats_tuple() -> statatt_init_empty_tuple()
* set_stats_slot() -> statatt_set_slot()
* get_elem_stat_type() -> statatt_get_elem_type()
* text_to_stavalues() -> statatt_build_stavalues()
* get_attr_expr() -> statatt_get_index_expr()

While at it, this commit adds more documentation for all these
functions, describing their internals in more detail as well as the
dependencies that have been implied for attribute statistics.  The same
concepts apply to extended statistics, to some degree.

Author: Corey Huinker <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Reviewed-by: Yu Wang <[email protected]>
Reviewed-by: Michael Paquier <[email protected]>
Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com

src/backend/statistics/attribute_stats.c
src/backend/statistics/stat_utils.c
src/include/statistics/stat_utils.h

index ef4d768feab7e5192f4ae329fccadaaf09d9537c..06bc1a05fc14b51588f533c651f2fe68a120c1b4 100644 (file)
 #include "access/heapam.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
 #include "access/heapam.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
-#include "catalog/pg_collation.h"
 #include "catalog/pg_operator.h"
 #include "nodes/makefuncs.h"
 #include "catalog/pg_operator.h"
 #include "nodes/makefuncs.h"
-#include "nodes/nodeFuncs.h"
 #include "statistics/statistics.h"
 #include "statistics/stat_utils.h"
 #include "utils/array.h"
 #include "statistics/statistics.h"
 #include "statistics/stat_utils.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
-#define DEFAULT_NULL_FRAC      Float4GetDatum(0.0)
-#define DEFAULT_AVG_WIDTH      Int32GetDatum(0) /* unknown */
-#define DEFAULT_N_DISTINCT     Float4GetDatum(0.0) /* unknown */
-
 /*
  * Positional argument numbers, names, and types for
  * attribute_statistics_update() and pg_restore_attribute_stats().
 /*
  * Positional argument numbers, names, and types for
  * attribute_statistics_update() and pg_restore_attribute_stats().
@@ -111,24 +105,9 @@ static struct StatsArgInfo cleararginfo[] =
 };
 
 static bool attribute_statistics_update(FunctionCallInfo fcinfo);
 };
 
 static bool attribute_statistics_update(FunctionCallInfo fcinfo);
-static Node *get_attr_expr(Relation rel, int attnum);
-static void get_attr_stat_type(Oid reloid, AttrNumber attnum,
-                              Oid *atttypid, int32 *atttypmod,
-                              char *atttyptype, Oid *atttypcoll,
-                              Oid *eq_opr, Oid *lt_opr);
-static bool get_elem_stat_type(Oid atttypid, char atttyptype,
-                              Oid *elemtypid, Oid *elem_eq_opr);
-static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d,
-                              Oid typid, int32 typmod, bool *ok);
-static void set_stats_slot(Datum *values, bool *nulls, bool *replaces,
-                          int16 stakind, Oid staop, Oid stacoll,
-                          Datum stanumbers, bool stanumbers_isnull,
-                          Datum stavalues, bool stavalues_isnull);
 static void upsert_pg_statistic(Relation starel, HeapTuple oldtup,
                                const Datum *values, const bool *nulls, const bool *replaces);
 static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit);
 static void upsert_pg_statistic(Relation starel, HeapTuple oldtup,
                                const Datum *values, const bool *nulls, const bool *replaces);
 static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit);
-static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited,
-                                  Datum *values, bool *nulls, bool *replaces);
 
 /*
  * Insert or Update Attribute Statistics
 
 /*
  * Insert or Update Attribute Statistics
@@ -298,16 +277,16 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
    }
 
    /* derive information from attribute */
    }
 
    /* derive information from attribute */
-   get_attr_stat_type(reloid, attnum,
-                      &atttypid, &atttypmod,
-                      &atttyptype, &atttypcoll,
-                      &eq_opr, &lt_opr);
+   statatt_get_type(reloid, attnum,
+                    &atttypid, &atttypmod,
+                    &atttyptype, &atttypcoll,
+                    &eq_opr, &lt_opr);
 
    /* if needed, derive element type */
    if (do_mcelem || do_dechist)
    {
 
    /* if needed, derive element type */
    if (do_mcelem || do_dechist)
    {
-       if (!get_elem_stat_type(atttypid, atttyptype,
-                               &elemtypid, &elem_eq_opr))
+       if (!statatt_get_elem_type(atttypid, atttyptype,
+                                  &elemtypid, &elem_eq_opr))
        {
            ereport(WARNING,
                    (errmsg("could not determine element type of column \"%s\"", attname),
        {
            ereport(WARNING,
                    (errmsg("could not determine element type of column \"%s\"", attname),
@@ -361,8 +340,8 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
    if (HeapTupleIsValid(statup))
        heap_deform_tuple(statup, RelationGetDescr(starel), values, nulls);
    else
    if (HeapTupleIsValid(statup))
        heap_deform_tuple(statup, RelationGetDescr(starel), values, nulls);
    else
-       init_empty_stats_tuple(reloid, attnum, inherited, values, nulls,
-                              replaces);
+       statatt_init_empty_tuple(reloid, attnum, inherited, values, nulls,
+                                replaces);
 
    /* if specified, set to argument values */
    if (!PG_ARGISNULL(NULL_FRAC_ARG))
 
    /* if specified, set to argument values */
    if (!PG_ARGISNULL(NULL_FRAC_ARG))
@@ -386,18 +365,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
    {
        bool        converted;
        Datum       stanumbers = PG_GETARG_DATUM(MOST_COMMON_FREQS_ARG);
    {
        bool        converted;
        Datum       stanumbers = PG_GETARG_DATUM(MOST_COMMON_FREQS_ARG);
-       Datum       stavalues = text_to_stavalues("most_common_vals",
-                                                 &array_in_fn,
-                                                 PG_GETARG_DATUM(MOST_COMMON_VALS_ARG),
-                                                 atttypid, atttypmod,
-                                                 &converted);
+       Datum       stavalues = statatt_build_stavalues("most_common_vals",
+                                                       &array_in_fn,
+                                                       PG_GETARG_DATUM(MOST_COMMON_VALS_ARG),
+                                                       atttypid, atttypmod,
+                                                       &converted);
 
        if (converted)
        {
 
        if (converted)
        {
-           set_stats_slot(values, nulls, replaces,
-                          STATISTIC_KIND_MCV,
-                          eq_opr, atttypcoll,
-                          stanumbers, false, stavalues, false);
+           statatt_set_slot(values, nulls, replaces,
+                            STATISTIC_KIND_MCV,
+                            eq_opr, atttypcoll,
+                            stanumbers, false, stavalues, false);
        }
        else
            result = false;
        }
        else
            result = false;
@@ -409,18 +388,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
        Datum       stavalues;
        bool        converted = false;
 
        Datum       stavalues;
        bool        converted = false;
 
-       stavalues = text_to_stavalues("histogram_bounds",
-                                     &array_in_fn,
-                                     PG_GETARG_DATUM(HISTOGRAM_BOUNDS_ARG),
-                                     atttypid, atttypmod,
-                                     &converted);
+       stavalues = statatt_build_stavalues("histogram_bounds",
+                                           &array_in_fn,
+                                           PG_GETARG_DATUM(HISTOGRAM_BOUNDS_ARG),
+                                           atttypid, atttypmod,
+                                           &converted);
 
        if (converted)
        {
 
        if (converted)
        {
-           set_stats_slot(values, nulls, replaces,
-                          STATISTIC_KIND_HISTOGRAM,
-                          lt_opr, atttypcoll,
-                          0, true, stavalues, false);
+           statatt_set_slot(values, nulls, replaces,
+                            STATISTIC_KIND_HISTOGRAM,
+                            lt_opr, atttypcoll,
+                            0, true, stavalues, false);
        }
        else
            result = false;
        }
        else
            result = false;
@@ -433,10 +412,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
        ArrayType  *arry = construct_array_builtin(elems, 1, FLOAT4OID);
        Datum       stanumbers = PointerGetDatum(arry);
 
        ArrayType  *arry = construct_array_builtin(elems, 1, FLOAT4OID);
        Datum       stanumbers = PointerGetDatum(arry);
 
-       set_stats_slot(values, nulls, replaces,
-                      STATISTIC_KIND_CORRELATION,
-                      lt_opr, atttypcoll,
-                      stanumbers, false, 0, true);
+       statatt_set_slot(values, nulls, replaces,
+                        STATISTIC_KIND_CORRELATION,
+                        lt_opr, atttypcoll,
+                        stanumbers, false, 0, true);
    }
 
    /* STATISTIC_KIND_MCELEM */
    }
 
    /* STATISTIC_KIND_MCELEM */
@@ -446,18 +425,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
        bool        converted = false;
        Datum       stavalues;
 
        bool        converted = false;
        Datum       stavalues;
 
-       stavalues = text_to_stavalues("most_common_elems",
-                                     &array_in_fn,
-                                     PG_GETARG_DATUM(MOST_COMMON_ELEMS_ARG),
-                                     elemtypid, atttypmod,
-                                     &converted);
+       stavalues = statatt_build_stavalues("most_common_elems",
+                                           &array_in_fn,
+                                           PG_GETARG_DATUM(MOST_COMMON_ELEMS_ARG),
+                                           elemtypid, atttypmod,
+                                           &converted);
 
        if (converted)
        {
 
        if (converted)
        {
-           set_stats_slot(values, nulls, replaces,
-                          STATISTIC_KIND_MCELEM,
-                          elem_eq_opr, atttypcoll,
-                          stanumbers, false, stavalues, false);
+           statatt_set_slot(values, nulls, replaces,
+                            STATISTIC_KIND_MCELEM,
+                            elem_eq_opr, atttypcoll,
+                            stanumbers, false, stavalues, false);
        }
        else
            result = false;
        }
        else
            result = false;
@@ -468,10 +447,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
    {
        Datum       stanumbers = PG_GETARG_DATUM(ELEM_COUNT_HISTOGRAM_ARG);
 
    {
        Datum       stanumbers = PG_GETARG_DATUM(ELEM_COUNT_HISTOGRAM_ARG);
 
-       set_stats_slot(values, nulls, replaces,
-                      STATISTIC_KIND_DECHIST,
-                      elem_eq_opr, atttypcoll,
-                      stanumbers, false, 0, true);
+       statatt_set_slot(values, nulls, replaces,
+                        STATISTIC_KIND_DECHIST,
+                        elem_eq_opr, atttypcoll,
+                        stanumbers, false, 0, true);
    }
 
    /*
    }
 
    /*
@@ -486,18 +465,18 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
        bool        converted = false;
        Datum       stavalues;
 
        bool        converted = false;
        Datum       stavalues;
 
-       stavalues = text_to_stavalues("range_bounds_histogram",
-                                     &array_in_fn,
-                                     PG_GETARG_DATUM(RANGE_BOUNDS_HISTOGRAM_ARG),
-                                     atttypid, atttypmod,
-                                     &converted);
+       stavalues = statatt_build_stavalues("range_bounds_histogram",
+                                           &array_in_fn,
+                                           PG_GETARG_DATUM(RANGE_BOUNDS_HISTOGRAM_ARG),
+                                           atttypid, atttypmod,
+                                           &converted);
 
        if (converted)
        {
 
        if (converted)
        {
-           set_stats_slot(values, nulls, replaces,
-                          STATISTIC_KIND_BOUNDS_HISTOGRAM,
-                          InvalidOid, InvalidOid,
-                          0, true, stavalues, false);
+           statatt_set_slot(values, nulls, replaces,
+                            STATISTIC_KIND_BOUNDS_HISTOGRAM,
+                            InvalidOid, InvalidOid,
+                            0, true, stavalues, false);
        }
        else
            result = false;
        }
        else
            result = false;
@@ -514,17 +493,17 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
        bool        converted = false;
        Datum       stavalues;
 
        bool        converted = false;
        Datum       stavalues;
 
-       stavalues = text_to_stavalues("range_length_histogram",
-                                     &array_in_fn,
-                                     PG_GETARG_DATUM(RANGE_LENGTH_HISTOGRAM_ARG),
-                                     FLOAT8OID, 0, &converted);
+       stavalues = statatt_build_stavalues("range_length_histogram",
+                                           &array_in_fn,
+                                           PG_GETARG_DATUM(RANGE_LENGTH_HISTOGRAM_ARG),
+                                           FLOAT8OID, 0, &converted);
 
        if (converted)
        {
 
        if (converted)
        {
-           set_stats_slot(values, nulls, replaces,
-                          STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
-                          Float8LessOperator, InvalidOid,
-                          stanumbers, false, stavalues, false);
+           statatt_set_slot(values, nulls, replaces,
+                            STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+                            Float8LessOperator, InvalidOid,
+                            stanumbers, false, stavalues, false);
        }
        else
            result = false;
        }
        else
            result = false;
@@ -539,291 +518,6 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
    return result;
 }
 
    return result;
 }
 
-/*
- * If this relation is an index and that index has expressions in it, and
- * the attnum specified is known to be an expression, then we must walk
- * the list attributes up to the specified attnum to get the right
- * expression.
- */
-static Node *
-get_attr_expr(Relation rel, int attnum)
-{
-   List       *index_exprs;
-   ListCell   *indexpr_item;
-
-   /* relation is not an index */
-   if (rel->rd_rel->relkind != RELKIND_INDEX &&
-       rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
-       return NULL;
-
-   index_exprs = RelationGetIndexExpressions(rel);
-
-   /* index has no expressions to give */
-   if (index_exprs == NIL)
-       return NULL;
-
-   /*
-    * The index attnum points directly to a relation attnum, then it's not an
-    * expression attribute.
-    */
-   if (rel->rd_index->indkey.values[attnum - 1] != 0)
-       return NULL;
-
-   indexpr_item = list_head(rel->rd_indexprs);
-
-   for (int i = 0; i < attnum - 1; i++)
-       if (rel->rd_index->indkey.values[i] == 0)
-           indexpr_item = lnext(rel->rd_indexprs, indexpr_item);
-
-   if (indexpr_item == NULL)   /* shouldn't happen */
-       elog(ERROR, "too few entries in indexprs list");
-
-   return (Node *) lfirst(indexpr_item);
-}
-
-/*
- * Derive type information from the attribute.
- */
-static void
-get_attr_stat_type(Oid reloid, AttrNumber attnum,
-                  Oid *atttypid, int32 *atttypmod,
-                  char *atttyptype, Oid *atttypcoll,
-                  Oid *eq_opr, Oid *lt_opr)
-{
-   Relation    rel = relation_open(reloid, AccessShareLock);
-   Form_pg_attribute attr;
-   HeapTuple   atup;
-   Node       *expr;
-   TypeCacheEntry *typcache;
-
-   atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid),
-                          Int16GetDatum(attnum));
-
-   /* Attribute not found */
-   if (!HeapTupleIsValid(atup))
-       ereport(ERROR,
-               (errcode(ERRCODE_UNDEFINED_COLUMN),
-                errmsg("column %d of relation \"%s\" does not exist",
-                       attnum, RelationGetRelationName(rel))));
-
-   attr = (Form_pg_attribute) GETSTRUCT(atup);
-
-   if (attr->attisdropped)
-       ereport(ERROR,
-               (errcode(ERRCODE_UNDEFINED_COLUMN),
-                errmsg("column %d of relation \"%s\" does not exist",
-                       attnum, RelationGetRelationName(rel))));
-
-   expr = get_attr_expr(rel, attr->attnum);
-
-   /*
-    * When analyzing an expression index, believe the expression tree's type
-    * not the column datatype --- the latter might be the opckeytype storage
-    * type of the opclass, which is not interesting for our purposes. This
-    * mimics the behavior of examine_attribute().
-    */
-   if (expr == NULL)
-   {
-       *atttypid = attr->atttypid;
-       *atttypmod = attr->atttypmod;
-       *atttypcoll = attr->attcollation;
-   }
-   else
-   {
-       *atttypid = exprType(expr);
-       *atttypmod = exprTypmod(expr);
-
-       if (OidIsValid(attr->attcollation))
-           *atttypcoll = attr->attcollation;
-       else
-           *atttypcoll = exprCollation(expr);
-   }
-   ReleaseSysCache(atup);
-
-   /*
-    * If it's a multirange, step down to the range type, as is done by
-    * multirange_typanalyze().
-    */
-   if (type_is_multirange(*atttypid))
-       *atttypid = get_multirange_range(*atttypid);
-
-   /* finds the right operators even if atttypid is a domain */
-   typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR);
-   *atttyptype = typcache->typtype;
-   *eq_opr = typcache->eq_opr;
-   *lt_opr = typcache->lt_opr;
-
-   /*
-    * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See
-    * compute_tsvector_stats().
-    */
-   if (*atttypid == TSVECTOROID)
-       *atttypcoll = DEFAULT_COLLATION_OID;
-
-   relation_close(rel, NoLock);
-}
-
-/*
- * Derive element type information from the attribute type.
- */
-static bool
-get_elem_stat_type(Oid atttypid, char atttyptype,
-                  Oid *elemtypid, Oid *elem_eq_opr)
-{
-   TypeCacheEntry *elemtypcache;
-
-   if (atttypid == TSVECTOROID)
-   {
-       /*
-        * Special case: element type for tsvector is text. See
-        * compute_tsvector_stats().
-        */
-       *elemtypid = TEXTOID;
-   }
-   else
-   {
-       /* find underlying element type through any domain */
-       *elemtypid = get_base_element_type(atttypid);
-   }
-
-   if (!OidIsValid(*elemtypid))
-       return false;
-
-   /* finds the right operator even if elemtypid is a domain */
-   elemtypcache = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR);
-   if (!OidIsValid(elemtypcache->eq_opr))
-       return false;
-
-   *elem_eq_opr = elemtypcache->eq_opr;
-
-   return true;
-}
-
-/*
- * Cast a text datum into an array with element type elemtypid.
- *
- * If an error is encountered, capture it and re-throw a WARNING, and set ok
- * to false. If the resulting array contains NULLs, raise a WARNING and set ok
- * to false. Otherwise, set ok to true.
- */
-static Datum
-text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid,
-                 int32 typmod, bool *ok)
-{
-   LOCAL_FCINFO(fcinfo, 8);
-   char       *s;
-   Datum       result;
-   ErrorSaveContext escontext = {T_ErrorSaveContext};
-
-   escontext.details_wanted = true;
-
-   s = TextDatumGetCString(d);
-
-   InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid,
-                            (Node *) &escontext, NULL);
-
-   fcinfo->args[0].value = CStringGetDatum(s);
-   fcinfo->args[0].isnull = false;
-   fcinfo->args[1].value = ObjectIdGetDatum(typid);
-   fcinfo->args[1].isnull = false;
-   fcinfo->args[2].value = Int32GetDatum(typmod);
-   fcinfo->args[2].isnull = false;
-
-   result = FunctionCallInvoke(fcinfo);
-
-   pfree(s);
-
-   if (escontext.error_occurred)
-   {
-       escontext.error_data->elevel = WARNING;
-       ThrowErrorData(escontext.error_data);
-       *ok = false;
-       return (Datum) 0;
-   }
-
-   if (array_contains_nulls(DatumGetArrayTypeP(result)))
-   {
-       ereport(WARNING,
-               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                errmsg("\"%s\" array must not contain null values", staname)));
-       *ok = false;
-       return (Datum) 0;
-   }
-
-   *ok = true;
-
-   return result;
-}
-
-/*
- * Find and update the slot with the given stakind, or use the first empty
- * slot.
- */
-static void
-set_stats_slot(Datum *values, bool *nulls, bool *replaces,
-              int16 stakind, Oid staop, Oid stacoll,
-              Datum stanumbers, bool stanumbers_isnull,
-              Datum stavalues, bool stavalues_isnull)
-{
-   int         slotidx;
-   int         first_empty = -1;
-   AttrNumber  stakind_attnum;
-   AttrNumber  staop_attnum;
-   AttrNumber  stacoll_attnum;
-
-   /* find existing slot with given stakind */
-   for (slotidx = 0; slotidx < STATISTIC_NUM_SLOTS; slotidx++)
-   {
-       stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
-
-       if (first_empty < 0 &&
-           DatumGetInt16(values[stakind_attnum]) == 0)
-           first_empty = slotidx;
-       if (DatumGetInt16(values[stakind_attnum]) == stakind)
-           break;
-   }
-
-   if (slotidx >= STATISTIC_NUM_SLOTS && first_empty >= 0)
-       slotidx = first_empty;
-
-   if (slotidx >= STATISTIC_NUM_SLOTS)
-       ereport(ERROR,
-               (errmsg("maximum number of statistics slots exceeded: %d",
-                       slotidx + 1)));
-
-   stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
-   staop_attnum = Anum_pg_statistic_staop1 - 1 + slotidx;
-   stacoll_attnum = Anum_pg_statistic_stacoll1 - 1 + slotidx;
-
-   if (DatumGetInt16(values[stakind_attnum]) != stakind)
-   {
-       values[stakind_attnum] = Int16GetDatum(stakind);
-       replaces[stakind_attnum] = true;
-   }
-   if (DatumGetObjectId(values[staop_attnum]) != staop)
-   {
-       values[staop_attnum] = ObjectIdGetDatum(staop);
-       replaces[staop_attnum] = true;
-   }
-   if (DatumGetObjectId(values[stacoll_attnum]) != stacoll)
-   {
-       values[stacoll_attnum] = ObjectIdGetDatum(stacoll);
-       replaces[stacoll_attnum] = true;
-   }
-   if (!stanumbers_isnull)
-   {
-       values[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = stanumbers;
-       nulls[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = false;
-       replaces[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = true;
-   }
-   if (!stavalues_isnull)
-   {
-       values[Anum_pg_statistic_stavalues1 - 1 + slotidx] = stavalues;
-       nulls[Anum_pg_statistic_stavalues1 - 1 + slotidx] = false;
-       replaces[Anum_pg_statistic_stavalues1 - 1 + slotidx] = true;
-   }
-}
-
 /*
  * Upsert the pg_statistic record.
  */
 /*
  * Upsert the pg_statistic record.
  */
@@ -880,44 +574,6 @@ delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit)
    return result;
 }
 
    return result;
 }
 
-/*
- * Initialize values and nulls for a new stats tuple.
- */
-static void
-init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited,
-                      Datum *values, bool *nulls, bool *replaces)
-{
-   memset(nulls, true, sizeof(bool) * Natts_pg_statistic);
-   memset(replaces, true, sizeof(bool) * Natts_pg_statistic);
-
-   /* must initialize non-NULL attributes */
-
-   values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid);
-   nulls[Anum_pg_statistic_starelid - 1] = false;
-   values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum);
-   nulls[Anum_pg_statistic_staattnum - 1] = false;
-   values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited);
-   nulls[Anum_pg_statistic_stainherit - 1] = false;
-
-   values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_NULL_FRAC;
-   nulls[Anum_pg_statistic_stanullfrac - 1] = false;
-   values[Anum_pg_statistic_stawidth - 1] = DEFAULT_AVG_WIDTH;
-   nulls[Anum_pg_statistic_stawidth - 1] = false;
-   values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_N_DISTINCT;
-   nulls[Anum_pg_statistic_stadistinct - 1] = false;
-
-   /* initialize stakind, staop, and stacoll slots */
-   for (int slotnum = 0; slotnum < STATISTIC_NUM_SLOTS; slotnum++)
-   {
-       values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0;
-       nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false;
-       values[Anum_pg_statistic_staop1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid);
-       nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false;
-       values[Anum_pg_statistic_stacoll1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid);
-       nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false;
-   }
-}
-
 /*
  * Delete statistics for the given attribute.
  */
 /*
  * Delete statistics for the given attribute.
  */
index 0c139bf43a7b9ae20cdfe0abba49f5c7afa34d8c..dd6c1216b9a74b022423e535e97725c3b3607e76 100644 (file)
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_class.h"
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_database.h"
+#include "catalog/pg_statistic.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
 #include "statistics/stat_utils.h"
 #include "storage/lmgr.h"
 #include "utils/acl.h"
 #include "statistics/stat_utils.h"
 #include "storage/lmgr.h"
 #include "utils/acl.h"
 #include "utils/rel.h"
 #include "utils/syscache.h"
 
 #include "utils/rel.h"
 #include "utils/syscache.h"
 
+/* Default values assigned to new pg_statistic tuples. */
+#define DEFAULT_STATATT_NULL_FRAC      Float4GetDatum(0.0) /* stanullfrac */
+#define DEFAULT_STATATT_AVG_WIDTH      Int32GetDatum(0) /* stawidth, same as
+                                                        * unknown */
+#define DEFAULT_STATATT_N_DISTINCT     Float4GetDatum(0.0) /* stadistinct, same as
+                                                            * unknown */
+
+static Node *statatt_get_index_expr(Relation rel, int attnum);
+
 /*
  * Ensure that a given argument is not null.
  */
 /*
  * Ensure that a given argument is not null.
  */
@@ -280,6 +292,50 @@ stats_check_arg_type(const char *argname, Oid argtype, Oid expectedtype)
    return true;
 }
 
    return true;
 }
 
+/*
+ * Check if an attribute of an index is an expression, and retrieve the
+ * expression if that is the case.
+ *
+ * "attnum" is the 1-based attribute number within the index; it is used
+ * (minus one) as an offset into the index's indkey array.
+ *
+ * Returns NULL if the relation is neither an index nor a partitioned
+ * index, if the index has no expressions at all, or if the indkey entry
+ * for the attribute is nonzero (the attribute maps directly to a relation
+ * attribute).
+ *
+ * Otherwise, we walk the index attributes that precede the specified
+ * attnum, advancing by one entry in the list of index expressions for each
+ * of them that is itself an expression (zero indkey entry), to reach the
+ * right expression.  An ERROR is raised if the list runs out of entries
+ * before that.
+ *
+ * NOTE(review): the walk uses rel->rd_indexprs rather than the list
+ * returned by RelationGetIndexExpressions() -- presumably these are the
+ * same list once the call has been made; to be confirmed.
+ */
+static Node *
+statatt_get_index_expr(Relation rel, int attnum)
+{
+   List       *index_exprs;
+   ListCell   *indexpr_item;
+
+   /* relation is not an index, so there is no expression to return */
+   if (rel->rd_rel->relkind != RELKIND_INDEX &&
+       rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+       return NULL;
+
+   index_exprs = RelationGetIndexExpressions(rel);
+
+   /* index has no expressions to give */
+   if (index_exprs == NIL)
+       return NULL;
+
+   /*
+    * The index's attnum points directly to a relation attnum (nonzero
+    * indkey entry), hence it is not an expression attribute.
+    */
+   if (rel->rd_index->indkey.values[attnum - 1] != 0)
+       return NULL;
+
+   indexpr_item = list_head(rel->rd_indexprs);
+
+   for (int i = 0; i < attnum - 1; i++)
+       if (rel->rd_index->indkey.values[i] == 0)
+           indexpr_item = lnext(rel->rd_indexprs, indexpr_item);
+
+   if (indexpr_item == NULL)   /* shouldn't happen */
+       elog(ERROR, "too few entries in indexprs list");
+
+   return (Node *) lfirst(indexpr_item);
+}
+
 /*
  * Translate variadic argument pairs from 'pairs_fcinfo' into a
  * 'positional_fcinfo' appropriate for calling relation_statistics_update() or
 /*
  * Translate variadic argument pairs from 'pairs_fcinfo' into a
  * 'positional_fcinfo' appropriate for calling relation_statistics_update() or
@@ -365,3 +421,325 @@ stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo,
 
    return result;
 }
 
    return result;
 }
+
+/*
+ * Derive type information from a relation attribute.
+ *
+ * This is needed for setting most slot statistics for all data types.
+ *
+ * This duplicates the logic in examine_attribute() but it will not skip the
+ * attribute if the attstattarget is 0.
+ *
+ * This information, retrieved from pg_attribute and pg_type with some
+ * specific handling for index expressions, is a prerequisite to calling
+ * any of the other statatt_*() functions.
+ *
+ * The relation "reloid" is opened with AccessShareLock, and closed with
+ * NoLock before returning.  An error is raised if there is no pg_attribute
+ * entry for (reloid, attnum), or if the attribute is marked as dropped.
+ *
+ * Output parameters:
+ *  atttypid    type of the attribute, or type of the expression for an
+ *              index expression attribute.  A multirange type is replaced
+ *              by its range type.
+ *  atttypmod   typmod of the attribute, or of the index expression.
+ *  atttyptype  typtype reported by the type cache for atttypid.
+ *  atttypcoll  collation of the attribute.  For an index expression, the
+ *              collation of the expression is used when the attribute has
+ *              no valid collation.  Forced to DEFAULT_COLLATION_OID for
+ *              tsvector.
+ *  eq_opr      equality operator reported by the type cache for atttypid.
+ *  lt_opr      less-than operator reported by the type cache for atttypid.
+ */
+void
+statatt_get_type(Oid reloid, AttrNumber attnum,
+                Oid *atttypid, int32 *atttypmod,
+                char *atttyptype, Oid *atttypcoll,
+                Oid *eq_opr, Oid *lt_opr)
+{
+   Relation    rel = relation_open(reloid, AccessShareLock);
+   Form_pg_attribute attr;
+   HeapTuple   atup;
+   Node       *expr;
+   TypeCacheEntry *typcache;
+
+   atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid),
+                          Int16GetDatum(attnum));
+
+   /* Attribute not found */
+   if (!HeapTupleIsValid(atup))
+       ereport(ERROR,
+               (errcode(ERRCODE_UNDEFINED_COLUMN),
+                errmsg("column %d of relation \"%s\" does not exist",
+                       attnum, RelationGetRelationName(rel))));
+
+   attr = (Form_pg_attribute) GETSTRUCT(atup);
+
+   if (attr->attisdropped)
+       ereport(ERROR,
+               (errcode(ERRCODE_UNDEFINED_COLUMN),
+                errmsg("column %d of relation \"%s\" does not exist",
+                       attnum, RelationGetRelationName(rel))));
+
+   expr = statatt_get_index_expr(rel, attr->attnum);
+
+   /*
+    * When analyzing an expression index, believe the expression tree's type
+    * not the column datatype --- the latter might be the opckeytype storage
+    * type of the opclass, which is not interesting for our purposes.  This
+    * mimics the behavior of examine_attribute().
+    */
+   if (expr == NULL)
+   {
+       *atttypid = attr->atttypid;
+       *atttypmod = attr->atttypmod;
+       *atttypcoll = attr->attcollation;
+   }
+   else
+   {
+       *atttypid = exprType(expr);
+       *atttypmod = exprTypmod(expr);
+
+       if (OidIsValid(attr->attcollation))
+           *atttypcoll = attr->attcollation;
+       else
+           *atttypcoll = exprCollation(expr);
+   }
+   ReleaseSysCache(atup);
+
+   /*
+    * If it's a multirange, step down to the range type, as is done by
+    * multirange_typanalyze().
+    */
+   if (type_is_multirange(*atttypid))
+       *atttypid = get_multirange_range(*atttypid);
+
+   /* finds the right operators even if atttypid is a domain */
+   typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR);
+   *atttyptype = typcache->typtype;
+   *eq_opr = typcache->eq_opr;
+   *lt_opr = typcache->lt_opr;
+
+   /*
+    * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See
+    * compute_tsvector_stats().
+    */
+   if (*atttypid == TSVECTOROID)
+       *atttypcoll = DEFAULT_COLLATION_OID;
+
+   relation_close(rel, NoLock);
+}
+
+/*
+ * Look up the element type of a container type, along with the equality
+ * operator of that element type.  This is needed for statistics kinds that
+ * describe the elements held by an attribute rather than the attribute
+ * values themselves.
+ *
+ * The atttypid and atttyptype should be derived from a previous call to
+ * statatt_get_type().  Note that atttyptype is not consulted here.
+ *
+ * Returns true with elemtypid and elem_eq_opr set on success.  Returns
+ * false if atttypid has no element type, or if the element type has no
+ * equality operator; elem_eq_opr is left untouched in both cases.
+ */
+bool
+statatt_get_elem_type(Oid atttypid, char atttyptype,
+                     Oid *elemtypid, Oid *elem_eq_opr)
+{
+   TypeCacheEntry *typentry;
+
+   /*
+    * tsvector is a special case whose element type is text, see
+    * compute_tsvector_stats().  For anything else, find the underlying
+    * element type, looking through any domain.
+    */
+   *elemtypid = (atttypid == TSVECTOROID) ?
+       TEXTOID : get_base_element_type(atttypid);
+
+   if (!OidIsValid(*elemtypid))
+       return false;
+
+   /* the type cache finds the right operator even for a domain */
+   typentry = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR);
+
+   if (OidIsValid(typentry->eq_opr))
+   {
+       *elem_eq_opr = typentry->eq_opr;
+       return true;
+   }
+
+   return false;
+}
+
+/*
+ * Build an array from a text datum, used as value of an attribute in a
+ * tuple to-be-inserted into pg_statistic.
+ *
+ * The typid and typmod should be derived from a previous call to
+ * statatt_get_type().
+ *
+ * Arguments:
+ *  staname     name of the statistics field, used only in the WARNING
+ *              reported when the array contains NULLs.
+ *  array_in    lookup data of the input function to invoke.  It is called
+ *              with three arguments: the C string extracted from "d",
+ *              typid and typmod.
+ *  d           text datum holding the external representation of the array.
+ *  typid       type OID given as second argument of the input function.
+ *  typmod      typmod given as third argument of the input function.
+ *  ok          output, see below.
+ *
+ * The input function is invoked with an ErrorSaveContext, so as an error
+ * saved there can be re-thrown with a lower level rather than failing
+ * immediately.
+ *
+ * If an error is encountered, capture it and throw a WARNING, with "ok" set
+ * to false.  If the resulting array contains NULLs, raise a WARNING and
+ * set "ok" to false.  When the operation succeeds, set "ok" to true.
+ *
+ * Returns the array built on success, and (Datum) 0 each time "ok" is set
+ * to false.
+ */
+Datum
+statatt_build_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid,
+                       int32 typmod, bool *ok)
+{
+   LOCAL_FCINFO(fcinfo, 8);
+   char       *s;
+   Datum       result;
+   ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+   escontext.details_wanted = true;
+
+   s = TextDatumGetCString(d);
+
+   InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid,
+                            (Node *) &escontext, NULL);
+
+   fcinfo->args[0].value = CStringGetDatum(s);
+   fcinfo->args[0].isnull = false;
+   fcinfo->args[1].value = ObjectIdGetDatum(typid);
+   fcinfo->args[1].isnull = false;
+   fcinfo->args[2].value = Int32GetDatum(typmod);
+   fcinfo->args[2].isnull = false;
+
+   result = FunctionCallInvoke(fcinfo);
+
+   pfree(s);
+
+   if (escontext.error_occurred)
+   {
+       escontext.error_data->elevel = WARNING;
+       ThrowErrorData(escontext.error_data);
+       *ok = false;
+       return (Datum) 0;
+   }
+
+   if (array_contains_nulls(DatumGetArrayTypeP(result)))
+   {
+       ereport(WARNING,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("\"%s\" array must not contain null values", staname)));
+       *ok = false;
+       return (Datum) 0;
+   }
+
+   *ok = true;
+
+   return result;
+}
+
+/*
+ * Store a statistics slot into the values/nulls/replaces arrays of a
+ * pg_statistic tuple.
+ *
+ * The slot already holding the given stakind is reused if there is one,
+ * otherwise the first empty slot (stakind of 0) is taken.  An error is
+ * raised if neither exists.
+ *
+ * Core statistics types expect the stakind value to be one of the
+ * STATISTIC_KIND_* constants defined in pg_statistic.h, but types defined
+ * by extensions are not restricted to those values.
+ *
+ * In the case of core statistics, the required staop is determined by the
+ * stakind given and will either be a hardcoded oid, or the eq/lt operator
+ * derived from statatt_get_type().  Likewise, types defined by extensions
+ * have no such restriction.
+ *
+ * The stacoll value should be either the atttypcoll derived from
+ * statatt_get_type(), or a hardcoded value required by that particular
+ * stakind.
+ *
+ * The value/null pairs for stanumbers and stavalues should be calculated
+ * based on the stakind, using statatt_build_stavalues() or constructed
+ * arrays.  A pair flagged as NULL leaves the matching column of the slot
+ * untouched.
+ */
+void
+statatt_set_slot(Datum *values, bool *nulls, bool *replaces,
+                int16 stakind, Oid staop, Oid stacoll,
+                Datum stanumbers, bool stanumbers_isnull,
+                Datum stavalues, bool stavalues_isnull)
+{
+   int         target = -1;
+   int         free_slot = -1;
+   int         kind_idx;
+   int         op_idx;
+   int         coll_idx;
+
+   /* search a slot with the given stakind, tracking the first empty one */
+   for (int i = 0; i < STATISTIC_NUM_SLOTS; i++)
+   {
+       int16       cur_kind;
+
+       cur_kind = DatumGetInt16(values[Anum_pg_statistic_stakind1 - 1 + i]);
+
+       if (cur_kind == 0 && free_slot < 0)
+           free_slot = i;
+       if (cur_kind == stakind)
+       {
+           target = i;
+           break;
+       }
+   }
+
+   /* no slot with this stakind, fall back to the first empty slot */
+   if (target < 0)
+       target = free_slot;
+
+   /* every slot is taken by some other stakind */
+   if (target < 0)
+       ereport(ERROR,
+               (errmsg("maximum number of statistics slots exceeded: %d",
+                       STATISTIC_NUM_SLOTS + 1)));
+
+   kind_idx = Anum_pg_statistic_stakind1 - 1 + target;
+   op_idx = Anum_pg_statistic_staop1 - 1 + target;
+   coll_idx = Anum_pg_statistic_stacoll1 - 1 + target;
+
+   /* stakind, staop and stacoll are replaced only if they change */
+   if (DatumGetInt16(values[kind_idx]) != stakind)
+   {
+       values[kind_idx] = Int16GetDatum(stakind);
+       replaces[kind_idx] = true;
+   }
+   if (DatumGetObjectId(values[op_idx]) != staop)
+   {
+       values[op_idx] = ObjectIdGetDatum(staop);
+       replaces[op_idx] = true;
+   }
+   if (DatumGetObjectId(values[coll_idx]) != stacoll)
+   {
+       values[coll_idx] = ObjectIdGetDatum(stacoll);
+       replaces[coll_idx] = true;
+   }
+
+   /* stanumbers and stavalues are stored only when a value is given */
+   if (!stanumbers_isnull)
+   {
+       int         numbers_idx = Anum_pg_statistic_stanumbers1 - 1 + target;
+
+       values[numbers_idx] = stanumbers;
+       nulls[numbers_idx] = false;
+       replaces[numbers_idx] = true;
+   }
+   if (!stavalues_isnull)
+   {
+       int         values_idx = Anum_pg_statistic_stavalues1 - 1 + target;
+
+       values[values_idx] = stavalues;
+       nulls[values_idx] = false;
+       replaces[values_idx] = true;
+   }
+}
+
+/*
+ * Set up the values, nulls and replaces arrays of a new, empty,
+ * pg_statistic tuple.
+ *
+ * The three arrays are allocated by the caller, and should be of size
+ * Natts_pg_statistic.  All the columns are flagged as replaced.  All the
+ * columns are set to NULL, except the ones filled here: the identity
+ * columns (starelid, staattnum, stainherit), the scalar statistics
+ * (stanullfrac, stawidth, stadistinct) set to their defaults, and the
+ * stakind, staop and stacoll of each slot, set to zero or InvalidOid.
+ *
+ * When using this routine for a tuple inserted into pg_statistic, reloid,
+ * attnum and inherited flags should all be set.
+ *
+ * When using this routine for a tuple that is an element of a stxdexpr
+ * array inserted into pg_statistic_ext_data, reloid, attnum and inherited
+ * should be respectively set to InvalidOid, InvalidAttrNumber and false.
+ */
+void
+statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited,
+                        Datum *values, bool *nulls, bool *replaces)
+{
+   /* begin with all the columns NULL and flagged for replacement */
+   memset(replaces, true, Natts_pg_statistic * sizeof(bool));
+   memset(nulls, true, Natts_pg_statistic * sizeof(bool));
+
+   /* identity of the tuple, never NULL */
+   values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid);
+   values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum);
+   values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited);
+   nulls[Anum_pg_statistic_starelid - 1] = false;
+   nulls[Anum_pg_statistic_staattnum - 1] = false;
+   nulls[Anum_pg_statistic_stainherit - 1] = false;
+
+   /* scalar statistics, set to their defaults */
+   values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_STATATT_NULL_FRAC;
+   values[Anum_pg_statistic_stawidth - 1] = DEFAULT_STATATT_AVG_WIDTH;
+   values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_STATATT_N_DISTINCT;
+   nulls[Anum_pg_statistic_stanullfrac - 1] = false;
+   nulls[Anum_pg_statistic_stawidth - 1] = false;
+   nulls[Anum_pg_statistic_stadistinct - 1] = false;
+
+   /* mark all the slots as empty: no stakind, no staop and no stacoll */
+   for (int slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
+   {
+       int         kind_idx = Anum_pg_statistic_stakind1 + slot - 1;
+       int         op_idx = Anum_pg_statistic_staop1 + slot - 1;
+       int         coll_idx = Anum_pg_statistic_stacoll1 + slot - 1;
+
+       values[kind_idx] = (Datum) 0;
+       values[op_idx] = ObjectIdGetDatum(InvalidOid);
+       values[coll_idx] = ObjectIdGetDatum(InvalidOid);
+
+       nulls[kind_idx] = false;
+       nulls[op_idx] = false;
+       nulls[coll_idx] = false;
+   }
+}
index f41b181d4d3a39d97d43a89ac358739a16611c7a..e2bc62a561462aa7dfadfa1f0fc74a34bdd9e3a9 100644 (file)
@@ -13,6 +13,7 @@
 #ifndef STATS_UTILS_H
 #define STATS_UTILS_H
 
 #ifndef STATS_UTILS_H
 #define STATS_UTILS_H
 
+#include "access/attnum.h"
 #include "fmgr.h"
 
 /* avoid including primnodes.h here */
 #include "fmgr.h"
 
 /* avoid including primnodes.h here */
@@ -40,4 +41,21 @@ extern bool stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo,
                                             FunctionCallInfo positional_fcinfo,
                                             struct StatsArgInfo *arginfo);
 
                                             FunctionCallInfo positional_fcinfo,
                                             struct StatsArgInfo *arginfo);
 
+extern void statatt_get_type(Oid reloid, AttrNumber attnum,
+                            Oid *atttypid, int32 *atttypmod,
+                            char *atttyptype, Oid *atttypcoll,
+                            Oid *eq_opr, Oid *lt_opr);
+extern void statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited,
+                                    Datum *values, bool *nulls, bool *replaces);
+
+extern void statatt_set_slot(Datum *values, bool *nulls, bool *replaces,
+                            int16 stakind, Oid staop, Oid stacoll,
+                            Datum stanumbers, bool stanumbers_isnull,
+                            Datum stavalues, bool stavalues_isnull);
+
+extern Datum statatt_build_stavalues(const char *staname, FmgrInfo *array_in, Datum d,
+                                    Oid typid, int32 typmod, bool *ok);
+extern bool statatt_get_elem_type(Oid atttypid, char atttyptype,
+                                 Oid *elemtypid, Oid *elem_eq_opr);
+
 #endif                         /* STATS_UTILS_H */
 #endif                         /* STATS_UTILS_H */