Rework output format of pg_ndistinct

author Michael Paquier <[email protected]>

Mon, 17 Nov 2025 00:52:20 +0000 (09:52 +0900)

committer Michael Paquier <[email protected]>

Mon, 17 Nov 2025 00:52:20 +0000 (09:52 +0900)
author Michael Paquier <[email protected]>
Mon, 17 Nov 2025 00:52:20 +0000 (09:52 +0900)
committer Michael Paquier <[email protected]>
Mon, 17 Nov 2025 00:52:20 +0000 (09:52 +0900)
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml

index 106583fb2965d57a7ec36c3e070d4f1a9adad772..a88690150a8fab35d1d4d4234052b1c2f0db42ca 100644 (file)
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1576,12 +1576,42 @@ CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes;
  
  ANALYZE zipcodes;
  
-SELECT stxkeys AS k, stxdndistinct AS nd
-  FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid)
+SELECT stxkeys AS k, jsonb_pretty(stxdndistinct::text::jsonb) AS nd
+  FROM pg_statistic_ext JOIN pg_statistic_ext_data on (oid = stxoid)
    WHERE stxname = 'stts2';
--[ RECORD 1 ]------------------------------------------------------&zwsp;--
+-[ RECORD 1 ]-------------------
  k  | 1 2 5
-nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
+nd | [                          +
+   |     {                      +
+   |         "ndistinct": 33178,+
+   |         "attributes": [    +
+   |             1,             +
+   |             2              +
+   |         ]                  +
+   |     },                     +
+   |     {                      +
+   |         "ndistinct": 33178,+
+   |         "attributes": [    +
+   |             1,             +
+   |             5              +
+   |         ]                  +
+   |     },                     +
+   |     {                      +
+   |         "ndistinct": 27435,+
+   |         "attributes": [    +
+   |             2,             +
+   |             5              +
+   |         ]                  +
+   |     },                     +
+   |     {                      +
+   |         "ndistinct": 33178,+
+   |         "attributes": [    +
+   |             1,             +
+   |             2,             +
+   |             5              +
+   |         ]                  +
+   |     }                      +
+   | ]
  (1 row)
  </programlisting>
       This indicates that there are three combinations of columns that
diff --git a/src/backend/utils/adt/pg_ndistinct.c b/src/backend/utils/adt/pg_ndistinct.c

index 667ada9c3b45d5933db32ea341d8ea1bf6c2ecc3..97efc290ef5e2b3cf6f9746842e01c420d0df983 100644 (file)
--- a/src/backend/utils/adt/pg_ndistinct.c
+++ b/src/backend/utils/adt/pg_ndistinct.c
@@ -16,6 +16,7 @@
  
  #include "lib/stringinfo.h"
  #include "statistics/extended_stats_internal.h"
+#include "statistics/statistics_format.h"
  #include "utils/fmgrprotos.h"
  
  
@@ -51,26 +52,29 @@ pg_ndistinct_out(PG_FUNCTION_ARGS)
         StringInfoData str;
  
         initStringInfo(&str);
-       appendStringInfoChar(&str, '{');
+       appendStringInfoChar(&str, '[');
  
         for (i = 0; i < ndist->nitems; i++)
         {
-               int                     j;
                 MVNDistinctItem item = ndist->items[i];
  
                 if (i > 0)
                         appendStringInfoString(&str, ", ");
  
-               for (j = 0; j < item.nattributes; j++)
-               {
-                       AttrNumber      attnum = item.attributes[j];
+               if (item.nattributes <= 0)
+                       elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
  
-                       appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
-               }
-               appendStringInfo(&str, "\": %d", (int) item.ndistinct);
+               appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
+                                                item.attributes[0]);
+
+               for (int j = 1; j < item.nattributes; j++)
+                       appendStringInfo(&str, ", %d", item.attributes[j]);
+
+               appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
+                                                (int) item.ndistinct);
         }
  
-       appendStringInfoChar(&str, '}');
+       appendStringInfoChar(&str, ']');
  
         PG_RETURN_CSTRING(str.data);
  }
diff --git a/src/include/statistics/statistics_format.h b/src/include/statistics/statistics_format.h

new file mode 100644 (file)

index 0000000..fb0ae58
--- /dev/null
+++ b/src/include/statistics/statistics_format.h
@@ -0,0 +1,32 @@
+/*-------------------------------------------------------------------------
+ *
+ * statistics_format.h
+ *       Data related to the format of extended statistics, usable by both
+ *       frontend and backend code.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/include/statistics/statistics_format.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STATISTICS_FORMAT_H
+#define STATISTICS_FORMAT_H
+
+/* ----------
+ * pg_ndistinct in human-readable format is a JSON array made of elements with
+ * a predefined set of keys, like:
+ *
+ * [{"ndistinct": 11, "attributes": [3,4]},
+ *  {"ndistinct": 11, "attributes": [3,6]},
+ *  {"ndistinct": 11, "attributes": [4,6]},
+ *  {"ndistinct": 11, "attributes": [3,4,6]},
+ *  ... ]
+ * ----------
+ */
+#define PG_NDISTINCT_KEY_ATTRIBUTES    "attributes"
+#define PG_NDISTINCT_KEY_NDISTINCT     "ndistinct"
+
+#endif                                                 /* STATISTICS_FORMAT_H */
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out

index 495a1b350189c73b4105f447f91bfdbe7961ba31..e9379afe39e266fccf71e431697c44d680f44426 100644 (file)
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -196,7 +196,7 @@ Statistics objects:
      "public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0
  
  ANALYZE ab1;
-SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit
+SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
    FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
   WHERE s.stxname = 'ab1_a_b_stats';
      stxname    | stxdndistinct | stxddependencies | stxdmcv | stxdinherit 
@@ -476,13 +476,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
  -- correct command
  CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
  ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
- stxkind |                    stxdndistinct                    
----------+-----------------------------------------------------
- {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
+ stxkind |      stxdndistinct       
+---------+--------------------------
+ {d,f,m} | [                       +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             3,          +
+         |             4           +
+         |         ]               +
+         |     },                  +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             3,          +
+         |             6           +
+         |         ]               +
+         |     },                  +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             4,          +
+         |             6           +
+         |         ]               +
+         |     },                  +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             3,          +
+         |             4,          +
+         |             6           +
+         |         ]               +
+         |     }                   +
+         | ]
  (1 row)
  
  -- minor improvement, make sure the ctid does not break the matching
@@ -558,13 +588,43 @@ INSERT INTO ndistinct (a, b, c, filler1)
              mod(i,23) || ' dollars and zero cents'
         FROM generate_series(1,1000) s(i);
  ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
- stxkind |                      stxdndistinct                       
----------+----------------------------------------------------------
- {d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000}
+ stxkind |       stxdndistinct        
+---------+----------------------------
+ {d,f,m} | [                         +
+         |     {                     +
+         |         "ndistinct": 221, +
+         |         "attributes": [   +
+         |             3,            +
+         |             4             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 247, +
+         |         "attributes": [   +
+         |             3,            +
+         |             6             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 323, +
+         |         "attributes": [   +
+         |             4,            +
+         |             6             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 1000,+
+         |         "attributes": [   +
+         |             3,            +
+         |             4,            +
+         |             6             +
+         |         ]                 +
+         |     }                     +
+         | ]
  (1 row)
  
  -- correct estimates
@@ -623,7 +683,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
  (1 row)
  
  DROP STATISTICS s10;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
@@ -707,13 +767,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
  
  CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
  ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
- stxkind |                           stxdndistinct                           
----------+-------------------------------------------------------------------
- {d,e}   | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000}
+ stxkind |       stxdndistinct        
+---------+----------------------------
+ {d,e}   | [                         +
+         |     {                     +
+         |         "ndistinct": 221, +
+         |         "attributes": [   +
+         |             -1,           +
+         |             -2            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 247, +
+         |         "attributes": [   +
+         |             -1,           +
+         |             -3            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 323, +
+         |         "attributes": [   +
+         |             -2,           +
+         |             -3            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 1000,+
+         |         "attributes": [   +
+         |             -1,           +
+         |             -2,           +
+         |             -3            +
+         |         ]                 +
+         |     }                     +
+         | ]
  (1 row)
  
  SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
@@ -756,13 +846,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
  
  CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
  ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
- stxkind |                        stxdndistinct                        
----------+-------------------------------------------------------------
- {d,e}   | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000}
+ stxkind |       stxdndistinct        
+---------+----------------------------
+ {d,e}   | [                         +
+         |     {                     +
+         |         "ndistinct": 221, +
+         |         "attributes": [   +
+         |             3,            +
+         |             4             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 247, +
+         |         "attributes": [   +
+         |             3,            +
+         |             -1            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 323, +
+         |         "attributes": [   +
+         |             4,            +
+         |             -1            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 1000,+
+         |         "attributes": [   +
+         |             3,            +
+         |             4,            +
+         |             -1            +
+         |         ]                 +
+         |     }                     +
+         | ]
  (1 row)
  
  SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql

index fc6f152a072fd90082d9b95f0273e3d1d039f665..fc4aee6d8399907a79c5c657e4bc2ca623156c15 100644 (file)
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -125,7 +125,7 @@ ALTER TABLE ab1 ALTER a SET STATISTICS -1;
  ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
  \d ab1
  ANALYZE ab1;
-SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit
+SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
    FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
   WHERE s.stxname = 'ab1_a_b_stats';
  ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
@@ -297,7 +297,7 @@ CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
  
  ANALYZE ndistinct;
  
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
@@ -338,7 +338,7 @@ INSERT INTO ndistinct (a, b, c, filler1)
  
  ANALYZE ndistinct;
  
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
@@ -364,7 +364,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
  
  DROP STATISTICS s10;
  
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
@@ -399,7 +399,7 @@ CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
  
  ANALYZE ndistinct;
  
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
@@ -423,7 +423,7 @@ CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
  
  ANALYZE ndistinct;
  
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
    FROM pg_statistic_ext s, pg_statistic_ext_data d
   WHERE s.stxrelid = 'ndistinct'::regclass
     AND d.stxoid = s.oid;
author	Michael Paquier <[email protected]>
	Mon, 17 Nov 2025 00:52:20 +0000 (09:52 +0900)
committer	Michael Paquier <[email protected]>
	Mon, 17 Nov 2025 00:52:20 +0000 (09:52 +0900)
doc/src/sgml/perform.sgml		patch \| blob \| blame \| history
src/backend/utils/adt/pg_ndistinct.c		patch \| blob \| blame \| history
src/include/statistics/statistics_format.h	[new file with mode: 0644]	patch \| blob
src/test/regress/expected/stats_ext.out		patch \| blob \| blame \| history
src/test/regress/sql/stats_ext.sql		patch \| blob \| blame \| history