Implement GetStatistics() of csv_fdw based on cost_seqscan().
author: Shigeru Hanada <[email protected]>
Fri, 8 Oct 2010 08:40:37 +0000 (17:40 +0900)
committer: Shigeru Hanada <[email protected]>
Fri, 8 Oct 2010 08:45:44 +0000 (17:45 +0900)
Also introduce ForeignPath to store information that is collected
by GetStatistics().  Such information would be useful for other
APIs that are called from the executor.

contrib/postgresql_fdw/postgresql_fdw.c
src/backend/foreign/csv_fdw.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/util/pathnode.c
src/include/nodes/execnodes.h
src/include/nodes/nodes.h
src/include/nodes/relation.h
src/include/optimizer/cost.h

index f32d914245a5670257c7bb3c78783bfc6abf2c35..774bd6562f42e3c3d76e51138b32e1f26fae6905 100644 (file)
@@ -54,7 +54,7 @@ static void pgOpen(ForeignScanState *scanstate);
 static void pgIterate(ForeignScanState *scanstate);
 static void pgClose(ForeignScanState *scanstate);
 static void pgReOpen(ForeignScanState *scanstate);
-static void pgGetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+static void pgGetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel);
 
 /* deparse SQL from the request */
 static bool is_immutable_func(Oid funcid);
@@ -787,7 +787,7 @@ get_server_costs(Oid relid, double *connection_cost, double *transfer_cost)
  * baserel->baserestrictinfo can be used to examine quals on the relation.
  */
 static void
-pgGetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+pgGetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel)
 {
    RangeTblEntry  *rte;
    double          connection_cost = 0.0;
@@ -796,17 +796,17 @@ pgGetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel)
    /*
     * Estimate as same as sequencial scan on local table as approximate value.
     */
-   cost_seqscan(path, root, baserel);
+   cost_seqscan(&path.path, root, baserel);
 
    /* XXX override estimated cost */
-   path->startup_cost = 1000;
-   path->total_cost = 1000;
+   path->path.startup_cost = 1000;
+   path->path.total_cost = 1000;
 
    /* Get cost factor from catalog and correct costs with them. */
    rte = planner_rt_fetch(baserel->relid, root);
    get_server_costs(rte->relid, &connection_cost, &transfer_cost);
-   path->startup_cost += connection_cost;
-   path->total_cost += connection_cost;
-   path->total_cost += transfer_cost *
-                       path->parent->width * path->parent->rows;
+   path->path.startup_cost += connection_cost;
+   path->path.total_cost += connection_cost;
+   path->path.total_cost += transfer_cost *
+                       path->path.parent->width * path->path.parent->rows;
 }
index b3a0b89844cabf53e832fac6cbd1e4db786f291e..3aba44bc333a8630e550024eff95161d857aa910 100644 (file)
@@ -18,6 +18,8 @@
 #include "foreign/foreign.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "optimizer/cost.h"
+#include "parser/parsetree.h"
 #include "storage/fd.h"
 #include "utils/builtins.h"
 
@@ -85,7 +87,7 @@ static void Open(ForeignScanState *scanstate);
 static void Iterate(ForeignScanState *scanstate);
 static void Close(ForeignScanState *scanstate);
 static void ReOpen(ForeignScanState *scanstate);
-static void GetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+static void GetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel);
 
 static HeapTuple get_next_tuple(ForeignScanState *scanstate);
 
@@ -233,7 +235,6 @@ FreeFSConnection(FSConnection *conn)
 static void
 Open(ForeignScanState *scanstate)
 {
-   elog(DEBUG1, "%s called", __func__);
    Relation        rel = scanstate->ss.ss_currentRelation;
    CsvFdwReply    *reply;
    int             i;
@@ -241,6 +242,8 @@ Open(ForeignScanState *scanstate)
    const char    **keywords;
    const char    **values;
 
+   elog(DEBUG1, "%s called", __func__);
+
    /* create CsvFdwReply and set default settings */
    reply = (CsvFdwReply *) palloc(sizeof(*reply));
    reply->filename = NULL;
@@ -304,6 +307,8 @@ Iterate(ForeignScanState *scanstate)
    {
        struct stat st;
 
+       elog(DEBUG1, "opening CSV file \"%s\"", reply->filename);
+
        reply->fp = AllocateFile(reply->filename, PG_BINARY_R);
        if (reply->fp == NULL)
            ereport(ERROR,
@@ -347,7 +352,8 @@ Close(ForeignScanState *scanstate)
 
    scanstate->reply = NULL;
 
-   FreeFile(reply->fp);
+   if (reply->fp)
+       FreeFile(reply->fp);
    pfree(reply);
 }
 
@@ -372,9 +378,75 @@ ReOpen(ForeignScanState *scanstate)
  * Estimate costs of scanning on a foreign table.
  */
 static void
-GetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+GetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel)
 {
+   RangeTblEntry  *rte;
+   ForeignTable   *table;
+   int             n;
+   const char    **keywords;
+   const char    **values;
+   int             i;
+   char           *filename = NULL;
+   struct stat     stat;
+   BlockNumber     pages;
+   double          run_cost = 0;
+   double          startup_cost = 0;
+   double          cpu_per_tuple;
+
    elog(DEBUG1, "%s called", __func__);
+
+   /* get filename from generic option of the foreign table */
+   rte = planner_rt_fetch(baserel->relid, root);
+   table = GetForeignTable(rte->relid);
+   keywords = palloc(sizeof(char *) * list_length(table->options));
+   values = palloc(sizeof(char *) * list_length(table->options));
+   n = flatten_generic_options(table->options, keywords, values);
+
+   for (i = 0; i < n; i++)
+   {
+       if (strcmp(keywords[i], "filename") == 0)
+       {
+           filename = pstrdup(values[i]);
+           break;
+       }
+   }
+
+   pfree(keywords);
+   pfree(values);
+
+   /* at least filename must be specified */
+   if (filename == NULL)
+   {
+       ereport(ERROR,
+               (errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY),
+                errmsg("generic option \"filename\" is required")));
+   }
+
+   /* get size of the CSV file */
+   if (lstat(filename, &stat) == -1)
+   {
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not stat file \"%s\": %m", filename)));
+   }
+   pfree(filename);
+
+   /*
+    * The way to estimate costs is almost same as cost_seqscan(), but there
+    * are some differences:
+    * - DISK costs are estimated based on CSV file size.
+    * - CPU costs are 2x of seq scan, incremented are overhead to parse CSV
+    */
+   pages = stat.st_size / BLCKSZ;
+   run_cost += seq_page_cost * pages;
+
+   startup_cost += baserel->baserestrictcost.startup;
+   cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+   cpu_per_tuple *= 2;
+   run_cost += cpu_per_tuple * baserel->tuples;
+
+   path->path.startup_cost = startup_cost;
+   path->path.total_cost = startup_cost + run_cost;
 }
 
 /*
index 3cf7e9c281d93086c1d062b2d64f88ed260624f5..8c8711ea072a63766e90d4ac6d166067e91becd5 100644 (file)
@@ -1457,6 +1457,14 @@ _outTidPath(StringInfo str, TidPath *node)
    WRITE_NODE_FIELD(tidquals);
 }
 
+static void
+_outForeignPath(StringInfo str, ForeignPath *node)
+{
+   WRITE_NODE_TYPE("FOREIGNPATH");
+
+   _outPathInfo(str, (Path *) node);
+}
+
 static void
 _outAppendPath(StringInfo str, AppendPath *node)
 {
@@ -2753,6 +2761,9 @@ _outNode(StringInfo str, void *obj)
            case T_TidPath:
                _outTidPath(str, obj);
                break;
+           case T_ForeignPath:
+               _outForeignPath(str, obj);
+               break;
            case T_AppendPath:
                _outAppendPath(str, obj);
                break;
index cae54a3dc20e6f46cdccfa7b63af8cbfcbf0f330..84b70231f357f26c0752ffdf0ef459a1bc160329 100644 (file)
@@ -1391,6 +1391,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
        case T_TidPath:
            ptype = "TidScan";
            break;
+       case T_ForeignPath:
+           ptype = "ForeignScan";
+           break;
        case T_AppendPath:
            ptype = "Append";
            break;
index 59282aa89c52e11c4099938efa87dfb2f85a75bd..93e7a6c518767ff631f70938b9e4b3a8bb878f5c 100644 (file)
@@ -1026,7 +1026,7 @@ cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
  *   Determines and returns the cost of scanning a foreign table sequentially.
  */
 void
-cost_foreignscan(Path *path, PlannerInfo *root,
+cost_foreignscan(ForeignPath *path, PlannerInfo *root,
             RelOptInfo *baserel)
 {
    RangeTblEntry  *rte;
index 0a9c097788c81902b07dcb77c7b04b1ba06a6f08..d2325965ed704d55ae0c0b47fddc7f41b4bb846b 100644 (file)
@@ -1320,11 +1320,11 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel)
 Path *
 create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel)
 {
-   Path       *pathnode = makeNode(Path);
+   ForeignPath    *pathnode = makeNode(ForeignPath);
 
-   pathnode->pathtype = T_ForeignScan;
-   pathnode->parent = rel;
-   pathnode->pathkeys = NIL;   /* result is always unordered */
+   pathnode->path.pathtype = T_ForeignScan;
+   pathnode->path.parent = rel;
+   pathnode->path.pathkeys = NIL;  /* result is always unordered */
 
    cost_foreignscan(pathnode, root, rel);
 
index 329dd321f9eee88e1b8b37105c75c830afea9b19..0f5eea7be638a612b4a6a653621f538c9b9610ca 100644 (file)
@@ -1400,7 +1400,8 @@ struct FdwRoutine
     * Estimate costs of a foreign path.
     * FDW should update startup_cost and total_cost in the Path.
     */
-   void (*GetStatistics)(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+   void (*GetStatistics)(ForeignPath *path, PlannerInfo *root,
+                         RelOptInfo *baserel);
 
    /*
     * Deparse query request and open a cursor for the foreign scan.
index b3ef014bd70bf3b32c8931319ef202541530bb36..9067ef7ed59eba1ed310fcecd17b52f0db166629 100644 (file)
@@ -216,6 +216,7 @@ typedef enum NodeTag
    T_MergePath,
    T_HashPath,
    T_TidPath,
+   T_ForeignPath,
    T_AppendPath,
    T_ResultPath,
    T_MaterialPath,
index 91f4c5c1c43716786e0f6382491ce7115535c13b..2728895b5aba672f3a569e3a79323ac2f58328db 100644 (file)
@@ -737,6 +737,14 @@ typedef struct TidPath
    List       *tidquals;       /* qual(s) involving CTID = something */
 } TidPath;
 
+/*
+ * ForeignPath represents a scan on a foreign table
+ */
+typedef struct ForeignPath
+{
+   Path        path;
+} ForeignPath;
+
 /*
  * AppendPath represents an Append plan, ie, successive execution of
  * several member plans.
index 8698c5332c68ff51a2eacc697b57840278c0bef5..3c155cf97b24dc952c250b7bca13aad7e89cc38d 100644 (file)
@@ -81,7 +81,7 @@ extern void cost_functionscan(Path *path, PlannerInfo *root,
 extern void cost_valuesscan(Path *path, PlannerInfo *root,
                RelOptInfo *baserel);
 extern void cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
-extern void cost_foreignscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+extern void cost_foreignscan(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel);
 extern void cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm);
 extern void cost_sort(Path *path, PlannerInfo *root,
          List *pathkeys, Cost input_cost, double tuples, int width,