From: Shigeru Hanada Date: Fri, 8 Oct 2010 08:40:37 +0000 (+0900) Subject: Implement GetStatistics() of csv_fdw based on cost_seqscan(). X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=d1cfcd21b22ac3e33afb3fe74c0485d2b31cab4d;p=users%2Fhanada%2Fpostgres.git Implement GetStatistics() of csv_fdw based on cost_seqscan(). Also introduce ForeignPath to store information that is collected by GetStatistics(). Such information would be useful for other APIs which are called from the executor. --- diff --git a/contrib/postgresql_fdw/postgresql_fdw.c b/contrib/postgresql_fdw/postgresql_fdw.c index f32d914245..774bd6562f 100644 --- a/contrib/postgresql_fdw/postgresql_fdw.c +++ b/contrib/postgresql_fdw/postgresql_fdw.c @@ -54,7 +54,7 @@ static void pgOpen(ForeignScanState *scanstate); static void pgIterate(ForeignScanState *scanstate); static void pgClose(ForeignScanState *scanstate); static void pgReOpen(ForeignScanState *scanstate); -static void pgGetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel); +static void pgGetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel); /* deparse SQL from the request */ static bool is_immutable_func(Oid funcid); @@ -787,7 +787,7 @@ get_server_costs(Oid relid, double *connection_cost, double *transfer_cost) * baserel->baserestrictinfo can be used to examine quals on the relation. */ static void -pgGetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel) +pgGetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel) { RangeTblEntry *rte; double connection_cost = 0.0; @@ -796,17 +796,17 @@ pgGetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel) /* * Estimate as same as sequencial scan on local table as approximate value.
*/ - cost_seqscan(path, root, baserel); + cost_seqscan(&path->path, root, baserel); /* XXX override estimated cost */ - path->startup_cost = 1000; - path->total_cost = 1000; + path->path.startup_cost = 1000; + path->path.total_cost = 1000; /* Get cost factor from catalog and correct costs with them. */ rte = planner_rt_fetch(baserel->relid, root); get_server_costs(rte->relid, &connection_cost, &transfer_cost); - path->startup_cost += connection_cost; - path->total_cost += connection_cost; - path->total_cost += transfer_cost * - path->parent->width * path->parent->rows; + path->path.startup_cost += connection_cost; + path->path.total_cost += connection_cost; + path->path.total_cost += transfer_cost * + path->path.parent->width * path->path.parent->rows; } diff --git a/src/backend/foreign/csv_fdw.c b/src/backend/foreign/csv_fdw.c index b3a0b89844..3aba44bc33 100644 --- a/src/backend/foreign/csv_fdw.c +++ b/src/backend/foreign/csv_fdw.c @@ -18,6 +18,8 @@ #include "foreign/foreign.h" #include "funcapi.h" #include "miscadmin.h" +#include "optimizer/cost.h" +#include "parser/parsetree.h" #include "storage/fd.h" #include "utils/builtins.h" @@ -85,7 +87,7 @@ static void Open(ForeignScanState *scanstate); static void Iterate(ForeignScanState *scanstate); static void Close(ForeignScanState *scanstate); static void ReOpen(ForeignScanState *scanstate); -static void GetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel); +static void GetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel); static HeapTuple get_next_tuple(ForeignScanState *scanstate); @@ -233,7 +235,6 @@ FreeFSConnection(FSConnection *conn) static void Open(ForeignScanState *scanstate) { - elog(DEBUG1, "%s called", __func__); Relation rel = scanstate->ss.ss_currentRelation; CsvFdwReply *reply; int i; @@ -241,6 +242,8 @@ Open(ForeignScanState *scanstate) const char **keywords; const char **values; + elog(DEBUG1, "%s called", __func__); + /* create CsvFdwReply and set default settings */
reply = (CsvFdwReply *) palloc(sizeof(*reply)); reply->filename = NULL; @@ -304,6 +307,8 @@ Iterate(ForeignScanState *scanstate) { struct stat st; + elog(DEBUG1, "opening CSV file \"%s\"", reply->filename); + reply->fp = AllocateFile(reply->filename, PG_BINARY_R); if (reply->fp == NULL) ereport(ERROR, @@ -347,7 +352,8 @@ Close(ForeignScanState *scanstate) scanstate->reply = NULL; - FreeFile(reply->fp); + if (reply->fp) + FreeFile(reply->fp); pfree(reply); } @@ -372,9 +378,75 @@ ReOpen(ForeignScanState *scanstate) * Estimate costs of scanning on a foreign table. */ static void -GetStatistics(Path *path, PlannerInfo *root, RelOptInfo *baserel) +GetStatistics(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel) { + RangeTblEntry *rte; + ForeignTable *table; + int n; + const char **keywords; + const char **values; + int i; + char *filename = NULL; + struct stat st; + BlockNumber pages; + double run_cost = 0; + double startup_cost = 0; + double cpu_per_tuple; + elog(DEBUG1, "%s called", __func__); + + /* get filename from generic option of the foreign table */ + rte = planner_rt_fetch(baserel->relid, root); + table = GetForeignTable(rte->relid); + keywords = palloc(sizeof(char *) * list_length(table->options)); + values = palloc(sizeof(char *) * list_length(table->options)); + n = flatten_generic_options(table->options, keywords, values); + + for (i = 0; i < n; i++) + { + if (strcmp(keywords[i], "filename") == 0) + { + filename = pstrdup(values[i]); + break; + } + } + + pfree(keywords); + pfree(values); + + /* at least filename must be specified */ + if (filename == NULL) + { + ereport(ERROR, + (errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY), + errmsg("generic option \"filename\" is required"))); + } + + /* get size of the CSV file; stat() follows symlinks to the real file */ + if (stat(filename, &st) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + } + pfree(filename); + + /* + * The way to estimate costs is almost same as 
cost_seqscan(), but there + * are some differences: + * - DISK costs are estimated based on CSV file size (rounded up to whole blocks). + * - CPU costs are 2x those of a seq scan; the extra is the overhead of parsing CSV + */ + pages = (st.st_size + (BLCKSZ - 1)) / BLCKSZ; + run_cost += seq_page_cost * pages; + + startup_cost += baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + cpu_per_tuple *= 2; + run_cost += cpu_per_tuple * baserel->tuples; + + path->path.startup_cost = startup_cost; + path->path.total_cost = startup_cost + run_cost; } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 3cf7e9c281..8c8711ea07 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1457,6 +1457,14 @@ _outTidPath(StringInfo str, TidPath *node) WRITE_NODE_FIELD(tidquals); } +static void +_outForeignPath(StringInfo str, ForeignPath *node) +{ + WRITE_NODE_TYPE("FOREIGNPATH"); + + _outPathInfo(str, (Path *) node); +} + static void _outAppendPath(StringInfo str, AppendPath *node) { @@ -2753,6 +2761,9 @@ _outNode(StringInfo str, void *obj) case T_TidPath: _outTidPath(str, obj); break; + case T_ForeignPath: + _outForeignPath(str, obj); + break; case T_AppendPath: _outAppendPath(str, obj); break; diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index cae54a3dc2..84b70231f3 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1391,6 +1391,9 @@ print_path(PlannerInfo *root, Path *path, int indent) case T_TidPath: ptype = "TidScan"; break; + case T_ForeignPath: + ptype = "ForeignScan"; + break; case T_AppendPath: ptype = "Append"; break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 59282aa89c..93e7a6c518 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1026,7 +1026,7 @@ cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel) * Determines
and returns the cost of scanning a foreign table sequentially. */ void -cost_foreignscan(Path *path, PlannerInfo *root, +cost_foreignscan(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel) { RangeTblEntry *rte; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 0a9c097788..d2325965ed 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1320,11 +1320,11 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel) Path * create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel) { - Path *pathnode = makeNode(Path); + ForeignPath *pathnode = makeNode(ForeignPath); - pathnode->pathtype = T_ForeignScan; - pathnode->parent = rel; - pathnode->pathkeys = NIL; /* result is always unordered */ + pathnode->path.pathtype = T_ForeignScan; + pathnode->path.parent = rel; + pathnode->path.pathkeys = NIL; /* result is always unordered */ cost_foreignscan(pathnode, root, rel); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 329dd321f9..0f5eea7be6 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1400,7 +1400,8 @@ struct FdwRoutine * Estimate costs of a foreign path. * FDW should update startup_cost and total_cost in the Path. */ - void (*GetStatistics)(Path *path, PlannerInfo *root, RelOptInfo *baserel); + void (*GetStatistics)(ForeignPath *path, PlannerInfo *root, + RelOptInfo *baserel); /* * Deparse query request and open a cursor for the foreign scan. 
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index b3ef014bd7..9067ef7ed5 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -216,6 +216,7 @@ typedef enum NodeTag T_MergePath, T_HashPath, T_TidPath, + T_ForeignPath, T_AppendPath, T_ResultPath, T_MaterialPath, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 91f4c5c1c4..2728895b5a 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -737,6 +737,14 @@ typedef struct TidPath List *tidquals; /* qual(s) involving CTID = something */ } TidPath; +/* + * ForeignPath represents a scan on a foreign table + */ +typedef struct ForeignPath +{ + Path path; +} ForeignPath; + /* * AppendPath represents an Append plan, ie, successive execution of * several member plans. diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 8698c5332c..3c155cf97b 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -81,7 +81,7 @@ extern void cost_functionscan(Path *path, PlannerInfo *root, extern void cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); extern void cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel); -extern void cost_foreignscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); +extern void cost_foreignscan(ForeignPath *path, PlannerInfo *root, RelOptInfo *baserel); extern void cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm); extern void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, Cost input_cost, double tuples, int width,