From: Shigeru Hanada Date: Mon, 7 Feb 2011 06:02:22 +0000 (+0900) Subject: Add file_fdw, foreign-data wrapper for server-side COPY FROM files. X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=0fd511b672bdbcbbcdcfce1d5970abc19473f4ef;p=users%2Fhanada%2Fpostgres.git Add file_fdw, foreign-data wrapper for server-side COPY FROM files. File_fdw uses routines which are exported from COPY FROM, and accept same formats as COPY FROM. --- diff --git a/contrib/Makefile b/contrib/Makefile index 2b314501f7..696776795e 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -18,6 +18,7 @@ SUBDIRS = \ dict_xsyn \ dummy_seclabel \ earthdistance \ + file_fdw \ fuzzystrmatch \ hstore \ intagg \ diff --git a/contrib/README b/contrib/README index 3c4e324271..5d5bdcec40 100644 --- a/contrib/README +++ b/contrib/README @@ -73,6 +73,9 @@ earthdistance - Functions for computing distances between two points on Earth by Bruno Wolff III and Hal Snyder +file_fdw + Foreign-data wrapper for server-side CSV/TEXT files + fuzzystrmatch - Levenshtein, metaphone, and soundex fuzzy string matching by Joe Conway and Joel Burton diff --git a/contrib/file_fdw/.gitignore b/contrib/file_fdw/.gitignore new file mode 100644 index 0000000000..36dc02fc75 --- /dev/null +++ b/contrib/file_fdw/.gitignore @@ -0,0 +1,3 @@ +/file_fdw.sql +# Generated subdirectories +/results/ diff --git a/contrib/file_fdw/Makefile b/contrib/file_fdw/Makefile new file mode 100644 index 0000000000..1d85645e39 --- /dev/null +++ b/contrib/file_fdw/Makefile @@ -0,0 +1,20 @@ +# contrib/file_fdw/Makefile + +MODULE_big = file_fdw +OBJS = file_fdw.o + +DATA_built = file_fdw.sql +DATA = uninstall_file_fdw.sql +REGRESS = file_fdw + + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/file_fdw +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/file_fdw/data/agg.bad b/contrib/file_fdw/data/agg.bad new file mode 100644 index 0000000000..3415b15007 --- /dev/null +++ b/contrib/file_fdw/data/agg.bad @@ -0,0 +1,4 @@ +56;@7.8@ +100;@99.097@ +0;@aaa@ +42;@324.78@ diff --git a/contrib/file_fdw/data/agg.csv b/contrib/file_fdw/data/agg.csv new file mode 100644 index 0000000000..3ee6bf24db --- /dev/null +++ b/contrib/file_fdw/data/agg.csv @@ -0,0 +1,4 @@ +56;@7.8@ +100;@99.097@ +0;@0.09561@ +42;@324.78@ diff --git a/contrib/file_fdw/data/agg.data b/contrib/file_fdw/data/agg.data new file mode 100644 index 0000000000..d92c7df4d0 --- /dev/null +++ b/contrib/file_fdw/data/agg.data @@ -0,0 +1,4 @@ +56 7.8 +100 99.097 +0 0.09561 +42 324.78 diff --git a/contrib/file_fdw/expected/.gitignore b/contrib/file_fdw/expected/.gitignore new file mode 100644 index 0000000000..a464ad144f --- /dev/null +++ b/contrib/file_fdw/expected/.gitignore @@ -0,0 +1 @@ +/file_fdw.out diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c new file mode 100644 index 0000000000..3f2f6928fa --- /dev/null +++ b/contrib/file_fdw/file_fdw.c @@ -0,0 +1,544 @@ +/*------------------------------------------------------------------------- + * + * file_fdw.c + * foreign-datga wrapper for server-side flat files. + * + * Copyright (c) 2011, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/file_fdw/file_fdw.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/reloptions.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_type.h" +#include "commands/copy.h" +#include "commands/defrem.h" +#include "foreign/foreign.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/cost.h" +#include "parser/parsetree.h" +#include "storage/fd.h" +#include "utils/builtins.h" + +PG_MODULE_MAGIC; + +/* + * Describes the valid options for objects which uses this wrapper. + */ +struct FileFdwOption +{ + const char *optname; + Oid optcontext; /* Oid of catalog in which option may appear */ +}; + +/* + * Valid options for file_fdw. + * These options are based on the options for COPY FROM command. + * + * Note: If you are adding new option for user mapping, you need to modify + * fileBeginScan(). See comments of the function for detail. + */ +static struct FileFdwOption valid_options[] = { + /* File options */ + { "filename", ForeignTableRelationId }, + + /* Format options */ + /* oids option is not supported */ + { "format", ForeignTableRelationId }, + { "header", ForeignTableRelationId }, + { "delimiter", ForeignTableRelationId }, + { "quote", ForeignTableRelationId }, + { "escape", ForeignTableRelationId }, + { "null", ForeignTableRelationId }, + + /* FIXME: implement force_not_null option */ + + /* Centinel */ + { NULL, InvalidOid } +}; + +/* + * FDW-specific information for FdwExecutionState. + */ +typedef struct FileFdwPrivate { + char *filename; + Relation rel; /* scan target relation */ + CopyState cstate; /* state of reaind file */ + List *options; /* merged generic options, excluding filename */ +} FileFdwPrivate; + +/* + * SQL functions + */ +extern Datum file_fdw_validator(PG_FUNCTION_ARGS); +extern Datum file_fdw_handler(PG_FUNCTION_ARGS); + +/* + * FDW routines + */ +static FdwPlan *filePlanRelScan(Oid foreigntableid, PlannerInfo *root, + RelOptInfo *baserel); +static FdwExecutionState *fileBeginScan(FdwPlan *fplan, ParamListInfo params); +static void fileIterate(FdwExecutionState *festate, TupleTableSlot *slot); +static void fileEndScan(FdwExecutionState *festate); +static void fileReScan(FdwExecutionState *festate); + +/* + * Helper functions + */ +static char *generate_explain_info(const char *filename, unsigned long size); +static unsigned long estimate_costs(const char *filename, RelOptInfo *baserel, + double *startup_cost, double *total_cost); + +/* + * Check if the provided option is one of valid options. + * context is the Oid of the catalog the option came from, or 0 if we + * don't care. + */ +static bool +is_valid_option(const char *option, Oid context) +{ + struct FileFdwOption *opt; + + for (opt = valid_options; opt->optname; opt++) + if (context == opt->optcontext && strcmp(opt->optname, option) == 0) + return true; + return false; +} + +/* + * Validate the generic option given to FOREIGN DATA WRAPPER, SERVER, USER + * MAPPING or FOREIGN TABLE which use file_fdw. + * Raise an ERROR if the option or its value is considered + * invalid. + */ +PG_FUNCTION_INFO_V1(file_fdw_validator); +Datum +file_fdw_validator(PG_FUNCTION_ARGS) +{ + List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); + Oid catalog = PG_GETARG_OID(1); + + ListCell *cell; + + char *format = NULL; + char *delimiter = NULL; + char *quote = NULL; + char *escape = NULL; + char *null = NULL; + bool header; + + /* Only superuser can change generic options of the foreign table */ + if (catalog == ForeignTableRelationId && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser can change foreign table options"))); + + /* Vaidate each options */ + foreach(cell, options_list) + { + DefElem *def = lfirst(cell); + + if (!is_valid_option(def->defname, catalog)) + { + struct FileFdwOption *opt; + StringInfoData buf; + + /* + * Unknown option specified, complain about it. Provide a hint + * with list of valid options for the object. + */ + initStringInfo(&buf); + for (opt = valid_options; opt->optname; opt++) + if (catalog == opt->optcontext) + appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "", + opt->optname); + + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("invalid option \"%s\"", def->defname), + errhint("Valid options in this context are: %s", buf.data))); + + PG_RETURN_BOOL(false); + } + + if (strcmp(def->defname, "format") == 0) + { + if (pg_strcasecmp(strVal(def->arg), "csv") != 0 && + pg_strcasecmp(strVal(def->arg), "text") != 0) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("format must be csv or text"))); + format = strVal(def->arg); + } + else if (strcmp(def->defname, "header") == 0) + { + header = defGetBoolean(def); + } + else if (strcmp(def->defname, "delimiter") == 0) + { + if (strlen(strVal(def->arg)) != 1) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("delimiter must be a single one-byte1 character"))); + if (strchr(strVal(def->arg), '\r') != NULL || + strchr(strVal(def->arg), '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("delimiter cannot be newline or carriage return"))); + delimiter = strVal(def->arg); + } + else if (strcmp(def->defname, "quote") == 0) + { + if (strlen(strVal(def->arg)) != 1) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("quote must be 1 byte"))); + quote = strVal(def->arg); + } + else if (strcmp(def->defname, "escape") == 0) + { + if (strlen(strVal(def->arg)) != 1) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("escape must be 1 byte"))); + escape = strVal(def->arg); + } + else if (strcmp(def->defname, "null") == 0) + { + if (strchr(strVal(def->arg), '\r') != NULL || + strchr(strVal(def->arg), '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("null representation cannot use newline or carriage return"))); + null = strVal(def->arg); + } + } + + /* Check options which depend on the file format. */ + if (format != NULL && pg_strcasecmp(format, "text") == 0) + { + if (delimiter && strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789", + delimiter[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("delimiter cannot be \"%s\"", delimiter))); + + if (escape != NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("escape available only in CSV mode"))); + } + else if (format != NULL && pg_strcasecmp(format, "csv") == 0) + { + if (null != NULL && quote != NULL && strchr(null, quote[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("quote must not appear in the NULL specification"))); + } + + if (delimiter != NULL && quote != NULL) + if (strcmp(delimiter, quote) == 0) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("delimiter and quote must be different"))); + + if (null != NULL && delimiter != NULL) + if (strchr(null, delimiter[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("delimiter must not appear in the NULL specification"))); + + PG_RETURN_BOOL(true); +} + +/* + * return foreign-data wrapper handler object to execute foreign-data wrapper + * routines. + */ +PG_FUNCTION_INFO_V1(file_fdw_handler); +Datum +file_fdw_handler(PG_FUNCTION_ARGS) +{ + static FdwRoutine file_fdw_routine = + { + filePlanRelScan, + fileBeginScan, + fileIterate, + fileReScan, + fileEndScan, + }; + + PG_RETURN_POINTER(&file_fdw_routine); +} + +/* + * Create a FdwPlan for a scan on the foreign table. + * + * FdwPlan must be able to be copied by copyObject(), so private area is a list + * of copy-able elements. The list consists of elements below: + * + * (1) oid of the target relation, Oid Const + * (2) name of the file, String Value + * (3) list of fdw options excluding filename, List of DefElem + * + * This format must be used commonly in other planning functions, such as + * PlanQuery and PlanNative. + */ +static FdwPlan * +filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel) +{ + Const *relid; + Value *filename = NULL; + ulong size; + FdwPlan *fplan; + ForeignTable *table; + ForeignServer *server; + ForeignDataWrapper *wrapper; + List *options; + ListCell *lc, *prev; + + /* + * Create new relid instance because we use 'private' list as a pointer + * list. + */ + relid = makeConst(OIDOID, + -1, + sizeof(Oid), + ObjectIdGetDatum(foreigntableid), + false, true); + + /* Extract options from FDW objects */ + table = GetForeignTable(foreigntableid); + server = GetForeignServer(table->serverid); + wrapper = GetForeignDataWrapper(server->fdwid); + options = NIL; + options = list_concat(options, wrapper->options); + options = list_concat(options, server->options); + options = list_concat(options, table->options); + + /* + * Split filename option off from the list because filename should be + * passed as another parameter to BeginCopyFrom(). + */ + prev = NULL; + foreach (lc, options) + { + DefElem *def = lfirst(lc); + if (strcmp(def->defname, "filename") == 0) + { + filename = makeString(strVal(def->arg)); + options = list_delete_cell(options, lc, prev); + break; + } + prev = lc; + } + if (filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY), + errmsg("filename is required for file_fdw scan"))); + + /* Construct FdwPlan and store relid and options in private area */ + fplan = makeNode(FdwPlan); + size = estimate_costs(strVal(filename), rel, + &fplan->startup_cost, &fplan->total_cost); + fplan->explainInfo = generate_explain_info(strVal(filename), size); + fplan->fdw_private = NIL; + fplan->fdw_private = lappend(fplan->fdw_private, relid); + fplan->fdw_private = lappend(fplan->fdw_private, filename); + fplan->fdw_private = lappend(fplan->fdw_private, options); + + return fplan; +} + +/* + * BeginScan() + * - initiate access to the file with creating CopyState + * + * Parameters for parsing file such as filename and format are passed via + * generic options of FDW-related objects; foreign-data wrapper, server and + * foreign table. User mapping is not used to get options because there is no + * valid option in context of user mapping. + */ +static FdwExecutionState * +fileBeginScan(FdwPlan *fplan, ParamListInfo params) +{ + Const *relid_const; + Oid relid; + Value *filename; + List *options; + Relation rel; + CopyState cstate; + FileFdwPrivate *fdw_private; + FdwExecutionState *festate; + + elog(DEBUG3, "%s called", __FUNCTION__); + + /* Get oid of the relation and option list from private area of FdwPlan. */ + relid_const = list_nth(fplan->fdw_private, 0); + filename = list_nth(fplan->fdw_private, 1); + options = list_nth(fplan->fdw_private, 2); + + relid = DatumGetObjectId(relid_const->constvalue); + + /* + * Create CopyState from FDW options. We always acquire all columns. + * We open the relation with no lock because it's assumed that appropriate + * lock has been acquired already. The rel should be closed in + * fileEndScan(). + */ + rel = heap_open(relid, NoLock); + cstate = BeginCopyFrom(rel, strVal(filename), NIL, options); + + /* + * Pack file information into private and pass it to subsequent functions. + * We also store information enough to call BeginCopyFrom() again. + */ + festate = palloc0(sizeof(FdwExecutionState)); + fdw_private = palloc0(sizeof(FileFdwPrivate)); + fdw_private->filename = strVal(filename); + fdw_private->rel = rel; + fdw_private->cstate = cstate; + fdw_private->options = options; + festate->fdw_private = (void *) fdw_private; + + return festate; +} + +/* + * Iterate() + * - create HeapTuple from the record in the file. + */ +static void +fileIterate(FdwExecutionState *festate, TupleTableSlot *slot) +{ + FileFdwPrivate *fdw_private = (FileFdwPrivate *) festate->fdw_private; + bool found; + ErrorContextCallback errcontext; + + elog(DEBUG3, "%s called for \"%s\"", __FUNCTION__, fdw_private->filename); + + /* Set up callback to identify error line number. */ + errcontext.callback = CopyFromErrorCallback; + errcontext.arg = (void *) fdw_private->cstate; + errcontext.previous = error_context_stack; + error_context_stack = &errcontext; + + /* + * If next tuple has been found, store it into the slot as materialized + * tuple. Otherwise, clear the slot to tell executor that we have reached + * EOF. + */ + ExecClearTuple(slot); + found = NextCopyFrom(fdw_private->cstate, slot->tts_values, slot->tts_isnull, + NULL); + if (found) + ExecStoreVirtualTuple(slot); + + /* + * Cleanup error callback. We must uninstall callback before leaving + * Iterate() because other scan in the same plan tree might generate error. + */ + error_context_stack = errcontext.previous; +} + +/* + * Finish scanning foreign table and dispose objects used for this scan. + */ +static void +fileEndScan(FdwExecutionState *festate) +{ + FileFdwPrivate *fdw_private; + + elog(DEBUG3, "%s called", __FUNCTION__); + + fdw_private = (FileFdwPrivate *) festate->fdw_private; + EndCopyFrom(fdw_private->cstate); + + heap_close(fdw_private->rel, NoLock); + pfree(fdw_private); + pfree(festate); +} + +/* + * Execute query with new parameter. + */ +static void +fileReScan(FdwExecutionState *festate) +{ + FileFdwPrivate *fdw_private = (FileFdwPrivate *) festate->fdw_private; + + elog(DEBUG3, "%s called for \"%s\"", __FUNCTION__, fdw_private->filename); + + EndCopyFrom(fdw_private->cstate); + fdw_private->cstate = BeginCopyFrom(fdw_private->rel, + fdw_private->filename, + NIL, + fdw_private->options); +} + +/* + * Generate explain info string from information about the file. + */ +static char * +generate_explain_info(const char *filename, unsigned long size) +{ + StringInfoData explainInfo; + + initStringInfo(&explainInfo); + + /* + * Construct explain information. + */ + appendStringInfo(&explainInfo, "file=\"%s\", size=%lu", filename, size); + + return explainInfo.data; +} + +/* + * Estimate costs of scanning on a foreign table, and return size of the file. + */ +static unsigned long +estimate_costs(const char *filename, RelOptInfo *baserel, + double *startup_cost, double *total_cost) +{ + struct stat stat; + BlockNumber pages; + double run_cost = 0; + double cpu_per_tuple; + + elog(DEBUG3, "%s called", __FUNCTION__); + + /* get size of the file */ + if (lstat(filename, &stat) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + } + + /* + * The way to estimate costs is almost same as cost_seqscan(), but there + * are some differences: + * - DISK costs are estimated from file size. + * - CPU costs are 10x of seq scan, for overhead of parsing records. + */ + pages = stat.st_size / BLCKSZ + (stat.st_size % BLCKSZ > 0 ? 1 : 0); + run_cost += seq_page_cost * pages; + + *startup_cost += baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + run_cost += cpu_per_tuple * 10 * baserel->tuples; + *total_cost = *startup_cost + run_cost; + + return stat.st_size; +} + diff --git a/contrib/file_fdw/file_fdw.sql.in b/contrib/file_fdw/file_fdw.sql.in new file mode 100644 index 0000000000..8aadd2496e --- /dev/null +++ b/contrib/file_fdw/file_fdw.sql.in @@ -0,0 +1,19 @@ +/* contrib/file_fdw/file_fdw.sql.in */ + +-- Adjust this setting to control where the objects get created. +SET search_path = public; + +-- create wrapper with validator and handler +CREATE OR REPLACE FUNCTION file_fdw_validator (text[], oid) +RETURNS bool +AS 'MODULE_PATHNAME','file_fdw_validator' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION file_fdw_handler () +RETURNS fdw_handler +AS 'MODULE_PATHNAME','file_fdw_handler' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER file_fdw +VALIDATOR file_fdw_validator HANDLER file_fdw_handler; + diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source new file mode 100644 index 0000000000..cf2c5b782c --- /dev/null +++ b/contrib/file_fdw/input/file_fdw.source @@ -0,0 +1,110 @@ +-- +-- Test foreign-data wrapper file_fdw. +-- + +-- Clean up in case a prior regression run failed + +-- Suppress NOTICE messages when roles don't exist +SET client_min_messages TO 'error'; + +DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user; + +RESET client_min_messages; + +CREATE ROLE file_fdw_superuser LOGIN SUPERUSER; -- is a superuser +CREATE ROLE file_fdw_user LOGIN; -- has priv and user mapping +CREATE ROLE no_priv_user LOGIN; -- has priv but no user mapping + +-- Install file_fdw +SET client_min_messages = warning; +\set ECHO none +\i file_fdw.sql +\set ECHO all +RESET client_min_messages; + +-- file_fdw_superuser owns fdw-related objects +SET ROLE file_fdw_superuser; +CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; + +-- privilege tests +SET ROLE file_fdw_user; +CREATE FOREIGN DATA WRAPPER file_fdw2 VALIDATOR file_fdw_validator HANDLER file_fdw_handler; -- ERROR +CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw; -- ERROR +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; -- ERROR + +SET ROLE file_fdw_superuser; +GRANT USAGE ON FOREIGN SERVER file_server TO file_fdw_user; + +SET ROLE file_fdw_user; +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; + +-- create user mappings and grant privilege to test users +SET ROLE file_fdw_superuser; +CREATE USER MAPPING FOR file_fdw_superuser SERVER file_server; +CREATE USER MAPPING FOR no_priv_user SERVER file_server; + +-- validator tests +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape '-'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '-', null '=-='); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', null '=-='); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', quote '-'); -- ERROR +CREATE FOREIGN TABLE agg_text ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'text', filename '@abs_srcdir@/data/agg.data', delimiter ' ', null '\N'); +GRANT SELECT ON agg_text TO file_fdw_user; +CREATE FOREIGN TABLE agg_csv ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_bad ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null ''); + +-- basic query tests +SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; +SELECT * FROM agg_csv ORDER BY a; +SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + +-- error context report tests +SELECT * FROM agg_bad; -- ERROR + +-- misc query tests +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv; +\t off +PREPARE st(int) AS SELECT * FROM agg_csv WHERE a = $1; +EXECUTE st(100); +EXECUTE st(100); +DEALLOCATE st; + +-- privilege tests +SET ROLE file_fdw_superuser; +SELECT * FROM agg_text ORDER BY a; +SET ROLE file_fdw_user; +SELECT * FROM agg_text ORDER BY a; +SET ROLE no_priv_user; +SELECT * FROM agg_text ORDER BY a; -- ERROR +SET ROLE file_fdw_user; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_text; +\t off + +-- privilege tests for object +SET ROLE file_fdw_superuser; +ALTER FOREIGN TABLE agg_text OWNER TO file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +SET ROLE file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +SET ROLE file_fdw_superuser; + +-- cleanup +RESET ROLE; +DROP FOREIGN DATA WRAPPER file_fdw CASCADE; +DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source new file mode 100644 index 0000000000..f8ce4ca265 --- /dev/null +++ b/contrib/file_fdw/output/file_fdw.source @@ -0,0 +1,166 @@ +-- +-- Test foreign-data wrapper file_fdw. +-- +-- Clean up in case a prior regression run failed +-- Suppress NOTICE messages when roles don't exist +SET client_min_messages TO 'error'; +DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user; +RESET client_min_messages; +CREATE ROLE file_fdw_superuser LOGIN SUPERUSER; -- is a superuser +CREATE ROLE file_fdw_user LOGIN; -- has priv and user mapping +CREATE ROLE no_priv_user LOGIN; -- has priv but no user mapping +-- Install file_fdw +SET client_min_messages = warning; +\set ECHO none +RESET client_min_messages; +-- file_fdw_superuser owns fdw-related objects +SET ROLE file_fdw_superuser; +CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; +-- privilege tests +SET ROLE file_fdw_user; +CREATE FOREIGN DATA WRAPPER file_fdw2 VALIDATOR file_fdw_validator HANDLER file_fdw_handler; -- ERROR +ERROR: permission denied to create foreign-data wrapper "file_fdw2" +HINT: Must be superuser to create a foreign-data wrapper. +CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw; -- ERROR +ERROR: permission denied for foreign-data wrapper file_fdw +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; -- ERROR +ERROR: permission denied for foreign server file_server +SET ROLE file_fdw_superuser; +GRANT USAGE ON FOREIGN SERVER file_server TO file_fdw_user; +SET ROLE file_fdw_user; +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; +-- create user mappings and grant privilege to test users +SET ROLE file_fdw_superuser; +CREATE USER MAPPING FOR file_fdw_superuser SERVER file_server; +CREATE USER MAPPING FOR no_priv_user SERVER file_server; +-- validator tests +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR +ERROR: format must be csv or text +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR +ERROR: delimiter cannot be "a" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape '-'); -- ERROR +ERROR: escape available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '-', null '=-='); -- ERROR +ERROR: quote must not appear in the NULL specification +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', null '=-='); -- ERROR +ERROR: delimiter must not appear in the NULL specification +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', quote '-'); -- ERROR +ERROR: delimiter and quote must be different +CREATE FOREIGN TABLE agg_text ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'text', filename '@abs_srcdir@/data/agg.data', delimiter ' ', null '\N'); +GRANT SELECT ON agg_text TO file_fdw_user; +CREATE FOREIGN TABLE agg_csv ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_bad ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null ''); +-- basic query tests +SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; + a | b +-----+-------- + 42 | 324.78 + 100 | 99.097 +(2 rows) + +SELECT * FROM agg_csv ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 100 | 99.097 +(3 rows) + +SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + a | b | a | b +-----+---------+-----+--------- + 0 | 0.09561 | 0 | 0.09561 + 42 | 324.78 | 42 | 324.78 + 100 | 99.097 | 100 | 99.097 +(3 rows) + +-- error context report tests +SELECT * FROM agg_bad; -- ERROR +ERROR: invalid input syntax for type real: "aaa" +CONTEXT: COPY agg_bad, line 3, column b: "aaa" +-- misc query tests +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv; + Foreign Scan on public.agg_csv + Output: a, b + FDW-Info: file="@abs_srcdir@/data/agg.csv", size=46 + +\t off +PREPARE st(int) AS SELECT * FROM agg_csv WHERE a = $1; +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +DEALLOCATE st; +-- privilege tests +SET ROLE file_fdw_superuser; +SELECT * FROM agg_text ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 56 | 7.8 + 100 | 99.097 +(4 rows) + +SET ROLE file_fdw_user; +SELECT * FROM agg_text ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 56 | 7.8 + 100 | 99.097 +(4 rows) + +SET ROLE no_priv_user; +SELECT * FROM agg_text ORDER BY a; -- ERROR +ERROR: permission denied for relation agg_text +SET ROLE file_fdw_user; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_text; + Foreign Scan on public.agg_text + Output: a, b + FDW-Info: file="@abs_srcdir@/data/agg.data", size=38 + +\t off +-- privilege tests for object +SET ROLE file_fdw_superuser; +ALTER FOREIGN TABLE agg_text OWNER TO file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +SET ROLE file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +ERROR: only superuser can change foreign table options +SET ROLE file_fdw_superuser; +-- cleanup +RESET ROLE; +DROP FOREIGN DATA WRAPPER file_fdw CASCADE; +NOTICE: drop cascades to 7 other objects +DETAIL: drop cascades to server file_server +drop cascades to user mapping for file_fdw_user +drop cascades to user mapping for file_fdw_superuser +drop cascades to user mapping for no_priv_user +drop cascades to foreign table agg_text +drop cascades to foreign table agg_csv +drop cascades to foreign table agg_bad +DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/sql/.gitignore b/contrib/file_fdw/sql/.gitignore new file mode 100644 index 0000000000..ebf16fed94 --- /dev/null +++ b/contrib/file_fdw/sql/.gitignore @@ -0,0 +1 @@ +/file_fdw.sql diff --git a/contrib/file_fdw/uninstall_file_fdw.sql b/contrib/file_fdw/uninstall_file_fdw.sql new file mode 100644 index 0000000000..b0b58f0fb9 --- /dev/null +++ b/contrib/file_fdw/uninstall_file_fdw.sql @@ -0,0 +1,10 @@ +/* contrib/file_fdw/uninstall_file_fdw.sql */ + +-- Adjust this setting to control where the objects get created. +SET search_path = public; + +-- create wrapper with validator and handler +DROP FOREIGN DATA WRAPPER file_fdw; +DROP FUNCTION file_fdw_handler (); +DROP FUNCTION file_fdw_validator (text[], oid); + diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 75d08d5f69..c781c5608d 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -93,6 +93,7 @@ psql -d dbname -f SHAREDIR/contrib/module.sql &dict-xsyn; &dummy-seclabel; &earthdistance; + &file-fdw; &fuzzystrmatch; &hstore; &intagg; diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml new file mode 100644 index 0000000000..f1a293fba6 --- /dev/null +++ b/doc/src/sgml/file-fdw.sgml @@ -0,0 +1,123 @@ + + + + file_fdw + + + file_fdw + + + + The file_fdw module provides foreign-data wrapper + file_fdw which can be used to access + files on the server-side with format accepted by + COPY FROM. + + + + Functions + + + + + + file_fdw_validator(text[], oid) returns bool + + + + + file_fdw_validator is a validator function to + validate generic options of file_fdw and related objects. + + + The location of data file is specified in filename + generic option of the foreign table. + Options valid for COPY FROM can be specified in + generic options of the foreign table, other than + force_not_null and oids. + The force_not_null must be specified in generic + option of the column of the foreign table with boolean value. + The oids is not supported in file_fdw. + + + All of foreign-data wrapper, server or user mapping accepts no generic + option. + + + + + + + file_fdw_handler() returns fdw_handler + + + + + file_fdw_handler is a foreign-data wrapper + handler function which returns foreign-data wrapper handler for + COPY FROM files in type of fdw_handler. + Since fdw_hanlder is a pseudo type, file_fdw_handler can't be + called from a SQL statement. + + + This handler opens the data file pointed by the generic option + filename, and read records from the file along the + format specified with other generic options. The result tuples + are returned to backend one by one, so the memory usage would not depend + on the size of the file. + + + + + + + + + + Details of file_fdw + + + Table-level options + + Changing table-level options requires superuser privilege for security reason. + Non-superuser can't change any table-level option even if the user is the + owner of the table. This restriction is essentially for 'filename' option, but + currently applied to all table-level options. + + + + + Connection management + + The file_fdw doesn't use any connection, so ConnectServer and + FreeFSConnection is not implemented. + + + + + Cost estimation + + The file_fdw estimates the costs of scanning with the size of the file. + + + + + WHERE clause push-down + + The file_fdw can't push down any WHERE clause, so it always reads all of + the records for each scan. If the plan requires to rescan the table, + file_fdw resets the read-pointer and reads the whole of the file again. + + + + + EXPLAIN VERBOSE + + EXPLAIN VERBOSE on the foreign table which uses file_fdw shows the name and + the size of the data file. + + + + + + diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index b9d4ea59b1..42149dc234 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -105,6 +105,7 @@ +