Collect and return query substrings corresponding to each SQL statement
authorPavan Deolasee <[email protected]>
Thu, 3 Mar 2016 05:35:30 +0000 (11:05 +0530)
committerPavan Deolasee <[email protected]>
Thu, 3 Mar 2016 05:35:30 +0000 (11:05 +0530)
while parsing a multi-statement query separated by ';'

raw_parser() returns a list of parsetrees after parsing a multi-statement SQL
query, where each parsetree corresponds to one SQL statement. It does not have
any mechanism to return the source text of the SQL statement. In Postgres-XL,
we send out the query text as it is to remote datanodes and coordinators while
dealing with utility statements. Not having access to the text of each individual
SQL statement is a problem because we end up sending the same text again and
again, leading to various issues.

This patch adds a rudimentary mechanism to return a list of query strings
along with the list of parsetrees.

src/backend/commands/tablecmds.c
src/backend/parser/gram.y
src/backend/parser/parse_type.c
src/backend/parser/parser.c
src/backend/parser/scan.l
src/backend/tcop/postgres.c
src/include/parser/gramparse.h
src/include/parser/parser.h
src/include/parser/scanner.h
src/include/tcop/tcopprot.h
src/pl/plpgsql/src/pl_gram.y

index 7ce0f6bad98026fa88ba284224a700f1cc33f56d..49db8ff58dea435142277c25dc30a62f685836e8 100644 (file)
@@ -8892,7 +8892,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
         * parse_analyze() or the rewriter, but instead we need to pass them
         * through parse_utilcmd.c to make them ready for execution.
         */
-       raw_parsetree_list = raw_parser(cmd);
+       raw_parsetree_list = raw_parser(cmd, NULL);
        querytree_list = NIL;
        foreach(list_item, raw_parsetree_list)
        {
index 3d12c3cc035ff5f454223ea367110d441942ca37..4593316fb93582f2347327b5ddbfdcab1018880c 100644 (file)
@@ -123,6 +123,14 @@ typedef struct ImportQual
        List       *table_names;
 } ImportQual;
 
+typedef struct StmtMulti
+{
+       List    *parsetrees;    /* raw parse trees, one per non-empty statement */
+       List    *queries;       /* Value (string) nodes holding each statement's text */
+       int             offset;         /* scan location subtracted when truncating lastQuery */
+       char    *lastQuery;     /* text of the last statement added; cut at a later ';' */
+} StmtMulti;
+
 /* ConstraintAttributeSpec yields an integer bitmask of these flags: */
 #define CAS_NOT_DEFERRABLE                     0x01
 #define CAS_DEFERRABLE                         0x02
@@ -231,6 +239,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
        InsertStmt                      *istmt;
        VariableSetStmt         *vsetstmt;
 /* PGXC_BEGIN */
+       struct StmtMulti                        *stmtmulti;
        DistributeBy            *distby;
        PGXCSubCluster          *subclus;
 /* PGXC_END */
@@ -354,7 +363,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <ival>   import_qualification_type
 %type <importqual> import_qualification
 
-%type <list>   stmtblock stmtmulti
+%type <stmtmulti> stmtmulti
+%type <list>   stmtblock
                                OptTableElementList TableElementList OptInherit definition
                                OptTypedTableElementList TypedTableElementList
                                reloptions opt_reloptions
@@ -758,7 +768,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
  */
 stmtblock:     stmtmulti
                        {
-                               pg_yyget_extra(yyscanner)->parsetree = $1;
+                               pg_yyget_extra(yyscanner)->parsetree = $1 ? $1->parsetrees : NIL; /* $1 is NULL when no statement was non-empty */
+                               pg_yyget_extra(yyscanner)->queries = $1 ? $1->queries : NIL;
                        }
                ;
 
@@ -766,16 +777,59 @@ stmtblock:        stmtmulti
 stmtmulti:     stmtmulti ';' stmt
                                {
                                        if ($3 != NULL)
-                                               $$ = lappend($1, $3);
+                                       {
+                                               char *query = scanner_get_query(@3, -1, yyscanner);
+                                               /*
+                                                * Because of the way multi-commands are parsed by the
+                                                * parser, when the earlier command was parsed and
+                                                * reduced to a 'stmtmulti', we did not have the
+                                                * end-of-the-query marker. But now that we have seen
+                                                * the ';' token, add '\0' at the corresponding offset
+                                                * to get a separated command.
+                                                */
+                                               if ($1->lastQuery)
+                                                       $1->lastQuery[@2 - $1->offset] = '\0';
+                                               $1->offset = @2;
+                                               $1->parsetrees = lappend($1->parsetrees, $3);
+                                               $1->queries = lappend($1->queries, makeString(query));
+                                               $1->lastQuery = query;
+                                               $$ = $1;
+                                       }
                                        else
                                                $$ = $1;
                                }
                        | stmt
                                {
                                        if ($1 != NULL)
-                                               $$ = list_make1($1);
+                                       {
+                                               StmtMulti *n = (StmtMulti *) palloc0(sizeof (StmtMulti));
+                                               char *query = scanner_get_query(@1, -1, yyscanner);
+                                               n->lastQuery = query;
+
+                                               /*
+                                                * Keep track of the offset where $1 started. We don't
+                                                * have the offset where it ends so we copy the entire
+                                                * query to the end. If later, we find a ';' followed
+                                                * by another command, we'll add the '\0' at the
+                                                * appropriate offset
+                                                *
+                                                * XXX May be there is a better way to get the matching  
+                                                * portion of the query string, but this does the trick
+                                                * for regression as well as the problem we are trying
+                                                * to solve with multi-command queries
+                                                */
+                                               n->offset = @1;
+
+                                               /*
+                                                * Collect both parsetree as well as the original query
+                                                * that resulted in the parsetree
+                                                */
+                                               n->parsetrees = list_make1($1);
+                                               n->queries = list_make1(makeString(query));
+                                               $$ = n;
+                                       }
                                        else
-                                               $$ = NIL;
+                                               $$ = NULL;
                                }
                ;
 
index 661663994eec09e94662f19772a9ceeae226e154..69023cea8d5b6b54c75b2d228c4885baa18dc32e 100644 (file)
@@ -739,7 +739,7 @@ typeStringToTypeName(const char *str)
        ptserrcontext.previous = error_context_stack;
        error_context_stack = &ptserrcontext;
 
-       raw_parsetree_list = raw_parser(buf.data);
+       raw_parsetree_list = raw_parser(buf.data, NULL);
 
        error_context_stack = ptserrcontext.previous;
 
index fdf5a6a1cafc6f5559a9ce2438cbc45a36ec0e60..6a01faa247aa5b0f4c9a7094ca2489330a1d4a2f 100644 (file)
@@ -32,7 +32,7 @@
  * Returns a list of raw (un-analyzed) parse trees.
  */
 List *
-raw_parser(const char *str)
+raw_parser(const char *str, List **queries)
 {
        core_yyscan_t yyscanner;
        base_yy_extra_type yyextra;
@@ -57,6 +57,9 @@ raw_parser(const char *str)
        if (yyresult)                           /* error */
                return NIL;
 
+       if (queries)
+               *queries = yyextra.queries;
+
        return yyextra.parsetree;
 }
 
index 82b20c6e5f695054acbebf7e6f40a169fd7c5597..0d36717f4e05877acc513ff40a9c972b1c7aa1d8 100644 (file)
@@ -1103,6 +1103,10 @@ scanner_init(const char *str,
        yyext->keywords = keywords;
        yyext->num_keywords = num_keywords;
 
+#ifdef XCP
+       yyext->query = pstrdup(str);
+#endif
+
        yyext->backslash_quote = backslash_quote;
        yyext->escape_string_warning = escape_string_warning;
        yyext->standard_conforming_strings = standard_conforming_strings;
@@ -1526,3 +1530,35 @@ core_yyfree(void *ptr, core_yyscan_t yyscanner)
        if (ptr)
                pfree(ptr);
 }
+
+/*
+ * Return a palloc0'd copy of the substring of the original query string
+ * that starts at byte offset 'start' and is 'len' bytes long.
+ *
+ * A 'start' of -1 is treated as offset 0, and a 'len' of -1 means "through
+ * the end of the string". Any other range that does not lie entirely
+ * inside the query string yields NULL.
+ */
+char *
+scanner_get_query(int start, int len, core_yyscan_t yyscanner)
+{
+       size_t  querylen = strlen(yyextra->query);
+       char   *query;
+
+       /* If the caller passes an unknown offset (-1), just assume 0 */
+       if (start == -1)
+               start = 0;
+       if (start < 0 || (size_t) start > querylen)
+               return NULL;
+
+       /* Compare against the remaining bytes so 'start + len' cannot overflow */
+       if (len == -1)
+               len = (int) (querylen - start);
+       else if (len < 0 || (size_t) len > querylen - start)
+               return NULL;
+
+       /* palloc0 zero-fills, which also supplies the terminating NUL */
+       query = palloc0(len + 1);
+       memcpy(query, yyextra->query + start, len);
+       return query;
+}
index 25c82b4c17a148e4bc620f771dacadc9777d409f..52dd180877e00be44da3b2ca9be4447dc78e6bec 100644 (file)
@@ -754,7 +754,7 @@ ProcessClientWriteInterrupt(bool blocked)
  * commands are not processed any further than the raw parse stage.
  */
 List *
-pg_parse_query(const char *query_string)
+pg_parse_query_internal(const char *query_string, List **querysource_list)
 {
        List       *raw_parsetree_list;
 
@@ -763,7 +763,7 @@ pg_parse_query(const char *query_string)
        if (log_parser_stats)
                ResetUsage();
 
-       raw_parsetree_list = raw_parser(query_string);
+       raw_parsetree_list = raw_parser(query_string, querysource_list);
 
        if (log_parser_stats)
                ShowUsage("PARSER STATISTICS");
@@ -786,6 +786,18 @@ pg_parse_query(const char *query_string)
        return raw_parsetree_list;
 }
 
+List *
+pg_parse_query(const char *query_string)
+{
+       return pg_parse_query_internal(query_string, NULL); /* NULL: caller does not want per-statement source text */
+}
+
+List *
+pg_parse_query_get_source(const char *query_string, List **querysource_list)
+{
+       return pg_parse_query_internal(query_string, querysource_list); /* passes querysource_list through to receive the per-statement text */
+}
+
 /*
  * Given a raw parsetree (gram.y output), and optionally information about
  * types of parameter symbols ($n), perform parse analysis and rule rewriting.
@@ -1044,6 +1056,8 @@ exec_simple_query(const char *query_string)
        MemoryContext oldcontext;
        List       *parsetree_list;
        ListCell   *parsetree_item;
+       List       *querysource_list;
+       ListCell   *querysource_item;
        bool            save_log_statement_stats = log_statement_stats;
        bool            was_logged = false;
        bool            isTopLevel;
@@ -1092,7 +1106,7 @@ exec_simple_query(const char *query_string)
         * Do basic parsing of the query or queries (this should be safe even if
         * we are in aborted transaction state!)
         */
-       parsetree_list = pg_parse_query(query_string);
+       parsetree_list = pg_parse_query_get_source(query_string, &querysource_list);
 
 #ifdef XCP
        if (IS_PGXC_LOCAL_COORDINATOR && list_length(parsetree_list) > 1)
@@ -1159,9 +1173,10 @@ exec_simple_query(const char *query_string)
        /*
         * Run through the raw parsetree(s) and process each one.
         */
-       foreach(parsetree_item, parsetree_list)
+       forboth(parsetree_item, parsetree_list, querysource_item, querysource_list)
        {
                Node       *parsetree = (Node *) lfirst(parsetree_item);
+               char       *querysource = ((Value *) lfirst(querysource_item))->val.str;
                bool            snapshot_set = false;
                const char *commandTag;
                char            completionTag[COMPLETION_TAG_BUFSIZE];
@@ -1274,10 +1289,23 @@ exec_simple_query(const char *query_string)
                 * We don't have to copy anything into the portal, because everything
                 * we are passing here is in MessageContext, which will outlive the
                 * portal anyway.
+                *
+                * The query_string may contain multiple commands separated by ';' and
+                * we have a separate parsetree corresponding to each such command.
+                * Since we later may send down the query to the remote nodes
+                * (especially for utility queries), using the query_string is a
+                * problem because the same query will be sent out multiple times, one
+                * for each command processed. So we taught the parser to return the
+                * portion of the query_string along with the parsetree and use that
+                * while defining a portal below.
+                *
+                * XXX Since the portal expects to see a valid query_string, if the
+                * substring is not available, use the original query_string. Not
+                * elegant, but far better than what we were doing earlier.
                 */
                PortalDefineQuery(portal,
                                                  NULL,
-                                                 query_string,
+                                                 querysource ? querysource : query_string,
                                                  commandTag,
                                                  plantree_list,
                                                  NULL);
index 100fdfb213ef442c8676a2dc90267e129372f8b3..6c49917da3f64af4413b88c21211abbfccfcf676 100644 (file)
@@ -53,6 +53,7 @@ typedef struct base_yy_extra_type
         * State variables that belong to the grammar.
         */
        List       *parsetree;          /* final parse result is delivered here */
+       List       *queries;
 } base_yy_extra_type;
 
 /*
index 4208d545ab7e4f29150bf5643f192f5ed3755488..af77da59aac0c0699aa7a432ca6e1b3ce5b443b3 100644 (file)
@@ -32,7 +32,7 @@ extern PGDLLIMPORT bool standard_conforming_strings;
 
 
 /* Primary entry point for the raw parsing functions */
-extern List *raw_parser(const char *str);
+extern List *raw_parser(const char *str, List **queries);
 
 /* Utility functions exported by gram.y (perhaps these should be elsewhere) */
 extern List *SystemFuncName(char *name);
index 9e38c0d1971f5acb41c66fe2b7a2fa0b16aabd7b..b0d890e82ce9b76abb25301d353348efd00241b0 100644 (file)
@@ -72,6 +72,13 @@ typedef struct core_yy_extra_type
        char       *scanbuf;
        Size            scanbuflen;
 
+#ifdef XCP
+       /*
+        * Pointer to the original query string
+        */
+       char       *query;
+#endif
+
        /*
         * The keyword list to use.
         */
@@ -126,5 +133,6 @@ extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
                   core_yyscan_t yyscanner);
 extern int     scanner_errposition(int location, core_yyscan_t yyscanner);
 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
+extern char *scanner_get_query(int start, int len, core_yyscan_t yyscanner);
 
 #endif   /* SCANNER_H */
index 5abc26e864d5fbb5fac34d47388fb672beef0072..de4591f110e799d4274931e2ac84ef4bd78c7674 100644 (file)
@@ -47,6 +47,7 @@ typedef enum
 extern int     log_statement;
 
 extern List *pg_parse_query(const char *query_string);
+extern List *pg_parse_query_get_source(const char *query_string, List **queries);
 extern List *pg_analyze_and_rewrite(Node *parsetree, const char *query_string,
                                           Oid *paramTypes, int numParams);
 extern List *pg_analyze_and_rewrite_params(Node *parsetree,
index 00978909a34f3bddb489e12b56db2cba6b7a4533..80bb07041a15eb777eedc1f9b52718e195b83c23 100644 (file)
@@ -3470,7 +3470,7 @@ check_sql_expr(const char *stmt, int location, int leaderlen)
        error_context_stack = &syntax_errcontext;
 
        oldCxt = MemoryContextSwitchTo(compile_tmp_cxt);
-       (void) raw_parser(stmt);
+       (void) raw_parser(stmt, NULL);
        MemoryContextSwitchTo(oldCxt);
 
        /* Restore former ereport callback */