Add a new GUC parameter backslash_quote, which determines whether the SQL
authorTom Lane <[email protected]>
Sun, 21 May 2006 20:11:25 +0000 (20:11 +0000)
committerTom Lane <[email protected]>
Sun, 21 May 2006 20:11:25 +0000 (20:11 +0000)
parser will allow "\'" to be used to represent a literal quote mark.  The
"\'" representation has been deprecated for some time in favor of the
SQL-standard representation "''" (two single quote marks), but it has been
used often enough that just disallowing it immediately won't do.  Hence
backslash_quote allows the settings "on", "off", and "safe_encoding",
the last meaning to allow "\'" only if client_encoding is a valid server
encoding.  That is now the default, and the reason is that in encodings
such as SJIS that allow 0x5c (ASCII backslash) to be the last byte of a
multibyte character, accepting "\'" allows SQL-injection attacks as per
CVE-2006-2314 (further details will be published after release).  The
"on" setting is available for backward compatibility, but it must not be
used with clients that are exposed to untrusted input.

Thanks to Akio Ishida and Yasuo Ohgaki for identifying this security issue.

doc/src/sgml/runtime.sgml
src/backend/parser/scan.l
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/bin/psql/tab-complete.c
src/include/parser/gramparse.h

index 509bcfa62417053b6c77ce55cefd802bd088e6ab..a5f7c30e47fa459d833b3f00bfa8bb8170dea365 100644 (file)
@@ -3490,39 +3490,31 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
       </listitem>
      </varlistentry>
 
-     <varlistentry id="guc-regex-flavor" xreflabel="regex_flavor">
-      <term><varname>regex_flavor</varname> (<type>string</type>)</term>
-      <indexterm><primary>regular expressions</></>
+     <varlistentry id="guc-backslash-quote" xreflabel="backslash_quote">
+      <term><varname>backslash_quote</varname> (<type>string</type>)</term>
+      <indexterm><primary>strings</><secondary>backslash quotes</></>
       <indexterm>
-       <primary><varname>regex_flavor</> configuration parameter</primary>
+       <primary><varname>backslash_quote</> configuration parameter</primary>
       </indexterm>
       <listitem>
        <para>
-        The regular expression <quote>flavor</> can be set to
-        <literal>advanced</>, <literal>extended</>, or <literal>basic</>.
-        The default is <literal>advanced</>.  The <literal>extended</>
-        setting may be useful for exact backwards compatibility with
-        pre-7.4 releases of <productname>PostgreSQL</>.  See
-        <xref linkend="posix-syntax-details"> for details.
-       </para>
-      </listitem>
-     </varlistentry>
-
-     <varlistentry id="guc-sql-inheritance" xreflabel="sql_inheritance">
-      <term><varname>sql_inheritance</varname> (<type>boolean</type>)</term>
-      <indexterm>
-       <primary><varname>sql_inheritance</> configuration parameter</primary>
-      </indexterm>
-      <indexterm><primary>inheritance</></>
-      <listitem>
-       <para>
-        This controls the inheritance semantics, in particular whether
-        subtables are included by various commands by default. They were
-        not included in versions prior to 7.1. If you need the old
-        behavior you can set this variable to off, but in the long run
-        you are encouraged to change your applications to use the
-        <literal>ONLY</literal> key word to exclude subtables. See
-        <xref linkend="ddl-inherit"> for more information about inheritance.
+        This controls whether a quote mark can be represented by
+        <literal>\'</> in a string literal.  The preferred, SQL-standard way
+        to represent a quote mark is by doubling it (<literal>''</>), but
+        <productname>PostgreSQL</> has historically also accepted
+        <literal>\'</>. However, use of <literal>\'</> creates security risks
+        because in some client character set encodings, there are multibyte
+        characters in which the last byte is numerically equivalent to ASCII
+        <literal>\</>.  If client-side code does escaping incorrectly, then an
+        SQL-injection attack is possible.  This risk can be prevented by
+        making the server reject queries in which a quote mark appears to be
+        escaped by a backslash.
+        The allowed values of <varname>backslash_quote</> are
+        <literal>on</> (allow <literal>\'</> always),
+        <literal>off</> (reject always), and
+        <literal>safe_encoding</> (allow only if client encoding does not
+        allow ASCII <literal>\</> within a multibyte character).
+        <literal>safe_encoding</> is the default setting.
        </para>
       </listitem>
      </varlistentry>
@@ -3560,6 +3552,43 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-regex-flavor" xreflabel="regex_flavor">
+      <term><varname>regex_flavor</varname> (<type>string</type>)</term>
+      <indexterm><primary>regular expressions</></>
+      <indexterm>
+       <primary><varname>regex_flavor</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        The regular expression <quote>flavor</> can be set to
+        <literal>advanced</>, <literal>extended</>, or <literal>basic</>.
+        The default is <literal>advanced</>.  The <literal>extended</>
+        setting may be useful for exact backwards compatibility with
+        pre-7.4 releases of <productname>PostgreSQL</>.  See
+        <xref linkend="posix-syntax-details"> for details.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-sql-inheritance" xreflabel="sql_inheritance">
+      <term><varname>sql_inheritance</varname> (<type>boolean</type>)</term>
+      <indexterm>
+       <primary><varname>sql_inheritance</> configuration parameter</primary>
+      </indexterm>
+      <indexterm><primary>inheritance</></>
+      <listitem>
+       <para>
+        This controls the inheritance semantics, in particular whether
+        subtables are included by various commands by default. They were
+        not included in versions prior to 7.1. If you need the old
+        behavior you can set this variable to off, but in the long run
+        you are encouraged to change your applications to use the
+        <literal>ONLY</literal> key word to exclude subtables. See
+        <xref linkend="ddl-inherit"> for more information about inheritance.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
     </sect3>
     <sect3 id="runtime-config-compatible-clients">
index 828fa82f1d07529163c9221297be5dbfe109e847..392f9662ca728618ebebf2e11b37d351c5041792 100644 (file)
@@ -35,6 +35,15 @@ extern YYSTYPE yylval;
 static int             xcdepth = 0;    /* depth of nesting in slash-star comments */
 static char    *dolqstart;      /* current $foo$ quote start string */
 
+/*
+ * GUC variables.  This is a DIRECT violation of the warning given at the
+ * head of gram.y, i.e., flex/bison code must not depend on any GUC variables;
+ * as such, changing their values can induce very unintuitive behavior.
+ * But we shall have to live with it as a short-term thing until the switch
+ * to SQL-standard string syntax is complete.
+ */
+BackslashQuoteType backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
+
 /*
  * literalbuf is used to accumulate literal values when multiple rules
  * are needed to parse a single literal.  Call startlit to reset buffer
@@ -49,6 +58,7 @@ static int            literalalloc;   /* current allocated buffer size */
 static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char ychar);
 static char *litbufdup(void);
+static int     pg_err_position(void);
 
 /*
  * When we parse a token that requires multiple lexer rules to process,
@@ -391,6 +401,17 @@ other                      .
                                        addlit(yytext, yyleng);
                                }
 <xq>{xqescape}  {
+                                       if (yytext[1] == '\'')
+                                       {
+                                               if (backslash_quote == BACKSLASH_QUOTE_OFF ||
+                                                       (backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
+                                                        PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
+                                                       ereport(ERROR,
+                                                                       (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
+                                                                        errmsg("unsafe use of \\' in a string literal"),
+                                                                        errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
+                                                                        errposition(pg_err_position())));
+                                       }
                                        addlitchar(unescape_single_char(yytext[1]));
                                }
 <xq>{xqoctesc}  {
@@ -613,14 +634,20 @@ other                     .
 
 %%
 
-void
-yyerror(const char *message)
+static int
+pg_err_position(void)          /* 1-based cursor position of current token, for errposition() */
 {
        const char *loc = token_start ? token_start : yytext;
-       int                     cursorpos;
 
        /* in multibyte encodings, return index in characters not bytes */
-       cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;
+       return pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;
+}
+
+void
+yyerror(const char *message)
+{
+       const char *loc = token_start ? token_start : yytext;
+       int                     cursorpos = pg_err_position();
 
        if (*loc == YY_END_OF_BUFFER_CHAR)
        {
index b85f81e282a6c5fe97f9bb0daa07c5cb9ad25fe1..4e5f546d32133972965bda8cc509aef739b25f70 100644 (file)
@@ -42,6 +42,7 @@
 #include "optimizer/geqo.h"
 #include "optimizer/paths.h"
 #include "optimizer/prep.h"
+#include "parser/gramparse.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_relation.h"
 #include "parser/scansup.h"
@@ -113,6 +114,7 @@ static bool assign_stage_log_stats(bool newval, bool doit, GucSource source);
 static bool assign_log_stats(bool newval, bool doit, GucSource source);
 static bool assign_transaction_read_only(bool newval, bool doit, GucSource source);
 static const char *assign_canonical_path(const char *newval, bool doit, GucSource source);
+static const char *assign_backslash_quote(const char *newval, bool doit, GucSource source);
 
 
 /*
@@ -169,6 +171,7 @@ static char *log_destination_string;
 static bool phony_autocommit;
 static bool session_auth_is_superuser;
 static double phony_random_seed;
+static char *backslash_quote_string;
 static char *client_encoding_string;
 static char *datestyle_string;
 static char *default_iso_level_string;
@@ -1423,6 +1426,15 @@ static struct config_string ConfigureNamesString[] =
                "", NULL, NULL
        },
 
+       {
+               {"backslash_quote", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
+                       gettext_noop("Sets whether \"\\'\" is allowed in string literals."),
+                       gettext_noop("Valid values are ON, OFF, and SAFE_ENCODING.")
+               },
+               &backslash_quote_string,
+               "safe_encoding", assign_backslash_quote, NULL
+       },
+
        {
                {"client_encoding", PGC_USERSET, CLIENT_CONN_LOCALE,
                        gettext_noop("Sets the client's character set encoding."),
@@ -5613,6 +5625,32 @@ assign_log_stats(bool newval, bool doit, GucSource source)
        return true;
 }
 
+static const char *
+assign_backslash_quote(const char *newval, bool doit, GucSource source)
+{
+       BackslashQuoteType bq;          /* enum value parsed from newval */
+       bool    bqbool;
+
+       /*
+        * Assign function for backslash_quote: translate newval to the enum.
+        * Only "on", "off", and "safe_encoding" are documented, but we use
+        * parse_bool so we can accept all the likely variants of "on"/"off".
+        */
+       if (pg_strcasecmp(newval, "safe_encoding") == 0)
+               bq = BACKSLASH_QUOTE_SAFE_ENCODING;
+       else if (parse_bool(newval, &bqbool))
+       {
+               bq = bqbool ? BACKSLASH_QUOTE_ON : BACKSLASH_QUOTE_OFF;
+       }
+       else
+               return NULL;                    /* reject */
+
+       if (doit)                       /* leave the setting untouched unless told to apply */
+               backslash_quote = bq;
+
+       return newval;                  /* accept: hand the input string back unchanged */
+}
+
 static bool
 assign_transaction_read_only(bool newval, bool doit, GucSource source)
 {
index 021b732096dbfe1371107ee751e155cf615ad4c6..61d0bfebf3fdad6bd04bb6e262f8b100ef81c832 100644 (file)
 # - Previous Postgres Versions -
 
 #add_missing_from = true
+#backslash_quote = safe_encoding       # on, off, or safe_encoding
+#default_with_oids = true
 #regex_flavor = advanced       # advanced, extended, or basic
 #sql_inheritance = true
-#default_with_oids = true
 
 # - Other Platforms & Clients -
 
index 10a8ede498669524e94e8bda1455596db4748cdc..2370212a778c3da1d4842d6dfd809e06d2908cd0 100644 (file)
@@ -512,6 +512,7 @@ psql_completion(char *text, int start, int end)
                 */
                "add_missing_from",
                "australian_timezones",
+               "backslash_quote",
                "client_encoding",
                "client_min_messages",
                "commit_delay",
@@ -1026,7 +1027,7 @@ psql_completion(char *text, int start, int end)
                          pg_strcasecmp(prev2_wd, "TO") == 0))
                {
                        static const char *const list_COPY[] =
-                       {"BINARY", "OIDS", "DELIMETER", "NULL", "CSV", NULL};
+                       {"BINARY", "OIDS", "DELIMITER", "NULL", "CSV", NULL};
 
                        COMPLETE_WITH_LIST(list_COPY);
                }
index 1e21312739182854739a3f35531b66cb7263a162..db1e60ee8b9bbdc0f270f82b80eda3dc8a02b817 100644 (file)
 #include "nodes/parsenodes.h"
 
 
+typedef enum                   /* settings of the backslash_quote GUC */
+{
+       BACKSLASH_QUOTE_OFF,    /* always reject \' in string literals */
+       BACKSLASH_QUOTE_ON,     /* always allow \' */
+       BACKSLASH_QUOTE_SAFE_ENCODING   /* reject \' only when client encoding is client-only */
+} BackslashQuoteType;
+
+/* GUC variables in scan.l (every one of these is a bad idea :-() */
+extern BackslashQuoteType backslash_quote;
+
+
 /* from parser.c */
 extern int     yylex(void);