Perform post-escaping encoding validity checks on SQL literals and COPY input
author Andrew Dunstan <andrew@dunslane.net>
Wed, 12 Sep 2007 20:49:27 +0000 (20:49 +0000)
committer Andrew Dunstan <andrew@dunslane.net>
Wed, 12 Sep 2007 20:49:27 +0000 (20:49 +0000)
so that invalidly encoded data cannot enter the database by these means.

src/backend/commands/copy.c
src/backend/parser/scan.l

index ad60fb3908762c2e02b6a9a90f4046f931b5cf1f..dbce49df5cd07b24a9b68cba7456b83ab661dedf 100644 (file)
@@ -2685,6 +2685,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
                char       *start_ptr;
                char       *end_ptr;
                int                     input_len;
+               bool        saw_high_bit = false;
 
                /* Make sure space remains in fieldvals[] */
                if (fieldno >= maxfields)
@@ -2749,6 +2750,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
                                                                }
                                                        }
                                                        c = val & 0377;
+                                                       if (IS_HIGHBIT_SET(c))
+                                                               saw_high_bit = true;
                                                }
                                                break;
                                        case 'x':
@@ -2772,6 +2775,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
                                                                        }
                                                                }
                                                                c = val & 0xff;
+                                                               if (IS_HIGHBIT_SET(c))
+                                                                       saw_high_bit = true;                                                    
                                                        }
                                                }
                                                break;
@@ -2799,7 +2804,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
                                                 * literally
                                                 */
                                }
-                       }
+                       }                       
 
                        /* Add c to output string */
                        *output_ptr++ = c;
@@ -2808,6 +2813,16 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
                /* Terminate attribute value in output area */
                *output_ptr++ = '\0';
 
+               /* If we de-escaped a char with the high bit set, make sure
+                * we still have valid data for the db encoding. Avoid calling strlen 
+                * here for the sake of efficiency.
+                */
+               if (saw_high_bit)
+               {
+                       char *fld = fieldvals[fieldno];
+                       pg_verifymbstr(fld, output_ptr - (fld + 1), false);
+               }
+
                /* Check whether raw input matched null marker */
                input_len = end_ptr - start_ptr;
                if (input_len == cstate->null_print_len &&
index 2e1621585641e4ae762c93bd201a3a9e7377ca3a..2a515502d0c1301fa526e1ab52175516c0dda7a7 100644 (file)
@@ -60,6 +60,7 @@ bool                  escape_string_warning = true;
 bool                   standard_conforming_strings = false;
 
 static bool            warn_on_first_escape;
+static bool     saw_high_bit = false;
 
 /*
  * literalbuf is used to accumulate literal values when multiple rules
@@ -426,6 +427,7 @@ other                       .
 
 {xqstart}              {
                                        warn_on_first_escape = true;
+                                       saw_high_bit = false;
                                        SET_YYLLOC();
                                        if (standard_conforming_strings)
                                                BEGIN(xq);
@@ -435,6 +437,7 @@ other                       .
                                }
 {xestart}              {
                                        warn_on_first_escape = false;
+                                       saw_high_bit = false;
                                        SET_YYLLOC();
                                        BEGIN(xe);
                                        startlit();
@@ -443,6 +446,11 @@ other                      .
 <xq,xe>{quotefail} {
                                        yyless(1);
                                        BEGIN(INITIAL);
+                                       /* check that the data remains valid if it might have been
+                                        * made invalid by unescaping any chars.
+                                        */
+                                       if (saw_high_bit)
+                                               pg_verifymbstr(literalbuf, literallen, false);
                                        yylval.str = litbufdup();
                                        return SCONST;
                                }
@@ -475,12 +483,16 @@ other                     .
 
                                        check_escape_warning();
                                        addlitchar(c);
+                                       if (IS_HIGHBIT_SET(c))
+                                               saw_high_bit = true;
                                }
 <xe>{xehexesc}  {
                                        unsigned char c = strtoul(yytext+2, NULL, 16);
 
                                        check_escape_warning();
                                        addlitchar(c);
+                                       if (IS_HIGHBIT_SET(c))
+                                               saw_high_bit = true;
                                }
 <xq,xe>{quotecontinue} {
                                        /* ignore */
@@ -892,6 +904,14 @@ litbufdup(void)
 static unsigned char
 unescape_single_char(unsigned char c)
 {
+       /* Normally we wouldn't expect to see \n where n has its high bit set
+        * but we set the flag to check the string if we do get it, so
+        * that this doesn't become a way of getting around the coding validity
+        * checks.
+        */
+       if (IS_HIGHBIT_SET(c))
+               saw_high_bit = true;
+
        switch (c)
        {
                case 'b':