Sync psql's scanner with recent changes in backend scanner's flex rules.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 27 Sep 2009 03:27:24 +0000 (03:27 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 27 Sep 2009 03:27:24 +0000 (03:27 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 27 Sep 2009 03:27:24 +0000 (03:27 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 27 Sep 2009 03:27:24 +0000 (03:27 +0000)
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l

index 1557a300ab34c7afe8469495b43d32a82a2bad97..2c632154ce3d99e3ba689cd4606cda2eb9ef0fc8 100644 (file)
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -571,18 +571,16 @@ other                     .
  
                                         BEGIN(xe);
                                 }
-<xeu>.                 |
-<xeu>\n                        |
+<xeu>.                 { yyerror("invalid Unicode surrogate pair"); }
+<xeu>\n                        { yyerror("invalid Unicode surrogate pair"); }
  <xeu><<EOF>>   { yyerror("invalid Unicode surrogate pair"); }
-
  <xe,xeu>{xeunicodefail}        {
                                                 ereport(ERROR,
                                                                 (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                                                                  errmsg("invalid Unicode escape"),
                                                                  errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
                                                                  lexer_errposition()));
-                                       }
-
+                               }
  <xe>{xeescape}  {
                                         if (yytext[1] == '\'')
                                         {
diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l

index 7f08da22e0557ff6e62c8b5325d18dac67fa42fd..6cd38eccca262887b6ae7105e0b0616e51b0828a 100644 (file)
--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l
@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr);
  static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
                                                                           char **txtcopy);
  static void emit(const char *txt, int len);
+static bool is_utf16_surrogate_first(uint32 c);
  
  #define ECHO emit(yytext, yyleng)
  
@@ -158,6 +159,7 @@ static void emit(const char *txt, int len);
   *  <xdolq> $foo$ quoted strings
   *  <xui> quoted identifier with Unicode escapes
   *  <xus> quoted string with Unicode escapes
+ *  <xeu> Unicode surrogate pair in extended quoted string
   */
  
  %x xb
@@ -169,6 +171,7 @@ static void emit(const char *txt, int len);
  %x xdolq
  %x xui
  %x xus
+%x xeu
  /* Additional exclusive states for psql only: lex backslash commands */
  %x xslashcmd
  %x xslasharg
@@ -192,6 +195,9 @@ static void emit(const char *txt, int len);
   * did not end with a newline.
   *
   * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix scanner_isspace()
+ * to agree, and see also the plpgsql lexer.
   */
  
  space                  [ \t\n\r\f]
@@ -253,6 +259,8 @@ xeinside            [^\\']+
  xeescape               [\\][^0-7]
  xeoctesc               [\\][0-7]{1,3}
  xehexesc               [\\]x[0-9A-Fa-f]{1,2}
+xeunicode              [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
+xeunicodefail  [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
  
  /* Extended quote
   * xqdouble implements embedded quote, ''''
@@ -334,6 +342,10 @@ identifier         {ident_start}{ident_cont}*
  
  typecast               "::"
  
+/* these two token types are used by PL/pgsql, though not in core SQL */
+dot_dot                        \.\.
+colon_equals   ":="
+
  /*
   * "self" is the set of chars that should be returned as single-character
   * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
@@ -511,6 +523,22 @@ other                      .
  <xe>{xeinside}  {
                                         ECHO;
                                 }
+<xe>{xeunicode} {	/* complete \uXXXX or \UXXXXXXXX escape inside an extended quoted string */
+                                       uint32 c = strtoul(yytext+2, NULL, 16);	/* skip the two-character \u or \U prefix and parse the hex digits */
+
+                                       if (is_utf16_surrogate_first(c))	/* first half of a surrogate pair: a second escape is expected next */
+                                               BEGIN(xeu);
+                                       ECHO;	/* psql does not decode the escape; the matched text goes to emit() as-is */
+                               }
+<xeu>{xeunicode} {	/* the escape that follows a first surrogate */
+                                       BEGIN(xe);	/* pair consumed; resume ordinary extended-string scanning */
+                                       ECHO;
+                               }
+<xeu>.                 { ECHO; /* the backend scanner's <xeu>. rule raises "invalid Unicode surrogate pair"; psql only echoes and stays in xeu */ }
+<xeu>\n                        { ECHO; /* same as the <xeu>. rule, for a newline */ }
+<xe,xeu>{xeunicodefail}        {	/* \u or \U followed by too few hex digits; echoed here, no error raised by psql */
+                                       ECHO;
+                               }
  <xe>{xeescape}  {
                                         ECHO;
                                 }
@@ -605,6 +633,14 @@ other                      .
                                         ECHO;
                                 }
  
+{dot_dot}              {	/* ".." token; its definition comment notes it is used by PL/pgsql, not core SQL */
+                                       ECHO;	/* no psql-specific handling: the matched text is handed to emit() */
+                               }
+
+{colon_equals} {	/* ":=" token; likewise a PL/pgsql token type per its definition comment */
+                                       ECHO;
+                               }
+
         /*
          * These rules are specific to psql --- they implement parenthesis
          * counting and detection of command-ending semicolon.  These must
@@ -1690,3 +1726,9 @@ emit(const char *txt, int len)
                 }
         }
  }
+
+static bool	/* true when c can only be the first half of a UTF-16 surrogate pair */
+is_utf16_surrogate_first(uint32 c)	/* c: code point value parsed from a \u or \U escape */
+{
+       return (c >= 0xD800 && c <= 0xDBFF);	/* U+D800..U+DBFF inclusive: the high (leading) surrogate range */
+}
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 27 Sep 2009 03:27:24 +0000 (03:27 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 27 Sep 2009 03:27:24 +0000 (03:27 +0000)
src/backend/parser/scan.l		patch \| blob \| blame \| history
src/bin/psql/psqlscan.l		patch \| blob \| blame \| history