Fix bugs in plpgsql and ecpg caused by assuming that isspace() would only
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Sep 2006 21:39:58 +0000 (21:39 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Sep 2006 21:39:58 +0000 (21:39 +0000)
return true for exactly the characters treated as whitespace by their flex
scanners.  Per report from Victor Snezhko and subsequent investigation.

Also fix a passel of unsafe usages of <ctype.h> functions, that is, ye olde
char-vs-unsigned-char issue.  I won't miss <ctype.h> when we are finally
able to stop using it.

19 files changed:
contrib/fuzzystrmatch/dmetaphone.c
contrib/hstore/hstore_io.c
contrib/isn/isn.c
contrib/ltree/crc32.c
contrib/ltree/ltree_io.c
contrib/ltree/ltxtquery_io.c
contrib/pgcrypto/imath.c
src/backend/parser/scan.l
src/backend/parser/scansup.c
src/backend/utils/misc/guc.c
src/bin/pg_dump/pg_dump.c
src/bin/psql/tab-complete.c
src/include/parser/scansup.h
src/include/port.h
src/interfaces/ecpg/preproc/pgc.l
src/interfaces/libpq/fe-auth.c
src/pl/plpgsql/src/pl_exec.c
src/pl/plpgsql/src/pl_funcs.c
src/port/path.c

index 6883dbebb60269e94a2d508a421d6695fd5eb13a..4f4fa18f4f7e210fcfbc1edf2587680fb5073136 100644 (file)
@@ -318,7 +318,7 @@ MakeUpper(metastring * s)
        char       *i;
 
        for (i = s->str; *i; i++)
-               *i = toupper(*i);
+               *i = toupper((unsigned char) *i);
 }
 
 
index b905ff7f3d5ba94285ea4e799c9710a8f7c1a9af..051a411a99346f369275e9eb7fc9e4d74fb1a3e2 100644 (file)
@@ -51,7 +51,7 @@ get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
                                elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
                        } else if ( *(state->ptr) == '\\' ) {
                                st = GV_WAITESCIN;
-                       } else if ( !isspace(*(state->ptr)) ) {
+                       } else if ( !isspace((unsigned char) *(state->ptr)) ) {
                                *(state->cur) = *(state->ptr);
                                state->cur++;
                                st = GV_INVAL;
@@ -65,7 +65,7 @@ get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
                        } else if ( *(state->ptr) == ',' && ignoreeq ) {
                                state->ptr--;
                                return true;
-                       } else if ( isspace(*(state->ptr)) ) {
+                       } else if ( isspace((unsigned char) *(state->ptr)) ) {
                                return true;
                        } else if ( *(state->ptr) == '\0' ) {
                                state->ptr--;
@@ -146,7 +146,7 @@ parse_hstore( HSParser *state ) {
                                st = WGT;
                        } else if ( *(state->ptr) == '\0' ) {
                                elog(ERROR,"Unexpectd end of string");
-                       } else if (!isspace(*(state->ptr))) {
+                       } else if (!isspace((unsigned char) *(state->ptr))) {
                                elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
                        }
                } else if ( st == WGT ) {
@@ -177,7 +177,7 @@ parse_hstore( HSParser *state ) {
                                st = WKEY;
                        } else if ( *(state->ptr) == '\0' ) {
                                return;
-                       } else if (!isspace(*(state->ptr))) {
+                       } else if (!isspace((unsigned char) *(state->ptr))) {
                                elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
                        }
                } else 
index b84e6a2658b04b6cd2f00e009964914ad1e9400f..f8025e5b8e5a4429d56de7e4aa6c4a6ea4f88659 100644 (file)
@@ -72,13 +72,16 @@ bool check_table(const char *(*TABLE)[2], const unsigned TABLE_index[10][2])
                aux2 = TABLE[i][1];
 
                /* must always start with a digit: */
-               if(!isdigit(*aux1) || !isdigit(*aux2)) goto invalidtable;
+               if (!isdigit((unsigned char) *aux1) || !isdigit((unsigned char) *aux2))
+                       goto invalidtable;
                a = *aux1 - '0';
                b = *aux2 - '0';
 
                /* must always have the same format and length: */
                while(*aux1 && *aux2) {
-                       if(!(isdigit(*aux1) && isdigit(*aux2)) && (*aux1!=*aux2 || *aux1 != '-')) 
+                       if (!(isdigit((unsigned char) *aux1) &&
+                                 isdigit((unsigned char) *aux2)) &&
+                               (*aux1 != *aux2 || *aux1 != '-')) 
                                goto invalidtable;
                        aux1++;
                        aux2++;
@@ -124,7 +127,7 @@ unsigned dehyphenate(char *bufO, char *bufI)
 {
        unsigned ret = 0;
        while(*bufI) {
-               if(isdigit(*bufI)) {
+               if(isdigit((unsigned char) *bufI)) {
                        *bufO++ = *bufI;
                        ret++;
                }
@@ -183,7 +186,7 @@ unsigned hyphenate(char *bufO, char *bufI, const char *(*TABLE)[2], const unsign
 
                        firstdig++, ean_aux1++, ean_aux2++;
                        if(!(*ean_aux1 && *ean_aux2 && *firstdig)) break;
-                       if(!isdigit(*ean_aux1)) ean_aux1++, ean_aux2++;
+                       if(!isdigit((unsigned char) *ean_aux1)) ean_aux1++, ean_aux2++;
                } else {
                        /* check in what direction we should go and move the pointer accordingly */
                        if(*firstdig < *ean_aux1 && !ean_in1) upper = search;
@@ -227,7 +230,7 @@ unsigned weight_checkdig(char *isn, unsigned size)
 {
        unsigned weight = 0;
        while(*isn && size>1) {
-               if(isdigit(*isn)) {
+               if(isdigit((unsigned char) *isn)) {
                        weight += size-- * (*isn - '0');
                }
                isn++;
@@ -254,7 +257,7 @@ unsigned checkdig(char *num, unsigned size)
                pos = 1;
        }
        while(*num && size>1) {
-               if(isdigit(*num)) {
+               if(isdigit((unsigned char) *num)) {
                        if(pos++%2) check3 += *num - '0';
                        else check += *num - '0';
                        size--;
@@ -366,7 +369,7 @@ void ean2ISBN(char *isn)
        hyphenate(isn, isn+4, NULL, NULL);
        check = weight_checkdig(isn, 10);
        aux = strchr(isn, '\0');
-       while(!isdigit(*--aux));
+       while(!isdigit((unsigned char) *--aux));
        if(check == 10) *aux = 'X';
        else *aux = check + '0';
 }
@@ -411,7 +414,7 @@ ean13 str2ean(const char *num)
 {
        ean13 ean = 0;  /* current ean */
        while(*num) {
-               if(isdigit(*num)) ean = 10 * ean + (*num - '0');
+               if(isdigit((unsigned char) *num)) ean = 10 * ean + (*num - '0');
                num++;
        }
     return (ean<<1); /* also give room to a flag */
@@ -570,7 +573,7 @@ bool string2ean(const char *str, bool errorOK, ean13 *result,
        /* recognize and validate the number: */
        while(*aux2 && length <= 13) {
                last = (*(aux2+1) == '!' || *(aux2+1) == '\0'); /* is the last character */
-               digit = (isdigit(*aux2)!=0); /* is current character a digit? */
+               digit = (isdigit((unsigned char) *aux2)!=0); /* is current character a digit? */
                if(*aux2=='?' && last) /* automagically calculate check digit if it's '?' */
                        magic = digit = true;
                if(length == 0 &&  (*aux2=='M' || *aux2=='m')) {
@@ -583,13 +586,13 @@ bool string2ean(const char *str, bool errorOK, ean13 *result,
                        /* only ISSN can be here */
                        if(type != INVALID) goto eaninvalid;
                        type = ISSN;
-                       *aux1++ = toupper(*aux2);
+                       *aux1++ = toupper((unsigned char) *aux2);
                        length++;
                } else if(length == 9 && (digit || *aux2=='X' || *aux2=='x') && last) {
                        /* only ISBN and ISMN can be here */
                        if(type != INVALID && type != ISMN) goto eaninvalid;
                        if(type == INVALID) type = ISBN; /* ISMN must start with 'M' */
-                       *aux1++ = toupper(*aux2);
+                       *aux1++ = toupper((unsigned char) *aux2);
                        length++;
                } else if(length == 11 && digit && last) {
                        /* only UPC can be here */
index fea972b46639738fee60535e5dba46b1a9028a1a..736230691593455943981a470ac625996c265d75 100644 (file)
@@ -8,7 +8,7 @@
 
 #ifdef LOWER_NODE
 #include <ctype.h>
-#define TOLOWER(x)     tolower(x)
+#define TOLOWER(x)     tolower((unsigned char) (x))
 #else
 #define TOLOWER(x)     (x)
 #endif
index 353e06512b6b474070ef7f06a48a15ad3be4f39c..ff1ace2989bcfdb6f5ccdc109392856bb2d767ca 100644 (file)
@@ -332,7 +332,7 @@ lquery_in(PG_FUNCTION_ARGS)
                {
                        if (*ptr == ',')
                                state = LQPRS_WAITSNUM;
-                       else if (isdigit((unsigned int) *ptr))
+                       else if (isdigit((unsigned char) *ptr))
                        {
                                curqlevel->low = atoi(ptr);
                                state = LQPRS_WAITND;
@@ -342,7 +342,7 @@ lquery_in(PG_FUNCTION_ARGS)
                }
                else if (state == LQPRS_WAITSNUM)
                {
-                       if (isdigit((unsigned int) *ptr))
+                       if (isdigit((unsigned char) *ptr))
                        {
                                curqlevel->high = atoi(ptr);
                                state = LQPRS_WAITCLOSE;
@@ -359,7 +359,7 @@ lquery_in(PG_FUNCTION_ARGS)
                {
                        if (*ptr == '}')
                                state = LQPRS_WAITEND;
-                       else if (!isdigit((unsigned int) *ptr))
+                       else if (!isdigit((unsigned char) *ptr))
                                UNCHAR;
                }
                else if (state == LQPRS_WAITND)
@@ -371,7 +371,7 @@ lquery_in(PG_FUNCTION_ARGS)
                        }
                        else if (*ptr == ',')
                                state = LQPRS_WAITSNUM;
-                       else if (!isdigit((unsigned int) *ptr))
+                       else if (!isdigit((unsigned char) *ptr))
                                UNCHAR;
                }
                else if (state == LQPRS_WAITEND)
index 783005f33013b05e8ef5f4d6458096a81ae75752..76011b4e927973a0feabdb9b3f9a383afdbf534a 100644 (file)
@@ -81,7 +81,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint1
                                        *lenval = 1;
                                        *flag = 0;
                                }
-                               else if (!isspace((unsigned int) *(state->buf)))
+                               else if (!isspace((unsigned char) *(state->buf)))
                                        ereport(ERROR,
                                                        (errcode(ERRCODE_SYNTAX_ERROR),
                                                         errmsg("operand syntax error")));
index 67587dcc28c9b4b12c8d66d7928b09d933f9d617..70ee74706b34a3a38545182908e70a7d3b68d2b1 100644 (file)
@@ -1799,7 +1799,7 @@ mp_result mp_int_read_cstring(mp_int z, mp_size radix, const char *str, char **e
     return MP_RANGE;
 
   /* Skip leading whitespace */
-  while(isspace((int)*str))
+  while(isspace((unsigned char) *str))
     ++str;
 
   /* Handle leading sign tag (+/-, positive default) */
@@ -3127,10 +3127,10 @@ static int       s_ch2val(char c, int r)
 {
   int out;
 
-  if(isdigit((int)c))
+  if(isdigit((unsigned char)c))
     out = c - '0';
-  else if(r > 10 && isalpha((int)c))
-    out = toupper(c) - 'A' + 10;
+  else if(r > 10 && isalpha((unsigned char) c))
+    out = toupper((unsigned char) c) - 'A' + 10;
   else
     return -1;
 
@@ -3151,7 +3151,7 @@ static char      s_val2ch(int v, int caps)
     char out = (v - 10) + 'a';
 
     if(caps)
-      return toupper(out);
+      return toupper((unsigned char) out);
     else
       return out;
   }
index 19675138c81566f0f2f35482a7d3e1b57fea062f..5abb1334b15a35613102284648984c7f29a6f923 100644 (file)
@@ -145,6 +145,9 @@ static unsigned char unescape_single_char(unsigned char c);
  * did not end with a newline.
  *
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix scanner_isspace()
+ * to agree, and see also the plpgsql lexer.
  */
 
 space                  [ \t\n\r\f]
index 504bab81d6bad90e4f3167637758985c3e9020fe..152e07ff881e40b240039cc1da33bcd0c003a5c0 100644 (file)
@@ -183,3 +183,26 @@ truncate_identifier(char *ident, int len, bool warn)
                ident[len] = '\0';
        }
 }
+
+/*
+ * scanner_isspace() --- return TRUE if flex scanner considers char whitespace
+ *
+ * This should be used instead of the potentially locale-dependent isspace()
+ * function when it's important to match the lexer's behavior.
+ *
+ * In principle we might need similar functions for isalnum etc, but for the
+ * moment only isspace seems needed.
+ */
+bool
+scanner_isspace(char ch)
+{
+       /* This must match scan.l's list of {space} characters */
+       /* and plpgsql's scan.l as well */
+       if (ch == ' ' ||
+               ch == '\t' ||
+               ch == '\n' ||
+               ch == '\r' ||
+               ch == '\f')
+               return true;
+       return false;
+}
index 7fcddacfae16ec431eb2be09c68c8789971d7212..7999c8e8f77133eda679428905fb6a9cb346ed94 100644 (file)
@@ -6155,7 +6155,7 @@ assign_custom_variable_classes(const char *newval, bool doit, GucSource source)
        initStringInfo(&buf);
        while ((c = *cp++) != 0)
        {
-               if (isspace(c))
+               if (isspace((unsigned char) c))
                {
                        if (symLen > 0)
                                hasSpaceAfterToken = true;
@@ -6173,7 +6173,7 @@ assign_custom_variable_classes(const char *newval, bool doit, GucSource source)
                        continue;
                }
 
-               if (hasSpaceAfterToken || !isalnum(c))
+               if (hasSpaceAfterToken || !isalnum((unsigned char) c))
                {
                        /*
                         * Syntax error due to token following space after token or non
index 48073ea7136f96a91abedd113191b05c4b27a7ba..373f5530cb65c5589a13e9ae735dcc377f642c96 100644 (file)
@@ -367,10 +367,10 @@ main(int argc, char **argv)
 
                                        new_obj_name->next = NULL;
                                        new_obj_name->name = strdup(optarg);
-                                       new_obj_name->is_include = islower(c) ? true : false;
+                                       new_obj_name->is_include = islower((unsigned char) c) ? true : false;
 
                                        /* add new entry to the proper list */
-                                       if (tolower(c) == 'n')
+                                       if (tolower((unsigned char) c) == 'n')
                                        {
                                                if (!schemaList_tail)
                                                        schemaList_tail = schemaList = new_obj_name;
index 43f8f5c977c8ce3df058fbff6a78dac01f3d3f9c..2a8cbd6ceabd9d87049f347f04c877647ca48aa3 100644 (file)
@@ -1090,8 +1090,8 @@ psql_completion(char *text, int start, int end)
        /* Complete "AS ON <sth with a 'T' :)>" with a "TO" */
        else if (pg_strcasecmp(prev3_wd, "AS") == 0 &&
                         pg_strcasecmp(prev2_wd, "ON") == 0 &&
-                        (toupper((unsigned char) prev_wd[4]) == 'T' ||
-                         toupper((unsigned char) prev_wd[5]) == 'T'))
+                        (pg_toupper((unsigned char) prev_wd[4]) == 'T' ||
+                         pg_toupper((unsigned char) prev_wd[5]) == 'T'))
                COMPLETE_WITH_CONST("TO");
        /* Complete "AS ON <sth> TO" with a table name */
        else if (pg_strcasecmp(prev4_wd, "AS") == 0 &&
index 18032497438ae303bccc6fb3a0c74bc46f4a3280..80ae82b350ab61fb0089d27f6f0f4d577e1cc74a 100644 (file)
@@ -22,4 +22,6 @@ extern char *downcase_truncate_identifier(const char *ident, int len,
 
 extern void truncate_identifier(char *ident, int len, bool warn);
 
+extern bool scanner_isspace(char ch);
+
 #endif   /* SCANSUP_H */
index 85f9da07e677f27ce93319ee138286dba51afef5..6bc7022ce72ac726f75d0979dec1800dbe9cb3f0 100644 (file)
@@ -59,7 +59,7 @@ extern void get_parent_directory(char *path);
 ( \
        ((filename)[0] == '/') || \
        (filename)[0] == '\\' || \
-       (isalpha((filename)[0]) && (filename)[1] == ':' && \
+       (isalpha((unsigned char) ((filename)[0])) && (filename)[1] == ':' && \
        ((filename)[2] == '\\' || (filename)[2] == '/')) \
 )
 #endif
index b709bbbc7281111f321c3e237eaf5c3341efce52..d8632bf78f2feecdffbfb90e59aa6f9effd175ee 100644 (file)
@@ -47,6 +47,7 @@ static void addlit(char *ytext, int yleng);
 static void addlitchar (unsigned char);
 static void parse_include (void);
 static void check_escape_warning(void);
+static bool ecpg_isspace(char ch);
 
 char *token_start;
 int state_before;
@@ -245,6 +246,9 @@ param                       \${integer}
  * did not end with a newline.
  *
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix ecpg_isspace()
+ * to agree.
  */
 
 ccomment               "//".*\n
@@ -872,7 +876,7 @@ cppline                     {space}*#(.*\\{space})*.*{newline}
                                         *      contains at least one non-space character plus the ";"
                                         */
                                        for (i = strlen(yytext)-2;
-                                                i > 0 && isspace((unsigned char) yytext[i]);
+                                                i > 0 && ecpg_isspace(yytext[i]);
                                                 i-- )
                                                ;
                                        yytext[i+1] = '\0';
@@ -1060,7 +1064,7 @@ cppline                   {space}*#(.*\\{space})*.*{newline}
                                                 *      contains at least one non-space character plus the ";"
                                                 */
                                                for (i = strlen(yytext)-2;
-                                                        i > 0 && isspace((unsigned char) yytext[i]);
+                                                        i > 0 && ecpg_isspace(yytext[i]);
                                                         i-- )
                                                        ;
                                                yytext[i+1] = '\0';
@@ -1252,7 +1256,7 @@ parse_include(void)
         * yytext contains at least one non-space character plus the ";" 
         */
        for (i = strlen(yytext)-2;
-                i > 0 && isspace((unsigned char) yytext[i]);
+                i > 0 && ecpg_isspace(yytext[i]);
                 i--)
                ;
 
@@ -1328,3 +1332,18 @@ check_escape_warning(void)
                mmerror (PARSE_ERROR, ET_WARNING, "nonstandard use of escape in a string literal");
        warn_on_first_escape = false;   /* warn only once per string */
 }
+
+/*
+ * ecpg_isspace() --- return TRUE if flex scanner considers char whitespace
+ */
+static bool
+ecpg_isspace(char ch)
+{
+       if (ch == ' ' ||
+               ch == '\t' ||
+               ch == '\n' ||
+               ch == '\r' ||
+               ch == '\f')
+               return true;
+       return false;
+}
index 40d46c5fb1a609464fd865b91d03a5e0c4487026..0583c8fe7eb66b852880f971450f952de9f96293 100644 (file)
@@ -89,7 +89,7 @@ pg_an_to_ln(char *aname)
                *p = '\0';
 #ifdef WIN32
        for (p = aname; *p; p++)
-               *p = pg_tolower(*p);
+               *p = pg_tolower((unsigned char) *p);
 #endif
 
        return aname;
index 7a7baf5bb71f993b49346e986af37d66957e3239..ae7fa2f0baa89cde5a60433454e2b6f514b53a02 100644 (file)
@@ -26,6 +26,7 @@
 #include "funcapi.h"
 #include "optimizer/clauses.h"
 #include "parser/parse_expr.h"
+#include "parser/scansup.h"
 #include "tcop/tcopprot.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
@@ -2527,7 +2528,7 @@ exec_stmt_dynexecute(PLpgSQL_execstate *estate,
                                char       *ptr;
 
                                for (ptr = querystr; *ptr; ptr++)
-                                       if (!isspace((unsigned char) *ptr))
+                                       if (!scanner_isspace(*ptr))
                                                break;
                                if (*ptr == 'S' || *ptr == 's')
                                        ereport(ERROR,
index b6377872a131fbb3e807ed54f67d55556685795c..9c169e6068a08b1389f3dcda0c4e6968c78e2f94 100644 (file)
@@ -381,7 +381,7 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
                        /* Normal identifier: extends till dot or whitespace */
                        const char *thisstart = s;
 
-                       while (*s && *s != '.' && !isspace((unsigned char) *s))
+                       while (*s && *s != '.' && !scanner_isspace(*s))
                                s++;
                        /* Downcase and truncate to NAMEDATALEN */
                        curident = downcase_truncate_identifier(thisstart, s - thisstart,
@@ -400,11 +400,11 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
                /* If not done, skip whitespace, dot, whitespace */
                if (*s)
                {
-                       while (*s && isspace((unsigned char) *s))
+                       while (*s && scanner_isspace(*s))
                                s++;
                        if (*s++ != '.')
                                elog(ERROR, "expected dot between identifiers: %s", sstart);
-                       while (*s && isspace((unsigned char) *s))
+                       while (*s && scanner_isspace(*s))
                                s++;
                        if (*s == '\0')
                                elog(ERROR, "expected another identifier: %s", sstart);
index 410c285e4b685bcc3d7c31ceac8313c9c70f0269..355788e9d66e556678274644d13429f0e98f2fdf 100644 (file)
@@ -69,7 +69,7 @@ skip_drive(const char *path)
                while (*path && !IS_DIR_SEP(*path))
                        path++;
        }
-       else if (isalpha(path[0]) && path[1] == ':')
+       else if (isalpha((unsigned char) path[0]) && path[1] == ':')
        {
                path += 2;
        }