Close previously open holes for invalidly encoded data to enter the database

author Andrew Dunstan <andrew@dunslane.net>

Tue, 18 Sep 2007 17:41:17 +0000 (17:41 +0000)

committer Andrew Dunstan <andrew@dunslane.net>

Tue, 18 Sep 2007 17:41:17 +0000 (17:41 +0000)
author Andrew Dunstan <andrew@dunslane.net>
Tue, 18 Sep 2007 17:41:17 +0000 (17:41 +0000)
committer Andrew Dunstan <andrew@dunslane.net>
Tue, 18 Sep 2007 17:41:17 +0000 (17:41 +0000)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index c59504a4d8afb9ac9a8392c7f66160d02f40d07e..510fe22557c6280c37dfa231df08154bce6dbabc 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1122,13 +1122,14 @@
        <row>
         <entry><literal><function>convert</function>(<parameter>string</parameter>
         using <parameter>conversion_name</parameter>)</literal></entry>
-       <entry><type>text</type></entry>
+       <entry><type>bytea</type></entry>
         <entry>
          Change encoding using specified conversion name.  Conversions
          can be defined by <command>CREATE CONVERSION</command>.  Also
          there are some pre-defined conversion names. See <xref
          linkend="conversion-names"> for available conversion
-        names.
+        names. The <parameter>string</parameter> must be valid in the
+               source encoding.
         </entry>
         <entry><literal>convert('PostgreSQL' using iso_8859_1_to_utf8)</literal></entry>
         <entry><literal>'PostgreSQL'</literal> in UTF8 (Unicode, 8-bit) encoding</entry>
@@ -1244,6 +1245,12 @@
     <indexterm>
      <primary>chr</primary>
     </indexterm>
+   <indexterm>
+    <primary>convert_from</primary>
+   </indexterm>
+   <indexterm>
+    <primary>convert_to</primary>
+   </indexterm>
     <indexterm>
      <primary>decode</primary>
     </indexterm>
@@ -1319,7 +1326,12 @@
        <row>
         <entry><literal><function>ascii</function>(<parameter>string</parameter>)</literal></entry>
         <entry><type>int</type></entry>
-       <entry><acronym>ASCII</acronym> code of the first byte of the argument</entry>
+       <entry>
+            <acronym>ASCII</acronym> code of the first character of the argument. 
+                For <acronym>UTF8</acronym> returns the Unicode code point of the character. 
+                For other multi-byte encodings, the argument must be a strictly 
+                <acronym>ASCII</acronym> character.
+          </entry>
         <entry><literal>ascii('x')</literal></entry>
         <entry><literal>120</literal></entry>
        </row>
@@ -1340,29 +1352,61 @@
        <row>
         <entry><literal><function>chr</function>(<type>int</type>)</literal></entry>
         <entry><type>text</type></entry>
-       <entry>Character with the given <acronym>ASCII</acronym> code</entry>
+       <entry>
+            Character with the given code. For <acronym>UTF8</acronym> the argument is
+                treated as a Unicode code point. For other multi-byte encodings the argument
+                must designate a strictly <acronym>ASCII</acronym> character.
+          </entry>
         <entry><literal>chr(65)</literal></entry>
         <entry><literal>A</literal></entry>
        </row>
  
        <row>
         <entry>
-        <literal><function>convert</function>(<parameter>string</parameter> <type>text</type>,
-        <optional><parameter>src_encoding</parameter> <type>name</type>,</optional>
+        <literal><function>convert</function>(<parameter>string</parameter> <type>bytea</type>,
+        <parameter>src_encoding</parameter> <type>name</type>,
          <parameter>dest_encoding</parameter> <type>name</type>)</literal>
         </entry>
-       <entry><type>text</type></entry>
+       <entry><type>bytea</type></entry>
         <entry>
          Convert string to <parameter>dest_encoding</parameter>.
          The original encoding is specified by
-        <parameter>src_encoding</parameter>.  If
-        <parameter>src_encoding</parameter> is omitted, database
-        encoding is assumed.
+               <parameter>src_encoding</parameter>. The <parameter>string</parameter>
+               must be valid in this encoding.
         </entry>
         <entry><literal>convert( 'text_in_utf8', 'UTF8', 'LATIN1')</literal></entry>
         <entry><literal>text_in_utf8</literal> represented in ISO 8859-1 encoding</entry>
        </row>
  
+      <row>
+       <entry>
+        <literal><function>convert_from</function>(<parameter>string</parameter> <type>bytea</type>,
+        <parameter>src_encoding</parameter> <type>name</type>)</literal>
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Convert string to the database encoding.
+        The original encoding is specified by
+               <parameter>src_encoding</parameter>. The <parameter>string</parameter>
+               must be valid in this encoding.
+       </entry>
+       <entry><literal>convert_from( 'text_in_utf8', 'UTF8')</literal></entry>
+       <entry><literal>text_in_utf8</literal> represented in the current database encoding</entry>
+      </row>
+
+      <row>
+       <entry>
+        <literal><function>convert_to</function>(<parameter>string</parameter> <type>text</type>,
+        <parameter>dest_encoding</parameter> <type>name</type>)</literal>
+       </entry>
+       <entry><type>bytea</type></entry>
+       <entry>
+        Convert string to <parameter>dest_encoding</parameter>.
+       </entry>
+       <entry><literal>convert_to( 'some text', 'UTF8')</literal></entry>
+       <entry><literal>some text</literal> represented in the UTF8 encoding</entry>
+      </row>
+
        <row>
         <entry>
          <literal><function>decode</function>(<parameter>string</parameter> <type>text</type>,
@@ -1415,6 +1459,19 @@
         <entry><literal>4</literal></entry>
        </row>
  
+      <row>
+       <entry><literal><function>length</function>(<parameter>string</parameter> <type>bytea</type>,
+        <parameter>encoding</parameter> <type>name</type> )</literal></entry>
+       <entry><type>int</type></entry>
+       <entry>
+        Number of characters in <parameter>string</parameter> in the 
+               given <parameter>encoding</parameter>. The 
+               <parameter>string</parameter> must be valid in this encoding.
+       </entry>
+       <entry><literal>length('jose', 'UTF8')</literal></entry>
+       <entry><literal>4</literal></entry>
+      </row>
+
        <row>
         <entry>
          <literal><function>lpad</function>(<parameter>string</parameter> <type>text</type>,
diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c

index 33eb20918626ddba8dfb1b7f72143ce18c4247d3..160a5135c09ca4f7e80f470dbab274e4002d7c25 100644 (file)
--- a/src/backend/catalog/pg_conversion.c
+++ b/src/backend/catalog/pg_conversion.c
@@ -282,7 +282,10 @@ FindConversion(const char *conname, Oid connamespace)
   * CONVERT <left paren> <character value expression>
   * USING <form-of-use conversion name> <right paren>
   *
- * TEXT convert_using(TEXT string, TEXT conversion_name)
+ * BYTEA convert_using(TEXT string, TEXT conversion_name)
+ *
+ * bytea is returned so we don't give a value that is
+ * not valid in the database encoding.
   */
  Datum
  pg_convert_using(PG_FUNCTION_ARGS)
@@ -344,5 +347,5 @@ pg_convert_using(PG_FUNCTION_ARGS)
         pfree(result);
         pfree(str);
  
-       PG_RETURN_TEXT_P(retval);
+       PG_RETURN_BYTEA_P(retval);
  }
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c

index 68d2c1bd98b79161bec79542b4d01c0fab819d6c..b4195cca26082bfe919cf793da0d0c24c73f78c2 100644 (file)
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -1246,6 +1246,13 @@ translate(PG_FUNCTION_ARGS)
   *
   *      Returns the decimal representation of the first character from
   *      string.
+ *   If the string is empty we return 0.
+ *   If the database encoding is UTF8, we return the Unicode codepoint. 
+ *   If the database encoding is any other multi-byte encoding, we
+ *   return the value of the first byte if it is an ASCII character
+ *   (range 1 .. 127), or raise an error.
+ *   For all other encodings we return the value of the first byte
+ *   (range 1..255).
   *
   ********************************************************************/
  
@@ -1253,11 +1260,57 @@ Datum
  ascii(PG_FUNCTION_ARGS)
  {
         text       *string = PG_GETARG_TEXT_P(0);
+       int encoding = GetDatabaseEncoding();
+       unsigned char *data;
  
         if (VARSIZE(string) <= VARHDRSZ)
                 PG_RETURN_INT32(0);
  
-       PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string)));
+       data = (unsigned char *) VARDATA(string);
+
+       if (encoding == PG_UTF8 && *data > 127)
+       {
+               /* return the code point for Unicode */
+
+               int result = 0, tbytes = 0, i;
+
+               if (*data >= 0xF0)
+               {
+                       result = *data & 0x07;
+                       tbytes = 3;
+               }
+               else if (*data >= 0xE0)
+               {
+                       result = *data & 0x0F;
+                       tbytes = 2;
+               }
+               else
+               {
+                       Assert (*data > 0xC0);
+                       result = *data & 0x1f;
+                       tbytes = 1;
+               }
+
+               Assert (tbytes > 0);
+
+               for (i = 1; i <= tbytes; i++)
+               {
+                       Assert ((data[i] & 0xC0) == 0x80);
+                       result = (result << 6) + (data[i] & 0x3f);
+               }
+
+               PG_RETURN_INT32(result);
+       }
+       else
+       {
+               if (pg_encoding_max_length(encoding) > 1 && *data > 127)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("requested character too large")));
+
+
+               PG_RETURN_INT32((int32) *data);
+       }
  }
  
  /********************************************************************
@@ -1270,19 +1323,96 @@ ascii(PG_FUNCTION_ARGS)
   *
   * Purpose:
   *
- *     Returns the character having the binary equivalent to val
+ *     Returns the character having the binary equivalent to val.
+ *
+ * For UTF8 we treat the argument as a Unicode code point.
+ * For other multi-byte encodings we raise an error for arguments
+ * outside the strict ASCII range (1..127).
+ *
+ * It's important that we don't ever return a value that is not valid
+ * in the database encoding, so that this doesn't become a way for
+ * invalid data to enter the database.
   *
   ********************************************************************/
  
  Datum
  chr(PG_FUNCTION_ARGS)
  {
-       int32           cvalue = PG_GETARG_INT32(0);
+       uint32          cvalue = PG_GETARG_UINT32(0);
         text       *result;
+       int encoding = GetDatabaseEncoding();
+
+       if (encoding == PG_UTF8 && cvalue > 127)
+       {
+               /* for Unicode we treat the argument as a code point */
+               int bytes ;
+               char *wch;
  
-       result = (text *) palloc(VARHDRSZ + 1);
-       SET_VARSIZE(result, VARHDRSZ + 1);
-       *VARDATA(result) = (char) cvalue;
+               /* We only allow valid Unicode code points */
+               if (cvalue > 0x001fffff)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("requested character too large for encoding: %d", 
+                                                       cvalue)));
+
+               if (cvalue > 0xffff)
+                       bytes = 4;
+               else if (cvalue > 0x07ff)
+                       bytes = 3;
+               else
+                       bytes = 2;
+
+               result = (text *) palloc(VARHDRSZ + bytes);
+               SET_VARSIZE(result, VARHDRSZ + bytes);
+               wch = VARDATA(result);
+
+               if (bytes == 2)
+               {
+                       wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
+                       wch[1] = 0x80 | (cvalue & 0x3F);;
+               }
+               else if (bytes == 3)
+               {
+                       wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
+                       wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
+                       wch[2] = 0x80 | (cvalue & 0x3F);
+               }
+               else
+               {
+                       wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
+                       wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
+                       wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
+                       wch[3] = 0x80 | (cvalue & 0x3F);
+               }
+               
+       }
+
+       else
+       {
+               bool is_mb;
+
+               /* Error out on arguments that make no sense or that we
+                * can't validly represent in the encoding.
+                */
+
+               if (cvalue == 0)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("null character not permitted")));
+
+               is_mb = pg_encoding_max_length(encoding) > 1;
+
+               if ((is_mb && (cvalue > 255)) || (! is_mb && (cvalue > 127)))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                        errmsg("requested character too large for encoding: %d",
+                                                       cvalue)));
+               
+
+               result = (text *) palloc(VARHDRSZ + 1);
+               SET_VARSIZE(result, VARHDRSZ + 1);
+               *VARDATA(result) = (char) cvalue;
+       }
  
         PG_RETURN_TEXT_P(result);
  }
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c

index 96b8f23754f11c535ac35b186aa7c2af30c936fe..de814346a08aee2f34428a2dca58b640aea6df2c 100644 (file)
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -292,12 +292,12 @@ pg_do_encoding_conversion(unsigned char *src, int len,
  }
  
  /*
- * Convert string using encoding_nanme. We assume that string's
- * encoding is same as DB encoding.
+ * Convert string using encoding_name. The source
+ * encoding is the DB encoding.
   *
- * TEXT convert(TEXT string, NAME encoding_name) */
+ * BYTEA convert_to(TEXT string, NAME encoding_name) */
  Datum
-pg_convert(PG_FUNCTION_ARGS)
+pg_convert_to(PG_FUNCTION_ARGS)
  {
         Datum           string = PG_GETARG_DATUM(0);
         Datum           dest_encoding_name = PG_GETARG_DATUM(1);
@@ -306,7 +306,30 @@ pg_convert(PG_FUNCTION_ARGS)
         Datum           result;
  
         result = DirectFunctionCall3(
-                                pg_convert2, string, src_encoding_name, dest_encoding_name);
+                                pg_convert, string, src_encoding_name, dest_encoding_name);
+
+       /* free memory allocated by namein */
+       pfree((void *) src_encoding_name);
+
+       PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * Convert string using encoding_name. The destination
+ * encoding is the DB encoding.
+ *
+ * TEXT convert_from(BYTEA string, NAME encoding_name) */
+Datum
+pg_convert_from(PG_FUNCTION_ARGS)
+{
+       Datum           string = PG_GETARG_DATUM(0);
+       Datum           src_encoding_name = PG_GETARG_DATUM(1);
+       Datum           dest_encoding_name = DirectFunctionCall1(
+                                                       namein, CStringGetDatum(DatabaseEncoding->name));
+       Datum           result;
+
+       result = DirectFunctionCall3(
+                                pg_convert, string, src_encoding_name, dest_encoding_name);
  
         /* free memory allocated by namein */
         pfree((void *) src_encoding_name);
@@ -315,20 +338,20 @@ pg_convert(PG_FUNCTION_ARGS)
  }
  
  /*
- * Convert string using encoding_name.
+ * Convert string using encoding_names.
   *
- * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
+ * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
   */
  Datum
-pg_convert2(PG_FUNCTION_ARGS)
+pg_convert(PG_FUNCTION_ARGS)
  {
-       text       *string = PG_GETARG_TEXT_P(0);
+       bytea      *string = PG_GETARG_TEXT_P(0);
         char       *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
         int                     src_encoding = pg_char_to_encoding(src_encoding_name);
         char       *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
         int                     dest_encoding = pg_char_to_encoding(dest_encoding_name);
         unsigned char *result;
-       text       *retval;
+       bytea      *retval;
         unsigned char *str;
         int                     len;
  
@@ -343,8 +366,9 @@ pg_convert2(PG_FUNCTION_ARGS)
                                  errmsg("invalid destination encoding name \"%s\"",
                                                 dest_encoding_name)));
  
-       /* make sure that source string is null terminated */
+       /* make sure that source string is valid and null terminated */
         len = VARSIZE(string) - VARHDRSZ;
+       pg_verify_mbstr(src_encoding,VARDATA(string),len,false);
         str = palloc(len + 1);
         memcpy(str, VARDATA(string), len);
         *(str + len) = '\0';
@@ -354,8 +378,7 @@ pg_convert2(PG_FUNCTION_ARGS)
                 elog(ERROR, "encoding conversion failed");
  
         /*
-        * build text data type structure. we cannot use textin() here, since
-        * textin assumes that input string encoding is same as database encoding.
+        * build bytea data type structure.
          */
         len = strlen((char *) result) + VARHDRSZ;
         retval = palloc(len);
@@ -369,7 +392,28 @@ pg_convert2(PG_FUNCTION_ARGS)
         /* free memory if allocated by the toaster */
         PG_FREE_IF_COPY(string, 0);
  
-       PG_RETURN_TEXT_P(retval);
+       PG_RETURN_BYTEA_P(retval);
+}
+
+/*
+ * get the length of the string considered as text in the specified
+ * encoding. Raises an error if the data is not valid in that
+ * encoding.
+ *
+ * INT4 length (BYTEA string, NAME src_encoding_name)
+ */
+Datum
+length_in_encoding(PG_FUNCTION_ARGS)
+{
+       bytea      *string = PG_GETARG_BYTEA_P(0);
+       char       *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
+       int                     src_encoding = pg_char_to_encoding(src_encoding_name);
+       int         len = VARSIZE(string) - VARHDRSZ;
+       int         retval;
+
+       retval = pg_verify_mbstr_len(src_encoding, VARDATA(string), len, false);
+       PG_RETURN_INT32(retval);
+       
  }
  
  /*
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c

index 119d090e3506171c3099d10b228016e2a5f5197f..460ca40d1cc32ef53a711bb46bf89afd860a01be 100644 (file)
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -3,8 +3,6 @@
   * Tatsuo Ishii
   * $PostgreSQL$
   *
- * WIN1250 client encoding updated by Pavel Behal
- *
   */
  /* can be used in either frontend or backend */
  #ifdef FRONTEND
@@ -1435,23 +1433,37 @@ pg_database_encoding_max_length(void)
  bool
  pg_verifymbstr(const char *mbstr, int len, bool noError)
  {
-       return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
+       return 
+               pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
  }
  
  /*
+ * Verify mbstr to make sure that it is validly encoded in the specified
+ * encoding.
+ *
+ */
+bool
+pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
+{
+       return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
+}
+
+/* 
   * Verify mbstr to make sure that it is validly encoded in the specified
   * encoding.
   *
   * mbstr is not necessarily zero terminated; length of mbstr is
   * specified by len.
   *
- * If OK, return TRUE. If a problem is found, return FALSE when noError is
+ * If OK, return length of string in the encoding.     
+ * If a problem is found, return -1 when noError is
   * true; when noError is false, ereport() a descriptive message.
- */
-bool
-pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
+ */ 
+int
+pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
  {
         mbverifier      mbverify;
+       int mb_len;
  
         Assert(PG_VALID_ENCODING(encoding));
  
@@ -1463,14 +1475,16 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
                 const char *nullpos = memchr(mbstr, 0, len);
  
                 if (nullpos == NULL)
-                       return true;
+                       return len;
                 if (noError)
-                       return false;
+                       return -1;
                 report_invalid_encoding(encoding, nullpos, 1);
         }
  
         /* fetch function pointer just once */
         mbverify = pg_wchar_table[encoding].mbverify;
+       
+       mb_len = 0;
  
         while (len > 0)
         {
@@ -1481,12 +1495,13 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
                 {
                         if (*mbstr != '\0')
                         {
+                               mb_len++;
                                 mbstr++;
                                 len--;
                                 continue;
                         }
                         if (noError)
-                               return false;
+                               return -1;
                         report_invalid_encoding(encoding, mbstr, len);
                 }
  
@@ -1495,14 +1510,15 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
                 if (l < 0)
                 {
                         if (noError)
-                               return false;
+                               return -1;
                         report_invalid_encoding(encoding, mbstr, len);
                 }
  
                 mbstr += l;
                 len -= l;
+               mb_len++;
         }
-       return true;
+       return mb_len;
  }
  
  /*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 6ab543da779c9eac3c673739451ac4787f735157..ce010c56b164f6dfc21c8b9b0d029291bd7bd35c 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200709101
+#define CATALOG_VERSION_NO     200709181
  
  #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index c1bf8d357e84e76f975cc5426f8a42eabadcd91d..60a7a60babf9738ca86d4f8e0d041203392650df 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2232,13 +2232,19 @@ DESCR("encoding name of current database");
  DATA(insert OID = 810 (  pg_client_encoding    PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding - _null_ _null_ ));
  DESCR("encoding name of current database");
  
-DATA(insert OID = 1717 (  convert                 PGNSP PGUID 12 1 0 f f t f s 2 25 "25 19" _null_ _null_ _null_ pg_convert - _null_ _null_ ));
+DATA(insert OID = 1713 (  length                  PGNSP PGUID 12 1 0 f f t f s 2 23 "17 19" _null_ _null_ _null_ length_in_encoding - _null_ _null_ ));
+DESCR("length of string in specified encoding");
+
+DATA(insert OID = 1714 (  convert_from            PGNSP PGUID 12 1 0 f f t f s 2 25 "17 19" _null_ _null_ _null_ pg_convert_from - _null_ _null_ ));
+DESCR("convert string with specified source encoding name");
+
+DATA(insert OID = 1717 (  convert_to              PGNSP PGUID 12 1 0 f f t f s 2 17 "25 19" _null_ _null_ _null_ pg_convert_to - _null_ _null_ ));
  DESCR("convert string with specified destination encoding name");
  
-DATA(insert OID = 1813 (  convert                 PGNSP PGUID 12 1 0 f f t f s 3 25 "25 19 19" _null_ _null_ _null_    pg_convert2 - _null_ _null_ ));
+DATA(insert OID = 1813 (  convert                 PGNSP PGUID 12 1 0 f f t f s 3 17 "17 19 19" _null_ _null_ _null_ pg_convert - _null_ _null_ ));
  DESCR("convert string with specified encoding names");
  
-DATA(insert OID = 1619 (  convert_using    PGNSP PGUID 12 1 0 f f t f s 2 25 "25 25" _null_ _null_ _null_  pg_convert_using - _null_ _null_ ));
+DATA(insert OID = 1619 (  convert_using    PGNSP PGUID 12 1 0 f f t f s 2 17 "25 25" _null_ _null_ _null_ pg_convert_using - _null_ _null_ ));
  DESCR("convert string with specified conversion name");
  
  DATA(insert OID = 1264 (  pg_char_to_encoding     PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_  PG_char_to_encoding - _null_ _null_ ));
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h

index 34cc62e1084f5ffda2f3ab4036340563989e95be..8d151749e69120985eeef9751a46da2dbcccaacf 100644 (file)
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -372,6 +372,8 @@ extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
  extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
  extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
                                 bool noError);
+extern int pg_verify_mbstr_len(int encoding, const char *mbstr, int len,
+                               bool noError);
  
  extern void report_invalid_encoding(int encoding, const char *mbstr, int len);
  extern void report_untranslatable_char(int src_encoding, int dest_encoding,
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h

index 13862d08a9b54333973bfd04d446766a818028e2..ffbcda772c0ec123d8370ddff24df7c2d3d532db 100644 (file)
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -902,7 +902,9 @@ extern Datum PG_char_to_encoding(PG_FUNCTION_ARGS);
  extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
  extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
  extern Datum pg_convert(PG_FUNCTION_ARGS);
-extern Datum pg_convert2(PG_FUNCTION_ARGS);
+extern Datum pg_convert_to(PG_FUNCTION_ARGS);
+extern Datum pg_convert_from(PG_FUNCTION_ARGS);
+extern Datum length_in_encoding(PG_FUNCTION_ARGS);
  
  /* format_type.c */
  extern Datum format_type(PG_FUNCTION_ARGS);
author	Andrew Dunstan <andrew@dunslane.net>
	Tue, 18 Sep 2007 17:41:17 +0000 (17:41 +0000)
committer	Andrew Dunstan <andrew@dunslane.net>
	Tue, 18 Sep 2007 17:41:17 +0000 (17:41 +0000)
doc/src/sgml/func.sgml		patch \| blob \| blame \| history
src/backend/catalog/pg_conversion.c		patch \| blob \| blame \| history
src/backend/utils/adt/oracle_compat.c		patch \| blob \| blame \| history
src/backend/utils/mb/mbutils.c		patch \| blob \| blame \| history
src/backend/utils/mb/wchar.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/mb/pg_wchar.h		patch \| blob \| blame \| history
src/include/utils/builtins.h		patch \| blob \| blame \| history