From 4242b9f91ba101348bd2dce1046ed4693dce4d67 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 24 Dec 2005 10:40:55 +0000 Subject: [PATCH] Fix long standing Asian multibyte charsets bug. See: Subject: [HACKERS] bugs with certain Asian multibyte charsets From: Tatsuo Ishii To: pgsql-hackers@postgresql.org Date: Sat, 24 Dec 2005 18:25:33 +0900 (JST) for more details. --- src/backend/utils/mb/wchar.c | 43 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index de40ba60b7..c8dfb143a7 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -52,7 +52,6 @@ pg_ascii_mblen(const unsigned char *s) /* * EUC */ - static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) { @@ -60,26 +59,26 @@ static int pg_euc2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 2) + if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte KANA") */ { from++; - *to = 0xff & *from++; + *to = (SS2 << 8) | *from++; len -= 2; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* JIS X 0208 KANJI */ { *to = *from++ << 8; *to |= *from++; len -= 2; } - else + else /* must be ASCII */ { *to = *from++; len--; @@ -139,6 +138,7 @@ pg_euckr_mblen(const unsigned char *s) /* * EUC_CN + * */ static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) @@ -147,21 +147,21 @@ static int pg_euccn2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 3) + if (*from == SS2 && len >= 3) /* code set 2 (unused?) 
*/ { from++; - *to = 0x3f00 & (*from++ << 8); - *to = *from++; + *to = (SS2 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* code set 1 */ { *to = *from++ << 8; *to |= *from++; @@ -193,6 +193,7 @@ pg_euccn_mblen(const unsigned char *s) /* * EUC_TW + * */ static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) @@ -201,22 +202,22 @@ static int pg_euctw2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 4) + if (*from == SS2 && len >= 4) /* code set 2 */ { from++; - *to = *from++ << 16; + *to = (SS2 << 24) | (*from++ << 16) ; *to |= *from++ << 8; *to |= *from++; len -= 4; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* code set 1 */ { *to = *from++ << 8; *to |= *from++; -- 2.39.5