From: Hiroshi Inoue Date: Wed, 26 Apr 2017 02:10:14 +0000 (+0900) Subject: Reorganize the language conversion between the current locale and unicode. X-Git-Tag: REL-09_06_0400~50 X-Git-Url: http://waps.l3s.uni-hannover.de/gitweb/queryCache.php?a=commitdiff_plain;h=5eeb9345c7f7c67fd1530db779e188e40ba73273;p=psqlodbc.git Reorganize the language conversion between the current locale and unicode. Support the platforms e.g. FreeBSD which satisfies __STDC_UTF_16__ (but does not satisfy __STDC_ISO_10646__) using mbrtoc16() and c16rtomb(). Use the functions bindpara_msg_to_utf8(), bindpara_wchar_to_msg(), bindcol_hybrid_estimate(), bindcol_hybrid_exec(), bindcol_localize_estimate() and bindcol_localize_exec() which hide the difference of platforms instead of platform specific functions. They are implemented using already implemented wstrtomsg(), msgtowstr(), wcs_to_utf8(), utf8_to_wcs_lf() and newly implemented mbstoc16_lf(), c16tomsb(), ucs4_to_ucs2_lf(), ucs2_to_ucs4(). Add eucJP encoding case to wchar-char test. [Minor changes] Add conversion error check between unicode and the current locale. Rename buf to send_buf in ResolveOneParam() so that grep or find can find them easily. Separate "unicode_support.h" from "psqlodbc.h". Change convert_linefeeds() from static to external. Add AC_CHECK_FUNCS(mbrtoc16 c16rtomb) to configure.ac. Remove win_unicode.c from psqlsetup.vcxproj. --- diff --git a/configure.ac b/configure.ac index 2cc4ce8..b415672 100644 --- a/configure.ac +++ b/configure.ac @@ -239,7 +239,7 @@ AC_C_CONST # 7. 
Functions, global variables AC_FUNC_STRERROR_R -AC_CHECK_FUNCS(strtoul strtoll strlcat mbstowcs wcstombs) +AC_CHECK_FUNCS(strtoul strtoll strlcat mbstowcs wcstombs mbrtoc16 c16rtomb) if test "$enable_pthreads" = yes; then AC_CHECK_FUNCS(localtime_r strtok_r pthread_mutexattr_settype) diff --git a/convert.c b/convert.c index dec2198..6171fad 100644 --- a/convert.c +++ b/convert.c @@ -18,6 +18,7 @@ /* Multibyte support Eiji Tokuya 2001-03-15 */ #include "convert.h" +#include "unicode_support.h" #include "misc.h" #ifdef WIN32 #include @@ -167,7 +168,7 @@ typedef struct static const char *mapFunction(const char *func, int param_count); static BOOL convert_money(const char *s, char *sout, size_t soutmax); static char parse_datetime(const char *buf, SIMPLE_TIME *st); -static size_t convert_linefeeds(const char *s, char *dst, size_t max, BOOL convlf, BOOL *changed); +size_t convert_linefeeds(const char *s, char *dst, size_t max, BOOL convlf, BOOL *changed); static size_t convert_from_pgbinary(const char *value, char *rgbValue, SQLLEN cbValueMax); static int convert_lo(StatementClass *stmt, const void *value, SQLSMALLINT fCType, PTR rgbValue, SQLLEN cbValueMax, SQLLEN *pcbValue); @@ -852,49 +853,22 @@ static int effective_fraction(int fraction, int *width) gdata SC_get_GDTI(stmt) current_col stmt->current_col */ + static int convert_text_field_to_sql_c(GetDataInfo *gdata, int current_col, const char *neut_str, OID field_type, SQLSMALLINT fCType, char *rgbValueBindRow, SQLLEN cbValueMax, const ConnectionClass *conn, SQLLEN *length_return) { - BOOL hex_bin_format = FALSE, changed = FALSE; + BOOL bytea_bad_format = FALSE; int result = COPY_OK; - SQLLEN len = *length_return; + SQLLEN len = (-2); GetDataClass *pgdc; - int copy_len = 0, needbuflen = 0, len_for_wcs_term = 0, i; + int copy_len = 0, needbuflen = 0, i; const char *ptr; - BOOL lf_conv = conn->connInfo.lf_conversion; #ifdef UNICODE_SUPPORT - BOOL wcs_debug = conn->connInfo.wcs_debug; - ssize_t wstrlen = 0; - wchar_t 
*allocbuf = NULL; - int wcstype = 0, ucount = 0; - BOOL localize_needed = FALSE; - BOOL same_encoding = (conn->ccsc == pg_CS_code(conn->locale_encoding)); - BOOL is_utf8 = (UTF8 == conn->ccsc); - BOOL hybrid = FALSE; - - wcstype = get_wcstype(); - switch (field_type) - { - case PG_TYPE_UNKNOWN: - case PG_TYPE_BPCHAR: - case PG_TYPE_VARCHAR: - case PG_TYPE_TEXT: - case PG_TYPE_BPCHARARRAY: - case PG_TYPE_VARCHARARRAY: - case PG_TYPE_TEXTARRAY: - if (SQL_C_CHAR == fCType || SQL_C_BINARY == fCType) - localize_needed = ((!same_encoding || wcs_debug) && - wcstype > 0); - if (SQL_C_WCHAR == fCType) - hybrid = ((!is_utf8 || (same_encoding && wcs_debug)) && - wcstype > 0); - } - - mylog("%s:field_type=%u type=%d is_utf8=%d same_encoding=%d localize=%d\n", __FUNCTION__, field_type, fCType, is_utf8, same_encoding, localize_needed); -#else - mylog("%s:field_type=%u type=%dd\n", __FUNCTION__, field_type, fCType); + char *allocbuf = NULL; #endif /* UNICODE_SUPPORT */ + mylog("%s:field_type=%u type=%d\n", __FUNCTION__, field_type, fCType); + switch (field_type) { case PG_TYPE_FLOAT4: @@ -903,11 +877,10 @@ convert_text_field_to_sql_c(GetDataInfo *gdata, int current_col, const char *neu set_client_decimal_point((char *) neut_str); break; case PG_TYPE_BYTEA: - if (0 == strnicmp(neut_str, "\\x", 2)) - { - hex_bin_format = TRUE; + if (0 == strnicmp(neut_str, "\\x", 2)) /* hex format */ neut_str += 2; - } + else + bytea_bad_format = TRUE; break; } @@ -920,62 +893,93 @@ convert_text_field_to_sql_c(GetDataInfo *gdata, int current_col, const char *neu pgdc = &gdata->gdata[current_col]; if (pgdc->data_left < 0) { - if (PG_TYPE_BYTEA == field_type) - { - if (hex_bin_format) - len = strlen(neut_str); - else - { - len = convert_from_pgbinary(neut_str, NULL, 0); - len *= 2; - } - changed = TRUE; + BOOL lf_conv = conn->connInfo.lf_conversion; + BOOL already_processed = FALSE; + BOOL changed = FALSE; + int len_for_wcs_term = 0; + #ifdef UNICODE_SUPPORT - ucount = len; - if (fCType == 
SQL_C_WCHAR) - len *= WCLEN; + int unicode_count = -1; + BOOL localize_needed = FALSE; + BOOL hybrid = FALSE; #endif /* UNICODE_SUPPORT */ + + /* process bad bytea format first */ + if (bytea_bad_format) + { + len = convert_from_pgbinary(neut_str, NULL, 0) * 2; + already_processed = changed = TRUE; } - else + #ifdef UNICODE_SUPPORT - if (fCType == SQL_C_WCHAR) + if (!already_processed && get_convtype() > 0) /* coversion between the current locale is available */ + { + BOOL wcs_debug = conn->connInfo.wcs_debug; + BOOL same_encoding = (conn->ccsc == pg_CS_code(conn->locale_encoding)); + BOOL is_utf8 = (UTF8 == conn->ccsc); + + switch (field_type) + { + case PG_TYPE_UNKNOWN: + case PG_TYPE_BPCHAR: + case PG_TYPE_VARCHAR: + case PG_TYPE_TEXT: + case PG_TYPE_BPCHARARRAY: + case PG_TYPE_VARCHARARRAY: + case PG_TYPE_TEXTARRAY: + if (SQL_C_CHAR == fCType || SQL_C_BINARY == fCType) + localize_needed = (!same_encoding || wcs_debug); + if (SQL_C_WCHAR == fCType) + hybrid = (!is_utf8 || (same_encoding && wcs_debug)); + } + mylog("%s:localize=%d hybrid=%d is_utf8=%d same_encoding=%d wcs_debug=%d\n", __FUNCTION__, localize_needed, hybrid, is_utf8, same_encoding, wcs_debug); + } + if (already_processed) /* skip */ + ; + else if (fCType == SQL_C_WCHAR) { if (hybrid) { mylog("%s:hybrid estimate\n", __FUNCTION__); - ucount = msgtowstr(neut_str, NULL, 0); - if (WCSTYPE_UTF16_LE != wcstype) + if ((unicode_count = bindcol_hybrid_estimate(neut_str, lf_conv, &allocbuf)) < 0) { - wchar_t *alw; - char *alc; - - alw = (wchar_t *) malloc(sizeof(wchar_t) * (ucount + 1)); - ucount = msgtowstr(neut_str, alw, ucount + 1); - alc = wcs_to_utf8(alw, SQL_NTS, NULL, FALSE); - free(alw); - ucount = utf8_to_ucs2_lf(alc, SQL_NTS, lf_conv, NULL, 0, FALSE); - allocbuf = (wchar_t *) alc; + result = COPY_INVALID_STRING_CONVERSION; + goto cleanup; } } else /* normally */ { - ucount = utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, NULL, 0, FALSE); + unicode_count = utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, 
NULL, 0, FALSE); } - len = WCLEN * ucount; - changed = TRUE; + len = WCLEN * unicode_count; + already_processed = changed = TRUE; } else if (localize_needed) { - wstrlen = utf8_to_wcs_lf(neut_str, SQL_NTS, lf_conv, NULL, 0, FALSE); - allocbuf = (wchar_t *) malloc(sizeof(wchar_t) * (wstrlen + 1)); - wstrlen = utf8_to_wcs_lf(neut_str, SQL_NTS, lf_conv, allocbuf, wstrlen + 1, FALSE); - len = wstrtomsg(allocbuf, NULL, 0); - changed = TRUE; + if ((len = bindcol_localize_estimate(neut_str, lf_conv, &allocbuf)) < 0) + { + result = COPY_INVALID_STRING_CONVERSION; + goto cleanup; + } + already_processed = changed = TRUE; } - else #endif /* UNICODE_SUPPORT */ + + if (!already_processed) /* not yet processed */ /* convert linefeeds to carriage-return/linefeed */ len = convert_linefeeds(neut_str, NULL, 0, lf_conv, &changed); + +#ifdef UNICODE_SUPPORT + if (fCType == SQL_C_WCHAR) + { + if (unicode_count < 0) + { + unicode_count = len; + len *= WCLEN; + } + } +#endif /* UNICODE_SUPPORT */ + /* just returns length info */ if (cbValueMax == 0) { @@ -1018,10 +1022,12 @@ convert_text_field_to_sql_c(GetDataInfo *gdata, int current_col, const char *neu pgdc->ttlbuf = realloc(pgdc->ttlbuf, needbuflen + len_for_wcs_term); pgdc->ttlbuflen = needbuflen; } + + already_processed = FALSE; #ifdef UNICODE_SUPPORT if (fCType == SQL_C_WCHAR) { - if (PG_TYPE_BYTEA == field_type && !hex_bin_format) + if (bytea_bad_format) { len = convert_from_pgbinary(neut_str, pgdc->ttlbuf, pgdc->ttlbuflen); len = pg_bin2whex(pgdc->ttlbuf, (SQLWCHAR *) pgdc->ttlbuf, len); @@ -1029,45 +1035,38 @@ convert_text_field_to_sql_c(GetDataInfo *gdata, int current_col, const char *neu else { if (!hybrid) /* normally */ - utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, (SQLWCHAR *) pgdc->ttlbuf, ucount, FALSE); + utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, (SQLWCHAR *) pgdc->ttlbuf, unicode_count, FALSE); else /* hybrid */ { mylog("%s:hybrid convert\n", __FUNCTION__); - if (WCSTYPE_UTF16_LE == wcstype) - msgtowstr(neut_str, 
(wchar_t *) pgdc->ttlbuf, (int) ucount + 1); - else + if (bindcol_hybrid_exec((SQLWCHAR *) pgdc->ttlbuf, neut_str, unicode_count + 1, lf_conv, &allocbuf) < 0) { - utf8_to_ucs2_lf((char *) allocbuf, SQL_NTS, lf_conv, (SQLWCHAR *) pgdc->ttlbuf, ucount, FALSE); + result = COPY_INVALID_STRING_CONVERSION; + goto cleanup; } } } + already_processed = TRUE; } - else -#endif /* UNICODE_SUPPORT */ - if (PG_TYPE_BYTEA == field_type) + else if (localize_needed) { - if (hex_bin_format) + if (bindcol_localize_exec(pgdc->ttlbuf, len + 1, lf_conv, &allocbuf) < 0) { - len = strlen(neut_str); - strncpy_null(pgdc->ttlbuf, neut_str, pgdc->ttlbuflen); - } - else - { - len = convert_from_pgbinary(neut_str, pgdc->ttlbuf, pgdc->ttlbuflen); - len = pg_bin2hex(pgdc->ttlbuf, pgdc->ttlbuf, len); + result = COPY_INVALID_STRING_CONVERSION; + goto cleanup; } + already_processed = TRUE; } - else -#ifdef UNICODE_SUPPORT - if (localize_needed) +#endif /* UNICODE_SUPPORT */ + + if (already_processed) + ; + else if (bytea_bad_format) { - len = wstrtomsg(allocbuf, pgdc->ttlbuf, (int) pgdc->ttlbuflen + len_for_wcs_term); - pgdc->ttlbuflen = len; - free(allocbuf); - allocbuf = NULL; + len = convert_from_pgbinary(neut_str, pgdc->ttlbuf, pgdc->ttlbuflen); + len = pg_bin2hex(pgdc->ttlbuf, pgdc->ttlbuf, len); } else -#endif /* UNICODE_SUPPORT */ convert_linefeeds(neut_str, pgdc->ttlbuf, pgdc->ttlbuflen, lf_conv, &changed); ptr = pgdc->ttlbuf; pgdc->ttlbufused = len; @@ -1626,15 +1625,9 @@ inolog("2stime fr=%d\n", std_time.fr); default: pre_convert = FALSE; } - switch (pre_convert) - { - case TRUE: - neut_str = midtemp; - /* fall through */ - default: - result = convert_text_field_to_sql_c(gdata, stmt->current_col, neut_str, field_type, fCType, rgbValueBindRow, cbValueMax, conn, &len); - } - + if (pre_convert) + neut_str = midtemp; + result = convert_text_field_to_sql_c(gdata, stmt->current_col, neut_str, field_type, fCType, rgbValueBindRow, cbValueMax, conn, &len); } else { @@ -4112,7 +4105,9 @@ 
ResolveOneParam(QueryBuild *qb, QueryParse *qp, BOOL *isnull, BOOL *isbinary, struct tm tm; #endif /* HAVE_LOCALTIME_R */ SQLLEN used; - char *buffer, *buf, *allocbuf = NULL, *lastadd = NULL; + char *send_buf; + + char *buffer, *allocbuf = NULL, *lastadd = NULL; OID lobj_oid; int lobj_fd; SQLULEN offset = apdopts->param_offset_ptr ? *apdopts->param_offset_ptr : 0; @@ -4132,13 +4127,6 @@ ResolveOneParam(QueryBuild *qb, QueryParse *qp, BOOL *isnull, BOOL *isbinary, BOOL final_binary_convert = FALSE; RETCODE retval = SQL_ERROR; -#ifdef UNICODE_SUPPORT - BOOL wcs_debug = conn->connInfo.wcs_debug; - BOOL is_utf8 = (UTF8 == conn->ccsc); - BOOL same_encoding = (conn->ccsc == pg_CS_code(conn->locale_encoding)); - int wcstype = get_wcstype(); -#endif - *isnull = FALSE; *isbinary = FALSE; *pgType = 0; @@ -4355,7 +4343,7 @@ inolog("ipara=%p paramType=%d %d proc_return=%d\n", ipara, ipara ? ipara->paramT #endif } - allocbuf = buf = NULL; + allocbuf = send_buf = NULL; param_string[0] = '\0'; cbuf[0] = '\0'; memset(&st, 0, sizeof(st)); @@ -4363,54 +4351,64 @@ inolog("ipara=%p paramType=%d %d proc_return=%d\n", ipara, ipara ? 
ipara->paramT ivstruct = (SQL_INTERVAL_STRUCT *) buffer; /* Convert input C type to a neutral format */ +#ifdef UNICODE_SUPPORT + if (get_convtype() > 0) /* coversion between the current locale is available */ + { + BOOL wcs_debug = conn->connInfo.wcs_debug; + BOOL is_utf8 = (UTF8 == conn->ccsc); + BOOL same_encoding = (conn->ccsc == pg_CS_code(conn->locale_encoding)); + + switch (param_ctype) + { + case SQL_C_CHAR: + if (!same_encoding || wcs_debug) + { + mylog("%s:locale param convert\n", __FUNCTION__); + if ((used = bindpara_msg_to_utf8(buffer, &allocbuf)) < 0) + { + qb->errormsg = "Could not convert from the current locale to wide characters"; + qb->errornumber = STMT_EXEC_ERROR; + retval = SQL_ERROR; + goto cleanup; + } + send_buf = allocbuf; + } + break; + case SQL_C_WCHAR: + if (!is_utf8 || (same_encoding && wcs_debug)) + { + mylog("%s:hybrid param convert\n", __FUNCTION__); + if ((used = bindpara_wchar_to_msg((SQLWCHAR *) buffer, &allocbuf)) < 0) + { + qb->errormsg = "Could not convert from wide characters to the current locale"; + qb->errornumber = STMT_EXEC_ERROR; + retval = SQL_ERROR; + goto cleanup; + } + send_buf = allocbuf; + } + break; + } + } +#endif /* UNICODE_SUPPORT */ switch (param_ctype) { case SQL_C_BINARY: - buf = buffer; + send_buf = buffer; break; case SQL_C_CHAR: -#ifdef UNICODE_SUPPORT - if ((!same_encoding || wcs_debug) && - wcstype > 0) - { - if (SQL_NTS == used) - used = strlen(buffer); - allocbuf = malloc(sizeof(wchar_t) * (used + 1)); - if (allocbuf) - { - used = msgtowstr(buffer, (wchar_t *) allocbuf, used + 1); - buf = wcs_to_utf8((const wchar_t *) allocbuf, used, &used, FALSE); - free(allocbuf); - allocbuf = buf; - } - } - else -#endif /* UNICODE_SUPPORT */ - buf = buffer; + if (NULL == send_buf) + send_buf = buffer; break; #ifdef UNICODE_SUPPORT case SQL_C_WCHAR: mylog(" %s:C_WCHAR=%d contents=%s(%d)\n", __FUNCTION__, param_ctype, buffer, used); - allocbuf = ucs2_to_utf8((SQLWCHAR *) buffer, used > 0 ? 
used / WCLEN : used, &used, FALSE); - if ((!is_utf8 || (same_encoding && wcs_debug)) && - wcstype > 0) /* hybrid case */ + if (NULL == send_buf) { - ssize_t wstrlen = ucs2strlen((SQLWCHAR *) buffer); - size_t allen = (wstrlen + 1) * sizeof(wchar_t); - wchar_t *al1; - - mylog("%s:hybrid convert wstrlen=%d\n", __FUNCTION__, wstrlen); - al1 = (wchar_t *) malloc(allen); - wstrlen = utf8_to_wcs_lf(allocbuf, SQL_NTS, FALSE, al1, allen, FALSE); - free(allocbuf); - allen = 4 * wstrlen; - allocbuf = malloc(allen + 1); - used = wstrtomsg(al1, allocbuf, allen + 1); + allocbuf = ucs2_to_utf8((SQLWCHAR *) buffer, used > 0 ? used / WCLEN : used, &used, FALSE); + send_buf = allocbuf; } - buf = allocbuf; - - /* used *= WCLEN; */ break; #endif /* UNICODE_SUPPORT */ @@ -4642,16 +4640,16 @@ mylog(" %s:C_WCHAR=%d contents=%s(%d)\n", __FUNCTION__, param_ctype, buffer, use */ /* Special handling NULL string For FOXPRO */ -mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date_string, param_pgtype, buf); +mylog("cvt_null_date_string=%d pgtype=%d send_buf=%p\n", conn->connInfo.cvt_null_date_string, param_pgtype, send_buf); if (conn->connInfo.cvt_null_date_string > 0 && (PG_TYPE_DATE == param_pgtype || PG_TYPE_DATETIME == param_pgtype || PG_TYPE_TIMESTAMP_NO_TMZONE == param_pgtype) && - NULL != buf && + NULL != send_buf && ( - (SQL_C_CHAR == param_ctype && '\0' == buf[0]) + (SQL_C_CHAR == param_ctype && '\0' == send_buf[0]) #ifdef UNICODE_SUPPORT - || (SQL_C_WCHAR ==param_ctype && '\0' == buf[0] && '\0' == buf[1]) + || (SQL_C_WCHAR ==param_ctype && '\0' == send_buf[0] && '\0' == send_buf[1]) #endif /* UNICODE_SUPPORT */ )) { @@ -4666,18 +4664,18 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date * We now have the value we want to print in one of these three canonical * formats: * - * 1. As a string in 'buf', with length indicated by 'used' (can be + * 1. As a string in 'send_buf', with length indicated by 'used' (can be * SQL_NTS). 
* 2. As a null-terminated string in 'param_string'. * 3. Time-related fields in 'st'. */ /* - * For simplicity, fold the param_string representation into 'buf'. + * For simplicity, fold the param_string representation into 'send_buf'. */ - if (!buf && param_string[0]) + if (!send_buf && param_string[0]) { - buf = param_string; + send_buf = param_string; used = SQL_NTS; } @@ -4709,34 +4707,34 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date * with -1, like "-1234" or "-1foobar". Is that * intentional? */ - if (NULL != buf && '-' == buf[0] && '1' == buf[1]) + if (NULL != send_buf && '-' == send_buf[0] && '1' == send_buf[1]) { - buf = "1"; + send_buf = "1"; used = 1; } break; case PG_TYPE_FLOAT4: case PG_TYPE_FLOAT8: case PG_TYPE_NUMERIC: - if (NULL != buf) - set_server_decimal_point(buf, used); + if (NULL != send_buf) + set_server_decimal_point(send_buf, used); break; } - if (!buf) + if (!send_buf) { /* it was date,time,timestamp -- use m,d,y,hh,mm,ss */ snprintf(tmp, sizeof(tmp), "%.4d-%.2d-%.2d %.2d:%.2d:%.2d", st.y, st.m, st.d, st.hh, st.mm, st.ss); - buf = tmp; + send_buf = tmp; used = SQL_NTS; } break; case SQL_DATE: case SQL_TYPE_DATE: /* 91 */ - if (buf) + if (send_buf) { /* copy char data to time */ - my_strcpy(cbuf, sizeof(cbuf), buf, used); + my_strcpy(cbuf, sizeof(cbuf), send_buf, used); parse_datetime(cbuf, &st); } @@ -4745,15 +4743,15 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date else sprintf(tmp, "%.4d-%.2d-%.2d", st.y, st.m, st.d); lastadd = "::date"; - buf = tmp; + send_buf = tmp; used = SQL_NTS; break; case SQL_TIME: case SQL_TYPE_TIME: /* 92 */ - if (buf) + if (send_buf) { /* copy char data to time */ - my_strcpy(cbuf, sizeof(cbuf), buf, used); + my_strcpy(cbuf, sizeof(cbuf), send_buf, used); parse_datetime(cbuf, &st); } @@ -4766,15 +4764,15 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date else sprintf(tmp, "%.2d:%.2d:%.2d", st.hh, st.mm, 
st.ss); lastadd = "::time"; - buf = tmp; + send_buf = tmp; used = SQL_NTS; break; case SQL_TIMESTAMP: case SQL_TYPE_TIMESTAMP: /* 93 */ - if (buf) + if (send_buf) { - my_strcpy(cbuf, sizeof(cbuf), buf, used); + my_strcpy(cbuf, sizeof(cbuf), send_buf, used); parse_datetime(cbuf, &st); } @@ -4785,7 +4783,7 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date /* Time zone stuff is unreliable */ stime2timestamp(&st, tmp, sizeof(tmp), USE_ZONE, 6); lastadd = "::timestamp"; - buf = tmp; + send_buf = tmp; used = SQL_NTS; break; @@ -4803,14 +4801,14 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date switch (used) { case SQL_NTS: - used = strlen(buf); + used = strlen(send_buf); break; } allocbuf = malloc(used / 2 + 1); if (allocbuf) { - pg_hex2bin(buf, allocbuf, used); - buf = allocbuf; + pg_hex2bin(send_buf, allocbuf, used); + send_buf = allocbuf; used /= 2; } break; @@ -4917,7 +4915,7 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date */ sprintf(param_string, "%u", lobj_oid); lastadd = "::lo"; - buf = param_string; + send_buf = param_string; used = SQL_NTS; break; @@ -4928,12 +4926,12 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date /* must be quoted (0 or 1 is ok to use inside the quotes) */ case SQL_REAL: - set_server_decimal_point(buf, used); + set_server_decimal_point(send_buf, used); lastadd = "::float4"; break; case SQL_FLOAT: case SQL_DOUBLE: - set_server_decimal_point(buf, used); + set_server_decimal_point(send_buf, used); lastadd = "::float8"; break; case SQL_NUMERIC: @@ -4949,7 +4947,7 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date * a query like "SELECT 0-?" would turn into "SELECT 0--123". 
*/ case SQL_INTEGER: - if (valid_int_literal(buf, used, &negative)) + if (valid_int_literal(send_buf, used, &negative)) { need_quotes = FALSE; add_parens = negative; @@ -4965,7 +4963,7 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date } break; case SQL_SMALLINT: - if (valid_int_literal(buf, used, &negative)) + if (valid_int_literal(send_buf, used, &negative)) { need_quotes = FALSE; add_parens = negative; @@ -4978,17 +4976,17 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date } if (used == SQL_NTS) - used = strlen(buf); + used = strlen(send_buf); /* - * Ok, we now have the final string representation in 'buf', length 'used'. + * Ok, we now have the final string representation in 'send_buf', length 'used'. * We're ready to output the final string, with quotes and other * embellishments if necessary. * * In bind-mode, we don't need to do any quoting. */ if (req_bind) - CVT_APPEND_DATA(qb, buf, used); + CVT_APPEND_DATA(qb, send_buf, used); else { if (add_parens) @@ -5001,14 +4999,14 @@ mylog("cvt_null_date_string=%d pgtype=%d buf=%p\n", conn->connInfo.cvt_null_date CVT_APPEND_CHAR(qb, LITERAL_QUOTE); if (final_binary_convert) - CVT_APPEND_BINARY(qb, buf, used); + CVT_APPEND_BINARY(qb, send_buf, used); else - CVT_SPECIAL_CHARS(qb, buf, used); + CVT_SPECIAL_CHARS(qb, send_buf, used); CVT_APPEND_CHAR(qb, LITERAL_QUOTE); } else - CVT_APPEND_DATA(qb, buf, used); + CVT_APPEND_DATA(qb, send_buf, used); if (add_parens) CVT_APPEND_CHAR(qb, ')'); @@ -5601,7 +5599,7 @@ parse_datetime(const char *buf, SIMPLE_TIME *st) /* Change linefeed to carriage-return/linefeed */ -static size_t +size_t convert_linefeeds(const char *si, char *dst, size_t max, BOOL convlf, BOOL *changed) { size_t i = 0, diff --git a/convert.h b/convert.h index 3411cee..dec0769 100644 --- a/convert.h +++ b/convert.h @@ -21,6 +21,7 @@ extern "C" { #define COPY_RESULT_TRUNCATED 3 #define COPY_GENERAL_ERROR 4 #define COPY_NO_DATA_FOUND 5 +#define 
COPY_INVALID_STRING_CONVERSION 6 int copy_and_convert_field_bindinfo(StatementClass *stmt, OID field_type, int atttypmod, void *value, int col); int copy_and_convert_field(StatementClass *stmt, diff --git a/info.c b/info.c index 07933e1..333a200 100644 --- a/info.c +++ b/info.c @@ -17,6 +17,7 @@ */ #include "psqlodbc.h" +#include "unicode_support.h" #include #include diff --git a/odbcapi30w.c b/odbcapi30w.c index 0082bb2..87592f8 100644 --- a/odbcapi30w.c +++ b/odbcapi30w.c @@ -13,6 +13,7 @@ */ #include "psqlodbc.h" +#include "unicode_support.h" #include #include diff --git a/odbcapiw.c b/odbcapiw.c index 452ca79..526ba00 100644 --- a/odbcapiw.c +++ b/odbcapiw.c @@ -18,6 +18,7 @@ */ #include "psqlodbc.h" +#include "unicode_support.h" #include #include diff --git a/options.c b/options.c index 3c03e0e..6406db7 100644 --- a/options.c +++ b/options.c @@ -14,6 +14,7 @@ */ #include "psqlodbc.h" +#include "unicode_support.h" #include #include "misc.h" diff --git a/psqlodbc.h b/psqlodbc.h index bbb264d..220917c 100644 --- a/psqlodbc.h +++ b/psqlodbc.h @@ -265,7 +265,10 @@ extern int posix_snprintf(char *buf, size_t size, const char *format, ...); #define DRIVER_ODBC_VER "03.51" -#ifndef UNICODE_SUPPORT +#ifdef UNICODE_SUPPORT +#define WCLEN sizeof(SQLWCHAR) +SQLULEN ucs2strlen(const SQLWCHAR *); +#else #undef SQL_WCHAR #undef SQL_WVARCHAR #undef SQL_WLONGVARCHAR @@ -618,23 +621,6 @@ void CC_copy_conninfo(ConnInfo *ci, const ConnInfo *sci); #ifdef POSIX_THREADMUTEX_SUPPORT const pthread_mutexattr_t *getMutexAttr(void); #endif /* POSIX_THREADMUTEX_SUPPORT */ -#ifdef UNICODE_SUPPORT -#define WCLEN sizeof(SQLWCHAR) -enum { - WCSTYPE_UNKNOWN - ,WCSTYPE_UTF16_LE - ,WCSTYPE_UTF32_LE - }; -SQLULEN ucs2strlen(const SQLWCHAR *ucs2str); -char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL tolower); -SQLULEN utf8_to_ucs2_lf(const char * utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN buflen, BOOL errcheck); -int get_wcstype(void); -int 
msgtowstr(const char *, wchar_t *, int); -int wstrtomsg(const wchar_t *, char *, int); -char *wcs_to_utf8(const wchar_t *wcsstr, SQLLEN ilen, SQLLEN *olen, BOOL tolower); -SQLULEN utf8_to_wcs_lf(const char * utf8str, SQLLEN ilen, BOOL lfconv, wchar_t *wcsstr, SQLULEN buflen, BOOL errcheck); -#define utf8_to_ucs2(utf8str, ilen, ucs2str, buflen) utf8_to_ucs2_lf(utf8str, ilen, FALSE, ucs2str, buflen, FALSE) -#endif /* UNICODE_SUPPORT */ /* Define a type for defining a constant string expression */ #ifndef CSTR diff --git a/results.c b/results.c index 316c017..d826bee 100644 --- a/results.c +++ b/results.c @@ -1116,6 +1116,11 @@ inolog("currT=%d base=%d rowset=%d\n", stmt->currTuple, QR_get_rowstart_in_cache result = SQL_SUCCESS_WITH_INFO; break; + case COPY_INVALID_STRING_CONVERSION: /* invalid string */ + SC_set_error(stmt, STMT_STRING_CONVERSION_ERROR, "invalid string conversion occured.", func); + result = SQL_ERROR; + break; + case COPY_GENERAL_ERROR: /* error msg already filled in */ result = SQL_ERROR; break; diff --git a/setup.c b/setup.c index e522129..2e55a38 100644 --- a/setup.c +++ b/setup.c @@ -468,7 +468,7 @@ test_connection(HANDLE hwnd, ConnInfo *ci, BOOL withDTC) makeConnectString(out_conn, ci, sizeof(out_conn)); mylog("conn_string=%s\n", out_conn); #ifdef UNICODE_SUPPORT - msgtowstr(out_conn, wout_conn, _countof(wout_conn)); + MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, out_conn, -1, wout_conn, sizeof(wout_conn) / sizeof(wout_conn[0])); conn_str = wout_conn; #else conn_str = out_conn; diff --git a/statement.c b/statement.c index 889dc91..69ee387 100644 --- a/statement.c +++ b/statement.c @@ -1729,6 +1729,11 @@ inolog("curt=%d\n", curt); result = SQL_SUCCESS_WITH_INFO; break; + case COPY_INVALID_STRING_CONVERSION: /* invalid string */ + SC_set_error(self, STMT_STRING_CONVERSION_ERROR, "invalid string conversion occured.", func); + result = SQL_ERROR; + break; + /* error msg already filled in */ case COPY_GENERAL_ERROR: result = SQL_ERROR; diff --git 
a/statement.h b/statement.h index 1e11e3a..d564c94 100644 --- a/statement.h +++ b/statement.h @@ -96,6 +96,7 @@ enum { ,STMT_INVALID_NULL_ARG ,STMT_NO_RESPONSE ,STMT_COMMUNICATION_ERROR + ,STMT_STRING_CONVERSION_ERROR }; /* statement types */ diff --git a/test/expected/wchar-char_3.out b/test/expected/wchar-char_3.out new file mode 100644 index 0000000..61c0738 --- /dev/null +++ b/test/expected/wchar-char_3.out @@ -0,0 +1,5 @@ +connected +EUCJP test +ANSI=»ä¤Ï°æ¾åÇî»Ë¤Ç¤¹¡£µ®Êý¤ÏÀÆÆ£¹À¤µ¤ó¤Ç¤¹¤Í¡© +U+79C1U+306FU+4E95U+4E0AU+535AU+53F2U+3067U+3059U+3002U+8CB4U+65B9U+306FU+6589U+85E4U+6D69U+3055U+3093U+3067U+3059U+306DU+FF1F +disconnecting diff --git a/test/src/wchar-char-test-eucjp.c b/test/src/wchar-char-test-eucjp.c new file mode 100644 index 0000000..17c9bbf --- /dev/null +++ b/test/src/wchar-char-test-eucjp.c @@ -0,0 +1,49 @@ + +static int eucjp_test(HSTMT hstmt) +{ + int rc; + + SQLLEN ind, cbParam, cbParam2; + unsigned char lovedt[100] = {0x95, 0x4e, 0x0a, 0x4e, 0x5a, 0x53, 0xf2, 0x53, 0x0, 0x0}; + SQLWCHAR wchar[100]; + SQLCHAR str[100]; + SQLCHAR chardt[100]; + SQLTCHAR query[] = _T("select '»ä¤Ï' || ?::text || '¤Ç¤¹¡£µ®Êý¤Ï' || ?::text || '¤µ¤ó¤Ç¤¹¤Í¡©'"); + + rc = SQLBindCol(hstmt, 1, SQL_C_CHAR, (SQLPOINTER) chardt, sizeof(chardt), &ind); + CHECK_STMT_RESULT(rc, "SQLBindCol to SQL_C_CHAR failed", hstmt); + + cbParam = SQL_NTS; + rc = SQLBindParameter(hstmt, 1, SQL_PARAM_INPUT, + SQL_C_WCHAR, /* value type */ + SQL_WCHAR, /* param type */ + sizeof(lovedt) / sizeof(lovedt[0]), /* column size */ + 0, /* dec digits */ + lovedt, // param1, /* param value ptr */ + sizeof(lovedt), /* buffer len */ + &cbParam /* StrLen_or_IndPtr */); + CHECK_STMT_RESULT(rc, "SQLBindParameter 1 failed", hstmt); + cbParam2 = SQL_NTS; + rc = SQLBindParameter(hstmt, 2, SQL_PARAM_INPUT, SQL_C_CHAR, SQL_CHAR, sizeof(str), 0, str, sizeof(str), &cbParam2); + CHECK_STMT_RESULT(rc, "SQLBindParameter 2 failed", hstmt); + strncpy((char *) str, "ÀÆÆ£¹À", sizeof(str)); + rc = 
SQLExecDirect(hstmt, query, SQL_NTS); + CHECK_STMT_RESULT(rc, "SQLExecDirect failed to return SQL_C_CHAR", hstmt); + while (SQL_SUCCEEDED(SQLFetch(hstmt))) + printf("ANSI=%s\n", chardt); + fflush(stdout); + SQLFreeStmt(hstmt, SQL_CLOSE); + + rc = SQLBindCol(hstmt, 1, SQL_C_WCHAR, (SQLPOINTER) wchar, sizeof(wchar) / sizeof(wchar[0]), &ind); + CHECK_STMT_RESULT(rc, "SQLBindCol to SQL_C_WCHAR failed", hstmt); + + + rc = SQLExecDirect(hstmt, query, SQL_NTS); + CHECK_STMT_RESULT(rc, "SQLExecDirect failed to return SQL_C_WCHAR", hstmt); + while (SQL_SUCCEEDED(rc = SQLFetch(hstmt))) + { + print_utf16_le(wchar); + } + + return rc; +} diff --git a/test/src/wchar-char-test.c b/test/src/wchar-char-test.c index a763706..32f37b5 100644 --- a/test/src/wchar-char-test.c +++ b/test/src/wchar-char-test.c @@ -37,10 +37,12 @@ print_utf16_le(const SQLWCHAR *wdt) #include "wchar-char-test-sjis.c" #include "wchar-char-test-utf8.c" +#include "wchar-char-test-eucjp.c" enum { SJIS_TEST ,UTF8_TEST + ,EUCJP_TEST }; int main(int argc, char **argv) @@ -58,6 +60,7 @@ int main(int argc, char **argv) ,{ "932", SJIS_TEST } ,{ "utf-8", UTF8_TEST } ,{ "utf8", UTF8_TEST } + ,{ "eucjp", EUCJP_TEST } }; loc = setlocale(LC_ALL, ""); @@ -102,6 +105,11 @@ int main(int argc, char **argv) fflush(stdout); rc = utf8_test(hstmt); break; + case EUCJP_TEST: + printf("EUCJP test\n"); + fflush(stdout); + rc = eucjp_test(hstmt); + break; } /* Clean up */ diff --git a/unicode_support.h b/unicode_support.h new file mode 100644 index 0000000..a349519 --- /dev/null +++ b/unicode_support.h @@ -0,0 +1,35 @@ +/* File: psqlodbc.h + * + * Description: This file contains defines and declarations that are related to + * the entire driver. + * + * Comments: See "readme.txt" for copyright and license information. 
+ */ + +#ifndef __UNICODE_SUPPORT_H__ +#define __UNICODE_SUPPORT_H__ + +#include "psqlodbc.h" + +#ifdef UNICODE_SUPPORT +#define WCLEN sizeof(SQLWCHAR) +enum { + CONVTYPE_UNKNOWN + ,WCSTYPE_UTF16_LE + ,WCSTYPE_UTF32_LE + ,C16TYPE_UTF16_LE + }; +char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL tolower); +SQLULEN utf8_to_ucs2_lf(const char * utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN buflen, BOOL errcheck); +int get_convtype(void); +#define utf8_to_ucs2(utf8str, ilen, ucs2str, buflen) utf8_to_ucs2_lf(utf8str, ilen, FALSE, ucs2str, buflen, FALSE) + +SQLLEN bindcol_hybrid_estimate(const char *ldt, BOOL lf_conv, char **wcsbuf); +SQLLEN bindcol_hybrid_exec(SQLWCHAR *utf16, const char *ldt, size_t n, BOOL lf_conv, char **wcsbuf); +SQLLEN bindcol_localize_estimate(const char *utf8dt, BOOL lf_conv, char **wcsbuf); +SQLLEN bindcol_localize_exec(char *ldt, size_t n, BOOL lf_conv, char **wcsbuf); +SQLLEN bindpara_msg_to_utf8(const char *ldt, char **wcsbuf); +SQLLEN bindpara_wchar_to_msg(const SQLWCHAR *utf16, char **wcsbuf); +#endif /* UNICODE_SUPPORT */ + +#endif /* __UNICODE_SUPPORT_H__ */ diff --git a/win_unicode.c b/win_unicode.c index 203e880..bcddd47 100644 --- a/win_unicode.c +++ b/win_unicode.c @@ -9,11 +9,89 @@ #ifdef UNICODE_SUPPORT -#include "psqlodbc.h" +#include "unicode_support.h" #include #include #include +#if (defined(__STDC_ISO_10646__) && defined(HAVE_MBSTOWCS) && defined(HAVE_WCSTOMBS)) || defined(WIN32) +#define __WCS_ISO10646__ +static BOOL use_wcs = FALSE; +#endif + +#if (defined(__STDC_UTF_16__) && defined(HAVE_UCHAR_H) && defined(HAVE_MBRTOC16) && defined(HAVE_C16RTOMB)) +#define __CHAR16_UTF_16__ +#include +static BOOL use_c16 = FALSE; +#endif + +static int convtype = -1; + +int get_convtype(void) +{ + const UCHAR *cdt; + +#if defined(__WCS_ISO10646__) + if (convtype < 0) + { + wchar_t *wdt = L"a"; + int sizeof_w = sizeof(wchar_t); + + cdt = (UCHAR *) wdt; + switch (sizeof_w) + { + case 2: + if ('a' == 
cdt[0] && + '\0' == cdt[1] && + '\0' == cdt[2] && + '\0' == cdt[3]) + { + mylog(" %s:UTF-16LE detected\n", __FUNCTION__); + convtype = WCSTYPE_UTF16_LE; + use_wcs = TRUE; + } + break; + case 4: + if ('a' == cdt[0] && + '\0' == cdt[1] && + '\0' == cdt[2] && + '\0' == cdt[3] && + '\0' == cdt[4] && + '\0' == cdt[5] && + '\0' == cdt[6] && + '\0' == cdt[7]) + { + mylog(" %s:UTF32-LE detected\n", __FUNCTION__); + convtype = WCSTYPE_UTF32_LE; + use_wcs = TRUE; + } + break; + } + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (convtype < 0) + { + char16_t *c16dt = u"a"; + + cdt = (UCHAR *) c16dt; + if ('a' == cdt[0] && + '\0' == cdt[1] && + '\0' == cdt[2] && + '\0' == cdt[3]) + { + mylog(" %s:C16_UTF-16LE detected\n", __FUNCTION__); + convtype = C16TYPE_UTF16_LE; + use_c16 = TRUE; + } + } +#endif /* __CHAR16_UTF_16__ */ + if (convtype < 0) + convtype = CONVTYPE_UNKNOWN; /* unknown */ + return convtype; +} + + #define byte3check 0xfffff800 #define byte2_base 0x80c0 #define byte2_mask1 0x07c0 @@ -311,6 +389,8 @@ cleanup: } +#ifdef __WCS_ISO10646__ + /* UCS4 => utf8 */ #define byte4check 0xffff0000 #define byte4_check 0x10000 @@ -572,57 +652,122 @@ mylog(" %s:ocount=%d\n", __FUNCTION__, ocount); return rtn; } -static int wcstype = -1; +#define SURROGATE_CHECK 0xfc +#define SURROG1_BYTE 0xd8 +#define SURROG2_BYTE 0xdc -int get_wcstype(void) +static +int ucs4_to_ucs2_lf(const unsigned int *ucs4str, SQLLEN ilen, SQLWCHAR *ucs2str, int bufcount, BOOL lfconv) { - wchar_t *wdt; - int sizeof_w = sizeof(wchar_t); - const UCHAR *cdt; + int outlen = 0, i; + UCHAR *ucdt; + SQLWCHAR *sqlwdt, dmy_wchar; + UCHAR * const udt = (UCHAR *) &dmy_wchar; + unsigned int uintdt; - if (wcstype >= 0) - return wcstype; -#if defined(__STDC_ISO_10646__) || defined(WIN32) - wdt = L"a"; - cdt = (UCHAR *) wdt; - switch (sizeof_w) +mylog(" %s:ilen=%d bufcount=%d\n", __FUNCTION__, ilen, bufcount); + if (ilen < 0) + ilen = ucs4strlen(ucs4str); + for (i = 0; i < ilen && (uintdt = ucs4str[i]); 
i++) { - case 2: - if ('a' == cdt[0] && - '\0' == cdt[1] && - '\0' == cdt[2] && - '\0' == cdt[3]) + sqlwdt = (SQLWCHAR *)&uintdt; + ucdt = (UCHAR *)&uintdt; + if (0 == sqlwdt[1]) + { + if (lfconv && PG_LINEFEED == ucdt[0] && + (i == 0 || + PG_CARRIAGE_RETURN != *((UCHAR *)&ucs4str[i - 1])) + ) { - mylog(" %s:UTF-16LE detected\n", __FUNCTION__); - wcstype = WCSTYPE_UTF16_LE; + if (outlen < bufcount) + { + udt[0] = PG_CARRIAGE_RETURN; + udt[1] = 0; + ucs2str[outlen] = *((SQLWCHAR *) udt); + } + outlen++; } - break; - case 4: - if ('a' == cdt[0] && - '\0' == cdt[1] && - '\0' == cdt[2] && - '\0' == cdt[3] && - '\0' == cdt[4] && - '\0' == cdt[5] && - '\0' == cdt[6] && - '\0' == cdt[7]) + if (outlen < bufcount) + ucs2str[outlen] = sqlwdt[0]; + outlen++; + continue; + } + sqlwdt[1]--; + udt[0] = ((0xfc & ucdt[1]) >> 2) | ((0x3 & ucdt[2]) << 6); + // printf("%02x", udt[0]); + udt[1] = SURROG1_BYTE | ((0xc & ucdt[2]) >> 2); + // printf("%02x", udt[1]); + if (outlen < bufcount) + ucs2str[outlen] = *((SQLWCHAR *)udt); + outlen++; + udt[0] = ucdt[0]; + // printf("%02x", udt[0]); + udt[1] = SURROG2_BYTE | (0x3 & ucdt[1]); + // printf("%02x\n", udt[1]); + if (outlen < bufcount) + ucs2str[outlen] = *((SQLWCHAR *)udt); + outlen++; + } + if (outlen < bufcount) + ucs2str[outlen] = 0; + + return outlen; +} +static +int ucs2_to_ucs4(const SQLWCHAR *ucs2str, SQLLEN ilen, unsigned int *ucs4str, int bufcount) +{ + int outlen = 0, i; + UCHAR *ucdt; + SQLWCHAR sqlwdt; + unsigned int dmy_uint; + UCHAR * const udt = (UCHAR *) &dmy_uint; + +mylog(" %s:ilen=%d bufcount=%d\n", __FUNCTION__, ilen, bufcount); + if (ilen < 0) + ilen = ucs2strlen(ucs2str); + udt[3] = 0; /* always */ + for (i = 0; i < ilen && (sqlwdt = ucs2str[i]); i++) + { + ucdt = (UCHAR *)(ucs2str + i); + // printf("IN=%x\n", sqlwdt); + if ((ucdt[1] & SURROGATE_CHECK) != SURROG1_BYTE) + { + // printf("SURROG1=%2x\n", ucdt[1] & SURROG1_BYTE); + if (outlen < bufcount) { - mylog(" %s:UTF32-LE detected\n", __FUNCTION__); - wcstype = 
WCSTYPE_UTF32_LE; + udt[0] = ucdt[0]; + udt[1] = ucdt[1]; + udt[2] = 0; + ucs4str[outlen] = *((unsigned int *)udt); } - break; + outlen++; + continue; + } + /* surrogate pair */ + udt[0] = ucdt[2]; + udt[1] = (ucdt[3] & 0x3) | ((ucdt[0] & 0x3f) << 2); + udt[2] = (((ucdt[0] & 0xc0) >> 6) | ((ucdt[1] & 0x3) << 2)) + 1; + // udt[3] = 0; needless + if (outlen < bufcount) + ucs4str[outlen] = *((unsigned int *)udt); + outlen++; + i++; } -#endif /* __STDC_ISO_10646__ */ - if (wcstype < 0) - wcstype = WCSTYPE_UNKNOWN; /* unknown */ - return wcstype; + if (outlen < bufcount) + ucs4str[outlen] = 0; + + return outlen; } +#endif /* __WCS_ISO10646__ */ -SQLULEN + +#if defined(__WCS_ISO10646__) + +static SQLULEN utf8_to_wcs_lf(const char *utf8str, SQLLEN ilen, BOOL lfconv, wchar_t *wcsstr, SQLULEN bufcount, BOOL errcheck) { - switch (get_wcstype()) + switch (get_convtype()) { case WCSTYPE_UTF16_LE: return utf8_to_ucs2_lf(utf8str, ilen, lfconv, @@ -634,9 +779,10 @@ utf8_to_wcs_lf(const char *utf8str, SQLLEN ilen, BOOL lfconv, return -1; } +static char *wcs_to_utf8(const wchar_t *wcsstr, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier) { - switch (get_wcstype()) + switch (get_convtype()) { case WCSTYPE_UTF16_LE: return ucs2_to_utf8((const SQLWCHAR *) wcsstr, ilen, olen, lower_identifier); @@ -656,6 +802,7 @@ char *wcs_to_utf8(const wchar_t *wcsstr, SQLLEN ilen, SQLLEN *olen, BOOL lower_i * if outmsg is NULL or buflen is 0, only output length is returned. * As for return values, NULL terminators aren't counted. 
*/ +static int msgtowstr(const char *inmsg, wchar_t *outmsg, int buflen) { int outlen = -1; @@ -670,14 +817,12 @@ mylog(" %s:inmsg=%p buflen=%d\n", __FUNCTION__, inmsg, buflen); else if (ERROR_INSUFFICIENT_BUFFER == GetLastError()) outlen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS, inmsg, -1, NULL, 0) - 1; + else + outlen = -1; #else -#ifdef HAVE_MBSTOWCS -#ifdef __STDC_ISO_10646__ if (0 == buflen) outmsg = NULL; outlen = mbstowcs((wchar_t *) outmsg, inmsg, buflen); -#endif /* __STDC_ISO_10646__ */ -#endif /* HAVE_MBSTOWCS */ #endif /* WIN32 */ if (outmsg && outlen >= buflen) { @@ -698,6 +843,7 @@ mylog(" %s in=%dchars out=%dchars\n", __FUNCTION__, buflen, outlen); * if outmsg is NULL or buflen is 0, only output length is returned. * As for return values, NULL terminators aren't counted. */ +static int wstrtomsg(const wchar_t *wstr, char *outmsg, int buflen) { int outlen = -1; @@ -710,14 +856,12 @@ mylog(" %s:wstr=%p buflen=%d\n", __FUNCTION__, wstr, buflen); outlen--; else if (ERROR_INSUFFICIENT_BUFFER == GetLastError()) outlen = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL) - 1; + else + outlen = -1; #else -#ifdef HAVE_MBSTOWCS -#ifdef __STDC_ISO_10646__ if (0 == buflen) outmsg = NULL; outlen = wcstombs(outmsg, wstr, buflen); -#endif /* __STDC_ISO_10646__ */ -#endif /* HAVE_MBSTOWCS */ #endif /* WIN32 */ if (outmsg && outlen >= buflen) { @@ -728,5 +872,355 @@ mylog(" %s buf=%dbytes outlen=%dbytes\n", __FUNCTION__, buflen, outlen); return outlen; } +#endif /* __WCS_ISO10646__ */ + + + + +#if defined(__CHAR16_UTF_16__) + +static mbstate_t initial_state; + +static +SQLLEN mbstoc16_lf(char16_t *c16dt, const char *c8dt, size_t n, BOOL lf_conv) +{ + int i; + size_t brtn; + const char *cdt; + mbstate_t mbst = initial_state; + +mylog(" %s:c16dt=%p size=%lu\n", __FUNCTION__, c16dt, n); + for (i = 0, cdt = c8dt; i < n || (!c16dt); i++) + { + if (lf_conv && PG_LINEFEED == *cdt && i > 0 && PG_CARRIAGE_RETURN != cdt[-1]) + { + if 
(c16dt) + c16dt[i] = PG_CARRIAGE_RETURN; + i++; + } + brtn = mbrtoc16(c16dt ? c16dt + i : NULL, cdt, 4, &mbst); + if (0 == brtn) + break; + if (brtn == (size_t) -1 || + brtn == (size_t) -2) + return -1; + if (brtn == (size_t) -3) + continue; + cdt += brtn; + } + if (c16dt && i >= n) + c16dt[n - 1] = 0; + + return i; +} + +static +SQLLEN c16tombs(char *c8dt, const char16_t *c16dt, size_t n) +{ + int i; + SQLLEN result = 0; + size_t brtn; + char *cdt, c4byte[4]; + mbstate_t mbst = initial_state; + +mylog(" %s:c8dt=%p size=%lu\n", __FUNCTION__, c8dt, n); + if (!c8dt) + n = 0; + for (i = 0, cdt = c8dt; c16dt[i] && (result < n || (!cdt)); i++) + { + if (NULL != cdt && result + 4 < n) + brtn = c16rtomb(cdt, c16dt[i], &mbst); + else + { + brtn = c16rtomb(c4byte, c16dt[i], &mbst); + if (brtn < 5) + { + SQLLEN result_n = result + brtn; + + if (result_n < n) + memcpy(cdt, c4byte, brtn); + else + { + if (cdt && n > 0) + { + c8dt[result] = '\0'; /* truncate */ + return result_n; + } + } + } + } + /* + printf("c16dt=%04X brtn=%lu result=%ld cdt=%02X%02X%02X%02X\n", + c16dt[i], brtn, result, (UCHAR) cdt[0], (UCHAR) cdt[1], (UCHAR) cdt[2], (UCHAR) cdt[3]); + */ + if (brtn == (size_t) -1) + { + if (n > 0) + c8dt[n - 1] = '\0'; + return -1; + } + if (cdt) + cdt += brtn; + result += brtn; + } + if (cdt) + *cdt = '\0'; + + return result; +} +#endif /* __CHAR16_UTF_16__ */ + +// +// SQLBindParameter SQL_C_CHAR to UTF-8 case +// the current locale => UTF-8 +// +SQLLEN bindpara_msg_to_utf8(const char *ldt, char **wcsbuf) +{ + SQLLEN l = (-2); + char *utf8 = NULL; + int count = strlen(ldt); + + mylog(" %s\n", __FUNCTION__); +#if defined(__WCS_ISO10646__) + if (use_wcs) + { + wchar_t *wcsdt = (wchar_t *) malloc((count + 1) * sizeof(wchar_t)); + + if ((l = msgtowstr(ldt, (wchar_t *) wcsdt, count + 1)) >= 0) + utf8 = wcs_to_utf8(wcsdt, SQL_NTS, &l, FALSE); + free(wcsdt); + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (use_c16) + { + SQLWCHAR *utf16 = (SQLWCHAR *) 
malloc((count + 1) * sizeof(SQLWCHAR)); + + if ((l = mbstoc16_lf((char16_t *) utf16, ldt, count + 1, FALSE)) >= 0) + utf8 = ucs2_to_utf8(utf16, SQL_NTS, &l, FALSE); + free(utf16); + } +#endif /* __CHAR16_UTF_16__ */ + if (l < 0 && NULL != utf8) + free(utf8); + else + *wcsbuf = (char *) utf8; + + return l; +} + + +// +// SQLBindParameter hybrid case +// SQLWCHAR(UTF-16) => the current locale +// +SQLLEN bindpara_wchar_to_msg(const SQLWCHAR *utf16, char **wcsbuf) +{ + SQLLEN l = (-2); + char *ldt = NULL; + int count = ucs2strlen(utf16); + +mylog(" %s\n", __FUNCTION__); +#if defined(__WCS_ISO10646__) + if (use_wcs) + { + if (sizeof(SQLWCHAR) == sizeof(wchar_t)) + { + ldt = (char *) malloc(2 * count + 1); + l = wstrtomsg((wchar_t *) utf16, ldt, 2 * count + 1); + } + else + { + unsigned int *utf32 = (unsigned int *) malloc((count + 1) * sizeof(unsigned int)); + + l = ucs2_to_ucs4(utf16, -1, utf32, count + 1); + if ((l = wstrtomsg((wchar_t *)utf32, NULL, 0)) >= 0) + { + ldt = (char *) malloc(l + 1); + l = wstrtomsg((wchar_t *)utf32, ldt, l + 1); + } + free(utf32); + } + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (use_c16) + { + ldt = (char *) malloc(4 * count + 1); + l = c16tombs(ldt, (const char16_t *) utf16, 4 * count + 1); + } +#endif /* __CHAR16_UTF_16__ */ + if (l < 0 && NULL != ldt) + free(ldt); + else + *wcsbuf = ldt; + + return l; +} + +size_t convert_linefeeds(const char *s, char *dst, size_t max, BOOL convlf, BOOL *changed); +// +// SQLBindCol hybrid case +// the current locale => SQLWCHAR(UTF-16) +// +SQLLEN bindcol_hybrid_estimate(const char *ldt, BOOL lf_conv, char **wcsbuf) +{ + SQLLEN l = (-2); + + mylog(" %s:lf_conv=%d\n", __FUNCTION__, lf_conv); +#if defined(__WCS_ISO10646__) + if (use_wcs) + { + unsigned int *utf32 = NULL; + + if (sizeof(SQLWCHAR) == sizeof(wchar_t)) + { + l = msgtowstr(ldt, (wchar_t *) NULL, 0); + if (l >= 0 && lf_conv) + { + BOOL changed; + size_t len; + + len = convert_linefeeds(ldt, NULL, 0, TRUE, &changed); + 
if (changed) + { + l += (len - strlen(ldt)); + *wcsbuf = (char *) malloc(len + 1); + convert_linefeeds(ldt, *wcsbuf, len + 1, TRUE, NULL); + } + } + } + else + { + int count = strlen(ldt); + + utf32 = (unsigned int *) malloc((count + 1) * sizeof(unsigned int)); + if ((l = msgtowstr(ldt, (wchar_t *) utf32, count + 1)) >= 0) + { + l = ucs4_to_ucs2_lf(utf32, -1, NULL, 0, lf_conv); + *wcsbuf = (char *) utf32; + } + } + if (l < 0 && NULL != utf32) + free(utf32); + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (use_c16) + l = mbstoc16_lf((char16_t *) NULL, ldt, 0, lf_conv); +#endif /* __CHAR16_UTF_16__ */ + + return l; +} + +SQLLEN bindcol_hybrid_exec(SQLWCHAR *utf16, const char *ldt, size_t n, BOOL lf_conv, char **wcsbuf) +{ + SQLLEN l = (-2); + + mylog(" %s:size=%zu lf_conv=%d\n", __FUNCTION__, n, lf_conv); +#if defined(__WCS_ISO10646__) + if (use_wcs) + { + unsigned int *utf32 = NULL; + BOOL midbuf = (wcsbuf && *wcsbuf); + + if (sizeof(SQLWCHAR) == sizeof(wchar_t)) + { + if (midbuf) + l = msgtowstr(*wcsbuf, (wchar_t *) utf16, n); + else + l = msgtowstr(ldt, (wchar_t *) utf16, n); + } + else if (midbuf) + { + utf32 = (unsigned int *) *wcsbuf; + l = ucs4_to_ucs2_lf(utf32, -1, utf16, n, lf_conv); + } + if (midbuf) + { + free(*wcsbuf); + *wcsbuf = NULL; + } + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (use_c16) + { + l = mbstoc16_lf((char16_t *) utf16, ldt, n, lf_conv); + } +#endif /* __CHAR16_UTF_16__ */ + + return l; +} +// +// SQLBindCol localize case +// UTF-8 => the current locale +// +SQLLEN bindcol_localize_estimate(const char *utf8dt, BOOL lf_conv, char **wcsbuf) +{ + SQLLEN l = (-2); + char *convalc = NULL; + + mylog(" %s:lf_conv=%d\n", __FUNCTION__, lf_conv); +#if defined(__WCS_ISO10646__) + if (use_wcs) + { + wchar_t *wcsalc = NULL; + + l = utf8_to_wcs_lf(utf8dt, -1, lf_conv, NULL, 0, FALSE); + wcsalc = (wchar_t *) malloc(sizeof(wchar_t) * (l + 1)); + convalc = (char *) wcsalc; + l = utf8_to_wcs_lf(utf8dt, -1, lf_conv, 
wcsalc, l + 1, FALSE); + l = wstrtomsg(wcsalc, NULL, 0); + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (use_c16) + { + SQLWCHAR *wcsalc = NULL; + + l = utf8_to_ucs2_lf(utf8dt, -1, lf_conv, (SQLWCHAR *) NULL, 0, FALSE); + wcsalc = (SQLWCHAR *) malloc(sizeof(SQLWCHAR) * (l + 1)); + convalc = (char *) wcsalc; + l = utf8_to_ucs2_lf(utf8dt, -1, lf_conv, wcsalc, l + 1, FALSE); + l = c16tombs(NULL, (char16_t *) wcsalc, 0); + } +#endif /* __CHAR16_UTF_16__ */ + if (l < 0 && NULL != convalc) + free(convalc); + else if (NULL != convalc) + *wcsbuf = (char *) convalc; + +mylog(" %s:return=%d\n", __FUNCTION__, l); + return l; +} + +SQLLEN bindcol_localize_exec(char *ldt, size_t n, BOOL lf_conv, char **wcsbuf) +{ + SQLLEN l = (-2); + + mylog(" %s:size=%zu\n", __FUNCTION__, n); +#if defined(__WCS_ISO10646__) + if (use_wcs) + { + wchar_t *wcsalc = (wchar_t *) *wcsbuf; + + l = wstrtomsg(wcsalc, ldt, n); + } +#endif /* __WCS_ISO10646__ */ +#ifdef __CHAR16_UTF_16__ + if (use_c16) + { + char16_t *wcsalc = (char16_t *) *wcsbuf; + + l = c16tombs(ldt, (char16_t *) wcsalc, n); + } +#endif /* __CHAR16_UTF_16__ */ + free(*wcsbuf); + *wcsbuf = NULL; + +mylog(" %s:return=%d\n", __FUNCTION__, l); + return l; +} #endif /* UNICODE_SUPPORT */ diff --git a/winbuild/psqlsetup.vcxproj b/winbuild/psqlsetup.vcxproj index a171a04..a75bc28 100644 --- a/winbuild/psqlsetup.vcxproj +++ b/winbuild/psqlsetup.vcxproj @@ -265,7 +265,6 @@ -