From f8cbdf3559f40f32e5ec27f7bd00c7e46a3fbef7 Mon Sep 17 00:00:00 2001 From: Marko Kreen Date: Mon, 31 Dec 2012 14:18:49 +0200 Subject: [PATCH] usual/wchar: wide-char utilities. --- Makefile | 3 +- usual/wchar.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++ usual/wchar.h | 31 +++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 usual/wchar.c create mode 100644 usual/wchar.h diff --git a/Makefile b/Makefile index 36b9698..bb342cf 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,8 @@ libusual_la_SOURCES = usual/config.h.in \ usual/string.h usual/string.c \ usual/strpool.h usual/strpool.c \ usual/time.h usual/time.c \ - usual/utf8.h usual/utf8.c + usual/utf8.h usual/utf8.c \ + usual/wchar.h usual/wchar.c # we want to filter headers, so cannot use usual install method via _HEADERS USUAL_HEADERS = $(filter %.h,$(libusual_la_SOURCES) $(nodist_libusual_la_SOURCES)) diff --git a/usual/wchar.c b/usual/wchar.c new file mode 100644 index 0000000..f0d2afc --- /dev/null +++ b/usual/wchar.c @@ -0,0 +1,107 @@ +/* + * wchar utility functions. + * + * Copyright (c) 2012 Marko Kreen + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
/*
 * Decode a multibyte string (in the current locale's encoding) into a
 * zero-terminated wide-char string.
 *
 * str           - input bytes.
 * str_len       - number of input bytes, or negative to use strlen(str).
 * wlen_p        - if non-NULL, receives the number of wide chars stored
 *                 (terminator not counted).
 * wbuf/wbuf_len - optional caller buffer, wbuf_len counted in wchar_t.  It is
 *                 used when it can hold str_len + 1 wide chars; otherwise the
 *                 result is malloc'ed.
 * allow_invalid - when true, a byte that cannot be decoded is stored as a
 *                 single wide char with the byte's value instead of failing.
 *
 * Returns wbuf or a malloc'ed buffer; the caller must free() the result when
 * it differs from wbuf.  Returns NULL if allocation fails, or with
 * errno = EILSEQ when the input is invalid and allow_invalid is false.
 *
 * If a NUL byte is found inside an explicit str_len and one or more valid
 * characters precede it, decoding stops there and the shorter length is
 * reported.
 */
wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p,
		      wchar_t *wbuf, int wbuf_len, bool allow_invalid)
{
	mbstate_t ps;
	size_t len, wmax, n;
	wchar_t *dst, *w, *wend;
	const char *s;
	const char *str_end;

	len = (str_len < 0) ? strlen(str) : (size_t)str_len;
	str_end = str + len;

	/* each wide char consumes at least one byte; +1 for the terminator */
	wmax = len + 1;
	if (wbuf != NULL && wbuf_len > 0 && wmax <= (size_t)wbuf_len) {
		dst = wbuf;
	} else {
		/* refuse sizes whose byte count would wrap around */
		if (wmax > (size_t)-1 / sizeof(*dst)) {
			errno = ENOMEM;
			return NULL;
		}
		dst = malloc(wmax * sizeof(*dst));
		if (!dst)
			return NULL;
	}

	/*
	 * Try to decode everything at once.  mbsnrtowcs() sets s to NULL only
	 * when it meets a NUL byte; when it merely runs out of input it leaves
	 * s at the end of the string and writes no terminator.  A non-initial
	 * state at that point means a trailing partial character was consumed,
	 * which must go through the per-character path below.
	 */
	s = str;
	memset(&ps, 0, sizeof(ps));
	n = mbsnrtowcs(dst, &s, len, wmax, &ps);
	if (n != (size_t)-1 &&
	    ((s == NULL && n > 0) || (s == str_end && mbsinit(&ps)))) {
		dst[n] = 0;
		if (wlen_p != NULL)
			*wlen_p = (int)n;
		return dst;
	}

	/* full decode failed, decode chars one-by-one */
	s = str;
	w = dst;
	wend = dst + wmax - 1;
	memset(&ps, 0, sizeof(ps));
	while (s < str_end && w < wend) {
		n = mbrtowc(w, s, (size_t)(str_end - s), &ps);
		if (n != 0 && n != (size_t)-1 && n != (size_t)-2) {
			w++;
			s += n;
		} else if (allow_invalid) {
			/* NUL, invalid or truncated sequence: pass the byte through */
			memset(&ps, 0, sizeof(ps));
			*w++ = (unsigned char)*s++;
		} else {
			goto fail;
		}
	}

	if (s != str_end)
		goto fail;

	*w = 0;
	if (wlen_p != NULL)
		*wlen_p = (int)(w - dst);
	return dst;

fail:
	if (dst != wbuf)
		free(dst);
	errno = EILSEQ;
	return NULL;
}

/*
 * Look up a character class with wctype(), taking the class name as
 * namelen wide chars that need not be zero-terminated.
 *
 * Returns (wctype_t)0 when the name does not fit the local buffer
 * (10 or more chars) or contains a char outside 0x20..127; otherwise
 * returns whatever wctype() gives for the narrowed name.
 */
wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen)
{
	char buf[10];
	unsigned int i;

	if (namelen >= sizeof(buf))
		return (wctype_t)0;
	for (i = 0; i < namelen; i++) {
		wchar_t c = name[i];

		if (c < 0x20 || c > 127)
			return (wctype_t)0;
		buf[i] = (char)c;
	}
	buf[i] = 0;
	return wctype(buf);
}
+ * + * Copyright (c) 2012 Marko Kreen + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _USUAL_WCHAR_H_ +#define _USUAL_WCHAR_H_ + +#include + +#include +#include + +/* + * Decode the multibyte string str into a zero-terminated wide-char string. + * str_len is the byte count, or negative to use strlen(str). The result is + * stored in wbuf when it is non-NULL and large enough, otherwise in a + * malloc'ed buffer, so the caller should free() a result that differs from + * wbuf. If wlen_p is non-NULL it receives the number of wide chars decoded. + * With allow_invalid set, undecodable bytes are copied through as single + * wide chars; otherwise decoding fails with errno = EILSEQ. Returns NULL on + * failure. + */ +wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p, wchar_t *wbuf, int wbuf_len, bool allow_invalid); + +/* + * wctype() lookup for a class name given as namelen wide chars (no + * terminator needed). Returns 0 when the name is 10 or more chars long or + * contains a char outside 0x20..127. + */ +wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen); + +#endif -- 2.39.5