diff -urNp libc_bsd/Makefile libc_apple/Makefile --- libc_bsd/Makefile 2008-07-28 20:59:22.000000000 +0200 +++ libc_apple/Makefile 2008-08-03 14:03:43.000000000 +0200 @@ -14,8 +14,9 @@ SHLIBDIR?= /lib LIB=c SHLIB_MAJOR= 8 WARNS?= 2 +CFLAGS=-g CFLAGS+=-I${.CURDIR}/include -I${.CURDIR}/../../include -CFLAGS+=-I${.CURDIR}/${MACHINE_ARCH} +CFLAGS+=-I${.CURDIR}/${MACHINE_ARCH} #-DLOCALE_DEBUG CLEANFILES+=tags INSTALL_PIC_ARCHIVE= PRECIOUSLIB= diff -urNp libc_bsd/locale/collate.c libc_apple/locale/collate.c --- libc_bsd/locale/collate.c 2008-08-03 20:30:12.000000000 +0200 +++ libc_apple/locale/collate.c 2008-08-03 22:49:41.000000000 +0200 @@ -28,14 +28,24 @@ #include __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $"); +#define __collate_chain_equiv_table (__collate_data->__chain_equiv_table) +#define __collate_chain_pri_table (__collate_data->__chain_pri_table) +#define __collate_char_pri_table (__collate_data->__char_pri_table) +#define __collate_info (&__collate_data->__info) +#define __collate_large_char_pri_table (__collate_data->__large_char_pri_table) +#define __collate_substitute_table (__collate_data->__substitute_table) + #include "namespace.h" #include #include #include +#include #include +#include #include #include #include +#include #include "un-namespace.h" #include "collate.h" @@ -44,24 +54,25 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/ #include "libc_private.h" +#if _BYTE_ORDER == _LITTLE_ENDIAN +static void wntohl(wchar_t *, int); +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ +void __collate_err(int ex, const char *f) __dead2; + +#undef __collate_load_error int __collate_load_error = 1; int __collate_substitute_nontrivial; - -u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; -struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; -struct __collate_st_chain_pri *__collate_chain_pri_table; - -void __collate_err(int ex, const char *f) __dead2; +struct __locale_st_collate *__collate_data = NULL; int __collate_load_tables(const char *encoding) { FILE *fp; - int i, saverr, chains; - uint32_t u32; + int i, saverr, chains, z; char strbuf[STR_LEN], buf[PATH_MAX]; - void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table; - static char collate_encoding[ENCODING_LEN + 1]; + struct __locale_st_collate *TMP; + struct __collate_st_info info; + void *vp; /* 'encoding' must be already checked. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { @@ -72,7 +83,7 @@ __collate_load_tables(const char *encodi /* * If the locale name is the same as our cache, use the cache. */ - if (strcmp(encoding, collate_encoding) == 0) { + if (__collate_data && strcmp(encoding, __collate_data->__encoding) == 0) { __collate_load_error = 0; return (_LDP_CACHE); } @@ -87,6 +98,9 @@ __collate_load_tables(const char *encodi (void)strcat(buf, "/"); (void)strcat(buf, encoding); (void)strcat(buf, "/LC_COLLATE"); +#ifdef LOCALE_DEBUG + fprintf(stderr, "__collate_load_tables: opening %s\n", buf); +#endif if ((fp = fopen(buf, "r")) == NULL) return (_LDP_ERROR); @@ -97,23 +111,30 @@ __collate_load_tables(const char *encodi return (_LDP_ERROR); } chains = -1; - if (strcmp(strbuf, COLLATE_VERSION) == 0) - chains = 0; - else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0) + if (strcmp(strbuf, COLLATE_VERSION1_3) == 0) chains = 1; if (chains < 0) { + fprintf(stderr, "__collate_load_tables: wrong signature: %s\n", strbuf); (void)fclose(fp); errno = EFTYPE; return (_LDP_ERROR); } if (chains) { - if (fread(&u32, sizeof(u32), 1, fp) != 1) { + if (fread(&info, sizeof(info), 1, fp) != 1) { saverr = errno; (void)fclose(fp); errno = saverr; return (_LDP_ERROR); } - if ((chains = (int)ntohl(u32)) < 1) { +#if _BYTE_ORDER == _LITTLE_ENDIAN + for(z = 0; z < info.directive_count; z++) { + info.undef_pri[z] = ntohl(info.undef_pri[z]); + info.subst_count[z] = ntohl(info.subst_count[z]); + } + info.chain_count = ntohl(info.chain_count); + info.large_pri_count = ntohl(info.large_pri_count); +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ + if ((chains = info.chain_count) < 0) { (void)fclose(fp); errno = EFTYPE; return (_LDP_ERROR); @@ -121,26 +142,13 @@ __collate_load_tables(const char *encodi } else chains = TABLE_SIZE; - if ((TMP_substitute_table = - malloc(sizeof(__collate_substitute_table))) == NULL) { - saverr = errno; - (void)fclose(fp); - errno = saverr; - return (_LDP_ERROR); - } - if ((TMP_char_pri_table = - malloc(sizeof(__collate_char_pri_table))) == NULL) { - saverr = errno; - free(TMP_substitute_table); - (void)fclose(fp); - errno = saverr; - return (_LDP_ERROR); - } - if ((TMP_chain_pri_table = - malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) { + i = sizeof(struct __locale_st_collate) + + sizeof(struct __collate_st_chain_pri) * chains + + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; + for(z = 0; z < info.directive_count; z++) + i += sizeof(struct __collate_st_subst) * info.subst_count[z]; + if ((TMP = (struct __locale_st_collate *)malloc(i)) == NULL) { saverr = errno; - free(TMP_substitute_table); - free(TMP_char_pri_table); (void)fclose(fp); errno = saverr; return (_LDP_ERROR); @@ -150,111 +158,412 @@ __collate_load_tables(const char *encodi { \ if (fread(a, b, c, d) != c) { \ saverr = errno; \ - free(TMP_substitute_table); \ - free(TMP_char_pri_table); \ - free(TMP_chain_pri_table); \ + free(TMP); \ (void)fclose(d); \ errno = saverr; \ return (_LDP_ERROR); \ } \ } - FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp); - FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp); - FREAD(TMP_chain_pri_table, - sizeof(*__collate_chain_pri_table), chains, fp); + /* adjust size to read the remaining in one chunk */ + i -= offsetof(struct __locale_st_collate, __char_pri_table); + FREAD(TMP->__char_pri_table, i, 1, fp); (void)fclose(fp); - (void)strcpy(collate_encoding, encoding); - if (__collate_substitute_table_ptr != NULL) - free(__collate_substitute_table_ptr); - __collate_substitute_table_ptr = TMP_substitute_table; - if (__collate_char_pri_table_ptr != NULL) - free(__collate_char_pri_table_ptr); - __collate_char_pri_table_ptr = TMP_char_pri_table; - for (i = 0; i < UCHAR_MAX + 1; i++) { - __collate_char_pri_table[i].prim = - ntohl(__collate_char_pri_table[i].prim); - __collate_char_pri_table[i].sec = - ntohl(__collate_char_pri_table[i].sec); - } - if (__collate_chain_pri_table != NULL) - free(__collate_chain_pri_table); - __collate_chain_pri_table = TMP_chain_pri_table; - for (i = 0; i < chains; i++) { - __collate_chain_pri_table[i].prim = - ntohl(__collate_chain_pri_table[i].prim); - __collate_chain_pri_table[i].sec = - ntohl(__collate_chain_pri_table[i].sec); - } - __collate_substitute_nontrivial = 0; - for (i = 0; i < UCHAR_MAX + 1; i++) { - if (__collate_substitute_table[i][0] != i || - __collate_substitute_table[i][1] != 0) { - __collate_substitute_nontrivial = 1; - break; + vp = (void *)(TMP + 1); + + /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ + if (info.subst_count[0] > 0) { + TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; + vp += info.subst_count[0] * sizeof(struct __collate_st_subst); + } else + TMP->__substitute_table[0] = NULL; + if (info.flags & COLLATE_SUBST_DUP) + TMP->__substitute_table[1] = TMP->__substitute_table[0]; + else if (info.subst_count[1] > 0) { + TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; + vp += info.subst_count[1] * sizeof(struct __collate_st_subst); + } else + TMP->__substitute_table[1] = NULL; + + if (chains > 0) { + TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; + vp += chains * sizeof(struct __collate_st_chain_pri); + } else + TMP->__chain_pri_table = NULL; + if (info.large_pri_count > 0) + TMP->__large_char_pri_table = + (struct __collate_st_large_char_pri *)vp; + else + TMP->__large_char_pri_table = NULL; + +#if _BYTE_ORDER == _LITTLE_ENDIAN + { + struct __collate_st_char_pri *p = TMP->__char_pri_table; + for(i = UCHAR_MAX + 1; i-- > 0; p++) { + for(z = 0; z < info.directive_count; z++) + p->pri[z] = ntohl(p->pri[z]); + } + } + for(z = 0; z < info.directive_count; z++) + if (info.subst_count[z] > 0) { + struct __collate_st_subst *p = + TMP->__substitute_table[z]; + for(i = info.subst_count[z]; i-- > 0; p++) { + p->val = ntohl(p->val); + wntohl(p->str, STR_LEN); + } + } + { + struct __collate_st_chain_pri *p = TMP->__chain_pri_table; + for(i = chains; i-- > 0; p++) { + wntohl(p->str, STR_LEN); + for(z = 0; z < info.directive_count; z++) + p->pri[z] = ntohl(p->pri[z]); } } + if (info.large_pri_count > 0) { + struct __collate_st_large_char_pri *p = + TMP->__large_char_pri_table; + for(i = info.large_pri_count; i-- > 0; p++) { + p->val = ntohl(p->val); + for(z = 0; z < info.directive_count; z++) + p->pri.pri[z] = ntohl(p->pri.pri[z]); + } + } +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ + (void)strcpy(TMP->__encoding, encoding); + (void)memcpy(&TMP->__info, &info, sizeof(info)); + __collate_data = TMP; + + __collate_load_error = (info.subst_count[0] > 0 || + info.subst_count[1] > 0); __collate_load_error = 0; +#ifdef LOCALE_DEBUG + fprintf(stderr, "__collate_load_tables: loaded successfully\n"); +#endif return (_LDP_LOADED); } -u_char * -__collate_substitute(const u_char *s) +static int +__collate_wcsnlen(const wchar_t *s, int len) +{ + int n = 0; + while (*s && n < len) { + s++; + n++; + } + return n; +} + +static struct __collate_st_subst * +substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) +{ + int low = 0; + int high = n - 1; + int next, compar; + struct __collate_st_subst *p; + + while (low <= high) { + next = (low + high) / 2; + p = tab + next; + compar = key - p->val; + if (compar == 0) + return p; + if (compar > 0) + low = next + 1; + else + high = next - 1; + } + return NULL; +} + +wchar_t * +__collate_substitute(const wchar_t *s, int which) { int dest_len, len, nlen; - int delta = strlen(s); - u_char *dest_str = NULL; + int n, delta, nsubst; + wchar_t *dest_str = NULL; + const wchar_t *fp; + struct __collate_st_subst *subst, *match; if (s == NULL || *s == '\0') - return (__collate_strdup("")); - delta += delta / 8; - dest_str = malloc(dest_len = delta); + return (__collate_wcsdup(L"")); + dest_len = wcslen(s); + nsubst = __collate_info->subst_count[which]; + if (nsubst <= 0) + return __collate_wcsdup(s); + subst = __collate_substitute_table[which]; + delta = dest_len / 4; + if (delta < 2) + delta = 2; + dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); if (dest_str == NULL) __collate_err(EX_OSERR, __func__); len = 0; while (*s) { - nlen = len + strlen(__collate_substitute_table[*s]); + if ((match = substsearch(*s, subst, nsubst)) != NULL) { + fp = match->str; + n = __collate_wcsnlen(fp, STR_LEN); + } else { + fp = s; + n = 1; + } + nlen = len + n; if (dest_len <= nlen) { - dest_str = reallocf(dest_str, dest_len = nlen + delta); + dest_str = reallocf(dest_str, (dest_len = nlen + delta) + * sizeof(wchar_t)); if (dest_str == NULL) __collate_err(EX_OSERR, __func__); } - (void)strcpy(dest_str + len, __collate_substitute_table[*s++]); - len = nlen; + wcsncpy(dest_str + len, fp, n); + len += n; + s++; } + dest_str[len] = 0; return (dest_str); } +static struct __collate_st_chain_pri * +chainsearch(const wchar_t *key, int *len) +{ + int low = 0; + int high = __collate_info->chain_count - 1; + int next, compar, l; + struct __collate_st_chain_pri *p; + struct __collate_st_chain_pri *tab = __collate_chain_pri_table; + + while (low <= high) { + next = (low + high) / 2; + p = tab + next; + compar = *key - *p->str; + if (compar == 0) { + l = __collate_wcsnlen(p->str, STR_LEN); + compar = wcsncmp(key, p->str, l); + if (compar == 0) { + *len = l; + return p; + } + } + if (compar > 0) + low = next + 1; + else + high = next - 1; + } + return NULL; +} + +static struct __collate_st_large_char_pri * +largesearch(const wchar_t key) +{ + int low = 0; + int high = __collate_info->large_pri_count - 1; + int next, compar; + struct __collate_st_large_char_pri *p; + struct __collate_st_large_char_pri *tab = + __collate_large_char_pri_table; + + while (low <= high) { + next = (low + high) / 2; + p = tab + next; + compar = key - p->val; + if (compar == 0) + return p; + if (compar > 0) + low = next + 1; + else + high = next - 1; + } + return NULL; +} + void -__collate_lookup(const u_char *t, int *len, int *prim, int *sec) +__collate_lookup(const wchar_t *t, int *len, int *prim, int *sec) { struct __collate_st_chain_pri *p2; + int l; *len = 1; *prim = *sec = 0; - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) { - if (*t == p2->str[0] && - strncmp(t, p2->str, strlen(p2->str)) == 0) { - *len = strlen(p2->str); - *prim = p2->prim; - *sec = p2->sec; + p2 = chainsearch(t, &l); + /* use the chain if prim >= 0 */ + if (p2 && p2->pri[0] >= 0) { + *len = l; + *prim = p2->pri[0]; + *sec = p2->pri[1]; + return; + } + if (*t <= UCHAR_MAX) { + *prim = __collate_char_pri_table[*t].pri[0]; + *sec = __collate_char_pri_table[*t].pri[1]; + return; + } + if (__collate_info->large_pri_count > 0) { + struct __collate_st_large_char_pri *match; + match = largesearch(*t); + if (match) { + *prim = match->pri.pri[0]; + *sec = match->pri.pri[1]; + return; + } + } + *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; + *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l; +} + +void +__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which) +{ + struct __collate_st_chain_pri *p2; + int p, l; + + *len = 1; + *pri = 0; + p2 = chainsearch(t, &l); + if (p2) { + p = p2->pri[which]; + /* use the chain if pri >= 0 */ + if (p >= 0) { + *len = l; + *pri = p; + return; + } + } + if (*t <= UCHAR_MAX) { + *pri = __collate_char_pri_table[*t].pri[which]; + return; + } + if (__collate_info->large_pri_count > 0) { + struct __collate_st_large_char_pri *match; + match = largesearch(*t); + if (match) { + *pri = match->pri.pri[which]; return; } } - *prim = __collate_char_pri_table[*t].prim; - *sec = __collate_char_pri_table[*t].sec; + *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; +} + +wchar_t * +__collate_mbstowcs(const char *s) +{ + static const mbstate_t initial; + mbstate_t st; + size_t len; + const char *ss; + wchar_t *wcs; + + ss = s; + st = initial; + if ((len = mbsrtowcs(NULL, &ss, 0, &st)) == (size_t)-1) + return NULL; + if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) + __collate_err(EX_OSERR, __func__); + st = initial; + mbsrtowcs(wcs, &s, len, &st); + wcs[len] = 0; + + return (wcs); } -u_char * -__collate_strdup(u_char *s) +wchar_t * +__collate_wcsdup(const wchar_t *s) { - u_char *t = strdup(s); + size_t len = wcslen(s) + 1; + wchar_t *wcs; - if (t == NULL) + if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) __collate_err(EX_OSERR, __func__); - return (t); + wcscpy(wcs, s); + return (wcs); +} + +void +__collate_xfrm(const wchar_t *src, wchar_t **xf) +{ + int pri, len; + size_t slen; + const wchar_t *t; + wchar_t *tt = NULL, *tr = NULL; + int direc, pass; + wchar_t *xfp; + struct __collate_st_info *info = __collate_info; + int sverrno; + + for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) + xf[pass] = NULL; + for(pass = 0; pass < info->directive_count; pass++) { + direc = info->directive[pass]; + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { + sverrno = errno; + free(tt); + errno = sverrno; + tt = __collate_substitute(src, pass); + } + if (direc & DIRECTIVE_BACKWARD) { + wchar_t *bp, *fp, c; + sverrno = errno; + free(tr); + errno = sverrno; + tr = __collate_wcsdup(tt ? tt : src); + bp = tr; + fp = tr + wcslen(tr) - 1; + while(bp < fp) { + c = *bp; + *bp++ = *fp; + *fp-- = c; + } + t = (const wchar_t *)tr; + } else if (tt) + t = (const wchar_t *)tt; + else + t = (const wchar_t *)src; + sverrno = errno; + if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * + (wcslen(t) + 1))) == NULL) { + errno = sverrno; + slen = 0; + goto end; + } + errno = sverrno; + xfp = xf[pass]; + if (direc & DIRECTIVE_POSITION) { + while(*t) { + __collate_lookup_which(t, &len, &pri, pass); + t += len; + if (pri <= 0) { + if (pri < 0) { + errno = EINVAL; + slen = 0; + goto end; + } + pri = COLLATE_MAX_PRIORITY; + } + *xfp++ = pri; + } + } else { + while(*t) { + __collate_lookup_which(t, &len, &pri, pass); + t += len; + if (pri <= 0) { + if (pri < 0) { + errno = EINVAL; + slen = 0; + goto end; + } + continue; + } + *xfp++ = pri; + } + } + *xfp = 0; + } + end: + sverrno = errno; + free(tt); + free(tr); + errno = sverrno; } void @@ -275,24 +584,350 @@ __collate_err(int ex, const char *f) exit(ex); } +/* + * __collate_collating_symbol takes the multibyte string specified by + * src and slen, and using ps, converts that to a wide character. Then + * it is checked to verify it is a collating symbol, and then copies + * it to the wide character string specified by dst and dlen (the + * results are not null terminated). The length of the wide characters + * copied to dst is returned if successful. Zero is returned if no such + * collating symbol exists. (size_t)-1 is returned if there are wide-character + * conversion errors, if the length of the converted string is greater that + * STR_LEN or if dlen is too small. It is up to the calling routine to + * preserve the mbstate_t structure as needed. + */ +size_t +__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, + size_t slen, mbstate_t *ps) +{ + wchar_t wname[STR_LEN]; + wchar_t w, *wp; + size_t len, l; + + /* POSIX locale */ + if (__collate_load_error) { + if (dlen < 1) + return (size_t)-1; + if (slen != 1 || !isascii(*src)) + return 0; + *dst = *src; + return 1; + } + for(wp = wname, len = 0; slen > 0; len++) { + l = mbrtowc(&w, src, slen, ps); + if (l == (size_t)-1 || l == (size_t)-2) + return (size_t)-1; + if (l == 0) + break; + if (len >= STR_LEN) + return -1; + *wp++ = w; + src += l; + slen = (long)slen - (long)l; + } + if (len == 0 || len > dlen) + return (size_t)-1; + if (len == 1) { + if (*wname <= UCHAR_MAX) { + if (__collate_char_pri_table[*wname].pri[0] >= 0) { + if (dlen > 0) + *dst = *wname; + return 1; + } + return 0; + } else if (__collate_info->large_pri_count > 0) { + struct __collate_st_large_char_pri *match; + match = largesearch(*wname); + if (match && match->pri.pri[0] >= 0) { + if (dlen > 0) + *dst = *wname; + return 1; + } + } + return 0; + } + *wp = 0; + if (__collate_info->chain_count > 0) { + struct __collate_st_chain_pri *match; + int ll; + match = chainsearch(wname, &ll); + if (match) { + if (ll < dlen) + dlen = ll; + wcsncpy(dst, wname, dlen); + return ll; + } + } + return 0; +} + +/* + * __collate_equiv_class returns the equivalence class number for the symbol + * specified by src and slen, using ps to convert from multi-byte to wide + * character. Zero is returned if the symbol is not in an equivalence + * class. -1 is returned if there are wide character conversion error, + * if there are any greater-than-8-bit characters or if a multi-byte symbol + * is greater or equal to STR_LEN in length. It is up to the calling + * routine to preserve the mbstate_t structure as needed. + */ +int +__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps) +{ + wchar_t wname[STR_LEN]; + wchar_t w, *wp; + size_t len, l; + int e; + + /* POSIX locale */ + if (__collate_load_error) + return 0; + for(wp = wname, len = 0; slen > 0; len++) { + l = mbrtowc(&w, src, slen, ps); + if (l == (size_t)-1 || l == (size_t)-2) + return -1; + if (l == 0) + break; + if (len >= STR_LEN) + return -1; + *wp++ = w; + src += l; + slen = (long)slen - (long)l; + } + if (len == 0) + return -1; + if (len == 1) { + e = -1; + if (*wname <= UCHAR_MAX) + e = __collate_char_pri_table[*wname].pri[0]; + else if (__collate_info->large_pri_count > 0) { + struct __collate_st_large_char_pri *match; + match = largesearch(*wname); + if (match) + e = match->pri.pri[0]; + } + if (e == 0) + return IGNORE_EQUIV_CLASS; + return e > 0 ? e : 0; + } + *wp = 0; + if (__collate_info->chain_count > 0) { + struct __collate_st_chain_pri *match; + int ll; + match = chainsearch(wname, &ll); + if (match) { + e = match->pri[0]; + if (e == 0) + return IGNORE_EQUIV_CLASS; + return e < 0 ? -e : e; + } + } + return 0; +} + +/* + * __collate_equiv_match tries to match any single or multi-character symbol + * in equivalence class equiv_class in the multi-byte string specified by src + * and slen. If start is non-zero, it is taken to be the first (pre-converted) + * wide character. Subsequence wide characters, if needed, will use ps in + * the conversion. On a successful match, the length of the matched string + * is returned (including the start character). If dst is non-NULL, the + * matched wide-character string is copied to dst, a wide character array of + * length dlen (the results are not zero-terminated). If rlen is non-NULL, + * the number of character in src actually used is returned. Zero is + * returned by __collate_equiv_match if there is no match. (size_t)-1 is + * returned on error: if there were conversion errors or if dlen is too small + * to accept the results. On no match or error, ps is restored to its incoming + * state. + */ +size_t +__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen) +{ + wchar_t w; + size_t len, l, clen; + int i; + wchar_t buf[STR_LEN], *wp; + mbstate_t save; + const char *s = src; + size_t sl = slen; + struct __collate_st_chain_pri *ch = NULL; + + /* POSIX locale */ + if (__collate_load_error) + return (size_t)-1; + if (equiv_class == IGNORE_EQUIV_CLASS) + equiv_class = 0; + if (ps) + save = *ps; + wp = buf; + len = clen = 0; + if (start) { + *wp++ = start; + len = 1; + } + /* convert up to the max chain length */ + while(sl > 0 && len < __collate_info->chain_max_len) { + l = mbrtowc(&w, s, sl, ps); + if (l == (size_t)-1 || l == (size_t)-2 || l == 0) + break; + *wp++ = w; + s += l; + clen += l; + sl -= l; + len++; + } + *wp = 0; + if (len > 1 && (ch = chainsearch(buf, &i)) != NULL) { + int e = ch->pri[0]; + if (e < 0) + e = -e; + if (e == equiv_class) + goto found; + } + /* try single character */ + i = 1; + if (*buf <= UCHAR_MAX) { + if (equiv_class == __collate_char_pri_table[*buf].pri[0]) + goto found; + } else if (__collate_info->large_pri_count > 0) { + struct __collate_st_large_char_pri *match; + match = largesearch(*buf); + if (match && equiv_class == match->pri.pri[0]) + goto found; + } + /* no match */ + if (ps) + *ps = save; + return 0; +found: + /* if we converted more than we used, restore to initial and reconvert + * up to what did match */ + if (i < len) { + len = i; + if (ps) + *ps = save; + if (start) + i--; + clen = 0; + while(i-- > 0) { + l = mbrtowc(&w, src, slen, ps); + src += l; + clen += l; + slen -= l; + } + } + if (dst) { + if (dlen < len) { + if (ps) + *ps = save; + return (size_t)-1; + } + for(wp = buf; len > 0; len--) + *dst++ = *wp++; + } + if (rlen) + *rlen = clen; + return len; +} + +#if _BYTE_ORDER == _LITTLE_ENDIAN +static void +wntohl(wchar_t *str, int len) +{ + for(; *str && len > 0; str++, len--) + *str = ntohl(*str); +} +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ + #ifdef COLLATE_DEBUG +static char * +show(int c) +{ + static char buf[5]; + + if (c >=32 && c <= 126) + sprintf(buf, "'%c' ", c); + else + sprintf(buf, "\\x{%02x}", c); + return buf; +} + +static char * +showwcs(const wchar_t *t, int len) +{ + static char buf[64]; + char *cp = buf; + + for(; *t && len > 0; len--, t++) { + if (*t >=32 && *t <= 126) + *cp++ = *t; + else { + sprintf(cp, "\\x{%02x}", *t); + cp += strlen(cp); + } + } + *cp = 0; + return buf; +} + void __collate_print_tables() { - int i; - struct __collate_st_chain_pri *p2; + int i, z; + locale_t loc = __current_locale(); - printf("Substitute table:\n"); - for (i = 0; i < UCHAR_MAX + 1; i++) - if (i != *__collate_substitute_table[i]) - printf("\t'%c' --> \"%s\"\n", i, - __collate_substitute_table[i]); - printf("Chain priority table:\n"); - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) - printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec); + printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d " + "sc=%d cc=%d lc=%d\n", + __collate_info->directive[0], __collate_info->directive[1], + __collate_info->flags, __collate_info->chain_max_len, + __collate_info->directive_count, + __collate_info->undef_pri[0], __collate_info->undef_pri[1], + __collate_info->subst_count[0], __collate_info->subst_count[1], + __collate_info->chain_count, __collate_info->large_pri_count); + for(z = 0; z < __collate_info->directive_count; z++) { + if (__collate_info->subst_count[z] > 0) { + struct __collate_st_subst *p2 = + __collate_substitute_table[z]; + if (z == 0 && (__collate_info->flags & + COLLATE_SUBST_DUP)) + printf("Both substitute tables:\n"); + else + printf("Substitute table %d:\n", z); + for (i = __collate_info->subst_count[z]; i-- > 0; p2++) + printf("\t%s --> \"%s\"\n", + show(p2->val), + showwcs(p2->str, STR_LEN)); + } + } + if (__collate_info->chain_count > 0) { + printf("Chain priority table:\n"); + struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; + for (i = __collate_info->chain_count; i-- > 0; p2++) { + printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); + for(z = 0; z < __collate_info->directive_count; z++) + printf(" %d", p2->pri[z]); + putchar('\n'); + } + } printf("Char priority table:\n"); - for (i = 0; i < UCHAR_MAX + 1; i++) - printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim, - __collate_char_pri_table[i].sec); + { + struct __collate_st_char_pri *p2 = __collate_char_pri_table; + for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { + printf("\t%s :", show(i)); + for(z = 0; z < __collate_info->directive_count; z++) + printf(" %d", p2->pri[z]); + putchar('\n'); + } + } + if (__collate_info->large_pri_count > 0) { + struct __collate_st_large_char_pri *p2 = + __collate_large_char_pri_table; + printf("Large priority table:\n"); + for (i = __collate_info->large_pri_count; i-- > 0; p2++) { + printf("\t%s :", show(p2->val)); + for(z = 0; z < __collate_info->directive_count; z++) + printf(" %d", p2->pri.pri[z]); + putchar('\n'); + } + } } #endif diff -urNp libc_bsd/locale/collate.h libc_apple/locale/collate.h --- libc_bsd/locale/collate.h 2008-07-28 20:59:19.000000000 +0200 +++ libc_apple/locale/collate.h 2008-08-03 10:37:45.000000000 +0200 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/lib/libc/locale/collate.h,v 1.15 2005/02/27 20:31:13 ru Exp $ + * $FreeBSD: src/lib/libc/locale/collate.h,v 1.14 2002/08/30 20:26:02 ache Exp $ */ #ifndef _COLLATE_H_ @@ -33,34 +33,91 @@ #include #include #include +#include "setlocale.h" /* for ENCODING_LEN */ + +#undef COLL_WEIGHTS_MAX +#define COLL_WEIGHTS_MAX 4 #define STR_LEN 10 #define TABLE_SIZE 100 -#define COLLATE_VERSION "1.0\n" -#define COLLATE_VERSION1_2 "1.2\n" +#define COLLATE_VERSION "1.0\n" +#define COLLATE_VERSION1_1 "1.1\n" +#define COLLATE_VERSION1_1A "1.1A\n" +#define COLLATE_VERSION1_2 "1.2\n" +#define COLLATE_VERSION1_3 "1.3\n" +/* see discussion in string/FreeBSD/strxfrm for this value */ +#define COLLATE_MAX_PRIORITY ((1 << 24) - 1) + +#define DIRECTIVE_UNDEF 0x00 +#define DIRECTIVE_FORWARD 0x01 +#define DIRECTIVE_BACKWARD 0x02 +#define DIRECTIVE_POSITION 0x04 + +#define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD) + +#define COLLATE_SUBST_DUP 0x0001 + +#define IGNORE_EQUIV_CLASS 1 + +struct __collate_st_info { + __uint8_t directive[COLL_WEIGHTS_MAX]; + __uint8_t flags; +#if _BYTE_ORDER == _LITTLE_ENDIAN + unsigned int directive_count:4; + unsigned int chain_max_len:4; +#else + unsigned int chain_max_len:4; + unsigned int directive_count:4; +#endif + __int32_t undef_pri[COLL_WEIGHTS_MAX]; + __int32_t subst_count[COLL_WEIGHTS_MAX]; + __int32_t chain_count; + __int32_t large_pri_count; +}; struct __collate_st_char_pri { - int prim, sec; + __int32_t pri[COLL_WEIGHTS_MAX]; }; struct __collate_st_chain_pri { - u_char str[STR_LEN]; - int prim, sec; + wchar_t str[STR_LEN]; + __int32_t pri[COLL_WEIGHTS_MAX]; +}; +struct __collate_st_large_char_pri { + __int32_t val; + struct __collate_st_char_pri pri; +}; +struct __collate_st_subst { + __int32_t val; + wchar_t str[STR_LEN]; +}; + +struct __locale_st_collate { + char __encoding[ENCODING_LEN + 1]; + struct __collate_st_info __info; + struct __collate_st_subst *__substitute_table[COLL_WEIGHTS_MAX]; + struct __collate_st_chain_pri *__chain_pri_table; + struct __collate_st_large_char_pri *__large_char_pri_table; + struct __collate_st_char_pri __char_pri_table[UCHAR_MAX + 1]; }; extern int __collate_load_error; extern int __collate_substitute_nontrivial; -#define __collate_substitute_table (*__collate_substitute_table_ptr) -extern u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; -#define __collate_char_pri_table (*__collate_char_pri_table_ptr) -extern struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; -extern struct __collate_st_chain_pri *__collate_chain_pri_table; +extern struct __locale_st_collate *__collate_data; __BEGIN_DECLS -u_char *__collate_strdup(u_char *); -u_char *__collate_substitute(const u_char *); +wchar_t *__collate_mbstowcs(const char *); +wchar_t *__collate_wcsdup(const wchar_t *); +wchar_t *__collate_substitute(const wchar_t *, int); int __collate_load_tables(const char *); -void __collate_lookup(const u_char *, int *, int *, int *); -int __collate_range_cmp(int, int); +void __collate_lookup(const wchar_t *, int *, int *, int *); +void __collate_lookup_which(const wchar_t *, int *, int *, int); +void __collate_xfrm(const wchar_t *, wchar_t **); +int __collate_range_cmp(wchar_t, wchar_t); +size_t __collate_collating_symbol(wchar_t *, size_t, const char *, size_t, + mbstate_t *); +int __collate_equiv_class(const char *, size_t, mbstate_t *); +size_t __collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, + size_t, mbstate_t *, size_t *); #ifdef COLLATE_DEBUG void __collate_print_tables(void); #endif diff -urNp libc_bsd/locale/collcmp.c libc_apple/locale/collcmp.c --- libc_bsd/locale/collcmp.c 2008-07-28 20:59:19.000000000 +0200 +++ libc_apple/locale/collcmp.c 2008-07-28 20:59:06.000000000 +0200 @@ -28,17 +28,18 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/collcmp.c,v 1.18 2005/02/27 14:54:23 phantom Exp $"); #include +#include #include "collate.h" /* * Compare two characters using collate */ -int __collate_range_cmp(int c1, int c2) +int __collate_range_cmp(wchar_t c1, wchar_t c2) { - static char s1[2], s2[2]; + static wchar_t s1[2], s2[2]; s1[0] = c1; s2[0] = c2; - return (strcoll(s1, s2)); + return (wcscoll(s1, s2)); } diff -urNp libc_bsd/locale/setlocale.c libc_apple/locale/setlocale.c --- libc_bsd/locale/setlocale.c 2008-08-02 20:31:50.000000000 +0200 +++ libc_apple/locale/setlocale.c 2008-08-03 00:18:19.000000000 +0200 @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/ #include #include #include +#include #include "collate.h" #include "lmonetary.h" /* for __monetary_load_locale() */ #include "lnumeric.h" /* for __numeric_load_locale() */ @@ -106,6 +107,10 @@ setlocale(category, locale) int i, j, len, saverr; const char *env, *r; +#ifdef LOCALE_DEBUG + fprintf(stderr, "setlocale: %s: %s\n", categories[category], + locale ? locale : "NULL"); +#endif if (category < LC_ALL || category >= _LC_LAST) { errno = EINVAL; return (NULL); @@ -192,11 +197,16 @@ setlocale(category, locale) for (i = 1; i < _LC_LAST; ++i) { (void)strcpy(saved_categories[i], current_categories[i]); - if (loadlocale(i) == NULL) { + if ((r = loadlocale(i)) == NULL) { saverr = errno; for (j = 1; j < i; j++) { (void)strcpy(new_categories[j], saved_categories[j]); +#ifdef LOCALE_DEBUG + fprintf(stderr, "setlocale: resetting " + "locale to %s\n", + saved_categories[j]); +#endif if (loadlocale(j) == NULL) { (void)strcpy(new_categories[j], "C"); (void)loadlocale(j); @@ -205,6 +215,9 @@ setlocale(category, locale) errno = saverr; return (NULL); } +#ifdef LOCALE_DEBUG + fprintf(stderr, "setlocale: loadlocale returned %s\n", r); +#endif } return (currentlocale()); } @@ -237,6 +250,10 @@ loadlocale(category) int (*func)(const char *); int saved_errno; +#ifdef LOCALE_DEBUG + fprintf(stderr, "loadlocale: %s->%s (was %s)\n", + categories[category], new, old); +#endif if ((new[0] == '.' && (new[1] == '\0' || (new[1] == '.' && new[2] == '\0'))) || strchr(new, '/') != NULL) { @@ -281,6 +298,9 @@ loadlocale(category) (void)strcpy(old, new); return (old); } +#ifdef LOCALE_DEBUG + fprintf(stderr, "loadlocale: returning NULL\n"); +#endif return (NULL); } @@ -328,6 +348,9 @@ __detect_path_locale(void) } else _PathLocale = _PATH_LOCALE; } +#ifdef LOCALE_DEBUG + fprintf(stderr, "__detect_path_locale: %s\n", _PathLocale); +#endif return (0); } diff -urNp libc_bsd/string/strcoll.c libc_apple/string/strcoll.c --- libc_bsd/string/strcoll.c 2008-07-28 20:59:21.000000000 +0200 +++ libc_apple/string/strcoll.c 2008-08-03 00:22:24.000000000 +0200 @@ -30,57 +30,31 @@ __FBSDID("$FreeBSD: src/lib/libc/string/ #include #include +#include +#include #include "collate.h" int -strcoll(s, s2) - const char *s, *s2; +strcoll(const char *s, const char *s2) { - int len, len2, prim, prim2, sec, sec2, ret, ret2; - const char *t, *t2; - char *tt, *tt2; + int ret; + const wchar_t *t = NULL, *t2 = NULL; + int sverrno; - if (__collate_load_error) + if (__collate_load_error || (t = __collate_mbstowcs(s)) == NULL || + (t2 = __collate_mbstowcs(s2)) == NULL) { + sverrno = errno; + free((void *)t); + free((void *)t2); + errno = sverrno; return strcmp(s, s2); - - len = len2 = 1; - ret = ret2 = 0; - if (__collate_substitute_nontrivial) { - t = tt = __collate_substitute(s); - t2 = tt2 = __collate_substitute(s2); - } else { - tt = tt2 = NULL; - t = s; - t2 = s2; - } - while(*t && *t2) { - prim = prim2 = 0; - while(*t && !prim) { - __collate_lookup(t, &len, &prim, &sec); - t += len; - } - while(*t2 && !prim2) { - __collate_lookup(t2, &len2, &prim2, &sec2); - t2 += len2; - } - if(!prim || !prim2) - break; - if(prim != prim2) { - ret = prim - prim2; - goto end; - } - if(!ret2) - ret2 = sec - sec2; } - if(!*t && *t2) - ret = -(int)((u_char)*t2); - else if(*t && !*t2) - ret = (u_char)*t; - else if(!*t && !*t2) - ret = ret2; - end: - free(tt); - free(tt2); + + ret = wcscoll(t, t2); + sverrno = errno; + free((void *)t); + free((void *)t2); + errno = sverrno; return ret; } diff -urNp libc_bsd/string/strxfrm.c libc_apple/string/strxfrm.c --- libc_bsd/string/strxfrm.c 2008-07-28 20:59:21.000000000 +0200 +++ libc_apple/string/strxfrm.c 2008-07-28 20:59:04.000000000 +0200 @@ -30,22 +30,53 @@ __FBSDID("$FreeBSD: src/lib/libc/string/ #include #include +#include +#include #include "collate.h" +/* + * In the non-POSIX case, we transform each character into a string of + * characters representing the character's priority. Since char is usually + * signed, we are limited by 7 bits per byte. To avoid zero, we need to add + * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 + * bits per byte. We choose 4 bytes per character as a good compromise + * between maximum coverage and minimum size. This gives 24 bits, or 16M + * priorities. So we choose COLLATE_MAX_PRIORITY to be (2^24 - 1). This + * this can be increased if more is needed. + */ + +#define XFRM_BYTES 4 +#define XFRM_OFFSET ('0') /* make all printable characters */ +#define XFRM_SHIFT 6 +#define XFRM_MASK ((1 << XFRM_SHIFT) - 1) + +static void +xfrm(unsigned char *p, int pri) +{ + + p[3] = (pri & XFRM_MASK) + XFRM_OFFSET; + pri >>= XFRM_SHIFT; + p[2] = (pri & XFRM_MASK) + XFRM_OFFSET; + pri >>= XFRM_SHIFT; + p[1] = (pri & XFRM_MASK) + XFRM_OFFSET; + pri >>= XFRM_SHIFT; + p[0] = (pri & XFRM_MASK) + XFRM_OFFSET; +} + size_t strxfrm(char * __restrict dest, const char * __restrict src, size_t len) { - int prim, sec, l; size_t slen; - char *s, *ss; + wchar_t *wcs, *xf[2]; + int sverrno; - if (!*src) { + if (!*src && dest) { if (len > 0) *dest = '\0'; return 0; } - if (__collate_load_error) { + if (__collate_load_error || (wcs = __collate_mbstowcs(src)) == NULL) { slen = strlen(src); if (len > 0) { if (slen < len) @@ -58,26 +89,57 @@ strxfrm(char * __restrict dest, const ch return slen; } - slen = 0; - prim = sec = 0; - ss = s = __collate_substitute(src); - while (*s) { - while (*s && !prim) { - __collate_lookup(s, &l, &prim, &sec); - s += l; + __collate_xfrm(wcs, xf); + + slen = wcslen(xf[0]) * XFRM_BYTES; + if (xf[1]) + slen += (wcslen(xf[1]) + 1) * XFRM_BYTES; + if (len > 0) { + wchar_t *w = xf[0]; + int b = 0; + unsigned char buf[XFRM_BYTES]; + unsigned char *bp; + while (len > 1) { + if (!b) { + if (!*w) + break; + xfrm(bp = buf, *w++); + b = XFRM_BYTES; + } + *dest++ = *(char *)bp++; + b--; + len--; } - if (prim) { - if (len > 1) { - *dest++ = (char)prim; + if ((w = xf[1]) != NULL) { + xfrm(bp = buf, 0); + b = XFRM_BYTES; + while (len > 1) { + if (!b) + break; + *dest++ = *(char *)bp++; + b--; + len--; + } + b = 0; + while (len > 1) { + if (!b) { + if (!*w) + break; + xfrm(bp = buf, *w++); + b = XFRM_BYTES; + } + *dest++ = *(char *)bp++; + b--; len--; } - slen++; - prim = 0; } - } - free(ss); - if (len > 0) - *dest = '\0'; + *dest = 0; + } + sverrno = errno; + free(wcs); + free(xf[0]); + free(xf[1]); + errno = sverrno; return slen; } diff -urNp libc_bsd/string/wcscoll.c libc_apple/string/wcscoll.c --- libc_bsd/string/wcscoll.c 2008-07-28 20:59:21.000000000 +0200 +++ libc_apple/string/wcscoll.c 2008-08-03 12:40:28.000000000 +0200 @@ -27,72 +27,220 @@ #include __FBSDID("$FreeBSD: src/lib/libc/string/wcscoll.c,v 1.3 2004/04/07 09:47:56 tjr Exp $"); + #include #include #include #include #include "collate.h" -static char *__mbsdup(const wchar_t *); +#define NOTFORWARD (DIRECTIVE_BACKWARD | DIRECTIVE_POSITION) -/* - * Placeholder implementation of wcscoll(). Attempts to use the single-byte - * collation ordering where possible, and falls back on wcscmp() in locales - * with extended character sets. - */ int wcscoll(const wchar_t *ws1, const wchar_t *ws2) { - char *mbs1, *mbs2; - int diff, sverrno; + int sverrno; + int len, len2, prim, prim2, sec, sec2, ret, ret2; + const wchar_t *t, *t2; + wchar_t *tt = NULL, *tt2 = NULL; + wchar_t *tr = NULL, *tr2 = NULL; + struct __collate_st_info *info; - if (__collate_load_error || MB_CUR_MAX > 1) + if (__collate_load_error) /* - * Locale has no special collating order, could not be - * loaded, or has an extended character set; do a fast binary - * comparison. + * Locale has no special collating order or could not be + * loaded, do a fast binary comparison. */ return (wcscmp(ws1, ws2)); - if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) { - /* - * Out of memory or illegal wide chars; fall back to wcscmp() - * but leave errno indicating the error. Callers that don't - * check for error will get a reasonable but often slightly - * incorrect result. - */ - sverrno = errno; - free(mbs1); - errno = sverrno; - return (wcscmp(ws1, ws2)); + info = &__collate_data->__info; + len = len2 = 1; + ret = ret2 = 0; + + if ((info->directive[0] & NOTFORWARD) || + (info->directive[1] & NOTFORWARD) || + (!(info->flags && COLLATE_SUBST_DUP) && + (info->subst_count[0] > 0 || info->subst_count[1] > 0))) { + int direc, pass; + for(pass = 0; pass < info->directive_count; pass++) { + direc = info->directive[pass]; + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { + free(tt); + tt = __collate_substitute(ws1, pass); + free(tt2); + tt2 = tt ? __collate_substitute(ws2, pass) : + NULL; + } + if (direc & DIRECTIVE_BACKWARD) { + wchar_t *bp, *fp, c; + tr = __collate_wcsdup(tt ? tt : ws1); + bp = tr; + fp = tr + wcslen(tr) - 1; + while(bp < fp) { + c = *bp; + *bp++ = *fp; + *fp-- = c; + } + tr2 = __collate_wcsdup(tt2 ? tt2 : ws2); + bp = tr2; + fp = tr2 + wcslen(tr2) - 1; + while(bp < fp) { + c = *bp; + *bp++ = *fp; + *fp-- = c; + } + t = (const wchar_t *)tr; + t2 = (const wchar_t *)tr2; + } else if (tt) { + t = (const wchar_t *)tt; + t2 = (const wchar_t *)tt2; + } else { + t = (const wchar_t *)ws1; + t2 = (const wchar_t *)ws2; + } + if(direc & DIRECTIVE_POSITION) { + while(*t && *t2) { + prim = prim2 = 0; + __collate_lookup_which(t, &len, + &prim, pass); + if (prim <= 0) { + if (prim < 0) { + errno = EINVAL; + ret = -1; + goto end; + } + prim = COLLATE_MAX_PRIORITY; + } + __collate_lookup_which(t2, &len2, + &prim2, pass); + if (prim2 <= 0) { + if (prim2 < 0) { + errno = EINVAL; + ret = -1; + goto end; + } + prim2 = COLLATE_MAX_PRIORITY; + } + if(prim != prim2) { + ret = prim - prim2; + goto end; + } + t += len; + t2 += len2; + } + } else { + while(*t && *t2) { + prim = prim2 = 0; + while(*t) { + __collate_lookup_which(t, &len, + &prim, pass); + if(prim > 0) + break; + if (prim < 0) { + errno = EINVAL; + ret = -1; + goto end; + } + t += len; + } + while(*t2) { + __collate_lookup_which(t2, + &len2, &prim2, pass); + if(prim2 > 0) + break; + if (prim2 < 0) { + errno = EINVAL; + ret = -1; + goto end; + } + t2 += len2; + } + if(!prim || !prim2) + break; + if(prim != prim2) { + ret = prim - prim2; + goto end; + } + t += len; + t2 += len2; + } + } + if(!*t) { + if(*t2) { + ret = -(int)*t2; + goto end; + } + } else { + ret = *t; + goto end; + } + } + ret = 0; + goto end; } - diff = strcoll(mbs1, mbs2); + /* + * Optimized common case: order_start forward;forward and duplicate + * (or no) substitute tables. + */ + tt = __collate_substitute(ws1, 0); + if (tt == NULL) { + tt2 = NULL; + t = (const wchar_t *)ws1; + t2 = (const wchar_t *)ws2; + } else { + tt2 = __collate_substitute(ws2, 0); + t = (const wchar_t *)tt; + t2 = (const wchar_t *)tt2; + } + while(*t && *t2) { + prim = prim2 = 0; + while(*t) { + __collate_lookup(t, &len, &prim, &sec); + if (prim > 0) + break; + if (prim < 0) { + errno = EINVAL; + ret = -1; + goto end; + } + t += len; + } + while(*t2) { + __collate_lookup(t2, &len2, &prim2, &sec2); + if (prim2 > 0) + break; + if (prim2 < 0) { + errno = EINVAL; + ret = -1; + goto end; + } + t2 += len2; + } + if(!prim || !prim2) + break; + if(prim != prim2) { + ret = prim - prim2; + goto end; + } + if(!ret2) + ret2 = sec - sec2; + t += len; + t2 += len2; + } + if(!*t && *t2) + ret = -(int)*t2; + else if(*t && !*t2) + ret = *t; + else if(!*t && !*t2) + ret = ret2; + end: sverrno = errno; - free(mbs1); - free(mbs2); + free(tt); + free(tt2); + free(tr); + free(tr2); errno = sverrno; - return (diff); -} - -static char * -__mbsdup(const wchar_t *ws) -{ - static const mbstate_t initial; - mbstate_t st; - const wchar_t *wcp; - size_t len; - char *mbs; - - wcp = ws; - st = initial; - if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1) - return (NULL); - if ((mbs = malloc(len + 1)) == NULL) - return (NULL); - st = initial; - wcsrtombs(mbs, &ws, len + 1, &st); - - return (mbs); + return ret; } diff -urNp libc_bsd/string/wcsxfrm.c libc_apple/string/wcsxfrm.c --- libc_bsd/string/wcsxfrm.c 2008-07-28 20:59:21.000000000 +0200 +++ libc_apple/string/wcsxfrm.c 2008-07-28 20:59:04.000000000 +0200 @@ -34,20 +34,17 @@ __FBSDID("$FreeBSD: src/lib/libc/string/ #include #include #include +#include #include "collate.h" -static char *__mbsdup(const wchar_t *); +#define WCS_XFRM_OFFSET 1 -/* - * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of - * the logic used. - */ size_t wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len) { - int prim, sec, l; size_t slen; - char *mbsrc, *s, *ss; + wchar_t *xf[2]; + int sverrno; if (*src == L'\0') { if (len != 0) @@ -55,7 +52,7 @@ wcsxfrm(wchar_t * __restrict dest, const return (0); } - if (__collate_load_error || MB_CUR_MAX > 1) { + if (__collate_load_error) { slen = wcslen(src); if (len > 0) { if (slen < len) @@ -68,49 +65,35 @@ wcsxfrm(wchar_t * __restrict dest, const return (slen); } - mbsrc = __mbsdup(src); - slen = 0; - prim = sec = 0; - ss = s = __collate_substitute(mbsrc); - while (*s != '\0') { - while (*s != '\0' && prim == 0) { - __collate_lookup(s, &l, &prim, &sec); - s += l; + __collate_xfrm(src, xf); + + slen = wcslen(xf[0]); + if (xf[1]) + slen += wcslen(xf[1]) + 1; + if (len > 0) { + wchar_t *w = xf[0]; + while (len > 1) { + if (!*w) + break; + *dest++ = *w++ + WCS_XFRM_OFFSET; + len--; } - if (prim != 0) { - if (len > 1) { - *dest++ = (wchar_t)prim; + if ((w = xf[1]) != NULL) { + if (len > 1) + *dest++ = WCS_XFRM_OFFSET; + while (len > 1) { + if (!*w) + break; + *dest++ = *w++ + WCS_XFRM_OFFSET; len--; } - slen++; - prim = 0; } - } - free(ss); - free(mbsrc); - if (len != 0) - *dest = L'\0'; - + *dest = 0; + } + sverrno = errno; + free(xf[0]); + free(xf[1]); + errno = sverrno; + return (slen); } - -static char * -__mbsdup(const wchar_t *ws) -{ - static const mbstate_t initial; - mbstate_t st; - const wchar_t *wcp; - size_t len; - char *mbs; - - wcp = ws; - st = initial; - if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1) - return (NULL); - if ((mbs = malloc(len + 1)) == NULL) - return (NULL); - st = initial; - wcsrtombs(mbs, &ws, len + 1, &st); - - return (mbs); -}