Index: direntry.h =================================================================== RCS file: /home/ncvs/src/sys/fs/msdosfs/direntry.h,v retrieving revision 1.16 diff -u -r1.16 direntry.h --- direntry.h 19 Mar 2002 22:20:10 -0000 1.16 +++ direntry.h 24 Aug 2003 05:45:15 -0000 @@ -1,4 +1,25 @@ -/* $FreeBSD: src/sys/fs/msdosfs/direntry.h,v 1.16 2002/03/19 22:20:10 alfred Exp $ */ +/* $FreeBSD: src/sys/msdosfs/direntry.h,v 1.15 1999/12/29 04:54:52 peter Exp $ */ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ /* $NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $ */ /*- @@ -132,12 +153,15 @@ void unix2dostime(struct timespec *tsp, u_int16_t *ddp, u_int16_t *dtp, u_int8_t *dhp); void dos2unixtime(u_int dd, u_int dt, u_int dh, struct timespec *tsp); -int dos2unixfn(u_char dn[11], u_char *un, int lower, int d2u_loaded, u_int8_t *d2u, int ul_loaded, u_int8_t *ul); -int unix2dosfn(const u_char *un, u_char dn[12], int unlen, u_int gen, int u2d_loaded, u_int8_t *u2d, int lu_loaded, u_int8_t *lu); -int unix2winfn(const u_char *un, int unlen, struct winentry *wep, int cnt, int chksum, int table_loaded, u_int16_t *u2w); -int winChkName(const u_char *un, int unlen, struct winentry *wep, int chksum, int u2w_loaded, u_int16_t *u2w, int ul_loaded, u_int8_t *ul); -int win2unixfn(struct winentry *wep, struct dirent *dp, int chksum, int table_loaded, u_int16_t *u2w); +int dos2unicodefn(u_char dn[11], u_int16_t *un, int lower, int d2u_loaded, u_int8_t *d2u, int ul_loaded, u_int8_t *ul); +int unicode2dosfn(const u_int16_t *un, u_char dn[12], int unlen, u_int gen); +int unicode2winfn(const u_int16_t *un, int unlen, struct winentry *wep, int cnt, int chksum); +int winChkName(const u_int16_t *un, int unlen, struct winentry *wep, int chksum, int u2w_loaded, u_int16_t *u2w, int ul_loaded, u_int8_t *ul); +int getunicodefn(struct winentry *wep, u_int16_t *ucfn, u_int16_t *unichars, int chksum); u_int8_t winChksum(u_int8_t *name); -int winSlotCnt(const u_char *un, int unlen); -int winLenFixup(const u_char *un, int unlen); +int winSlotCnt(const u_int16_t *un, int unlen); +int winLenFixup(const u_int16_t *un, int unlen); +int msdosfs_utf8_encodestr(const u_int16_t *in, size_t inlen, u_int8_t *out, size_t *outlen); +int msdosfs_utf8_decodestr(const u_int8_t *in, size_t inlen, u_int16_t *out, size_t *outlen); + #endif /* _KERNEL */ Index: msdosfs_conv.c =================================================================== RCS file: 
/home/ncvs/src/sys/fs/msdosfs/msdosfs_conv.c,v retrieving revision 1.33 diff -u -r1.33 msdosfs_conv.c --- msdosfs_conv.c 3 Feb 2003 19:49:33 -0000 1.33 +++ msdosfs_conv.c 24 Aug 2003 05:45:16 -0000 @@ -1,6 +1,27 @@ -/* $FreeBSD: src/sys/fs/msdosfs/msdosfs_conv.c,v 1.33 2003/02/03 19:49:33 phk Exp $ */ -/* $NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $ */ +/* $FreeBSD$ */ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* $NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $ */ /*- * Copyright (C) 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1995, 1997 TooLs GmbH. @@ -64,6 +85,11 @@ #include /* + * Replacement character for unconvertible characters. + */ +#define REPLACEMENT_CHAR '?' + +/* * Total number of days that have passed for each month in a regular year. 
*/ static u_short regyear[] = { @@ -89,6 +115,7 @@ static u_short lastdtime; static __inline u_int8_t find_lcode(u_int16_t code, u_int16_t *u2w); +static u_char unicode2dos(u_int16_t uc); /* * Convert the unix version of time to dos's idea of time to be used in @@ -231,6 +258,8 @@ } /* + * Unicode (LSB) to Win Latin1 (ANSI CodePage 1252) + * * 0 - character disallowed in long file name. * 1 - character should be replaced by '_' in DOS file name, * and generation number inserted. @@ -238,7 +267,7 @@ * and generation number inserted. */ static u_char -unix2dos[256] = { +unilsb2dos[256] = { 0, 0, 0, 0, 0, 0, 0, 0, /* 00-07 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 08-0f */ 0, 0, 0, 0, 0, 0, 0, 0, /* 10-17 */ @@ -259,92 +288,41 @@ 0, 0, 0, 0, 0, 0, 0, 0, /* 88-8f */ 0, 0, 0, 0, 0, 0, 0, 0, /* 90-97 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 98-9f */ - 0, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, /* a0-a7 */ - 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, /* a8-af */ - 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, /* b0-b7 */ - 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, /* b8-bf */ - 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* c0-c7 */ - 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* c8-cf */ - 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, /* d0-d7 */ - 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, /* d8-df */ - 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* e0-e7 */ - 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* e8-ef */ - 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0xf6, /* f0-f7 */ - 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0x98, /* f8-ff */ + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */ + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */ + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */ + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */ + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* c0-c7 */ + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* c8-cf */ + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 
d0-d7 */ + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* d8-df */ + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */ + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */ + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */ + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */ }; +/* Unicode punctuation marks to Win Latin1 (ANSI CodePage 1252) */ static u_char -dos2unix[256] = { - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 00-07 */ - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 08-0f */ - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 10-17 */ - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 18-1f */ - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */ - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */ - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */ - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */ - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */ - 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */ - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */ - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */ - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */ - 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */ - 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, /* 80-87 */ - 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, /* 88-8f */ - 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, /* 90-97 */ - 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x3f, /* 98-9f */ - 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, /* a0-a7 */ - 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, /* a8-af */ - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xc1, 0xc2, 0xc0, /* b0-b7 */ - 0xa9, 0x3f, 0x3f, 0x3f, 0x3f, 0xa2, 0xa5, 0x3f, /* b8-bf */ - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xe3, 0xc3, /* c0-c7 */ - 0x3f, 0x3f, 0x3f, 0x3f, 
0x3f, 0x3f, 0x3f, 0xa4, /* c8-cf */ - 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x3f, 0xcd, 0xce, /* d0-d7 */ - 0xcf, 0x3f, 0x3f, 0x3f, 0x3f, 0xa6, 0xcc, 0x3f, /* d8-df */ - 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, /* e0-e7 */ - 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0x3f, /* e8-ef */ - 0xad, 0xb1, 0x3f, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, /* f0-f7 */ - 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x3f, 0x3f, /* f8-ff */ +unipunct2dos[48] = { + 1, 1, 1, 0x96, 0x97, 1, 1, 1, /* 2010-2017 */ + 0x91, 0x92, 0x82, 1, 0x93, 0x94, 0x84, 1, /* 2018-201F */ + 0x86, 0x87, 0x95, 1, 1, 1, 0x85, 1, /* 2020-2027 */ + 1, 1, 1, 1, 1, 1, 1, 1, /* 2028-202F */ + 0x89, 1, 1, 1, 1, 1, 1, 1, /* 2030-2037 */ + 1, 0x8B, 0x9B, 1, 1, 1, 1, 1 /* 2038-203F */ }; -static u_char -u2l[256] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */ - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */ - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */ - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */ - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */ - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */ - 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 40-47 */ - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 48-4f */ - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 50-57 */ - 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */ - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */ - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */ - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */ - 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */ - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */ - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */ - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */ - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */ - 
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */ - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */ - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */ - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */ - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* c0-c7 */ - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* c8-cf */ - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* d0-d7 */ - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* d8-df */ - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */ - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */ - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */ - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */ +/* Win Latin1 (ANSI CodePage 1252) to Unicode */ +static u_int16_t +dos2unicode[32] = { + 0x20AC, 0x003f, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 80-87 */ + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003f, 0x017D, 0x003f, /* 88-8F */ + 0x003f, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 90-97 */ + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003f, 0x017E, 0x0178, /* 98-9F */ }; + static u_char l2u[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */ @@ -381,6 +359,54 @@ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */ }; +/* map a Unicode char into a DOS char */ +static u_char +unicode2dos(uc) + u_int16_t uc; +{ + if (uc < 0x100) + return (unilsb2dos[uc]); + + if (uc > 0x2122) + return (1); + + if (uc >= 0x2010 && uc <= 0x203F) + return (unipunct2dos[uc - 0x2010]); + + if (uc >= 0x0152 && uc <= 0x02DC) + switch (uc) { + case 0x0152: + return (0x8C); /* LATIN CAPITAL LIGATURE OE */ + case 0x0153: + return (0x9C); /* LATIN SMALL LIGATURE OE */ + case 0x0160: + return (0x8A); /* CAPITAL LETTER S WITH CARON */ + case 0x0161: + return (0x9A); /* SMALL LETTER S WITH CARON */ + case 0x0178: + return (0x9F); /* CAPITAL LETTER Y WITH DIAERESIS */ + case 0x017D: + return (0x8E); /* CAPITAL LETTER Z 
WITH CARON */ + case 0x017E: + return (0x9E); /* SMALL LETTER Z WITH CARON */ + case 0x0192: + return (0x83); /* SMALL LETTER F WITH HOOK */ + case 0x02C6: + return (0x88); /* MODIFIER LETTER CIRCUMFLEX ACCENT */ + case 0x02DC: + return (0x98); /* SMALL TILDE */ + default: + return (1); + } + + if (uc == 0x20AC) + return (0x80); /* EURO SIGN */ + if (uc == 0x2122) + return (0x99); /* TRADE MARK SIGN */ + + return (1); +} + /* * DOS filenames are made of 2 parts, the name part and the extension part. * The name part is 8 characters long and the extension part is 3 @@ -389,14 +415,14 @@ */ /* - * Convert a DOS filename to a unix filename. And, return the number of + * Convert a DOS filename to a Unicode filename. And, return the number of * characters in the resulting unix filename excluding the terminating * null. */ int -dos2unixfn(dn, un, lower, d2u_loaded, d2u, ul_loaded, ul) +dos2unicodefn(dn, un, lower, d2u_loaded, d2u, ul_loaded, ul) u_char dn[11]; - u_char *un; + u_int16_t *un; int lower; int d2u_loaded; u_int8_t *d2u; @@ -404,34 +430,27 @@ u_int8_t *ul; { int i; - int thislong = 1; - u_char c; - + u_char dc; + int unichars = 0; /* - * If first char of the filename is SLOT_E5 (0x05), then the real - * first char of the filename should be 0xe5. But, they couldn't - * just have a 0xe5 mean 0xe5 because that is used to mean a freed - * directory slot. Another dos quirk. - */ - if (*dn == SLOT_E5) - c = d2u_loaded ? d2u[0xe5 & 0x7f] : dos2unix[0xe5]; - else - c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] : - dos2unix[*dn]; - *un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ? - ul[c & 0x7f] : u2l[c]) : c; - dn++; - - /* - * Copy the name portion into the unix filename string. - */ - for (i = 1; i < 8 && *dn != ' '; i++) { - c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] : - dos2unix[*dn]; - dn++; - *un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ? - ul[c & 0x7f] : u2l[c]) : c; - thislong++; + * Copy the name portion into our Unicode string. 
+ */ + for (i = 0; i < 8 && *dn != ' '; i++) { + dc = *dn++; + /* + * If first char of the filename is SLOT_E5 (0x05), then + * the real first char of the filename should be 0xe5. + * But, they couldn't just have a 0xe5 mean 0xe5 because + * that is used to mean a freed directory slot. + */ + if (i == 0 && dc == SLOT_E5) + dc = 0xe5; + + /* + * (lower & LCASE_BASE) is currently not supported. + * Future versions may have a dos2unicode_lc table + */ + un[unichars++] = (dc < 0x80 || dc > 0x9F ? (u_int16_t)dc : dos2unicode[dc - 0x80]); } dn += 8 - i; @@ -440,24 +459,22 @@ * the extension. */ if (*dn != ' ') { - *un++ = '.'; - thislong++; + un[unichars++] = '.'; for (i = 0; i < 3 && *dn != ' '; i++) { - c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] : - dos2unix[*dn]; - dn++; - *un++ = (lower & LCASE_EXT) ? (ul_loaded && (c & 0x80) ? - ul[c & 0x7f] : u2l[c]) : c; - thislong++; + dc = *dn++; + /* + * (lower & LCASE_EXT) is currently not supported. + * Future versions may have a dos2unicode_lc table + */ + un[unichars++] = (dc < 0x80 || dc > 0x9F ? (u_int16_t)dc : dos2unicode[dc - 0x80]); } } - *un++ = 0; - return (thislong); + return (unichars); } /* - * Convert a unix filename to a DOS filename according to Win95 rules. + * Convert a Unicode filename to a DOS filename according to Win95 rules. * If applicable and gen is not 0, it is inserted into the converted * filename as a generation number. * Returns @@ -468,23 +485,17 @@ * 3 if conversion was successful and generation number was inserted */ int -unix2dosfn(un, dn, unlen, gen, u2d_loaded, u2d, lu_loaded, lu) - const u_char *un; +unicode2dosfn(un, dn, unlen, gen) + const u_int16_t *un; u_char dn[12]; int unlen; u_int gen; - int u2d_loaded; - u_int8_t *u2d; - int lu_loaded; - u_int8_t *lu; { int i, j, l; int conv = 1; - const u_char *cp, *dp, *dp1; + const u_int16_t *cp, *dp, *dp1; u_char gentext[6], *wcp; - u_int8_t c; -#define U2D(c) (u2d_loaded && ((c) & 0x80) ? 
u2d[(c) & 0x7f] : unix2dos[c]) - + u_int16_t c; /* * Fill the dos filename string with blanks. These are DOS's pad * characters. @@ -521,7 +532,7 @@ * Filenames with some characters are not allowed! */ for (cp = un, i = unlen; --i >= 0; cp++) - if (U2D(*cp) == 0) + if (unicode2dos(*cp) == 0) return 0; /* @@ -556,9 +567,9 @@ l = unlen - (dp - un); for (i = 0, j = 8; i < l && j < 11; i++, j++) { c = dp[i]; - c = lu_loaded && (c & 0x80) ? - lu[c & 0x7f] : l2u[c]; - c = U2D(c); + if (c < 0x100) + c = l2u[c]; + c = unicode2dos(c); if (dp[i] != (dn[j] = c) && conv != 3) conv = 2; @@ -583,9 +594,10 @@ * Now convert the rest of the name */ for (i = j = 0; un < dp && j < 8; i++, j++, un++) { - c = lu_loaded && (*un & 0x80) ? - lu[*un & 0x7f] : l2u[*un]; - c = U2D(c); + c = *un; + if (c < 0x100) + c = l2u[c]; + c = unicode2dos(c); if (*un != (dn[j] = c) && conv != 3) conv = 2; @@ -641,7 +653,6 @@ while (wcp < gentext + sizeof(gentext)) dn[i++] = *wcp++; return 3; -#undef U2D } /* @@ -650,16 +661,14 @@ * i.e. 
doesn't consist solely of blanks and dots */ int -unix2winfn(un, unlen, wep, cnt, chksum, table_loaded, u2w) - const u_char *un; +unicode2winfn(un, unlen, wep, cnt, chksum) + const u_int16_t *un; int unlen; struct winentry *wep; int cnt; int chksum; - int table_loaded; - u_int16_t *u2w; { - const u_int8_t *cp; + const u_int16_t *cp; u_int8_t *wcp; int i; u_int16_t code; @@ -688,38 +697,23 @@ for (wcp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) { if (--unlen < 0) goto done; - if (table_loaded && (*un & 0x80)) { - code = u2w[*un++ & 0x7f]; - *wcp++ = code; - *wcp++ = code >> 8; - } else { - *wcp++ = *un++; - *wcp++ = 0; - } + code = *un++; + *wcp++ = code & 0x00ff; + *wcp++ = code >> 8; } for (wcp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) { if (--unlen < 0) goto done; - if (table_loaded && (*un & 0x80)) { - code = u2w[*un++ & 0x7f]; - *wcp++ = code; - *wcp++ = code >> 8; - } else { - *wcp++ = *un++; - *wcp++ = 0; - } + code = *un++; + *wcp++ = code & 0x00ff; + *wcp++ = code >> 8; } for (wcp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) { if (--unlen < 0) goto done; - if (table_loaded && (*un & 0x80)) { - code = u2w[*un++ & 0x7f]; - *wcp++ = code; - *wcp++ = code >> 8; - } else { - *wcp++ = *un++; - *wcp++ = 0; - } + code = *un++; + *wcp++ = code & 0x00ff; + *wcp++ = code >> 8; } if (!unlen) wep->weCnt |= WIN_LAST; @@ -745,14 +739,33 @@ return '?'; } + + +/* + * Convert a Unicode character to a single known case. Upper and lower case + * variants of the same character produce the same result. + * + * Note: this currently only handles case folding of ASCII characters. The + * Unicode standard defines case equivalence for other characters (such as + * precomposed characters), but I don't know whether Windows considers them + * case equivalents. 
+ */ +static inline u_int16_t case_fold(u_int16_t ch) +{ + if (ch < 0x100) + return l2u[ch]; + else + return ch; +} + /* * Compare our filename to the one in the Win95 entry * Returns the checksum or -1 if no match */ int -winChkName(un, unlen, wep, chksum, u2w_loaded, u2w, ul_loaded, ul) - const u_char *un; - int unlen; +winChkName(un, ucslen, wep, chksum, u2w_loaded, u2w, ul_loaded, ul) + const u_int16_t *un; + int ucslen; struct winentry *wep; int chksum; int u2w_loaded; @@ -763,7 +776,6 @@ u_int8_t *cp; int i; u_int16_t code; - u_int8_t c1, c2; /* * First compare checksums @@ -780,104 +792,82 @@ */ i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS; un += i; - unlen -= i; - /* - * unlen being zero must not be treated as length missmatch. It is - * possible if the entry is WIN_LAST and contains nothing but the - * terminating 0. - */ - if (unlen < 0) - return -1; - if ((wep->weCnt&WIN_LAST) && unlen > WIN_CHARS) - return -1; + if ((ucslen -= i) < 0) /* Was "<=". See below. */ + return -1; /* More long name entries than the name would need */ + if ((wep->weCnt&WIN_LAST) && ucslen > WIN_CHARS) + return -1; /* Too few long name entries to hold the name */ + + /* + * [2865792] Some FAT implementations have a bug when the long + * name is an exact multiple of WIN_CHARS long. They make an extra + * long name entry containing only a terminating 0x0000 and + * the 0xFFFF pad characters. While this is out-of-spec + * (i.e. corrupt), we can be graceful and handle it anyway, + * like Windows does. + * + * We handle this case by falling through here with ucslen == 0. + * We then expect to return during the first iteration of the + * following for() loop where --ucslen goes negative, and + * "cp" points to two zero bytes. 
+ */ /* * Compare the name parts */ for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) { - if (--unlen < 0) { + if (--ucslen < 0) { if (!*cp++ && !*cp) return chksum; return -1; } code = (cp[1] << 8) | cp[0]; - if (code & 0xff80) { - if (u2w_loaded) - code = find_lcode(code, u2w); - else if (code & 0xff00) - code = '?'; - } - c1 = ul_loaded && (code & 0x80) ? - ul[code & 0x7f] : u2l[code]; - c2 = ul_loaded && (*un & 0x80) ? - ul[*un & 0x7f] : u2l[*un]; - if (c1 != c2) - return -1; + if (case_fold(code) != case_fold(*un)) + return (-1); cp += 2; un++; } for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) { - if (--unlen < 0) { + if (--ucslen < 0) { if (!*cp++ && !*cp) return chksum; return -1; } code = (cp[1] << 8) | cp[0]; - if (code & 0xff80) { - if (u2w_loaded) - code = find_lcode(code, u2w); - else if (code & 0xff00) - code = '?'; - } - c1 = ul_loaded && (code & 0x80) ? - ul[code & 0x7f] : u2l[code]; - c2 = ul_loaded && (*un & 0x80) ? - ul[*un & 0x7f] : u2l[*un]; - if (c1 != c2) - return -1; + if (case_fold(code) != case_fold(*un)) + return (-1); cp += 2; un++; } for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) { - if (--unlen < 0) { + if (--ucslen < 0) { if (!*cp++ && !*cp) return chksum; return -1; } code = (cp[1] << 8) | cp[0]; - if (code & 0xff80) { - if (u2w_loaded) - code = find_lcode(code, u2w); - else if (code & 0xff00) - code = '?'; - } - c1 = ul_loaded && (code & 0x80) ? - ul[code & 0x7f] : u2l[code]; - c2 = ul_loaded && (*un & 0x80) ? - ul[*un & 0x7f] : u2l[*un]; - if (c1 != c2) - return -1; + if (case_fold(code) != case_fold(*un)) + return (-1); cp += 2; un++; } return chksum; } + /* - * Convert Win95 filename to dirbuf. + * Collect Win95 filename Unicode chars into buf. 
* Returns the checksum or -1 if impossible */ int -win2unixfn(wep, dp, chksum, table_loaded, u2w) +getunicodefn(wep, ucfn, unichars, chksum) struct winentry *wep; - struct dirent *dp; + u_int16_t *ucfn; + u_int16_t *unichars; int chksum; - int table_loaded; - u_int16_t *u2w; { u_int8_t *cp; - u_int8_t *np, *ep = dp->d_name + WIN_MAXLEN; + u_int16_t *np, *ep = ucfn + WIN_MAXLEN; /* ep bounds the ucfn[] name buffer, not the length counter */ u_int16_t code; int i; @@ -890,10 +880,7 @@ */ if (wep->weCnt&WIN_LAST) { chksum = wep->weChksum; - /* - * This works even though d_namlen is one byte! - */ - dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS; + *unichars = (wep->weCnt&WIN_CNT) * WIN_CHARS; } else if (chksum != wep->weChksum) chksum = -1; if (chksum == -1) @@ -903,7 +890,7 @@ * Offset of this entry */ i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS; - np = (u_int8_t *)dp->d_name + i; + np = ucfn + i; /* * Convert the name parts @@ -913,19 +900,13 @@ switch (code) { case 0: *np = '\0'; - dp->d_namlen -= sizeof(wep->wePart2)/2 + *unichars -= sizeof(wep->wePart2)/2 + sizeof(wep->wePart3)/2 + i + 1; return chksum; case '/': *np = '\0'; return -1; default: - if (code & 0xff80) { - if (table_loaded) - code = find_lcode(code, u2w); - else if (code & 0xff00) - code = '?'; - } *np++ = code; break; } @@ -945,18 +926,12 @@ switch (code) { case 0: *np = '\0'; - dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1; + *unichars -= sizeof(wep->wePart3)/2 + i + 1; return chksum; case '/': *np = '\0'; return -1; default: - if (code & 0xff80) { - if (table_loaded) - code = find_lcode(code, u2w); - else if (code & 0xff00) - code = '?'; - } *np++ = code; break; } @@ -976,18 +951,12 @@ switch (code) { case 0: *np = '\0'; - dp->d_namlen -= i + 1; + *unichars -= i + 1; return chksum; case '/': *np = '\0'; return -1; default: - if (code & 0xff80) { - if (table_loaded) - code = find_lcode(code, u2w); - else if (code & 0xff00) - code = '?'; - } *np++ = code; break; } @@ -1024,7 +993,7 @@ */ int winSlotCnt(un, unlen) - const u_char *un; + const u_int16_t *un; 
int unlen; { unlen = winLenFixup(un, unlen); @@ -1038,11 +1007,125 @@ */ int winLenFixup(un, unlen) - const u_char* un; + const u_int16_t* un; int unlen; { for (un += unlen; unlen > 0; unlen--) if (*--un != ' ' && *un != '.') break; return unlen; +} + +/* + * Encode Unicode string into UTF-8 bytes + */ +int +msdosfs_utf8_encodestr(in, inlen, out, outlen) + const u_int16_t *in; + size_t inlen; + u_int8_t *out; + size_t *outlen; +{ + u_int8_t *outmax, *o; + + outmax = out + *outlen; + for (o = out; inlen > 0; in++, inlen--) { + int outsize; + u_int16_t code; + + code = *in; + + if (code < 0x80) outsize = 1; + else if (code < 0x800) outsize = 2; + else outsize = 3; + + if (o + outsize > outmax) { + *outlen = (size_t)(o - out); + return E2BIG; + } + + switch (outsize) { + case 3: + o[2] = 0x80 | (code & 0x3f); + code = (code >> 6) | 0x800; + /* FALLTHROUGH */ + case 2: + o[1] = 0x80 | (code & 0x3f); + code = (code >> 6) | 0xc0; + /* FALLTHROUGH */ + case 1: + o[0] = code; + } + o += outsize; + } + + *outlen = (size_t)(o - out); + return 0; +} + +/* + * Decode UTF-8 sequence to Unicode string + */ +int +msdosfs_utf8_decodestr(in, inlen, out, outlen) + const u_int8_t *in; + size_t inlen; + u_int16_t *out; + size_t *outlen; +{ + u_int16_t *outmax, *o; + + outmax = out + *outlen; + for (o = out; o < outmax && inlen > 0; o++) { + if (*in < 0x80) { + *o = *in++; + inlen--; + } else if (*in < 0xc2) { + in++; inlen--; + *o = REPLACEMENT_CHAR; + } else if (*in < 0xe0) { + if (inlen < 2) { + *o++ = REPLACEMENT_CHAR; + break; + } + if ((in[1] ^ 0x80) >= 0x40) + *o = REPLACEMENT_CHAR; + else + *o = ((u_int16_t)(in[0] & 0x1f) << 6) | + (u_int16_t)(in[1] ^ 0x80); + in += 2; + inlen -= 2; + } else if (*in < 0xf0) { + if (inlen < 3) { + *o++ = REPLACEMENT_CHAR; + break; + } + if ((in[1] ^ 0x80) >= 0x40 || (in[2] ^ 0x80) >= 0x40 || + (in[0] < 0xe1 && in[1] < 0xa0)) /* lead 0xE0 needs 0xA0..0xBF next, else overlong */ + *o = REPLACEMENT_CHAR; + else + *o = ((u_int16_t)(in[0] & 0x0f) << 12) | + ((u_int16_t)(in[1] ^ 0x80) << 6) | + 
((u_int16_t)(in[2] ^ 0x80)); + in += 3; + inlen -= 3; + } else { + size_t seqsize; + + if (*in < 0xf8) seqsize = 4; + else if (*in < 0xfc) seqsize = 5; + else if (*in < 0xff) seqsize = 6; + else seqsize = 1; + + if (inlen >= seqsize) { + inlen -= seqsize; + in += seqsize; + } else + inlen = 0; + *o = REPLACEMENT_CHAR; + } + } + + *outlen = (size_t)(o - out); + return 0; } Index: msdosfs_lookup.c =================================================================== RCS file: /home/ncvs/src/sys/fs/msdosfs/msdosfs_lookup.c,v retrieving revision 1.38 diff -u -r1.38 msdosfs_lookup.c --- msdosfs_lookup.c 4 Aug 2002 10:29:26 -0000 1.38 +++ msdosfs_lookup.c 24 Aug 2003 05:45:18 -0000 @@ -1,6 +1,26 @@ /* $FreeBSD: src/sys/fs/msdosfs/msdosfs_lookup.c,v 1.38 2002/08/04 10:29:26 jeff Exp $ */ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ /* $NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $ */ - /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. 
@@ -118,6 +138,9 @@ int olddos = 1; cnp->cn_flags &= ~PDIRUNLOCK; + u_int16_t ucfn[WIN_MAXLEN]; + size_t unichars; + #ifdef MSDOSFS_DEBUG printf("msdosfs_lookup(): looking for %s\n", cnp->cn_nameptr); #endif @@ -149,29 +172,32 @@ goto foundroot; } - switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename, - cnp->cn_namelen, 0, - pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d, - pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu)) { + /* + * Decode lookup name into UCS-2 (Unicode) + */ + unichars = WIN_MAXLEN; + (void) msdosfs_utf8_decodestr(cnp->cn_nameptr, cnp->cn_namelen, + ucfn, &unichars); + + switch (unicode2dosfn(ucfn, dosfilename, unichars, 0)) { case 0: return (EINVAL); case 1: break; case 2: - wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, - cnp->cn_namelen) + 1; + wincnt = winSlotCnt(ucfn, unichars) + 1; break; case 3: olddos = 0; - wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, - cnp->cn_namelen) + 1; + wincnt = winSlotCnt(ucfn, unichars) + 1; break; } if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) { wincnt = 1; olddos = 1; } - unlen = winLenFixup(cnp->cn_nameptr, cnp->cn_namelen); + + unlen = winLenFixup(ucfn, unichars); /* * Suppress search for slots unless creating @@ -189,8 +215,7 @@ dosfilename, cnp->cn_namelen); #endif /* - * Search the directory pointed at by vdp for the name pointed at - * by cnp->cn_nameptr. + * Search the directory pointed at by vdp for the name in ucfn. 
*/ tdp = NULL; /* @@ -256,7 +281,7 @@ if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; - chksum = winChkName((const u_char *)cnp->cn_nameptr, + chksum = winChkName(ucfn, unlen, (struct winentry *)dep, chksum, @@ -630,10 +655,17 @@ */ if (ddep->de_fndcnt > 0) { u_int8_t chksum = winChksum(ndep->deName); - const u_char *un = (const u_char *)cnp->cn_nameptr; - int unlen = cnp->cn_namelen; + u_int16_t ucfn[WIN_MAXLEN]; + size_t unichars; int cnt = 1; + /* + * Decode component name into Unicode + */ + unichars = WIN_MAXLEN; + (void) msdosfs_utf8_decodestr(cnp->cn_nameptr, cnp->cn_namelen, + ucfn, &unichars); + while (--ddep->de_fndcnt >= 0) { if (!(ddep->de_fndoffset & pmp->pm_crbomask)) { if ((error = bwrite(bp)) != 0) @@ -658,10 +690,8 @@ ndep--; ddep->de_fndoffset -= sizeof(struct direntry); } - if (!unix2winfn(un, unlen, (struct winentry *)ndep, - cnt++, chksum, - pmp->pm_flags & MSDOSFSMNT_U2WTABLE, - pmp->pm_u2w)) + if (!unicode2winfn(ucfn, unichars, + (struct winentry *)ndep, cnt++, chksum)) break; } } @@ -989,22 +1019,25 @@ daddr_t bn; struct buf *bp; int error; - + u_int16_t ucfn[WIN_MAXLEN]; + size_t unichars; + + /* + * Decode component name into Unicode + */ + unichars = WIN_MAXLEN; + (void) msdosfs_utf8_decodestr(cnp->cn_nameptr, cnp->cn_namelen, ucfn, + &unichars); + if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) - return (unix2dosfn((const u_char *)cnp->cn_nameptr, cp, - cnp->cn_namelen, 0, - pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d, - pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu) ? + return (unicode2dosfn(ucfn, cp, unichars, 0) ? 0 : EINVAL); for (gen = 1;; gen++) { /* * Generate DOS name with generation number */ - if (!unix2dosfn((const u_char *)cnp->cn_nameptr, cp, - cnp->cn_namelen, gen, - pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d, - pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu)) + if (!unicode2dosfn(ucfn, cp, unichars, gen)) return gen == 1 ? 
EINVAL : EEXIST; /* Index: msdosfs_vfsops.c =================================================================== RCS file: /home/ncvs/src/sys/fs/msdosfs/msdosfs_vfsops.c,v retrieving revision 1.105 diff -u -r1.105 msdosfs_vfsops.c --- msdosfs_vfsops.c 12 Aug 2003 20:06:55 -0000 1.105 +++ msdosfs_vfsops.c 24 Aug 2003 05:45:19 -0000 @@ -864,3 +864,4 @@ }; VFS_SET(msdosfs_vfsops, msdosfs, 0); +MODULE_DEPEND(msdosfs, libiconv, 1, 1, 1); Index: msdosfs_vnops.c =================================================================== RCS file: /home/ncvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v retrieving revision 1.140 diff -u -r1.140 msdosfs_vnops.c --- msdosfs_vnops.c 12 Aug 2003 20:06:55 -0000 1.140 +++ msdosfs_vnops.c 24 Aug 2003 05:45:21 -0000 @@ -1,4 +1,25 @@ /* $FreeBSD: src/sys/fs/msdosfs/msdosfs_vnops.c,v 1.140 2003/08/12 20:06:55 trhodes Exp $ */ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- @@ -1463,6 +1484,9 @@ int ncookies = 0; off_t offset, off; int chksum = -1; + u_int16_t ucfn[WIN_MAXLEN + 1]; + u_int16_t unichars; + size_t outbytes; #ifdef MSDOSFS_DEBUG printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", @@ -1609,10 +1633,8 @@ if (dentp->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; - chksum = win2unixfn((struct winentry *)dentp, - &dirbuf, chksum, - pmp->pm_flags & MSDOSFSMNT_U2WTABLE, - pmp->pm_u2w); + chksum = getunicodefn((struct winentry *)dentp, + ucfn, &unichars, chksum); continue; } @@ -1648,9 +1670,8 @@ dirbuf.d_fileno = offset / sizeof(struct direntry); dirbuf.d_type = DT_REG; } - if (chksum != winChksum(dentp->deName)) - dirbuf.d_namlen = dos2unixfn(dentp->deName, - (u_char *)dirbuf.d_name, + if (chksum != winChksum(dentp->deName)) { + unichars = dos2unicodefn(dentp->deName, ucfn, dentp->deLowerCase | ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ? (LCASE_BASE | LCASE_EXT) : 0), @@ -1658,8 +1679,14 @@ pmp->pm_d2u, pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_ul); - else - dirbuf.d_name[dirbuf.d_namlen] = 0; + } + + /* translate the name in ucfn into UTF-8 */ + outbytes = sizeof(dirbuf.d_name) - 1; + (void) msdosfs_utf8_encodestr(ucfn, unichars, + dirbuf.d_name, &outbytes); + dirbuf.d_name[outbytes] = 0; /* null terminate */ + dirbuf.d_namlen = outbytes; chksum = -1; dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) {