2 * MultiByteToWideChar implementation
4 * Copyright 2000 Alexandre Julliard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23 #include "wine/unicode.h"
25 /* get the decomposition of a Unicode char */
/*
 * Stores the decomposition of `src` into `dst`, which has room for `dstlen`
 * WCHARs. The data comes from unicode_decompose_table, which is walked as a
 * multi-level index: first by the high byte of `src`, then by bits 4-7, and
 * finally by the low nibble (two WCHARs per final entry).
 * Returns 0 when a decomposition exists but `dstlen` is 1 or less.
 * NOTE(review): the embedded line numbers skip 27, 30-32, 35 and 39-40, so
 * the braces, the declaration of `res` and the remaining return paths are
 * not visible in this extract -- confirm against the complete file.
 */
26 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
28 extern const WCHAR unicode_decompose_table[];
29 const WCHAR *ptr = unicode_decompose_table;
/* first level: offset selected by the high byte of src */
33 ptr = unicode_decompose_table + ptr[src >> 8];
/* second level selected by bits 4-7; the entry for the low nibble is 2 WCHARs wide */
34 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
/* a two-part decomposition cannot fit in a buffer of one char or less */
36 if (dstlen <= 1) return 0;
37 /* apply the decomposition recursively to the first char */
/* on success (non-zero res) append the second char of the pair after the
 * recursively expanded first char and count it */
38 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
42 /* check src string for invalid chars; return non-zero if invalid char found */
/*
 * table:  single-byte code page table to validate against
 * flags:  MB_USEGLYPHCHARS selects the glyph variant of the mapping table
 * src:    input bytes
 * srclen: number of input bytes
 * NOTE(review): the loop header and the return statement are not visible in
 * this extract (embedded line numbers skip 50-51 and 53-57); presumably the
 * loop walks all srclen bytes and the result reflects whether it stopped
 * early -- confirm against the complete file.
 */
43 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
44 const unsigned char *src, unsigned int srclen )
46 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
47 const WCHAR def_unicode_char = table->info.def_unicode_char;
/* code page byte that the default Unicode char maps back to (two-level uni2cp lookup) */
48 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
49 + (def_unicode_char & 0xff)];
/* a byte that maps to the default Unicode char without being the default
 * char's own byte is treated as invalid: stop scanning there */
52 if (cp2uni[*src] == def_unicode_char && *src != def_char) break;
59 /* mbstowcs for single-byte code page */
60 /* all lengths are in characters, not bytes */
/*
 * table:  single-byte code page table
 * flags:  MB_USEGLYPHCHARS selects the glyph variant of the mapping table
 * src:    input bytes, srclen of them
 * dst:    output buffer with room for dstlen WCHARs
 * Each input byte is translated with a single cp2uni[] lookup.
 * NOTE(review): the switch header, the enclosing loop, the pointer/length
 * updates and the declaration of `ret` are not visible in this extract
 * (embedded line numbers skip 66-69, 71-79, 96-97 and 99-103); presumably
 * the switch selects on the number of chars left, capped at 16, inside a
 * loop that converts 16 chars per pass -- confirm against the complete file.
 */
61 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
62 const unsigned char *src, unsigned int srclen,
63 WCHAR *dst, unsigned int dstlen )
65 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
70 /* buffer too small: fill it up to dstlen and return error */
/* the cases below have no break between them: entering at case N converts
 * chars N-1 down to 0, i.e. exactly N chars */
80 case 16: dst[15] = cp2uni[src[15]];
81 case 15: dst[14] = cp2uni[src[14]];
82 case 14: dst[13] = cp2uni[src[13]];
83 case 13: dst[12] = cp2uni[src[12]];
84 case 12: dst[11] = cp2uni[src[11]];
85 case 11: dst[10] = cp2uni[src[10]];
86 case 10: dst[9] = cp2uni[src[9]];
87 case 9: dst[8] = cp2uni[src[8]];
88 case 8: dst[7] = cp2uni[src[7]];
89 case 7: dst[6] = cp2uni[src[6]];
90 case 6: dst[5] = cp2uni[src[5]];
91 case 5: dst[4] = cp2uni[src[4]];
92 case 4: dst[3] = cp2uni[src[3]];
93 case 3: dst[2] = cp2uni[src[2]];
94 case 2: dst[1] = cp2uni[src[1]];
95 case 1: dst[0] = cp2uni[src[0]];
/* fewer than 16 chars were left, so the pass above was the last one */
98 if (srclen < 16) return ret;
105 /* mbstowcs for single-byte code page with char decomposition */
/*
 * table:  single-byte code page table
 * flags:  MB_USEGLYPHCHARS selects the glyph variant of the mapping table
 * src:    input bytes, srclen of them
 * dst:    output buffer with room for dstlen WCHARs; dstlen == 0 requests a
 *         length computation only
 * Returns -1 when input remains after the conversion loop has stopped.
 * NOTE(review): the declaration of `len`, the handling of `res` after each
 * decomposition and the non-error return statements are not visible in this
 * extract (embedded line numbers skip 111-112, 118-120, 124-127, 129-130)
 * -- confirm against the complete file.
 */
106 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
107 const unsigned char *src, unsigned int srclen,
108 WCHAR *dst, unsigned int dstlen )
110 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
113 if (!dstlen) /* compute length */
115 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
/* decompose every char into the scratch buffer and add up the lengths */
116 for (len = 0; srclen; srclen--, src++)
117 len += get_decomposition( cp2uni[*src], dummy, 4 );
/* len holds the space still available in dst; stop when either the input
 * or the output space runs out */
121 for (len = dstlen; srclen && len; srclen--, src++)
123 int res = get_decomposition( cp2uni[*src], dst, len );
128 if (srclen) return -1; /* overflow */
132 /* query necessary dst length for src string */
/*
 * table:  double-byte code page table (only its lead-byte table is read here)
 * src:    input bytes, srclen of them
 * NOTE(review): the declaration of `len`, the lead-byte test guarding line
 * 143 and the return statement are not visible in this extract (embedded
 * line numbers skip 137-138, 140-142, 144-148) -- confirm against the
 * complete file.
 */
133 static inline int get_length_dbcs( const struct dbcs_table *table,
134 const unsigned char *src, unsigned int srclen )
136 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
/* one output char is counted per loop iteration */
139 for (len = 0; srclen; srclen--, src++, len++)
/* consumes one more input byte; breaking here skips the loop's len++, so a
 * truncated trailing character is not counted */
143 if (!--srclen) break; /* partial char, ignore it */
150 /* check src string for invalid chars; return non-zero if invalid char found */
/*
 * table:  double-byte code page table to validate against
 * src:    input bytes, srclen of them
 * A character is treated as invalid when it maps to the table's default
 * Unicode char without being the default char's own code page value, or
 * when a lead byte is the last byte of the input.
 * NOTE(review): the loop header, the pointer advance past the trail byte and
 * the return statement are not visible in this extract (embedded line
 * numbers skip 159-160, 163, 167-169, 171-175) -- confirm against the
 * complete file.
 */
151 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
152 const unsigned char *src, unsigned int srclen )
154 const WCHAR * const cp2uni = table->cp2uni;
155 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
156 const WCHAR def_unicode_char = table->info.def_unicode_char;
/* code page value that the default Unicode char maps back to (two-level uni2cp lookup) */
157 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
158 + (def_unicode_char & 0xff)];
/* a non-zero lead-byte entry marks the start of a two-byte character and
 * also selects the 256-entry page of cp2uni used for its trail byte */
161 unsigned char off = cp2uni_lb[*src];
162 if (off) /* multi-byte char */
164 if (srclen == 1) break; /* partial char, error */
/* invalid if the pair maps to the default char without being the default
 * char's own two-byte value */
165 if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
166 ((src[0] << 8) | src[1]) != def_char) break;
/* single-byte character: same test on the one byte */
170 else if (cp2uni[*src] == def_unicode_char && *src != def_char) break;
177 /* mbstowcs for double-byte code page */
178 /* all lengths are in characters, not bytes */
/*
 * table:  double-byte code page table
 * src:    input bytes, srclen of them
 * dst:    output buffer with room for dstlen WCHARs; dstlen == 0 only
 *         queries the required length
 * Returns -1 when input remains after the conversion loop has stopped.
 * NOTE(review): the declaration of `len`, the lead-byte test, the advance of
 * src onto the trail byte and the non-error return are not visible in this
 * extract (embedded line numbers skip 185-186, 192-193, 195, 197, 199,
 * 201-203) -- confirm against the complete file.
 */
179 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
180 const unsigned char *src, unsigned int srclen,
181 WCHAR *dst, unsigned int dstlen )
183 const WCHAR * const cp2uni = table->cp2uni;
184 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
187 if (!dstlen) return get_length_dbcs( table, src, srclen );
/* len counts the free slots left in dst; each iteration stores one WCHAR */
189 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
191 unsigned char off = cp2uni_lb[*src];
194 if (!--srclen) break; /* partial char, ignore it */
/* two-byte character: `off` selects the cp2uni page; *src is presumably the
 * trail byte at this point (the advance is not visible here) */
196 *dst = cp2uni[(off << 8) + *src];
198 else *dst = cp2uni[*src];
200 if (srclen) return -1; /* overflow */
205 /* mbstowcs for double-byte code page with character decomposition */
/*
 * table:  double-byte code page table
 * src:    input bytes, srclen of them
 * dst:    output buffer with room for dstlen WCHARs; dstlen == 0 requests a
 *         length computation only
 * Returns -1 when input remains after the conversion loop has stopped.
 * NOTE(review): the declarations of `len`, `res` and `ch`, the lead-byte
 * tests, the advance of src onto the trail byte, the bookkeeping after each
 * decomposition and the non-error returns are not visible in this extract
 * (gaps in the embedded line numbers) -- confirm against the complete file.
 */
206 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
207 const unsigned char *src, unsigned int srclen,
208 WCHAR *dst, unsigned int dstlen )
210 const WCHAR * const cp2uni = table->cp2uni;
211 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
216 if (!dstlen) /* compute length */
218 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
/* map each character to Unicode, decompose it into the scratch buffer and
 * add up the resulting lengths */
219 for (len = 0; srclen; srclen--, src++)
221 unsigned char off = cp2uni_lb[*src];
224 if (!--srclen) break; /* partial char, ignore it */
226 ch = cp2uni[(off << 8) + *src];
228 else ch = cp2uni[*src];
229 len += get_decomposition( ch, dummy, 4 );
/* conversion pass: len holds the space still available in dst */
234 for (len = dstlen; srclen && len; srclen--, src++)
236 unsigned char off = cp2uni_lb[*src];
239 if (!--srclen) break; /* partial char, ignore it */
241 ch = cp2uni[(off << 8) + *src];
243 else ch = cp2uni[*src];
/* get_decomposition returned 0: stop converting here */
244 if (!(res = get_decomposition( ch, dst, len ))) break;
248 if (srclen) return -1; /* overflow */
253 /* return -1 on dst buffer overflow, -2 on invalid input char */
/*
 * Converts a string in the code page described by `table` to WCHARs,
 * dispatching on the table's character size and on `flags`.
 * table:  code page table (single-byte when info.char_size == 1)
 * flags:  MB_ERR_INVALID_CHARS enables validation of the input first;
 *         MB_COMPOSITE selects the decomposing converters;
 *         MB_USEGLYPHCHARS is passed through to the single-byte helpers
 * s:      input bytes, srclen of them
 * dst:    output buffer with room for dstlen WCHARs
 * NOTE(review): braces and any `else` keywords are not visible in this
 * extract; lines 275-282 are presumably the branch taken when char_size is
 * not 1 -- confirm against the complete file.
 */
254 int wine_cp_mbstowcs( const union cptable *table, int flags,
255 const char *s, int srclen,
256 WCHAR *dst, int dstlen )
/* work on unsigned bytes so values above 0x7f index the tables correctly */
258 const unsigned char *src = (const unsigned char*) s;
260 if (table->info.char_size == 1)
262 if (flags & MB_ERR_INVALID_CHARS)
264 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
266 if (!(flags & MB_COMPOSITE))
/* without decomposition each input byte yields one WCHAR, so the required
 * length equals srclen */
268 if (!dstlen) return srclen;
269 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
271 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
275 if (flags & MB_ERR_INVALID_CHARS)
277 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
279 if (!(flags & MB_COMPOSITE))
280 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
282 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
286 /* CP_SYMBOL implementation */
287 /* return -1 on dst buffer overflow */
/*
 * src:    input bytes, srclen of them
 * dst:    output buffer with room for dstlen WCHARs; dstlen == 0 only
 *         queries the required length, which equals srclen
 * NOTE(review): the declarations of `len` and `i`, the statement that stores
 * each converted char and the final return are not visible in this extract
 * (embedded line numbers skip 289-290, 294, 296-300, 302 onward) -- confirm
 * against the complete file.
 */
288 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen)
291 if( dstlen == 0) return srclen;
/* convert at most as many chars as fit in dst */
292 len = dstlen > srclen ? srclen : dstlen;
293 for( i = 0; i < len; i++)
/* read the byte as unsigned so values above 0x7f do not become negative */
295 unsigned char c = src [ i ];
/* some input was left unconverted because dst was too small */
301 if( srclen > len) return -1;