4 * Native language support--charsets and Unicode translations.
5 * By Gordon Chaffee 1996, 1997
7 * Unicode based case conversion 1999 by Wolfram Pienkoss
11 #include <linux/module.h>
12 #include <linux/string.h>
13 #include <linux/config.h>
14 #include <linux/nls.h>
15 #include <linux/kernel.h>
16 #include <linux/errno.h>
18 #include <linux/kmod.h>
20 #include <linux/spinlock.h>
/* Built-in fallback table; only declared here, its initializer appears further down in this file. */
22 static struct nls_table default_table;
/* Head of the list of known tables (walked through ->next by find_nls()); initially just default_table. */
23 static struct nls_table *tables = &default_table;
/* Spinlock taken by register_nls(), unregister_nls() and find_nls(). */
24 static DEFINE_SPINLOCK(nls_lock);
27 * Sample implementation from Unicode home page.
28 * http://www.stonehand.com/unicode/standard/fss-utf.html
/*
 * One row per UTF-8 sequence length (1 to 6 bytes, see the row comments),
 * followed by a terminating row whose first initializer is 0.
 *
 * The conversion routines in this file loop over the rows until t->cmask is 0
 * and compare a byte against a row with (byte & t->cmask) == t->cval.
 *
 * NOTE(review): struct utf8_table is not visible in this listing. Presumably
 * the initializers are, in order: lead-byte mask (cmask), lead-byte value
 * (cval), a shift amount (multiples of 6), the largest value encodable with
 * that length, and the smallest value that needs that length -- confirm
 * against the struct definition.
 */
38 static struct utf8_table utf8_table[] =
40 {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
41 {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
42 {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
43 {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
44 {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
45 {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
46 {0, /* end of table */}
/*
 * utf8_mbtowc - going by the name and signature, converts the UTF-8 sequence
 * at @s into a wide character stored through @p.
 *
 * NOTE(review): only part of the body is visible in this listing; the return
 * values and the exact use of @p and @n cannot be confirmed from here.
 */
50 utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
/* Try each utf8_table row in turn; the row with cmask == 0 ends the search. */
59 for (t = utf8_table; t->cmask; t++) {
/* The row matches when c0 (presumably the first byte of the sequence) masked with cmask equals cval. */
61 if ((c0 & t->cmask) == t->cval) {
/* Flip bit 7 of the current byte: a continuation byte of the form 10xxxxxx then has both top bits clear. */
71 c = (*s ^ 0x80) & 0xFF;
/*
 * utf8_mbstowcs - string variant built on utf8_mbtowc(): converts the bytes
 * at @s one sequence at a time.
 *
 * NOTE(review): the pointer setup, the handling of the conversion result and
 * the return value are not visible in this listing.
 */
80 utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
/* Continue until a zero byte is reached at ip or the n budget is used up. */
88 while (*ip && n > 0) {
/* Convert one sequence at ip into the slot at op; size receives utf8_mbtowc()'s result. */
90 size = utf8_mbtowc(op, ip, n);
92 /* Ignore character and move on */
/*
 * utf8_wctomb - going by the name and signature, encodes the wide character
 * @wc as UTF-8 bytes written through @s, limited by @maxlen.
 *
 * NOTE(review): the declarations and assignments of l and c, and all return
 * statements, are not visible in this listing.
 */
109 utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
113 struct utf8_table *t;
/*
 * Step through utf8_table one row per iteration, decrementing maxlen each
 * time; the loop stops at the terminating row or when maxlen reaches 0.
 */
120 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
/* Byte formed from the row's cval OR'ed with l shifted right by c (presumably the lead byte). */
124 *s = t->cval | (l >> c);
/* Byte formed from the 0x80 marker plus six bits of l (continuation-byte layout 10xxxxxx). */
128 *s = 0x80 | ((l >> c) & 0x3F);
/*
 * utf8_wcstombs - string variant built on utf8_wctomb(): encodes the wide
 * characters at @pwcs one at a time.
 *
 * NOTE(review): the pointer setup, the handling of the conversion result and
 * the return value are not visible in this listing.
 */
137 utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
/* Continue until a zero wide character is reached at ip or the maxlen budget is used up. */
145 while (*ip && maxlen > 0) {
/* Encode one wide character at op; size receives utf8_wctomb()'s result. */
147 size = utf8_wctomb(op, *ip, maxlen);
149 /* Ignore character and move on */
/*
 * register_nls - going by the name, adds @nls to the list of known tables.
 *
 * NOTE(review): the list manipulation and the return values are not visible
 * in this listing. The two spin_unlock() calls suggest two exit paths
 * (presumably one error return and one success return) -- confirm.
 */
163 int register_nls(struct nls_table * nls)
/* tmp starts at the address of the list head pointer. */
165 struct nls_table ** tmp = &tables;
/* The list is only touched with nls_lock held. */
172 spin_lock(&nls_lock);
175 spin_unlock(&nls_lock);
182 spin_unlock(&nls_lock);
/*
 * unregister_nls - going by the name, removes @nls from the list of known
 * tables.
 *
 * NOTE(review): the list manipulation and the return values are not visible
 * in this listing. The two spin_unlock() calls suggest two exit paths
 * (presumably "found and removed" and "not found") -- confirm.
 */
186 int unregister_nls(struct nls_table * nls)
/* tmp starts at the address of the list head pointer. */
188 struct nls_table ** tmp = &tables;
/* The list is only touched with nls_lock held. */
190 spin_lock(&nls_lock);
194 spin_unlock(&nls_lock);
199 spin_unlock(&nls_lock);
/*
 * find_nls - search the list of known tables for @charset.
 *
 * Walks the list starting at tables with nls_lock held, comparing @charset
 * against each entry's charset name and, when the entry has one, its alias.
 * For a non-NULL result, try_module_get() is called on the owning module
 * before the lock is dropped.
 *
 * NOTE(review): the statements executed on a name match, on a failed
 * try_module_get(), and the final return are not visible in this listing.
 */
203 static struct nls_table *find_nls(char *charset)
205 struct nls_table *nls;
206 spin_lock(&nls_lock);
/* nls is NULL after the loop if the end of the list was reached. */
207 for (nls = tables; nls; nls = nls->next) {
/* Exact match on the primary charset name. */
208 if (!strcmp(nls->charset, charset))
/* alias is optional, so it is checked for NULL before comparing. */
210 if (nls->alias && !strcmp(nls->alias, charset))
/* Take a reference on the module owning the table that was found. */
213 if (nls && !try_module_get(nls->owner))
215 spin_unlock(&nls_lock);
/*
 * load_nls - look up the table for @charset, requesting the module
 * "nls_<charset>" along the way.
 *
 * Calls find_nls(), then request_module(), then find_nls() again.
 *
 * NOTE(review): the conditions linking these steps, any preprocessor guards
 * and the return statements are not visible in this listing.
 */
219 struct nls_table *load_nls(char *charset)
221 struct nls_table *nls;
/* First attempt: the table may already be on the list. */
226 nls = find_nls(charset);
/* Ask the module loader for "nls_<charset>"; the result is kept in ret. */
231 ret = request_module("nls_%s", charset);
/* Reports a failed load; presumably reached only when ret is non-zero -- TODO confirm. */
233 printk("Unable to load NLS charset %s\n", charset);
/* Second attempt, after the module request. */
236 nls = find_nls(charset);
241 void unload_nls(struct nls_table *nls)
243 module_put(nls->owner);
/*
 * Byte-to-Unicode map read by char2uni(): entry i is the Unicode value for
 * input byte i.  Every entry equals its own index (0x0000..0x00ff), i.e. the
 * default table maps each byte to the code point of the same value.
 */
246 static wchar_t charset2uni[256] = {
248 0x0000, 0x0001, 0x0002, 0x0003,
249 0x0004, 0x0005, 0x0006, 0x0007,
250 0x0008, 0x0009, 0x000a, 0x000b,
251 0x000c, 0x000d, 0x000e, 0x000f,
253 0x0010, 0x0011, 0x0012, 0x0013,
254 0x0014, 0x0015, 0x0016, 0x0017,
255 0x0018, 0x0019, 0x001a, 0x001b,
256 0x001c, 0x001d, 0x001e, 0x001f,
258 0x0020, 0x0021, 0x0022, 0x0023,
259 0x0024, 0x0025, 0x0026, 0x0027,
260 0x0028, 0x0029, 0x002a, 0x002b,
261 0x002c, 0x002d, 0x002e, 0x002f,
263 0x0030, 0x0031, 0x0032, 0x0033,
264 0x0034, 0x0035, 0x0036, 0x0037,
265 0x0038, 0x0039, 0x003a, 0x003b,
266 0x003c, 0x003d, 0x003e, 0x003f,
268 0x0040, 0x0041, 0x0042, 0x0043,
269 0x0044, 0x0045, 0x0046, 0x0047,
270 0x0048, 0x0049, 0x004a, 0x004b,
271 0x004c, 0x004d, 0x004e, 0x004f,
273 0x0050, 0x0051, 0x0052, 0x0053,
274 0x0054, 0x0055, 0x0056, 0x0057,
275 0x0058, 0x0059, 0x005a, 0x005b,
276 0x005c, 0x005d, 0x005e, 0x005f,
278 0x0060, 0x0061, 0x0062, 0x0063,
279 0x0064, 0x0065, 0x0066, 0x0067,
280 0x0068, 0x0069, 0x006a, 0x006b,
281 0x006c, 0x006d, 0x006e, 0x006f,
283 0x0070, 0x0071, 0x0072, 0x0073,
284 0x0074, 0x0075, 0x0076, 0x0077,
285 0x0078, 0x0079, 0x007a, 0x007b,
286 0x007c, 0x007d, 0x007e, 0x007f,
288 0x0080, 0x0081, 0x0082, 0x0083,
289 0x0084, 0x0085, 0x0086, 0x0087,
290 0x0088, 0x0089, 0x008a, 0x008b,
291 0x008c, 0x008d, 0x008e, 0x008f,
293 0x0090, 0x0091, 0x0092, 0x0093,
294 0x0094, 0x0095, 0x0096, 0x0097,
295 0x0098, 0x0099, 0x009a, 0x009b,
296 0x009c, 0x009d, 0x009e, 0x009f,
298 0x00a0, 0x00a1, 0x00a2, 0x00a3,
299 0x00a4, 0x00a5, 0x00a6, 0x00a7,
300 0x00a8, 0x00a9, 0x00aa, 0x00ab,
301 0x00ac, 0x00ad, 0x00ae, 0x00af,
303 0x00b0, 0x00b1, 0x00b2, 0x00b3,
304 0x00b4, 0x00b5, 0x00b6, 0x00b7,
305 0x00b8, 0x00b9, 0x00ba, 0x00bb,
306 0x00bc, 0x00bd, 0x00be, 0x00bf,
308 0x00c0, 0x00c1, 0x00c2, 0x00c3,
309 0x00c4, 0x00c5, 0x00c6, 0x00c7,
310 0x00c8, 0x00c9, 0x00ca, 0x00cb,
311 0x00cc, 0x00cd, 0x00ce, 0x00cf,
313 0x00d0, 0x00d1, 0x00d2, 0x00d3,
314 0x00d4, 0x00d5, 0x00d6, 0x00d7,
315 0x00d8, 0x00d9, 0x00da, 0x00db,
316 0x00dc, 0x00dd, 0x00de, 0x00df,
318 0x00e0, 0x00e1, 0x00e2, 0x00e3,
319 0x00e4, 0x00e5, 0x00e6, 0x00e7,
320 0x00e8, 0x00e9, 0x00ea, 0x00eb,
321 0x00ec, 0x00ed, 0x00ee, 0x00ef,
323 0x00f0, 0x00f1, 0x00f2, 0x00f3,
324 0x00f4, 0x00f5, 0x00f6, 0x00f7,
325 0x00f8, 0x00f9, 0x00fa, 0x00fb,
326 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * Identity byte table: entry i holds the value i for all 256 indices.
 * NOTE(review): presumably installed as the page for high byte 0x00 in
 * page_uni2charset; that initializer is not visible in this listing.
 */
329 static unsigned char page00[256] = {
330 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
331 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
332 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
333 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
334 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
335 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
336 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
337 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
338 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
339 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
340 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
341 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
342 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
343 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
344 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
345 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
347 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
348 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
349 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
350 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
351 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
352 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
353 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
354 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
355 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
356 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
357 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
358 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
359 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
360 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
361 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
362 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Per-page lookup used by uni2char(): indexed by the high byte of a Unicode
 * value, each entry points to a 256-byte page that is then indexed by the low
 * byte.  uni2char() tests the entry for NULL before using it.
 * NOTE(review): the initializer contents are not visible in this listing.
 */
365 static unsigned char *page_uni2charset[256] = {
/*
 * Lower-casing table installed as default_table.charset2lower: entry i is the
 * lower-case form of byte i.  Only 0x41-0x5a ('A'-'Z') are changed, mapping
 * to 0x61-0x7a ('a'-'z'); every other byte, including 0x80-0xff, maps to
 * itself.
 */
369 static unsigned char charset2lower[256] = {
370 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
371 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
372 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
373 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
374 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
375 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
376 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
377 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
378 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
379 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
380 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
381 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
382 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
383 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
384 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
385 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
387 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
388 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
389 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
390 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
391 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
392 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
393 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
394 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
395 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
396 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
397 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
398 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
399 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
400 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
401 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
402 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Upper-casing table installed as default_table.charset2upper: entry i is the
 * upper-case form of byte i.  Only 0x61-0x7a ('a'-'z') are changed, mapping
 * to 0x41-0x5a ('A'-'Z'); every other byte, including 0x80-0xff, maps to
 * itself.
 */
405 static unsigned char charset2upper[256] = {
406 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
407 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
408 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
409 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
410 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
411 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
412 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
413 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
414 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
415 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
416 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
417 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
418 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
419 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
420 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
421 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
423 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
424 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
425 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
426 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
427 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
428 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
429 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
430 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
431 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
432 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
433 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
434 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
435 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
436 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
437 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
438 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * uni2char - default Unicode-to-byte conversion (installed as
 * default_table.uni2char).
 *
 * Splits @uni into a high and a low byte, selects a page from
 * page_uni2charset with the high byte and, when that page exists and its
 * entry for the low byte is non-zero, stores the entry in out[0].
 *
 * NOTE(review): the condition guarding the -ENAMETOOLONG return (presumably a
 * check of @boundlen), the handling of unmapped characters and the success
 * return value are not visible in this listing.
 */
442 static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
444 unsigned char *uni2charset;
/* Low byte: index within the selected page. */
445 unsigned char cl = uni & 0x00ff;
/* High byte: index of the page in page_uni2charset. */
446 unsigned char ch = (uni & 0xff00) >> 8;
449 return -ENAMETOOLONG;
451 uni2charset = page_uni2charset[ch];
/* A NULL page or a zero entry is not written to out[0]. */
452 if (uni2charset && uni2charset[cl])
453 out[0] = uni2charset[cl];
/*
 * char2uni - default byte-to-Unicode conversion (installed as
 * default_table.char2uni).
 *
 * Looks up the first byte of @rawstring in charset2uni and stores the result
 * through @uni.  @boundlen is not used in the visible line.
 * NOTE(review): the return statement is not visible in this listing.
 */
459 static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
461 *uni = charset2uni[*rawstring];
/*
 * Built-in table named "default", wired to the conversion routines and case
 * tables defined above in this file.  It is the initial entry of the tables
 * list and the fallback returned by load_nls_default().
 */
467 static struct nls_table default_table = {
468 .charset = "default",
469 .uni2char = uni2char,
470 .char2uni = char2uni,
471 .charset2lower = charset2lower,
472 .charset2upper = charset2upper,
475 /* Returns a simple default translation table */
/*
 * Tries load_nls() with the build-time CONFIG_NLS_DEFAULT charset name first;
 * the built-in default_table is the fallback return value.
 * NOTE(review): the statement executed when default_nls is non-NULL
 * (presumably returning it) is not visible in this listing.
 */
476 struct nls_table *load_nls_default(void)
478 struct nls_table *default_nls;
480 default_nls = load_nls(CONFIG_NLS_DEFAULT);
481 if (default_nls != NULL)
/* Fallback: address of the table defined in this file. */
484 return &default_table;
/*
 * Symbols exported from this file: the table registration and loading calls
 * and the four UTF-8 conversion helpers.
 */
487 EXPORT_SYMBOL(register_nls);
488 EXPORT_SYMBOL(unregister_nls);
489 EXPORT_SYMBOL(unload_nls);
490 EXPORT_SYMBOL(load_nls);
491 EXPORT_SYMBOL(load_nls_default);
492 EXPORT_SYMBOL(utf8_mbtowc);
493 EXPORT_SYMBOL(utf8_mbstowcs);
494 EXPORT_SYMBOL(utf8_wctomb);
495 EXPORT_SYMBOL(utf8_wcstombs);
/* License declaration for the module. */
497 MODULE_LICENSE("Dual BSD/GPL");