2 * linux/fs/nls/nls_base.c
4 * Native language support--charsets and unicode translations.
5 * By Gordon Chaffee 1996, 1997
7 * Unicode based case conversion 1999 by Wolfram Pienkoss
11 #include <linux/module.h>
12 #include <linux/string.h>
13 #include <linux/nls.h>
14 #include <linux/kernel.h>
15 #include <linux/errno.h>
17 #include <linux/kmod.h>
19 #include <linux/spinlock.h>
/*
 * Registry state for NLS tables.
 * default_table is forward-declared here and used as the initial head of
 * the `tables` list. nls_lock is the spinlock that register_nls,
 * unregister_nls and find_nls take around their work on that list.
 */
21 static struct nls_table default_table;
22 static struct nls_table *tables = &default_table;
23 static DEFINE_SPINLOCK(nls_lock);
26 * Sample implementation from Unicode home page.
27 * http://www.stonehand.com/unicode/standard/fss-utf.html
/*
 * One row per UTF-8 sequence length (1 to 6 bytes, per the row comments).
 * The first two columns are the mask/value pair tested against the lead
 * byte: `(c0 & t->cmask) == t->cval` in utf8_mbtowc.
 * NOTE(review): the struct declaration is not visible in this listing; the
 * remaining columns presumably are the shift of the lead-byte payload, the
 * largest code value and the smallest code value for that length -- confirm
 * against the declaration.
 * The final row has a zero first field, which ends the `t->cmask` loops.
 */
37 static const struct utf8_table utf8_table[] =
39 {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
40 {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
41 {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
42 {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
43 {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
44 {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
45 {0, /* end of table */}
/*
 * utf8_mbtowc - decode a single UTF-8 sequence starting at s.
 * @p: receives the decoded wide character (presumably; the store is not
 *     visible in this listing)
 * @s: input bytes
 * @n: presumably the number of bytes available at s -- TODO confirm
 *
 * Visible logic: utf8_table is walked row by row until the lead byte c0
 * matches a row's (cmask, cval) pattern.
 * NOTE(review): the return statements and any length checks are not visible
 * in this listing.
 */
49 utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
53 const struct utf8_table *t;
58 for (t = utf8_table; t->cmask; t++) {
60 if ((c0 & t->cmask) == t->cval) {
/* XOR with 0x80 turns a well-formed 10xxxxxx continuation byte into 00xxxxxx */
70 c = (*s ^ 0x80) & 0xFF;
/*
 * utf8_mbstowcs - convert a UTF-8 byte string into wide characters.
 * @pwcs: output buffer
 * @s: input bytes
 * @n: remaining-input counter; the loop stops when it reaches zero
 *
 * The loop runs while the current input byte is non-zero and n > 0, and
 * uses utf8_mbtowc for the per-character decode. Per the inline comment, a
 * character that fails to decode is ignored and conversion continues.
 * NOTE(review): pointer/counter updates and the return value are not
 * visible in this listing.
 */
79 utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
87 while (*ip && n > 0) {
89 size = utf8_mbtowc(op, ip, n);
91 /* Ignore character and move on */
/*
 * utf8_wctomb - encode one wide character as UTF-8 into s.
 * @s: output buffer
 * @wc: character to encode
 * @maxlen: output budget; it is decremented once per table row tried, so
 *          the row search ends when either the table or maxlen runs out
 *
 * NOTE(review): the row-selection test, the setup of l and c, and the
 * return statements are not visible in this listing; l presumably holds wc
 * and c the current bit shift -- confirm.
 */
108 utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
112 const struct utf8_table *t;
119 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
/* lead byte: the row's marker bits OR'ed with the bits of l above shift c */
123 *s = t->cval | (l >> c);
/* continuation byte: 10xxxxxx carrying six bits of l */
127 *s = 0x80 | ((l >> c) & 0x3F);
/*
 * utf8_wcstombs - convert a wide-character string into UTF-8 bytes.
 * @s: output buffer
 * @pwcs: input wide characters; a zero element ends the loop
 * @maxlen: output budget; the loop also stops when it reaches zero
 *
 * Uses utf8_wctomb for the per-character encode. Per the inline comment, a
 * character that fails to encode is ignored and conversion continues.
 * NOTE(review): pointer/counter updates and the return value are not
 * visible in this listing.
 */
136 utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
144 while (*ip && maxlen > 0) {
146 size = utf8_wctomb(op, *ip, maxlen);
148 /* Ignore character and move on */
/*
 * register_nls - register an NLS table (exported to modules).
 * @nls: table to register
 *
 * Starts from the address of the list head (`&tables`) and takes nls_lock.
 * There are two unlock sites, so the locked region has two exits.
 * NOTE(review): argument checks, the list walk/insertion and the return
 * values are not visible in this listing.
 */
162 int register_nls(struct nls_table * nls)
164 struct nls_table ** tmp = &tables;
169 spin_lock(&nls_lock);
172 spin_unlock(&nls_lock);
179 spin_unlock(&nls_lock);
/*
 * unregister_nls - remove an NLS table from the registry (exported to
 * modules).
 * @nls: table to remove
 *
 * Starts from the address of the list head (`&tables`) and takes nls_lock.
 * There are two unlock sites, so the locked region has two exits.
 * NOTE(review): the list walk/unlink and the return values are not visible
 * in this listing.
 */
183 int unregister_nls(struct nls_table * nls)
185 struct nls_table ** tmp = &tables;
187 spin_lock(&nls_lock);
191 spin_unlock(&nls_lock);
196 spin_unlock(&nls_lock);
/*
 * find_nls - look up a registered table by charset name or alias.
 * @charset: name to search for
 *
 * Walks the `tables` list under nls_lock, comparing @charset with each
 * entry's charset string and, when the entry has one, its alias string.
 * For a non-NULL result, try_module_get() is called on the owning module
 * before the lock is dropped.
 * NOTE(review): the statements taken on a match and on try_module_get()
 * failure, and the return statement, are not visible in this listing.
 */
200 static struct nls_table *find_nls(char *charset)
202 struct nls_table *nls;
203 spin_lock(&nls_lock);
204 for (nls = tables; nls; nls = nls->next) {
205 if (!strcmp(nls->charset, charset))
/* alias is optional, so it is checked for NULL before comparing */
207 if (nls->alias && !strcmp(nls->alias, charset))
210 if (nls && !try_module_get(nls->owner))
212 spin_unlock(&nls_lock);
/*
 * load_nls - obtain the NLS table for a charset name (exported to modules).
 * @charset: charset name
 *
 * First tries find_nls(). The visible code then asks for module
 * "nls_<charset>" via request_module(), prints a diagnostic naming the
 * charset, and calls find_nls() a second time.
 * NOTE(review): the conditions guarding these steps (including any
 * preprocessor guards) and the return statements are not visible in this
 * listing; the printk presumably sits on the request_module() failure path.
 */
216 struct nls_table *load_nls(char *charset)
218 struct nls_table *nls;
223 nls = find_nls(charset);
228 ret = request_module("nls_%s", charset);
230 printk("Unable to load NLS charset %s\n", charset);
233 nls = find_nls(charset);
/*
 * unload_nls - release a table obtained earlier (exported to modules).
 * @nls: table to release
 *
 * Calls module_put() on the table's owner; this presumably balances the
 * try_module_get() done in find_nls().
 */
238 void unload_nls(struct nls_table *nls)
240 module_put(nls->owner);
/*
 * Byte -> Unicode map for the built-in default table, indexed by the raw
 * byte in char2uni. All 256 entries equal their index (0x0000..0x00ff),
 * i.e. an identity mapping.
 */
243 static const wchar_t charset2uni[256] = {
245 0x0000, 0x0001, 0x0002, 0x0003,
246 0x0004, 0x0005, 0x0006, 0x0007,
247 0x0008, 0x0009, 0x000a, 0x000b,
248 0x000c, 0x000d, 0x000e, 0x000f,
250 0x0010, 0x0011, 0x0012, 0x0013,
251 0x0014, 0x0015, 0x0016, 0x0017,
252 0x0018, 0x0019, 0x001a, 0x001b,
253 0x001c, 0x001d, 0x001e, 0x001f,
255 0x0020, 0x0021, 0x0022, 0x0023,
256 0x0024, 0x0025, 0x0026, 0x0027,
257 0x0028, 0x0029, 0x002a, 0x002b,
258 0x002c, 0x002d, 0x002e, 0x002f,
260 0x0030, 0x0031, 0x0032, 0x0033,
261 0x0034, 0x0035, 0x0036, 0x0037,
262 0x0038, 0x0039, 0x003a, 0x003b,
263 0x003c, 0x003d, 0x003e, 0x003f,
265 0x0040, 0x0041, 0x0042, 0x0043,
266 0x0044, 0x0045, 0x0046, 0x0047,
267 0x0048, 0x0049, 0x004a, 0x004b,
268 0x004c, 0x004d, 0x004e, 0x004f,
270 0x0050, 0x0051, 0x0052, 0x0053,
271 0x0054, 0x0055, 0x0056, 0x0057,
272 0x0058, 0x0059, 0x005a, 0x005b,
273 0x005c, 0x005d, 0x005e, 0x005f,
275 0x0060, 0x0061, 0x0062, 0x0063,
276 0x0064, 0x0065, 0x0066, 0x0067,
277 0x0068, 0x0069, 0x006a, 0x006b,
278 0x006c, 0x006d, 0x006e, 0x006f,
280 0x0070, 0x0071, 0x0072, 0x0073,
281 0x0074, 0x0075, 0x0076, 0x0077,
282 0x0078, 0x0079, 0x007a, 0x007b,
283 0x007c, 0x007d, 0x007e, 0x007f,
285 0x0080, 0x0081, 0x0082, 0x0083,
286 0x0084, 0x0085, 0x0086, 0x0087,
287 0x0088, 0x0089, 0x008a, 0x008b,
288 0x008c, 0x008d, 0x008e, 0x008f,
290 0x0090, 0x0091, 0x0092, 0x0093,
291 0x0094, 0x0095, 0x0096, 0x0097,
292 0x0098, 0x0099, 0x009a, 0x009b,
293 0x009c, 0x009d, 0x009e, 0x009f,
295 0x00a0, 0x00a1, 0x00a2, 0x00a3,
296 0x00a4, 0x00a5, 0x00a6, 0x00a7,
297 0x00a8, 0x00a9, 0x00aa, 0x00ab,
298 0x00ac, 0x00ad, 0x00ae, 0x00af,
300 0x00b0, 0x00b1, 0x00b2, 0x00b3,
301 0x00b4, 0x00b5, 0x00b6, 0x00b7,
302 0x00b8, 0x00b9, 0x00ba, 0x00bb,
303 0x00bc, 0x00bd, 0x00be, 0x00bf,
305 0x00c0, 0x00c1, 0x00c2, 0x00c3,
306 0x00c4, 0x00c5, 0x00c6, 0x00c7,
307 0x00c8, 0x00c9, 0x00ca, 0x00cb,
308 0x00cc, 0x00cd, 0x00ce, 0x00cf,
310 0x00d0, 0x00d1, 0x00d2, 0x00d3,
311 0x00d4, 0x00d5, 0x00d6, 0x00d7,
312 0x00d8, 0x00d9, 0x00da, 0x00db,
313 0x00dc, 0x00dd, 0x00de, 0x00df,
315 0x00e0, 0x00e1, 0x00e2, 0x00e3,
316 0x00e4, 0x00e5, 0x00e6, 0x00e7,
317 0x00e8, 0x00e9, 0x00ea, 0x00eb,
318 0x00ec, 0x00ed, 0x00ee, 0x00ef,
320 0x00f0, 0x00f1, 0x00f2, 0x00f3,
321 0x00f4, 0x00f5, 0x00f6, 0x00f7,
322 0x00f8, 0x00f9, 0x00fa, 0x00fb,
323 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * A 256-entry byte table in which every entry equals its index (identity).
 * Given its name it is presumably the Unicode page 0x00 slot of
 * page_uni2charset, whose initializer is not visible here -- confirm.
 */
326 static const unsigned char page00[256] = {
327 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
328 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
329 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
330 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
331 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
332 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
333 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
334 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
335 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
336 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
337 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
338 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
339 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
340 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
341 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
342 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
344 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
345 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
346 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
347 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
348 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
349 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
350 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
351 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
352 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
353 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
354 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
355 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
356 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
357 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
358 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
359 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Per-page lookup used by uni2char: indexed by the high byte of the Unicode
 * value, each slot is either NULL or a 256-entry table indexed by the low
 * byte.
 * NOTE(review): the initializer is not visible in this listing.
 */
362 static const unsigned char *const page_uni2charset[256] = {
/*
 * Lower-casing map for the default table, indexed by byte value.
 * Bytes 0x41..0x5a ('A'..'Z') map to 0x61..0x7a ('a'..'z'); every other
 * byte, including all of 0x80..0xff, maps to itself.
 */
366 static const unsigned char charset2lower[256] = {
367 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
368 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
369 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
370 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
371 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
372 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
373 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
374 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
375 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
376 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
378 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
379 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
380 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
381 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
382 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
384 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
385 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
386 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
387 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
388 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
389 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
390 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
391 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
392 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
393 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
394 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
395 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
396 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
397 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
398 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
399 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Upper-casing map for the default table, indexed by byte value.
 * Bytes 0x61..0x7a ('a'..'z') map to 0x41..0x5a ('A'..'Z'); every other
 * byte, including all of 0x80..0xff, maps to itself.
 */
402 static const unsigned char charset2upper[256] = {
403 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
404 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
405 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
406 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
407 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
408 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
409 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
410 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
411 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
412 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
413 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
414 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
415 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
416 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
417 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
418 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
420 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
421 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
422 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
423 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
424 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
425 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
426 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
427 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
428 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
429 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
430 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
431 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
432 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
433 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
434 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
435 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * uni2char - default-table conversion of one Unicode value to one byte.
 * @uni: Unicode value; only its low 16 bits are examined
 * @out: output buffer; out[0] receives the byte on a successful lookup
 * @boundlen: presumably the space available at @out -- TODO confirm
 *
 * Splits @uni into a page number (high byte) and an offset (low byte),
 * then looks the offset up in that page's table.
 * NOTE(review): the condition that produces -ENAMETOOLONG, the path taken
 * when the lookup fails, and the success return value are not visible in
 * this listing.
 */
439 static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
441 const unsigned char *uni2charset;
442 unsigned char cl = uni & 0x00ff;
443 unsigned char ch = (uni & 0xff00) >> 8;
446 return -ENAMETOOLONG;
448 uni2charset = page_uni2charset[ch];
/* a NULL page or a zero entry both mean no mapping for this value */
449 if (uni2charset && uni2charset[cl])
450 out[0] = uni2charset[cl];
/*
 * char2uni - default-table conversion of one byte to a Unicode value.
 * @rawstring: input bytes; only the first byte is read in the visible code
 * @boundlen: not used by the visible code
 * @uni: receives charset2uni[first byte]
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
456 static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
458 *uni = charset2uni[*rawstring];
/*
 * The built-in table, named "default". It wires in this file's uni2char and
 * char2uni converters and the charset2lower/charset2upper case maps. It is
 * the initial head of the `tables` list and the fallback returned by
 * load_nls_default().
 */
464 static struct nls_table default_table = {
465 .charset = "default",
466 .uni2char = uni2char,
467 .char2uni = char2uni,
468 .charset2lower = charset2lower,
469 .charset2upper = charset2upper,
/*
 * load_nls_default - obtain the configured default table (exported to
 * modules).
 *
 * Tries load_nls(CONFIG_NLS_DEFAULT) first and falls back to the built-in
 * default_table.
 * NOTE(review): the statement under the `!= NULL` test is not visible in
 * this listing; presumably it returns default_nls -- confirm.
 */
472 /* Returns a simple default translation table */
473 struct nls_table *load_nls_default(void)
475 struct nls_table *default_nls;
477 default_nls = load_nls(CONFIG_NLS_DEFAULT);
478 if (default_nls != NULL)
481 return &default_table;
/*
 * Symbols made available to other kernel modules: the table registry API,
 * the table load/unload API, and the four UTF-8 conversion helpers.
 */
484 EXPORT_SYMBOL(register_nls);
485 EXPORT_SYMBOL(unregister_nls);
486 EXPORT_SYMBOL(unload_nls);
487 EXPORT_SYMBOL(load_nls);
488 EXPORT_SYMBOL(load_nls_default);
489 EXPORT_SYMBOL(utf8_mbtowc);
490 EXPORT_SYMBOL(utf8_mbstowcs);
491 EXPORT_SYMBOL(utf8_wctomb);
492 EXPORT_SYMBOL(utf8_wcstombs);
/* License tag recorded for this module. */
494 MODULE_LICENSE("Dual BSD/GPL");