2 * linux/fs/nls/nls_base.c
4 * Native language support--charsets and unicode translations.
5 * By Gordon Chaffee 1996, 1997
7 * Unicode based case conversion 1999 by Wolfram Pienkoss
11 #include <linux/module.h>
12 #include <linux/string.h>
13 #include <linux/nls.h>
14 #include <linux/kernel.h>
15 #include <linux/errno.h>
16 #include <linux/kmod.h>
17 #include <linux/spinlock.h>
/*
 * Registry state for the NLS tables handled in this file.
 *   default_table - forward declaration; its initializer appears later in
 *                   the file.
 *   tables        - head of the table list; it starts out pointing at
 *                   default_table and is walked through ->next in
 *                   find_nls().
 *   nls_lock      - spinlock taken in register_nls(), unregister_nls()
 *                   and find_nls().
 */
19 static struct nls_table default_table;
20 static struct nls_table *tables = &default_table;
21 static DEFINE_SPINLOCK(nls_lock);
24 * Sample implementation from Unicode home page.
25 * http://www.stonehand.com/unicode/standard/fss-utf.html
/*
 * Row table driving the UTF-8 helpers below: one row per encoded length,
 * 1 to 6 bytes as labelled on each row, closed by a row whose first field
 * is 0.
 *
 * The first two columns are the fields the code reads as cmask and cval:
 * utf8_mbtowc() selects a row when (c0 & cmask) == cval, utf8_wctomb() ORs
 * cval into an output byte, and both loops stop at the row with cmask == 0.
 *
 * NOTE(review): the struct utf8_table declaration is not part of the lines
 * shown here, so the last three columns are unnamed from this view. They
 * look like a bit shift (multiples of 6), the largest value encodable in
 * that many bytes, and the smallest value that needs that many bytes --
 * confirm against the declaration.
 */
35 static const struct utf8_table utf8_table[] =
37 {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
38 {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
39 {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
40 {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
41 {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
42 {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
43 {0, /* end of table */}
/*
 * utf8_mbtowc - judging by its name and signature, decodes one UTF-8
 * sequence of at most n bytes at s into the wide character at p -- confirm.
 *
 * NOTE(review): only a few lines of this function appear in this listing;
 * the return type, the locals other than t, and the return statements are
 * not shown. What the visible lines establish:
 *   - utf8_table is walked row by row until the row with cmask == 0;
 *   - a row matches when c0 masked with the row's cmask equals its cval
 *     (c0 is presumably the first byte of s -- confirm);
 *   - a byte read through s is XORed with 0x80 and truncated to 8 bits,
 *     which turns a byte of the form 10xxxxxx into its six payload bits.
 */
47 utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
51 const struct utf8_table *t;
56 for (t = utf8_table; t->cmask; t++) {
58 if ((c0 & t->cmask) == t->cval) {
68 c = (*s ^ 0x80) & 0xFF;
/*
 * utf8_mbstowcs - string counterpart of utf8_mbtowc(), judging by its name
 * and signature: UTF-8 bytes at s, wide characters at pwcs -- confirm.
 *
 * NOTE(review): partial view; declarations, pointer updates and the return
 * statement are not shown. The visible lines show a loop that continues
 * while the byte at ip is non-zero and n is still positive, a call to
 * utf8_mbtowc(op, ip, n) whose result is kept in size, and a branch
 * documented as ignoring the character and moving on. ip and op are
 * presumably cursors into s and pwcs -- confirm.
 */
77 utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
85 while (*ip && n > 0) {
87 size = utf8_mbtowc(op, ip, n);
89 /* Ignore character and move on */
/*
 * utf8_wctomb - judging by its name and signature, encodes the wide
 * character wc as UTF-8 into s, using at most maxlen bytes -- confirm.
 *
 * NOTE(review): partial view; the return type, the locals other than t and
 * the return statements are not shown. The visible lines establish:
 *   - the row loop stops at the utf8_table terminator or when maxlen
 *     reaches 0, and maxlen is decremented once per row tried;
 *   - one store writes the row's cval ORed with (l >> c);
 *   - another store writes 0x80 ORed with the six low bits of (l >> c).
 * l and c are presumably the value being encoded and the current shift.
 */
106 utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
110 const struct utf8_table *t;
117 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
121 *s = t->cval | (l >> c);
125 *s = 0x80 | ((l >> c) & 0x3F);
/*
 * utf8_wcstombs - string counterpart of utf8_wctomb(), judging by its name
 * and signature: wide characters at pwcs, UTF-8 bytes at s -- confirm.
 *
 * NOTE(review): partial view; declarations, pointer updates and the return
 * statement are not shown. The visible lines show a loop that continues
 * while the wide character at ip is non-zero and maxlen is still positive,
 * a call to utf8_wctomb(op, *ip, maxlen) whose result is kept in size, and
 * a branch documented as ignoring the character and moving on. ip and op
 * are presumably cursors into pwcs and s -- confirm.
 */
134 utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
142 while (*ip && maxlen > 0) {
144 size = utf8_wctomb(op, *ip, maxlen);
146 /* Ignore character and move on */
/*
 * register_nls - judging by its name, adds nls to the table list.
 *
 * NOTE(review): partial view; the list walk, the insertion and the return
 * values are not shown. The visible lines show tmp starting at the address
 * of the list head (&tables), nls_lock being taken once, and two separate
 * spin_unlock() sites, which suggests two exit paths that both release the
 * lock -- confirm against the full body.
 */
160 int register_nls(struct nls_table * nls)
162 struct nls_table ** tmp = &tables;
167 spin_lock(&nls_lock);
170 spin_unlock(&nls_lock);
177 spin_unlock(&nls_lock);
/*
 * unregister_nls - judging by its name, removes nls from the table list.
 *
 * NOTE(review): partial view; the list walk, the unlinking and the return
 * values are not shown. The visible lines show tmp starting at the address
 * of the list head (&tables), nls_lock being taken once, and two separate
 * spin_unlock() sites, which suggests two exit paths that both release the
 * lock -- confirm against the full body.
 */
181 int unregister_nls(struct nls_table * nls)
183 struct nls_table ** tmp = &tables;
185 spin_lock(&nls_lock);
189 spin_unlock(&nls_lock);
194 spin_unlock(&nls_lock);
/*
 * find_nls - look up a table by charset name.
 *
 * Walks the list starting at tables after taking nls_lock. An entry is
 * tested first against its charset string and then, when alias is non-NULL,
 * against its alias; both tests are exact strcmp() matches, so the lookup
 * is case-sensitive. try_module_get() is then called on the owner of a
 * non-NULL result before the lock is released.
 *
 * NOTE(review): the statements controlled by the three if tests and the
 * return statement are not shown in this listing. Presumably a match ends
 * the walk and a failed try_module_get() turns the result into NULL --
 * confirm.
 */
198 static struct nls_table *find_nls(char *charset)
200 struct nls_table *nls;
201 spin_lock(&nls_lock);
202 for (nls = tables; nls; nls = nls->next) {
203 if (!strcmp(nls->charset, charset))
205 if (nls->alias && !strcmp(nls->alias, charset))
208 if (nls && !try_module_get(nls->owner))
210 spin_unlock(&nls_lock);
/*
 * load_nls - public lookup of a table by charset name.
 *
 * Returns the result of try_then_request_module() wrapped around
 * find_nls(charset), with "nls_%s" and charset supplying the module name.
 * NOTE(review): try_then_request_module() comes from linux/kmod.h and is
 * expected to request that module and retry the lookup when the first
 * attempt yields nothing -- confirm against the header.
 */
214 struct nls_table *load_nls(char *charset)
216 return try_then_request_module(find_nls(charset), "nls_%s", charset);
/*
 * unload_nls - release a table reference: calls module_put() on
 * nls->owner, the counterpart of the try_module_get() in find_nls().
 *
 * NOTE(review): nls is dereferenced unconditionally in the visible line, so
 * a NULL argument would fault; either every caller guarantees non-NULL or
 * a guard is worth adding.
 */
219 void unload_nls(struct nls_table *nls)
221 module_put(nls->owner);
/*
 * Byte -> Unicode table, indexed by byte value and read by char2uni().
 * Every entry equals its own index, i.e. byte 0xNN maps to U+00NN.
 */
224 static const wchar_t charset2uni[256] = {
226 0x0000, 0x0001, 0x0002, 0x0003,
227 0x0004, 0x0005, 0x0006, 0x0007,
228 0x0008, 0x0009, 0x000a, 0x000b,
229 0x000c, 0x000d, 0x000e, 0x000f,
231 0x0010, 0x0011, 0x0012, 0x0013,
232 0x0014, 0x0015, 0x0016, 0x0017,
233 0x0018, 0x0019, 0x001a, 0x001b,
234 0x001c, 0x001d, 0x001e, 0x001f,
236 0x0020, 0x0021, 0x0022, 0x0023,
237 0x0024, 0x0025, 0x0026, 0x0027,
238 0x0028, 0x0029, 0x002a, 0x002b,
239 0x002c, 0x002d, 0x002e, 0x002f,
241 0x0030, 0x0031, 0x0032, 0x0033,
242 0x0034, 0x0035, 0x0036, 0x0037,
243 0x0038, 0x0039, 0x003a, 0x003b,
244 0x003c, 0x003d, 0x003e, 0x003f,
246 0x0040, 0x0041, 0x0042, 0x0043,
247 0x0044, 0x0045, 0x0046, 0x0047,
248 0x0048, 0x0049, 0x004a, 0x004b,
249 0x004c, 0x004d, 0x004e, 0x004f,
251 0x0050, 0x0051, 0x0052, 0x0053,
252 0x0054, 0x0055, 0x0056, 0x0057,
253 0x0058, 0x0059, 0x005a, 0x005b,
254 0x005c, 0x005d, 0x005e, 0x005f,
256 0x0060, 0x0061, 0x0062, 0x0063,
257 0x0064, 0x0065, 0x0066, 0x0067,
258 0x0068, 0x0069, 0x006a, 0x006b,
259 0x006c, 0x006d, 0x006e, 0x006f,
261 0x0070, 0x0071, 0x0072, 0x0073,
262 0x0074, 0x0075, 0x0076, 0x0077,
263 0x0078, 0x0079, 0x007a, 0x007b,
264 0x007c, 0x007d, 0x007e, 0x007f,
266 0x0080, 0x0081, 0x0082, 0x0083,
267 0x0084, 0x0085, 0x0086, 0x0087,
268 0x0088, 0x0089, 0x008a, 0x008b,
269 0x008c, 0x008d, 0x008e, 0x008f,
271 0x0090, 0x0091, 0x0092, 0x0093,
272 0x0094, 0x0095, 0x0096, 0x0097,
273 0x0098, 0x0099, 0x009a, 0x009b,
274 0x009c, 0x009d, 0x009e, 0x009f,
276 0x00a0, 0x00a1, 0x00a2, 0x00a3,
277 0x00a4, 0x00a5, 0x00a6, 0x00a7,
278 0x00a8, 0x00a9, 0x00aa, 0x00ab,
279 0x00ac, 0x00ad, 0x00ae, 0x00af,
281 0x00b0, 0x00b1, 0x00b2, 0x00b3,
282 0x00b4, 0x00b5, 0x00b6, 0x00b7,
283 0x00b8, 0x00b9, 0x00ba, 0x00bb,
284 0x00bc, 0x00bd, 0x00be, 0x00bf,
286 0x00c0, 0x00c1, 0x00c2, 0x00c3,
287 0x00c4, 0x00c5, 0x00c6, 0x00c7,
288 0x00c8, 0x00c9, 0x00ca, 0x00cb,
289 0x00cc, 0x00cd, 0x00ce, 0x00cf,
291 0x00d0, 0x00d1, 0x00d2, 0x00d3,
292 0x00d4, 0x00d5, 0x00d6, 0x00d7,
293 0x00d8, 0x00d9, 0x00da, 0x00db,
294 0x00dc, 0x00dd, 0x00de, 0x00df,
296 0x00e0, 0x00e1, 0x00e2, 0x00e3,
297 0x00e4, 0x00e5, 0x00e6, 0x00e7,
298 0x00e8, 0x00e9, 0x00ea, 0x00eb,
299 0x00ec, 0x00ed, 0x00ee, 0x00ef,
301 0x00f0, 0x00f1, 0x00f2, 0x00f3,
302 0x00f4, 0x00f5, 0x00f6, 0x00f7,
303 0x00f8, 0x00f9, 0x00fa, 0x00fb,
304 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * 256-entry byte table in which every entry equals its index (0x00..0xff
 * map to themselves).
 * NOTE(review): presumably the lookup page for Unicode values whose high
 * byte is 0x00, referenced from page_uni2charset -- that initializer is not
 * shown in this listing.
 */
307 static const unsigned char page00[256] = {
308 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
309 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
310 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
311 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
312 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
313 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
314 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
315 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
316 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
317 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
318 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
319 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
320 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
321 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
322 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
323 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
325 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
326 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
327 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
328 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
329 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
330 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
331 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
332 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
333 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
334 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
335 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
336 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
337 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
338 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
339 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
340 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Page directory consulted by uni2char(): indexed by the high byte of the
 * Unicode value; a non-NULL entry is a byte table indexed by the low byte.
 * NOTE(review): the initializer entries are not shown in this listing.
 */
343 static const unsigned char *const page_uni2charset[256] = {
/*
 * Lower-casing table, indexed by byte value and installed as the
 * charset2lower member of default_table. Only 0x41-0x5a (ASCII 'A'-'Z')
 * are folded, to 0x61-0x7a; every other byte, including all of 0x80-0xff,
 * maps to itself.
 */
347 static const unsigned char charset2lower[256] = {
348 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
349 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
350 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
351 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
352 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
353 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
354 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
355 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
356 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
357 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
358 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
359 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
360 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
361 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
362 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
363 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
365 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
366 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
367 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
368 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
369 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
370 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
371 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
372 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
373 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
374 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
375 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
376 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
377 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
378 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
379 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
380 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Upper-casing table, indexed by byte value and installed as the
 * charset2upper member of default_table. Only 0x61-0x7a (ASCII 'a'-'z')
 * are folded, to 0x41-0x5a; every other byte, including all of 0x80-0xff,
 * maps to itself.
 */
383 static const unsigned char charset2upper[256] = {
384 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
385 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
386 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
387 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
388 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
389 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
390 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
391 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
392 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
393 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
394 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
395 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
396 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
397 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
398 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
399 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
401 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
402 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
403 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
404 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
405 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
406 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
407 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
408 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
409 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
410 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
411 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
412 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
413 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
414 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
415 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
416 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * uni2char - convert one Unicode value to a single byte; installed as the
 * uni2char member of default_table.
 *
 * uni is split into its low byte cl and high byte ch. ch selects a page in
 * page_uni2charset; when that page exists and its entry for cl is non-zero,
 * the entry is stored in out[0]. A zero entry counts as "no mapping", so a
 * value whose page entry is 0 is never written to out.
 *
 * NOTE(review): the condition guarding the -ENAMETOOLONG return (presumably
 * a check on boundlen), the branch taken when no mapping exists, and the
 * success return value are not shown in this listing.
 */
420 static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
422 const unsigned char *uni2charset;
423 unsigned char cl = uni & 0x00ff;
424 unsigned char ch = (uni & 0xff00) >> 8;
427 return -ENAMETOOLONG;
429 uni2charset = page_uni2charset[ch];
430 if (uni2charset && uni2charset[cl])
431 out[0] = uni2charset[cl];
/*
 * char2uni - convert the byte at rawstring to Unicode: stores the
 * charset2uni entry for *rawstring in *uni. Installed as the char2uni
 * member of default_table.
 *
 * NOTE(review): boundlen is not used in the visible line, and the return
 * statements are not shown in this listing.
 */
437 static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
439 *uni = charset2uni[*rawstring];
/*
 * The built-in table, named "default". It wires up the uni2char() and
 * char2uni() converters and the two case-mapping tables defined in this
 * file; the tables list head points at it initially.
 * NOTE(review): no other members are set in the visible lines.
 */
445 static struct nls_table default_table = {
446 .charset = "default",
447 .uni2char = uni2char,
448 .char2uni = char2uni,
449 .charset2lower = charset2lower,
450 .charset2upper = charset2upper,
/*
 * load_nls_default - obtain the default translation table.
 *
 * Asks load_nls() for the charset named by CONFIG_NLS_DEFAULT and
 * otherwise returns the address of the built-in default_table.
 * NOTE(review): the statement taken when load_nls() returns non-NULL is not
 * shown in this listing; presumably it returns default_nls -- confirm.
 */
453 /* Returns a simple default translation table */
454 struct nls_table *load_nls_default(void)
456 struct nls_table *default_nls;
458 default_nls = load_nls(CONFIG_NLS_DEFAULT);
459 if (default_nls != NULL)
462 return &default_table;
/*
 * Symbols exported from this file: the table registry API (register_nls,
 * unregister_nls, load_nls, unload_nls, load_nls_default) and the four
 * UTF-8 helpers. MODULE_LICENSE declares the file as "Dual BSD/GPL".
 */
465 EXPORT_SYMBOL(register_nls);
466 EXPORT_SYMBOL(unregister_nls);
467 EXPORT_SYMBOL(unload_nls);
468 EXPORT_SYMBOL(load_nls);
469 EXPORT_SYMBOL(load_nls_default);
470 EXPORT_SYMBOL(utf8_mbtowc);
471 EXPORT_SYMBOL(utf8_mbstowcs);
472 EXPORT_SYMBOL(utf8_wctomb);
473 EXPORT_SYMBOL(utf8_wcstombs);
475 MODULE_LICENSE("Dual BSD/GPL");