Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik...
[linux-2.6] / fs / hfsplus / unicode.c
1 /*
2  *  linux/fs/hfsplus/unicode.c
3  *
4  * Copyright (C) 2001
5  * Brad Boyer (flar@allandria.com)
6  * (C) 2003 Ardis Technologies <roman@ardistech.com>
7  *
8  * Handler routines for unicode strings
9  */
10
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
18 static inline u16 case_fold(u16 c)
19 {
20         u16 tmp;
21
22         tmp = hfsplus_case_fold_table[c >> 8];
23         if (tmp)
24                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25         else
26                 tmp = c;
27         return tmp;
28 }
29
30 /* Compare unicode strings, return values like normal strcmp */
31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32                        const struct hfsplus_unistr *s2)
33 {
34         u16 len1, len2, c1, c2;
35         const hfsplus_unichr *p1, *p2;
36
37         len1 = be16_to_cpu(s1->length);
38         len2 = be16_to_cpu(s2->length);
39         p1 = s1->unicode;
40         p2 = s2->unicode;
41
42         while (1) {
43                 c1 = c2 = 0;
44
45                 while (len1 && !c1) {
46                         c1 = case_fold(be16_to_cpu(*p1));
47                         p1++;
48                         len1--;
49                 }
50                 while (len2 && !c2) {
51                         c2 = case_fold(be16_to_cpu(*p2));
52                         p2++;
53                         len2--;
54                 }
55
56                 if (c1 != c2)
57                         return (c1 < c2) ? -1 : 1;
58                 if (!c1 && !c2)
59                         return 0;
60         }
61 }
62
63 /* Compare names as a sequence of 16-bit unsigned integers */
64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65                    const struct hfsplus_unistr *s2)
66 {
67         u16 len1, len2, c1, c2;
68         const hfsplus_unichr *p1, *p2;
69         int len;
70
71         len1 = be16_to_cpu(s1->length);
72         len2 = be16_to_cpu(s2->length);
73         p1 = s1->unicode;
74         p2 = s2->unicode;
75
76         for (len = min(len1, len2); len > 0; len--) {
77                 c1 = be16_to_cpu(*p1);
78                 c2 = be16_to_cpu(*p2);
79                 if (c1 != c2)
80                         return c1 < c2 ? -1 : 1;
81                 p1++;
82                 p2++;
83         }
84
85         return len1 < len2 ? -1 :
86                len1 > len2 ? 1 : 0;
87 }
88
89
/*
 * Constants used by hfsplus_uni2asc() to compose a decomposed Hangul
 * syllable arithmetically.  The S/L/V/T naming presumably follows the
 * Unicode Standard's conjoining-jamo algorithm (syllable, leading
 * consonant, vowel, trailing consonant) -- confirm against the standard.
 * Hangul_SCount, Hangul_LCount and Hangul_NCount are not referenced by the
 * code visible in this file section.
 */
#define Hangul_SBase    0xac00  /* base added to the composed syllable index */
#define Hangul_LBase    0x1100  /* subtracted from the first character */
#define Hangul_VBase    0x1161  /* subtracted from the second character */
#define Hangul_TBase    0x11a7  /* subtracted from the optional third character */
#define Hangul_SCount   11172
#define Hangul_LCount   19
#define Hangul_VCount   21      /* second-character offsets below this compose */
#define Hangul_TCount   28      /* third-character offsets in 1..TCount-1 compose */
#define Hangul_NCount   (Hangul_VCount * Hangul_TCount)
99
100
101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102 {
103         int i, s, e;
104
105         s = 1;
106         e = p[1];
107         if (!e || cc < p[s * 2] || cc > p[e * 2])
108                 return NULL;
109         do {
110                 i = (s + e) / 2;
111                 if (cc > p[i * 2])
112                         s = i + 1;
113                 else if (cc < p[i * 2])
114                         e = i - 1;
115                 else
116                         return hfsplus_compose_table + p[i * 2 + 1];
117         } while (s <= e);
118         return NULL;
119 }
120
121 int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
122 {
123         const hfsplus_unichr *ip;
124         struct nls_table *nls = HFSPLUS_SB(sb).nls;
125         u8 *op;
126         u16 cc, c0, c1;
127         u16 *ce1, *ce2;
128         int i, len, ustrlen, res, compose;
129
130         op = astr;
131         ip = ustr->unicode;
132         ustrlen = be16_to_cpu(ustr->length);
133         len = *len_p;
134         ce1 = NULL;
135         compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
136
137         while (ustrlen > 0) {
138                 c0 = be16_to_cpu(*ip++);
139                 ustrlen--;
140                 /* search for single decomposed char */
141                 if (likely(compose))
142                         ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
143                 if (ce1 && (cc = ce1[0])) {
144                         /* start of a possibly decomposed Hangul char */
145                         if (cc != 0xffff)
146                                 goto done;
147                         if (!ustrlen)
148                                 goto same;
149                         c1 = be16_to_cpu(*ip) - Hangul_VBase;
150                         if (c1 < Hangul_VCount) {
151                                 /* compose the Hangul char */
152                                 cc = (c0 - Hangul_LBase) * Hangul_VCount;
153                                 cc = (cc + c1) * Hangul_TCount;
154                                 cc += Hangul_SBase;
155                                 ip++;
156                                 ustrlen--;
157                                 if (!ustrlen)
158                                         goto done;
159                                 c1 = be16_to_cpu(*ip) - Hangul_TBase;
160                                 if (c1 > 0 && c1 < Hangul_TCount) {
161                                         cc += c1;
162                                         ip++;
163                                         ustrlen--;
164                                 }
165                                 goto done;
166                         }
167                 }
168                 while (1) {
169                         /* main loop for common case of not composed chars */
170                         if (!ustrlen)
171                                 goto same;
172                         c1 = be16_to_cpu(*ip);
173                         if (likely(compose))
174                                 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1);
175                         if (ce1)
176                                 break;
177                         switch (c0) {
178                         case 0:
179                                 c0 = 0x2400;
180                                 break;
181                         case '/':
182                                 c0 = ':';
183                                 break;
184                         }
185                         res = nls->uni2char(c0, op, len);
186                         if (res < 0) {
187                                 if (res == -ENAMETOOLONG)
188                                         goto out;
189                                 *op = '?';
190                                 res = 1;
191                         }
192                         op += res;
193                         len -= res;
194                         c0 = c1;
195                         ip++;
196                         ustrlen--;
197                 }
198                 ce2 = hfsplus_compose_lookup(ce1, c0);
199                 if (ce2) {
200                         i = 1;
201                         while (i < ustrlen) {
202                                 ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i]));
203                                 if (!ce1)
204                                         break;
205                                 i++;
206                                 ce2 = ce1;
207                         }
208                         if ((cc = ce2[0])) {
209                                 ip += i;
210                                 ustrlen -= i;
211                                 goto done;
212                         }
213                 }
214         same:
215                 switch (c0) {
216                 case 0:
217                         cc = 0x2400;
218                         break;
219                 case '/':
220                         cc = ':';
221                         break;
222                 default:
223                         cc = c0;
224                 }
225         done:
226                 res = nls->uni2char(cc, op, len);
227                 if (res < 0) {
228                         if (res == -ENAMETOOLONG)
229                                 goto out;
230                         *op = '?';
231                         res = 1;
232                 }
233                 op += res;
234                 len -= res;
235         }
236         res = 0;
237 out:
238         *len_p = (char *)op - astr;
239         return res;
240 }
241
242 /*
243  * Convert one or more ASCII characters into a single unicode character.
244  * Returns the number of ASCII characters corresponding to the unicode char.
245  */
246 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
247                               wchar_t *uc)
248 {
249         int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
250         if (size <= 0) {
251                 *uc = '?';
252                 size = 1;
253         }
254         switch (*uc) {
255         case 0x2400:
256                 *uc = 0;
257                 break;
258         case ':':
259                 *uc = '/';
260                 break;
261         }
262         return size;
263 }
264
265 /* Decomposes a single unicode character. */
266 static inline u16 *decompose_unichar(wchar_t uc, int *size)
267 {
268         int off;
269
270         off = hfsplus_decompose_table[(uc >> 12) & 0xf];
271         if (off == 0 || off == 0xffff)
272                 return NULL;
273
274         off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
275         if (!off)
276                 return NULL;
277
278         off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
279         if (!off)
280                 return NULL;
281
282         off = hfsplus_decompose_table[off + (uc & 0xf)];
283         *size = off & 3;
284         if (*size == 0)
285                 return NULL;
286         return hfsplus_decompose_table + (off / 4);
287 }
288
289 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
290                     const char *astr, int len)
291 {
292         int size, dsize, decompose;
293         u16 *dstr, outlen = 0;
294         wchar_t c;
295
296         decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
297         while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
298                 size = asc2unichar(sb, astr, len, &c);
299
300                 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
301                         if (outlen + dsize > HFSPLUS_MAX_STRLEN)
302                                 break;
303                         do {
304                                 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
305                         } while (--dsize > 0);
306                 } else
307                         ustr->unicode[outlen++] = cpu_to_be16(c);
308
309                 astr += size;
310                 len -= size;
311         }
312         ustr->length = cpu_to_be16(outlen);
313         if (len > 0)
314                 return -ENAMETOOLONG;
315         return 0;
316 }
317
318 /*
319  * Hash a string to an integer as appropriate for the HFS+ filesystem.
320  * Composed unicode characters are decomposed and case-folding is performed
321  * if the appropriate bits are (un)set on the superblock.
322  */
323 int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
324 {
325         struct super_block *sb = dentry->d_sb;
326         const char *astr;
327         const u16 *dstr;
328         int casefold, decompose, size, dsize, len;
329         unsigned long hash;
330         wchar_t c;
331         u16 c2;
332
333         casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
334         decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
335         hash = init_name_hash();
336         astr = str->name;
337         len = str->len;
338         while (len > 0) {
339                 size = asc2unichar(sb, astr, len, &c);
340                 astr += size;
341                 len -= size;
342
343                 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
344                         do {
345                                 c2 = *dstr++;
346                                 if (!casefold || (c2 = case_fold(c2)))
347                                         hash = partial_name_hash(c2, hash);
348                         } while (--dsize > 0);
349                 } else {
350                         c2 = c;
351                         if (!casefold || (c2 = case_fold(c2)))
352                                 hash = partial_name_hash(c2, hash);
353                 }
354         }
355         str->hash = end_name_hash(hash);
356
357         return 0;
358 }
359
360 /*
361  * Compare strings with HFS+ filename ordering.
362  * Composed unicode characters are decomposed and case-folding is performed
363  * if the appropriate bits are (un)set on the superblock.
364  */
365 int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
366 {
367         struct super_block *sb = dentry->d_sb;
368         int casefold, decompose, size;
369         int dsize1, dsize2, len1, len2;
370         const u16 *dstr1, *dstr2;
371         const char *astr1, *astr2;
372         u16 c1, c2;
373         wchar_t c;
374
375         casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
376         decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
377         astr1 = s1->name;
378         len1 = s1->len;
379         astr2 = s2->name;
380         len2 = s2->len;
381         dsize1 = dsize2 = 0;
382         dstr1 = dstr2 = NULL;
383
384         while (len1 > 0 && len2 > 0) {
385                 if (!dsize1) {
386                         size = asc2unichar(sb, astr1, len1, &c);
387                         astr1 += size;
388                         len1 -= size;
389
390                         if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
391                                 c1 = c;
392                                 dstr1 = &c1;
393                                 dsize1 = 1;
394                         }
395                 }
396
397                 if (!dsize2) {
398                         size = asc2unichar(sb, astr2, len2, &c);
399                         astr2 += size;
400                         len2 -= size;
401
402                         if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
403                                 c2 = c;
404                                 dstr2 = &c2;
405                                 dsize2 = 1;
406                         }
407                 }
408
409                 c1 = *dstr1;
410                 c2 = *dstr2;
411                 if (casefold) {
412                         if  (!(c1 = case_fold(c1))) {
413                                 dstr1++;
414                                 dsize1--;
415                                 continue;
416                         }
417                         if (!(c2 = case_fold(c2))) {
418                                 dstr2++;
419                                 dsize2--;
420                                 continue;
421                         }
422                 }
423                 if (c1 < c2)
424                         return -1;
425                 else if (c1 > c2)
426                         return 1;
427
428                 dstr1++;
429                 dsize1--;
430                 dstr2++;
431                 dsize2--;
432         }
433
434         if (len1 < len2)
435                 return -1;
436         if (len1 > len2)
437                 return 1;
438         return 0;
439 }