Merge commit 'gcl/merge' into merge
[linux-2.6] / fs / hfsplus / unicode.c
1 /*
2  *  linux/fs/hfsplus/unicode.c
3  *
4  * Copyright (C) 2001
5  * Brad Boyer (flar@allandria.com)
6  * (C) 2003 Ardis Technologies <roman@ardistech.com>
7  *
8  * Handler routines for unicode strings
9  */
10
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
18 static inline u16 case_fold(u16 c)
19 {
20         u16 tmp;
21
22         tmp = hfsplus_case_fold_table[c >> 8];
23         if (tmp)
24                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25         else
26                 tmp = c;
27         return tmp;
28 }
29
30 /* Compare unicode strings, return values like normal strcmp */
31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32                        const struct hfsplus_unistr *s2)
33 {
34         u16 len1, len2, c1, c2;
35         const hfsplus_unichr *p1, *p2;
36
37         len1 = be16_to_cpu(s1->length);
38         len2 = be16_to_cpu(s2->length);
39         p1 = s1->unicode;
40         p2 = s2->unicode;
41
42         while (1) {
43                 c1 = c2 = 0;
44
45                 while (len1 && !c1) {
46                         c1 = case_fold(be16_to_cpu(*p1));
47                         p1++;
48                         len1--;
49                 }
50                 while (len2 && !c2) {
51                         c2 = case_fold(be16_to_cpu(*p2));
52                         p2++;
53                         len2--;
54                 }
55
56                 if (c1 != c2)
57                         return (c1 < c2) ? -1 : 1;
58                 if (!c1 && !c2)
59                         return 0;
60         }
61 }
62
63 /* Compare names as a sequence of 16-bit unsigned integers */
64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65                    const struct hfsplus_unistr *s2)
66 {
67         u16 len1, len2, c1, c2;
68         const hfsplus_unichr *p1, *p2;
69         int len;
70
71         len1 = be16_to_cpu(s1->length);
72         len2 = be16_to_cpu(s2->length);
73         p1 = s1->unicode;
74         p2 = s2->unicode;
75
76         for (len = min(len1, len2); len > 0; len--) {
77                 c1 = be16_to_cpu(*p1);
78                 c2 = be16_to_cpu(*p2);
79                 if (c1 != c2)
80                         return c1 < c2 ? -1 : 1;
81                 p1++;
82                 p2++;
83         }
84
85         return len1 < len2 ? -1 :
86                len1 > len2 ? 1 : 0;
87 }
88
89
/*
 * Constants used by hfsplus_uni2asc() to compose a Hangul character from
 * its parts:
 *
 *   composed = Hangul_SBase +
 *              ((first - Hangul_LBase) * Hangul_VCount +
 *               (second - Hangul_VBase)) * Hangul_TCount +
 *              (third - Hangul_TBase)
 *
 * where the third part is optional.
 */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)	/* 11172 */
99
100
101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102 {
103         int i, s, e;
104
105         s = 1;
106         e = p[1];
107         if (!e || cc < p[s * 2] || cc > p[e * 2])
108                 return NULL;
109         do {
110                 i = (s + e) / 2;
111                 if (cc > p[i * 2])
112                         s = i + 1;
113                 else if (cc < p[i * 2])
114                         e = i - 1;
115                 else
116                         return hfsplus_compose_table + p[i * 2 + 1];
117         } while (s <= e);
118         return NULL;
119 }
120
121 int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
122 {
123         const hfsplus_unichr *ip;
124         struct nls_table *nls = HFSPLUS_SB(sb).nls;
125         u8 *op;
126         u16 cc, c0, c1;
127         u16 *ce1, *ce2;
128         int i, len, ustrlen, res, compose;
129
130         op = astr;
131         ip = ustr->unicode;
132         ustrlen = be16_to_cpu(ustr->length);
133         len = *len_p;
134         ce1 = NULL;
135         compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
136
137         while (ustrlen > 0) {
138                 c0 = be16_to_cpu(*ip++);
139                 ustrlen--;
140                 /* search for single decomposed char */
141                 if (likely(compose))
142                         ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
143                 if (ce1 && (cc = ce1[0])) {
144                         /* start of a possibly decomposed Hangul char */
145                         if (cc != 0xffff)
146                                 goto done;
147                         if (!ustrlen)
148                                 goto same;
149                         c1 = be16_to_cpu(*ip) - Hangul_VBase;
150                         if (c1 < Hangul_VCount) {
151                                 /* compose the Hangul char */
152                                 cc = (c0 - Hangul_LBase) * Hangul_VCount;
153                                 cc = (cc + c1) * Hangul_TCount;
154                                 cc += Hangul_SBase;
155                                 ip++;
156                                 ustrlen--;
157                                 if (!ustrlen)
158                                         goto done;
159                                 c1 = be16_to_cpu(*ip) - Hangul_TBase;
160                                 if (c1 > 0 && c1 < Hangul_TCount) {
161                                         cc += c1;
162                                         ip++;
163                                         ustrlen--;
164                                 }
165                                 goto done;
166                         }
167                 }
168                 while (1) {
169                         /* main loop for common case of not composed chars */
170                         if (!ustrlen)
171                                 goto same;
172                         c1 = be16_to_cpu(*ip);
173                         if (likely(compose))
174                                 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1);
175                         if (ce1)
176                                 break;
177                         switch (c0) {
178                         case 0:
179                                 c0 = 0x2400;
180                                 break;
181                         case '/':
182                                 c0 = ':';
183                                 break;
184                         }
185                         res = nls->uni2char(c0, op, len);
186                         if (res < 0) {
187                                 if (res == -ENAMETOOLONG)
188                                         goto out;
189                                 *op = '?';
190                                 res = 1;
191                         }
192                         op += res;
193                         len -= res;
194                         c0 = c1;
195                         ip++;
196                         ustrlen--;
197                 }
198                 ce2 = hfsplus_compose_lookup(ce1, c0);
199                 if (ce2) {
200                         i = 1;
201                         while (i < ustrlen) {
202                                 ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i]));
203                                 if (!ce1)
204                                         break;
205                                 i++;
206                                 ce2 = ce1;
207                         }
208                         if ((cc = ce2[0])) {
209                                 ip += i;
210                                 ustrlen -= i;
211                                 goto done;
212                         }
213                 }
214         same:
215                 switch (c0) {
216                 case 0:
217                         cc = 0x2400;
218                         break;
219                 case '/':
220                         cc = ':';
221                         break;
222                 default:
223                         cc = c0;
224                 }
225         done:
226                 res = nls->uni2char(cc, op, len);
227                 if (res < 0) {
228                         if (res == -ENAMETOOLONG)
229                                 goto out;
230                         *op = '?';
231                         res = 1;
232                 }
233                 op += res;
234                 len -= res;
235         }
236         res = 0;
237 out:
238         *len_p = (char *)op - astr;
239         return res;
240 }
241
242 /*
243  * Convert one or more ASCII characters into a single unicode character.
244  * Returns the number of ASCII characters corresponding to the unicode char.
245  */
246 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
247                               wchar_t *uc)
248 {
249         int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
250         if (size <= 0) {
251                 *uc = '?';
252                 size = 1;
253         }
254         switch (*uc) {
255         case 0x2400:
256                 *uc = 0;
257                 break;
258         case ':':
259                 *uc = '/';
260                 break;
261         }
262         return size;
263 }
264
265 /* Decomposes a single unicode character. */
266 static inline u16 *decompose_unichar(wchar_t uc, int *size)
267 {
268         int off;
269
270         off = hfsplus_decompose_table[(uc >> 12) & 0xf];
271         if (off == 0 || off == 0xffff)
272                 return NULL;
273
274         off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
275         if (!off)
276                 return NULL;
277
278         off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
279         if (!off)
280                 return NULL;
281
282         off = hfsplus_decompose_table[off + (uc & 0xf)];
283         *size = off & 3;
284         if (*size == 0)
285                 return NULL;
286         return hfsplus_decompose_table + (off / 4);
287 }
288
289 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
290                     const char *astr, int len)
291 {
292         int size, dsize, decompose;
293         u16 *dstr, outlen = 0;
294         wchar_t c;
295
296         decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
297         while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
298                 size = asc2unichar(sb, astr, len, &c);
299
300                 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
301                         if (outlen + dsize > HFSPLUS_MAX_STRLEN)
302                                 break;
303                         do {
304                                 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
305                         } while (--dsize > 0);
306                 } else
307                         ustr->unicode[outlen++] = cpu_to_be16(c);
308
309                 astr += size;
310                 len -= size;
311         }
312         ustr->length = cpu_to_be16(outlen);
313         if (len > 0)
314                 return -ENAMETOOLONG;
315         return 0;
316 }
317
318 /*
319  * Hash a string to an integer as appropriate for the HFS+ filesystem.
320  * Composed unicode characters are decomposed and case-folding is performed
321  * if the appropriate bits are (un)set on the superblock.
322  */
323 int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
324 {
325         struct super_block *sb = dentry->d_sb;
326         const char *astr;
327         const u16 *dstr;
328         int casefold, decompose, size, len;
329         unsigned long hash;
330         wchar_t c;
331         u16 c2;
332
333         casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
334         decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
335         hash = init_name_hash();
336         astr = str->name;
337         len = str->len;
338         while (len > 0) {
339                 int uninitialized_var(dsize);
340                 size = asc2unichar(sb, astr, len, &c);
341                 astr += size;
342                 len -= size;
343
344                 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
345                         do {
346                                 c2 = *dstr++;
347                                 if (!casefold || (c2 = case_fold(c2)))
348                                         hash = partial_name_hash(c2, hash);
349                         } while (--dsize > 0);
350                 } else {
351                         c2 = c;
352                         if (!casefold || (c2 = case_fold(c2)))
353                                 hash = partial_name_hash(c2, hash);
354                 }
355         }
356         str->hash = end_name_hash(hash);
357
358         return 0;
359 }
360
361 /*
362  * Compare strings with HFS+ filename ordering.
363  * Composed unicode characters are decomposed and case-folding is performed
364  * if the appropriate bits are (un)set on the superblock.
365  */
366 int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
367 {
368         struct super_block *sb = dentry->d_sb;
369         int casefold, decompose, size;
370         int dsize1, dsize2, len1, len2;
371         const u16 *dstr1, *dstr2;
372         const char *astr1, *astr2;
373         u16 c1, c2;
374         wchar_t c;
375
376         casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
377         decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
378         astr1 = s1->name;
379         len1 = s1->len;
380         astr2 = s2->name;
381         len2 = s2->len;
382         dsize1 = dsize2 = 0;
383         dstr1 = dstr2 = NULL;
384
385         while (len1 > 0 && len2 > 0) {
386                 if (!dsize1) {
387                         size = asc2unichar(sb, astr1, len1, &c);
388                         astr1 += size;
389                         len1 -= size;
390
391                         if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
392                                 c1 = c;
393                                 dstr1 = &c1;
394                                 dsize1 = 1;
395                         }
396                 }
397
398                 if (!dsize2) {
399                         size = asc2unichar(sb, astr2, len2, &c);
400                         astr2 += size;
401                         len2 -= size;
402
403                         if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
404                                 c2 = c;
405                                 dstr2 = &c2;
406                                 dsize2 = 1;
407                         }
408                 }
409
410                 c1 = *dstr1;
411                 c2 = *dstr2;
412                 if (casefold) {
413                         if  (!(c1 = case_fold(c1))) {
414                                 dstr1++;
415                                 dsize1--;
416                                 continue;
417                         }
418                         if (!(c2 = case_fold(c2))) {
419                                 dstr2++;
420                                 dsize2--;
421                                 continue;
422                         }
423                 }
424                 if (c1 < c2)
425                         return -1;
426                 else if (c1 > c2)
427                         return 1;
428
429                 dstr1++;
430                 dsize1--;
431                 dstr2++;
432                 dsize2--;
433         }
434
435         if (len1 < len2)
436                 return -1;
437         if (len1 > len2)
438                 return 1;
439         return 0;
440 }