/*
 * Unicode sort key generation
 *
 * Copyright 2003 Dmitry Timoshkov
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "wine/unicode.h"
22 extern int get_decomposition(WCHAR src, WCHAR *dst, unsigned int dstlen);
25 * flags - normalization NORM_* flags
27 * FIXME: 'variable' flag not handled
29 int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
31 extern const unsigned int collation_table[];
32 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
35 const WCHAR *src_save = src;
36 int srclen_save = srclen;
38 key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
39 for (; srclen; srclen--, src++)
41 int decomposed_len = get_decomposition(*src, dummy, 4);
45 for (i = 0; i < decomposed_len; i++)
50 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
51 * and skips white space and punctuation characters for
54 if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
57 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
59 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
60 if (ce != (unsigned int)-1)
62 if (ce >> 16) key_len[0] += 2;
63 if ((ce >> 8) & 0xff) key_len[1]++;
64 if ((ce >> 4) & 0x0f) key_len[2]++;
67 if (wch >> 8) key_len[3]++;
74 if (wch >> 8) key_len[0]++;
75 if (wch & 0xff) key_len[0]++;
81 if (!dstlen) /* compute length */
82 /* 4 * '\1' + 1 * '\0' + key length */
83 return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1;
85 if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
86 return 0; /* overflow */
92 key_ptr[1] = key_ptr[0] + key_len[0] + 1;
93 key_ptr[2] = key_ptr[1] + key_len[1] + 1;
94 key_ptr[3] = key_ptr[2] + key_len[2] + 1;
96 for (; srclen; srclen--, src++)
98 int decomposed_len = get_decomposition(*src, dummy, 4);
102 for (i = 0; i < decomposed_len; i++)
104 WCHAR wch = dummy[i];
107 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
108 * and skips white space and punctuation characters for
109 * NORM_IGNORESYMBOLS.
111 if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
114 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
116 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
117 if (ce != (unsigned int)-1)
120 if ((key = ce >> 16))
122 *key_ptr[0]++ = key >> 8;
123 *key_ptr[0]++ = key & 0xff;
125 /* make key 1 start from 2 */
126 if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
127 /* make key 2 start from 2 */
128 if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
129 /* key 3 is always a character code */
132 if (wch >> 8) *key_ptr[3]++ = wch >> 8;
133 if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
138 *key_ptr[0]++ = 0xff;
139 *key_ptr[0]++ = 0xfe;
140 if (wch >> 8) *key_ptr[0]++ = wch >> 8;
141 if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
150 *key_ptr[3]++ = '\1';
153 return key_ptr[3] - dst;