usp10: Add script Tai Le.
[wine] / dlls / usp10 / breaking.c
1 /*
2  * Implementation of line breaking algorithm for the Uniscribe Script Processor
3  *
4  * Copyright 2011 CodeWeavers, Aric Stewart
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  */
21 #include "config.h"
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "winnls.h"
31 #include "usp10.h"
32 #include "winternl.h"
33
34 #include "wine/debug.h"
35 #include "usp10_internal.h"
36
37 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
38
39 extern const unsigned short wine_linebreak_table[];
40
/*
 * Line breaking class of a character, as stored in wine_linebreak_table.
 * The two-letter suffixes are the class abbreviations used by the Unicode
 * line breaking algorithm (UAX #14).  Numbering starts at 1 and continues
 * in declaration order, so the order of the enumerators must not change.
 */
enum breaking_types
{
    b_BK = 1,   /* mandatory break */
    b_CR,       /* carriage return */
    b_LF,       /* line feed */
    b_CM,       /* combining mark */
    b_SG,       /* surrogate */
    b_GL,       /* non-breaking ("glue") */
    b_CB,       /* contingent break opportunity */
    b_SP,       /* space */
    b_ZW,       /* zero width space */
    b_NL,       /* next line */
    b_WJ,       /* word joiner */
    b_JL,       /* Hangul L jamo */
    b_JV,       /* Hangul V jamo */
    b_JT,       /* Hangul T jamo */
    b_H2,       /* Hangul LV syllable */
    b_H3,       /* Hangul LVT syllable */
    b_XX,       /* unknown */
    b_OP,       /* open punctuation */
    b_CL,       /* close punctuation */
    b_CP,       /* close parenthesis */
    b_QU,       /* quotation */
    b_NS,       /* nonstarter */
    b_EX,       /* exclamation / interrogation */
    b_SY,       /* symbols allowing break after */
    b_IS,       /* infix numeric separator */
    b_PR,       /* prefix numeric */
    b_PO,       /* postfix numeric */
    b_NU,       /* numeric */
    b_AL,       /* alphabetic */
    b_ID,       /* ideographic */
    b_IN,       /* inseparable */
    b_HY,       /* hyphen */
    b_BB,       /* break before */
    b_BA,       /* break after */
    b_SA,       /* complex context dependent */
    b_AI,       /* ambiguous */
    b_B2        /* break opportunity before and after */
};
42
/*
 * Decision recorded for the position in front of a character.  A value of 0
 * (not a member of this enum) means that no rule has decided yet.
 */
enum breaking_class
{
    b_r = 1,    /* break required here; traced as '!' */
    b_s,        /* break permitted here; traced as '+' */
    b_x         /* break prohibited here; traced as 'x' */
};
44
45 static void debug_output_breaks(const short* breaks, int count)
46 {
47     if (TRACE_ON(uniscribe))
48     {
49         int i;
50         TRACE("[");
51         for (i = 0; i < count && i < 200; i++)
52         {
53             switch (breaks[i])
54             {
55                 case b_x: TRACE("x"); break;
56                 case b_r: TRACE("!"); break;
57                 case b_s: TRACE("+"); break;
58                 default: TRACE("*");
59             }
60         }
61         if (i == 200)
62             TRACE("...");
63         TRACE("]\n");
64     }
65 }
66
/*
 * Record a break decision for one position unless an earlier rule has
 * already decided it.
 *
 * before  [in/out] decision slot; only written while it still holds 0
 * class   [in]     decision to store
 */
static inline void else_break(short* before, short class)
{
    if (*before != 0)
        return;

    *before = class;
}
71
72 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
73 {
74     int i,j;
75     short *break_class;
76     short *break_before;
77
78     TRACE("In      %s\n",debugstr_wn(chars,count));
79
80     break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
81     break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
82
83     for (i = 0; i < count; i++)
84     {
85         break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
86         break_before[i] = 0;
87
88         memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
89
90         la[i].fCharStop = TRUE;
91         switch (break_class[i])
92         {
93             case b_BK:
94             case b_ZW:
95             case b_SP:
96                 la[i].fWhiteSpace = TRUE;
97                 break;
98             case b_CM:
99                 la[i].fCharStop = FALSE;
100         }
101     }
102
103     /* LB1 */
104     /* TODO: Have outside algorithms for these scripts */
105     for (i = 0; i < count; i++)
106     {
107         switch(break_class[i])
108         {
109             case b_AI:
110             case b_SA:
111             case b_SG:
112             case b_XX:
113                 break_class[i] = b_AL;
114         }
115     }
116
117     /* LB2 - LB3 */
118     break_before[0] = b_x;
119     for (i = 0; i < count; i++)
120     {
121         switch(break_class[i])
122         {
123             /* LB4 - LB6 */
124             case b_CR:
125                 if (i < count-1 && break_class[i+1] == b_LF)
126                 {
127                     else_break(&break_before[i],b_x);
128                     else_break(&break_before[i+1],b_x);
129                     break;
130                 }
131             case b_LF:
132             case b_NL:
133             case b_BK:
134                 if (i < count-1) else_break(&break_before[i+1],b_r);
135                     else_break(&break_before[i],b_x);
136                 break;
137             /* LB7 */
138             case b_SP:
139                 else_break(&break_before[i],b_x);
140                 break;
141             case b_ZW:
142                 else_break(&break_before[i],b_x);
143             /* LB8 */
144                 while (i < count-1 && break_class[i+1] == b_SP)
145                     i++;
146                 else_break(&break_before[i],b_s);
147                 break;
148         }
149     }
150
151     debug_output_breaks(break_before,count);
152
153     /* LB9 - LB10 */
154     for (i = 0; i < count; i++)
155     {
156         if (break_class[i] == b_CM)
157         {
158             if (i > 0)
159             {
160                 switch (break_class[i-1])
161                 {
162                     case b_SP:
163                     case b_BK:
164                     case b_CR:
165                     case b_LF:
166                     case b_NL:
167                     case b_ZW:
168                         break_class[i] = b_AL;
169                         break;
170                     default:
171                         break_class[i] = break_class[i-1];
172                 }
173             }
174             else break_class[i] = b_AL;
175         }
176     }
177
178     for (i = 0; i < count; i++)
179     {
180         switch(break_class[i])
181         {
182             /* LB11 */
183             case b_WJ:
184                 else_break(&break_before[i],b_x);
185                 if (i < count-1)
186                     else_break(&break_before[i+1],b_x);
187                 break;
188             /* LB12 */
189             case b_GL:
190                 if (i < count-1)
191                     else_break(&break_before[i+1],b_x);
192             /* LB12a */
193                 if (i > 0)
194                 {
195                     if (break_class[i-1] != b_SP &&
196                         break_class[i-1] != b_BA &&
197                         break_class[i-1] != b_HY)
198                         else_break(&break_before[i],b_x);
199                 }
200                 break;
201             /* LB13 */
202             case b_CL:
203             case b_CP:
204             case b_EX:
205             case b_IS:
206             case b_SY:
207                 else_break(&break_before[i],b_x);
208                 break;
209             /* LB14 */
210             case b_OP:
211                 while (i < count-1 && break_class[i+1] == b_SP)
212                 {
213                     else_break(&break_before[i+1],b_x);
214                     i++;
215                 }
216                 else_break(&break_before[i+1],b_x);
217                 break;
218             /* LB15 */
219             case b_QU:
220                 j = i+1;
221                 while (j < count-1 && break_class[j] == b_SP)
222                     j++;
223                 if (break_class[j] == b_OP)
224                 {
225                     for (; j > i; j--)
226                         else_break(&break_before[j],b_x);
227                 }
228                 break;
229             /* LB16 */
230             case b_NS:
231                 j = i-1;
232                 while(j > 0 && break_class[j] == b_SP)
233                     j--;
234                 if (break_class[j] == b_CL || break_class[j] == b_CP)
235                 {
236                     for (j++; j <= i; j++)
237                         else_break(&break_before[j],b_x);
238                 }
239                 break;
240             /* LB17 */
241             case b_B2:
242                 j = i+1;
243                 while (j < count && break_class[j] == b_SP)
244                     j++;
245                 if (break_class[j] == b_B2)
246                 {
247                     for (; j > i; j--)
248                         else_break(&break_before[j],b_x);
249                 }
250                 break;
251         }
252     }
253
254     debug_output_breaks(break_before,count);
255
256     for (i = 0; i < count; i++)
257     {
258         switch(break_class[i])
259         {
260             /* LB18 */
261             case b_SP:
262                 if (i < count-1)
263                     else_break(&break_before[i+1],b_s);
264                 break;
265             /* LB19 */
266             case b_QU:
267                 else_break(&break_before[i],b_x);
268                 if (i < count-1)
269                     else_break(&break_before[i+1],b_x);
270                 break;
271             /* LB20 */
272             case b_CB:
273                 else_break(&break_before[i],b_s);
274                 if (i < count-1)
275                     else_break(&break_before[i+1],b_s);
276             /* LB21 */
277             case b_BA:
278             case b_HY:
279             case b_NS:
280                 else_break(&break_before[i],b_x);
281                 break;
282             case b_BB:
283                 if (i < count-1)
284                     else_break(&break_before[i+1],b_x);
285                 break;
286             /* LB22 */
287             case b_IN:
288                 if (i > 0)
289                 {
290                     switch (break_class[i-1])
291                     {
292                         case b_AL:
293                         case b_ID:
294                         case b_IN:
295                         case b_NU:
296                             else_break(&break_before[i], b_x);
297                     }
298                 }
299                 break;
300         }
301
302         if (i < count-1)
303         {
304             /* LB23 */
305             if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
306                 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
307                 (break_class[i] == b_NU && break_class[i+1] == b_AL))
308                     else_break(&break_before[i+1],b_x);
309             /* LB24 */
310             if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
311                 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
312                 (break_class[i] == b_PO && break_class[i+1] == b_AL))
313                     else_break(&break_before[i+1],b_x);
314
315             /* LB25 */
316             if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
317                 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
318                 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
319                 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
320                 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
321                 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
322                 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
323                 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
324                 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
325                 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
326                 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
327                 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
328                 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
329                 (break_class[i] == b_SY && break_class[i+1] == b_NU))
330                     else_break(&break_before[i+1],b_x);
331
332             /* LB26 */
333             if (break_class[i] == b_JL)
334             {
335                 switch (break_class[i+1])
336                 {
337                     case b_JL:
338                     case b_JV:
339                     case b_H2:
340                     case b_H3:
341                         else_break(&break_before[i+1],b_x);
342                 }
343             }
344             if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
345                 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
346                     else_break(&break_before[i+1],b_x);
347             if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
348                  break_class[i+1] == b_JT)
349                     else_break(&break_before[i+1],b_x);
350
351             /* LB27 */
352             switch (break_class[i])
353             {
354                 case b_JL:
355                 case b_JV:
356                 case b_JT:
357                 case b_H2:
358                 case b_H3:
359                     if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
360                         else_break(&break_before[i+1],b_x);
361             }
362             if (break_class[i] == b_PO)
363             {
364                 switch (break_class[i+1])
365                 {
366                     case b_JL:
367                     case b_JV:
368                     case b_JT:
369                     case b_H2:
370                     case b_H3:
371                         else_break(&break_before[i+1],b_x);
372                 }
373             }
374
375             /* LB28 */
376             if (break_class[i] == b_AL && break_class[i+1] == b_AL)
377                 else_break(&break_before[i+1],b_x);
378
379             /* LB29 */
380             if (break_class[i] == b_IS && break_class[i+1] == b_AL)
381                 else_break(&break_before[i+1],b_x);
382
383             /* LB30 */
384             if ((break_class[i] == b_AL || break_class[i] == b_NU) &&
385                  break_class[i+1] == b_OP)
386                 else_break(&break_before[i+1],b_x);
387             if (break_class[i] == b_CP &&
388                 (break_class[i+1] == b_AL || break_class[i] == b_NU))
389                 else_break(&break_before[i+1],b_x);
390         }
391     }
392     debug_output_breaks(break_before,count);
393
394     /* LB31 */
395     for (i = 0; i < count-1; i++)
396         else_break(&break_before[i+1],b_s);
397
398     debug_output_breaks(break_before,count);
399     for (i = 0; i < count; i++)
400     {
401         if (break_before[i] != b_x)
402         {
403             la[i].fSoftBreak = TRUE;
404             la[i].fWordStop = TRUE;
405         }
406     }
407
408     HeapFree(GetProcessHeap(), 0, break_before);
409     HeapFree(GetProcessHeap(), 0, break_class);
410 }