2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
4 * Copyright 2011 CodeWeavers, Aric Stewart
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
34 #include "wine/debug.h"
35 #include "usp10_internal.h"
/* Declares "uniscribe" as this file's default Wine debug channel; it is the
 * channel tested by TRACE_ON(uniscribe) in debug_output_breaks(). */
37 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
/* Lookup table defined outside this file.  BREAK_line passes it to
 * get_table_entry() to obtain the breaking class of each character. */
39 extern const unsigned short wine_linebreak_table[];
/*
 * Breaking classes assigned to characters.  BREAK_line stores the result of
 * get_table_entry(wine_linebreak_table, ch) in break_class[] and compares it
 * against these enumerators, so the order and the starting value of 1 must
 * not change (presumably the table holds these same numbers -- confirm
 * against the table generator).  The names match the class abbreviations of
 * the Unicode Line Breaking Algorithm (UAX #14).
 */
enum breaking_types {
    b_BK = 1, b_CR, b_LF, b_CM, b_SG, b_GL, b_CB, b_SP, b_ZW, b_NL,
    b_WJ, b_JL, b_JV, b_JT, b_H2, b_H3, b_XX, b_OP, b_CL, b_CP,
    b_QU, b_NS, b_EX, b_SY, b_IS, b_PR, b_PO, b_NU, b_AL, b_ID,
    b_IN, b_HY, b_BB, b_BA, b_SA, b_AI, b_B2
};

/*
 * Decision recorded in break_before[] for the position in front of a
 * character.  The value 0 (no enumerator) means "not decided yet"; see
 * else_break().
 */
enum breaking_class {
    b_r = 1,    /* traced as "!" by debug_output_breaks() */
    b_s,        /* traced as "+" */
    b_x         /* traced as "x"; BREAK_line sets fSoftBreak/fWordStop only
                 * for positions whose decision is not b_x */
};
/*
 * Debug helper: when the uniscribe trace channel is enabled, walks breaks[]
 * and traces "x" for b_x, "!" for b_r and "+" for b_s.
 *
 * breaks: array of break decisions.
 * count:  number of entries in breaks.
 *
 * NOTE(review): this listing omits lines between the ones shown (the
 * declaration of i, the switch header, braces), so the handling of any other
 * value and any surrounding output is not visible here.
 */
45 static void debug_output_breaks(const short* breaks, int count)
47 if (TRACE_ON(uniscribe))
/* At most the first 200 entries are visited, presumably to keep the trace short. */
51 for (i = 0; i < count && i < 200; i++)
55 case b_x: TRACE("x"); break;
56 case b_r: TRACE("!"); break;
57 case b_s: TRACE("+"); break;
/*
 * Record a break decision for one position unless one is already recorded.
 *
 * before: slot holding the decision for a position; 0 means "undecided".
 * class:  decision to store (callers in this file pass b_r, b_s or b_x).
 *
 * The first non-zero decision written to a slot wins: once *before is
 * non-zero, later calls leave it unchanged.
 */
static inline void else_break(short* before, short class)
{
    if (*before == 0)
        *before = class;
}
/*
 * BREAK_line
 *
 * Classifies each of the count characters in chars with get_table_entry() on
 * wine_linebreak_table, derives a "break before" decision (b_r, b_s or b_x)
 * for every position in several passes, and fills la[i] for every character:
 * fCharStop, fWhiteSpace, fSoftBreak and fWordStop.
 *
 * chars: input characters.
 * count: number of entries in chars and in la.
 * sa:    not referenced in the lines visible in this listing.
 * la:    output array; each entry is zeroed and then populated.
 *
 * Decisions are written through else_break(), so for each position the first
 * rule that records a decision wins and later rules cannot override it.
 *
 * The b_* class names match the class abbreviations of the Unicode Line
 * Breaking Algorithm (UAX #14); presumably the passes apply its rules in
 * order -- confirm against the specification.
 *
 * NOTE(review): this listing omits lines (case labels, braces, some guards
 * and assignments).  The comments below describe only the statements shown.
 */
72 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
78 TRACE("In %s\n",debugstr_wn(chars,count));
/* Scratch arrays of count shorts each: the class of every character and the
 * decision for the position before every character.
 * NOTE(review): neither HeapAlloc result is checked in the lines shown. */
80 break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
81 break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
/* Pass 1: classify every character and initialise its SCRIPT_LOGATTR. */
83 for (i = 0; i < count; i++)
85 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
88 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
/* Every character starts as a character stop; the switch then sets
 * fWhiteSpace or clears fCharStop for certain classes (labels not shown). */
90 la[i].fCharStop = TRUE;
91 switch (break_class[i])
96 la[i].fWhiteSpace = TRUE;
99 la[i].fCharStop = FALSE;
/* Pass 2: some classes are folded into b_AL (case labels not shown). */
104 /* TODO: Have outside algorithms for these scripts */
105 for (i = 0; i < count; i++)
107 switch(break_class[i])
113 break_class[i] = b_AL;
/* The position before the first character is always b_x.
 * NOTE(review): this writes break_before[0] even when count is 0, unless an
 * earlier guard exists on a line not shown. */
118 break_before[0] = b_x;
/* Pass 3: decisions driven by the class of the current character. */
119 for (i = 0; i < count; i++)
121 switch(break_class[i])
/* Current character followed by b_LF: b_x on both positions keeps the pair together. */
125 if (i < count-1 && break_class[i+1] == b_LF)
127 else_break(&break_before[i],b_x);
128 else_break(&break_before[i+1],b_x);
/* b_r before the next character (when there is one), b_x before this one. */
134 if (i < count-1) else_break(&break_before[i+1],b_r);
135 else_break(&break_before[i],b_x);
139 else_break(&break_before[i],b_x);
142 else_break(&break_before[i],b_x);
/* While the next character is b_SP (loop body not shown, presumably advances
 * i), then record b_s at the resulting position. */
144 while (i < count-1 && break_class[i+1] == b_SP)
146 else_break(&break_before[i],b_s);
/* Trace the decisions made so far. */
151 debug_output_breaks(break_before,count);
/* Pass 4: rewrite the class of b_CM characters. */
154 for (i = 0; i < count; i++)
156 if (break_class[i] == b_CM)
/* Depending on the previous character's class (labels not shown) the b_CM
 * either becomes b_AL or takes over the previous character's class. */
160 switch (break_class[i-1])
168 break_class[i] = b_AL;
171 break_class[i] = break_class[i-1];
/* Fallback branch; its matching `if` is not shown (presumably the i == 0 case). */
174 else break_class[i] = b_AL;
/* Pass 5: class-specific rules that forbid a break (b_x) before and/or after
 * the current character (case labels not shown). */
178 for (i = 0; i < count; i++)
180 switch(break_class[i])
184 else_break(&break_before[i],b_x);
186 else_break(&break_before[i+1],b_x);
191 else_break(&break_before[i+1],b_x);
/* Forbid a break before this character unless the previous one is b_SP, b_BA
 * or b_HY.  NOTE(review): reads break_class[i-1]; the i > 0 guard, if any, is
 * on a line not shown. */
195 if (break_class[i-1] != b_SP &&
196 break_class[i-1] != b_BA &&
197 break_class[i-1] != b_HY)
198 else_break(&break_before[i],b_x);
207 else_break(&break_before[i],b_x);
/* Forbid breaks across the b_SP characters that follow, and before whatever
 * comes after them.  NOTE(review): if i can be count-1 when the second call
 * runs, &break_before[i+1] is one past the end of the array. */
211 while (i < count-1 && break_class[i+1] == b_SP)
213 else_break(&break_before[i+1],b_x);
216 else_break(&break_before[i+1],b_x);
/* Scan forward over b_SP with j; if the scan stops on b_OP, record b_x
 * (the statements that initialise and step j are not shown). */
221 while (j < count-1 && break_class[j] == b_SP)
223 if (break_class[j] == b_OP)
226 else_break(&break_before[j],b_x);
/* Scan backward over b_SP with j; if it stops on b_CL or b_CP, record b_x on
 * every position after that character up to and including i. */
232 while(j > 0 && break_class[j] == b_SP)
234 if (break_class[j] == b_CL || break_class[j] == b_CP)
236 for (j++; j <= i; j++)
237 else_break(&break_before[j],b_x);
/* Scan forward over b_SP with j; if it stops on b_B2, record b_x.
 * NOTE(review): this scan is bounded by j < count (the similar scan above
 * uses count-1), so it can end with j == count; the test that follows would
 * then read break_class[count], past the allocation, unless a guard exists on
 * a line not shown. */
243 while (j < count && break_class[j] == b_SP)
245 if (break_class[j] == b_B2)
248 else_break(&break_before[j],b_x);
254 debug_output_breaks(break_before,count);
/* Pass 6: remaining per-class and class-pair rules. */
256 for (i = 0; i < count; i++)
258 switch(break_class[i])
/* Per-class b_s / b_x decisions before and after the character (labels not shown). */
263 else_break(&break_before[i+1],b_s);
267 else_break(&break_before[i],b_x);
269 else_break(&break_before[i+1],b_x);
273 else_break(&break_before[i],b_s);
275 else_break(&break_before[i+1],b_s);
280 else_break(&break_before[i],b_x);
284 else_break(&break_before[i+1],b_x);
290 switch (break_class[i-1])
296 else_break(&break_before[i], b_x);
/* Pair rules: each test below forbids a break between the current character
 * and the next one for the listed class pairs.
 * NOTE(review): all of them read break_class[i+1]; the i < count-1 guard, if
 * any, is on a line not shown. */
/* (ID,PO), (AL,NU), (NU,AL) */
305 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
306 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
307 (break_class[i] == b_NU && break_class[i+1] == b_AL))
308 else_break(&break_before[i+1],b_x);
/* (PR,ID), (PR,AL), (PO,AL) */
310 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
311 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
312 (break_class[i] == b_PO && break_class[i+1] == b_AL))
313 else_break(&break_before[i+1],b_x);
/* Pairs combining CL/CP/NU/PO/PR/HY/IS/SY with PO/PR/OP/NU, as listed. */
316 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
317 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
318 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
319 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
320 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
321 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
322 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
323 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
324 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
325 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
326 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
327 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
328 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
329 (break_class[i] == b_SY && break_class[i+1] == b_NU))
330 else_break(&break_before[i+1],b_x);
/* Pairs among JL, JV, JT, H2 and H3 (presumably the Hangul jamo/syllable
 * classes): b_JL followed by certain classes (labels not shown), JV/H2
 * followed by JV/JT, and JT/H3 followed by JT. */
333 if (break_class[i] == b_JL)
335 switch (break_class[i+1])
341 else_break(&break_before[i+1],b_x);
344 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
345 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
346 else_break(&break_before[i+1],b_x);
347 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
348 break_class[i+1] == b_JT)
349 else_break(&break_before[i+1],b_x);
/* For certain current classes (labels not shown): no break before a following b_IN or b_PO. */
352 switch (break_class[i])
359 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
360 else_break(&break_before[i+1],b_x);
/* After b_PO: no break before certain following classes (labels not shown). */
362 if (break_class[i] == b_PO)
364 switch (break_class[i+1])
371 else_break(&break_before[i+1],b_x);
/* (AL,AL) */
376 if (break_class[i] == b_AL && break_class[i+1] == b_AL)
377 else_break(&break_before[i+1],b_x);
/* (IS,AL) */
380 if (break_class[i] == b_IS && break_class[i+1] == b_AL)
381 else_break(&break_before[i+1],b_x);
/* (AL,OP), (NU,OP) */
384 if ((break_class[i] == b_AL || break_class[i] == b_NU) &&
385 break_class[i+1] == b_OP)
386 else_break(&break_before[i+1],b_x);
/* (CP,AL).  NOTE(review): the second alternative tests break_class[i] == b_NU,
 * which can never hold together with break_class[i] == b_CP; it looks like
 * break_class[i+1] == b_NU was intended, so (CP,NU) is currently not covered. */
387 if (break_class[i] == b_CP &&
388 (break_class[i+1] == b_AL || break_class[i] == b_NU))
389 else_break(&break_before[i+1],b_x);
392 debug_output_breaks(break_before,count);
/* Every position that is still undecided becomes b_s.
 * NOTE(review): on the last iteration (i == count-1) this passes
 * &break_before[count], one element past the count-sized allocation above;
 * else_break() reads that slot and may write to it. */
395 for (i = 0; i < count; i++)
396 else_break(&break_before[i+1],b_s);
398 debug_output_breaks(break_before,count);
/* Publish the result: every position whose decision is not b_x is flagged as
 * both a soft break and a word stop. */
399 for (i = 0; i < count; i++)
401 if (break_before[i] != b_x)
403 la[i].fSoftBreak = TRUE;
404 la[i].fWordStop = TRUE;
/* Release both scratch arrays. */
408 HeapFree(GetProcessHeap(), 0, break_before);
409 HeapFree(GetProcessHeap(), 0, break_class);