dwrite: Support more script ranges, up to Mongolian.
[wine] / dlls / dwrite / analyzer.c
1 /*
2  *    Text analyzer
3  *
4  * Copyright 2012 Nikolay Sivov for CodeWeavers
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20
21 #define COBJMACROS
22
23 #include "dwrite.h"
24 #include "dwrite_private.h"
25
26 #include "wine/debug.h"
27
28 WINE_DEFAULT_DEBUG_CHANNEL(dwrite);
29
/* Script identifiers stored in DWRITE_SCRIPT_ANALYSIS.script.
   The values are not contiguous; only the scripts that the range table
   below can produce are listed here. */
enum scriptcode {
    Script_Arabic     = 0,
    Script_Armenian   = 1,
    Script_Bengali    = 3,
    Script_Canadian   = 8,
    Script_Cherokee   = 11,
    Script_Controls   = 12,
    Script_Coptic     = 13,
    Script_Cyrillic   = 16,
    Script_Devanagari = 18,
    Script_Ethiopic   = 19,
    Script_Georgian   = 20,
    Script_Greek      = 23,
    Script_Gujarati   = 24,
    Script_Gurmukhi   = 25,
    Script_Hangul     = 27,
    Script_Hebrew     = 29,
    Script_Kannada    = 32,
    Script_Khmer      = 36,
    Script_Lao        = 37,
    Script_Latin      = 38,
    Script_Malayalam  = 44,
    Script_Mongolian  = 45,
    Script_Myanmar    = 46,
    Script_NKo        = 48,
    Script_Ogham      = 49,
    Script_Oriya      = 53,
    Script_Runic      = 58,
    Script_Sinhala    = 61,
    Script_Syriac     = 64,
    Script_Tamil      = 68,
    Script_Telugu     = 69,
    Script_Thaana     = 70,
    Script_Thai       = 71,
    Script_Tibetan    = 72,
    Script_Symbol     = 77,
    /* all bits of a 16-bit script value set, i.e. (UINT16)-1 */
    Script_Unknown    = 0xffff
};
68
69 struct script_range {
70     UINT16 script;
71     DWORD first;
72     DWORD last;
73 };
74
75 static const struct script_range script_ranges[] = {
76     /* C0 Controls: U+0000–U+001F */
77     /* ASCII punctuation and symbols: U+0020–U+002F */
78     /* ASCII digits: U+0030–U+0039 */
79     /* ASCII punctuation and symbols: U+003A–U+0040 */
80     { Script_Symbol, 0x00, 0x040 },
81     /* Latin uppercase: U+0041–U+005A */
82     { Script_Latin, 0x41, 0x5a },
83     /* ASCII punctuation and symbols: U+005B–U+0060 */
84     { Script_Symbol, 0x5b, 0x060 },
85     /* Latin lowercase: U+0061–U+007A */
86     { Script_Latin, 0x61, 0x7a },
87     /* ASCII punctuation and symbols, control char DEL: U+007B–U+007F */
88     { Script_Symbol, 0x7b, 0x7f },
89     /* C1 Controls: U+0080–U+009F */
90     { Script_Controls, 0x80, 0x9f },
91     /* Latin-1 Supplement: U+00A0–U+00FF */
92     /* Latin Extended-A: U+0100–U+017F */
93     /* Latin Extended-B: U+0180–U+024F */
94     /* IPA Extensions: U+0250–U+02AF */
95     /* Spacing Modifier Letters: U+02B0–U+02FF */
96     { Script_Latin, 0xa0, 0x2ff },
97     /* Combining Diacritical Marks: U+0300–U+036F */
98     { Script_Symbol, 0x300, 0x36f },
99     /* Greek: U+0370–U+03E1 */
100     { Script_Greek, 0x370, 0x3e1 },
101     /* Coptic: U+03E2–U+03Ef */
102     { Script_Coptic, 0x3e2, 0x3ef },
103     /* Greek: U+03F0–U+03FF */
104     { Script_Greek, 0x3f0, 0x3ff },
105     /* Cyrillic: U+0400–U+04FF */
106     /* Cyrillic Supplement: U+0500–U+052F */
107     /* Cyrillic Supplement range is incomplete cause it's based on Unicode 5.2
108        that doesn't define some Abkhaz and Azerbaijani letters, we support Unicode 6.0 range here */
109     { Script_Cyrillic, 0x400, 0x52f },
110     /* Armenian: U+0530–U+058F */
111     { Script_Armenian, 0x530, 0x58f },
112     /* Hebrew: U+0590–U+05FF */
113     { Script_Hebrew, 0x590, 0x5ff },
114     /* Arabic: U+0600–U+06FF */
115     { Script_Arabic, 0x600, 0x6ff },
116     /* Syriac: U+0600–U+06FF */
117     { Script_Syriac, 0x700, 0x74f },
118     /* Arabic Supplement: U+0750–U+077F */
119     { Script_Arabic, 0x750, 0x77f },
120     /* Thaana: U+0780–U+07BF */
121     { Script_Thaana, 0x780, 0x7bf },
122     /* N'Ko: U+07C0–U+07FF */
123     { Script_NKo, 0x7c0, 0x7ff },
124     /* Devanagari: U+0900–U+097F */
125     { Script_Devanagari, 0x900, 0x97f },
126     /* Bengali: U+0980–U+09FF */
127     { Script_Bengali, 0x980, 0x9ff },
128     /* Gurmukhi: U+0A00–U+0A7F */
129     { Script_Gurmukhi, 0xa00, 0xa7f },
130     /* Gujarati: U+0A80–U+0AFF */
131     { Script_Gujarati, 0xa80, 0xaff },
132     /* Oriya: U+0B00–U+0B7F */
133     { Script_Oriya, 0xb00, 0xb7f },
134     /* Tamil: U+0B80–U+0BFF */
135     { Script_Tamil, 0xb80, 0xbff },
136     /* Telugu: U+0C00–U+0C7F */
137     { Script_Telugu, 0xc00, 0xc7f },
138     /* Kannada: U+0C80–U+0CFF */
139     { Script_Kannada, 0xc80, 0xcff },
140     /* Malayalam: U+0D00–U+0D7F */
141     { Script_Malayalam, 0xd00, 0xd7f },
142     /* Sinhala: U+0D80–U+0DFF */
143     { Script_Sinhala, 0xd80, 0xdff },
144     /* Thai: U+0E00–U+0E7F */
145     { Script_Thai, 0xe00, 0xe7f },
146     /* Lao: U+0E80–U+0EFF */
147     { Script_Lao, 0xe80, 0xeff },
148     /* Tibetan: U+0F00–U+0FFF */
149     { Script_Tibetan, 0xf00, 0xfff },
150     /* Myanmar: U+1000–U+109F */
151     { Script_Myanmar, 0x1000, 0x109f },
152     /* Georgian: U+10A0–U+10FF */
153     { Script_Georgian, 0x10a0, 0x10ff },
154     /* Hangul Jamo: U+1100–U+11FF */
155     { Script_Hangul, 0x1100, 0x11ff },
156     /* Ethiopic: U+1200–U+137F */
157     /* Ethiopic Extensions: U+1380–U+139F */
158     { Script_Ethiopic, 0x1200, 0x139f },
159     /* Cherokee: U+13A0–U+13FF */
160     { Script_Cherokee, 0x13a0, 0x13ff },
161     /* Canadian Aboriginal Syllabics: U+1400–U+167F */
162     { Script_Canadian, 0x1400, 0x167f },
163     /* Ogham: U+1680–U+169F */
164     { Script_Ogham, 0x1680, 0x169f },
165     /* Runic: U+16A0–U+16F0 */
166     { Script_Runic, 0x16a0, 0x16f0 },
167     /* Khmer: U+1780–U+17FF */
168     { Script_Khmer, 0x1780, 0x17ff },
169     /* Mongolian: U+1800–U+18AF */
170     { Script_Mongolian, 0x1800, 0x18af},
171     /* unsupported range */
172     { Script_Unknown }
173 };
174
175 static UINT16 get_char_script( WCHAR c )
176 {
177     DWORD ch = c;
178     int i;
179
180     for (i = 0; i < sizeof(script_ranges)/sizeof(struct script_range); i++)
181     {
182         const struct script_range *range = &script_ranges[i];
183         if (range->script == Script_Unknown || (range->first <= ch && range->last >= ch))
184             return range->script;
185     }
186
187     return Script_Unknown;
188 }
189
190 static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
191 {
192     DWRITE_SCRIPT_ANALYSIS sa;
193     UINT32 pos, i, length;
194
195     if (!len) return S_OK;
196
197     sa.script = get_char_script(*text);
198     sa.shapes = DWRITE_SCRIPT_SHAPES_DEFAULT;
199
200     pos = 0;
201     length = 1;
202
203     for (i = 1; i < len; i++)
204     {
205         UINT16 script = get_char_script(text[i]);
206
207         /* Script_Latin_Symb script type is ignored when preceded or followed by another script */
208         if (sa.script == Script_Symbol) sa.script = script;
209         if (script    == Script_Symbol) script = sa.script;
210         /* this is a length of a sequence to be reported next */
211         if (sa.script == script) length++;
212
213         if (sa.script != script)
214         {
215             HRESULT hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
216             if (FAILED(hr)) return hr;
217             pos = i;
218             length = 1;
219             sa.script = script;
220         }
221     }
222
223     /* 1 length case or normal completion call */
224     return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
225 }
226
227 static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
228 {
229     TRACE("(%s %p)\n", debugstr_guid(riid), obj);
230
231     if (IsEqualIID(riid, &IID_IUnknown) || IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
232     {
233         *obj = iface;
234         return S_OK;
235     }
236
237     *obj = NULL;
238     return E_NOINTERFACE;
239
240 }
241
242 static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
243 {
244     return 2;
245 }
246
247 static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
248 {
249     return 1;
250 }
251
252 static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
253     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
254 {
255     const WCHAR *text;
256     HRESULT hr;
257     UINT32 len;
258
259     TRACE("(%p %u %u %p)\n", source, position, length, sink);
260
261     hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
262     if (FAILED(hr)) return hr;
263
264     return analyze_script(text, len, sink);
265 }
266
267 static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
268     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
269 {
270     FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
271     return E_NOTIMPL;
272 }
273
274 static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
275     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
276 {
277     FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
278     return E_NOTIMPL;
279 }
280
281 static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
282     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
283 {
284     FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
285     return E_NOTIMPL;
286 }
287
288 static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
289     WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
290     BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
291     IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
292     UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
293     UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
294     DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
295 {
296     FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n", debugstr_wn(text, length),
297         length, font_face, is_sideways, is_rtl, analysis, debugstr_w(locale), substitution, features, feature_range_len,
298         feature_ranges, max_glyph_count, clustermap, text_props, glyph_indices, glyph_props, actual_glyph_count);
299     return E_NOTIMPL;
300 }
301
302 static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
303     WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
304     UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
305     UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
306     DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
307     UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
308 {
309     FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
310         clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
311         is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);
312     return E_NOTIMPL;
313 }
314
315 static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
316     WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
317     UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
318     UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
319     DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
320     DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
321     UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
322 {
323     FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
324         clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
325         transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
326         feature_ranges, glyph_advances, glyph_offsets);
327     return E_NOTIMPL;
328 }
329
330 static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
331     dwritetextanalyzer_QueryInterface,
332     dwritetextanalyzer_AddRef,
333     dwritetextanalyzer_Release,
334     dwritetextanalyzer_AnalyzeScript,
335     dwritetextanalyzer_AnalyzeBidi,
336     dwritetextanalyzer_AnalyzeNumberSubstitution,
337     dwritetextanalyzer_AnalyzeLineBreakpoints,
338     dwritetextanalyzer_GetGlyphs,
339     dwritetextanalyzer_GetGlyphPlacements,
340     dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
341 };
342
343 static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };
344
345 HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
346 {
347     *ret = &textanalyzer;
348     return S_OK;
349 }