dwrite: Support for Armenian, Hebrew and complete Arabic ranges.
[wine] / dlls / dwrite / analyzer.c
1 /*
2  *    Text analyzer
3  *
4  * Copyright 2012 Nikolay Sivov for CodeWeavers
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20
21 #define COBJMACROS
22
23 #include "dwrite.h"
24 #include "dwrite_private.h"
25
26 #include "wine/debug.h"
27
28 WINE_DEFAULT_DEBUG_CHANNEL(dwrite);
29
30 enum scriptcode {
31     Script_Arabic = 0,
32     Script_Armenian = 1,
33     Script_C1Controls = 12,
34     Script_Coptic = 13,
35     Script_Cyrillic = 16,
36     Script_Greek = 23,
37     Script_Hebrew = 29,
38     Script_Latin  = 38,
39     Script_Symbol = 77,
40     Script_Unknown = (UINT16)-1
41 };
42
43 struct script_range {
44     UINT16 script;
45     DWORD first;
46     DWORD last;
47 };
48
49 static const struct script_range script_ranges[] = {
50     /* C0 Controls: U+0000–U+001F */
51     /* ASCII punctuation and symbols: U+0020–U+002F */
52     /* ASCII digits: U+0030–U+0039 */
53     /* ASCII punctuation and symbols: U+003A–U+0040 */
54     { Script_Symbol, 0x00, 0x040 },
55     /* Latin uppercase: U+0041–U+005A */
56     { Script_Latin, 0x41, 0x5a },
57     /* ASCII punctuation and symbols: U+005B–U+0060 */
58     { Script_Symbol, 0x5b, 0x060 },
59     /* Latin lowercase: U+0061–U+007A */
60     { Script_Latin, 0x61, 0x7a },
61     /* ASCII punctuation and symbols, control char DEL: U+007B–U+007F */
62     { Script_Symbol, 0x7b, 0x7f },
63     /* C1 Controls: U+0080–U+009F */
64     { Script_C1Controls, 0x80, 0x9f },
65     /* Latin-1 Supplement: U+00A0–U+00FF */
66     /* Latin Extended-A: U+0100–U+017F */
67     /* Latin Extended-B: U+0180–U+024F */
68     /* IPA Extensions: U+0250–U+02AF */
69     /* Spacing Modifier Letters: U+02B0–U+02FF */
70     { Script_Latin, 0xa0, 0x2ff },
71     /* Combining Diacritical Marks: U+0300–U+036F */
72     { Script_Symbol, 0x300, 0x36f },
73     /* Greek: U+0370–U+03E1 */
74     { Script_Greek, 0x370, 0x3e1 },
75     /* Coptic: U+03E2–U+03Ef */
76     { Script_Coptic, 0x3e2, 0x3ef },
77     /* Greek: U+03F0–U+03FF */
78     { Script_Greek, 0x3f0, 0x3ff },
79     /* Cyrillic: U+0400–U+04FF */
80     /* Cyrillic Supplement: U+0500–U+052F */
81     /* Cyrillic Supplement range is incomplete cause it's based on Unicode 5.2
82        that doesn't define some Abkhaz and Azerbaijani letters, we support Unicode 6.0 range here */
83     { Script_Cyrillic, 0x400, 0x52f },
84     /* Armenian: U+0530–U+058F */
85     { Script_Armenian, 0x530, 0x58f },
86     /* Hebrew: U+0590–U+05FF */
87     { Script_Hebrew, 0x590, 0x5ff },
88     /* Arabic: U+0600–U+06FF */
89     { Script_Arabic, 0x600, 0x6ff },
90     /* unsupported range */
91     { Script_Unknown }
92 };
93
94 static UINT16 get_char_script( WCHAR c )
95 {
96     DWORD ch = c;
97     int i;
98
99     for (i = 0; i < sizeof(script_ranges)/sizeof(struct script_range); i++)
100     {
101         const struct script_range *range = &script_ranges[i];
102         if (range->script == Script_Unknown || (range->first <= ch && range->last >= ch))
103             return range->script;
104     }
105
106     return Script_Unknown;
107 }
108
109 static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
110 {
111     DWRITE_SCRIPT_ANALYSIS sa;
112     UINT32 pos, i, length;
113
114     if (!len) return S_OK;
115
116     sa.script = get_char_script(*text);
117     sa.shapes = DWRITE_SCRIPT_SHAPES_DEFAULT;
118
119     pos = 0;
120     length = 1;
121
122     for (i = 1; i < len; i++)
123     {
124         UINT16 script = get_char_script(text[i]);
125
126         /* Script_Latin_Symb script type is ignored when preceded or followed by another script */
127         if (sa.script == Script_Symbol) sa.script = script;
128         if (script    == Script_Symbol) script = sa.script;
129         /* this is a length of a sequence to be reported next */
130         if (sa.script == script) length++;
131
132         if (sa.script != script)
133         {
134             HRESULT hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
135             if (FAILED(hr)) return hr;
136             pos = i;
137             length = 1;
138             sa.script = script;
139         }
140     }
141
142     /* 1 length case or normal completion call */
143     return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
144 }
145
146 static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
147 {
148     TRACE("(%s %p)\n", debugstr_guid(riid), obj);
149
150     if (IsEqualIID(riid, &IID_IUnknown) || IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
151     {
152         *obj = iface;
153         return S_OK;
154     }
155
156     *obj = NULL;
157     return E_NOINTERFACE;
158
159 }
160
161 static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
162 {
163     return 2;
164 }
165
166 static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
167 {
168     return 1;
169 }
170
171 static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
172     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
173 {
174     const WCHAR *text;
175     HRESULT hr;
176     UINT32 len;
177
178     TRACE("(%p %u %u %p)\n", source, position, length, sink);
179
180     hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
181     if (FAILED(hr)) return hr;
182
183     return analyze_script(text, len, sink);
184 }
185
186 static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
187     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
188 {
189     FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
190     return E_NOTIMPL;
191 }
192
193 static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
194     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
195 {
196     FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
197     return E_NOTIMPL;
198 }
199
200 static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
201     IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
202 {
203     FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
204     return E_NOTIMPL;
205 }
206
207 static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
208     WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
209     BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
210     IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
211     UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
212     UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
213     DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
214 {
215     FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n", debugstr_wn(text, length),
216         length, font_face, is_sideways, is_rtl, analysis, debugstr_w(locale), substitution, features, feature_range_len,
217         feature_ranges, max_glyph_count, clustermap, text_props, glyph_indices, glyph_props, actual_glyph_count);
218     return E_NOTIMPL;
219 }
220
221 static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
222     WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
223     UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
224     UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
225     DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
226     UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
227 {
228     FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
229         clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
230         is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);
231     return E_NOTIMPL;
232 }
233
234 static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
235     WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
236     UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
237     UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
238     DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
239     DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
240     UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
241 {
242     FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
243         clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
244         transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
245         feature_ranges, glyph_advances, glyph_offsets);
246     return E_NOTIMPL;
247 }
248
249 static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
250     dwritetextanalyzer_QueryInterface,
251     dwritetextanalyzer_AddRef,
252     dwritetextanalyzer_Release,
253     dwritetextanalyzer_AnalyzeScript,
254     dwritetextanalyzer_AnalyzeBidi,
255     dwritetextanalyzer_AnalyzeNumberSubstitution,
256     dwritetextanalyzer_AnalyzeLineBreakpoints,
257     dwritetextanalyzer_GetGlyphs,
258     dwritetextanalyzer_GetGlyphPlacements,
259     dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
260 };
261
262 static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };
263
264 HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
265 {
266     *ret = &textanalyzer;
267     return S_OK;
268 }