2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
29 #include "xmllite_private.h"
31 #include "wine/debug.h"
32 #include "wine/list.h"
33 #include "wine/unicode.h"
35 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
37 /* not defined in public headers */
38 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
49 XmlReadInState_Initial,
50 XmlReadInState_XmlDecl,
51 XmlReadInState_Misc_DTD,
53 } XmlReaderInternalState;
57 StringValue_LocalName,
58 StringValue_QualifiedName,
61 } XmlReaderStringValue;
63 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
64 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
66 static const WCHAR dblquoteW[] = {'\"',0};
67 static const WCHAR quoteW[] = {'\'',0};
69 struct xml_encoding_data
76 static const struct xml_encoding_data xml_encoding_map[] = {
77 { utf16W, XmlEncoding_UTF16, ~0 },
78 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
85 unsigned int allocated;
89 typedef struct input_buffer input_buffer;
93 IXmlReaderInput IXmlReaderInput_iface;
95 /* reference passed on IXmlReaderInput creation, is kept when input is created */
98 xml_encoding encoding;
101 /* stream reference set after SetInput() call from reader,
102 stored as sequential stream, cause currently
103 optimizations possible with IStream aren't implemented */
104 ISequentialStream *stream;
105 input_buffer *buffer;
114 static WCHAR emptyW[] = {0};
115 static const strval strval_empty = {emptyW, 0};
126 IXmlReader IXmlReader_iface;
128 xmlreaderinput *input;
131 XmlReaderInternalState instate;
132 XmlNodeType nodetype;
133 DtdProcessing dtdmode;
134 UINT line, pos; /* reader position in XML stream */
135 struct list attrs; /* attributes list for current node */
136 struct attribute *attr; /* current attribute */
138 strval strvalues[StringValue_Last];
143 encoded_buffer utf16;
144 encoded_buffer encoded;
146 xmlreaderinput *input;
149 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
151 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
154 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
156 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
159 static inline void *m_alloc(IMalloc *imalloc, size_t len)
162 return IMalloc_Alloc(imalloc, len);
164 return heap_alloc(len);
167 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
170 return IMalloc_Realloc(imalloc, mem, len);
172 return heap_realloc(mem, len);
175 static inline void m_free(IMalloc *imalloc, void *mem)
178 IMalloc_Free(imalloc, mem);
183 /* reader memory allocation functions */
184 static inline void *reader_alloc(xmlreader *reader, size_t len)
186 return m_alloc(reader->imalloc, len);
189 static inline void reader_free(xmlreader *reader, void *mem)
191 m_free(reader->imalloc, mem);
194 /* reader input memory allocation functions */
195 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
197 return m_alloc(input->imalloc, len);
200 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
202 return m_realloc(input->imalloc, mem, len);
205 static inline void readerinput_free(xmlreaderinput *input, void *mem)
207 m_free(input->imalloc, mem);
210 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
217 size = (strlenW(str)+1)*sizeof(WCHAR);
218 ret = readerinput_alloc(input, size);
219 if (ret) memcpy(ret, str, size);
225 static void reader_clear_attrs(xmlreader *reader)
227 struct attribute *attr, *attr2;
228 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
230 reader_free(reader, attr);
232 list_init(&reader->attrs);
233 reader->attr_count = 0;
236 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
237 while we are on a node with attributes */
238 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
240 struct attribute *attr;
242 attr = reader_alloc(reader, sizeof(*attr));
243 if (!attr) return E_OUTOFMEMORY;
245 attr->localname = *localname;
246 attr->value = *value;
247 list_add_tail(&reader->attrs, &attr->entry);
248 reader->attr_count++;
253 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
255 strval *v = &reader->strvalues[type];
257 if (v->str != strval_empty.str)
259 reader_free(reader, v->str);
264 static void reader_free_strvalues(xmlreader *reader)
267 for (type = 0; type < StringValue_Last; type++)
268 reader_free_strvalue(reader, type);
271 /* always make a copy, cause strings are supposed to be null terminated */
272 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
274 strval *v = &reader->strvalues[type];
276 reader_free_strvalue(reader, type);
277 if (value->str == strval_empty.str)
281 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
282 memcpy(v->str, value->str, value->len*sizeof(WCHAR));
283 v->str[value->len] = 0;
288 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
290 const int initial_len = 0x2000;
291 buffer->data = readerinput_alloc(input, initial_len);
292 if (!buffer->data) return E_OUTOFMEMORY;
294 memset(buffer->data, 0, 4);
295 buffer->cur = buffer->data;
296 buffer->allocated = initial_len;
302 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
304 readerinput_free(input, buffer->data);
307 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
309 if (encoding == XmlEncoding_Unknown)
311 FIXME("unsupported encoding %d\n", encoding);
315 *cp = xml_encoding_map[encoding].cp;
320 static xml_encoding parse_encoding_name(const WCHAR *name, int len)
324 if (!name) return XmlEncoding_Unknown;
327 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
334 c = strncmpiW(xml_encoding_map[n].name, name, len);
336 c = strcmpiW(xml_encoding_map[n].name, name);
338 return xml_encoding_map[n].enc;
346 return XmlEncoding_Unknown;
349 static HRESULT alloc_input_buffer(xmlreaderinput *input)
351 input_buffer *buffer;
354 input->buffer = NULL;
356 buffer = readerinput_alloc(input, sizeof(*buffer));
357 if (!buffer) return E_OUTOFMEMORY;
359 buffer->input = input;
360 buffer->code_page = ~0; /* code page is unknown at this point */
361 hr = init_encoded_buffer(input, &buffer->utf16);
363 readerinput_free(input, buffer);
367 hr = init_encoded_buffer(input, &buffer->encoded);
369 free_encoded_buffer(input, &buffer->utf16);
370 readerinput_free(input, buffer);
374 input->buffer = buffer;
378 static void free_input_buffer(input_buffer *buffer)
380 free_encoded_buffer(buffer->input, &buffer->encoded);
381 free_encoded_buffer(buffer->input, &buffer->utf16);
382 readerinput_free(buffer->input, buffer);
385 static void readerinput_release_stream(xmlreaderinput *readerinput)
387 if (readerinput->stream) {
388 ISequentialStream_Release(readerinput->stream);
389 readerinput->stream = NULL;
393 /* Queries already stored interface for IStream/ISequentialStream.
394 Interface supplied on creation will be overwritten */
395 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
399 readerinput_release_stream(readerinput);
400 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
402 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
407 /* reads a chunk to raw buffer */
408 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
410 encoded_buffer *buffer = &readerinput->buffer->encoded;
411 /* to make sure aligned length won't exceed allocated length */
412 ULONG len = buffer->allocated - buffer->written - 4;
416 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
417 variable width encodings like UTF-8 */
418 len = (len + 3) & ~3;
419 /* try to use allocated space or grow */
420 if (buffer->allocated - buffer->written < len)
422 buffer->allocated *= 2;
423 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
424 len = buffer->allocated - buffer->written;
427 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
428 if (FAILED(hr)) return hr;
429 TRACE("requested %d, read %d, ret 0x%08x\n", len, read, hr);
430 buffer->written += read;
435 /* grows UTF-16 buffer so it has at least 'length' bytes free on return */
436 static void readerinput_grow(xmlreaderinput *readerinput, int length)
438 encoded_buffer *buffer = &readerinput->buffer->utf16;
440 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
441 if (buffer->allocated < buffer->written + length + 4)
443 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
444 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
445 buffer->allocated = grown_size;
449 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
451 encoded_buffer *buffer = &readerinput->buffer->encoded;
452 static char startA[] = {'<','?'};
453 static char commentA[] = {'<','!'};
454 static WCHAR startW[] = {'<','?'};
455 static WCHAR commentW[] = {'<','!'};
456 static char utf8bom[] = {0xef,0xbb,0xbf};
457 static char utf16lebom[] = {0xff,0xfe};
459 *enc = XmlEncoding_Unknown;
461 if (buffer->written <= 3) return MX_E_INPUTEND;
463 /* try start symbols if we have enough data to do that, input buffer should contain
464 first chunk already */
465 if (!memcmp(buffer->data, startA, sizeof(startA)) ||
466 !memcmp(buffer->data, commentA, sizeof(commentA)))
467 *enc = XmlEncoding_UTF8;
468 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
469 !memcmp(buffer->data, commentW, sizeof(commentW)))
470 *enc = XmlEncoding_UTF16;
471 /* try with BOM now */
472 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
474 buffer->cur += sizeof(utf8bom);
475 *enc = XmlEncoding_UTF8;
477 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
479 buffer->cur += sizeof(utf16lebom);
480 *enc = XmlEncoding_UTF16;
486 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
488 encoded_buffer *buffer = &readerinput->buffer->encoded;
489 int len = buffer->written;
491 /* complete single byte char */
492 if (!(buffer->data[len-1] & 0x80)) return len;
494 /* find start byte of multibyte char */
495 while (--len && !(buffer->data[len] & 0xc0))
501 /* Returns byte length of complete char sequence for buffer code page,
502 it's relative to current buffer position which is currently used for BOM handling
504 static int readerinput_get_convlen(xmlreaderinput *readerinput)
506 encoded_buffer *buffer = &readerinput->buffer->encoded;
509 if (readerinput->buffer->code_page == CP_UTF8)
510 len = readerinput_get_utf8_convlen(readerinput);
512 len = buffer->written;
514 TRACE("%d\n", len - (int)(buffer->cur - buffer->data));
515 return len - (buffer->cur - buffer->data);
518 /* It's possbile that raw buffer has some leftovers from last conversion - some char
519 sequence that doesn't represent a full code point. Length argument should be calculated with
520 readerinput_get_convlen(). */
521 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
523 encoded_buffer *buffer = &readerinput->buffer->encoded;
524 memmove(buffer->data, buffer->cur + (buffer->written - len), len);
525 /* everything lower cur is lost too */
526 buffer->written -= len + (buffer->cur - buffer->data);
527 /* after this point we don't need cur pointer really,
528 it's used only to mark where actual data begins when first chunk is read */
529 buffer->cur = buffer->data;
532 /* note that raw buffer content is kept */
533 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
535 encoded_buffer *src = &readerinput->buffer->encoded;
536 encoded_buffer *dest = &readerinput->buffer->utf16;
542 hr = get_code_page(enc, &cp);
543 if (FAILED(hr)) return;
545 readerinput->buffer->code_page = cp;
546 len = readerinput_get_convlen(readerinput);
548 TRACE("switching to cp %d\n", cp);
550 /* just copy in this case */
551 if (enc == XmlEncoding_UTF16)
553 readerinput_grow(readerinput, len);
554 memcpy(dest->data, src->cur, len);
555 dest->written += len*sizeof(WCHAR);
559 dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
560 readerinput_grow(readerinput, dest_len);
561 ptr = (WCHAR*)dest->data;
562 MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
564 dest->written += dest_len*sizeof(WCHAR);
567 /* shrinks parsed data a buffer begins with */
568 static void reader_shrink(xmlreader *reader)
570 encoded_buffer *buffer = &reader->input->buffer->utf16;
572 /* avoid to move too often using threshold shrink length */
573 if (buffer->cur - buffer->data > buffer->written / 2)
575 buffer->written -= buffer->cur - buffer->data;
576 memmove(buffer->data, buffer->cur, buffer->written);
577 buffer->cur = buffer->data;
578 *(WCHAR*)&buffer->cur[buffer->written] = 0;
582 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
583 It won't attempt to shrink but will grow destination buffer if needed */
584 static void reader_more(xmlreader *reader)
586 xmlreaderinput *readerinput = reader->input;
587 encoded_buffer *src = &readerinput->buffer->encoded;
588 encoded_buffer *dest = &readerinput->buffer->utf16;
589 UINT cp = readerinput->buffer->code_page;
593 /* get some raw data from stream first */
594 readerinput_growraw(readerinput);
595 len = readerinput_get_convlen(readerinput);
597 /* just copy for UTF-16 case */
600 readerinput_grow(readerinput, len);
601 memcpy(dest->data, src->cur, len);
602 dest->written += len*sizeof(WCHAR);
606 dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
607 readerinput_grow(readerinput, dest_len);
608 ptr = (WCHAR*)dest->data;
609 MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
611 dest->written += dest_len*sizeof(WCHAR);
612 /* get rid of processed data */
613 readerinput_shrinkraw(readerinput, len);
616 static inline WCHAR *reader_get_cur(xmlreader *reader)
618 WCHAR *ptr = (WCHAR*)reader->input->buffer->utf16.cur;
619 if (!*ptr) reader_more(reader);
623 static int reader_cmp(xmlreader *reader, const WCHAR *str)
625 const WCHAR *ptr = reader_get_cur(reader);
626 return strncmpW(str, ptr, strlenW(str));
629 /* moves cursor n WCHARs forward */
630 static void reader_skipn(xmlreader *reader, int n)
632 encoded_buffer *buffer = &reader->input->buffer->utf16;
633 const WCHAR *ptr = reader_get_cur(reader);
635 while (*ptr++ && n--)
637 buffer->cur += sizeof(WCHAR);
642 static inline int is_wchar_space(WCHAR ch)
644 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
647 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
648 static int reader_skipspaces(xmlreader *reader)
650 encoded_buffer *buffer = &reader->input->buffer->utf16;
651 const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
653 while (is_wchar_space(*ptr))
655 buffer->cur += sizeof(WCHAR);
658 else if (*ptr == '\n')
671 /* [26] VersionNum ::= '1.' [0-9]+ */
672 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
674 WCHAR *ptr, *ptr2, *start = reader_get_cur(reader);
675 static const WCHAR onedotW[] = {'1','.',0};
677 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
679 reader_skipn(reader, 2);
681 ptr2 = ptr = reader_get_cur(reader);
682 while (*ptr >= '0' && *ptr <= '9')
685 if (ptr2 == ptr) return WC_E_DIGIT;
686 TRACE("version=%s\n", debugstr_wn(start, ptr-start));
688 val->len = ptr-start;
689 reader_skipn(reader, ptr-ptr2);
693 /* [25] Eq ::= S? '=' S? */
694 static HRESULT reader_parse_eq(xmlreader *reader)
696 static const WCHAR eqW[] = {'=',0};
697 reader_skipspaces(reader);
698 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
700 reader_skipn(reader, 1);
701 reader_skipspaces(reader);
705 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
706 static HRESULT reader_parse_versioninfo(xmlreader *reader)
708 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
712 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
714 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
715 name.str = reader_get_cur(reader);
718 reader_skipn(reader, 7);
720 hr = reader_parse_eq(reader);
721 if (FAILED(hr)) return hr;
723 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
726 reader_skipn(reader, 1);
728 hr = reader_parse_versionnum(reader, &val);
729 if (FAILED(hr)) return hr;
731 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
735 reader_skipn(reader, 1);
737 return reader_add_attr(reader, &name, &val);
740 /* ([A-Za-z0-9._] | '-') */
741 static inline int is_wchar_encname(WCHAR ch)
743 return ((ch >= 'A' && ch <= 'Z') ||
744 (ch >= 'a' && ch <= 'z') ||
745 (ch >= '0' && ch <= '9') ||
746 (ch == '.') || (ch == '_') ||
750 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
751 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
753 WCHAR *start = reader_get_cur(reader), *ptr;
757 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
761 while (is_wchar_encname(*++ptr))
765 enc = parse_encoding_name(start, len);
766 TRACE("encoding name %s\n", debugstr_wn(start, len));
770 if (enc == XmlEncoding_Unknown)
773 /* skip encoding name */
774 reader_skipn(reader, len);
778 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
779 static HRESULT reader_parse_encdecl(xmlreader *reader)
781 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
785 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
787 if (reader_cmp(reader, encodingW)) return S_FALSE;
788 name.str = reader_get_cur(reader);
790 /* skip 'encoding' */
791 reader_skipn(reader, 8);
793 hr = reader_parse_eq(reader);
794 if (FAILED(hr)) return hr;
796 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
799 reader_skipn(reader, 1);
801 hr = reader_parse_encname(reader, &val);
802 if (FAILED(hr)) return hr;
804 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
808 reader_skipn(reader, 1);
810 return reader_add_attr(reader, &name, &val);
813 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
814 static HRESULT reader_parse_sddecl(xmlreader *reader)
816 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
817 static const WCHAR yesW[] = {'y','e','s',0};
818 static const WCHAR noW[] = {'n','o',0};
823 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
825 if (reader_cmp(reader, standaloneW)) return S_FALSE;
826 name.str = reader_get_cur(reader);
828 /* skip 'standalone' */
829 reader_skipn(reader, 10);
831 hr = reader_parse_eq(reader);
832 if (FAILED(hr)) return hr;
834 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
837 reader_skipn(reader, 1);
839 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
842 start = reader_get_cur(reader);
843 /* skip 'yes'|'no' */
844 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
845 ptr = reader_get_cur(reader);
846 TRACE("standalone=%s\n", debugstr_wn(start, ptr-start));
850 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
853 reader_skipn(reader, 1);
855 return reader_add_attr(reader, &name, &val);
858 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
859 static HRESULT reader_parse_xmldecl(xmlreader *reader)
861 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
862 static const WCHAR declcloseW[] = {'?','>',0};
865 /* check if we have "<?xml " */
866 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
868 reader_skipn(reader, 5);
869 hr = reader_parse_versioninfo(reader);
873 hr = reader_parse_encdecl(reader);
877 hr = reader_parse_sddecl(reader);
881 reader_skipspaces(reader);
882 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
883 reader_skipn(reader, 2);
885 reader->nodetype = XmlNodeType_XmlDeclaration;
886 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
887 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
888 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
/* Parses a comment node; the caller has already matched the '<!--' opening sequence.
   When a comment end is found the node type is set to XmlNodeType_Comment, the node
   value becomes the comment text and both name values are reset to empty. */
893 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
894 static HRESULT reader_parse_comment(xmlreader *reader)
/* skip the 4 characters of the opening sequence, then let the buffer discard parsed data */
899 reader_skipn(reader, 4);
900 reader_shrink(reader);
/* 'start' marks where the comment text begins, 'ptr' is the scan position */
901 ptr = start = reader_get_cur(reader);
/* comment text is everything between 'start' and the scan position; it still lives in
   the parse buffer here and is copied by reader_set_strvalue() below */
911 strval value = { start, ptr-start };
913 TRACE("%s\n", debugstr_wn(start, ptr-start));
/* skip the 3 characters of the closing sequence */
915 reader_skipn(reader, 3);
916 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
917 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
918 reader_set_strvalue(reader, StringValue_Value, &value);
919 reader->nodetype = XmlNodeType_Comment;
/* advance by one character and re-read the position, which may pull in more data */
933 reader_skipn(reader, 1);
934 ptr = reader_get_cur(reader);
/* reported when the function falls through to its end without finding a terminator */
938 return MX_E_INPUTEND;
941 static inline int is_namestartchar(WCHAR ch)
943 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
944 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
945 (ch >= 0xc0 && ch <= 0xd6) ||
946 (ch >= 0xd8 && ch <= 0xf6) ||
947 (ch >= 0xf8 && ch <= 0x2ff) ||
948 (ch >= 0x370 && ch <= 0x37d) ||
949 (ch >= 0x37f && ch <= 0x1fff) ||
950 (ch >= 0x200c && ch <= 0x200d) ||
951 (ch >= 0x2070 && ch <= 0x218f) ||
952 (ch >= 0x2c00 && ch <= 0x2fef) ||
953 (ch >= 0x3001 && ch <= 0xd7ff) ||
954 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
955 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
956 (ch >= 0xf900 && ch <= 0xfdcf) ||
957 (ch >= 0xfdf0 && ch <= 0xfffd);
960 static inline int is_namechar(WCHAR ch)
962 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
963 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
964 (ch == '-') || (ch == '.') ||
965 (ch >= '0' && ch <= '9') ||
967 (ch >= 0xc0 && ch <= 0xd6) ||
968 (ch >= 0xd8 && ch <= 0xf6) ||
969 (ch >= 0xf8 && ch <= 0x2ff) ||
970 (ch >= 0x300 && ch <= 0x36f) ||
971 (ch >= 0x370 && ch <= 0x37d) ||
972 (ch >= 0x37f && ch <= 0x1fff) ||
973 (ch >= 0x200c && ch <= 0x200d) ||
974 (ch >= 0x203f && ch <= 0x2040) ||
975 (ch >= 0x2070 && ch <= 0x218f) ||
976 (ch >= 0x2c00 && ch <= 0x2fef) ||
977 (ch >= 0x3001 && ch <= 0xd7ff) ||
978 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
979 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
980 (ch >= 0xf900 && ch <= 0xfdcf) ||
981 (ch >= 0xfdf0 && ch <= 0xfffd);
984 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
985 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
986 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
987 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
988 [5] Name ::= NameStartChar (NameChar)* */
989 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
991 WCHAR *ptr, *start = reader_get_cur(reader);
994 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
996 while (is_namechar(*ptr))
998 reader_skipn(reader, 1);
999 ptr = reader_get_cur(reader);
1002 TRACE("name %s:%d\n", debugstr_wn(start, ptr-start), (int)(ptr-start));
1004 name->len = ptr-start;
1009 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1010 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1012 static const WCHAR xmlW[] = {'x','m','l'};
1017 hr = reader_parse_name(reader, &name);
1018 if (FAILED(hr)) return WC_E_PI;
1020 /* now that we got name check for illegal content */
1021 if (name.len == 3 && !strncmpiW(name.str, xmlW, 3))
1022 return WC_E_LEADINGXML;
1024 /* PITarget can't be a qualified name */
1025 for (i = 0; i < name.len; i++)
1026 if (name.str[i] == ':')
1027 return i ? NC_E_NAMECOLON : WC_E_PI;
1029 TRACE("pitarget %s:%d\n", debugstr_wn(name.str, name.len), name.len);
/* Parses a processing instruction; the caller has already matched the '<?' opening
   sequence. On success the node type is set to XmlNodeType_ProcessingInstruction, both
   name values are set to the target and the node value to the instruction content
   (empty when the target is directly followed by '?>'). */
1034 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1035 static HRESULT reader_parse_pi(xmlreader *reader)
/* skip the 2 characters of the opening sequence, then let the buffer discard parsed data */
1042 reader_skipn(reader, 2);
1043 reader_shrink(reader);
/* target errors are passed on to the caller unchanged */
1045 hr = reader_parse_pitarget(reader, &target);
1046 if (FAILED(hr)) return hr;
1048 ptr = reader_get_cur(reader);
1049 /* exit earlier if there's no content */
1050 if (ptr[0] == '?' && ptr[1] == '>')
/* skip the 2 characters of the closing sequence */
1053 reader_skipn(reader, 2);
1054 reader->nodetype = XmlNodeType_ProcessingInstruction;
1055 reader_set_strvalue(reader, StringValue_LocalName, &target);
1056 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1057 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1061 /* now at least a single space char should be there */
1062 if (!is_wchar_space(*ptr)) return WC_E_WHITESPACE;
1063 reader_skipspaces(reader);
/* 'start' marks where the instruction content begins, 'ptr' is the scan position */
1065 ptr = start = reader_get_cur(reader);
/* content is everything between 'start' and the scan position; it still lives in the
   parse buffer here and is copied by reader_set_strvalue() below */
1073 strval value = { start, ptr-start };
1075 TRACE("%s\n", debugstr_wn(start, ptr-start));
/* skip the 2 characters of the closing sequence */
1077 reader_skipn(reader, 2);
1078 reader->nodetype = XmlNodeType_ProcessingInstruction;
1079 reader_set_strvalue(reader, StringValue_LocalName, &target);
1080 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1081 reader_set_strvalue(reader, StringValue_Value, &value);
/* explicitly request more converted data from the input */
1087 reader_more(reader);
/* advance by one character and re-read the position */
1092 reader_skipn(reader, 1);
1093 ptr = reader_get_cur(reader);
/* Parses prolog content that is allowed between the XML declaration and the rest of
   the document: whitespace, comments and processing instructions.
   'hr' starts as S_FALSE and is replaced by the result of a comment or PI parse;
   failures are returned to the caller right away. */
1100 /* [27] Misc ::= Comment | PI | S */
1101 static HRESULT reader_parse_misc(xmlreader *reader)
1103 HRESULT hr = S_FALSE;
/* opening sequences used to tell comments and processing instructions apart */
1107 static const WCHAR commentW[] = {'<','!','-','-',0};
1108 static const WCHAR piW[] = {'<','?',0};
1109 const WCHAR *cur = reader_get_cur(reader);
/* dispatch on what the current position starts with; whitespace is skipped without
   touching 'hr' */
1111 if (is_wchar_space(*cur))
1112 reader_skipspaces(reader);
1113 else if (!reader_cmp(reader, commentW))
1114 hr = reader_parse_comment(reader);
1115 else if (!reader_cmp(reader, piW))
1116 hr = reader_parse_pi(reader);
1120 if (FAILED(hr)) return hr;
/* Advances the reader according to its internal input state (reader->instate):
   the initial state detects the encoding and parses the XML declaration, the
   Misc/DTD state parses prolog content, and DTD parsing itself is not supported.
   Failures from the individual steps are returned to the caller unchanged. */
1126 static HRESULT reader_parse_nextnode(xmlreader *reader)
1132 switch (reader->instate)
1134 /* if it's a first call for a new input we need to detect stream encoding */
1135 case XmlReadInState_Initial:
/* read the first raw chunk, encoding detection needs some data to look at */
1139 hr = readerinput_growraw(reader->input);
1140 if (FAILED(hr)) return hr;
1142 /* try to detect encoding by BOM or data and set input code page */
1143 hr = readerinput_detectencoding(reader->input, &enc);
/* NOTE(review): 'enc' is used as an index before 'hr' is checked; xml_encoding_map has
   two entries, so this looks like an out of range read whenever detection leaves 'enc'
   at XmlEncoding_Unknown - confirm the enum values and guard the lookup */
1144 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
1145 if (FAILED(hr)) return hr;
1147 /* always switch first time cause we have to put something in */
1148 readerinput_switchencoding(reader->input, enc);
1150 /* parse xml declaration */
1151 hr = reader_parse_xmldecl(reader);
1152 if (FAILED(hr)) return hr;
/* the declaration is only looked for once; S_OK means a declaration node was parsed
   and is reported to the caller */
1154 reader->instate = XmlReadInState_Misc_DTD;
1155 if (hr == S_OK) return hr;
1158 case XmlReadInState_Misc_DTD:
1159 hr = reader_parse_misc(reader);
1160 if (FAILED(hr)) return hr;
/* prolog content is done, continue with the DTD state */
1163 reader->instate = XmlReadInState_DTD;
1167 case XmlReadInState_DTD:
1168 FIXME("DTD parsing not supported\n");
1171 FIXME("internal state %d not handled\n", reader->instate);
1179 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
1181 xmlreader *This = impl_from_IXmlReader(iface);
1183 TRACE("%p %s %p\n", This, debugstr_guid(riid), ppvObject);
1185 if (IsEqualGUID(riid, &IID_IUnknown) ||
1186 IsEqualGUID(riid, &IID_IXmlReader))
1192 FIXME("interface %s not implemented\n", debugstr_guid(riid));
1193 return E_NOINTERFACE;
1196 IXmlReader_AddRef(iface);
1201 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
1203 xmlreader *This = impl_from_IXmlReader(iface);
1204 ULONG ref = InterlockedIncrement(&This->ref);
1205 TRACE("(%p)->(%d)\n", This, ref);
1209 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
1211 xmlreader *This = impl_from_IXmlReader(iface);
1212 LONG ref = InterlockedDecrement(&This->ref);
1214 TRACE("(%p)->(%d)\n", This, ref);
1218 IMalloc *imalloc = This->imalloc;
1219 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
1220 reader_clear_attrs(This);
1221 reader_free_strvalues(This);
1222 reader_free(This, This);
1223 if (imalloc) IMalloc_Release(imalloc);
/* IXmlReader::SetInput. Detaches the input currently held by the reader, resets the
   position counters and attaches the supplied input. The input may already be an
   IXmlReaderInput, otherwise one is created around the supplied interface. */
1229 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
1231 xmlreader *This = impl_from_IXmlReader(iface);
1234 TRACE("(%p)->(%p)\n", This, input);
/* drop the stream reference and the reference on the input held so far */
1238 readerinput_release_stream(This->input);
1239 IUnknown_Release(&This->input->IXmlReaderInput_iface);
/* position tracking restarts for the new input */
1243 This->line = This->pos = 0;
1245 /* just reset current input */
1248 This->state = XmlReadState_Initial;
1252 /* now try IXmlReaderInput, ISequentialStream, IStream */
/* the returned interface pointer is stored directly as the reader's input object */
1253 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&This->input);
1256 IXmlReaderInput *readerinput;
1258 /* create IXmlReaderInput basing on supplied interface */
1259 hr = CreateXmlReaderInputWithEncodingName(input,
1260 NULL, NULL, FALSE, NULL, &readerinput);
1261 if (hr != S_OK) return hr;
1262 This->input = impl_from_IXmlReaderInput(readerinput);
1265 /* set stream for supplied IXmlReaderInput */
1266 hr = readerinput_query_for_stream(This->input);
/* both the public read state and the internal parser state start over */
1269 This->state = XmlReadState_Initial;
1270 This->instate = XmlReadInState_Initial;
1276 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
1278 xmlreader *This = impl_from_IXmlReader(iface);
1280 TRACE("(%p %u %p)\n", This, property, value);
1282 if (!value) return E_INVALIDARG;
1286 case XmlReaderProperty_DtdProcessing:
1287 *value = This->dtdmode;
1289 case XmlReaderProperty_ReadState:
1290 *value = This->state;
1293 FIXME("Unimplemented property (%u)\n", property);
1300 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
1302 xmlreader *This = impl_from_IXmlReader(iface);
1304 TRACE("(%p %u %lu)\n", iface, property, value);
1308 case XmlReaderProperty_DtdProcessing:
1309 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
1310 This->dtdmode = value;
1313 FIXME("Unimplemented property (%u)\n", property);
1320 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
1322 xmlreader *This = impl_from_IXmlReader(iface);
1323 XmlNodeType oldtype = This->nodetype;
1326 TRACE("(%p)->(%p)\n", This, nodetype);
1328 if (This->state == XmlReadState_Closed) return S_FALSE;
1330 hr = reader_parse_nextnode(This);
1331 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
1332 This->state = XmlReadState_Interactive;
1333 if (hr == S_OK) *nodetype = This->nodetype;
1338 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
1340 xmlreader *This = impl_from_IXmlReader(iface);
1341 TRACE("(%p)->(%p)\n", This, node_type);
1343 /* When we're on attribute always return attribute type, container node type is kept.
1344 Note that container is not necessarily an element, and attribute doesn't mean it's
1345 an attribute in XML spec terms. */
1346 *node_type = This->attr ? XmlNodeType_Attribute : This->nodetype;
1347 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
1350 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
1352 xmlreader *This = impl_from_IXmlReader(iface);
1354 TRACE("(%p)\n", This);
1356 if (!This->attr_count) return S_FALSE;
1357 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
1361 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
1363 xmlreader *This = impl_from_IXmlReader(iface);
1364 const struct list *next;
1366 TRACE("(%p)\n", This);
1368 if (!This->attr_count) return S_FALSE;
1371 return IXmlReader_MoveToFirstAttribute(iface);
1373 next = list_next(&This->attrs, &This->attr->entry);
1375 This->attr = LIST_ENTRY(next, struct attribute, entry);
1377 return next ? S_OK : S_FALSE;
1380 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
1382 LPCWSTR namespaceUri)
1384 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
1388 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
1390 xmlreader *This = impl_from_IXmlReader(iface);
1392 TRACE("(%p)\n", This);
1394 if (!This->attr_count) return S_FALSE;
1399 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
1401 xmlreader *This = impl_from_IXmlReader(iface);
1403 TRACE("(%p)->(%p %p)\n", This, name, len);
1404 *name = This->strvalues[StringValue_QualifiedName].str;
1405 *len = This->strvalues[StringValue_QualifiedName].len;
1409 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
1410 LPCWSTR *namespaceUri,
1411 UINT *namespaceUri_length)
1413 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
1417 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
1419 xmlreader *This = impl_from_IXmlReader(iface);
1421 TRACE("(%p)->(%p %p)\n", This, name, len);
1422 *name = This->strvalues[StringValue_LocalName].str;
1423 *len = This->strvalues[StringValue_LocalName].len;
1427 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface,
1429 UINT *prefix_length)
1431 FIXME("(%p %p %p): stub\n", iface, prefix, prefix_length);
1435 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, LPCWSTR *value, UINT *len)
1437 xmlreader *This = impl_from_IXmlReader(iface);
1439 TRACE("(%p)->(%p %p)\n", This, value, len);
1440 *value = This->strvalues[StringValue_Value].str;
1441 if (len) *len = This->strvalues[StringValue_Value].len;
1445 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface,
1450 FIXME("(%p %p %u %p): stub\n", iface, buffer, chunk_size, read);
1454 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
1456 UINT *baseUri_length)
1458 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
1462 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
1464 FIXME("(%p): stub\n", iface);
1468 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
1470 FIXME("(%p): stub\n", iface);
1474 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
1476 xmlreader *This = impl_from_IXmlReader(iface);
1478 TRACE("(%p %p)\n", This, lineNumber);
1480 if (!lineNumber) return E_INVALIDARG;
1482 *lineNumber = This->line;
1487 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
1489 xmlreader *This = impl_from_IXmlReader(iface);
1491 TRACE("(%p %p)\n", This, linePosition);
1493 if (!linePosition) return E_INVALIDARG;
1495 *linePosition = This->pos;
1500 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
1502 xmlreader *This = impl_from_IXmlReader(iface);
1504 TRACE("(%p)->(%p)\n", This, count);
1506 if (!count) return E_INVALIDARG;
1508 *count = This->attr_count;
1512 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
1514 FIXME("(%p %p): stub\n", iface, depth);
1518 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
1520 FIXME("(%p): stub\n", iface);
1524 static const struct IXmlReaderVtbl xmlreader_vtbl =
1526 xmlreader_QueryInterface,
1530 xmlreader_GetProperty,
1531 xmlreader_SetProperty,
1533 xmlreader_GetNodeType,
1534 xmlreader_MoveToFirstAttribute,
1535 xmlreader_MoveToNextAttribute,
1536 xmlreader_MoveToAttributeByName,
1537 xmlreader_MoveToElement,
1538 xmlreader_GetQualifiedName,
1539 xmlreader_GetNamespaceUri,
1540 xmlreader_GetLocalName,
1541 xmlreader_GetPrefix,
1543 xmlreader_ReadValueChunk,
1544 xmlreader_GetBaseUri,
1545 xmlreader_IsDefault,
1546 xmlreader_IsEmptyElement,
1547 xmlreader_GetLineNumber,
1548 xmlreader_GetLinePosition,
1549 xmlreader_GetAttributeCount,
1554 /** IXmlReaderInput **/
1555 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
1557 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
1559 TRACE("%p %s %p\n", This, debugstr_guid(riid), ppvObject);
1561 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
1562 IsEqualGUID(riid, &IID_IUnknown))
1568 WARN("interface %s not implemented\n", debugstr_guid(riid));
1569 return E_NOINTERFACE;
1572 IUnknown_AddRef(iface);
1577 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
1579 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
1580 ULONG ref = InterlockedIncrement(&This->ref);
1581 TRACE("(%p)->(%d)\n", This, ref);
1585 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
1587 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
1588 LONG ref = InterlockedDecrement(&This->ref);
1590 TRACE("(%p)->(%d)\n", This, ref);
1594 IMalloc *imalloc = This->imalloc;
1595 if (This->input) IUnknown_Release(This->input);
1596 if (This->stream) ISequentialStream_Release(This->stream);
1597 if (This->buffer) free_input_buffer(This->buffer);
1598 readerinput_free(This, This->baseuri);
1599 readerinput_free(This, This);
1600 if (imalloc) IMalloc_Release(imalloc);
1606 static const struct IUnknownVtbl xmlreaderinput_vtbl =
1608 xmlreaderinput_QueryInterface,
1609 xmlreaderinput_AddRef,
1610 xmlreaderinput_Release
1613 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
1618 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
1620 if (!IsEqualGUID(riid, &IID_IXmlReader))
1622 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
1627 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
1629 reader = heap_alloc(sizeof(*reader));
1630 if(!reader) return E_OUTOFMEMORY;
1632 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
1634 reader->input = NULL;
1635 reader->state = XmlReadState_Closed;
1636 reader->instate = XmlReadInState_Initial;
1637 reader->dtdmode = DtdProcessing_Prohibit;
1638 reader->line = reader->pos = 0;
1639 reader->imalloc = imalloc;
1640 if (imalloc) IMalloc_AddRef(imalloc);
1641 reader->nodetype = XmlNodeType_None;
1642 list_init(&reader->attrs);
1643 reader->attr_count = 0;
1644 reader->attr = NULL;
1646 for (i = 0; i < StringValue_Last; i++)
1647 reader->strvalues[i] = strval_empty;
1649 *obj = &reader->IXmlReader_iface;
1651 TRACE("returning iface %p\n", *obj);
1656 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
1661 IXmlReaderInput **ppInput)
1663 xmlreaderinput *readerinput;
1666 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
1667 hint, wine_dbgstr_w(base_uri), ppInput);
1669 if (!stream || !ppInput) return E_INVALIDARG;
1672 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
1674 readerinput = heap_alloc(sizeof(*readerinput));
1675 if(!readerinput) return E_OUTOFMEMORY;
1677 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinput_vtbl;
1678 readerinput->ref = 1;
1679 readerinput->imalloc = imalloc;
1680 readerinput->stream = NULL;
1681 if (imalloc) IMalloc_AddRef(imalloc);
1682 readerinput->encoding = parse_encoding_name(encoding, -1);
1683 readerinput->hint = hint;
1684 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
1686 hr = alloc_input_buffer(readerinput);
1689 readerinput_free(readerinput, readerinput);
1692 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
1694 *ppInput = &readerinput->IXmlReaderInput_iface;
1696 TRACE("returning iface %p\n", *ppInput);