2 * Copyright 2010 Jacek Caban for CodeWeavers
3 * Copyright 2010 Thomas Mullaly
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 #include "urlmon_main.h"
21 #include "wine/debug.h"
23 #define NO_SHLWAPI_REG
26 #define UINT_MAX 0xffffffff
27 #define USHORT_MAX 0xffff
29 WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
32 const IUriVtbl *lpIUriVtbl;
37 /* Information about the canonicalized URI's buffer. */
44 URL_SCHEME scheme_type;
52 Uri_HOST_TYPE host_type;
64 const IUriBuilderVtbl *lpIUriBuilderVtbl;
74 /* IPv6 addresses can hold up to 8 h16 components. */
78 /* An IPv6 can have 1 elision ("::"). */
81 /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
94 BOOL has_implicit_scheme;
100 URL_SCHEME scheme_type;
102 const WCHAR *userinfo;
108 Uri_HOST_TYPE host_type;
111 ipv6_address ipv6_address;
118 static const CHAR hexDigits[] = "0123456789ABCDEF";
120 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. */
121 static const struct {
123 WCHAR scheme_name[16];
124 } recognized_schemes[] = {
125 {URL_SCHEME_FTP, {'f','t','p',0}},
126 {URL_SCHEME_HTTP, {'h','t','t','p',0}},
127 {URL_SCHEME_GOPHER, {'g','o','p','h','e','r',0}},
128 {URL_SCHEME_MAILTO, {'m','a','i','l','t','o',0}},
129 {URL_SCHEME_NEWS, {'n','e','w','s',0}},
130 {URL_SCHEME_NNTP, {'n','n','t','p',0}},
131 {URL_SCHEME_TELNET, {'t','e','l','n','e','t',0}},
132 {URL_SCHEME_WAIS, {'w','a','i','s',0}},
133 {URL_SCHEME_FILE, {'f','i','l','e',0}},
134 {URL_SCHEME_MK, {'m','k',0}},
135 {URL_SCHEME_HTTPS, {'h','t','t','p','s',0}},
136 {URL_SCHEME_SHELL, {'s','h','e','l','l',0}},
137 {URL_SCHEME_SNEWS, {'s','n','e','w','s',0}},
138 {URL_SCHEME_LOCAL, {'l','o','c','a','l',0}},
139 {URL_SCHEME_JAVASCRIPT, {'j','a','v','a','s','c','r','i','p','t',0}},
140 {URL_SCHEME_VBSCRIPT, {'v','b','s','c','r','i','p','t',0}},
141 {URL_SCHEME_ABOUT, {'a','b','o','u','t',0}},
142 {URL_SCHEME_RES, {'r','e','s',0}},
143 {URL_SCHEME_MSSHELLROOTED, {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
144 {URL_SCHEME_MSSHELLIDLIST, {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
145 {URL_SCHEME_MSHELP, {'h','c','p',0}},
146 {URL_SCHEME_WILDCARD, {'*',0}}
149 /* List of default ports Windows recognizes. */
150 static const struct {
153 } default_ports[] = {
154 {URL_SCHEME_FTP, 21},
155 {URL_SCHEME_HTTP, 80},
156 {URL_SCHEME_GOPHER, 70},
157 {URL_SCHEME_NNTP, 119},
158 {URL_SCHEME_TELNET, 23},
159 {URL_SCHEME_WAIS, 210},
160 {URL_SCHEME_HTTPS, 443},
163 /* List of 3 character top level domain names Windows seems to recognize.
164 * There might be more, but, these are the only ones I've found so far.
166 static const struct {
168 } recognized_tlds[] = {
178 static inline BOOL is_alpha(WCHAR val) {
179 return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z'));
182 static inline BOOL is_num(WCHAR val) {
183 return (val >= '0' && val <= '9');
186 /* A URI is implicitly a file path if it begins with
187 * a drive letter (eg X:) or starts with "\\" (UNC path).
189 static inline BOOL is_implicit_file_path(const WCHAR *str) {
190 if(is_alpha(str[0]) && str[1] == ':')
192 else if(str[0] == '\\' && str[1] == '\\')
198 /* Checks if the URI is a hierarchical URI. A hierarchical
199 * URI is one that has "//" after the scheme.
201 static BOOL check_hierarchical(const WCHAR **ptr) {
202 const WCHAR *start = *ptr;
217 /* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */
218 static inline BOOL is_unreserved(WCHAR val) {
219 return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
220 val == '_' || val == '~');
223 /* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
224 * / "*" / "+" / "," / ";" / "="
226 static inline BOOL is_subdelim(WCHAR val) {
227 return (val == '!' || val == '$' || val == '&' ||
228 val == '\'' || val == '(' || val == ')' ||
229 val == '*' || val == '+' || val == ',' ||
230 val == ';' || val == '=');
233 /* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
234 static inline BOOL is_gendelim(WCHAR val) {
235 return (val == ':' || val == '/' || val == '?' ||
236 val == '#' || val == '[' || val == ']' ||
240 /* Characters that delimit the end of the authority
241 * section of a URI. Sometimes a '\\' is considered
242 * an authority delimiter.
244 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
245 return (val == '#' || val == '/' || val == '?' ||
246 val == '\0' || (acceptSlash && val == '\\'));
249 /* reserved = gen-delims / sub-delims */
250 static inline BOOL is_reserved(WCHAR val) {
251 return (is_subdelim(val) || is_gendelim(val));
254 static inline BOOL is_hexdigit(WCHAR val) {
255 return ((val >= 'a' && val <= 'f') ||
256 (val >= 'A' && val <= 'F') ||
257 (val >= '0' && val <= '9'));
260 /* Computes the size of the given IPv6 address.
261 * Each h16 component is 16bits, if there is an IPv4 address, it's
262 * 32bits. If there's an elision it can be 16bits to 128bits, depending
263 * on the number of other components.
265 * Modeled after google-url's CheckIPv6ComponentsSize function
267 static void compute_ipv6_comps_size(ipv6_address *address) {
268 address->components_size = address->h16_count * 2;
271 /* IPv4 address is 4 bytes. */
272 address->components_size += 4;
274 if(address->elision) {
275 /* An elision can be anywhere from 2 bytes up to 16 bytes.
276 * Its size depends on the size of the h16 and IPv4 components.
278 address->elision_size = 16 - address->components_size;
279 if(address->elision_size < 2)
280 address->elision_size = 2;
282 address->elision_size = 0;
285 /* Taken from dlls/jscript/lex.c */
286 static int hex_to_int(WCHAR val) {
287 if(val >= '0' && val <= '9')
289 else if(val >= 'a' && val <= 'f')
290 return val - 'a' + 10;
291 else if(val >= 'A' && val <= 'F')
292 return val - 'A' + 10;
297 /* Helper function for converting a percent encoded string
298 * representation of a WCHAR value into its actual WCHAR value. If
299 * the two characters following the '%' aren't valid hex values then
300 * this function returns the NULL character.
303 * "%2E" will result in '.' being returned by this function.
305 static WCHAR decode_pct_val(const WCHAR *ptr) {
308 if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) {
309 INT a = hex_to_int(*(ptr + 1));
310 INT b = hex_to_int(*(ptr + 2));
319 /* Helper function for percent encoding a given character
320 * and storing the encoded value into a given buffer (dest).
322 * It's up to the calling function to ensure that there is
323 * at least enough space in 'dest' for the percent encoded
324 * value to be stored (so dest + 3 spaces available).
326 static inline void pct_encode_val(WCHAR val, WCHAR *dest) {
328 dest[1] = hexDigits[(val >> 4) & 0xf];
329 dest[2] = hexDigits[val & 0xf];
332 /* Scans the range of characters [str, end] and returns the last occurrence
333 * of 'ch' or returns NULL.
335 static const WCHAR *str_last_of(const WCHAR *str, const WCHAR *end, WCHAR ch) {
336 const WCHAR *ptr = end;
347 /* Attempts to parse the domain name from the host.
349 * This function also includes the Top-level Domain (TLD) name
350 * of the host when it tries to find the domain name. If it finds
351 * a valid domain name it will assign 'domain_start' the offset
352 * into 'host' where the domain name starts.
354 * If a domain name is found, its range is implied to be
355 * [host+domain_start, host+host_len).
357 static void find_domain_name(const WCHAR *host, DWORD host_len,
359 const WCHAR *last_tld, *sec_last_tld, *end;
361 end = host+host_len-1;
365 /* There has to be at least enough room for a '.' followed by a
366 * 3 character TLD for a domain to even exist in the host name.
371 last_tld = str_last_of(host, end, '.');
373 /* http://hostname -> has no domain name. */
376 sec_last_tld = str_last_of(host, last_tld-1, '.');
378 /* If the '.' is at the beginning of the host there
379 * has to be at least 3 characters in the TLD for it
381 * Ex: .com -> .com as the domain name.
382 * .co -> has no domain name.
384 if(last_tld-host == 0) {
385 if(end-(last_tld-1) < 3)
387 } else if(last_tld-host == 3) {
390 /* If there's three characters in front of last_tld and
391 * they are on the list of recognized TLDs, then this
392 * host doesn't have a domain (since the host only contains
394 * Ex: edu.uk -> has no domain name.
395 * foo.uk -> foo.uk as the domain name.
397 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
398 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
401 } else if(last_tld-host < 3)
402 /* Anything less than 3 characters is considered part
404 * Ex: ak.uk -> Has no domain name.
408 /* Otherwise the domain name is the whole host name. */
410 } else if(end+1-last_tld > 3) {
411 /* If the last_tld has more than 3 characters then it's automatically
412 * considered the TLD of the domain name.
413 * Ex: www.winehq.org.uk.test -> uk.test as the domain name.
415 *domain_start = (sec_last_tld+1)-host;
416 } else if(last_tld - (sec_last_tld+1) < 4) {
418 /* If the sec_last_tld is 3 characters long it HAS to be on the list of
419 * recognized TLDs to still be considered part of the TLD name, otherwise
420 * it's considered the domain name.
421 * Ex: www.google.com.uk -> google.com.uk as the domain name.
422 * www.google.foo.uk -> foo.uk as the domain name.
424 if(last_tld - (sec_last_tld+1) == 3) {
425 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
426 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
427 const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
432 *domain_start = (domain+1) - host;
433 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
434 (host+host_len)-(host+*domain_start)));
439 *domain_start = (sec_last_tld+1)-host;
441 /* Since the sec_last_tld is less than 3 characters it's considered
443 * Ex: www.google.fo.uk -> google.fo.uk as the domain name.
445 const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
450 *domain_start = (domain+1) - host;
453 /* The second to last TLD has more than 3 characters making it
455 * Ex: www.google.test.us -> test.us as the domain name.
457 *domain_start = (sec_last_tld+1)-host;
460 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
461 (host+host_len)-(host+*domain_start)));
464 /* Computes the location where the elision should occur in the IPv6
465 * address using the numerical values of each component stored in
466 * 'values'. If the address shouldn't contain an elision then 'index'
467 * is assigned -1 as its value. Otherwise 'index' will contain the
468 * starting index (into values) where the elision should be, and 'count'
469 * will contain the number of cells the elision covers.
472 * Windows will expand an elision if the elision only represents 1 h16
473 * component of the URI.
475 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
477 * If the IPv6 address contains an IPv4 address, the IPv4 address is also
478 * considered for being included as part of an elision if all its components
481 * Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
483 static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
484 INT *index, DWORD *count) {
485 DWORD i, max_len, cur_len;
486 INT max_index, cur_index;
488 max_len = cur_len = 0;
489 max_index = cur_index = -1;
490 for(i = 0; i < 8; ++i) {
491 BOOL check_ipv4 = (address->ipv4 && i == 6);
492 BOOL is_end = (check_ipv4 || i == 7);
495 /* Check if the IPv4 address contains only zeros. */
496 if(values[i] == 0 && values[i+1] == 0) {
503 } else if(values[i] == 0) {
510 if(is_end || values[i] != 0) {
511 /* We only consider it for an elision if it's
512 * more than 1 component long.
514 if(cur_len > 1 && cur_len > max_len) {
515 /* Found the new elision location. */
517 max_index = cur_index;
520 /* Reset the current range for the next range of zeros. */
530 /* Converts the specified IPv4 address into an uint value.
532 * This function assumes that the IPv4 address has already been validated.
534 static UINT ipv4toui(const WCHAR *ip, DWORD len) {
536 DWORD comp_value = 0;
539 for(ptr = ip; ptr < ip+len; ++ptr) {
545 comp_value = comp_value*10 + (*ptr-'0');
554 /* Converts an IPv4 address in numerical form into its fully qualified
555 * string form. This function returns the number of characters written
556 * to 'dest'. If 'dest' is NULL this function will return the number of
557 * characters that would have been written.
559 * It's up to the caller to ensure there's enough space in 'dest' for the
562 static DWORD ui2ipv4(WCHAR *dest, UINT address) {
563 static const WCHAR formatW[] =
564 {'%','u','.','%','u','.','%','u','.','%','u',0};
568 digits[0] = (address >> 24) & 0xff;
569 digits[1] = (address >> 16) & 0xff;
570 digits[2] = (address >> 8) & 0xff;
571 digits[3] = address & 0xff;
575 ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]);
577 ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]);
582 /* Converts an h16 component (from an IPv6 address) into its
585 * This function assumes that the h16 component has already been validated.
587 static USHORT h16tous(h16 component) {
591 for(i = 0; i < component.len; ++i) {
593 ret += hex_to_int(component.str[i]);
599 /* Converts an IPv6 address into its 128 bits (16 bytes) numerical value.
601 * This function assumes that the ipv6_address has already been validated.
603 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
604 DWORD i, cur_component = 0;
605 BOOL already_passed_elision = FALSE;
607 for(i = 0; i < address->h16_count; ++i) {
608 if(address->elision) {
609 if(address->components[i].str > address->elision && !already_passed_elision) {
610 /* Means we just passed the elision and need to add its values to
611 * 'number' before we do anything else.
614 for(j = 0; j < address->elision_size; j+=2)
615 number[cur_component++] = 0;
617 already_passed_elision = TRUE;
621 number[cur_component++] = h16tous(address->components[i]);
624 /* Case when the elision appears after the h16 components. */
625 if(!already_passed_elision && address->elision) {
626 for(i = 0; i < address->elision_size; i+=2)
627 number[cur_component++] = 0;
628 already_passed_elision = TRUE;
632 UINT value = ipv4toui(address->ipv4, address->ipv4_len);
634 if(cur_component != 6) {
635 ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
639 number[cur_component++] = (value >> 16) & 0xffff;
640 number[cur_component] = value & 0xffff;
646 /* Checks if the characters pointed to by 'ptr' are
647 * a percent encoded data octet.
649 * pct-encoded = "%" HEXDIG HEXDIG
651 static BOOL check_pct_encoded(const WCHAR **ptr) {
652 const WCHAR *start = *ptr;
658 if(!is_hexdigit(**ptr)) {
664 if(!is_hexdigit(**ptr)) {
673 /* dec-octet = DIGIT ; 0-9
674 * / %x31-39 DIGIT ; 10-99
675 * / "1" 2DIGIT ; 100-199
676 * / "2" %x30-34 DIGIT ; 200-249
677 * / "25" %x30-35 ; 250-255
679 static BOOL check_dec_octet(const WCHAR **ptr) {
680 const WCHAR *c1, *c2, *c3;
683 /* A dec-octet must be at least 1 digit long. */
684 if(*c1 < '0' || *c1 > '9')
690 /* Since the 1 digit requirement was met, it doesn't
691 * matter if this is a DIGIT value, it's considered a
694 if(*c2 < '0' || *c2 > '9')
700 /* Same explanation as above. */
701 if(*c3 < '0' || *c3 > '9')
704 /* Anything > 255 isn't a valid IP dec-octet. */
705 if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') {
714 /* Checks if there is an implicit IPv4 address in the host component of the URI.
715 * The max value of an implicit IPv4 address is UINT_MAX.
718 * "234567" would be considered an implicit IPv4 address.
720 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) {
721 const WCHAR *start = *ptr;
725 while(is_num(**ptr)) {
726 ret = ret*10 + (**ptr - '0');
742 /* Checks if the string contains an IPv4 address.
744 * This function has a strict mode or a non-strict mode of operation
745 * When 'strict' is set to FALSE this function will return TRUE if
746 * the string contains at least 'dec-octet "." dec-octet' since partial
747 * IPv4 addresses will be normalized out into full IPv4 addresses. When
748 * 'strict' is set this function expects there to be a full IPv4 address.
750 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
752 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) {
753 const WCHAR *start = *ptr;
755 if(!check_dec_octet(ptr)) {
766 if(!check_dec_octet(ptr)) {
780 if(!check_dec_octet(ptr)) {
794 if(!check_dec_octet(ptr)) {
799 /* Found a four digit ip address. */
802 /* Tries to parse the scheme name of the URI.
804 * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3986.
805 * NOTE: Windows accepts a number as the first character of a scheme.
807 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data) {
808 const WCHAR *start = *ptr;
811 data->scheme_len = 0;
814 if(**ptr == '*' && *ptr == start) {
815 /* Might have found a wildcard scheme. If it is the next
816 * char has to be a ':' for it to be a valid URI
820 } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' &&
821 **ptr != '-' && **ptr != '.')
830 /* Schemes must end with a ':' */
836 data->scheme = start;
837 data->scheme_len = *ptr - start;
843 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores
844 * the deduced URL_SCHEME in data->scheme_type.
846 static BOOL parse_scheme_type(parse_data *data) {
847 /* If there's scheme data then see if it's a recognized scheme. */
848 if(data->scheme && data->scheme_len) {
851 for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) {
852 if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) {
853 /* Has to be a case insensitive compare. */
854 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) {
855 data->scheme_type = recognized_schemes[i].scheme;
861 /* If we get here it means it's not a recognized scheme. */
862 data->scheme_type = URL_SCHEME_UNKNOWN;
864 } else if(data->is_relative) {
865 /* Relative URI's have no scheme. */
866 data->scheme_type = URL_SCHEME_UNKNOWN;
869 /* Should never reach here! what happened... */
870 FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri));
875 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't
876 * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type
877 * using the flags specified in 'flags' (if any). Flags that affect how this function
878 * operates are the Uri_CREATE_ALLOW_* flags.
880 * All parsed/deduced information will be stored in 'data' when the function returns.
882 * Returns TRUE if it was able to successfully parse the information.
884 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
885 static const WCHAR fileW[] = {'f','i','l','e',0};
886 static const WCHAR wildcardW[] = {'*',0};
888 /* First check to see if the uri could implicitly be a file path. */
889 if(is_implicit_file_path(*ptr)) {
890 if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) {
891 data->scheme = fileW;
892 data->scheme_len = lstrlenW(fileW);
893 data->has_implicit_scheme = TRUE;
895 TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags);
897 /* Windows does not consider anything that can implicitly be a file
898 * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set...
900 TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n",
904 } else if(!parse_scheme_name(ptr, data)) {
905 /* No Scheme was found, this means it could be:
906 * a) an implicit Wildcard scheme
910 if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) {
911 data->scheme = wildcardW;
912 data->scheme_len = lstrlenW(wildcardW);
913 data->has_implicit_scheme = TRUE;
915 TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags);
916 } else if (flags & Uri_CREATE_ALLOW_RELATIVE) {
917 data->is_relative = TRUE;
918 TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags);
920 TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags);
925 if(!data->is_relative)
926 TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags,
927 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
929 if(!parse_scheme_type(data))
932 TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type);
936 /* Parses the userinfo part of the URI (if it exists). The userinfo field of
937 * a URI can consist of "username:password@", or just "username@".
940 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
943 * 1) If there is more than one ':' in the userinfo part of the URI Windows
944 * uses the first occurrence of ':' to delimit the username and password
948 * ftp://user:pass:word@winehq.org
950 * Would yield, "user" as the username and "pass:word" as the password.
952 * 2) Windows allows any character to appear in the "userinfo" part of
953 * a URI, as long as it's not in the authority delimiter character set.
955 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
956 data->userinfo = *ptr;
957 data->userinfo_split = -1;
959 while(**ptr != '@') {
960 if(**ptr == ':' && data->userinfo_split == -1)
961 data->userinfo_split = *ptr - data->userinfo;
962 else if(**ptr == '%') {
963 /* If it's a known scheme type, it has to be a valid percent
966 if(!check_pct_encoded(ptr)) {
967 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
968 *ptr = data->userinfo;
969 data->userinfo = NULL;
970 data->userinfo_split = -1;
972 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
977 } else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN))
984 *ptr = data->userinfo;
985 data->userinfo = NULL;
986 data->userinfo_split = -1;
988 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
992 data->userinfo_len = *ptr - data->userinfo;
993 TRACE("(%p %p %x): Found userinfo=%s userinfo_len=%d split=%d.\n", ptr, data, flags,
994 debugstr_wn(data->userinfo, data->userinfo_len), data->userinfo_len, data->userinfo_split);
998 /* Attempts to parse a port from the URI.
1001 * Windows seems to have a cap on what the maximum value
1002 * for a port can be. The max value is USHORT_MAX.
1006 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) {
1010 while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1011 if(!is_num(**ptr)) {
1017 port = port*10 + (**ptr-'0');
1019 if(port > USHORT_MAX) {
1028 data->port_value = port;
1029 data->port_len = *ptr - data->port;
1031 TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags,
1032 debugstr_wn(data->port, data->port_len), data->port_len, data->port_value);
1036 /* Attempts to parse a IPv4 address from the URI.
1039 * Windows normalizes IPv4 addresses. This means there are three
1040 * possibilities for the URI to contain an IPv4 address.
1041 * 1) A well formed address (ex. 192.2.2.2).
1042 * 2) A partially formed address. For example "192.0" would
1043 * normalize to "192.0.0.0" during canonicalization.
1044 * 3) An implicit IPv4 address. For example "256" would
1045 * normalize to "0.0.1.0" during canonicalization. Also
1046 * note that the maximum value for an implicit IP address
1047 * is UINT_MAX, if the value in the URI exceeds this then
1048 * it is not considered an IPv4 address.
1050 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1051 const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN;
1054 if(!check_ipv4address(ptr, FALSE)) {
1055 if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) {
1056 TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n",
1062 data->has_implicit_ip = TRUE;
1065 /* Check if what we found is the only part of the host name (if it isn't
1066 * we don't have an IPv4 address).
1070 if(!parse_port(ptr, data, flags)) {
1075 } else if(!is_auth_delim(**ptr, !is_unknown)) {
1076 /* Found more data which belongs to the host, so this isn't an IPv4. */
1079 data->has_implicit_ip = FALSE;
1083 data->host_len = *ptr - data->host;
1084 data->host_type = Uri_HOST_IPV4;
1086 TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n",
1087 ptr, data, flags, debugstr_wn(data->host, data->host_len),
1088 data->host_len, data->host_type);
1092 /* Attempts to parse the reg-name from the URI.
1094 * Because of the way Windows handles ':' this function also
1095 * handles parsing the port.
1097 * reg-name = *( unreserved / pct-encoded / sub-delims )
1100 * Windows allows everything, but, the characters in "auth_delims" and ':'
1101 * to appear in a reg-name, unless it's an unknown scheme type then ':' is
1102 * allowed to appear (even if a valid port isn't after it).
1104 * Windows doesn't like host names which start with '[' and end with ']'
1105 * and don't contain a valid IP literal address in between them.
1107 * On Windows if an '[' is encountered in the host name the ':' no longer
1108 * counts as a delimiter until you reach the next ']' or an "authority delimiter".
1110 * A reg-name CAN be empty.
1112 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags) {
1113 const BOOL has_start_bracket = **ptr == '[';
1114 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1115 BOOL inside_brackets = has_start_bracket;
1116 BOOL ignore_col = FALSE;
1118 /* We have to be careful with file schemes. */
1119 if(data->scheme_type == URL_SCHEME_FILE) {
1120 /* This is because an implicit file scheme could be "C:\\test" and it
1121 * would trick this function into thinking the host is "C", when after
1122 * canonicalization the host would end up being an empty string.
1124 if(is_alpha(**ptr) && *(*ptr+1) == ':') {
1125 /* Regular old drive paths don't have a host type (or host name). */
1126 data->host_type = Uri_HOST_UNKNOWN;
1130 } else if(**ptr == '\\' && *(*ptr+1) == '\\')
1131 /* Skip past the "\\" of a UNC path. */
1137 while(!is_auth_delim(**ptr, known_scheme)) {
1138 if(**ptr == ':' && !ignore_col) {
1139 /* We can ignore ':' if we're inside brackets. */
1140 if(!inside_brackets) {
1141 const WCHAR *tmp = (*ptr)++;
1143 /* Attempt to parse the port. */
1144 if(!parse_port(ptr, data, flags)) {
1145 /* Windows expects there to be a valid port for known scheme types. */
1146 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1149 TRACE("(%p %p %x): Expected valid port\n", ptr, data, flags);
1152 /* Windows gives up on trying to parse a port when it
1153 * encounters 1 invalid port.
1157 data->host_len = tmp - data->host;
1161 } else if(**ptr == '%' && known_scheme) {
1162 /* Has to be a legit % encoded value. */
1163 if(!check_pct_encoded(ptr)) {
1169 } else if(**ptr == ']')
1170 inside_brackets = FALSE;
1171 else if(**ptr == '[')
1172 inside_brackets = TRUE;
1177 if(has_start_bracket) {
1178 /* Make sure the last character of the host wasn't a ']'. */
1179 if(*(*ptr-1) == ']') {
1180 TRACE("(%p %p %x): Expected an IP literal inside of the host\n",
1188 /* Don't overwrite our length if we found a port earlier. */
1190 data->host_len = *ptr - data->host;
1192 /* If the host is empty, then it's an unknown host type. */
1193 if(data->host_len == 0)
1194 data->host_type = Uri_HOST_UNKNOWN;
1196 data->host_type = Uri_HOST_DNS;
1198 TRACE("(%p %p %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags,
1199 debugstr_wn(data->host, data->host_len), data->host_len);
1203 /* Attempts to parse an IPv6 address out of the URI.
1205 * IPv6address = 6( h16 ":" ) ls32
1206 * / "::" 5( h16 ":" ) ls32
1207 * / [ h16 ] "::" 4( h16 ":" ) ls32
1208 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1209 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1210 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1211 * / [ *4( h16 ":" ) h16 ] "::" ls32
1212 * / [ *5( h16 ":" ) h16 ] "::" h16
1213 * / [ *6( h16 ":" ) h16 ] "::"
1215 * ls32 = ( h16 ":" h16 ) / IPv4address
1216 * ; least-significant 32 bits of address.
1219 * ; 16 bits of address represented in hexadecimal.
1221 * Modeled after google-url's 'DoParseIPv6' function.
1223 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1224 const WCHAR *start, *cur_start;
1227 start = cur_start = *ptr;
1228 memset(&ip, 0, sizeof(ipv6_address));
1231 /* Check if we're on the last character of the host. */
1232 BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
1235 BOOL is_split = (**ptr == ':');
1236 BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
1238 /* Check if we're at the end of a component, or
1239 * if we're at the end of the IPv6 address.
1241 if(is_split || is_end) {
1244 cur_len = *ptr - cur_start;
1246 /* h16 can't have a length > 4. */
1250 TRACE("(%p %p %x): h16 component to long.\n",
1256 /* An h16 component can't have the length of 0 unless
1257 * the elision is at the beginning of the address, or
1258 * at the end of the address.
1260 if(!((*ptr == start && is_elision) ||
1261 (is_end && (*ptr-2) == ip.elision))) {
1263 TRACE("(%p %p %x): IPv6 component can not have a length of 0.\n",
1270 /* An IPv6 address can have no more than 8 h16 components. */
1271 if(ip.h16_count >= 8) {
1273 TRACE("(%p %p %x): Not a IPv6 address, to many h16 components.\n",
1278 ip.components[ip.h16_count].str = cur_start;
1279 ip.components[ip.h16_count].len = cur_len;
1281 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n",
1282 ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len,
1292 /* An IPv6 address can only have 1 elision ('::'). */
1296 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n",
1308 if(!check_ipv4address(ptr, TRUE)) {
1309 if(!is_hexdigit(**ptr)) {
1310 /* Not a valid character for an IPv6 address. */
1315 /* Found an IPv4 address. */
1316 ip.ipv4 = cur_start;
1317 ip.ipv4_len = *ptr - cur_start;
1319 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n",
1320 ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len),
1323 /* IPv4 addresses can only appear at the end of an IPv6 address. */
1329 compute_ipv6_comps_size(&ip);
1331 /* Make sure the IPv6 address adds up to 16 bytes. */
1332 if(ip.components_size + ip.elision_size != 16) {
1334 TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n",
1339 if(ip.elision_size == 2) {
1340 /* For some reason on Windows if an elision that represents
1341 * only 1 h16 component is encountered at the very beginning or
1342 * end of an IPv6 address, Windows does not consider it a
1343 * valid IPv6 address.
1345 * Ex: [::2:3:4:5:6:7] is not valid, even though the sum
1346 * of all the components == 128bits.
1348 if(ip.elision < ip.components[0].str ||
1349 ip.elision > ip.components[ip.h16_count-1].str) {
1351 TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
1357 data->host_type = Uri_HOST_IPV6;
1358 data->has_ipv6 = TRUE;
1359 data->ipv6_address = ip;
1361 TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n",
1362 ptr, data, flags, debugstr_wn(start, *ptr-start),
1367 /* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */
1368 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) {
1369 const WCHAR *start = *ptr;
1371 /* IPvFuture has to start with a 'v' or 'V'. */
1372 if(**ptr != 'v' && **ptr != 'V')
1375 /* Following the 'v' there must be at least 1 hexdigit. */
1377 if(!is_hexdigit(**ptr)) {
1383 while(is_hexdigit(**ptr))
1386 /* End of the hexdigit sequence must be a '.' */
1393 if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') {
1399 while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':')
1402 data->host_type = Uri_HOST_UNKNOWN;
1404 TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags,
1405 debugstr_wn(start, *ptr-start), *ptr-start);
1410 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]" */
1411 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags) {
1420 if(!parse_ipv6address(ptr, data, flags)) {
1421 if(!parse_ipvfuture(ptr, data, flags)) {
1437 /* If a valid port is not found, then let it trickle down to
1440 if(!parse_port(ptr, data, flags)) {
1446 data->host_len = *ptr - data->host;
1451 /* Parses the host information from the URI.
1453 * host = IP-literal / IPv4address / reg-name
1455 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags) {
1456 if(!parse_ip_literal(ptr, data, flags)) {
1457 if(!parse_ipv4address(ptr, data, flags)) {
1458 if(!parse_reg_name(ptr, data, flags)) {
1459 TRACE("(%p %p %x): Malformed URI, Unknown host type.\n",
1469 /* Parses the authority information from the URI.
1471 * authority = [ userinfo "@" ] host [ ":" port ]
1473 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
1474 parse_userinfo(ptr, data, flags);
1476 /* Parsing the port will happen during one of the host parsing
1477 * routines (if the URI has a port).
1479 if(!parse_host(ptr, data, flags))
1485 /* Determines how the URI should be parsed after the scheme information.
1487 * If the scheme is followed by "//", then it is treated as a hierarchical URI,
1488 * for which the authority and path information will be parsed out. Otherwise, the
1489 * URI will be treated as an opaque URI, for which the authority information is not parsed
1492 * RFC 3986 definition of hier-part:
1494 * hier-part = "//" authority path-abempty
1499 * MSDN opaque URI definition:
1500 * scheme ":" path [ "#" fragment ]
1503 * If the URI is of an unknown scheme type and has a "//" following the scheme then it
1504 * is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is
1505 * set then it is considered an opaque URI regardless of what follows the scheme information
1506 * (per MSDN documentation).
1508 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
1509 /* Checks if the authority information needs to be parsed.
1511 * Relative URI's aren't hierarchical URI's, but, they could trick
1512 * "check_hierarchical" into thinking it is, so we need to explicitly
1513 * make sure it's not relative. Also, if the URI is an implicit file
1514 * scheme it might not contain a "//", but, it's considered hierarchical
1515 * anyways. Wildcard Schemes are always considered hierarchical
1517 if(data->scheme_type == URL_SCHEME_WILDCARD ||
1518 data->scheme_type == URL_SCHEME_FILE ||
1519 (!data->is_relative && check_hierarchical(ptr))) {
1520 /* Only treat it as a hierarchical URI if the scheme_type is known or
1521 * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set.
1523 if(data->scheme_type != URL_SCHEME_UNKNOWN ||
1524 !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) {
1525 TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
1526 data->is_opaque = FALSE;
1528 /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
1529 if(!parse_authority(ptr, data, flags))
1536 /* If it reaches here, then the URI will be treated as an opaque
1540 TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags);
1542 data->is_opaque = TRUE;
1543 /* TODO: Handle opaque URI's, parse path. */
1547 /* Parses and validates the components of the URI specified by data->uri
1548 * and stores the information it parses into 'data'.
1550 * Returns TRUE if it successfully parsed the URI. False otherwise.
1552 static BOOL parse_uri(parse_data *data, DWORD flags) {
1559 TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri));
1561 if(!parse_scheme(pptr, data, flags))
1564 if(!parse_hierpart(pptr, data, flags))
1567 TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
1571 /* Canonicalizes the userinfo of the URI represented by the parse_data.
1573 * Canonicalization of the userinfo is a simple process. If there are any percent
1574 * encoded characters that fall in the "unreserved" character set, they are decoded
1575 * to their actual value. If a character is not in the "unreserved" or "reserved" sets
1576 * then it is percent encoded. Other than that the characters are copied over without
1579 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1582 uri->userinfo_start = uri->userinfo_split = -1;
1583 uri->userinfo_len = 0;
1586 /* URI doesn't have userinfo, so nothing to do here. */
1589 uri->userinfo_start = uri->canon_len;
1591 while(i < data->userinfo_len) {
1592 if(data->userinfo[i] == ':' && uri->userinfo_split == -1)
1593 /* Windows only considers the first ':' as the delimiter. */
1594 uri->userinfo_split = uri->canon_len - uri->userinfo_start;
1595 else if(data->userinfo[i] == '%') {
1596 /* Only decode % encoded values for known scheme types. */
1597 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1598 /* See if the value really needs to be decoded. */
1599 WCHAR val = decode_pct_val(data->userinfo + i);
1600 if(is_unreserved(val)) {
1602 uri->canon_uri[uri->canon_len] = val;
1606 /* Move past the hex characters. */
1611 } else if(!is_reserved(data->userinfo[i]) && !is_unreserved(data->userinfo[i]) &&
1612 data->userinfo[i] != '\\') {
1613 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
1616 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
1618 pct_encode_val(data->userinfo[i], uri->canon_uri + uri->canon_len);
1620 uri->canon_len += 3;
1627 /* Nothing special, so just copy the character over. */
1628 uri->canon_uri[uri->canon_len] = data->userinfo[i];
1634 uri->userinfo_len = uri->canon_len - uri->userinfo_start;
1636 TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
1637 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
1638 uri->userinfo_split, uri->userinfo_len);
1640 /* Now insert the '@' after the userinfo. */
1642 uri->canon_uri[uri->canon_len] = '@';
1648 /* Attempts to canonicalize a reg_name.
1650 * Things that happen:
1651 * 1) If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
1652 * lower cased, unless it's an unknown scheme type, in which case it's
1653 * not lower cased regardless.
1655 * 2) Unreserved % encoded characters are decoded for known
1658 * 3) Forbidden characters are % encoded as long as
1659 * Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
1660 * it isn't an unknown scheme type.
1662 * 4) If it's a file scheme and the host is "localhost" it's removed.
1664 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
1665 DWORD flags, BOOL computeOnly) {
1666 static const WCHAR localhostW[] =
1667 {'l','o','c','a','l','h','o','s','t',0};
1669 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1671 uri->host_start = uri->canon_len;
1673 if(data->scheme_type == URL_SCHEME_FILE &&
1674 data->host_len == lstrlenW(localhostW)) {
1675 if(!StrCmpNIW(data->host, localhostW, data->host_len)) {
1676 uri->host_start = -1;
1678 uri->host_type = Uri_HOST_UNKNOWN;
1683 for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) {
1684 if(*ptr == '%' && known_scheme) {
1685 WCHAR val = decode_pct_val(ptr);
1686 if(is_unreserved(val)) {
1687 /* If NO_CANONICALIZE is not set, then Windows lower cases the
1690 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) {
1692 uri->canon_uri[uri->canon_len] = tolowerW(val);
1695 uri->canon_uri[uri->canon_len] = val;
1699 /* Skip past the % encoded character. */
1703 /* Just copy the % over. */
1705 uri->canon_uri[uri->canon_len] = *ptr;
1708 } else if(*ptr == '\\') {
1709 /* Only unknown scheme types could have made it here with a '\\' in the host name. */
1711 uri->canon_uri[uri->canon_len] = *ptr;
1713 } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
1714 !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
1716 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
1718 /* The percent encoded value gets lower cased also. */
1719 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1720 uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]);
1721 uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]);
1725 uri->canon_len += 3;
1728 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme)
1729 uri->canon_uri[uri->canon_len] = tolowerW(*ptr);
1731 uri->canon_uri[uri->canon_len] = *ptr;
1738 uri->host_len = uri->canon_len - uri->host_start;
1741 TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags,
1742 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
1746 find_domain_name(uri->canon_uri+uri->host_start, uri->host_len,
1747 &(uri->domain_offset));
1752 /* Attempts to canonicalize an implicit IPv4 address. */
1753 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1754 uri->host_start = uri->canon_len;
1756 TRACE("%u\n", data->implicit_ipv4);
1757 /* For unknown scheme types Windows doesn't convert
1758 * the value into an IP address, but, it still considers
1759 * it an IPv4 address.
1761 if(data->scheme_type == URL_SCHEME_UNKNOWN) {
1763 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
1764 uri->canon_len += data->host_len;
1767 uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4);
1769 uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4);
1772 uri->host_len = uri->canon_len - uri->host_start;
1773 uri->host_type = Uri_HOST_IPV4;
1776 TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n",
1777 data, uri, flags, computeOnly,
1778 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
1784 /* Attempts to canonicalize an IPv4 address.
1786 * If the parse_data represents a URI that has an implicit IPv4 address
1787 * (ex. http://256/, this function will convert 256 into 0.0.1.0). If
1788 * the implicit IP address exceeds the value of UINT_MAX (maximum value
1789 * for an IPv4 address) it's canonicalized as if it were a reg-name.
1791 * If the parse_data contains a partial or full IPv4 address it normalizes it.
1792 * A partial IPv4 address is something like "192.0" and would be normalized to
1793 * "192.0.0.0". A full (or partial) IPv4 address like "192.002.01.003" would
1794 * be normalized to "192.2.1.3".
1797 * Windows ONLY normalizes IPv4 addresses for known scheme types (ones that aren't
1798 * URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from
1799 * the original URI into the canonicalized URI, but, it still recognizes URI's
1800 * host type as HOST_IPV4.
1802 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1803 if(data->has_implicit_ip)
1804 return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly);
1806 uri->host_start = uri->canon_len;
1808 /* Windows only normalizes for known scheme types. */
1809 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1810 /* parse_data contains a partial or full IPv4 address, so normalize it. */
1811 DWORD i, octetDigitCount = 0, octetCount = 0;
1812 BOOL octetHasDigit = FALSE;
1814 for(i = 0; i < data->host_len; ++i) {
1815 if(data->host[i] == '0' && !octetHasDigit) {
1816 /* Can ignore leading zeros if:
1817 * 1) It isn't the last digit of the octet.
1818 * 2) i+1 != data->host_len
1821 if(octetDigitCount == 2 ||
1822 i+1 == data->host_len ||
1823 data->host[i+1] == '.') {
1825 uri->canon_uri[uri->canon_len] = data->host[i];
1827 TRACE("Adding zero\n");
1829 } else if(data->host[i] == '.') {
1831 uri->canon_uri[uri->canon_len] = data->host[i];
1834 octetDigitCount = 0;
1835 octetHasDigit = FALSE;
1839 uri->canon_uri[uri->canon_len] = data->host[i];
1843 octetHasDigit = TRUE;
1847 /* Make sure the canonicalized IP address has 4 dec-octets.
1848 * If it doesn't, add "0" octets until there are 4.
1850 for( ; octetCount < 3; ++octetCount) {
1852 uri->canon_uri[uri->canon_len] = '.';
1853 uri->canon_uri[uri->canon_len+1] = '0';
1856 uri->canon_len += 2;
1859 /* Windows doesn't normalize addresses in unknown schemes. */
1861 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
1862 uri->canon_len += data->host_len;
1865 uri->host_len = uri->canon_len - uri->host_start;
1867 TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n",
1868 data, uri, flags, computeOnly,
1869 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
1876 /* Attempts to canonicalize the IPv6 address of the URI.
1878 * Multiple things happen during the canonicalization of an IPv6 address:
1879 * 1) Any leading zeros in an h16 component are removed.
1880 * Ex: [0001:0022::] -> [1:22::]
1882 * 2) The longest sequence of zero h16 components is compressed
1883 * into a "::" (elision). If there's a tie, the first is chosen.
1885 * Ex: [0:0:0:0:1:6:7:8] -> [::1:6:7:8]
1886 * [0:0:0:0:1:2::] -> [::1:2:0:0]
1887 * [0:0:1:2:0:0:7:8] -> [::1:2:0:0:7:8]
1889 * 3) If an IPv4 address is attached to the IPv6 address, it's
1891 * Ex: [::001.002.022.000] -> [::1.2.22.0]
1893 * 4) If an elision is present, but, only represents 1 h16 component
1896 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
1898 * 5) If the IPv6 address contains an IPv4 address and there exists
1899 * at least 1 non-zero h16 component the IPv4 address is converted
1900 * into two h16 components, otherwise it's normalized and kept as is.
1902 * Ex: [::192.200.003.4] -> [::192.200.3.4]
1903 * [ffff::192.200.003.4] -> [ffff::c0c8:304]
1906 * For unknown scheme types Windows simply copies the address over without any
1909 * IPv4 address can be included in an elision if all its components are 0's.
1911 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
1912 DWORD flags, BOOL computeOnly) {
1913 uri->host_start = uri->canon_len;
1915 if(data->scheme_type == URL_SCHEME_UNKNOWN) {
1917 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
1918 uri->canon_len += data->host_len;
1922 DWORD i, elision_len;
1924 if(!ipv6_to_number(&(data->ipv6_address), values)) {
1925 TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
1926 data, uri, flags, computeOnly);
1931 uri->canon_uri[uri->canon_len] = '[';
1934 /* Find where the elision should occur (if any). */
1935 compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
1937 TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
1938 computeOnly, elision_start, elision_len);
1940 for(i = 0; i < 8; ++i) {
1941 BOOL in_elision = (elision_start > -1 && i >= elision_start &&
1942 i < elision_start+elision_len);
1943 BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
1944 data->ipv6_address.h16_count == 0);
1946 if(i == elision_start) {
1948 uri->canon_uri[uri->canon_len] = ':';
1949 uri->canon_uri[uri->canon_len+1] = ':';
1951 uri->canon_len += 2;
1954 /* We can ignore the current component if we're in the elision. */
1958 /* We only add a ':' if we're not at i == 0, or when we're at
1959 * the very end of elision range since the ':' colon was handled
1960 * earlier. Otherwise we would end up with ":::" after elision.
1962 if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
1964 uri->canon_uri[uri->canon_len] = ':';
1972 /* Combine the two parts of the IPv4 address values. */
1978 len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
1980 len = ui2ipv4(NULL, val);
1982 uri->canon_len += len;
1985 /* Write a regular h16 component to the URI. */
1987 /* Short circuit for the trivial case. */
1988 if(values[i] == 0) {
1990 uri->canon_uri[uri->canon_len] = '0';
1993 static const WCHAR formatW[] = {'%','x',0};
1996 uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len,
1997 formatW, values[i]);
2000 uri->canon_len += sprintfW(tmp, formatW, values[i]);
2006 /* Add the closing ']'. */
2008 uri->canon_uri[uri->canon_len] = ']';
2012 uri->host_len = uri->canon_len - uri->host_start;
2015 TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags,
2016 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2022 /* Attempts to canonicalize the host of the URI (if any). */
2023 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2024 uri->host_start = -1;
2026 uri->domain_offset = -1;
2029 switch(data->host_type) {
2031 uri->host_type = Uri_HOST_DNS;
2032 if(!canonicalize_reg_name(data, uri, flags, computeOnly))
2037 uri->host_type = Uri_HOST_IPV4;
2038 if(!canonicalize_ipv4address(data, uri, flags, computeOnly))
2043 if(!canonicalize_ipv6address(data, uri, flags, computeOnly))
2046 uri->host_type = Uri_HOST_IPV6;
2048 case Uri_HOST_UNKNOWN:
2049 if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) {
2050 uri->host_start = uri->canon_len;
2052 /* Nothing happens to unknown host types. */
2054 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2055 uri->canon_len += data->host_len;
2056 uri->host_len = data->host_len;
2059 uri->host_type = Uri_HOST_UNKNOWN;
2062 FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data,
2063 uri, flags, computeOnly, data->host_type);
2071 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2072 BOOL has_default_port = FALSE;
2073 USHORT default_port = 0;
2076 uri->has_port = FALSE;
2078 /* Check if the scheme has a default port. */
2079 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
2080 if(default_ports[i].scheme == data->scheme_type) {
2081 has_default_port = TRUE;
2082 default_port = default_ports[i].port;
2087 if(data->port || has_default_port)
2088 uri->has_port = TRUE;
2091 * 1) Has a port which is the default port.
2092 * 2) Has a port (not the default).
2093 * 3) Doesn't have a port, but, scheme has a default port.
2096 if(has_default_port && data->port && data->port_value == default_port) {
2097 /* If it's the default port and this flag isn't set, don't do anything. */
2098 if(flags & Uri_CREATE_NO_CANONICALIZE) {
2099 /* Copy the original port over. */
2101 uri->canon_uri[uri->canon_len] = ':';
2102 memcpy(uri->canon_uri+uri->canon_len+1, data->port, data->port_len*sizeof(WCHAR));
2104 uri->canon_len += data->port_len+1;
2107 uri->port = default_port;
2108 } else if(data->port) {
2110 uri->canon_uri[uri->canon_len] = ':';
2113 if(flags & Uri_CREATE_NO_CANONICALIZE) {
2114 /* Copy the original over without changes. */
2116 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2117 uri->canon_len += data->port_len;
2119 const WCHAR formatW[] = {'%','u',0};
2122 len = sprintfW(uri->canon_uri+uri->canon_len, formatW, data->port_value);
2125 len = sprintfW(tmp, formatW, data->port_value);
2127 uri->canon_len += len;
2130 uri->port = data->port_value;
2131 } else if(has_default_port)
2132 uri->port = default_port;
2137 /* Canonicalizes the authority of the URI represented by the parse_data. */
2138 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2139 uri->authority_start = uri->canon_len;
2140 uri->authority_len = 0;
2142 if(!canonicalize_userinfo(data, uri, flags, computeOnly))
2145 if(!canonicalize_host(data, uri, flags, computeOnly))
2148 if(!canonicalize_port(data, uri, flags, computeOnly))
2151 if(uri->host_start != -1)
2152 uri->authority_len = uri->canon_len - uri->authority_start;
2154 uri->authority_start = -1;
2159 /* Determines how the URI represented by the parse_data should be canonicalized.
2161 * Essentially, if the parse_data represents an hierarchical URI then it calls
2162 * canonicalize_authority and the canonicalization functions for the path. If the
2163 * URI is opaque it canonicalizes the path of the URI.
2165 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2166 if(!data->is_opaque) {
2167 /* "//" is only added for non-wildcard scheme types. */
2168 if(data->scheme_type != URL_SCHEME_WILDCARD) {
2170 INT pos = uri->canon_len;
2172 uri->canon_uri[pos] = '/';
2173 uri->canon_uri[pos+1] = '/';
2175 uri->canon_len += 2;
2178 if(!canonicalize_authority(data, uri, flags, computeOnly))
2181 /* TODO: Canonicalize the path of the URI. */
2184 /* Opaque URI's don't have an authority. */
2185 uri->userinfo_start = uri->userinfo_split = -1;
2186 uri->userinfo_len = 0;
2187 uri->host_start = -1;
2189 uri->host_type = Uri_HOST_UNKNOWN;
2190 uri->has_port = FALSE;
2191 uri->authority_start = -1;
2192 uri->authority_len = 0;
2193 uri->domain_offset = -1;
2199 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
2200 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2201 uri->scheme_start = -1;
2202 uri->scheme_len = 0;
2205 /* The only type of URI that doesn't have to have a scheme is a relative
2208 if(!data->is_relative) {
2209 FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data,
2210 uri, flags, debugstr_w(data->uri));
2216 INT pos = uri->canon_len;
2218 for(i = 0; i < data->scheme_len; ++i) {
2219 /* Scheme name must be lower case after canonicalization. */
2220 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]);
2223 uri->canon_uri[i + pos] = ':';
2224 uri->scheme_start = pos;
2226 TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags,
2227 debugstr_wn(uri->canon_uri, uri->scheme_len), data->scheme_len);
2230 /* This happens in both computation modes. */
2231 uri->canon_len += data->scheme_len + 1;
2232 uri->scheme_len = data->scheme_len;
2237 /* Computes what the length of the URI specified by the parse_data will be
2238 * after canonicalization occurs using the specified flags.
2240 * This function will return a non-zero value indicating the length of the canonicalized
2241 * URI, or -1 on error.
2243 static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
2246 memset(&uri, 0, sizeof(Uri));
2248 TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags,
2249 debugstr_w(data->uri));
2251 if(!canonicalize_scheme(data, &uri, flags, TRUE)) {
2252 ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags);
2256 if(!canonicalize_hierpart(data, &uri, flags, TRUE)) {
2257 ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags);
2261 TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len);
2263 return uri.canon_len;
2266 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the
2267 * canonicalization succeeds it will store all the canonicalization information
2268 * in the pointer to the Uri.
2270 * To canonicalize a URI this function first computes what the length of the URI
2271 * specified by the parse_data will be. Once this is done it will then perform the actual
2272 * canonicalization of the URI.
2274 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
2277 uri->canon_uri = NULL;
2278 len = uri->canon_size = uri->canon_len = 0;
2280 TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri));
2282 /* First try to compute the length of the URI. */
2283 len = compute_canonicalized_length(data, flags);
2285 ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags,
2286 debugstr_w(data->uri));
2287 return E_INVALIDARG;
2290 uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR));
2292 return E_OUTOFMEMORY;
2294 if(!canonicalize_scheme(data, uri, flags, FALSE)) {
2295 ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
2296 heap_free(uri->canon_uri);
2297 return E_INVALIDARG;
2299 uri->scheme_type = data->scheme_type;
2301 if(!canonicalize_hierpart(data, uri, flags, FALSE)) {
2302 ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags);
2303 heap_free(uri->canon_uri);
2304 return E_INVALIDARG;
2307 uri->canon_uri[uri->canon_len] = '\0';
2308 TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
2313 #define URI(x) ((IUri*) &(x)->lpIUriVtbl)
2314 #define URIBUILDER(x) ((IUriBuilder*) &(x)->lpIUriBuilderVtbl)
2316 #define URI_THIS(iface) DEFINE_THIS(Uri, IUri, iface)
2318 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv)
2320 Uri *This = URI_THIS(iface);
2322 if(IsEqualGUID(&IID_IUnknown, riid)) {
2323 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
2325 }else if(IsEqualGUID(&IID_IUri, riid)) {
2326 TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
2329 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
2331 return E_NOINTERFACE;
2334 IUnknown_AddRef((IUnknown*)*ppv);
2338 static ULONG WINAPI Uri_AddRef(IUri *iface)
2340 Uri *This = URI_THIS(iface);
2341 LONG ref = InterlockedIncrement(&This->ref);
2343 TRACE("(%p) ref=%d\n", This, ref);
2348 static ULONG WINAPI Uri_Release(IUri *iface)
2350 Uri *This = URI_THIS(iface);
2351 LONG ref = InterlockedDecrement(&This->ref);
2353 TRACE("(%p) ref=%d\n", This, ref);
2356 SysFreeString(This->raw_uri);
2357 heap_free(This->canon_uri);
2364 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags)
2366 Uri *This = URI_THIS(iface);
2368 TRACE("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
2373 if(uriProp > Uri_PROPERTY_STRING_LAST) {
2374 /* Windows allocates an empty BSTR for invalid Uri_PROPERTY's. */
2375 *pbstrProperty = SysAllocStringLen(NULL, 0);
2376 if(!(*pbstrProperty))
2377 return E_OUTOFMEMORY;
2379 /* It only returns S_FALSE for the ZONE property... */
2380 if(uriProp == Uri_PROPERTY_ZONE)
2386 /* Don't have support for flags yet. */
2388 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
2393 case Uri_PROPERTY_AUTHORITY:
2394 if(This->authority_start > -1) {
2395 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len);
2398 *pbstrProperty = SysAllocStringLen(NULL, 0);
2402 if(!(*pbstrProperty))
2403 hres = E_OUTOFMEMORY;
2406 case Uri_PROPERTY_DOMAIN:
2407 if(This->domain_offset > -1) {
2408 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset,
2409 This->host_len-This->domain_offset);
2412 *pbstrProperty = SysAllocStringLen(NULL, 0);
2416 if(!(*pbstrProperty))
2417 hres = E_OUTOFMEMORY;
2420 case Uri_PROPERTY_HOST:
2421 if(This->host_start > -1) {
2422 /* The '[' and ']' aren't included for IPv6 addresses. */
2423 if(This->host_type == Uri_HOST_IPV6)
2424 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2);
2426 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len);
2430 *pbstrProperty = SysAllocStringLen(NULL, 0);
2434 if(!(*pbstrProperty))
2435 hres = E_OUTOFMEMORY;
2438 case Uri_PROPERTY_PASSWORD:
2439 if(This->userinfo_split > -1) {
2440 *pbstrProperty = SysAllocStringLen(
2441 This->canon_uri+This->userinfo_start+This->userinfo_split+1,
2442 This->userinfo_len-This->userinfo_split-1);
2445 *pbstrProperty = SysAllocStringLen(NULL, 0);
2449 if(!(*pbstrProperty))
2450 return E_OUTOFMEMORY;
2453 case Uri_PROPERTY_RAW_URI:
2454 *pbstrProperty = SysAllocString(This->raw_uri);
2455 if(!(*pbstrProperty))
2456 hres = E_OUTOFMEMORY;
2460 case Uri_PROPERTY_SCHEME_NAME:
2461 if(This->scheme_start > -1) {
2462 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len);
2465 *pbstrProperty = SysAllocStringLen(NULL, 0);
2469 if(!(*pbstrProperty))
2470 hres = E_OUTOFMEMORY;
2473 case Uri_PROPERTY_USER_INFO:
2474 if(This->userinfo_start > -1) {
2475 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len);
2478 *pbstrProperty = SysAllocStringLen(NULL, 0);
2482 if(!(*pbstrProperty))
2483 hres = E_OUTOFMEMORY;
2486 case Uri_PROPERTY_USER_NAME:
2487 if(This->userinfo_start > -1) {
2488 /* If userinfo_split is set, that means a password exists
2489 * so the username is only from userinfo_start to userinfo_split.
2491 if(This->userinfo_split > -1) {
2492 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split);
2495 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len);
2499 *pbstrProperty = SysAllocStringLen(NULL, 0);
2503 if(!(*pbstrProperty))
2504 return E_OUTOFMEMORY;
2508 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
2515 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
2517 Uri *This = URI_THIS(iface);
2519 TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2522 return E_INVALIDARG;
2524 /* Can only return a length for a property if it's a string. */
2525 if(uriProp > Uri_PROPERTY_STRING_LAST)
2526 return E_INVALIDARG;
2528 /* Don't have support for flags yet. */
2530 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2535 case Uri_PROPERTY_AUTHORITY:
2536 *pcchProperty = This->authority_len;
2537 hres = (This->authority_start > -1) ? S_OK : S_FALSE;
2539 case Uri_PROPERTY_DOMAIN:
2540 if(This->domain_offset > -1)
2541 *pcchProperty = This->host_len - This->domain_offset;
2545 hres = (This->domain_offset > -1) ? S_OK : S_FALSE;
2547 case Uri_PROPERTY_HOST:
2548 *pcchProperty = This->host_len;
2550 /* '[' and ']' aren't included in the length. */
2551 if(This->host_type == Uri_HOST_IPV6)
2554 hres = (This->host_start > -1) ? S_OK : S_FALSE;
2556 case Uri_PROPERTY_PASSWORD:
2557 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0;
2558 hres = (This->userinfo_split > -1) ? S_OK : S_FALSE;
2560 case Uri_PROPERTY_RAW_URI:
2561 *pcchProperty = SysStringLen(This->raw_uri);
2564 case Uri_PROPERTY_SCHEME_NAME:
2565 *pcchProperty = This->scheme_len;
2566 hres = (This->scheme_start > -1) ? S_OK : S_FALSE;
2568 case Uri_PROPERTY_USER_INFO:
2569 *pcchProperty = This->userinfo_len;
2570 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
2572 case Uri_PROPERTY_USER_NAME:
2573 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len;
2574 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
2577 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
/* IUri::GetPropertyDWORD: stores a DWORD-valued property of the URI in
 * *pcchProperty.
 *
 * Behaviour visible in this excerpt:
 *  - Uri_PROPERTY_ZONE always stores URLZONE_INVALID (see the note in the body).
 *  - A property id below Uri_PROPERTY_DWORD_START is rejected with E_INVALIDARG.
 *  - HOST_TYPE, PORT and SCHEME are read from host_type, port and scheme_type
 *    of the Uri; PORT first tests has_port.
 *  - Any other property is reported through FIXME.
 *
 * NOTE(review): the guard in front of the first E_INVALIDARG, the has_port
 * fallback and the final return values are not part of this excerpt --
 * confirm against the full file.
 */
2584 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
2586 Uri *This = URI_THIS(iface);
2589 TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2592 return E_INVALIDARG;
2594 /* Microsoft's implementation for the ZONE property of a URI seems to be lacking...
2595 * From what I can tell, instead of checking which URLZONE the URI belongs to it
2596 * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone
2599 if(uriProp == Uri_PROPERTY_ZONE) {
2600 *pcchProperty = URLZONE_INVALID;
2604 if(uriProp < Uri_PROPERTY_DWORD_START) {
2606 return E_INVALIDARG;
2610 case Uri_PROPERTY_HOST_TYPE:
2611 *pcchProperty = This->host_type;
2614 case Uri_PROPERTY_PORT:
2615 if(!This->has_port) {
2619 *pcchProperty = This->port;
2624 case Uri_PROPERTY_SCHEME:
2625 *pcchProperty = This->scheme_type;
2629 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
/* IUri::HasProperty -- not implemented in the visible code: the call is only
 * reported through FIXME. An E_INVALIDARG return is visible, but the condition
 * guarding it is outside this excerpt (presumably a NULL pfHasProperty check --
 * TODO confirm). */
2636 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty)
2638 Uri *This = URI_THIS(iface);
2639 FIXME("(%p)->(%d %p)\n", This, uriProp, pfHasProperty);
2642 return E_INVALIDARG;
/* IUri::GetAbsoluteUri -- not implemented in the visible code: the call is
 * reported through FIXME and pstrAbsoluteUri is tested for NULL. What the NULL
 * branch and the normal path return is outside this excerpt (TODO confirm). */
2647 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri)
2649 Uri *This = URI_THIS(iface);
2650 FIXME("(%p)->(%p)\n", This, pstrAbsoluteUri);
2652 if(!pstrAbsoluteUri)
/* IUri::GetAuthority: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_AUTHORITY and flags 0, returning that HRESULT unchanged. */
2658 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority)
2660 TRACE("(%p)->(%p)\n", iface, pstrAuthority);
2661 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0);
/* IUri::GetDisplayUri -- not implemented in the visible code: the call is only
 * reported through FIXME. Argument checks and the return statement, if any,
 * are outside this excerpt (TODO confirm). */
2664 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri)
2666 Uri *This = URI_THIS(iface);
2667 FIXME("(%p)->(%p)\n", This, pstrDisplayUri);
/* IUri::GetDomain: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_DOMAIN and flags 0, returning that HRESULT unchanged. */
2675 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain)
2677 TRACE("(%p)->(%p)\n", iface, pstrDomain);
2678 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0);
/* IUri::GetExtension -- not implemented in the visible code: the call is only
 * reported through FIXME. Argument checks and the return statement, if any,
 * are outside this excerpt (TODO confirm). */
2681 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension)
2683 Uri *This = URI_THIS(iface);
2684 FIXME("(%p)->(%p)\n", This, pstrExtension);
/* IUri::GetFragment -- not implemented in the visible code: the call is only
 * reported through FIXME. Argument checks and the return statement, if any,
 * are outside this excerpt (TODO confirm). */
2692 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment)
2694 Uri *This = URI_THIS(iface);
2695 FIXME("(%p)->(%p)\n", This, pstrFragment);
/* IUri::GetHost: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_HOST and flags 0, returning that HRESULT unchanged. */
2703 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost)
2705 TRACE("(%p)->(%p)\n", iface, pstrHost);
2706 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0);
/* IUri::GetPassword: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_PASSWORD and flags 0, returning that HRESULT unchanged. */
2709 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword)
2711 TRACE("(%p)->(%p)\n", iface, pstrPassword);
2712 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0);
/* IUri::GetPath -- not implemented in the visible code: the call is only
 * reported through FIXME. Argument checks and the return statement, if any,
 * are outside this excerpt (TODO confirm). */
2715 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath)
2717 Uri *This = URI_THIS(iface);
2718 FIXME("(%p)->(%p)\n", This, pstrPath);
/* IUri::GetPathAndQuery -- not implemented in the visible code: the call is
 * reported through FIXME and pstrPathAndQuery is tested for NULL. What the
 * NULL branch and the normal path return is outside this excerpt (TODO
 * confirm). */
2726 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery)
2728 Uri *This = URI_THIS(iface);
2729 FIXME("(%p)->(%p)\n", This, pstrPathAndQuery);
2731 if(!pstrPathAndQuery)
/* IUri::GetQuery -- not implemented in the visible code: the call is only
 * reported through FIXME. Argument checks and the return statement, if any,
 * are outside this excerpt (TODO confirm). */
2737 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery)
2739 Uri *This = URI_THIS(iface);
2740 FIXME("(%p)->(%p)\n", This, pstrQuery);
/* IUri::GetRawUri: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_RAW_URI and flags 0, returning that HRESULT unchanged. The
 * local This is used for the trace output only. */
2748 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri)
2750 Uri *This = URI_THIS(iface);
2751 TRACE("(%p)->(%p)\n", This, pstrRawUri);
2753 /* Just forward the call to GetPropertyBSTR. */
2754 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0);
/* IUri::GetSchemeName: traces the call and forwards to Uri_GetPropertyBSTR
 * with Uri_PROPERTY_SCHEME_NAME and flags 0, returning that HRESULT unchanged.
 * The local This is used for the trace output only. */
2757 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName)
2759 Uri *This = URI_THIS(iface);
2760 TRACE("(%p)->(%p)\n", This, pstrSchemeName);
2761 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0);
/* IUri::GetUserInfo: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_USER_INFO and flags 0, returning that HRESULT unchanged. */
2764 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo)
2766 TRACE("(%p)->(%p)\n", iface, pstrUserInfo);
2767 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0);
/* IUri::GetUserName: traces the call and forwards to Uri_GetPropertyBSTR with
 * Uri_PROPERTY_USER_NAME and flags 0, returning that HRESULT unchanged. */
2770 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName)
2772 TRACE("(%p)->(%p)\n", iface, pstrUserName);
2773 return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0);
/* IUri::GetHostType: traces the call and forwards to Uri_GetPropertyDWORD with
 * Uri_PROPERTY_HOST_TYPE and flags 0, returning that HRESULT unchanged. */
2776 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType)
2778 TRACE("(%p)->(%p)\n", iface, pdwHostType);
2779 return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0);
/* IUri::GetPort: traces the call and forwards to Uri_GetPropertyDWORD with
 * Uri_PROPERTY_PORT and flags 0, returning that HRESULT unchanged. */
2782 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort)
2784 TRACE("(%p)->(%p)\n", iface, pdwPort);
2785 return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0);
/* IUri::GetScheme: traces the call and forwards to Uri_GetPropertyDWORD with
 * Uri_PROPERTY_SCHEME and flags 0, returning that HRESULT unchanged. The local
 * This is used for the trace output only. */
2788 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme)
2790 Uri *This = URI_THIS(iface);
2791 TRACE("(%p)->(%p)\n", This, pdwScheme);
2792 return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0);
/* IUri::GetZone: traces the call and forwards to Uri_GetPropertyDWORD with
 * Uri_PROPERTY_ZONE and flags 0, returning that HRESULT unchanged. For this
 * property Uri_GetPropertyDWORD stores URLZONE_INVALID in *pdwZone. */
2795 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone)
2797 TRACE("(%p)->(%p)\n", iface, pdwZone);
2798 return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0);
/* IUri::GetProperties -- not implemented in the visible code: the call is only
 * reported through FIXME. An E_INVALIDARG return is visible, but the condition
 * guarding it is outside this excerpt (presumably a NULL pdwProperties check --
 * TODO confirm). */
2801 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties)
2803 Uri *This = URI_THIS(iface);
2804 FIXME("(%p)->(%p)\n", This, pdwProperties);
2807 return E_INVALIDARG;
/* IUri::IsEqual: intended to report through *pfEqual whether pUri equals this
 * URI. In this excerpt only the trace, a remark that Windows returns S_OK on
 * one (not visible) path, and a FIXME for the unimplemented comparison can be
 * seen; argument checks and return values need confirming in the full file. */
2812 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual)
2814 Uri *This = URI_THIS(iface);
2815 TRACE("(%p)->(%p %p)\n", This, pUri, pfEqual);
2823 /* For some reason Windows returns S_OK here... */
2827 FIXME("(%p)->(%p %p)\n", This, pUri, pfEqual);
/* Method table of the IUri implementation; CreateUri stores its address in the
 * lpIUriVtbl field of every Uri it allocates. Only part of the initializer
 * list is visible in this excerpt. */
2833 static const IUriVtbl UriVtbl = {
2837 Uri_GetPropertyBSTR,
2838 Uri_GetPropertyLength,
2839 Uri_GetPropertyDWORD,
2850 Uri_GetPathAndQuery,
/* Builds an IUri object from the string pwzURI.
 *
 * Steps visible in this excerpt: allocate a Uri with heap_alloc and install
 * UriVtbl, copy pwzURI into raw_uri with SysAllocString, parse that copy with
 * parse_uri() and canonicalize the result with canonicalize_uri(). A failed
 * parse frees raw_uri and yields E_INVALIDARG; raw_uri is also freed after
 * canonicalize_uri() on a path whose condition is not visible.
 *
 * NOTE(review): the conditions in front of the two early E_INVALIDARG returns
 * and of both E_OUTOFMEMORY returns, the release of ret on the error paths
 * and the final assignment to *ppURI are outside this excerpt -- confirm
 * against the full file.
 */
2864 /***********************************************************************
2865 * CreateUri (urlmon.@)
2867 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI)
2873 TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI);
2876 return E_INVALIDARG;
2880 return E_INVALIDARG;
2883 ret = heap_alloc(sizeof(Uri));
2885 return E_OUTOFMEMORY;
2887 ret->lpIUriVtbl = &UriVtbl;
2890 /* Create a copy of pwzURI and store it as the raw_uri. */
2891 ret->raw_uri = SysAllocString(pwzURI);
2894 return E_OUTOFMEMORY;
2897 memset(&data, 0, sizeof(parse_data));
2898 data.uri = ret->raw_uri;
2900 /* Validate and parse the URI into its components. */
2901 if(!parse_uri(&data, dwFlags)) {
2902 /* Encountered an unsupported or invalid URI */
2903 SysFreeString(ret->raw_uri);
2906 return E_INVALIDARG;
2909 /* Canonicalize the URI. */
2910 hr = canonicalize_uri(&data, ret, dwFlags);
2912 SysFreeString(ret->raw_uri);
/* Converts an IUriBuilder interface pointer into the UriBuilder object that
 * implements it, via DEFINE_THIS (defined outside this excerpt). The macro is
 * #undef'd again after the UriBuilder method implementations. */
2922 #define URIBUILDER_THIS(iface) DEFINE_THIS(UriBuilder, IUriBuilder, iface)
2924 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv)
2926 UriBuilder *This = URIBUILDER_THIS(iface);
2928 if(IsEqualGUID(&IID_IUnknown, riid)) {
2929 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
2930 *ppv = URIBUILDER(This);
2931 }else if(IsEqualGUID(&IID_IUriBuilder, riid)) {
2932 TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
2933 *ppv = URIBUILDER(This);
2935 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
2937 return E_NOINTERFACE;
2940 IUnknown_AddRef((IUnknown*)*ppv);
/* IUnknown::AddRef for the UriBuilder: atomically increments This->ref with
 * InterlockedIncrement and traces the new count. The return statement is
 * outside this excerpt (presumably returns ref -- TODO confirm). */
2944 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface)
2946 UriBuilder *This = URIBUILDER_THIS(iface);
2947 LONG ref = InterlockedIncrement(&This->ref);
2949 TRACE("(%p) ref=%d\n", This, ref);
/* IUnknown::Release for the UriBuilder: atomically decrements This->ref with
 * InterlockedDecrement and traces the new count. What happens when the count
 * reaches zero and the return statement are outside this excerpt (TODO
 * confirm that the object is freed there). */
2954 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface)
2956 UriBuilder *This = URIBUILDER_THIS(iface);
2957 LONG ref = InterlockedDecrement(&This->ref);
2959 TRACE("(%p) ref=%d\n", This, ref);
/* IUriBuilder::CreateUriSimple -- not implemented in the visible code: the
 * call is only reported through FIXME (dwReserved is truncated to DWORD for
 * the log). The return statement is outside this excerpt (TODO confirm). */
2967 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface,
2968 DWORD dwAllowEncodingPropertyMask,
2969 DWORD_PTR dwReserved,
2972 UriBuilder *This = URIBUILDER_THIS(iface);
2973 FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
/* IUriBuilder::CreateUri -- not implemented in the visible code: the call is
 * only reported through FIXME (dwReserved is truncated to DWORD for the log).
 * The return statement is outside this excerpt (TODO confirm). */
2977 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface,
2978 DWORD dwCreateFlags,
2979 DWORD dwAllowEncodingPropertyMask,
2980 DWORD_PTR dwReserved,
2983 UriBuilder *This = URIBUILDER_THIS(iface);
2984 FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
/* IUriBuilder::CreateUriWithFlags -- not implemented in the visible code: the
 * call is only reported through FIXME (dwReserved is truncated to DWORD for
 * the log). The return statement is outside this excerpt (TODO confirm). */
2988 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface,
2989 DWORD dwCreateFlags,
2990 DWORD dwUriBuilderFlags,
2991 DWORD dwAllowEncodingPropertyMask,
2992 DWORD_PTR dwReserved,
2995 UriBuilder *This = URIBUILDER_THIS(iface);
2996 FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
2997 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
/* IUriBuilder::GetIUri -- not implemented in the visible code: the call is
 * only reported through FIXME. The return statement is outside this excerpt
 * (TODO confirm). */
3001 static HRESULT WINAPI UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri)
3003 UriBuilder *This = URIBUILDER_THIS(iface);
3004 FIXME("(%p)->(%p)\n", This, ppIUri);
/* IUriBuilder::SetIUri -- not implemented in the visible code: the call is
 * only reported through FIXME. The return statement is outside this excerpt
 * (TODO confirm). */
3008 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri)
3010 UriBuilder *This = URIBUILDER_THIS(iface);
3011 FIXME("(%p)->(%p)\n", This, pIUri);
/* IUriBuilder::GetFragment -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3015 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment)
3017 UriBuilder *This = URIBUILDER_THIS(iface);
3018 FIXME("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
/* IUriBuilder::GetHost -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3022 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost)
3024 UriBuilder *This = URIBUILDER_THIS(iface);
3025 FIXME("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
/* IUriBuilder::GetPassword -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3029 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword)
3031 UriBuilder *This = URIBUILDER_THIS(iface);
3032 FIXME("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
/* IUriBuilder::GetPath -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3036 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath)
3038 UriBuilder *This = URIBUILDER_THIS(iface);
3039 FIXME("(%p)->(%p %p)\n", This, pcchPath, ppwzPath);
/* IUriBuilder::GetPort -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3043 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort)
3045 UriBuilder *This = URIBUILDER_THIS(iface);
3046 FIXME("(%p)->(%p %p)\n", This, pfHasPort, pdwPort);
/* IUriBuilder::GetQuery -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3050 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery)
3052 UriBuilder *This = URIBUILDER_THIS(iface);
3053 FIXME("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery);
/* IUriBuilder::GetSchemeName -- not implemented in the visible code: the call
 * is only reported through FIXME; neither output parameter is written here.
 * The return statement is outside this excerpt (TODO confirm). */
3057 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName)
3059 UriBuilder *This = URIBUILDER_THIS(iface);
3060 FIXME("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName);
/* IUriBuilder::GetUserName -- not implemented in the visible code: the call is
 * only reported through FIXME; neither output parameter is written here. The
 * return statement is outside this excerpt (TODO confirm). */
3064 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName)
3066 UriBuilder *This = URIBUILDER_THIS(iface);
3067 FIXME("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName);
/* IUriBuilder::SetFragment -- not implemented in the visible code: the new
 * value is only logged through FIXME and not stored. The return statement is
 * outside this excerpt (TODO confirm). */
3071 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue)
3073 UriBuilder *This = URIBUILDER_THIS(iface);
3074 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::SetHost -- not implemented in the visible code: the new value
 * is only logged through FIXME and not stored. The return statement is outside
 * this excerpt (TODO confirm). */
3078 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue)
3080 UriBuilder *This = URIBUILDER_THIS(iface);
3081 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::SetPassword -- not implemented in the visible code: the new
 * value is only logged through FIXME and not stored. The return statement is
 * outside this excerpt (TODO confirm). */
3085 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue)
3087 UriBuilder *This = URIBUILDER_THIS(iface);
3088 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::SetPath -- not implemented in the visible code: the new value
 * is only logged through FIXME and not stored. The return statement is outside
 * this excerpt (TODO confirm). */
3092 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue)
3094 UriBuilder *This = URIBUILDER_THIS(iface);
3095 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::SetPort -- not implemented in the visible code: fHasPort and
 * dwNewValue are only logged through FIXME and not stored. The return
 * statement is outside this excerpt (TODO confirm). */
3099 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue)
3101 UriBuilder *This = URIBUILDER_THIS(iface);
3102 FIXME("(%p)->(%d %d)\n", This, fHasPort, dwNewValue);
/* IUriBuilder::SetQuery -- not implemented in the visible code: the new value
 * is only logged through FIXME and not stored. The return statement is outside
 * this excerpt (TODO confirm). */
3106 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue)
3108 UriBuilder *This = URIBUILDER_THIS(iface);
3109 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::SetSchemeName -- not implemented in the visible code: the new
 * value is only logged through FIXME and not stored. The return statement is
 * outside this excerpt (TODO confirm). */
3113 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue)
3115 UriBuilder *This = URIBUILDER_THIS(iface);
3116 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::SetUserName -- not implemented in the visible code: the new
 * value is only logged through FIXME and not stored. The return statement is
 * outside this excerpt (TODO confirm). */
3120 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue)
3122 UriBuilder *This = URIBUILDER_THIS(iface);
3123 FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
/* IUriBuilder::RemoveProperties -- not implemented in the visible code: the
 * property mask is only logged through FIXME. The return statement is outside
 * this excerpt (TODO confirm). */
3127 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask)
3129 UriBuilder *This = URIBUILDER_THIS(iface);
3130 FIXME("(%p)->(0x%08x)\n", This, dwPropertyMask);
/* IUriBuilder::HasBeenModified -- not implemented in the visible code: the
 * call is only reported through FIXME and *pfModified is not written here.
 * The return statement is outside this excerpt (TODO confirm). */
3134 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified)
3136 UriBuilder *This = URIBUILDER_THIS(iface);
3137 FIXME("(%p)->(%p)\n", This, pfModified);
3141 #undef URIBUILDER_THIS
/* Method table of the IUriBuilder implementation; CreateIUriBuilder stores its
 * address in the lpIUriBuilderVtbl field of every UriBuilder it allocates.
 * Only part of the initializer list is visible in this excerpt. */
3143 static const IUriBuilderVtbl UriBuilderVtbl = {
3144 UriBuilder_QueryInterface,
3147 UriBuilder_CreateUriSimple,
3148 UriBuilder_CreateUri,
3149 UriBuilder_CreateUriWithFlags,
3152 UriBuilder_GetFragment,
3154 UriBuilder_GetPassword,
3157 UriBuilder_GetQuery,
3158 UriBuilder_GetSchemeName,
3159 UriBuilder_GetUserName,
3160 UriBuilder_SetFragment,
3162 UriBuilder_SetPassword,
3165 UriBuilder_SetQuery,
3166 UriBuilder_SetSchemeName,
3167 UriBuilder_SetUserName,
3168 UriBuilder_RemoveProperties,
3169 UriBuilder_HasBeenModified,
3172 /***********************************************************************
3173 * CreateIUriBuilder (urlmon.@)
3175 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder)
3179 TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
3181 ret = heap_alloc(sizeof(UriBuilder));
3183 return E_OUTOFMEMORY;
3185 ret->lpIUriBuilderVtbl = &UriBuilderVtbl;
3188 *ppIUriBuilder = URIBUILDER(ret);