git.oblomov.eu Git - wine/blob - dlls/urlmon/uri.c

   1 /*
   2  * Copyright 2010 Jacek Caban for CodeWeavers
   3  * Copyright 2010 Thomas Mullaly
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  18  */
  19
  20 #include "urlmon_main.h"
  21 #include "wine/debug.h"
  22
  23 #define NO_SHLWAPI_REG
  24 #include "shlwapi.h"
  25
  26 #define UINT_MAX 0xffffffff
  27 #define USHORT_MAX 0xffff
  28
  29 WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
  30
/* Implementation object behind the IUri interface.
 *
 * NOTE(review): the *_start members are INTs and the *_len members DWORDs;
 * they look like offsets/lengths into 'canon_uri' -- confirm against the
 * code that fills them in, which is not visible in this part of the file.
 */
typedef struct {
    const IUriVtbl  *lpIUriVtbl;
    LONG ref;

    /* The URI string in its raw (not canonicalized) form. */
    BSTR            raw_uri;

    /* Information about the canonicalized URI's buffer. */
    WCHAR           *canon_uri;
    DWORD           canon_size;
    DWORD           canon_len;

    /* Scheme component and the URL_SCHEME it maps to. */
    INT             scheme_start;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    /* Userinfo component; presumably 'userinfo_split' locates the ':'
     * between the user name and the password -- TODO confirm.
     */
    INT             userinfo_start;
    DWORD           userinfo_len;
    INT             userinfo_split;

    /* Host component and its type. */
    INT             host_start;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    /* Port value, plus a flag telling whether the URI has a port at all. */
    USHORT          port;
    BOOL            has_port;

    /* Authority component. */
    INT             authority_start;
    DWORD           authority_len;

    /* NOTE(review): likely the offset produced by find_domain_name(),
     * i.e. relative to the start of the host -- confirm.
     */
    INT             domain_offset;

    /* Path component. */
    INT             path_start;
    DWORD           path_len;
} Uri;
  65
/* Implementation object behind the IUriBuilder interface. It carries
 * only the interface vtable pointer and a reference count.
 */
typedef struct {
    const IUriBuilderVtbl  *lpIUriBuilderVtbl;
    LONG ref;
} UriBuilder;
  70
/* One h16 component of an IPv6 address, described as a character run:
 * 'str' points at its first character and 'len' is the number of
 * characters it spans (the run is not NUL terminated, see h16tous()).
 */
typedef struct {
    const WCHAR *str;
    DWORD       len;
} h16;
  75
/* The pieces of an IPv6 address literal. */
typedef struct {
    /* IPv6 addresses can hold up to 8 h16 components. */
    h16         components[8];
    DWORD       h16_count;

    /* An IPv6 can have 1 elision ("::"). */
    const WCHAR *elision;

    /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
    const WCHAR *ipv4;
    DWORD       ipv4_len;

    /* Sizes in bytes, filled in by compute_ipv6_comps_size():
     * 'components_size' covers the h16 components plus the IPv4 part,
     * 'elision_size' is the number of bytes the elision stands for
     * (0 when there is no elision).
     */
    INT         components_size;
    INT         elision_size;
} ipv6_address;
  91
/* Intermediate results gathered while parsing a URI string.
 *
 * Components are stored as a pointer plus a length instead of NUL
 * terminated strings (parse_scheme_name(), for example, points 'scheme'
 * straight into the string being parsed).
 */
typedef struct {
    /* The URI string being parsed. */
    BSTR            uri;

    BOOL            is_relative;
    BOOL            is_opaque;
    /* Set when the scheme was deduced instead of being read from the URI
     * (see parse_scheme()).
     */
    BOOL            has_implicit_scheme;
    /* NOTE(review): presumably set when the host is a bare number that is
     * treated as an IPv4 address (see check_implicit_ipv4()); the number
     * itself would then be in 'implicit_ipv4' -- confirm.
     */
    BOOL            has_implicit_ip;
    UINT            implicit_ipv4;

    const WCHAR     *scheme;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    const WCHAR     *userinfo;
    DWORD           userinfo_len;
    INT             userinfo_split;

    const WCHAR     *host;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    BOOL            has_ipv6;
    ipv6_address    ipv6_address;

    const WCHAR     *port;
    DWORD           port_len;
    USHORT          port_value;

    const WCHAR     *path;
    DWORD           path_len;
} parse_data;
 123
/* Upper case hexadecimal digits, indexed by nibble value (0-15). Used by
 * pct_encode_val() when percent encoding a character.
 */
static const CHAR hexDigits[] = "0123456789ABCDEF";
 125
/* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7.
 *
 * parse_scheme_type() matches scheme names against this table case
 * insensitively. The longest names below are 15 characters, which together
 * with the terminating NUL fill 'scheme_name' exactly.
 */
static const struct {
    URL_SCHEME  scheme;
    WCHAR       scheme_name[16];
} recognized_schemes[] = {
    {URL_SCHEME_FTP,            {'f','t','p',0}},
    {URL_SCHEME_HTTP,           {'h','t','t','p',0}},
    {URL_SCHEME_GOPHER,         {'g','o','p','h','e','r',0}},
    {URL_SCHEME_MAILTO,         {'m','a','i','l','t','o',0}},
    {URL_SCHEME_NEWS,           {'n','e','w','s',0}},
    {URL_SCHEME_NNTP,           {'n','n','t','p',0}},
    {URL_SCHEME_TELNET,         {'t','e','l','n','e','t',0}},
    {URL_SCHEME_WAIS,           {'w','a','i','s',0}},
    {URL_SCHEME_FILE,           {'f','i','l','e',0}},
    {URL_SCHEME_MK,             {'m','k',0}},
    {URL_SCHEME_HTTPS,          {'h','t','t','p','s',0}},
    {URL_SCHEME_SHELL,          {'s','h','e','l','l',0}},
    {URL_SCHEME_SNEWS,          {'s','n','e','w','s',0}},
    {URL_SCHEME_LOCAL,          {'l','o','c','a','l',0}},
    {URL_SCHEME_JAVASCRIPT,     {'j','a','v','a','s','c','r','i','p','t',0}},
    {URL_SCHEME_VBSCRIPT,       {'v','b','s','c','r','i','p','t',0}},
    {URL_SCHEME_ABOUT,          {'a','b','o','u','t',0}},
    {URL_SCHEME_RES,            {'r','e','s',0}},
    {URL_SCHEME_MSSHELLROOTED,  {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
    {URL_SCHEME_MSSHELLIDLIST,  {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
    {URL_SCHEME_MSHELP,         {'h','c','p',0}},
    {URL_SCHEME_WILDCARD,       {'*',0}}
};
 154
/* List of default ports Windows recognizes, keyed by scheme type. */
static const struct {
    URL_SCHEME  scheme;
    USHORT      port;
} default_ports[] = {
    {URL_SCHEME_FTP,    21},
    {URL_SCHEME_HTTP,   80},
    {URL_SCHEME_GOPHER, 70},
    {URL_SCHEME_NNTP,   119},
    {URL_SCHEME_TELNET, 23},
    {URL_SCHEME_WAIS,   210},
    {URL_SCHEME_HTTPS,  443},
};
 168
/* List of 3 character top level domain names Windows seems to recognize.
 * There might be more, but these are the only ones found so far.
 *
 * find_domain_name() compares host labels against these entries case
 * insensitively.
 */
static const struct {
    WCHAR tld_name[4];
} recognized_tlds[] = {
    {{'c','o','m',0}},
    {{'e','d','u',0}},
    {{'g','o','v',0}},
    {{'i','n','t',0}},
    {{'m','i','l',0}},
    {{'n','e','t',0}},
    {{'o','r','g',0}}
};
 183
 184 static inline BOOL is_alpha(WCHAR val) {
 185         return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z'));
 186 }
 187
 188 static inline BOOL is_num(WCHAR val) {
 189         return (val >= '0' && val <= '9');
 190 }
 191
 192 /* A URI is implicitly a file path if it begins with
 193  * a drive letter (eg X:) or starts with "\\" (UNC path).
 194  */
 195 static inline BOOL is_implicit_file_path(const WCHAR *str) {
 196     if(is_alpha(str[0]) && str[1] == ':')
 197         return TRUE;
 198     else if(str[0] == '\\' && str[1] == '\\')
 199         return TRUE;
 200
 201     return FALSE;
 202 }
 203
 204 /* Checks if the URI is a hierarchical URI. A hierarchical
 205  * URI is one that has "//" after the scheme.
 206  */
 207 static BOOL check_hierarchical(const WCHAR **ptr) {
 208     const WCHAR *start = *ptr;
 209
 210     if(**ptr != '/')
 211         return FALSE;
 212
 213     ++(*ptr);
 214     if(**ptr != '/') {
 215         *ptr = start;
 216         return FALSE;
 217     }
 218
 219     ++(*ptr);
 220     return TRUE;
 221 }
 222
 223 /* unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" */
 224 static inline BOOL is_unreserved(WCHAR val) {
 225     return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
 226             val == '_' || val == '~');
 227 }
 228
 229 /* sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 230  *               / "*" / "+" / "," / ";" / "="
 231  */
 232 static inline BOOL is_subdelim(WCHAR val) {
 233     return (val == '!' || val == '$' || val == '&' ||
 234             val == '\'' || val == '(' || val == ')' ||
 235             val == '*' || val == '+' || val == ',' ||
 236             val == ';' || val == '=');
 237 }
 238
 239 /* gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
 240 static inline BOOL is_gendelim(WCHAR val) {
 241     return (val == ':' || val == '/' || val == '?' ||
 242             val == '#' || val == '[' || val == ']' ||
 243             val == '@');
 244 }
 245
 246 /* Characters that delimit the end of the authority
 247  * section of a URI. Sometimes a '\\' is considered
 248  * an authority delimeter.
 249  */
 250 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
 251     return (val == '#' || val == '/' || val == '?' ||
 252             val == '\0' || (acceptSlash && val == '\\'));
 253 }
 254
 255 /* reserved = gen-delims / sub-delims */
 256 static inline BOOL is_reserved(WCHAR val) {
 257     return (is_subdelim(val) || is_gendelim(val));
 258 }
 259
 260 static inline BOOL is_hexdigit(WCHAR val) {
 261     return ((val >= 'a' && val <= 'f') ||
 262             (val >= 'A' && val <= 'F') ||
 263             (val >= '0' && val <= '9'));
 264 }
 265
 266 static inline BOOL is_path_delim(WCHAR val) {
 267     return (!val || val == '#' || val == '?');
 268 }
 269
 270 /* Computes the size of the given IPv6 address.
 271  * Each h16 component is 16bits, if there is an IPv4 address, it's
 272  * 32bits. If there's an elision it can be 16bits to 128bits, depending
 273  * on the number of other components.
 274  *
 275  * Modeled after google-url's CheckIPv6ComponentsSize function
 276  */
 277 static void compute_ipv6_comps_size(ipv6_address *address) {
 278     address->components_size = address->h16_count * 2;
 279
 280     if(address->ipv4)
 281         /* IPv4 address is 4 bytes. */
 282         address->components_size += 4;
 283
 284     if(address->elision) {
 285         /* An elision can be anywhere from 2 bytes up to 16 bytes.
 286          * It size depends on the size of the h16 and IPv4 components.
 287          */
 288         address->elision_size = 16 - address->components_size;
 289         if(address->elision_size < 2)
 290             address->elision_size = 2;
 291     } else
 292         address->elision_size = 0;
 293 }
 294
 295 /* Taken from dlls/jscript/lex.c */
 296 static int hex_to_int(WCHAR val) {
 297     if(val >= '0' && val <= '9')
 298         return val - '0';
 299     else if(val >= 'a' && val <= 'f')
 300         return val - 'a' + 10;
 301     else if(val >= 'A' && val <= 'F')
 302         return val - 'A' + 10;
 303
 304     return -1;
 305 }
 306
 307 /* Helper function for converting a percent encoded string
 308  * representation of a WCHAR value into its actual WCHAR value. If
 309  * the two characters following the '%' aren't valid hex values then
 310  * this function returns the NULL character.
 311  *
 312  * Eg.
 313  *  "%2E" will result in '.' being returned by this function.
 314  */
 315 static WCHAR decode_pct_val(const WCHAR *ptr) {
 316     WCHAR ret = '\0';
 317
 318     if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) {
 319         INT a = hex_to_int(*(ptr + 1));
 320         INT b = hex_to_int(*(ptr + 2));
 321
 322         ret = a << 4;
 323         ret += b;
 324     }
 325
 326     return ret;
 327 }
 328
 329 /* Helper function for percent encoding a given character
 330  * and storing the encoded value into a given buffer (dest).
 331  *
 332  * It's up to the calling function to ensure that there is
 333  * at least enough space in 'dest' for the percent encoded
 334  * value to be stored (so dest + 3 spaces available).
 335  */
 336 static inline void pct_encode_val(WCHAR val, WCHAR *dest) {
 337     dest[0] = '%';
 338     dest[1] = hexDigits[(val >> 4) & 0xf];
 339     dest[2] = hexDigits[val & 0xf];
 340 }
 341
 342 /* Scans the range of characters [str, end] and returns the last occurence
 343  * of 'ch' or returns NULL.
 344  */
 345 static const WCHAR *str_last_of(const WCHAR *str, const WCHAR *end, WCHAR ch) {
 346     const WCHAR *ptr = end;
 347
 348     while(ptr >= str) {
 349         if(*ptr == ch)
 350             return ptr;
 351         --ptr;
 352     }
 353
 354     return NULL;
 355 }
 356
 357 /* Attempts to parse the domain name from the host.
 358  *
 359  * This function also includes the Top-level Domain (TLD) name
 360  * of the host when it tries to find the domain name. If it finds
 361  * a valid domain name it will assign 'domain_start' the offset
 362  * into 'host' where the domain name starts.
 363  *
 364  * It's implied that if a domain name its range is implied to be
 365  * [host+domain_start, host+host_len).
 366  */
 367 static void find_domain_name(const WCHAR *host, DWORD host_len,
 368                              INT *domain_start) {
 369     const WCHAR *last_tld, *sec_last_tld, *end;
 370
 371     end = host+host_len-1;
 372
 373     *domain_start = -1;
 374
 375     /* There has to be at least enough room for a '.' followed by a
 376      * 3 character TLD for a domain to even exist in the host name.
 377      */
 378     if(host_len < 4)
 379         return;
 380
 381     last_tld = str_last_of(host, end, '.');
 382     if(!last_tld)
 383         /* http://hostname -> has no domain name. */
 384         return;
 385
 386     sec_last_tld = str_last_of(host, last_tld-1, '.');
 387     if(!sec_last_tld) {
 388         /* If the '.' is at the beginning of the host there
 389          * has to be at least 3 characters in the TLD for it
 390          * to be valid.
 391          *  Ex: .com -> .com as the domain name.
 392          *      .co  -> has no domain name.
 393          */
 394         if(last_tld-host == 0) {
 395             if(end-(last_tld-1) < 3)
 396                 return;
 397         } else if(last_tld-host == 3) {
 398             DWORD i;
 399
 400             /* If there's three characters in front of last_tld and
 401              * they are on the list of recognized TLDs, then this
 402              * host doesn't have a domain (since the host only contains
 403              * a TLD name.
 404              *  Ex: edu.uk -> has no domain name.
 405              *      foo.uk -> foo.uk as the domain name.
 406              */
 407             for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
 408                 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
 409                     return;
 410             }
 411         } else if(last_tld-host < 3)
 412             /* Anything less then 3 characters is considered part
 413              * of the TLD name.
 414              *  Ex: ak.uk -> Has no domain name.
 415              */
 416             return;
 417
 418         /* Otherwise the domain name is the whole host name. */
 419         *domain_start = 0;
 420     } else if(end+1-last_tld > 3) {
 421         /* If the last_tld has more then 3 characters then it's automatically
 422          * considered the TLD of the domain name.
 423          *  Ex: www.winehq.org.uk.test -> uk.test as the domain name.
 424          */
 425         *domain_start = (sec_last_tld+1)-host;
 426     } else if(last_tld - (sec_last_tld+1) < 4) {
 427         DWORD i;
 428         /* If the sec_last_tld is 3 characters long it HAS to be on the list of
 429          * recognized to still be considered part of the TLD name, otherwise
 430          * its considered the domain name.
 431          *  Ex: www.google.com.uk -> google.com.uk as the domain name.
 432          *      www.google.foo.uk -> foo.uk as the domain name.
 433          */
 434         if(last_tld - (sec_last_tld+1) == 3) {
 435             for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
 436                 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
 437                     const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
 438
 439                     if(!domain)
 440                         *domain_start = 0;
 441                     else
 442                         *domain_start = (domain+1) - host;
 443                     TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
 444                                                         (host+host_len)-(host+*domain_start)));
 445                     return;
 446                 }
 447             }
 448
 449             *domain_start = (sec_last_tld+1)-host;
 450         } else {
 451             /* Since the sec_last_tld is less then 3 characters it's considered
 452              * part of the TLD.
 453              *  Ex: www.google.fo.uk -> google.fo.uk as the domain name.
 454              */
 455             const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
 456
 457             if(!domain)
 458                 *domain_start = 0;
 459             else
 460                 *domain_start = (domain+1) - host;
 461         }
 462     } else {
 463         /* The second to last TLD has more then 3 characters making it
 464          * the domain name.
 465          *  Ex: www.google.test.us -> test.us as the domain name.
 466          */
 467         *domain_start = (sec_last_tld+1)-host;
 468     }
 469
 470     TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
 471                                         (host+host_len)-(host+*domain_start)));
 472 }
 473
 474 /* Computes the location where the elision should occur in the IPv6
 475  * address using the numerical values of each component stored in
 476  * 'values'. If the address shouldn't contain an elision then 'index'
 477  * is assigned -1 as it's value. Otherwise 'index' will contain the
 478  * starting index (into values) where the elision should be, and 'count'
 479  * will contain the number of cells the elision covers.
 480  *
 481  * NOTES:
 482  *  Windows will expand an elision if the elision only represents 1 h16
 483  *  component of the URI.
 484  *
 485  *  Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
 486  *
 487  *  If the IPv6 address contains an IPv4 address, the IPv4 address is also
 488  *  considered for being included as part of an elision if all it's components
 489  *  are zeros.
 490  *
 491  *  Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
 492  */
 493 static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
 494                                      INT *index, DWORD *count) {
 495     DWORD i, max_len, cur_len;
 496     INT max_index, cur_index;
 497
 498     max_len = cur_len = 0;
 499     max_index = cur_index = -1;
 500     for(i = 0; i < 8; ++i) {
 501         BOOL check_ipv4 = (address->ipv4 && i == 6);
 502         BOOL is_end = (check_ipv4 || i == 7);
 503
 504         if(check_ipv4) {
 505             /* Check if the IPv4 address contains only zeros. */
 506             if(values[i] == 0 && values[i+1] == 0) {
 507                 if(cur_index == -1)
 508                     cur_index = i;
 509
 510                 cur_len += 2;
 511                 ++i;
 512             }
 513         } else if(values[i] == 0) {
 514             if(cur_index == -1)
 515                 cur_index = i;
 516
 517             ++cur_len;
 518         }
 519
 520         if(is_end || values[i] != 0) {
 521             /* We only consider it for an elision if it's
 522              * more then 1 component long.
 523              */
 524             if(cur_len > 1 && cur_len > max_len) {
 525                 /* Found the new elision location. */
 526                 max_len = cur_len;
 527                 max_index = cur_index;
 528             }
 529
 530             /* Reset the current range for the next range of zeros. */
 531             cur_index = -1;
 532             cur_len = 0;
 533         }
 534     }
 535
 536     *index = max_index;
 537     *count = max_len;
 538 }
 539
 540 /* Converts the specified IPv4 address into an uint value.
 541  *
 542  * This function assumes that the IPv4 address has already been validated.
 543  */
 544 static UINT ipv4toui(const WCHAR *ip, DWORD len) {
 545     UINT ret = 0;
 546     DWORD comp_value = 0;
 547     const WCHAR *ptr;
 548
 549     for(ptr = ip; ptr < ip+len; ++ptr) {
 550         if(*ptr == '.') {
 551             ret <<= 8;
 552             ret += comp_value;
 553             comp_value = 0;
 554         } else
 555             comp_value = comp_value*10 + (*ptr-'0');
 556     }
 557
 558     ret <<= 8;
 559     ret += comp_value;
 560
 561     return ret;
 562 }
 563
 564 /* Converts an IPv4 address in numerical form into it's fully qualified
 565  * string form. This function returns the number of characters written
 566  * to 'dest'. If 'dest' is NULL this function will return the number of
 567  * characters that would have been written.
 568  *
 569  * It's up to the caller to ensure there's enough space in 'dest' for the
 570  * address.
 571  */
 572 static DWORD ui2ipv4(WCHAR *dest, UINT address) {
 573     static const WCHAR formatW[] =
 574         {'%','u','.','%','u','.','%','u','.','%','u',0};
 575     DWORD ret = 0;
 576     UCHAR digits[4];
 577
 578     digits[0] = (address >> 24) & 0xff;
 579     digits[1] = (address >> 16) & 0xff;
 580     digits[2] = (address >> 8) & 0xff;
 581     digits[3] = address & 0xff;
 582
 583     if(!dest) {
 584         WCHAR tmp[16];
 585         ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]);
 586     } else
 587         ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]);
 588
 589     return ret;
 590 }
 591
 592 /* Converts an h16 component (from an IPv6 address) into it's
 593  * numerical value.
 594  *
 595  * This function assumes that the h16 component has already been validated.
 596  */
 597 static USHORT h16tous(h16 component) {
 598     DWORD i;
 599     USHORT ret = 0;
 600
 601     for(i = 0; i < component.len; ++i) {
 602         ret <<= 4;
 603         ret += hex_to_int(component.str[i]);
 604     }
 605
 606     return ret;
 607 }
 608
 609 /* Converts an IPv6 address into it's 128 bits (16 bytes) numerical value.
 610  *
 611  * This function assumes that the ipv6_address has already been validated.
 612  */
 613 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
 614     DWORD i, cur_component = 0;
 615     BOOL already_passed_elision = FALSE;
 616
 617     for(i = 0; i < address->h16_count; ++i) {
 618         if(address->elision) {
 619             if(address->components[i].str > address->elision && !already_passed_elision) {
 620                 /* Means we just passed the elision and need to add it's values to
 621                  * 'number' before we do anything else.
 622                  */
 623                 DWORD j = 0;
 624                 for(j = 0; j < address->elision_size; j+=2)
 625                     number[cur_component++] = 0;
 626
 627                 already_passed_elision = TRUE;
 628             }
 629         }
 630
 631         number[cur_component++] = h16tous(address->components[i]);
 632     }
 633
 634     /* Case when the elision appears after the h16 components. */
 635     if(!already_passed_elision && address->elision) {
 636         for(i = 0; i < address->elision_size; i+=2)
 637             number[cur_component++] = 0;
 638         already_passed_elision = TRUE;
 639     }
 640
 641     if(address->ipv4) {
 642         UINT value = ipv4toui(address->ipv4, address->ipv4_len);
 643
 644         if(cur_component != 6) {
 645             ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
 646             return FALSE;
 647         }
 648
 649         number[cur_component++] = (value >> 16) & 0xffff;
 650         number[cur_component] = value & 0xffff;
 651     }
 652
 653     return TRUE;
 654 }
 655
 656 /* Checks if the characters pointed to by 'ptr' are
 657  * a percent encoded data octet.
 658  *
 659  * pct-encoded = "%" HEXDIG HEXDIG
 660  */
 661 static BOOL check_pct_encoded(const WCHAR **ptr) {
 662     const WCHAR *start = *ptr;
 663
 664     if(**ptr != '%')
 665         return FALSE;
 666
 667     ++(*ptr);
 668     if(!is_hexdigit(**ptr)) {
 669         *ptr = start;
 670         return FALSE;
 671     }
 672
 673     ++(*ptr);
 674     if(!is_hexdigit(**ptr)) {
 675         *ptr = start;
 676         return FALSE;
 677     }
 678
 679     ++(*ptr);
 680     return TRUE;
 681 }
 682
 683 /* dec-octet   = DIGIT                 ; 0-9
 684  *             / %x31-39 DIGIT         ; 10-99
 685  *             / "1" 2DIGIT            ; 100-199
 686  *             / "2" %x30-34 DIGIT     ; 200-249
 687  *             / "25" %x30-35          ; 250-255
 688  */
 689 static BOOL check_dec_octet(const WCHAR **ptr) {
 690     const WCHAR *c1, *c2, *c3;
 691
 692     c1 = *ptr;
 693     /* A dec-octet must be at least 1 digit long. */
 694     if(*c1 < '0' || *c1 > '9')
 695         return FALSE;
 696
 697     ++(*ptr);
 698
 699     c2 = *ptr;
 700     /* Since the 1 digit requirment was meet, it doesn't
 701      * matter if this is a DIGIT value, it's considered a
 702      * dec-octet.
 703      */
 704     if(*c2 < '0' || *c2 > '9')
 705         return TRUE;
 706
 707     ++(*ptr);
 708
 709     c3 = *ptr;
 710     /* Same explanation as above. */
 711     if(*c3 < '0' || *c3 > '9')
 712         return TRUE;
 713
 714     /* Anything > 255 isn't a valid IP dec-octet. */
 715     if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') {
 716         *ptr = c1;
 717         return FALSE;
 718     }
 719
 720     ++(*ptr);
 721     return TRUE;
 722 }
 723
 724 /* Checks if there is an implicit IPv4 address in the host component of the URI.
 725  * The max value of an implicit IPv4 address is UINT_MAX.
 726  *
 727  *  Ex:
 728  *      "234567" would be considered an implicit IPv4 address.
 729  */
 730 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) {
 731     const WCHAR *start = *ptr;
 732     ULONGLONG ret = 0;
 733     *val = 0;
 734
 735     while(is_num(**ptr)) {
 736         ret = ret*10 + (**ptr - '0');
 737
 738         if(ret > UINT_MAX) {
 739             *ptr = start;
 740             return FALSE;
 741         }
 742         ++(*ptr);
 743     }
 744
 745     if(*ptr == start)
 746         return FALSE;
 747
 748     *val = ret;
 749     return TRUE;
 750 }
 751
 752 /* Checks if the string contains an IPv4 address.
 753  *
 754  * This function has a strict mode or a non-strict mode of operation
 755  * When 'strict' is set to FALSE this function will return TRUE if
 756  * the string contains at least 'dec-octet "." dec-octet' since partial
 757  * IPv4 addresses will be normalized out into full IPv4 addresses. When
 758  * 'strict' is set this function expects there to be a full IPv4 address.
 759  *
 760  * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
 761  */
 762 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) {
 763     const WCHAR *start = *ptr;
 764
 765     if(!check_dec_octet(ptr)) {
 766         *ptr = start;
 767         return FALSE;
 768     }
 769
 770     if(**ptr != '.') {
 771         *ptr = start;
 772         return FALSE;
 773     }
 774
 775     ++(*ptr);
 776     if(!check_dec_octet(ptr)) {
 777         *ptr = start;
 778         return FALSE;
 779     }
 780
 781     if(**ptr != '.') {
 782         if(strict) {
 783             *ptr = start;
 784             return FALSE;
 785         } else
 786             return TRUE;
 787     }
 788
 789     ++(*ptr);
 790     if(!check_dec_octet(ptr)) {
 791         *ptr = start;
 792         return FALSE;
 793     }
 794
 795     if(**ptr != '.') {
 796         if(strict) {
 797             *ptr = start;
 798             return FALSE;
 799         } else
 800             return TRUE;
 801     }
 802
 803     ++(*ptr);
 804     if(!check_dec_octet(ptr)) {
 805         *ptr = start;
 806         return FALSE;
 807     }
 808
 809     /* Found a four digit ip address. */
 810     return TRUE;
 811 }
 812 /* Tries to parse the scheme name of the URI.
 813  *
 814  * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
 815  * NOTE: Windows accepts a number as the first character of a scheme.
 816  */
 817 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data) {
 818     const WCHAR *start = *ptr;
 819
 820     data->scheme = NULL;
 821     data->scheme_len = 0;
 822
 823     while(**ptr) {
 824         if(**ptr == '*' && *ptr == start) {
 825             /* Might have found a wildcard scheme. If it is the next
 826              * char has to be a ':' for it to be a valid URI
 827              */
 828             ++(*ptr);
 829             break;
 830         } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' &&
 831            **ptr != '-' && **ptr != '.')
 832             break;
 833
 834         (*ptr)++;
 835     }
 836
 837     if(*ptr == start)
 838         return FALSE;
 839
 840     /* Schemes must end with a ':' */
 841     if(**ptr != ':') {
 842         *ptr = start;
 843         return FALSE;
 844     }
 845
 846     data->scheme = start;
 847     data->scheme_len = *ptr - start;
 848
 849     ++(*ptr);
 850     return TRUE;
 851 }
 852
 853 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores
 854  * the deduced URL_SCHEME in data->scheme_type.
 855  */
 856 static BOOL parse_scheme_type(parse_data *data) {
 857     /* If there's scheme data then see if it's a recognized scheme. */
 858     if(data->scheme && data->scheme_len) {
 859         DWORD i;
 860
 861         for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) {
 862             if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) {
 863                 /* Has to be a case insensitive compare. */
 864                 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) {
 865                     data->scheme_type = recognized_schemes[i].scheme;
 866                     return TRUE;
 867                 }
 868             }
 869         }
 870
 871         /* If we get here it means it's not a recognized scheme. */
 872         data->scheme_type = URL_SCHEME_UNKNOWN;
 873         return TRUE;
 874     } else if(data->is_relative) {
 875         /* Relative URI's have no scheme. */
 876         data->scheme_type = URL_SCHEME_UNKNOWN;
 877         return TRUE;
 878     } else {
 879         /* Should never reach here! what happened... */
 880         FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri));
 881         return FALSE;
 882     }
 883 }
 884
 885 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't
 886  * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type
 887  * using the flags specified in 'flags' (if any). Flags that affect how this function
 888  * operates are the Uri_CREATE_ALLOW_* flags.
 889  *
 890  * All parsed/deduced information will be stored in 'data' when the function returns.
 891  *
 892  * Returns TRUE if it was able to successfully parse the information.
 893  */
 894 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
 895     static const WCHAR fileW[] = {'f','i','l','e',0};
 896     static const WCHAR wildcardW[] = {'*',0};
 897
 898     /* First check to see if the uri could implicitly be a file path. */
 899     if(is_implicit_file_path(*ptr)) {
 900         if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) {
 901             data->scheme = fileW;
 902             data->scheme_len = lstrlenW(fileW);
 903             data->has_implicit_scheme = TRUE;
 904
 905             TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags);
 906         } else {
 907             /* Window's does not consider anything that can implicitly be a file
 908              * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set...
 909              */
 910             TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n",
 911                     ptr, data, flags);
 912             return FALSE;
 913         }
 914     } else if(!parse_scheme_name(ptr, data)) {
 915         /* No Scheme was found, this means it could be:
 916          *      a) an implicit Wildcard scheme
 917          *      b) a relative URI
 918          *      c) a invalid URI.
 919          */
 920         if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) {
 921             data->scheme = wildcardW;
 922             data->scheme_len = lstrlenW(wildcardW);
 923             data->has_implicit_scheme = TRUE;
 924
 925             TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags);
 926         } else if (flags & Uri_CREATE_ALLOW_RELATIVE) {
 927             data->is_relative = TRUE;
 928             TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags);
 929         } else {
 930             TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags);
 931             return FALSE;
 932         }
 933     }
 934
 935     if(!data->is_relative)
 936         TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags,
 937                 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
 938
 939     if(!parse_scheme_type(data))
 940         return FALSE;
 941
 942     TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type);
 943     return TRUE;
 944 }
 945
 946 /* Parses the userinfo part of the URI (if it exists). The userinfo field of
 947  * a URI can consist of "username:password@", or just "username@".
 948  *
 949  * RFC def:
 950  * userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
 951  *
 952  * NOTES:
 953  *  1)  If there is more than one ':' in the userinfo part of the URI Windows
 954  *      uses the first occurence of ':' to delimit the username and password
 955  *      components.
 956  *
 957  *      ex:
 958  *          ftp://user:pass:word@winehq.org
 959  *
 960  *      Would yield, "user" as the username and "pass:word" as the password.
 961  *
 962  *  2)  Windows allows any character to appear in the "userinfo" part of
 963  *      a URI, as long as it's not an authority delimeter character set.
 964  */
 965 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
 966     data->userinfo = *ptr;
 967     data->userinfo_split = -1;
 968
 969     while(**ptr != '@') {
 970         if(**ptr == ':' && data->userinfo_split == -1)
 971             data->userinfo_split = *ptr - data->userinfo;
 972         else if(**ptr == '%') {
 973             /* If it's a known scheme type, it has to be a valid percent
 974              * encoded value.
 975              */
 976             if(!check_pct_encoded(ptr)) {
 977                 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
 978                     *ptr = data->userinfo;
 979                     data->userinfo = NULL;
 980                     data->userinfo_split = -1;
 981
 982                     TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
 983                     return;
 984                 }
 985             } else
 986                 continue;
 987         } else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN))
 988             break;
 989
 990         ++(*ptr);
 991     }
 992
 993     if(**ptr != '@') {
 994         *ptr = data->userinfo;
 995         data->userinfo = NULL;
 996         data->userinfo_split = -1;
 997
 998         TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
 999         return;
1000     }
1001
1002     data->userinfo_len = *ptr - data->userinfo;
1003     TRACE("(%p %p %x): Found userinfo=%s userinfo_len=%d split=%d.\n", ptr, data, flags,
1004             debugstr_wn(data->userinfo, data->userinfo_len), data->userinfo_len, data->userinfo_split);
1005     ++(*ptr);
1006 }
1007
1008 /* Attempts to parse a port from the URI.
1009  *
1010  * NOTES:
1011  *  Windows seems to have a cap on what the maximum value
1012  *  for a port can be. The max value is USHORT_MAX.
1013  *
1014  * port = *DIGIT
1015  */
1016 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) {
1017     UINT port = 0;
1018     data->port = *ptr;
1019
1020     while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1021         if(!is_num(**ptr)) {
1022             *ptr = data->port;
1023             data->port = NULL;
1024             return FALSE;
1025         }
1026
1027         port = port*10 + (**ptr-'0');
1028
1029         if(port > USHORT_MAX) {
1030             *ptr = data->port;
1031             data->port = NULL;
1032             return FALSE;
1033         }
1034
1035         ++(*ptr);
1036     }
1037
1038     data->port_value = port;
1039     data->port_len = *ptr - data->port;
1040
1041     TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags,
1042         debugstr_wn(data->port, data->port_len), data->port_len, data->port_value);
1043     return TRUE;
1044 }
1045
1046 /* Attempts to parse a IPv4 address from the URI.
1047  *
1048  * NOTES:
1049  *  Window's normalizes IPv4 addresses, This means there's three
1050  *  possibilities for the URI to contain an IPv4 address.
1051  *      1)  A well formed address (ex. 192.2.2.2).
1052  *      2)  A partially formed address. For example "192.0" would
1053  *          normalize to "192.0.0.0" during canonicalization.
1054  *      3)  An implicit IPv4 address. For example "256" would
1055  *          normalize to "0.0.1.0" during canonicalization. Also
1056  *          note that the maximum value for an implicit IP address
1057  *          is UINT_MAX, if the value in the URI exceeds this then
1058  *          it is not considered an IPv4 address.
1059  */
1060 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1061     const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN;
1062     data->host = *ptr;
1063
1064     if(!check_ipv4address(ptr, FALSE)) {
1065         if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) {
1066             TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n",
1067                 ptr, data, flags);
1068             *ptr = data->host;
1069             data->host = NULL;
1070             return FALSE;
1071         } else
1072             data->has_implicit_ip = TRUE;
1073     }
1074
1075     /* Check if what we found is the only part of the host name (if it isn't
1076      * we don't have an IPv4 address).
1077      */
1078     if(**ptr == ':') {
1079         ++(*ptr);
1080         if(!parse_port(ptr, data, flags)) {
1081             *ptr = data->host;
1082             data->host = NULL;
1083             return FALSE;
1084         }
1085     } else if(!is_auth_delim(**ptr, !is_unknown)) {
1086         /* Found more data which belongs the host, so this isn't an IPv4. */
1087         *ptr = data->host;
1088         data->host = NULL;
1089         data->has_implicit_ip = FALSE;
1090         return FALSE;
1091     }
1092
1093     data->host_len = *ptr - data->host;
1094     data->host_type = Uri_HOST_IPV4;
1095
1096     TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n",
1097         ptr, data, flags, debugstr_wn(data->host, data->host_len),
1098         data->host_len, data->host_type);
1099     return TRUE;
1100 }
1101
1102 /* Attempts to parse the reg-name from the URI.
1103  *
1104  * Because of the way Windows handles ':' this function also
1105  * handles parsing the port.
1106  *
1107  * reg-name = *( unreserved / pct-encoded / sub-delims )
1108  *
1109  * NOTE:
1110  *  Windows allows everything, but, the characters in "auth_delims" and ':'
1111  *  to appear in a reg-name, unless it's an unknown scheme type then ':' is
1112  *  allowed to appear (even if a valid port isn't after it).
1113  *
1114  *  Windows doesn't like host names which start with '[' and end with ']'
1115  *  and don't contain a valid IP literal address in between them.
1116  *
1117  *  On Windows if an '[' is encountered in the host name the ':' no longer
1118  *  counts as a delimiter until you reach the next ']' or an "authority delimeter".
1119  *
1120  *  A reg-name CAN be empty.
1121  */
1122 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags) {
1123     const BOOL has_start_bracket = **ptr == '[';
1124     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1125     BOOL inside_brackets = has_start_bracket;
1126     BOOL ignore_col = FALSE;
1127
1128     /* We have to be careful with file schemes. */
1129     if(data->scheme_type == URL_SCHEME_FILE) {
1130         /* This is because an implicit file scheme could be "C:\\test" and it
1131          * would trick this function into thinking the host is "C", when after
1132          * canonicalization the host would end up being an empty string.
1133          */
1134         if(is_alpha(**ptr) && *(*ptr+1) == ':') {
1135             /* Regular old drive paths don't have a host type (or host name). */
1136             data->host_type = Uri_HOST_UNKNOWN;
1137             data->host = *ptr;
1138             data->host_len = 0;
1139             return TRUE;
1140         } else if(**ptr == '\\' && *(*ptr+1) == '\\')
1141             /* Skip past the "\\" of a UNC path. */
1142             *ptr += 2;
1143     }
1144
1145     data->host = *ptr;
1146
1147     while(!is_auth_delim(**ptr, known_scheme)) {
1148         if(**ptr == ':' && !ignore_col) {
1149             /* We can ignore ':' if were inside brackets.*/
1150             if(!inside_brackets) {
1151                 const WCHAR *tmp = (*ptr)++;
1152
1153                 /* Attempt to parse the port. */
1154                 if(!parse_port(ptr, data, flags)) {
1155                     /* Windows expects there to be a valid port for known scheme types. */
1156                     if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1157                         *ptr = data->host;
1158                         data->host = NULL;
1159                         TRACE("(%p %p %x): Expected valid port\n", ptr, data, flags);
1160                         return FALSE;
1161                     } else
1162                         /* Windows gives up on trying to parse a port when it
1163                          * encounters 1 invalid port.
1164                          */
1165                         ignore_col = TRUE;
1166                 } else {
1167                     data->host_len = tmp - data->host;
1168                     break;
1169                 }
1170             }
1171         } else if(**ptr == '%' && known_scheme) {
1172             /* Has to be a legit % encoded value. */
1173             if(!check_pct_encoded(ptr)) {
1174                 *ptr = data->host;
1175                 data->host = NULL;
1176                 return FALSE;
1177             } else
1178                 continue;
1179         } else if(**ptr == ']')
1180             inside_brackets = FALSE;
1181         else if(**ptr == '[')
1182             inside_brackets = TRUE;
1183
1184         ++(*ptr);
1185     }
1186
1187     if(has_start_bracket) {
1188         /* Make sure the last character of the host wasn't a ']'. */
1189         if(*(*ptr-1) == ']') {
1190             TRACE("(%p %p %x): Expected an IP literal inside of the host\n",
1191                 ptr, data, flags);
1192             *ptr = data->host;
1193             data->host = NULL;
1194             return FALSE;
1195         }
1196     }
1197
1198     /* Don't overwrite our length if we found a port earlier. */
1199     if(!data->port)
1200         data->host_len = *ptr - data->host;
1201
1202     /* If the host is empty, then it's an unknown host type. */
1203     if(data->host_len == 0)
1204         data->host_type = Uri_HOST_UNKNOWN;
1205     else
1206         data->host_type = Uri_HOST_DNS;
1207
1208     TRACE("(%p %p %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags,
1209         debugstr_wn(data->host, data->host_len), data->host_len);
1210     return TRUE;
1211 }
1212
1213 /* Attempts to parse an IPv6 address out of the URI.
1214  *
1215  * IPv6address =                               6( h16 ":" ) ls32
1216  *                /                       "::" 5( h16 ":" ) ls32
1217  *                / [               h16 ] "::" 4( h16 ":" ) ls32
1218  *                / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1219  *                / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1220  *                / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1221  *                / [ *4( h16 ":" ) h16 ] "::"              ls32
1222  *                / [ *5( h16 ":" ) h16 ] "::"              h16
1223  *                / [ *6( h16 ":" ) h16 ] "::"
1224  *
1225  * ls32        = ( h16 ":" h16 ) / IPv4address
1226  *             ; least-significant 32 bits of address.
1227  *
1228  * h16         = 1*4HEXDIG
1229  *             ; 16 bits of address represented in hexadecimal.
1230  *
1231  * Modeled after google-url's 'DoParseIPv6' function.
1232  */
1233 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1234     const WCHAR *start, *cur_start;
1235     ipv6_address ip;
1236
1237     start = cur_start = *ptr;
1238     memset(&ip, 0, sizeof(ipv6_address));
1239
1240     for(;; ++(*ptr)) {
1241         /* Check if we're on the last character of the host. */
1242         BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
1243                         || **ptr == ']');
1244
1245         BOOL is_split = (**ptr == ':');
1246         BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
1247
1248         /* Check if we're at the end of of the a component, or
1249          * if we're at the end of the IPv6 address.
1250          */
1251         if(is_split || is_end) {
1252             DWORD cur_len = 0;
1253
1254             cur_len = *ptr - cur_start;
1255
1256             /* h16 can't have a length > 4. */
1257             if(cur_len > 4) {
1258                 *ptr = start;
1259
1260                 TRACE("(%p %p %x): h16 component to long.\n",
1261                     ptr, data, flags);
1262                 return FALSE;
1263             }
1264
1265             if(cur_len == 0) {
1266                 /* An h16 component can't have the length of 0 unless
1267                  * the elision is at the beginning of the address, or
1268                  * at the end of the address.
1269                  */
1270                 if(!((*ptr == start && is_elision) ||
1271                     (is_end && (*ptr-2) == ip.elision))) {
1272                     *ptr = start;
1273                     TRACE("(%p %p %x): IPv6 component can not have a length of 0.\n",
1274                         ptr, data, flags);
1275                     return FALSE;
1276                 }
1277             }
1278
1279             if(cur_len > 0) {
1280                 /* An IPv6 address can have no more than 8 h16 components. */
1281                 if(ip.h16_count >= 8) {
1282                     *ptr = start;
1283                     TRACE("(%p %p %x): Not a IPv6 address, to many h16 components.\n",
1284                         ptr, data, flags);
1285                     return FALSE;
1286                 }
1287
1288                 ip.components[ip.h16_count].str = cur_start;
1289                 ip.components[ip.h16_count].len = cur_len;
1290
1291                 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n",
1292                     ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len,
1293                     ip.h16_count);
1294                 ++ip.h16_count;
1295             }
1296         }
1297
1298         if(is_end)
1299             break;
1300
1301         if(is_elision) {
1302             /* A IPv6 address can only have 1 elision ('::'). */
1303             if(ip.elision) {
1304                 *ptr = start;
1305
1306                 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n",
1307                     ptr, data, flags);
1308                 return FALSE;
1309             }
1310
1311             ip.elision = *ptr;
1312             ++(*ptr);
1313         }
1314
1315         if(is_split)
1316             cur_start = *ptr+1;
1317         else {
1318             if(!check_ipv4address(ptr, TRUE)) {
1319                 if(!is_hexdigit(**ptr)) {
1320                     /* Not a valid character for an IPv6 address. */
1321                     *ptr = start;
1322                     return FALSE;
1323                 }
1324             } else {
1325                 /* Found an IPv4 address. */
1326                 ip.ipv4 = cur_start;
1327                 ip.ipv4_len = *ptr - cur_start;
1328
1329                 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n",
1330                     ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len),
1331                     ip.ipv4_len);
1332
1333                 /* IPv4 addresses can only appear at the end of a IPv6. */
1334                 break;
1335             }
1336         }
1337     }
1338
1339     compute_ipv6_comps_size(&ip);
1340
1341     /* Make sure the IPv6 address adds up to 16 bytes. */
1342     if(ip.components_size + ip.elision_size != 16) {
1343         *ptr = start;
1344         TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n",
1345             ptr, data, flags);
1346         return FALSE;
1347     }
1348
1349     if(ip.elision_size == 2) {
1350         /* For some reason on Windows if an elision that represents
1351          * only 1 h16 component is encountered at the very begin or
1352          * end of an IPv6 address, Windows does not consider it a
1353          * valid IPv6 address.
1354          *
1355          *  Ex: [::2:3:4:5:6:7] is not valid, even though the sum
1356          *      of all the components == 128bits.
1357          */
1358          if(ip.elision < ip.components[0].str ||
1359             ip.elision > ip.components[ip.h16_count-1].str) {
1360             *ptr = start;
1361             TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
1362                 ptr, data, flags);
1363             return FALSE;
1364         }
1365     }
1366
1367     data->host_type = Uri_HOST_IPV6;
1368     data->has_ipv6 = TRUE;
1369     data->ipv6_address = ip;
1370
1371     TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n",
1372         ptr, data, flags, debugstr_wn(start, *ptr-start),
1373         *ptr-start);
1374     return TRUE;
1375 }
1376
1377 /*  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */
1378 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) {
1379     const WCHAR *start = *ptr;
1380
1381     /* IPvFuture has to start with a 'v' or 'V'. */
1382     if(**ptr != 'v' && **ptr != 'V')
1383         return FALSE;
1384
1385     /* Following the v their must be atleast 1 hexdigit. */
1386     ++(*ptr);
1387     if(!is_hexdigit(**ptr)) {
1388         *ptr = start;
1389         return FALSE;
1390     }
1391
1392     ++(*ptr);
1393     while(is_hexdigit(**ptr))
1394         ++(*ptr);
1395
1396     /* End of the hexdigit sequence must be a '.' */
1397     if(**ptr != '.') {
1398         *ptr = start;
1399         return FALSE;
1400     }
1401
1402     ++(*ptr);
1403     if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') {
1404         *ptr = start;
1405         return FALSE;
1406     }
1407
1408     ++(*ptr);
1409     while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':')
1410         ++(*ptr);
1411
1412     data->host_type = Uri_HOST_UNKNOWN;
1413
1414     TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags,
1415         debugstr_wn(start, *ptr-start), *ptr-start);
1416
1417     return TRUE;
1418 }
1419
1420 /* IP-literal = "[" ( IPv6address / IPvFuture  ) "]" */
1421 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags) {
1422     data->host = *ptr;
1423
1424     if(**ptr != '[') {
1425         data->host = NULL;
1426         return FALSE;
1427     }
1428
1429     ++(*ptr);
1430     if(!parse_ipv6address(ptr, data, flags)) {
1431         if(!parse_ipvfuture(ptr, data, flags)) {
1432             *ptr = data->host;
1433             data->host = NULL;
1434             return FALSE;
1435         }
1436     }
1437
1438     if(**ptr != ']') {
1439         *ptr = data->host;
1440         data->host = NULL;
1441         return FALSE;
1442     }
1443
1444     ++(*ptr);
1445     if(**ptr == ':') {
1446         ++(*ptr);
1447         /* If a valid port is not found, then let it trickle down to
1448          * parse_reg_name.
1449          */
1450         if(!parse_port(ptr, data, flags)) {
1451             *ptr = data->host;
1452             data->host = NULL;
1453             return FALSE;
1454         }
1455     } else
1456         data->host_len = *ptr - data->host;
1457
1458     return TRUE;
1459 }
1460
1461 /* Parses the host information from the URI.
1462  *
1463  * host = IP-literal / IPv4address / reg-name
1464  */
1465 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags) {
1466     if(!parse_ip_literal(ptr, data, flags)) {
1467         if(!parse_ipv4address(ptr, data, flags)) {
1468             if(!parse_reg_name(ptr, data, flags)) {
1469                 TRACE("(%p %p %x): Malformed URI, Unknown host type.\n",
1470                     ptr, data, flags);
1471                 return FALSE;
1472             }
1473         }
1474     }
1475
1476     return TRUE;
1477 }
1478
1479 /* Parses the authority information from the URI.
1480  *
1481  * authority   = [ userinfo "@" ] host [ ":" port ]
1482  */
1483 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
1484     parse_userinfo(ptr, data, flags);
1485
1486     /* Parsing the port will happen during one of the host parsing
1487      * routines (if the URI has a port).
1488      */
1489     if(!parse_host(ptr, data, flags))
1490         return FALSE;
1491
1492     return TRUE;
1493 }
1494
1495 /* Attempts to parse the path information of a hierarchical URI. */
1496 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
1497     const WCHAR *start = *ptr;
1498     static const WCHAR slash[] = {'/',0};
1499
1500     if(is_path_delim(**ptr)) {
1501         if(data->scheme_type == URL_SCHEME_WILDCARD) {
1502             /* Wildcard schemes don't get a '/' attached if their path is
1503              * empty.
1504              */
1505             data->path = NULL;
1506             data->path_len = 0;
1507         } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1508             /* If the path component is empty, then a '/' is added. */
1509             data->path = slash;
1510             data->path_len = 1;
1511         }
1512     } else {
1513         while(!is_path_delim(**ptr)) {
1514             if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN &&
1515                data->scheme_type != URL_SCHEME_FILE) {
1516                 if(!check_pct_encoded(ptr)) {
1517                     *ptr = start;
1518                     return FALSE;
1519                 } else
1520                     continue;
1521             } else if(**ptr == '\\') {
1522                 /* Not allowed to have a backslash if NO_CANONICALIZE is set
1523                  * and the scheme is known type (but not a file scheme).
1524                  */
1525                 if(flags & Uri_CREATE_NO_CANONICALIZE) {
1526                     if(data->scheme_type != URL_SCHEME_FILE &&
1527                        data->scheme_type != URL_SCHEME_UNKNOWN) {
1528                         *ptr = start;
1529                         return FALSE;
1530                     }
1531                 }
1532             }
1533
1534             ++(*ptr);
1535         }
1536
1537         /* The only time a URI doesn't have a path is when
1538          * the NO_CANONICALIZE flag is set and the raw URI
1539          * didn't contain one.
1540          */
1541         if(*ptr == start) {
1542             data->path = NULL;
1543             data->path_len = 0;
1544         } else {
1545             data->path = start;
1546             data->path_len = *ptr - start;
1547         }
1548     }
1549
1550     if(data->path)
1551         TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
1552             debugstr_wn(data->path, data->path_len), data->path_len);
1553     else
1554         TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
1555
1556     return TRUE;
1557 }
1558
1559 /* Parses the path of a opaque URI (much less strict then the parser
1560  * for a hierarchical URI).
1561  *
1562  * NOTE:
1563  *  Windows allows invalid % encoded data to appear in opaque URI paths
1564  *  for unknown scheme types.
1565  */
1566 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) {
1567     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1568
1569     data->path = *ptr;
1570
1571     while(!is_path_delim(**ptr)) {
1572         if(**ptr == '%' && known_scheme) {
1573             if(!check_pct_encoded(ptr)) {
1574                 *ptr = data->path;
1575                 data->path = NULL;
1576                 return FALSE;
1577             } else
1578                 continue;
1579         }
1580
1581         ++(*ptr);
1582     }
1583
1584     data->path_len = *ptr - data->path;
1585     TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags,
1586         debugstr_wn(data->path, data->path_len), data->path_len);
1587     return TRUE;
1588 }
1589
1590 /* Determines how the URI should be parsed after the scheme information.
1591  *
1592  * If the scheme is followed, by "//" then, it is treated as an hierarchical URI
1593  * which then the authority and path information will be parsed out. Otherwise, the
1594  * URI will be treated as an opaque URI which the authority information is not parsed
1595  * out.
1596  *
1597  * RFC 3896 definition of hier-part:
1598  *
1599  * hier-part   = "//" authority path-abempty
1600  *                 / path-absolute
1601  *                 / path-rootless
1602  *                 / path-empty
1603  *
1604  * MSDN opaque URI definition:
1605  *  scheme ":" path [ "#" fragment ]
1606  *
1607  * NOTES:
1608  *  If the URI is of an unknown scheme type and has a "//" following the scheme then it
1609  *  is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is
1610  *  set then it is considered an opaque URI reguardless of what follows the scheme information
1611  *  (per MSDN documentation).
1612  */
1613 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
1614     /* Checks if the authority information needs to be parsed.
1615      *
1616      * Relative URI's aren't hierarchical URI's, but, they could trick
1617      * "check_hierarchical" into thinking it is, so we need to explicitly
1618      * make sure it's not relative. Also, if the URI is an implicit file
1619      * scheme it might not contain a "//", but, it's considered hierarchical
1620      * anyways. Wildcard Schemes are always considered hierarchical
1621      */
1622     if(data->scheme_type == URL_SCHEME_WILDCARD ||
1623        data->scheme_type == URL_SCHEME_FILE ||
1624        (!data->is_relative && check_hierarchical(ptr))) {
1625         /* Only treat it as a hierarchical URI if the scheme_type is known or
1626          * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set.
1627          */
1628         if(data->scheme_type != URL_SCHEME_UNKNOWN ||
1629            !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) {
1630             TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
1631             data->is_opaque = FALSE;
1632
1633             if(data->scheme_type == URL_SCHEME_FILE)
1634                 /* Skip past the "//" after the scheme (if any). */
1635                 check_hierarchical(ptr);
1636
1637             /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
1638             if(!parse_authority(ptr, data, flags))
1639                 return FALSE;
1640
1641             return parse_path_hierarchical(ptr, data, flags);
1642         }
1643     }
1644
1645     /* If it reaches here, then the URI will be treated as an opaque
1646      * URI.
1647      */
1648
1649     TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags);
1650
1651     data->is_opaque = TRUE;
1652     if(!parse_path_opaque(ptr, data, flags))
1653         return FALSE;
1654
1655     return TRUE;
1656 }
1657
1658 /* Parses and validates the components of the specified by data->uri
1659  * and stores the information it parses into 'data'.
1660  *
1661  * Returns TRUE if it successfully parsed the URI. False otherwise.
1662  */
1663 static BOOL parse_uri(parse_data *data, DWORD flags) {
1664     const WCHAR *ptr;
1665     const WCHAR **pptr;
1666
1667     ptr = data->uri;
1668     pptr = &ptr;
1669
1670     TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri));
1671
1672     if(!parse_scheme(pptr, data, flags))
1673         return FALSE;
1674
1675     if(!parse_hierpart(pptr, data, flags))
1676         return FALSE;
1677
1678     /* TODO: Parse query and fragment (if the URI has one). */
1679
1680     TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
1681     return TRUE;
1682 }
1683
1684 /* Canonicalizes the userinfo of the URI represented by the parse_data.
1685  *
1686  * Canonicalization of the userinfo is a simple process. If there are any percent
1687  * encoded characters that fall in the "unreserved" character set, they are decoded
1688  * to their actual value. If a character is not in the "unreserved" or "reserved" sets
1689  * then it is percent encoded. Other than that the characters are copied over without
1690  * change.
1691  */
1692 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1693     DWORD i = 0;
1694
1695     uri->userinfo_start = uri->userinfo_split = -1;
1696     uri->userinfo_len = 0;
1697
1698     if(!data->userinfo)
1699         /* URI doesn't have userinfo, so nothing to do here. */
1700         return TRUE;
1701
1702     uri->userinfo_start = uri->canon_len;
1703
1704     while(i < data->userinfo_len) {
1705         if(data->userinfo[i] == ':' && uri->userinfo_split == -1)
1706             /* Windows only considers the first ':' as the delimiter. */
1707             uri->userinfo_split = uri->canon_len - uri->userinfo_start;
1708         else if(data->userinfo[i] == '%') {
1709             /* Only decode % encoded values for known scheme types. */
1710             if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1711                 /* See if the value really needs decoded. */
1712                 WCHAR val = decode_pct_val(data->userinfo + i);
1713                 if(is_unreserved(val)) {
1714                     if(!computeOnly)
1715                         uri->canon_uri[uri->canon_len] = val;
1716
1717                     ++uri->canon_len;
1718
1719                     /* Move pass the hex characters. */
1720                     i += 3;
1721                     continue;
1722                 }
1723             }
1724         } else if(!is_reserved(data->userinfo[i]) && !is_unreserved(data->userinfo[i]) &&
1725                   data->userinfo[i] != '\\') {
1726             /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
1727              * is NOT set.
1728              */
1729             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
1730                 if(!computeOnly)
1731                     pct_encode_val(data->userinfo[i], uri->canon_uri + uri->canon_len);
1732
1733                 uri->canon_len += 3;
1734                 ++i;
1735                 continue;
1736             }
1737         }
1738
1739         if(!computeOnly)
1740             /* Nothing special, so just copy the character over. */
1741             uri->canon_uri[uri->canon_len] = data->userinfo[i];
1742
1743         ++uri->canon_len;
1744         ++i;
1745     }
1746
1747     uri->userinfo_len = uri->canon_len - uri->userinfo_start;
1748     if(!computeOnly)
1749         TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
1750                 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
1751                 uri->userinfo_split, uri->userinfo_len);
1752
1753     /* Now insert the '@' after the userinfo. */
1754     if(!computeOnly)
1755         uri->canon_uri[uri->canon_len] = '@';
1756
1757     ++uri->canon_len;
1758     return TRUE;
1759 }
1760
1761 /* Attempts to canonicalize a reg_name.
1762  *
1763  * Things that happen:
1764  *  1)  If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
1765  *      lower cased. Unless it's an unknown scheme type, which case it's
1766  *      no lower cased reguardless.
1767  *
1768  *  2)  Unreserved % encoded characters are decoded for known
1769  *      scheme types.
1770  *
1771  *  3)  Forbidden characters are % encoded as long as
1772  *      Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
1773  *      it isn't an unknown scheme type.
1774  *
1775  *  4)  If it's a file scheme and the host is "localhost" it's removed.
1776  */
1777 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
1778                                   DWORD flags, BOOL computeOnly) {
1779     static const WCHAR localhostW[] =
1780             {'l','o','c','a','l','h','o','s','t',0};
1781     const WCHAR *ptr;
1782     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1783
1784     uri->host_start = uri->canon_len;
1785
1786     if(data->scheme_type == URL_SCHEME_FILE &&
1787        data->host_len == lstrlenW(localhostW)) {
1788         if(!StrCmpNIW(data->host, localhostW, data->host_len)) {
1789             uri->host_start = -1;
1790             uri->host_len = 0;
1791             uri->host_type = Uri_HOST_UNKNOWN;
1792             return TRUE;
1793         }
1794     }
1795
1796     for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) {
1797         if(*ptr == '%' && known_scheme) {
1798             WCHAR val = decode_pct_val(ptr);
1799             if(is_unreserved(val)) {
1800                 /* If NO_CANONICALZE is not set, then windows lower cases the
1801                  * decoded value.
1802                  */
1803                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) {
1804                     if(!computeOnly)
1805                         uri->canon_uri[uri->canon_len] = tolowerW(val);
1806                 } else {
1807                     if(!computeOnly)
1808                         uri->canon_uri[uri->canon_len] = val;
1809                 }
1810                 ++uri->canon_len;
1811
1812                 /* Skip past the % encoded character. */
1813                 ptr += 2;
1814                 continue;
1815             } else {
1816                 /* Just copy the % over. */
1817                 if(!computeOnly)
1818                     uri->canon_uri[uri->canon_len] = *ptr;
1819                 ++uri->canon_len;
1820             }
1821         } else if(*ptr == '\\') {
1822             /* Only unknown scheme types could have made it here with a '\\' in the host name. */
1823             if(!computeOnly)
1824                 uri->canon_uri[uri->canon_len] = *ptr;
1825             ++uri->canon_len;
1826         } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
1827                   !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
1828             if(!computeOnly) {
1829                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
1830
1831                 /* The percent encoded value gets lower cased also. */
1832                 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1833                     uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]);
1834                     uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]);
1835                 }
1836             }
1837
1838             uri->canon_len += 3;
1839         } else {
1840             if(!computeOnly) {
1841                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme)
1842                     uri->canon_uri[uri->canon_len] = tolowerW(*ptr);
1843                 else
1844                     uri->canon_uri[uri->canon_len] = *ptr;
1845             }
1846
1847             ++uri->canon_len;
1848         }
1849     }
1850
1851     uri->host_len = uri->canon_len - uri->host_start;
1852
1853     if(!computeOnly)
1854         TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags,
1855             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
1856             uri->host_len);
1857
1858     if(!computeOnly)
1859         find_domain_name(uri->canon_uri+uri->host_start, uri->host_len,
1860             &(uri->domain_offset));
1861
1862     return TRUE;
1863 }
1864
1865 /* Attempts to canonicalize an implicit IPv4 address. */
1866 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1867     uri->host_start = uri->canon_len;
1868
1869     TRACE("%u\n", data->implicit_ipv4);
1870     /* For unknown scheme types Window's doesn't convert
1871      * the value into an IP address, but, it still considers
1872      * it an IPv4 address.
1873      */
1874     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
1875         if(!computeOnly)
1876             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
1877         uri->canon_len += data->host_len;
1878     } else {
1879         if(!computeOnly)
1880             uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4);
1881         else
1882             uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4);
1883     }
1884
1885     uri->host_len = uri->canon_len - uri->host_start;
1886     uri->host_type = Uri_HOST_IPV4;
1887
1888     if(!computeOnly)
1889         TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n",
1890             data, uri, flags, computeOnly,
1891             debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
1892             uri->host_len);
1893
1894     return TRUE;
1895 }
1896
1897 /* Attempts to canonicalize an IPv4 address.
1898  *
1899  * If the parse_data represents a URI that has an implicit IPv4 address
1900  * (ex. http://256/, this function will convert 256 into 0.0.1.0). If
1901  * the implicit IP address exceeds the value of UINT_MAX (maximum value
1902  * for an IPv4 address) it's canonicalized as if were a reg-name.
1903  *
1904  * If the parse_data contains a partial or full IPv4 address it normalizes it.
1905  * A partial IPv4 address is something like "192.0" and would be normalized to
1906  * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would
1907  * be normalized to "192.2.1.3".
1908  *
1909  * NOTES:
1910  *  Window's ONLY normalizes IPv4 address for known scheme types (one that isn't
1911  *  URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from
1912  *  the original URI into the canonicalized URI, but, it still recognizes URI's
1913  *  host type as HOST_IPV4.
1914  */
1915 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
1916     if(data->has_implicit_ip)
1917         return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly);
1918     else {
1919         uri->host_start = uri->canon_len;
1920
1921         /* Windows only normalizes for known scheme types. */
1922         if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1923             /* parse_data contains a partial or full IPv4 address, so normalize it. */
1924             DWORD i, octetDigitCount = 0, octetCount = 0;
1925             BOOL octetHasDigit = FALSE;
1926
1927             for(i = 0; i < data->host_len; ++i) {
1928                 if(data->host[i] == '0' && !octetHasDigit) {
1929                     /* Can ignore leading zeros if:
1930                      *  1) It isn't the last digit of the octet.
1931                      *  2) i+1 != data->host_len
1932                      *  3) i+1 != '.'
1933                      */
1934                     if(octetDigitCount == 2 ||
1935                        i+1 == data->host_len ||
1936                        data->host[i+1] == '.') {
1937                         if(!computeOnly)
1938                             uri->canon_uri[uri->canon_len] = data->host[i];
1939                         ++uri->canon_len;
1940                         TRACE("Adding zero\n");
1941                     }
1942                 } else if(data->host[i] == '.') {
1943                     if(!computeOnly)
1944                         uri->canon_uri[uri->canon_len] = data->host[i];
1945                     ++uri->canon_len;
1946
1947                     octetDigitCount = 0;
1948                     octetHasDigit = FALSE;
1949                     ++octetCount;
1950                 } else {
1951                     if(!computeOnly)
1952                         uri->canon_uri[uri->canon_len] = data->host[i];
1953                     ++uri->canon_len;
1954
1955                     ++octetDigitCount;
1956                     octetHasDigit = TRUE;
1957                 }
1958             }
1959
1960             /* Make sure the canonicalized IP address has 4 dec-octets.
1961              * If doesn't add "0" ones until there is 4;
1962              */
1963             for( ; octetCount < 3; ++octetCount) {
1964                 if(!computeOnly) {
1965                     uri->canon_uri[uri->canon_len] = '.';
1966                     uri->canon_uri[uri->canon_len+1] = '0';
1967                 }
1968
1969                 uri->canon_len += 2;
1970             }
1971         } else {
1972             /* Windows doesn't normalize addresses in unknown schemes. */
1973             if(!computeOnly)
1974                 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
1975             uri->canon_len += data->host_len;
1976         }
1977
1978         uri->host_len = uri->canon_len - uri->host_start;
1979         if(!computeOnly)
1980             TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n",
1981                 data, uri, flags, computeOnly,
1982                 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
1983                 uri->host_len);
1984     }
1985
1986     return TRUE;
1987 }
1988
1989 /* Attempts to canonicalize the IPv6 address of the URI.
1990  *
1991  * Multiple things happen during the canonicalization of an IPv6 address:
1992  *  1)  Any leading zero's in an h16 component are removed.
1993  *      Ex: [0001:0022::] -> [1:22::]
1994  *
1995  *  2)  The longest sequence of zero h16 components are compressed
1996  *      into a "::" (elision). If there's a tie, the first is choosen.
1997  *
1998  *      Ex: [0:0:0:0:1:6:7:8]   -> [::1:6:7:8]
1999  *          [0:0:0:0:1:2::]     -> [::1:2:0:0]
2000  *          [0:0:1:2:0:0:7:8]   -> [::1:2:0:0:7:8]
2001  *
2002  *  3)  If an IPv4 address is attached to the IPv6 address, it's
2003  *      also normalized.
2004  *      Ex: [::001.002.022.000] -> [::1.2.22.0]
2005  *
2006  *  4)  If an elision is present, but, only represents 1 h16 component
2007  *      it's expanded.
2008  *
2009  *      Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
2010  *
2011  *  5)  If the IPv6 address contains an IPv4 address and there exists
2012  *      at least 1 non-zero h16 component the IPv4 address is converted
2013  *      into two h16 components, otherwise it's normalized and kept as is.
2014  *
2015  *      Ex: [::192.200.003.4]       -> [::192.200.3.4]
2016  *          [ffff::192.200.003.4]   -> [ffff::c0c8:3041]
2017  *
2018  * NOTE:
2019  *  For unknown scheme types Windows simply copies the address over without any
2020  *  changes.
2021  *
2022  *  IPv4 address can be included in an elision if all its components are 0's.
2023  */
2024 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
2025                                      DWORD flags, BOOL computeOnly) {
2026     uri->host_start = uri->canon_len;
2027
2028     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2029         if(!computeOnly)
2030             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2031         uri->canon_len += data->host_len;
2032     } else {
2033         USHORT values[8];
2034         INT elision_start;
2035         DWORD i, elision_len;
2036
2037         if(!ipv6_to_number(&(data->ipv6_address), values)) {
2038             TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
2039                 data, uri, flags, computeOnly);
2040             return FALSE;
2041         }
2042
2043         if(!computeOnly)
2044             uri->canon_uri[uri->canon_len] = '[';
2045         ++uri->canon_len;
2046
2047         /* Find where the elision should occur (if any). */
2048         compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
2049
2050         TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
2051             computeOnly, elision_start, elision_len);
2052
2053         for(i = 0; i < 8; ++i) {
2054             BOOL in_elision = (elision_start > -1 && i >= elision_start &&
2055                                i < elision_start+elision_len);
2056             BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
2057                             data->ipv6_address.h16_count == 0);
2058
2059             if(i == elision_start) {
2060                 if(!computeOnly) {
2061                     uri->canon_uri[uri->canon_len] = ':';
2062                     uri->canon_uri[uri->canon_len+1] = ':';
2063                 }
2064                 uri->canon_len += 2;
2065             }
2066
2067             /* We can ignore the current component if we're in the elision. */
2068             if(in_elision)
2069                 continue;
2070
2071             /* We only add a ':' if we're not at i == 0, or when we're at
2072              * the very end of elision range since the ':' colon was handled
2073              * earlier. Otherwise we would end up with ":::" after elision.
2074              */
2075             if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
2076                 if(!computeOnly)
2077                     uri->canon_uri[uri->canon_len] = ':';
2078                 ++uri->canon_len;
2079             }
2080
2081             if(do_ipv4) {
2082                 UINT val;
2083                 DWORD len;
2084
2085                 /* Combine the two parts of the IPv4 address values. */
2086                 val = values[i];
2087                 val <<= 16;
2088                 val += values[i+1];
2089
2090                 if(!computeOnly)
2091                     len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
2092                 else
2093                     len = ui2ipv4(NULL, val);
2094
2095                 uri->canon_len += len;
2096                 ++i;
2097             } else {
2098                 /* Write a regular h16 component to the URI. */
2099
2100                 /* Short circuit for the trivial case. */
2101                 if(values[i] == 0) {
2102                     if(!computeOnly)
2103                         uri->canon_uri[uri->canon_len] = '0';
2104                     ++uri->canon_len;
2105                 } else {
2106                     static const WCHAR formatW[] = {'%','x',0};
2107
2108                     if(!computeOnly)
2109                         uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len,
2110                                             formatW, values[i]);
2111                     else {
2112                         WCHAR tmp[5];
2113                         uri->canon_len += sprintfW(tmp, formatW, values[i]);
2114                     }
2115                 }
2116             }
2117         }
2118
2119         /* Add the closing ']'. */
2120         if(!computeOnly)
2121             uri->canon_uri[uri->canon_len] = ']';
2122         ++uri->canon_len;
2123     }
2124
2125     uri->host_len = uri->canon_len - uri->host_start;
2126
2127     if(!computeOnly)
2128         TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags,
2129             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2130             uri->host_len);
2131
2132     return TRUE;
2133 }
2134
2135 /* Attempts to canonicalize the host of the URI (if any). */
2136 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2137     uri->host_start = -1;
2138     uri->host_len = 0;
2139     uri->domain_offset = -1;
2140
2141     if(data->host) {
2142         switch(data->host_type) {
2143         case Uri_HOST_DNS:
2144             uri->host_type = Uri_HOST_DNS;
2145             if(!canonicalize_reg_name(data, uri, flags, computeOnly))
2146                 return FALSE;
2147
2148             break;
2149         case Uri_HOST_IPV4:
2150             uri->host_type = Uri_HOST_IPV4;
2151             if(!canonicalize_ipv4address(data, uri, flags, computeOnly))
2152                 return FALSE;
2153
2154             break;
2155         case Uri_HOST_IPV6:
2156             if(!canonicalize_ipv6address(data, uri, flags, computeOnly))
2157                 return FALSE;
2158
2159             uri->host_type = Uri_HOST_IPV6;
2160             break;
2161         case Uri_HOST_UNKNOWN:
2162             if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) {
2163                 uri->host_start = uri->canon_len;
2164
2165                 /* Nothing happens to unknown host types. */
2166                 if(!computeOnly)
2167                     memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2168                 uri->canon_len += data->host_len;
2169                 uri->host_len = data->host_len;
2170             }
2171
2172             uri->host_type = Uri_HOST_UNKNOWN;
2173             break;
2174         default:
2175             FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data,
2176                     uri, flags, computeOnly, data->host_type);
2177             return FALSE;
2178        }
2179    }
2180
2181    return TRUE;
2182 }
2183
2184 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2185     BOOL has_default_port = FALSE;
2186     USHORT default_port = 0;
2187     DWORD i;
2188
2189     uri->has_port = FALSE;
2190
2191     /* Check if the scheme has a default port. */
2192     for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
2193         if(default_ports[i].scheme == data->scheme_type) {
2194             has_default_port = TRUE;
2195             default_port = default_ports[i].port;
2196             break;
2197         }
2198     }
2199
2200     if(data->port || has_default_port)
2201         uri->has_port = TRUE;
2202
2203     /* Possible cases:
2204      *  1)  Has a port which is the default port.
2205      *  2)  Has a port (not the default).
2206      *  3)  Doesn't have a port, but, scheme has a default port.
2207      *  4)  No port.
2208      */
2209     if(has_default_port && data->port && data->port_value == default_port) {
2210         /* If it's the default port and this flag isn't set, don't do anything. */
2211         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2212             /* Copy the original port over. */
2213             if(!computeOnly) {
2214                 uri->canon_uri[uri->canon_len] = ':';
2215                 memcpy(uri->canon_uri+uri->canon_len+1, data->port, data->port_len*sizeof(WCHAR));
2216             }
2217             uri->canon_len += data->port_len+1;
2218         }
2219
2220         uri->port = default_port;
2221     } else if(data->port) {
2222         if(!computeOnly)
2223             uri->canon_uri[uri->canon_len] = ':';
2224         ++uri->canon_len;
2225
2226         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2227             /* Copy the original over without changes. */
2228             if(!computeOnly)
2229                 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2230             uri->canon_len += data->port_len;
2231         } else {
2232             const WCHAR formatW[] = {'%','u',0};
2233             INT len = 0;
2234             if(!computeOnly)
2235                 len = sprintfW(uri->canon_uri+uri->canon_len, formatW, data->port_value);
2236             else {
2237                 WCHAR tmp[6];
2238                 len = sprintfW(tmp, formatW, data->port_value);
2239             }
2240             uri->canon_len += len;
2241         }
2242
2243         uri->port = data->port_value;
2244     } else if(has_default_port)
2245         uri->port = default_port;
2246
2247     return TRUE;
2248 }
2249
2250 /* Canonicalizes the authority of the URI represented by the parse_data. */
2251 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2252     uri->authority_start = uri->canon_len;
2253     uri->authority_len = 0;
2254
2255     if(!canonicalize_userinfo(data, uri, flags, computeOnly))
2256         return FALSE;
2257
2258     if(!canonicalize_host(data, uri, flags, computeOnly))
2259         return FALSE;
2260
2261     if(!canonicalize_port(data, uri, flags, computeOnly))
2262         return FALSE;
2263
2264     if(uri->host_start != -1)
2265         uri->authority_len = uri->canon_len - uri->authority_start;
2266     else
2267         uri->authority_start = -1;
2268
2269     return TRUE;
2270 }
2271
2272 /* Attempts to canonicalize the path of a hierarchical URI.
2273  *
2274  * Things that happen:
2275  *  1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN
2276  *      flag is set or it's a file URI. Forbidden characters are always encoded
2277  *      for file schemes reguardless and forbidden characters are never encoded
2278  *      for unknown scheme types.
2279  *
2280  *  2). For known scheme types '\\' are changed to '/'.
2281  *
2282  *  3). Percent encoded, unreserved characters are decoded to their actual values.
2283  *      Unless the scheme type is unknown. For file schemes any percent encoded
2284  *      character in the unreserved or reserved set is decoded.
2285  *
2286  *  4). For File schemes if the path is starts with a drive letter and doesn't
2287  *      start with a '/' then one is appended.
2288  *      Ex: file://c:/test.mp3 -> file:///c:/test.mp3
2289  *
2290  *  5). Dot segments are removed from the path for all scheme types
2291  *      unless NO_CANONICALIZE flag is set. Dot segments aren't removed
2292  *      for wildcard scheme types.
2293  *
2294  * NOTES:
2295  *      file://c:/test%20test   -> file:///c:/test%2520test
2296  *      file://c:/test%3Etest   -> file:///c:/test%253Etest
2297  *      file:///c:/test%20test  -> file:///c:/test%20test
2298  *      file:///c:/test%test    -> file:///c:/test%25test
2299  */
2300 static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
2301                                            DWORD flags, BOOL computeOnly) {
2302     const WCHAR *ptr;
2303     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2304     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
2305
2306     BOOL escape_pct = FALSE;
2307
2308     if(!data->path) {
2309         uri->path_start = -1;
2310         uri->path_len = 0;
2311         return TRUE;
2312     }
2313
2314     uri->path_start = uri->canon_len;
2315
2316     /* Check if a '/' needs to be appended for the file scheme. */
2317     if(is_file) {
2318         if(data->path_len > 1 && is_alpha(*(data->path)) &&
2319            *(data->path+1) == ':') {
2320             if(!computeOnly)
2321                 uri->canon_uri[uri->canon_len] = '/';
2322             uri->canon_len++;
2323             escape_pct = TRUE;
2324         }
2325     }
2326
2327     for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
2328         if(*ptr == '%') {
2329             const WCHAR *tmp = ptr;
2330             WCHAR val;
2331
2332             /* Check if the % represents a valid encoded char, or if it needs encoded. */
2333             BOOL force_encode = !check_pct_encoded(&tmp) && is_file;
2334             val = decode_pct_val(ptr);
2335
2336             if(force_encode || escape_pct) {
2337                 /* Escape the percent sign in the file URI. */
2338                 if(!computeOnly)
2339                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2340                 uri->canon_len += 3;
2341             } else if((is_unreserved(val) && known_scheme) ||
2342                       (is_file && (is_unreserved(val) || is_reserved(val)))) {
2343                 if(!computeOnly)
2344                     uri->canon_uri[uri->canon_len] = val;
2345                 ++uri->canon_len;
2346
2347                 ptr += 2;
2348                 continue;
2349             } else {
2350                 if(!computeOnly)
2351                     uri->canon_uri[uri->canon_len] = *ptr;
2352                 ++uri->canon_len;
2353             }
2354         } else if(*ptr == '\\' && known_scheme) {
2355             if(!computeOnly)
2356                 uri->canon_uri[uri->canon_len] = '/';
2357             ++uri->canon_len;
2358         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2359                   (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
2360             /* Escape the forbidden character. */
2361             if(!computeOnly)
2362                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2363             uri->canon_len += 3;
2364         } else {
2365             if(!computeOnly)
2366                 uri->canon_uri[uri->canon_len] = *ptr;
2367             ++uri->canon_len;
2368         }
2369     }
2370
2371     uri->path_len = uri->canon_len - uri->path_start;
2372
2373     if(!computeOnly)
2374         TRACE("Canonicalized path %s len=%d\n",
2375             debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
2376             uri->path_len);
2377
2378     return TRUE;
2379 }
2380
2381 /* Determines how the URI represented by the parse_data should be canonicalized.
2382  *
2383  * Essentially, if the parse_data represents an hierarchical URI then it calls
2384  * canonicalize_authority and the canonicalization functions for the path. If the
2385  * URI is opaque it canonicalizes the path of the URI.
2386  */
2387 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2388     if(!data->is_opaque) {
2389         /* "//" is only added for non-wildcard scheme types. */
2390         if(data->scheme_type != URL_SCHEME_WILDCARD) {
2391             if(!computeOnly) {
2392                 INT pos = uri->canon_len;
2393
2394                 uri->canon_uri[pos] = '/';
2395                 uri->canon_uri[pos+1] = '/';
2396            }
2397            uri->canon_len += 2;
2398         }
2399
2400         if(!canonicalize_authority(data, uri, flags, computeOnly))
2401             return FALSE;
2402
2403         /* TODO: Canonicalize the path of the URI. */
2404         if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly))
2405             return FALSE;
2406
2407     } else {
2408         /* Opaque URI's don't have an authority. */
2409         uri->userinfo_start = uri->userinfo_split = -1;
2410         uri->userinfo_len = 0;
2411         uri->host_start = -1;
2412         uri->host_len = 0;
2413         uri->host_type = Uri_HOST_UNKNOWN;
2414         uri->has_port = FALSE;
2415         uri->authority_start = -1;
2416         uri->authority_len = 0;
2417         uri->domain_offset = -1;
2418     }
2419
2420     return TRUE;
2421 }
2422
2423 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
2424 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2425     uri->scheme_start = -1;
2426     uri->scheme_len = 0;
2427
2428     if(!data->scheme) {
2429         /* The only type of URI that doesn't have to have a scheme is a relative
2430          * URI.
2431          */
2432         if(!data->is_relative) {
2433             FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data,
2434                     uri, flags, debugstr_w(data->uri));
2435             return FALSE;
2436         }
2437     } else {
2438         if(!computeOnly) {
2439             DWORD i;
2440             INT pos = uri->canon_len;
2441
2442             for(i = 0; i < data->scheme_len; ++i) {
2443                 /* Scheme name must be lower case after canonicalization. */
2444                 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]);
2445             }
2446
2447             uri->canon_uri[i + pos] = ':';
2448             uri->scheme_start = pos;
2449
2450             TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags,
2451                     debugstr_wn(uri->canon_uri,  uri->scheme_len), data->scheme_len);
2452         }
2453
2454         /* This happens in both computation modes. */
2455         uri->canon_len += data->scheme_len + 1;
2456         uri->scheme_len = data->scheme_len;
2457     }
2458     return TRUE;
2459 }
2460
2461 /* Compute's what the length of the URI specified by the parse_data will be
2462  * after canonicalization occurs using the specified flags.
2463  *
2464  * This function will return a non-zero value indicating the length of the canonicalized
2465  * URI, or -1 on error.
2466  */
2467 static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
2468     Uri uri;
2469
2470     memset(&uri, 0, sizeof(Uri));
2471
2472     TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags,
2473             debugstr_w(data->uri));
2474
2475     if(!canonicalize_scheme(data, &uri, flags, TRUE)) {
2476         ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags);
2477         return -1;
2478     }
2479
2480     if(!canonicalize_hierpart(data, &uri, flags, TRUE)) {
2481         ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags);
2482         return -1;
2483     }
2484
2485     TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len);
2486
2487     return uri.canon_len;
2488 }
2489
2490 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the
2491  * canonicalization succeededs it will store all the canonicalization information
2492  * in the pointer to the Uri.
2493  *
2494  * To canonicalize a URI this function first computes what the length of the URI
2495  * specified by the parse_data will be. Once this is done it will then perfom the actual
2496  * canonicalization of the URI.
2497  */
2498 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
2499     INT len;
2500
2501     uri->canon_uri = NULL;
2502     len = uri->canon_size = uri->canon_len = 0;
2503
2504     TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri));
2505
2506     /* First try to compute the length of the URI. */
2507     len = compute_canonicalized_length(data, flags);
2508     if(len == -1) {
2509         ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags,
2510                 debugstr_w(data->uri));
2511         return E_INVALIDARG;
2512     }
2513
2514     uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR));
2515     if(!uri->canon_uri)
2516         return E_OUTOFMEMORY;
2517
2518     if(!canonicalize_scheme(data, uri, flags, FALSE)) {
2519         ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
2520         heap_free(uri->canon_uri);
2521         return E_INVALIDARG;
2522     }
2523     uri->scheme_type = data->scheme_type;
2524
2525     if(!canonicalize_hierpart(data, uri, flags, FALSE)) {
2526         ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags);
2527         heap_free(uri->canon_uri);
2528         return E_INVALIDARG;
2529     }
2530
2531     uri->canon_uri[uri->canon_len] = '\0';
2532     TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
2533
2534     return S_OK;
2535 }
2536
/* Turn a pointer to an implementation object into an interface pointer by
 * taking the address of the object's vtable-pointer field.
 */
#define URI(x)         ((IUri*)  &(x)->lpIUriVtbl)
#define URIBUILDER(x)  ((IUriBuilder*)  &(x)->lpIUriBuilderVtbl)

/* Presumably maps an IUri pointer back to the Uri that contains it;
 * DEFINE_THIS is defined outside this part of the file.
 */
#define URI_THIS(iface) DEFINE_THIS(Uri, IUri, iface)
2541
2542 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv)
2543 {
2544     Uri *This = URI_THIS(iface);
2545
2546     if(IsEqualGUID(&IID_IUnknown, riid)) {
2547         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
2548         *ppv = URI(This);
2549     }else if(IsEqualGUID(&IID_IUri, riid)) {
2550         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
2551         *ppv = URI(This);
2552     }else {
2553         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
2554         *ppv = NULL;
2555         return E_NOINTERFACE;
2556     }
2557
2558     IUnknown_AddRef((IUnknown*)*ppv);
2559     return S_OK;
2560 }
2561
2562 static ULONG WINAPI Uri_AddRef(IUri *iface)
2563 {
2564     Uri *This = URI_THIS(iface);
2565     LONG ref = InterlockedIncrement(&This->ref);
2566
2567     TRACE("(%p) ref=%d\n", This, ref);
2568
2569     return ref;
2570 }
2571
2572 static ULONG WINAPI Uri_Release(IUri *iface)
2573 {
2574     Uri *This = URI_THIS(iface);
2575     LONG ref = InterlockedDecrement(&This->ref);
2576
2577     TRACE("(%p) ref=%d\n", This, ref);
2578
2579     if(!ref) {
2580         SysFreeString(This->raw_uri);
2581         heap_free(This->canon_uri);
2582         heap_free(This);
2583     }
2584
2585     return ref;
2586 }
2587
2588 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags)
2589 {
2590     Uri *This = URI_THIS(iface);
2591     HRESULT hres;
2592     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
2593
2594     if(!pbstrProperty)
2595         return E_POINTER;
2596
2597     if(uriProp > Uri_PROPERTY_STRING_LAST) {
2598         /* Windows allocates an empty BSTR for invalid Uri_PROPERTY's. */
2599         *pbstrProperty = SysAllocStringLen(NULL, 0);
2600         if(!(*pbstrProperty))
2601             return E_OUTOFMEMORY;
2602
2603         /* It only returns S_FALSE for the ZONE property... */
2604         if(uriProp == Uri_PROPERTY_ZONE)
2605             return S_FALSE;
2606         else
2607             return S_OK;
2608     }
2609
2610     /* Don't have support for flags yet. */
2611     if(dwFlags) {
2612         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
2613         return E_NOTIMPL;
2614     }
2615
2616     switch(uriProp) {
2617     case Uri_PROPERTY_AUTHORITY:
2618         if(This->authority_start > -1) {
2619             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len);
2620             hres = S_OK;
2621         } else {
2622             *pbstrProperty = SysAllocStringLen(NULL, 0);
2623             hres = S_FALSE;
2624         }
2625
2626         if(!(*pbstrProperty))
2627             hres = E_OUTOFMEMORY;
2628
2629         break;
2630     case Uri_PROPERTY_DOMAIN:
2631         if(This->domain_offset > -1) {
2632             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset,
2633                                                This->host_len-This->domain_offset);
2634             hres = S_OK;
2635         } else {
2636             *pbstrProperty = SysAllocStringLen(NULL, 0);
2637             hres = S_FALSE;
2638         }
2639
2640         if(!(*pbstrProperty))
2641             hres = E_OUTOFMEMORY;
2642
2643         break;
2644     case Uri_PROPERTY_HOST:
2645         if(This->host_start > -1) {
2646             /* The '[' and ']' aren't included for IPv6 addresses. */
2647             if(This->host_type == Uri_HOST_IPV6)
2648                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2);
2649             else
2650                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len);
2651
2652             hres = S_OK;
2653         } else {
2654             *pbstrProperty = SysAllocStringLen(NULL, 0);
2655             hres = S_FALSE;
2656         }
2657
2658         if(!(*pbstrProperty))
2659             hres = E_OUTOFMEMORY;
2660
2661         break;
2662     case Uri_PROPERTY_PASSWORD:
2663         if(This->userinfo_split > -1) {
2664             *pbstrProperty = SysAllocStringLen(
2665                 This->canon_uri+This->userinfo_start+This->userinfo_split+1,
2666                 This->userinfo_len-This->userinfo_split-1);
2667             hres = S_OK;
2668         } else {
2669             *pbstrProperty = SysAllocStringLen(NULL, 0);
2670             hres = S_FALSE;
2671         }
2672
2673         if(!(*pbstrProperty))
2674             return E_OUTOFMEMORY;
2675
2676         break;
2677     case Uri_PROPERTY_RAW_URI:
2678         *pbstrProperty = SysAllocString(This->raw_uri);
2679         if(!(*pbstrProperty))
2680             hres = E_OUTOFMEMORY;
2681         else
2682             hres = S_OK;
2683         break;
2684     case Uri_PROPERTY_SCHEME_NAME:
2685         if(This->scheme_start > -1) {
2686             *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len);
2687             hres = S_OK;
2688         } else {
2689             *pbstrProperty = SysAllocStringLen(NULL, 0);
2690             hres = S_FALSE;
2691         }
2692
2693         if(!(*pbstrProperty))
2694             hres = E_OUTOFMEMORY;
2695
2696         break;
2697     case Uri_PROPERTY_USER_INFO:
2698         if(This->userinfo_start > -1) {
2699             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len);
2700             hres = S_OK;
2701         } else {
2702             *pbstrProperty = SysAllocStringLen(NULL, 0);
2703             hres = S_FALSE;
2704         }
2705
2706         if(!(*pbstrProperty))
2707             hres = E_OUTOFMEMORY;
2708
2709         break;
2710     case Uri_PROPERTY_USER_NAME:
2711         if(This->userinfo_start > -1) {
2712             /* If userinfo_split is set, that means a password exists
2713              * so the username is only from userinfo_start to userinfo_split.
2714              */
2715             if(This->userinfo_split > -1) {
2716                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split);
2717                 hres = S_OK;
2718             } else {
2719                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len);
2720                 hres = S_OK;
2721             }
2722         } else {
2723             *pbstrProperty = SysAllocStringLen(NULL, 0);
2724             hres = S_FALSE;
2725         }
2726
2727         if(!(*pbstrProperty))
2728             return E_OUTOFMEMORY;
2729
2730         break;
2731     default:
2732         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
2733         hres = E_NOTIMPL;
2734     }
2735
2736     return hres;
2737 }
2738
2739 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
2740 {
2741     Uri *This = URI_THIS(iface);
2742     HRESULT hres;
2743     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2744
2745     if(!pcchProperty)
2746         return E_INVALIDARG;
2747
2748     /* Can only return a length for a property if it's a string. */
2749     if(uriProp > Uri_PROPERTY_STRING_LAST)
2750         return E_INVALIDARG;
2751
2752     /* Don't have support for flags yet. */
2753     if(dwFlags) {
2754         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2755         return E_NOTIMPL;
2756     }
2757
2758     switch(uriProp) {
2759     case Uri_PROPERTY_AUTHORITY:
2760         *pcchProperty = This->authority_len;
2761         hres = (This->authority_start > -1) ? S_OK : S_FALSE;
2762         break;
2763     case Uri_PROPERTY_DOMAIN:
2764         if(This->domain_offset > -1)
2765             *pcchProperty = This->host_len - This->domain_offset;
2766         else
2767             *pcchProperty = 0;
2768
2769         hres = (This->domain_offset > -1) ? S_OK : S_FALSE;
2770         break;
2771     case Uri_PROPERTY_HOST:
2772         *pcchProperty = This->host_len;
2773
2774         /* '[' and ']' aren't included in the length. */
2775         if(This->host_type == Uri_HOST_IPV6)
2776             *pcchProperty -= 2;
2777
2778         hres = (This->host_start > -1) ? S_OK : S_FALSE;
2779         break;
2780     case Uri_PROPERTY_PASSWORD:
2781         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0;
2782         hres = (This->userinfo_split > -1) ? S_OK : S_FALSE;
2783         break;
2784     case Uri_PROPERTY_RAW_URI:
2785         *pcchProperty = SysStringLen(This->raw_uri);
2786         hres = S_OK;
2787         break;
2788     case Uri_PROPERTY_SCHEME_NAME:
2789         *pcchProperty = This->scheme_len;
2790         hres = (This->scheme_start > -1) ? S_OK : S_FALSE;
2791         break;
2792     case Uri_PROPERTY_USER_INFO:
2793         *pcchProperty = This->userinfo_len;
2794         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
2795         break;
2796     case Uri_PROPERTY_USER_NAME:
2797         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len;
2798         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
2799         break;
2800     default:
2801         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2802         hres = E_NOTIMPL;
2803     }
2804
2805     return hres;
2806 }
2807
2808 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
2809 {
2810     Uri *This = URI_THIS(iface);
2811     HRESULT hres;
2812
2813     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2814
2815     if(!pcchProperty)
2816         return E_INVALIDARG;
2817
2818     /* Microsoft's implementation for the ZONE property of a URI seems to be lacking...
2819      * From what I can tell, instead of checking which URLZONE the URI belongs to it
2820      * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone
2821      * function.
2822      */
2823     if(uriProp == Uri_PROPERTY_ZONE) {
2824         *pcchProperty = URLZONE_INVALID;
2825         return E_NOTIMPL;
2826     }
2827
2828     if(uriProp < Uri_PROPERTY_DWORD_START) {
2829         *pcchProperty = 0;
2830         return E_INVALIDARG;
2831     }
2832
2833     switch(uriProp) {
2834     case Uri_PROPERTY_HOST_TYPE:
2835         *pcchProperty = This->host_type;
2836         hres = S_OK;
2837         break;
2838     case Uri_PROPERTY_PORT:
2839         if(!This->has_port) {
2840             *pcchProperty = 0;
2841             hres = S_FALSE;
2842         } else {
2843             *pcchProperty = This->port;
2844             hres = S_OK;
2845         }
2846
2847         break;
2848     case Uri_PROPERTY_SCHEME:
2849         *pcchProperty = This->scheme_type;
2850         hres = S_OK;
2851         break;
2852     default:
2853         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
2854         hres = E_NOTIMPL;
2855     }
2856
2857     return hres;
2858 }
2859
2860 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty)
2861 {
2862     Uri *This = URI_THIS(iface);
2863     FIXME("(%p)->(%d %p)\n", This, uriProp, pfHasProperty);
2864
2865     if(!pfHasProperty)
2866         return E_INVALIDARG;
2867
2868     return E_NOTIMPL;
2869 }
2870
2871 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri)
2872 {
2873     Uri *This = URI_THIS(iface);
2874     FIXME("(%p)->(%p)\n", This, pstrAbsoluteUri);
2875
2876     if(!pstrAbsoluteUri)
2877         return E_POINTER;
2878
2879     return E_NOTIMPL;
2880 }
2881
2882 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority)
2883 {
2884     TRACE("(%p)->(%p)\n", iface, pstrAuthority);
2885     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0);
2886 }
2887
2888 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri)
2889 {
2890     Uri *This = URI_THIS(iface);
2891     FIXME("(%p)->(%p)\n", This, pstrDisplayUri);
2892
2893     if(!pstrDisplayUri)
2894         return E_POINTER;
2895
2896     return E_NOTIMPL;
2897 }
2898
2899 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain)
2900 {
2901     TRACE("(%p)->(%p)\n", iface, pstrDomain);
2902     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0);
2903 }
2904
2905 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension)
2906 {
2907     Uri *This = URI_THIS(iface);
2908     FIXME("(%p)->(%p)\n", This, pstrExtension);
2909
2910     if(!pstrExtension)
2911         return E_POINTER;
2912
2913     return E_NOTIMPL;
2914 }
2915
2916 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment)
2917 {
2918     Uri *This = URI_THIS(iface);
2919     FIXME("(%p)->(%p)\n", This, pstrFragment);
2920
2921     if(!pstrFragment)
2922         return E_POINTER;
2923
2924     return E_NOTIMPL;
2925 }
2926
2927 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost)
2928 {
2929     TRACE("(%p)->(%p)\n", iface, pstrHost);
2930     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0);
2931 }
2932
2933 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword)
2934 {
2935     TRACE("(%p)->(%p)\n", iface, pstrPassword);
2936     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0);
2937 }
2938
2939 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath)
2940 {
2941     Uri *This = URI_THIS(iface);
2942     FIXME("(%p)->(%p)\n", This, pstrPath);
2943
2944     if(!pstrPath)
2945         return E_POINTER;
2946
2947     return E_NOTIMPL;
2948 }
2949
2950 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery)
2951 {
2952     Uri *This = URI_THIS(iface);
2953     FIXME("(%p)->(%p)\n", This, pstrPathAndQuery);
2954
2955     if(!pstrPathAndQuery)
2956         return E_POINTER;
2957
2958     return E_NOTIMPL;
2959 }
2960
2961 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery)
2962 {
2963     Uri *This = URI_THIS(iface);
2964     FIXME("(%p)->(%p)\n", This, pstrQuery);
2965
2966     if(!pstrQuery)
2967         return E_POINTER;
2968
2969     return E_NOTIMPL;
2970 }
2971
2972 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri)
2973 {
2974     Uri *This = URI_THIS(iface);
2975     TRACE("(%p)->(%p)\n", This, pstrRawUri);
2976
2977     /* Just forward the call to GetPropertyBSTR. */
2978     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0);
2979 }
2980
2981 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName)
2982 {
2983     Uri *This = URI_THIS(iface);
2984     TRACE("(%p)->(%p)\n", This, pstrSchemeName);
2985     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0);
2986 }
2987
2988 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo)
2989 {
2990     TRACE("(%p)->(%p)\n", iface, pstrUserInfo);
2991     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0);
2992 }
2993
2994 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName)
2995 {
2996     TRACE("(%p)->(%p)\n", iface, pstrUserName);
2997     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0);
2998 }
2999
3000 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType)
3001 {
3002     TRACE("(%p)->(%p)\n", iface, pdwHostType);
3003     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0);
3004 }
3005
3006 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort)
3007 {
3008     TRACE("(%p)->(%p)\n", iface, pdwPort);
3009     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0);
3010 }
3011
3012 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme)
3013 {
3014     Uri *This = URI_THIS(iface);
3015     TRACE("(%p)->(%p)\n", This, pdwScheme);
3016     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0);
3017 }
3018
3019 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone)
3020 {
3021     TRACE("(%p)->(%p)\n", iface, pdwZone);
3022     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0);
3023 }
3024
3025 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties)
3026 {
3027     Uri *This = URI_THIS(iface);
3028     FIXME("(%p)->(%p)\n", This, pdwProperties);
3029
3030     if(!pdwProperties)
3031         return E_INVALIDARG;
3032
3033     return E_NOTIMPL;
3034 }
3035
3036 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual)
3037 {
3038     Uri *This = URI_THIS(iface);
3039     TRACE("(%p)->(%p %p)\n", This, pUri, pfEqual);
3040
3041     if(!pfEqual)
3042         return E_POINTER;
3043
3044     if(!pUri) {
3045         *pfEqual = FALSE;
3046
3047         /* For some reason Windows returns S_OK here... */
3048         return S_OK;
3049     }
3050
3051     FIXME("(%p)->(%p %p)\n", This, pUri, pfEqual);
3052     return E_NOTIMPL;
3053 }
3054
3055 #undef URI_THIS
3056
/* Method table installed into every Uri by CreateUri().
 *
 * This is a positional initializer, so the entries have to stay in the order
 * in which IUriVtbl declares its members (IUriVtbl is declared outside this
 * file).
 */
static const IUriVtbl UriVtbl = {
    /* IUnknown-style lifetime management. */
    Uri_QueryInterface,
    Uri_AddRef,
    Uri_Release,
    /* Generic property access. */
    Uri_GetPropertyBSTR,
    Uri_GetPropertyLength,
    Uri_GetPropertyDWORD,
    Uri_HasProperty,
    /* String property getters. */
    Uri_GetAbsoluteUri,
    Uri_GetAuthority,
    Uri_GetDisplayUri,
    Uri_GetDomain,
    Uri_GetExtension,
    Uri_GetFragment,
    Uri_GetHost,
    Uri_GetPassword,
    Uri_GetPath,
    Uri_GetPathAndQuery,
    Uri_GetQuery,
    Uri_GetRawUri,
    Uri_GetSchemeName,
    Uri_GetUserInfo,
    Uri_GetUserName,
    /* DWORD property getters. */
    Uri_GetHostType,
    Uri_GetPort,
    Uri_GetScheme,
    Uri_GetZone,
    /* Everything else. */
    Uri_GetProperties,
    Uri_IsEqual
};
3087
3088 /***********************************************************************
3089  *           CreateUri (urlmon.@)
3090  */
3091 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI)
3092 {
3093     Uri *ret;
3094     HRESULT hr;
3095     parse_data data;
3096
3097     TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI);
3098
3099     if(!ppURI)
3100         return E_INVALIDARG;
3101
3102     if(!pwzURI) {
3103         *ppURI = NULL;
3104         return E_INVALIDARG;
3105     }
3106
3107     ret = heap_alloc(sizeof(Uri));
3108     if(!ret)
3109         return E_OUTOFMEMORY;
3110
3111     ret->lpIUriVtbl = &UriVtbl;
3112     ret->ref = 1;
3113
3114     /* Create a copy of pwzURI and store it as the raw_uri. */
3115     ret->raw_uri = SysAllocString(pwzURI);
3116     if(!ret->raw_uri) {
3117         heap_free(ret);
3118         return E_OUTOFMEMORY;
3119     }
3120
3121     memset(&data, 0, sizeof(parse_data));
3122     data.uri = ret->raw_uri;
3123
3124     /* Validate and parse the URI into it's components. */
3125     if(!parse_uri(&data, dwFlags)) {
3126         /* Encountered an unsupported or invalid URI */
3127         SysFreeString(ret->raw_uri);
3128         heap_free(ret);
3129         *ppURI = NULL;
3130         return E_INVALIDARG;
3131     }
3132
3133     /* Canonicalize the URI. */
3134     hr = canonicalize_uri(&data, ret, dwFlags);
3135     if(FAILED(hr)) {
3136         SysFreeString(ret->raw_uri);
3137         heap_free(ret);
3138         *ppURI = NULL;
3139         return hr;
3140     }
3141
3142     *ppURI = URI(ret);
3143     return S_OK;
3144 }
3145
/* Presumably maps an IUriBuilder pointer back to the UriBuilder that contains
 * it; DEFINE_THIS is defined outside this part of the file.
 */
#define URIBUILDER_THIS(iface) DEFINE_THIS(UriBuilder, IUriBuilder, iface)
3147
3148 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv)
3149 {
3150     UriBuilder *This = URIBUILDER_THIS(iface);
3151
3152     if(IsEqualGUID(&IID_IUnknown, riid)) {
3153         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
3154         *ppv = URIBUILDER(This);
3155     }else if(IsEqualGUID(&IID_IUriBuilder, riid)) {
3156         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
3157         *ppv = URIBUILDER(This);
3158     }else {
3159         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
3160         *ppv = NULL;
3161         return E_NOINTERFACE;
3162     }
3163
3164     IUnknown_AddRef((IUnknown*)*ppv);
3165     return S_OK;
3166 }
3167
3168 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface)
3169 {
3170     UriBuilder *This = URIBUILDER_THIS(iface);
3171     LONG ref = InterlockedIncrement(&This->ref);
3172
3173     TRACE("(%p) ref=%d\n", This, ref);
3174
3175     return ref;
3176 }
3177
3178 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface)
3179 {
3180     UriBuilder *This = URIBUILDER_THIS(iface);
3181     LONG ref = InterlockedDecrement(&This->ref);
3182
3183     TRACE("(%p) ref=%d\n", This, ref);
3184
3185     if(!ref)
3186         heap_free(This);
3187
3188     return ref;
3189 }
3190
3191 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface,
3192                                                  DWORD        dwAllowEncodingPropertyMask,
3193                                                  DWORD_PTR    dwReserved,
3194                                                  IUri       **ppIUri)
3195 {
3196     UriBuilder *This = URIBUILDER_THIS(iface);
3197     FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
3198     return E_NOTIMPL;
3199 }
3200
3201 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface,
3202                                            DWORD        dwCreateFlags,
3203                                            DWORD        dwAllowEncodingPropertyMask,
3204                                            DWORD_PTR    dwReserved,
3205                                            IUri       **ppIUri)
3206 {
3207     UriBuilder *This = URIBUILDER_THIS(iface);
3208     FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
3209     return E_NOTIMPL;
3210 }
3211
3212 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface,
3213                                          DWORD        dwCreateFlags,
3214                                          DWORD        dwUriBuilderFlags,
3215                                          DWORD        dwAllowEncodingPropertyMask,
3216                                          DWORD_PTR    dwReserved,
3217                                          IUri       **ppIUri)
3218 {
3219     UriBuilder *This = URIBUILDER_THIS(iface);
3220     FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
3221         dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
3222     return E_NOTIMPL;
3223 }
3224
3225 static HRESULT WINAPI  UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri)
3226 {
3227     UriBuilder *This = URIBUILDER_THIS(iface);
3228     FIXME("(%p)->(%p)\n", This, ppIUri);
3229     return E_NOTIMPL;
3230 }
3231
3232 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri)
3233 {
3234     UriBuilder *This = URIBUILDER_THIS(iface);
3235     FIXME("(%p)->(%p)\n", This, pIUri);
3236     return E_NOTIMPL;
3237 }
3238
3239 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment)
3240 {
3241     UriBuilder *This = URIBUILDER_THIS(iface);
3242     FIXME("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
3243     return E_NOTIMPL;
3244 }
3245
3246 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost)
3247 {
3248     UriBuilder *This = URIBUILDER_THIS(iface);
3249     FIXME("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
3250     return E_NOTIMPL;
3251 }
3252
3253 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword)
3254 {
3255     UriBuilder *This = URIBUILDER_THIS(iface);
3256     FIXME("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
3257     return E_NOTIMPL;
3258 }
3259
3260 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath)
3261 {
3262     UriBuilder *This = URIBUILDER_THIS(iface);
3263     FIXME("(%p)->(%p %p)\n", This, pcchPath, ppwzPath);
3264     return E_NOTIMPL;
3265 }
3266
3267 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort)
3268 {
3269     UriBuilder *This = URIBUILDER_THIS(iface);
3270     FIXME("(%p)->(%p %p)\n", This, pfHasPort, pdwPort);
3271     return E_NOTIMPL;
3272 }
3273
3274 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery)
3275 {
3276     UriBuilder *This = URIBUILDER_THIS(iface);
3277     FIXME("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery);
3278     return E_NOTIMPL;
3279 }
3280
3281 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName)
3282 {
3283     UriBuilder *This = URIBUILDER_THIS(iface);
3284     FIXME("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName);
3285     return E_NOTIMPL;
3286 }
3287
3288 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName)
3289 {
3290     UriBuilder *This = URIBUILDER_THIS(iface);
3291     FIXME("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName);
3292     return E_NOTIMPL;
3293 }
3294
3295 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue)
3296 {
3297     UriBuilder *This = URIBUILDER_THIS(iface);
3298     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3299     return E_NOTIMPL;
3300 }
3301
3302 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue)
3303 {
3304     UriBuilder *This = URIBUILDER_THIS(iface);
3305     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3306     return E_NOTIMPL;
3307 }
3308
3309 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue)
3310 {
3311     UriBuilder *This = URIBUILDER_THIS(iface);
3312     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3313     return E_NOTIMPL;
3314 }
3315
3316 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue)
3317 {
3318     UriBuilder *This = URIBUILDER_THIS(iface);
3319     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3320     return E_NOTIMPL;
3321 }
3322
3323 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue)
3324 {
3325     UriBuilder *This = URIBUILDER_THIS(iface);
3326     FIXME("(%p)->(%d %d)\n", This, fHasPort, dwNewValue);
3327     return E_NOTIMPL;
3328 }
3329
3330 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue)
3331 {
3332     UriBuilder *This = URIBUILDER_THIS(iface);
3333     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3334     return E_NOTIMPL;
3335 }
3336
3337 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue)
3338 {
3339     UriBuilder *This = URIBUILDER_THIS(iface);
3340     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3341     return E_NOTIMPL;
3342 }
3343
3344 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue)
3345 {
3346     UriBuilder *This = URIBUILDER_THIS(iface);
3347     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
3348     return E_NOTIMPL;
3349 }
3350
3351 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask)
3352 {
3353     UriBuilder *This = URIBUILDER_THIS(iface);
3354     FIXME("(%p)->(0x%08x)\n", This, dwPropertyMask);
3355     return E_NOTIMPL;
3356 }
3357
3358 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified)
3359 {
3360     UriBuilder *This = URIBUILDER_THIS(iface);
3361     FIXME("(%p)->(%p)\n", This, pfModified);
3362     return E_NOTIMPL;
3363 }
3364
3365 #undef URIBUILDER_THIS
3366
/* Method table installed into every UriBuilder by CreateIUriBuilder().
 *
 * This is a positional initializer, so the entries have to stay in the order
 * in which IUriBuilderVtbl declares its members (IUriBuilderVtbl is declared
 * outside this file).
 */
static const IUriBuilderVtbl UriBuilderVtbl = {
    /* IUnknown-style lifetime management. */
    UriBuilder_QueryInterface,
    UriBuilder_AddRef,
    UriBuilder_Release,
    /* IUri creation and exchange. */
    UriBuilder_CreateUriSimple,
    UriBuilder_CreateUri,
    UriBuilder_CreateUriWithFlags,
    UriBuilder_GetIUri,
    UriBuilder_SetIUri,
    /* Component getters. */
    UriBuilder_GetFragment,
    UriBuilder_GetHost,
    UriBuilder_GetPassword,
    UriBuilder_GetPath,
    UriBuilder_GetPort,
    UriBuilder_GetQuery,
    UriBuilder_GetSchemeName,
    UriBuilder_GetUserName,
    /* Component setters. */
    UriBuilder_SetFragment,
    UriBuilder_SetHost,
    UriBuilder_SetPassword,
    UriBuilder_SetPath,
    UriBuilder_SetPort,
    UriBuilder_SetQuery,
    UriBuilder_SetSchemeName,
    UriBuilder_SetUserName,
    /* Everything else. */
    UriBuilder_RemoveProperties,
    UriBuilder_HasBeenModified,
};
3395
3396 /***********************************************************************
3397  *           CreateIUriBuilder (urlmon.@)
3398  */
3399 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder)
3400 {
3401     UriBuilder *ret;
3402
3403     TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
3404
3405     ret = heap_alloc(sizeof(UriBuilder));
3406     if(!ret)
3407         return E_OUTOFMEMORY;
3408
3409     ret->lpIUriBuilderVtbl = &UriBuilderVtbl;
3410     ret->ref = 1;
3411
3412     *ppIUriBuilder = URIBUILDER(ret);
3413     return S_OK;
3414 }