git.oblomov.eu Git - wine/blob - dlls/urlmon/uri.c

   1 /*
   2  * Copyright 2010 Jacek Caban for CodeWeavers
   3  * Copyright 2010 Thomas Mullaly
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  18  */
  19
  20 #include "urlmon_main.h"
  21 #include "wine/debug.h"
  22
  23 #define NO_SHLWAPI_REG
  24 #include "shlwapi.h"
  25
  26 #define UINT_MAX 0xffffffff
  27 #define USHORT_MAX 0xffff
  28
  29 WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
  30
  31 static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}};
  32
  33 typedef struct {
  34     const IUriVtbl  *lpIUriVtbl;
  35     LONG ref;
  36
  37     BSTR            raw_uri;
  38
  39     /* Information about the canonicalized URI's buffer. */
  40     WCHAR           *canon_uri;
  41     DWORD           canon_size;
  42     DWORD           canon_len;
  43
  44     INT             scheme_start;
  45     DWORD           scheme_len;
  46     URL_SCHEME      scheme_type;
  47
  48     INT             userinfo_start;
  49     DWORD           userinfo_len;
  50     INT             userinfo_split;
  51
  52     INT             host_start;
  53     DWORD           host_len;
  54     Uri_HOST_TYPE   host_type;
  55
  56     USHORT          port;
  57     BOOL            has_port;
  58
  59     INT             authority_start;
  60     DWORD           authority_len;
  61
  62     INT             domain_offset;
  63
  64     INT             path_start;
  65     DWORD           path_len;
  66     INT             extension_offset;
  67
  68     INT             query_start;
  69     DWORD           query_len;
  70
  71     INT             fragment_start;
  72     DWORD           fragment_len;
  73 } Uri;
  74
  75 typedef struct {
  76     const IUriBuilderVtbl  *lpIUriBuilderVtbl;
  77     LONG ref;
  78
  79     IUri *uri;
  80 } UriBuilder;
  81
  82 typedef struct {
  83     const WCHAR *str;
  84     DWORD       len;
  85 } h16;
  86
  87 typedef struct {
  88     /* IPv6 addresses can hold up to 8 h16 components. */
  89     h16         components[8];
  90     DWORD       h16_count;
  91
  92     /* An IPv6 can have 1 elision ("::"). */
  93     const WCHAR *elision;
  94
  95     /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
  96     const WCHAR *ipv4;
  97     DWORD       ipv4_len;
  98
  99     INT         components_size;
 100     INT         elision_size;
 101 } ipv6_address;
 102
 103 typedef struct {
 104     BSTR            uri;
 105
 106     BOOL            is_relative;
 107     BOOL            is_opaque;
 108     BOOL            has_implicit_scheme;
 109     BOOL            has_implicit_ip;
 110     UINT            implicit_ipv4;
 111
 112     const WCHAR     *scheme;
 113     DWORD           scheme_len;
 114     URL_SCHEME      scheme_type;
 115
 116     const WCHAR     *userinfo;
 117     DWORD           userinfo_len;
 118     INT             userinfo_split;
 119
 120     const WCHAR     *host;
 121     DWORD           host_len;
 122     Uri_HOST_TYPE   host_type;
 123
 124     BOOL            has_ipv6;
 125     ipv6_address    ipv6_address;
 126
 127     const WCHAR     *port;
 128     DWORD           port_len;
 129     USHORT          port_value;
 130
 131     const WCHAR     *path;
 132     DWORD           path_len;
 133
 134     const WCHAR     *query;
 135     DWORD           query_len;
 136
 137     const WCHAR     *fragment;
 138     DWORD           fragment_len;
 139 } parse_data;
 140
 141 static const CHAR hexDigits[] = "0123456789ABCDEF";
 142
 143 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. */
 144 static const struct {
 145     URL_SCHEME  scheme;
 146     WCHAR       scheme_name[16];
 147 } recognized_schemes[] = {
 148     {URL_SCHEME_FTP,            {'f','t','p',0}},
 149     {URL_SCHEME_HTTP,           {'h','t','t','p',0}},
 150     {URL_SCHEME_GOPHER,         {'g','o','p','h','e','r',0}},
 151     {URL_SCHEME_MAILTO,         {'m','a','i','l','t','o',0}},
 152     {URL_SCHEME_NEWS,           {'n','e','w','s',0}},
 153     {URL_SCHEME_NNTP,           {'n','n','t','p',0}},
 154     {URL_SCHEME_TELNET,         {'t','e','l','n','e','t',0}},
 155     {URL_SCHEME_WAIS,           {'w','a','i','s',0}},
 156     {URL_SCHEME_FILE,           {'f','i','l','e',0}},
 157     {URL_SCHEME_MK,             {'m','k',0}},
 158     {URL_SCHEME_HTTPS,          {'h','t','t','p','s',0}},
 159     {URL_SCHEME_SHELL,          {'s','h','e','l','l',0}},
 160     {URL_SCHEME_SNEWS,          {'s','n','e','w','s',0}},
 161     {URL_SCHEME_LOCAL,          {'l','o','c','a','l',0}},
 162     {URL_SCHEME_JAVASCRIPT,     {'j','a','v','a','s','c','r','i','p','t',0}},
 163     {URL_SCHEME_VBSCRIPT,       {'v','b','s','c','r','i','p','t',0}},
 164     {URL_SCHEME_ABOUT,          {'a','b','o','u','t',0}},
 165     {URL_SCHEME_RES,            {'r','e','s',0}},
 166     {URL_SCHEME_MSSHELLROOTED,  {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
 167     {URL_SCHEME_MSSHELLIDLIST,  {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
 168     {URL_SCHEME_MSHELP,         {'h','c','p',0}},
 169     {URL_SCHEME_WILDCARD,       {'*',0}}
 170 };
 171
 172 /* List of default ports Windows recognizes. */
 173 static const struct {
 174     URL_SCHEME  scheme;
 175     USHORT      port;
 176 } default_ports[] = {
 177     {URL_SCHEME_FTP,    21},
 178     {URL_SCHEME_HTTP,   80},
 179     {URL_SCHEME_GOPHER, 70},
 180     {URL_SCHEME_NNTP,   119},
 181     {URL_SCHEME_TELNET, 23},
 182     {URL_SCHEME_WAIS,   210},
 183     {URL_SCHEME_HTTPS,  443},
 184 };
 185
 186 /* List of 3 character top level domain names Windows seems to recognize.
 187  * There might be more, but, these are the only ones I've found so far.
 188  */
 189 static const struct {
 190     WCHAR tld_name[4];
 191 } recognized_tlds[] = {
 192     {{'c','o','m',0}},
 193     {{'e','d','u',0}},
 194     {{'g','o','v',0}},
 195     {{'i','n','t',0}},
 196     {{'m','i','l',0}},
 197     {{'n','e','t',0}},
 198     {{'o','r','g',0}}
 199 };
 200
 201 static Uri *get_uri_obj(IUri *uri)
 202 {
 203     Uri *ret;
 204     HRESULT hres;
 205
 206     hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret);
 207     return SUCCEEDED(hres) ? ret : NULL;
 208 }
 209
 210 static inline BOOL is_alpha(WCHAR val) {
 211         return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z'));
 212 }
 213
 214 static inline BOOL is_num(WCHAR val) {
 215         return (val >= '0' && val <= '9');
 216 }
 217
 218 /* A URI is implicitly a file path if it begins with
 219  * a drive letter (eg X:) or starts with "\\" (UNC path).
 220  */
 221 static inline BOOL is_implicit_file_path(const WCHAR *str) {
 222     if(is_alpha(str[0]) && str[1] == ':')
 223         return TRUE;
 224     else if(str[0] == '\\' && str[1] == '\\')
 225         return TRUE;
 226
 227     return FALSE;
 228 }
 229
 230 /* Checks if the URI is a hierarchical URI. A hierarchical
 231  * URI is one that has "//" after the scheme.
 232  */
 233 static BOOL check_hierarchical(const WCHAR **ptr) {
 234     const WCHAR *start = *ptr;
 235
 236     if(**ptr != '/')
 237         return FALSE;
 238
 239     ++(*ptr);
 240     if(**ptr != '/') {
 241         *ptr = start;
 242         return FALSE;
 243     }
 244
 245     ++(*ptr);
 246     return TRUE;
 247 }
 248
 249 /* unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" */
 250 static inline BOOL is_unreserved(WCHAR val) {
 251     return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
 252             val == '_' || val == '~');
 253 }
 254
 255 /* sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 256  *               / "*" / "+" / "," / ";" / "="
 257  */
 258 static inline BOOL is_subdelim(WCHAR val) {
 259     return (val == '!' || val == '$' || val == '&' ||
 260             val == '\'' || val == '(' || val == ')' ||
 261             val == '*' || val == '+' || val == ',' ||
 262             val == ';' || val == '=');
 263 }
 264
 265 /* gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
 266 static inline BOOL is_gendelim(WCHAR val) {
 267     return (val == ':' || val == '/' || val == '?' ||
 268             val == '#' || val == '[' || val == ']' ||
 269             val == '@');
 270 }
 271
 272 /* Characters that delimit the end of the authority
 273  * section of a URI. Sometimes a '\\' is considered
 274  * an authority delimeter.
 275  */
 276 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
 277     return (val == '#' || val == '/' || val == '?' ||
 278             val == '\0' || (acceptSlash && val == '\\'));
 279 }
 280
 281 /* reserved = gen-delims / sub-delims */
 282 static inline BOOL is_reserved(WCHAR val) {
 283     return (is_subdelim(val) || is_gendelim(val));
 284 }
 285
 286 static inline BOOL is_hexdigit(WCHAR val) {
 287     return ((val >= 'a' && val <= 'f') ||
 288             (val >= 'A' && val <= 'F') ||
 289             (val >= '0' && val <= '9'));
 290 }
 291
 292 static inline BOOL is_path_delim(WCHAR val) {
 293     return (!val || val == '#' || val == '?');
 294 }
 295
 296 /* Checks if the two Uri's are logically equivalent. It's a simple
 297  * comparison, since they are both of type Uri, and it can access
 298  * the properties of each Uri directly without the need to go
 299  * through the "IUri_Get*" interface calls.
 300  */
 301 static BOOL are_equal_simple(const Uri *a, const Uri *b) {
 302     if(a->scheme_type == b->scheme_type) {
 303         const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN;
 304         const BOOL are_hierarchical =
 305                 (a->authority_start > -1 && b->authority_start > -1);
 306
 307         if(a->scheme_type == URL_SCHEME_FILE) {
 308             if(a->canon_len == b->canon_len)
 309                 return !StrCmpIW(a->canon_uri, b->canon_uri);
 310         }
 311
 312         /* Only compare the scheme names (if any) if their unknown scheme types. */
 313         if(!known_scheme) {
 314             if((a->scheme_start > -1 && b->scheme_start > -1) &&
 315                (a->scheme_len == b->scheme_len)) {
 316                 /* Make sure the schemes are the same. */
 317                 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len))
 318                     return FALSE;
 319             } else if(a->scheme_len != b->scheme_len)
 320                 /* One of the Uri's has a scheme name, while the other doesn't. */
 321                 return FALSE;
 322         }
 323
 324         /* If they have a userinfo component, perform case sensitive compare. */
 325         if((a->userinfo_start > -1 && b->userinfo_start > -1) &&
 326            (a->userinfo_len == b->userinfo_len)) {
 327             if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len))
 328                 return FALSE;
 329         } else if(a->userinfo_len != b->userinfo_len)
 330             /* One of the Uri's had a userinfo, while the other one doesn't. */
 331             return FALSE;
 332
 333         /* Check if they have a host name. */
 334         if((a->host_start > -1 && b->host_start > -1) &&
 335            (a->host_len == b->host_len)) {
 336             /* Perform a case insensitive compare if they are a known scheme type. */
 337             if(known_scheme) {
 338                 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len))
 339                     return FALSE;
 340             } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len))
 341                 return FALSE;
 342         } else if(a->host_len != b->host_len)
 343             /* One of the Uri's had a host, while the other one didn't. */
 344             return FALSE;
 345
 346         if(a->has_port && b->has_port) {
 347             if(a->port != b->port)
 348                 return FALSE;
 349         } else if(a->has_port || b->has_port)
 350             /* One had a port, while the other one didn't. */
 351             return FALSE;
 352
 353         /* Windows is weird with how it handles paths. For example
 354          * One URI could be "http://google.com" (after canonicalization)
 355          * and one could be "http://google.com/" and the IsEqual function
 356          * would still evaluate to TRUE, but, only if they are both hierarchical
 357          * URIs.
 358          */
 359         if((a->path_start > -1 && b->path_start > -1) &&
 360            (a->path_len == b->path_len)) {
 361             if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len))
 362                 return FALSE;
 363         } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) {
 364             if(*(a->canon_uri+a->path_start) != '/')
 365                 return FALSE;
 366         } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) {
 367             if(*(b->canon_uri+b->path_start) != '/')
 368                 return FALSE;
 369         } else if(a->path_len != b->path_len)
 370             return FALSE;
 371
 372         /* Compare the query strings of the two URIs. */
 373         if((a->query_start > -1 && b->query_start > -1) &&
 374            (a->query_len == b->query_len)) {
 375             if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len))
 376                 return FALSE;
 377         } else if(a->query_len != b->query_len)
 378             return FALSE;
 379
 380         if((a->fragment_start > -1 && b->fragment_start > -1) &&
 381            (a->fragment_len == b->fragment_len)) {
 382             if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len))
 383                 return FALSE;
 384         } else if(a->fragment_len != b->fragment_len)
 385             return FALSE;
 386
 387         /* If we get here, the two URIs are equivalent. */
 388         return TRUE;
 389     }
 390
 391     return FALSE;
 392 }
 393
 394 /* Computes the size of the given IPv6 address.
 395  * Each h16 component is 16bits, if there is an IPv4 address, it's
 396  * 32bits. If there's an elision it can be 16bits to 128bits, depending
 397  * on the number of other components.
 398  *
 399  * Modeled after google-url's CheckIPv6ComponentsSize function
 400  */
 401 static void compute_ipv6_comps_size(ipv6_address *address) {
 402     address->components_size = address->h16_count * 2;
 403
 404     if(address->ipv4)
 405         /* IPv4 address is 4 bytes. */
 406         address->components_size += 4;
 407
 408     if(address->elision) {
 409         /* An elision can be anywhere from 2 bytes up to 16 bytes.
 410          * It size depends on the size of the h16 and IPv4 components.
 411          */
 412         address->elision_size = 16 - address->components_size;
 413         if(address->elision_size < 2)
 414             address->elision_size = 2;
 415     } else
 416         address->elision_size = 0;
 417 }
 418
 419 /* Taken from dlls/jscript/lex.c */
 420 static int hex_to_int(WCHAR val) {
 421     if(val >= '0' && val <= '9')
 422         return val - '0';
 423     else if(val >= 'a' && val <= 'f')
 424         return val - 'a' + 10;
 425     else if(val >= 'A' && val <= 'F')
 426         return val - 'A' + 10;
 427
 428     return -1;
 429 }
 430
 431 /* Helper function for converting a percent encoded string
 432  * representation of a WCHAR value into its actual WCHAR value. If
 433  * the two characters following the '%' aren't valid hex values then
 434  * this function returns the NULL character.
 435  *
 436  * Eg.
 437  *  "%2E" will result in '.' being returned by this function.
 438  */
 439 static WCHAR decode_pct_val(const WCHAR *ptr) {
 440     WCHAR ret = '\0';
 441
 442     if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) {
 443         INT a = hex_to_int(*(ptr + 1));
 444         INT b = hex_to_int(*(ptr + 2));
 445
 446         ret = a << 4;
 447         ret += b;
 448     }
 449
 450     return ret;
 451 }
 452
 453 /* Helper function for percent encoding a given character
 454  * and storing the encoded value into a given buffer (dest).
 455  *
 456  * It's up to the calling function to ensure that there is
 457  * at least enough space in 'dest' for the percent encoded
 458  * value to be stored (so dest + 3 spaces available).
 459  */
 460 static inline void pct_encode_val(WCHAR val, WCHAR *dest) {
 461     dest[0] = '%';
 462     dest[1] = hexDigits[(val >> 4) & 0xf];
 463     dest[2] = hexDigits[val & 0xf];
 464 }
 465
 466 /* Scans the range of characters [str, end] and returns the last occurence
 467  * of 'ch' or returns NULL.
 468  */
 469 static const WCHAR *str_last_of(const WCHAR *str, const WCHAR *end, WCHAR ch) {
 470     const WCHAR *ptr = end;
 471
 472     while(ptr >= str) {
 473         if(*ptr == ch)
 474             return ptr;
 475         --ptr;
 476     }
 477
 478     return NULL;
 479 }
 480
 481 /* Attempts to parse the domain name from the host.
 482  *
 483  * This function also includes the Top-level Domain (TLD) name
 484  * of the host when it tries to find the domain name. If it finds
 485  * a valid domain name it will assign 'domain_start' the offset
 486  * into 'host' where the domain name starts.
 487  *
 488  * It's implied that if a domain name its range is implied to be
 489  * [host+domain_start, host+host_len).
 490  */
 491 static void find_domain_name(const WCHAR *host, DWORD host_len,
 492                              INT *domain_start) {
 493     const WCHAR *last_tld, *sec_last_tld, *end;
 494
 495     end = host+host_len-1;
 496
 497     *domain_start = -1;
 498
 499     /* There has to be at least enough room for a '.' followed by a
 500      * 3 character TLD for a domain to even exist in the host name.
 501      */
 502     if(host_len < 4)
 503         return;
 504
 505     last_tld = str_last_of(host, end, '.');
 506     if(!last_tld)
 507         /* http://hostname -> has no domain name. */
 508         return;
 509
 510     sec_last_tld = str_last_of(host, last_tld-1, '.');
 511     if(!sec_last_tld) {
 512         /* If the '.' is at the beginning of the host there
 513          * has to be at least 3 characters in the TLD for it
 514          * to be valid.
 515          *  Ex: .com -> .com as the domain name.
 516          *      .co  -> has no domain name.
 517          */
 518         if(last_tld-host == 0) {
 519             if(end-(last_tld-1) < 3)
 520                 return;
 521         } else if(last_tld-host == 3) {
 522             DWORD i;
 523
 524             /* If there's three characters in front of last_tld and
 525              * they are on the list of recognized TLDs, then this
 526              * host doesn't have a domain (since the host only contains
 527              * a TLD name.
 528              *  Ex: edu.uk -> has no domain name.
 529              *      foo.uk -> foo.uk as the domain name.
 530              */
 531             for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
 532                 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
 533                     return;
 534             }
 535         } else if(last_tld-host < 3)
 536             /* Anything less then 3 characters is considered part
 537              * of the TLD name.
 538              *  Ex: ak.uk -> Has no domain name.
 539              */
 540             return;
 541
 542         /* Otherwise the domain name is the whole host name. */
 543         *domain_start = 0;
 544     } else if(end+1-last_tld > 3) {
 545         /* If the last_tld has more then 3 characters then it's automatically
 546          * considered the TLD of the domain name.
 547          *  Ex: www.winehq.org.uk.test -> uk.test as the domain name.
 548          */
 549         *domain_start = (sec_last_tld+1)-host;
 550     } else if(last_tld - (sec_last_tld+1) < 4) {
 551         DWORD i;
 552         /* If the sec_last_tld is 3 characters long it HAS to be on the list of
 553          * recognized to still be considered part of the TLD name, otherwise
 554          * its considered the domain name.
 555          *  Ex: www.google.com.uk -> google.com.uk as the domain name.
 556          *      www.google.foo.uk -> foo.uk as the domain name.
 557          */
 558         if(last_tld - (sec_last_tld+1) == 3) {
 559             for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
 560                 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
 561                     const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
 562
 563                     if(!domain)
 564                         *domain_start = 0;
 565                     else
 566                         *domain_start = (domain+1) - host;
 567                     TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
 568                                                         (host+host_len)-(host+*domain_start)));
 569                     return;
 570                 }
 571             }
 572
 573             *domain_start = (sec_last_tld+1)-host;
 574         } else {
 575             /* Since the sec_last_tld is less then 3 characters it's considered
 576              * part of the TLD.
 577              *  Ex: www.google.fo.uk -> google.fo.uk as the domain name.
 578              */
 579             const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
 580
 581             if(!domain)
 582                 *domain_start = 0;
 583             else
 584                 *domain_start = (domain+1) - host;
 585         }
 586     } else {
 587         /* The second to last TLD has more then 3 characters making it
 588          * the domain name.
 589          *  Ex: www.google.test.us -> test.us as the domain name.
 590          */
 591         *domain_start = (sec_last_tld+1)-host;
 592     }
 593
 594     TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
 595                                         (host+host_len)-(host+*domain_start)));
 596 }
 597
 598 /* Removes the dot segments from a heirarchical URIs path component. This
 599  * function performs the removal in place.
 600  *
 601  * This is a modified version of Qt's QUrl function "removeDotsFromPath".
 602  *
 603  * This function returns the new length of the path string.
 604  */
 605 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) {
 606     WCHAR *out = path;
 607     const WCHAR *in = out;
 608     const WCHAR *end = out + path_len;
 609     DWORD len;
 610
 611     while(in < end) {
 612         /* A.  if the input buffer begins with a prefix of "/./" or "/.",
 613          *     where "." is a complete path segment, then replace that
 614          *     prefix with "/" in the input buffer; otherwise,
 615          */
 616         if(in <= end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '/') {
 617             in += 2;
 618             continue;
 619         } else if(in == end - 2 && in[0] == '/' && in[1] == '.') {
 620             *out++ = '/';
 621             in += 2;
 622             break;
 623         }
 624
 625         /* B.  if the input buffer begins with a prefix of "/../" or "/..",
 626          *     where ".." is a complete path segment, then replace that
 627          *     prefix with "/" in the input buffer and remove the last
 628          *     segment and its preceding "/" (if any) from the output
 629          *     buffer; otherwise,
 630          */
 631         if(in <= end - 4 && in[0] == '/' && in[1] == '.' && in[2] == '.' && in[3] == '/') {
 632             while(out > path && *(--out) != '/');
 633
 634             in += 3;
 635             continue;
 636         } else if(in == end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '.') {
 637             while(out > path && *(--out) != '/');
 638
 639             if(*out == '/')
 640                 ++out;
 641
 642             in += 3;
 643             break;
 644         }
 645
 646         /* C.  move the first path segment in the input buffer to the end of
 647          *     the output buffer, including the initial "/" character (if
 648          *     any) and any subsequent characters up to, but not including,
 649          *     the next "/" character or the end of the input buffer.
 650          */
 651         *out++ = *in++;
 652         while(in < end && *in != '/')
 653             *out++ = *in++;
 654     }
 655
 656     len = out - path;
 657     TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len,
 658         debugstr_wn(path, len), len);
 659     return len;
 660 }
 661
 662 /* Attempts to find the file extension in a given path. */
 663 static INT find_file_extension(const WCHAR *path, DWORD path_len) {
 664     const WCHAR *end;
 665
 666     for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) {
 667         if(*end == '.')
 668             return end-path;
 669     }
 670
 671     return -1;
 672 }
 673
 674 /* Computes the location where the elision should occur in the IPv6
 675  * address using the numerical values of each component stored in
 676  * 'values'. If the address shouldn't contain an elision then 'index'
 677  * is assigned -1 as it's value. Otherwise 'index' will contain the
 678  * starting index (into values) where the elision should be, and 'count'
 679  * will contain the number of cells the elision covers.
 680  *
 681  * NOTES:
 682  *  Windows will expand an elision if the elision only represents 1 h16
 683  *  component of the URI.
 684  *
 685  *  Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
 686  *
 687  *  If the IPv6 address contains an IPv4 address, the IPv4 address is also
 688  *  considered for being included as part of an elision if all it's components
 689  *  are zeros.
 690  *
 691  *  Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
 692  */
 693 static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
 694                                      INT *index, DWORD *count) {
 695     DWORD i, max_len, cur_len;
 696     INT max_index, cur_index;
 697
 698     max_len = cur_len = 0;
 699     max_index = cur_index = -1;
 700     for(i = 0; i < 8; ++i) {
 701         BOOL check_ipv4 = (address->ipv4 && i == 6);
 702         BOOL is_end = (check_ipv4 || i == 7);
 703
 704         if(check_ipv4) {
 705             /* Check if the IPv4 address contains only zeros. */
 706             if(values[i] == 0 && values[i+1] == 0) {
 707                 if(cur_index == -1)
 708                     cur_index = i;
 709
 710                 cur_len += 2;
 711                 ++i;
 712             }
 713         } else if(values[i] == 0) {
 714             if(cur_index == -1)
 715                 cur_index = i;
 716
 717             ++cur_len;
 718         }
 719
 720         if(is_end || values[i] != 0) {
 721             /* We only consider it for an elision if it's
 722              * more then 1 component long.
 723              */
 724             if(cur_len > 1 && cur_len > max_len) {
 725                 /* Found the new elision location. */
 726                 max_len = cur_len;
 727                 max_index = cur_index;
 728             }
 729
 730             /* Reset the current range for the next range of zeros. */
 731             cur_index = -1;
 732             cur_len = 0;
 733         }
 734     }
 735
 736     *index = max_index;
 737     *count = max_len;
 738 }
 739
 740 /* Removes all the leading and trailing white spaces or
 741  * control characters from the URI and removes all control
 742  * characters inside of the URI string.
 743  */
 744 static BSTR pre_process_uri(LPCWSTR uri) {
 745     BSTR ret;
 746     DWORD len;
 747     const WCHAR *start, *end;
 748     WCHAR *buf, *ptr;
 749
 750     len = lstrlenW(uri);
 751
 752     start = uri;
 753     /* Skip leading controls and whitespace. */
 754     while(iscntrlW(*start) || isspaceW(*start)) ++start;
 755
 756     end = uri+len-1;
 757     if(start == end)
 758         /* URI consisted only of control/whitespace. */
 759         ret = SysAllocStringLen(NULL, 0);
 760     else {
 761         while(iscntrlW(*end) || isspaceW(*end)) --end;
 762
 763         buf = heap_alloc(((end+1)-start)*sizeof(WCHAR));
 764         if(!buf)
 765             return NULL;
 766
 767         for(ptr = buf; start < end+1; ++start) {
 768             if(!iscntrlW(*start))
 769                 *ptr++ = *start;
 770         }
 771
 772         ret = SysAllocStringLen(buf, ptr-buf);
 773         heap_free(buf);
 774     }
 775
 776     return ret;
 777 }
 778
 779 /* Converts the specified IPv4 address into an uint value.
 780  *
 781  * This function assumes that the IPv4 address has already been validated.
 782  */
 783 static UINT ipv4toui(const WCHAR *ip, DWORD len) {
 784     UINT ret = 0;
 785     DWORD comp_value = 0;
 786     const WCHAR *ptr;
 787
 788     for(ptr = ip; ptr < ip+len; ++ptr) {
 789         if(*ptr == '.') {
 790             ret <<= 8;
 791             ret += comp_value;
 792             comp_value = 0;
 793         } else
 794             comp_value = comp_value*10 + (*ptr-'0');
 795     }
 796
 797     ret <<= 8;
 798     ret += comp_value;
 799
 800     return ret;
 801 }
 802
 803 /* Converts an IPv4 address in numerical form into it's fully qualified
 804  * string form. This function returns the number of characters written
 805  * to 'dest'. If 'dest' is NULL this function will return the number of
 806  * characters that would have been written.
 807  *
 808  * It's up to the caller to ensure there's enough space in 'dest' for the
 809  * address.
 810  */
 811 static DWORD ui2ipv4(WCHAR *dest, UINT address) {
 812     static const WCHAR formatW[] =
 813         {'%','u','.','%','u','.','%','u','.','%','u',0};
 814     DWORD ret = 0;
 815     UCHAR digits[4];
 816
 817     digits[0] = (address >> 24) & 0xff;
 818     digits[1] = (address >> 16) & 0xff;
 819     digits[2] = (address >> 8) & 0xff;
 820     digits[3] = address & 0xff;
 821
 822     if(!dest) {
 823         WCHAR tmp[16];
 824         ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]);
 825     } else
 826         ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]);
 827
 828     return ret;
 829 }
 830
 831 /* Converts an h16 component (from an IPv6 address) into it's
 832  * numerical value.
 833  *
 834  * This function assumes that the h16 component has already been validated.
 835  */
 836 static USHORT h16tous(h16 component) {
 837     DWORD i;
 838     USHORT ret = 0;
 839
 840     for(i = 0; i < component.len; ++i) {
 841         ret <<= 4;
 842         ret += hex_to_int(component.str[i]);
 843     }
 844
 845     return ret;
 846 }
 847
 848 /* Converts an IPv6 address into it's 128 bits (16 bytes) numerical value.
 849  *
 850  * This function assumes that the ipv6_address has already been validated.
 851  */
 852 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
 853     DWORD i, cur_component = 0;
 854     BOOL already_passed_elision = FALSE;
 855
 856     for(i = 0; i < address->h16_count; ++i) {
 857         if(address->elision) {
 858             if(address->components[i].str > address->elision && !already_passed_elision) {
 859                 /* Means we just passed the elision and need to add it's values to
 860                  * 'number' before we do anything else.
 861                  */
 862                 DWORD j = 0;
 863                 for(j = 0; j < address->elision_size; j+=2)
 864                     number[cur_component++] = 0;
 865
 866                 already_passed_elision = TRUE;
 867             }
 868         }
 869
 870         number[cur_component++] = h16tous(address->components[i]);
 871     }
 872
 873     /* Case when the elision appears after the h16 components. */
 874     if(!already_passed_elision && address->elision) {
 875         for(i = 0; i < address->elision_size; i+=2)
 876             number[cur_component++] = 0;
 877         already_passed_elision = TRUE;
 878     }
 879
 880     if(address->ipv4) {
 881         UINT value = ipv4toui(address->ipv4, address->ipv4_len);
 882
 883         if(cur_component != 6) {
 884             ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
 885             return FALSE;
 886         }
 887
 888         number[cur_component++] = (value >> 16) & 0xffff;
 889         number[cur_component] = value & 0xffff;
 890     }
 891
 892     return TRUE;
 893 }
 894
 895 /* Checks if the characters pointed to by 'ptr' are
 896  * a percent encoded data octet.
 897  *
 898  * pct-encoded = "%" HEXDIG HEXDIG
 899  */
 900 static BOOL check_pct_encoded(const WCHAR **ptr) {
 901     const WCHAR *start = *ptr;
 902
 903     if(**ptr != '%')
 904         return FALSE;
 905
 906     ++(*ptr);
 907     if(!is_hexdigit(**ptr)) {
 908         *ptr = start;
 909         return FALSE;
 910     }
 911
 912     ++(*ptr);
 913     if(!is_hexdigit(**ptr)) {
 914         *ptr = start;
 915         return FALSE;
 916     }
 917
 918     ++(*ptr);
 919     return TRUE;
 920 }
 921
 922 /* dec-octet   = DIGIT                 ; 0-9
 923  *             / %x31-39 DIGIT         ; 10-99
 924  *             / "1" 2DIGIT            ; 100-199
 925  *             / "2" %x30-34 DIGIT     ; 200-249
 926  *             / "25" %x30-35          ; 250-255
 927  */
 928 static BOOL check_dec_octet(const WCHAR **ptr) {
 929     const WCHAR *c1, *c2, *c3;
 930
 931     c1 = *ptr;
 932     /* A dec-octet must be at least 1 digit long. */
 933     if(*c1 < '0' || *c1 > '9')
 934         return FALSE;
 935
 936     ++(*ptr);
 937
 938     c2 = *ptr;
 939     /* Since the 1 digit requirment was meet, it doesn't
 940      * matter if this is a DIGIT value, it's considered a
 941      * dec-octet.
 942      */
 943     if(*c2 < '0' || *c2 > '9')
 944         return TRUE;
 945
 946     ++(*ptr);
 947
 948     c3 = *ptr;
 949     /* Same explanation as above. */
 950     if(*c3 < '0' || *c3 > '9')
 951         return TRUE;
 952
 953     /* Anything > 255 isn't a valid IP dec-octet. */
 954     if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') {
 955         *ptr = c1;
 956         return FALSE;
 957     }
 958
 959     ++(*ptr);
 960     return TRUE;
 961 }
 962
 963 /* Checks if there is an implicit IPv4 address in the host component of the URI.
 964  * The max value of an implicit IPv4 address is UINT_MAX.
 965  *
 966  *  Ex:
 967  *      "234567" would be considered an implicit IPv4 address.
 968  */
 969 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) {
 970     const WCHAR *start = *ptr;
 971     ULONGLONG ret = 0;
 972     *val = 0;
 973
 974     while(is_num(**ptr)) {
 975         ret = ret*10 + (**ptr - '0');
 976
 977         if(ret > UINT_MAX) {
 978             *ptr = start;
 979             return FALSE;
 980         }
 981         ++(*ptr);
 982     }
 983
 984     if(*ptr == start)
 985         return FALSE;
 986
 987     *val = ret;
 988     return TRUE;
 989 }
 990
 991 /* Checks if the string contains an IPv4 address.
 992  *
 993  * This function has a strict mode or a non-strict mode of operation
 994  * When 'strict' is set to FALSE this function will return TRUE if
 995  * the string contains at least 'dec-octet "." dec-octet' since partial
 996  * IPv4 addresses will be normalized out into full IPv4 addresses. When
 997  * 'strict' is set this function expects there to be a full IPv4 address.
 998  *
 999  * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1000  */
1001 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) {
1002     const WCHAR *start = *ptr;
1003
1004     if(!check_dec_octet(ptr)) {
1005         *ptr = start;
1006         return FALSE;
1007     }
1008
1009     if(**ptr != '.') {
1010         *ptr = start;
1011         return FALSE;
1012     }
1013
1014     ++(*ptr);
1015     if(!check_dec_octet(ptr)) {
1016         *ptr = start;
1017         return FALSE;
1018     }
1019
1020     if(**ptr != '.') {
1021         if(strict) {
1022             *ptr = start;
1023             return FALSE;
1024         } else
1025             return TRUE;
1026     }
1027
1028     ++(*ptr);
1029     if(!check_dec_octet(ptr)) {
1030         *ptr = start;
1031         return FALSE;
1032     }
1033
1034     if(**ptr != '.') {
1035         if(strict) {
1036             *ptr = start;
1037             return FALSE;
1038         } else
1039             return TRUE;
1040     }
1041
1042     ++(*ptr);
1043     if(!check_dec_octet(ptr)) {
1044         *ptr = start;
1045         return FALSE;
1046     }
1047
1048     /* Found a four digit ip address. */
1049     return TRUE;
1050 }
1051 /* Tries to parse the scheme name of the URI.
1052  *
1053  * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
1054  * NOTE: Windows accepts a number as the first character of a scheme.
1055  */
1056 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data) {
1057     const WCHAR *start = *ptr;
1058
1059     data->scheme = NULL;
1060     data->scheme_len = 0;
1061
1062     while(**ptr) {
1063         if(**ptr == '*' && *ptr == start) {
1064             /* Might have found a wildcard scheme. If it is the next
1065              * char has to be a ':' for it to be a valid URI
1066              */
1067             ++(*ptr);
1068             break;
1069         } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' &&
1070            **ptr != '-' && **ptr != '.')
1071             break;
1072
1073         (*ptr)++;
1074     }
1075
1076     if(*ptr == start)
1077         return FALSE;
1078
1079     /* Schemes must end with a ':' */
1080     if(**ptr != ':') {
1081         *ptr = start;
1082         return FALSE;
1083     }
1084
1085     data->scheme = start;
1086     data->scheme_len = *ptr - start;
1087
1088     ++(*ptr);
1089     return TRUE;
1090 }
1091
1092 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores
1093  * the deduced URL_SCHEME in data->scheme_type.
1094  */
1095 static BOOL parse_scheme_type(parse_data *data) {
1096     /* If there's scheme data then see if it's a recognized scheme. */
1097     if(data->scheme && data->scheme_len) {
1098         DWORD i;
1099
1100         for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) {
1101             if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) {
1102                 /* Has to be a case insensitive compare. */
1103                 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) {
1104                     data->scheme_type = recognized_schemes[i].scheme;
1105                     return TRUE;
1106                 }
1107             }
1108         }
1109
1110         /* If we get here it means it's not a recognized scheme. */
1111         data->scheme_type = URL_SCHEME_UNKNOWN;
1112         return TRUE;
1113     } else if(data->is_relative) {
1114         /* Relative URI's have no scheme. */
1115         data->scheme_type = URL_SCHEME_UNKNOWN;
1116         return TRUE;
1117     } else {
1118         /* Should never reach here! what happened... */
1119         FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri));
1120         return FALSE;
1121     }
1122 }
1123
1124 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't
1125  * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type
1126  * using the flags specified in 'flags' (if any). Flags that affect how this function
1127  * operates are the Uri_CREATE_ALLOW_* flags.
1128  *
1129  * All parsed/deduced information will be stored in 'data' when the function returns.
1130  *
1131  * Returns TRUE if it was able to successfully parse the information.
1132  */
1133 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
1134     static const WCHAR fileW[] = {'f','i','l','e',0};
1135     static const WCHAR wildcardW[] = {'*',0};
1136
1137     /* First check to see if the uri could implicitly be a file path. */
1138     if(is_implicit_file_path(*ptr)) {
1139         if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) {
1140             data->scheme = fileW;
1141             data->scheme_len = lstrlenW(fileW);
1142             data->has_implicit_scheme = TRUE;
1143
1144             TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags);
1145         } else {
1146             /* Window's does not consider anything that can implicitly be a file
1147              * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set...
1148              */
1149             TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n",
1150                     ptr, data, flags);
1151             return FALSE;
1152         }
1153     } else if(!parse_scheme_name(ptr, data)) {
1154         /* No Scheme was found, this means it could be:
1155          *      a) an implicit Wildcard scheme
1156          *      b) a relative URI
1157          *      c) a invalid URI.
1158          */
1159         if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) {
1160             data->scheme = wildcardW;
1161             data->scheme_len = lstrlenW(wildcardW);
1162             data->has_implicit_scheme = TRUE;
1163
1164             TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags);
1165         } else if (flags & Uri_CREATE_ALLOW_RELATIVE) {
1166             data->is_relative = TRUE;
1167             TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags);
1168         } else {
1169             TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags);
1170             return FALSE;
1171         }
1172     }
1173
1174     if(!data->is_relative)
1175         TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags,
1176                 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
1177
1178     if(!parse_scheme_type(data))
1179         return FALSE;
1180
1181     TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type);
1182     return TRUE;
1183 }
1184
1185 /* Parses the userinfo part of the URI (if it exists). The userinfo field of
1186  * a URI can consist of "username:password@", or just "username@".
1187  *
1188  * RFC def:
1189  * userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
1190  *
1191  * NOTES:
1192  *  1)  If there is more than one ':' in the userinfo part of the URI Windows
1193  *      uses the first occurence of ':' to delimit the username and password
1194  *      components.
1195  *
1196  *      ex:
1197  *          ftp://user:pass:word@winehq.org
1198  *
1199  *      Would yield, "user" as the username and "pass:word" as the password.
1200  *
1201  *  2)  Windows allows any character to appear in the "userinfo" part of
1202  *      a URI, as long as it's not an authority delimeter character set.
1203  */
1204 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
1205     data->userinfo = *ptr;
1206     data->userinfo_split = -1;
1207
1208     while(**ptr != '@') {
1209         if(**ptr == ':' && data->userinfo_split == -1)
1210             data->userinfo_split = *ptr - data->userinfo;
1211         else if(**ptr == '%') {
1212             /* If it's a known scheme type, it has to be a valid percent
1213              * encoded value.
1214              */
1215             if(!check_pct_encoded(ptr)) {
1216                 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1217                     *ptr = data->userinfo;
1218                     data->userinfo = NULL;
1219                     data->userinfo_split = -1;
1220
1221                     TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1222                     return;
1223                 }
1224             } else
1225                 continue;
1226         } else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN))
1227             break;
1228
1229         ++(*ptr);
1230     }
1231
1232     if(**ptr != '@') {
1233         *ptr = data->userinfo;
1234         data->userinfo = NULL;
1235         data->userinfo_split = -1;
1236
1237         TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1238         return;
1239     }
1240
1241     data->userinfo_len = *ptr - data->userinfo;
1242     TRACE("(%p %p %x): Found userinfo=%s userinfo_len=%d split=%d.\n", ptr, data, flags,
1243             debugstr_wn(data->userinfo, data->userinfo_len), data->userinfo_len, data->userinfo_split);
1244     ++(*ptr);
1245 }
1246
1247 /* Attempts to parse a port from the URI.
1248  *
1249  * NOTES:
1250  *  Windows seems to have a cap on what the maximum value
1251  *  for a port can be. The max value is USHORT_MAX.
1252  *
1253  * port = *DIGIT
1254  */
1255 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) {
1256     UINT port = 0;
1257     data->port = *ptr;
1258
1259     while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1260         if(!is_num(**ptr)) {
1261             *ptr = data->port;
1262             data->port = NULL;
1263             return FALSE;
1264         }
1265
1266         port = port*10 + (**ptr-'0');
1267
1268         if(port > USHORT_MAX) {
1269             *ptr = data->port;
1270             data->port = NULL;
1271             return FALSE;
1272         }
1273
1274         ++(*ptr);
1275     }
1276
1277     data->port_value = port;
1278     data->port_len = *ptr - data->port;
1279
1280     TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags,
1281         debugstr_wn(data->port, data->port_len), data->port_len, data->port_value);
1282     return TRUE;
1283 }
1284
1285 /* Attempts to parse a IPv4 address from the URI.
1286  *
1287  * NOTES:
1288  *  Window's normalizes IPv4 addresses, This means there's three
1289  *  possibilities for the URI to contain an IPv4 address.
1290  *      1)  A well formed address (ex. 192.2.2.2).
1291  *      2)  A partially formed address. For example "192.0" would
1292  *          normalize to "192.0.0.0" during canonicalization.
1293  *      3)  An implicit IPv4 address. For example "256" would
1294  *          normalize to "0.0.1.0" during canonicalization. Also
1295  *          note that the maximum value for an implicit IP address
1296  *          is UINT_MAX, if the value in the URI exceeds this then
1297  *          it is not considered an IPv4 address.
1298  */
1299 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1300     const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN;
1301     data->host = *ptr;
1302
1303     if(!check_ipv4address(ptr, FALSE)) {
1304         if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) {
1305             TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n",
1306                 ptr, data, flags);
1307             *ptr = data->host;
1308             data->host = NULL;
1309             return FALSE;
1310         } else
1311             data->has_implicit_ip = TRUE;
1312     }
1313
1314     /* Check if what we found is the only part of the host name (if it isn't
1315      * we don't have an IPv4 address).
1316      */
1317     if(**ptr == ':') {
1318         ++(*ptr);
1319         if(!parse_port(ptr, data, flags)) {
1320             *ptr = data->host;
1321             data->host = NULL;
1322             return FALSE;
1323         }
1324     } else if(!is_auth_delim(**ptr, !is_unknown)) {
1325         /* Found more data which belongs the host, so this isn't an IPv4. */
1326         *ptr = data->host;
1327         data->host = NULL;
1328         data->has_implicit_ip = FALSE;
1329         return FALSE;
1330     }
1331
1332     data->host_len = *ptr - data->host;
1333     data->host_type = Uri_HOST_IPV4;
1334
1335     TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n",
1336         ptr, data, flags, debugstr_wn(data->host, data->host_len),
1337         data->host_len, data->host_type);
1338     return TRUE;
1339 }
1340
1341 /* Attempts to parse the reg-name from the URI.
1342  *
1343  * Because of the way Windows handles ':' this function also
1344  * handles parsing the port.
1345  *
1346  * reg-name = *( unreserved / pct-encoded / sub-delims )
1347  *
1348  * NOTE:
1349  *  Windows allows everything, but, the characters in "auth_delims" and ':'
1350  *  to appear in a reg-name, unless it's an unknown scheme type then ':' is
1351  *  allowed to appear (even if a valid port isn't after it).
1352  *
1353  *  Windows doesn't like host names which start with '[' and end with ']'
1354  *  and don't contain a valid IP literal address in between them.
1355  *
1356  *  On Windows if an '[' is encountered in the host name the ':' no longer
1357  *  counts as a delimiter until you reach the next ']' or an "authority delimeter".
1358  *
1359  *  A reg-name CAN be empty.
1360  */
1361 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags) {
1362     const BOOL has_start_bracket = **ptr == '[';
1363     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1364     BOOL inside_brackets = has_start_bracket;
1365     BOOL ignore_col = FALSE;
1366
1367     /* We have to be careful with file schemes. */
1368     if(data->scheme_type == URL_SCHEME_FILE) {
1369         /* This is because an implicit file scheme could be "C:\\test" and it
1370          * would trick this function into thinking the host is "C", when after
1371          * canonicalization the host would end up being an empty string.
1372          */
1373         if(is_alpha(**ptr) && *(*ptr+1) == ':') {
1374             /* Regular old drive paths don't have a host type (or host name). */
1375             data->host_type = Uri_HOST_UNKNOWN;
1376             data->host = *ptr;
1377             data->host_len = 0;
1378             return TRUE;
1379         } else if(**ptr == '\\' && *(*ptr+1) == '\\')
1380             /* Skip past the "\\" of a UNC path. */
1381             *ptr += 2;
1382     }
1383
1384     data->host = *ptr;
1385
1386     while(!is_auth_delim(**ptr, known_scheme)) {
1387         if(**ptr == ':' && !ignore_col) {
1388             /* We can ignore ':' if were inside brackets.*/
1389             if(!inside_brackets) {
1390                 const WCHAR *tmp = (*ptr)++;
1391
1392                 /* Attempt to parse the port. */
1393                 if(!parse_port(ptr, data, flags)) {
1394                     /* Windows expects there to be a valid port for known scheme types. */
1395                     if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1396                         *ptr = data->host;
1397                         data->host = NULL;
1398                         TRACE("(%p %p %x): Expected valid port\n", ptr, data, flags);
1399                         return FALSE;
1400                     } else
1401                         /* Windows gives up on trying to parse a port when it
1402                          * encounters 1 invalid port.
1403                          */
1404                         ignore_col = TRUE;
1405                 } else {
1406                     data->host_len = tmp - data->host;
1407                     break;
1408                 }
1409             }
1410         } else if(**ptr == '%' && known_scheme) {
1411             /* Has to be a legit % encoded value. */
1412             if(!check_pct_encoded(ptr)) {
1413                 *ptr = data->host;
1414                 data->host = NULL;
1415                 return FALSE;
1416             } else
1417                 continue;
1418         } else if(**ptr == ']')
1419             inside_brackets = FALSE;
1420         else if(**ptr == '[')
1421             inside_brackets = TRUE;
1422
1423         ++(*ptr);
1424     }
1425
1426     if(has_start_bracket) {
1427         /* Make sure the last character of the host wasn't a ']'. */
1428         if(*(*ptr-1) == ']') {
1429             TRACE("(%p %p %x): Expected an IP literal inside of the host\n",
1430                 ptr, data, flags);
1431             *ptr = data->host;
1432             data->host = NULL;
1433             return FALSE;
1434         }
1435     }
1436
1437     /* Don't overwrite our length if we found a port earlier. */
1438     if(!data->port)
1439         data->host_len = *ptr - data->host;
1440
1441     /* If the host is empty, then it's an unknown host type. */
1442     if(data->host_len == 0)
1443         data->host_type = Uri_HOST_UNKNOWN;
1444     else
1445         data->host_type = Uri_HOST_DNS;
1446
1447     TRACE("(%p %p %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags,
1448         debugstr_wn(data->host, data->host_len), data->host_len);
1449     return TRUE;
1450 }
1451
1452 /* Attempts to parse an IPv6 address out of the URI.
1453  *
1454  * IPv6address =                               6( h16 ":" ) ls32
1455  *                /                       "::" 5( h16 ":" ) ls32
1456  *                / [               h16 ] "::" 4( h16 ":" ) ls32
1457  *                / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1458  *                / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1459  *                / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1460  *                / [ *4( h16 ":" ) h16 ] "::"              ls32
1461  *                / [ *5( h16 ":" ) h16 ] "::"              h16
1462  *                / [ *6( h16 ":" ) h16 ] "::"
1463  *
1464  * ls32        = ( h16 ":" h16 ) / IPv4address
1465  *             ; least-significant 32 bits of address.
1466  *
1467  * h16         = 1*4HEXDIG
1468  *             ; 16 bits of address represented in hexadecimal.
1469  *
1470  * Modeled after google-url's 'DoParseIPv6' function.
1471  */
1472 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1473     const WCHAR *start, *cur_start;
1474     ipv6_address ip;
1475
1476     start = cur_start = *ptr;
1477     memset(&ip, 0, sizeof(ipv6_address));
1478
1479     for(;; ++(*ptr)) {
1480         /* Check if we're on the last character of the host. */
1481         BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
1482                         || **ptr == ']');
1483
1484         BOOL is_split = (**ptr == ':');
1485         BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
1486
1487         /* Check if we're at the end of of the a component, or
1488          * if we're at the end of the IPv6 address.
1489          */
1490         if(is_split || is_end) {
1491             DWORD cur_len = 0;
1492
1493             cur_len = *ptr - cur_start;
1494
1495             /* h16 can't have a length > 4. */
1496             if(cur_len > 4) {
1497                 *ptr = start;
1498
1499                 TRACE("(%p %p %x): h16 component to long.\n",
1500                     ptr, data, flags);
1501                 return FALSE;
1502             }
1503
1504             if(cur_len == 0) {
1505                 /* An h16 component can't have the length of 0 unless
1506                  * the elision is at the beginning of the address, or
1507                  * at the end of the address.
1508                  */
1509                 if(!((*ptr == start && is_elision) ||
1510                     (is_end && (*ptr-2) == ip.elision))) {
1511                     *ptr = start;
1512                     TRACE("(%p %p %x): IPv6 component can not have a length of 0.\n",
1513                         ptr, data, flags);
1514                     return FALSE;
1515                 }
1516             }
1517
1518             if(cur_len > 0) {
1519                 /* An IPv6 address can have no more than 8 h16 components. */
1520                 if(ip.h16_count >= 8) {
1521                     *ptr = start;
1522                     TRACE("(%p %p %x): Not a IPv6 address, to many h16 components.\n",
1523                         ptr, data, flags);
1524                     return FALSE;
1525                 }
1526
1527                 ip.components[ip.h16_count].str = cur_start;
1528                 ip.components[ip.h16_count].len = cur_len;
1529
1530                 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n",
1531                     ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len,
1532                     ip.h16_count);
1533                 ++ip.h16_count;
1534             }
1535         }
1536
1537         if(is_end)
1538             break;
1539
1540         if(is_elision) {
1541             /* A IPv6 address can only have 1 elision ('::'). */
1542             if(ip.elision) {
1543                 *ptr = start;
1544
1545                 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n",
1546                     ptr, data, flags);
1547                 return FALSE;
1548             }
1549
1550             ip.elision = *ptr;
1551             ++(*ptr);
1552         }
1553
1554         if(is_split)
1555             cur_start = *ptr+1;
1556         else {
1557             if(!check_ipv4address(ptr, TRUE)) {
1558                 if(!is_hexdigit(**ptr)) {
1559                     /* Not a valid character for an IPv6 address. */
1560                     *ptr = start;
1561                     return FALSE;
1562                 }
1563             } else {
1564                 /* Found an IPv4 address. */
1565                 ip.ipv4 = cur_start;
1566                 ip.ipv4_len = *ptr - cur_start;
1567
1568                 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n",
1569                     ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len),
1570                     ip.ipv4_len);
1571
1572                 /* IPv4 addresses can only appear at the end of a IPv6. */
1573                 break;
1574             }
1575         }
1576     }
1577
1578     compute_ipv6_comps_size(&ip);
1579
1580     /* Make sure the IPv6 address adds up to 16 bytes. */
1581     if(ip.components_size + ip.elision_size != 16) {
1582         *ptr = start;
1583         TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n",
1584             ptr, data, flags);
1585         return FALSE;
1586     }
1587
1588     if(ip.elision_size == 2) {
1589         /* For some reason on Windows if an elision that represents
1590          * only 1 h16 component is encountered at the very begin or
1591          * end of an IPv6 address, Windows does not consider it a
1592          * valid IPv6 address.
1593          *
1594          *  Ex: [::2:3:4:5:6:7] is not valid, even though the sum
1595          *      of all the components == 128bits.
1596          */
1597          if(ip.elision < ip.components[0].str ||
1598             ip.elision > ip.components[ip.h16_count-1].str) {
1599             *ptr = start;
1600             TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
1601                 ptr, data, flags);
1602             return FALSE;
1603         }
1604     }
1605
1606     data->host_type = Uri_HOST_IPV6;
1607     data->has_ipv6 = TRUE;
1608     data->ipv6_address = ip;
1609
1610     TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n",
1611         ptr, data, flags, debugstr_wn(start, *ptr-start),
1612         *ptr-start);
1613     return TRUE;
1614 }
1615
1616 /*  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */
1617 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) {
1618     const WCHAR *start = *ptr;
1619
1620     /* IPvFuture has to start with a 'v' or 'V'. */
1621     if(**ptr != 'v' && **ptr != 'V')
1622         return FALSE;
1623
1624     /* Following the v their must be atleast 1 hexdigit. */
1625     ++(*ptr);
1626     if(!is_hexdigit(**ptr)) {
1627         *ptr = start;
1628         return FALSE;
1629     }
1630
1631     ++(*ptr);
1632     while(is_hexdigit(**ptr))
1633         ++(*ptr);
1634
1635     /* End of the hexdigit sequence must be a '.' */
1636     if(**ptr != '.') {
1637         *ptr = start;
1638         return FALSE;
1639     }
1640
1641     ++(*ptr);
1642     if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') {
1643         *ptr = start;
1644         return FALSE;
1645     }
1646
1647     ++(*ptr);
1648     while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':')
1649         ++(*ptr);
1650
1651     data->host_type = Uri_HOST_UNKNOWN;
1652
1653     TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags,
1654         debugstr_wn(start, *ptr-start), *ptr-start);
1655
1656     return TRUE;
1657 }
1658
1659 /* IP-literal = "[" ( IPv6address / IPvFuture  ) "]" */
1660 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags) {
1661     data->host = *ptr;
1662
1663     if(**ptr != '[') {
1664         data->host = NULL;
1665         return FALSE;
1666     }
1667
1668     ++(*ptr);
1669     if(!parse_ipv6address(ptr, data, flags)) {
1670         if(!parse_ipvfuture(ptr, data, flags)) {
1671             *ptr = data->host;
1672             data->host = NULL;
1673             return FALSE;
1674         }
1675     }
1676
1677     if(**ptr != ']') {
1678         *ptr = data->host;
1679         data->host = NULL;
1680         return FALSE;
1681     }
1682
1683     ++(*ptr);
1684     if(**ptr == ':') {
1685         ++(*ptr);
1686         /* If a valid port is not found, then let it trickle down to
1687          * parse_reg_name.
1688          */
1689         if(!parse_port(ptr, data, flags)) {
1690             *ptr = data->host;
1691             data->host = NULL;
1692             return FALSE;
1693         }
1694     } else
1695         data->host_len = *ptr - data->host;
1696
1697     return TRUE;
1698 }
1699
1700 /* Parses the host information from the URI.
1701  *
1702  * host = IP-literal / IPv4address / reg-name
1703  */
1704 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags) {
1705     if(!parse_ip_literal(ptr, data, flags)) {
1706         if(!parse_ipv4address(ptr, data, flags)) {
1707             if(!parse_reg_name(ptr, data, flags)) {
1708                 TRACE("(%p %p %x): Malformed URI, Unknown host type.\n",
1709                     ptr, data, flags);
1710                 return FALSE;
1711             }
1712         }
1713     }
1714
1715     return TRUE;
1716 }
1717
1718 /* Parses the authority information from the URI.
1719  *
1720  * authority   = [ userinfo "@" ] host [ ":" port ]
1721  */
1722 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
1723     parse_userinfo(ptr, data, flags);
1724
1725     /* Parsing the port will happen during one of the host parsing
1726      * routines (if the URI has a port).
1727      */
1728     if(!parse_host(ptr, data, flags))
1729         return FALSE;
1730
1731     return TRUE;
1732 }
1733
1734 /* Attempts to parse the path information of a hierarchical URI. */
1735 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
1736     const WCHAR *start = *ptr;
1737     static const WCHAR slash[] = {'/',0};
1738
1739     if(is_path_delim(**ptr)) {
1740         if(data->scheme_type == URL_SCHEME_WILDCARD) {
1741             /* Wildcard schemes don't get a '/' attached if their path is
1742              * empty.
1743              */
1744             data->path = NULL;
1745             data->path_len = 0;
1746         } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1747             /* If the path component is empty, then a '/' is added. */
1748             data->path = slash;
1749             data->path_len = 1;
1750         }
1751     } else {
1752         while(!is_path_delim(**ptr)) {
1753             if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN &&
1754                data->scheme_type != URL_SCHEME_FILE) {
1755                 if(!check_pct_encoded(ptr)) {
1756                     *ptr = start;
1757                     return FALSE;
1758                 } else
1759                     continue;
1760             } else if(**ptr == '\\') {
1761                 /* Not allowed to have a backslash if NO_CANONICALIZE is set
1762                  * and the scheme is known type (but not a file scheme).
1763                  */
1764                 if(flags & Uri_CREATE_NO_CANONICALIZE) {
1765                     if(data->scheme_type != URL_SCHEME_FILE &&
1766                        data->scheme_type != URL_SCHEME_UNKNOWN) {
1767                         *ptr = start;
1768                         return FALSE;
1769                     }
1770                 }
1771             }
1772
1773             ++(*ptr);
1774         }
1775
1776         /* The only time a URI doesn't have a path is when
1777          * the NO_CANONICALIZE flag is set and the raw URI
1778          * didn't contain one.
1779          */
1780         if(*ptr == start) {
1781             data->path = NULL;
1782             data->path_len = 0;
1783         } else {
1784             data->path = start;
1785             data->path_len = *ptr - start;
1786         }
1787     }
1788
1789     if(data->path)
1790         TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
1791             debugstr_wn(data->path, data->path_len), data->path_len);
1792     else
1793         TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
1794
1795     return TRUE;
1796 }
1797
1798 /* Parses the path of a opaque URI (much less strict then the parser
1799  * for a hierarchical URI).
1800  *
1801  * NOTE:
1802  *  Windows allows invalid % encoded data to appear in opaque URI paths
1803  *  for unknown scheme types.
1804  */
1805 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) {
1806     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1807
1808     data->path = *ptr;
1809
1810     while(!is_path_delim(**ptr)) {
1811         if(**ptr == '%' && known_scheme) {
1812             if(!check_pct_encoded(ptr)) {
1813                 *ptr = data->path;
1814                 data->path = NULL;
1815                 return FALSE;
1816             } else
1817                 continue;
1818         }
1819
1820         ++(*ptr);
1821     }
1822
1823     data->path_len = *ptr - data->path;
1824     TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags,
1825         debugstr_wn(data->path, data->path_len), data->path_len);
1826     return TRUE;
1827 }
1828
1829 /* Determines how the URI should be parsed after the scheme information.
1830  *
1831  * If the scheme is followed, by "//" then, it is treated as an hierarchical URI
1832  * which then the authority and path information will be parsed out. Otherwise, the
1833  * URI will be treated as an opaque URI which the authority information is not parsed
1834  * out.
1835  *
1836  * RFC 3896 definition of hier-part:
1837  *
1838  * hier-part   = "//" authority path-abempty
1839  *                 / path-absolute
1840  *                 / path-rootless
1841  *                 / path-empty
1842  *
1843  * MSDN opaque URI definition:
1844  *  scheme ":" path [ "#" fragment ]
1845  *
1846  * NOTES:
1847  *  If the URI is of an unknown scheme type and has a "//" following the scheme then it
1848  *  is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is
1849  *  set then it is considered an opaque URI reguardless of what follows the scheme information
1850  *  (per MSDN documentation).
1851  */
1852 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
1853     const WCHAR *start = *ptr;
1854
1855     /* Checks if the authority information needs to be parsed.
1856      *
1857      * Relative URI's aren't hierarchical URI's, but, they could trick
1858      * "check_hierarchical" into thinking it is, so we need to explicitly
1859      * make sure it's not relative. Also, if the URI is an implicit file
1860      * scheme it might not contain a "//", but, it's considered hierarchical
1861      * anyways. Wildcard Schemes are always considered hierarchical
1862      */
1863     if(data->scheme_type == URL_SCHEME_WILDCARD ||
1864        data->scheme_type == URL_SCHEME_FILE ||
1865        (!data->is_relative && check_hierarchical(ptr))) {
1866         /* Only treat it as a hierarchical URI if the scheme_type is known or
1867          * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set.
1868          */
1869         if(data->scheme_type != URL_SCHEME_UNKNOWN ||
1870            !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) {
1871             TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
1872             data->is_opaque = FALSE;
1873
1874             if(data->scheme_type == URL_SCHEME_FILE)
1875                 /* Skip past the "//" after the scheme (if any). */
1876                 check_hierarchical(ptr);
1877
1878             /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
1879             if(!parse_authority(ptr, data, flags))
1880                 return FALSE;
1881
1882             return parse_path_hierarchical(ptr, data, flags);
1883         } else
1884             /* Reset ptr to it's starting position so opaque path parsing
1885              * begins at the correct location.
1886              */
1887             *ptr = start;
1888     }
1889
1890     /* If it reaches here, then the URI will be treated as an opaque
1891      * URI.
1892      */
1893
1894     TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags);
1895
1896     data->is_opaque = TRUE;
1897     if(!parse_path_opaque(ptr, data, flags))
1898         return FALSE;
1899
1900     return TRUE;
1901 }
1902
1903 /* Attempts to parse the query string from the URI.
1904  *
1905  * NOTES:
1906  *  If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
1907  *  data is allowed appear in the query string. For unknown scheme types
1908  *  invalid percent encoded data is allowed to appear reguardless.
1909  */
1910 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) {
1911     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1912
1913     if(**ptr != '?') {
1914         TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags);
1915         return TRUE;
1916     }
1917
1918     data->query = *ptr;
1919
1920     ++(*ptr);
1921     while(**ptr && **ptr != '#') {
1922         if(**ptr == '%' && known_scheme &&
1923            !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
1924             if(!check_pct_encoded(ptr)) {
1925                 *ptr = data->query;
1926                 data->query = NULL;
1927                 return FALSE;
1928             } else
1929                 continue;
1930         }
1931
1932         ++(*ptr);
1933     }
1934
1935     data->query_len = *ptr - data->query;
1936
1937     TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags,
1938         debugstr_wn(data->query, data->query_len), data->query_len);
1939     return TRUE;
1940 }
1941
1942 /* Attempts to parse the fragment from the URI.
1943  *
1944  * NOTES:
1945  *  If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
1946  *  data is allowed appear in the query string. For unknown scheme types
1947  *  invalid percent encoded data is allowed to appear reguardless.
1948  */
1949 static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
1950     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1951
1952     if(**ptr != '#') {
1953         TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
1954         return TRUE;
1955     }
1956
1957     data->fragment = *ptr;
1958
1959     ++(*ptr);
1960     while(**ptr) {
1961         if(**ptr == '%' && known_scheme &&
1962            !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
1963             if(!check_pct_encoded(ptr)) {
1964                 *ptr = data->fragment;
1965                 data->fragment = NULL;
1966                 return FALSE;
1967             } else
1968                 continue;
1969         }
1970
1971         ++(*ptr);
1972     }
1973
1974     data->fragment_len = *ptr - data->fragment;
1975
1976     TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
1977         debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
1978     return TRUE;
1979 }
1980
1981 /* Parses and validates the components of the specified by data->uri
1982  * and stores the information it parses into 'data'.
1983  *
1984  * Returns TRUE if it successfully parsed the URI. False otherwise.
1985  */
1986 static BOOL parse_uri(parse_data *data, DWORD flags) {
1987     const WCHAR *ptr;
1988     const WCHAR **pptr;
1989
1990     ptr = data->uri;
1991     pptr = &ptr;
1992
1993     TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri));
1994
1995     if(!parse_scheme(pptr, data, flags))
1996         return FALSE;
1997
1998     if(!parse_hierpart(pptr, data, flags))
1999         return FALSE;
2000
2001     if(!parse_query(pptr, data, flags))
2002         return FALSE;
2003
2004     if(!parse_fragment(pptr, data, flags))
2005         return FALSE;
2006
2007     TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
2008     return TRUE;
2009 }
2010
2011 /* Canonicalizes the userinfo of the URI represented by the parse_data.
2012  *
2013  * Canonicalization of the userinfo is a simple process. If there are any percent
2014  * encoded characters that fall in the "unreserved" character set, they are decoded
2015  * to their actual value. If a character is not in the "unreserved" or "reserved" sets
2016  * then it is percent encoded. Other than that the characters are copied over without
2017  * change.
2018  */
2019 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2020     DWORD i = 0;
2021
2022     uri->userinfo_start = uri->userinfo_split = -1;
2023     uri->userinfo_len = 0;
2024
2025     if(!data->userinfo)
2026         /* URI doesn't have userinfo, so nothing to do here. */
2027         return TRUE;
2028
2029     uri->userinfo_start = uri->canon_len;
2030
2031     while(i < data->userinfo_len) {
2032         if(data->userinfo[i] == ':' && uri->userinfo_split == -1)
2033             /* Windows only considers the first ':' as the delimiter. */
2034             uri->userinfo_split = uri->canon_len - uri->userinfo_start;
2035         else if(data->userinfo[i] == '%') {
2036             /* Only decode % encoded values for known scheme types. */
2037             if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2038                 /* See if the value really needs decoded. */
2039                 WCHAR val = decode_pct_val(data->userinfo + i);
2040                 if(is_unreserved(val)) {
2041                     if(!computeOnly)
2042                         uri->canon_uri[uri->canon_len] = val;
2043
2044                     ++uri->canon_len;
2045
2046                     /* Move pass the hex characters. */
2047                     i += 3;
2048                     continue;
2049                 }
2050             }
2051         } else if(!is_reserved(data->userinfo[i]) && !is_unreserved(data->userinfo[i]) &&
2052                   data->userinfo[i] != '\\') {
2053             /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
2054              * is NOT set.
2055              */
2056             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2057                 if(!computeOnly)
2058                     pct_encode_val(data->userinfo[i], uri->canon_uri + uri->canon_len);
2059
2060                 uri->canon_len += 3;
2061                 ++i;
2062                 continue;
2063             }
2064         }
2065
2066         if(!computeOnly)
2067             /* Nothing special, so just copy the character over. */
2068             uri->canon_uri[uri->canon_len] = data->userinfo[i];
2069
2070         ++uri->canon_len;
2071         ++i;
2072     }
2073
2074     uri->userinfo_len = uri->canon_len - uri->userinfo_start;
2075     if(!computeOnly)
2076         TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
2077                 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
2078                 uri->userinfo_split, uri->userinfo_len);
2079
2080     /* Now insert the '@' after the userinfo. */
2081     if(!computeOnly)
2082         uri->canon_uri[uri->canon_len] = '@';
2083
2084     ++uri->canon_len;
2085     return TRUE;
2086 }
2087
2088 /* Attempts to canonicalize a reg_name.
2089  *
2090  * Things that happen:
2091  *  1)  If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
2092  *      lower cased. Unless it's an unknown scheme type, which case it's
2093  *      no lower cased reguardless.
2094  *
2095  *  2)  Unreserved % encoded characters are decoded for known
2096  *      scheme types.
2097  *
2098  *  3)  Forbidden characters are % encoded as long as
2099  *      Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
2100  *      it isn't an unknown scheme type.
2101  *
2102  *  4)  If it's a file scheme and the host is "localhost" it's removed.
2103  */
2104 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
2105                                   DWORD flags, BOOL computeOnly) {
2106     static const WCHAR localhostW[] =
2107             {'l','o','c','a','l','h','o','s','t',0};
2108     const WCHAR *ptr;
2109     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2110
2111     uri->host_start = uri->canon_len;
2112
2113     if(data->scheme_type == URL_SCHEME_FILE &&
2114        data->host_len == lstrlenW(localhostW)) {
2115         if(!StrCmpNIW(data->host, localhostW, data->host_len)) {
2116             uri->host_start = -1;
2117             uri->host_len = 0;
2118             uri->host_type = Uri_HOST_UNKNOWN;
2119             return TRUE;
2120         }
2121     }
2122
2123     for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) {
2124         if(*ptr == '%' && known_scheme) {
2125             WCHAR val = decode_pct_val(ptr);
2126             if(is_unreserved(val)) {
2127                 /* If NO_CANONICALZE is not set, then windows lower cases the
2128                  * decoded value.
2129                  */
2130                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) {
2131                     if(!computeOnly)
2132                         uri->canon_uri[uri->canon_len] = tolowerW(val);
2133                 } else {
2134                     if(!computeOnly)
2135                         uri->canon_uri[uri->canon_len] = val;
2136                 }
2137                 ++uri->canon_len;
2138
2139                 /* Skip past the % encoded character. */
2140                 ptr += 2;
2141                 continue;
2142             } else {
2143                 /* Just copy the % over. */
2144                 if(!computeOnly)
2145                     uri->canon_uri[uri->canon_len] = *ptr;
2146                 ++uri->canon_len;
2147             }
2148         } else if(*ptr == '\\') {
2149             /* Only unknown scheme types could have made it here with a '\\' in the host name. */
2150             if(!computeOnly)
2151                 uri->canon_uri[uri->canon_len] = *ptr;
2152             ++uri->canon_len;
2153         } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2154                   !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
2155             if(!computeOnly) {
2156                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2157
2158                 /* The percent encoded value gets lower cased also. */
2159                 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
2160                     uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]);
2161                     uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]);
2162                 }
2163             }
2164
2165             uri->canon_len += 3;
2166         } else {
2167             if(!computeOnly) {
2168                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme)
2169                     uri->canon_uri[uri->canon_len] = tolowerW(*ptr);
2170                 else
2171                     uri->canon_uri[uri->canon_len] = *ptr;
2172             }
2173
2174             ++uri->canon_len;
2175         }
2176     }
2177
2178     uri->host_len = uri->canon_len - uri->host_start;
2179
2180     if(!computeOnly)
2181         TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags,
2182             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2183             uri->host_len);
2184
2185     if(!computeOnly)
2186         find_domain_name(uri->canon_uri+uri->host_start, uri->host_len,
2187             &(uri->domain_offset));
2188
2189     return TRUE;
2190 }
2191
2192 /* Attempts to canonicalize an implicit IPv4 address. */
2193 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2194     uri->host_start = uri->canon_len;
2195
2196     TRACE("%u\n", data->implicit_ipv4);
2197     /* For unknown scheme types Window's doesn't convert
2198      * the value into an IP address, but, it still considers
2199      * it an IPv4 address.
2200      */
2201     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2202         if(!computeOnly)
2203             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2204         uri->canon_len += data->host_len;
2205     } else {
2206         if(!computeOnly)
2207             uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4);
2208         else
2209             uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4);
2210     }
2211
2212     uri->host_len = uri->canon_len - uri->host_start;
2213     uri->host_type = Uri_HOST_IPV4;
2214
2215     if(!computeOnly)
2216         TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n",
2217             data, uri, flags, computeOnly,
2218             debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2219             uri->host_len);
2220
2221     return TRUE;
2222 }
2223
2224 /* Attempts to canonicalize an IPv4 address.
2225  *
2226  * If the parse_data represents a URI that has an implicit IPv4 address
2227  * (ex. http://256/, this function will convert 256 into 0.0.1.0). If
2228  * the implicit IP address exceeds the value of UINT_MAX (maximum value
2229  * for an IPv4 address) it's canonicalized as if were a reg-name.
2230  *
2231  * If the parse_data contains a partial or full IPv4 address it normalizes it.
2232  * A partial IPv4 address is something like "192.0" and would be normalized to
2233  * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would
2234  * be normalized to "192.2.1.3".
2235  *
2236  * NOTES:
2237  *  Window's ONLY normalizes IPv4 address for known scheme types (one that isn't
2238  *  URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from
2239  *  the original URI into the canonicalized URI, but, it still recognizes URI's
2240  *  host type as HOST_IPV4.
2241  */
2242 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2243     if(data->has_implicit_ip)
2244         return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly);
2245     else {
2246         uri->host_start = uri->canon_len;
2247
2248         /* Windows only normalizes for known scheme types. */
2249         if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2250             /* parse_data contains a partial or full IPv4 address, so normalize it. */
2251             DWORD i, octetDigitCount = 0, octetCount = 0;
2252             BOOL octetHasDigit = FALSE;
2253
2254             for(i = 0; i < data->host_len; ++i) {
2255                 if(data->host[i] == '0' && !octetHasDigit) {
2256                     /* Can ignore leading zeros if:
2257                      *  1) It isn't the last digit of the octet.
2258                      *  2) i+1 != data->host_len
2259                      *  3) i+1 != '.'
2260                      */
2261                     if(octetDigitCount == 2 ||
2262                        i+1 == data->host_len ||
2263                        data->host[i+1] == '.') {
2264                         if(!computeOnly)
2265                             uri->canon_uri[uri->canon_len] = data->host[i];
2266                         ++uri->canon_len;
2267                         TRACE("Adding zero\n");
2268                     }
2269                 } else if(data->host[i] == '.') {
2270                     if(!computeOnly)
2271                         uri->canon_uri[uri->canon_len] = data->host[i];
2272                     ++uri->canon_len;
2273
2274                     octetDigitCount = 0;
2275                     octetHasDigit = FALSE;
2276                     ++octetCount;
2277                 } else {
2278                     if(!computeOnly)
2279                         uri->canon_uri[uri->canon_len] = data->host[i];
2280                     ++uri->canon_len;
2281
2282                     ++octetDigitCount;
2283                     octetHasDigit = TRUE;
2284                 }
2285             }
2286
2287             /* Make sure the canonicalized IP address has 4 dec-octets.
2288              * If doesn't add "0" ones until there is 4;
2289              */
2290             for( ; octetCount < 3; ++octetCount) {
2291                 if(!computeOnly) {
2292                     uri->canon_uri[uri->canon_len] = '.';
2293                     uri->canon_uri[uri->canon_len+1] = '0';
2294                 }
2295
2296                 uri->canon_len += 2;
2297             }
2298         } else {
2299             /* Windows doesn't normalize addresses in unknown schemes. */
2300             if(!computeOnly)
2301                 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2302             uri->canon_len += data->host_len;
2303         }
2304
2305         uri->host_len = uri->canon_len - uri->host_start;
2306         if(!computeOnly)
2307             TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n",
2308                 data, uri, flags, computeOnly,
2309                 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2310                 uri->host_len);
2311     }
2312
2313     return TRUE;
2314 }
2315
2316 /* Attempts to canonicalize the IPv6 address of the URI.
2317  *
2318  * Multiple things happen during the canonicalization of an IPv6 address:
2319  *  1)  Any leading zero's in an h16 component are removed.
2320  *      Ex: [0001:0022::] -> [1:22::]
2321  *
2322  *  2)  The longest sequence of zero h16 components are compressed
2323  *      into a "::" (elision). If there's a tie, the first is choosen.
2324  *
2325  *      Ex: [0:0:0:0:1:6:7:8]   -> [::1:6:7:8]
2326  *          [0:0:0:0:1:2::]     -> [::1:2:0:0]
2327  *          [0:0:1:2:0:0:7:8]   -> [::1:2:0:0:7:8]
2328  *
2329  *  3)  If an IPv4 address is attached to the IPv6 address, it's
2330  *      also normalized.
2331  *      Ex: [::001.002.022.000] -> [::1.2.22.0]
2332  *
2333  *  4)  If an elision is present, but, only represents 1 h16 component
2334  *      it's expanded.
2335  *
2336  *      Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
2337  *
2338  *  5)  If the IPv6 address contains an IPv4 address and there exists
2339  *      at least 1 non-zero h16 component the IPv4 address is converted
2340  *      into two h16 components, otherwise it's normalized and kept as is.
2341  *
2342  *      Ex: [::192.200.003.4]       -> [::192.200.3.4]
2343  *          [ffff::192.200.003.4]   -> [ffff::c0c8:3041]
2344  *
2345  * NOTE:
2346  *  For unknown scheme types Windows simply copies the address over without any
2347  *  changes.
2348  *
2349  *  IPv4 address can be included in an elision if all its components are 0's.
2350  */
2351 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
2352                                      DWORD flags, BOOL computeOnly) {
2353     uri->host_start = uri->canon_len;
2354
2355     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2356         if(!computeOnly)
2357             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2358         uri->canon_len += data->host_len;
2359     } else {
2360         USHORT values[8];
2361         INT elision_start;
2362         DWORD i, elision_len;
2363
2364         if(!ipv6_to_number(&(data->ipv6_address), values)) {
2365             TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
2366                 data, uri, flags, computeOnly);
2367             return FALSE;
2368         }
2369
2370         if(!computeOnly)
2371             uri->canon_uri[uri->canon_len] = '[';
2372         ++uri->canon_len;
2373
2374         /* Find where the elision should occur (if any). */
2375         compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
2376
2377         TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
2378             computeOnly, elision_start, elision_len);
2379
2380         for(i = 0; i < 8; ++i) {
2381             BOOL in_elision = (elision_start > -1 && i >= elision_start &&
2382                                i < elision_start+elision_len);
2383             BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
2384                             data->ipv6_address.h16_count == 0);
2385
2386             if(i == elision_start) {
2387                 if(!computeOnly) {
2388                     uri->canon_uri[uri->canon_len] = ':';
2389                     uri->canon_uri[uri->canon_len+1] = ':';
2390                 }
2391                 uri->canon_len += 2;
2392             }
2393
2394             /* We can ignore the current component if we're in the elision. */
2395             if(in_elision)
2396                 continue;
2397
2398             /* We only add a ':' if we're not at i == 0, or when we're at
2399              * the very end of elision range since the ':' colon was handled
2400              * earlier. Otherwise we would end up with ":::" after elision.
2401              */
2402             if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
2403                 if(!computeOnly)
2404                     uri->canon_uri[uri->canon_len] = ':';
2405                 ++uri->canon_len;
2406             }
2407
2408             if(do_ipv4) {
2409                 UINT val;
2410                 DWORD len;
2411
2412                 /* Combine the two parts of the IPv4 address values. */
2413                 val = values[i];
2414                 val <<= 16;
2415                 val += values[i+1];
2416
2417                 if(!computeOnly)
2418                     len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
2419                 else
2420                     len = ui2ipv4(NULL, val);
2421
2422                 uri->canon_len += len;
2423                 ++i;
2424             } else {
2425                 /* Write a regular h16 component to the URI. */
2426
2427                 /* Short circuit for the trivial case. */
2428                 if(values[i] == 0) {
2429                     if(!computeOnly)
2430                         uri->canon_uri[uri->canon_len] = '0';
2431                     ++uri->canon_len;
2432                 } else {
2433                     static const WCHAR formatW[] = {'%','x',0};
2434
2435                     if(!computeOnly)
2436                         uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len,
2437                                             formatW, values[i]);
2438                     else {
2439                         WCHAR tmp[5];
2440                         uri->canon_len += sprintfW(tmp, formatW, values[i]);
2441                     }
2442                 }
2443             }
2444         }
2445
2446         /* Add the closing ']'. */
2447         if(!computeOnly)
2448             uri->canon_uri[uri->canon_len] = ']';
2449         ++uri->canon_len;
2450     }
2451
2452     uri->host_len = uri->canon_len - uri->host_start;
2453
2454     if(!computeOnly)
2455         TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags,
2456             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2457             uri->host_len);
2458
2459     return TRUE;
2460 }
2461
2462 /* Attempts to canonicalize the host of the URI (if any). */
2463 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2464     uri->host_start = -1;
2465     uri->host_len = 0;
2466     uri->domain_offset = -1;
2467
2468     if(data->host) {
2469         switch(data->host_type) {
2470         case Uri_HOST_DNS:
2471             uri->host_type = Uri_HOST_DNS;
2472             if(!canonicalize_reg_name(data, uri, flags, computeOnly))
2473                 return FALSE;
2474
2475             break;
2476         case Uri_HOST_IPV4:
2477             uri->host_type = Uri_HOST_IPV4;
2478             if(!canonicalize_ipv4address(data, uri, flags, computeOnly))
2479                 return FALSE;
2480
2481             break;
2482         case Uri_HOST_IPV6:
2483             if(!canonicalize_ipv6address(data, uri, flags, computeOnly))
2484                 return FALSE;
2485
2486             uri->host_type = Uri_HOST_IPV6;
2487             break;
2488         case Uri_HOST_UNKNOWN:
2489             if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) {
2490                 uri->host_start = uri->canon_len;
2491
2492                 /* Nothing happens to unknown host types. */
2493                 if(!computeOnly)
2494                     memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2495                 uri->canon_len += data->host_len;
2496                 uri->host_len = data->host_len;
2497             }
2498
2499             uri->host_type = Uri_HOST_UNKNOWN;
2500             break;
2501         default:
2502             FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data,
2503                     uri, flags, computeOnly, data->host_type);
2504             return FALSE;
2505        }
2506    }
2507
2508    return TRUE;
2509 }
2510
2511 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2512     BOOL has_default_port = FALSE;
2513     USHORT default_port = 0;
2514     DWORD i;
2515
2516     uri->has_port = FALSE;
2517
2518     /* Check if the scheme has a default port. */
2519     for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
2520         if(default_ports[i].scheme == data->scheme_type) {
2521             has_default_port = TRUE;
2522             default_port = default_ports[i].port;
2523             break;
2524         }
2525     }
2526
2527     if(data->port || has_default_port)
2528         uri->has_port = TRUE;
2529
2530     /* Possible cases:
2531      *  1)  Has a port which is the default port.
2532      *  2)  Has a port (not the default).
2533      *  3)  Doesn't have a port, but, scheme has a default port.
2534      *  4)  No port.
2535      */
2536     if(has_default_port && data->port && data->port_value == default_port) {
2537         /* If it's the default port and this flag isn't set, don't do anything. */
2538         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2539             /* Copy the original port over. */
2540             if(!computeOnly) {
2541                 uri->canon_uri[uri->canon_len] = ':';
2542                 memcpy(uri->canon_uri+uri->canon_len+1, data->port, data->port_len*sizeof(WCHAR));
2543             }
2544             uri->canon_len += data->port_len+1;
2545         }
2546
2547         uri->port = default_port;
2548     } else if(data->port) {
2549         if(!computeOnly)
2550             uri->canon_uri[uri->canon_len] = ':';
2551         ++uri->canon_len;
2552
2553         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2554             /* Copy the original over without changes. */
2555             if(!computeOnly)
2556                 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2557             uri->canon_len += data->port_len;
2558         } else {
2559             const WCHAR formatW[] = {'%','u',0};
2560             INT len = 0;
2561             if(!computeOnly)
2562                 len = sprintfW(uri->canon_uri+uri->canon_len, formatW, data->port_value);
2563             else {
2564                 WCHAR tmp[6];
2565                 len = sprintfW(tmp, formatW, data->port_value);
2566             }
2567             uri->canon_len += len;
2568         }
2569
2570         uri->port = data->port_value;
2571     } else if(has_default_port)
2572         uri->port = default_port;
2573
2574     return TRUE;
2575 }
2576
2577 /* Canonicalizes the authority of the URI represented by the parse_data. */
2578 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2579     uri->authority_start = uri->canon_len;
2580     uri->authority_len = 0;
2581
2582     if(!canonicalize_userinfo(data, uri, flags, computeOnly))
2583         return FALSE;
2584
2585     if(!canonicalize_host(data, uri, flags, computeOnly))
2586         return FALSE;
2587
2588     if(!canonicalize_port(data, uri, flags, computeOnly))
2589         return FALSE;
2590
2591     if(uri->host_start != -1)
2592         uri->authority_len = uri->canon_len - uri->authority_start;
2593     else
2594         uri->authority_start = -1;
2595
2596     return TRUE;
2597 }
2598
2599 /* Attempts to canonicalize the path of a hierarchical URI.
2600  *
2601  * Things that happen:
2602  *  1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN
2603  *      flag is set or it's a file URI. Forbidden characters are always encoded
2604  *      for file schemes reguardless and forbidden characters are never encoded
2605  *      for unknown scheme types.
2606  *
2607  *  2). For known scheme types '\\' are changed to '/'.
2608  *
2609  *  3). Percent encoded, unreserved characters are decoded to their actual values.
2610  *      Unless the scheme type is unknown. For file schemes any percent encoded
2611  *      character in the unreserved or reserved set is decoded.
2612  *
2613  *  4). For File schemes if the path is starts with a drive letter and doesn't
2614  *      start with a '/' then one is appended.
2615  *      Ex: file://c:/test.mp3 -> file:///c:/test.mp3
2616  *
2617  *  5). Dot segments are removed from the path for all scheme types
2618  *      unless NO_CANONICALIZE flag is set. Dot segments aren't removed
2619  *      for wildcard scheme types.
2620  *
2621  * NOTES:
2622  *      file://c:/test%20test   -> file:///c:/test%2520test
2623  *      file://c:/test%3Etest   -> file:///c:/test%253Etest
2624  *      file:///c:/test%20test  -> file:///c:/test%20test
2625  *      file:///c:/test%test    -> file:///c:/test%25test
2626  */
2627 static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
2628                                            DWORD flags, BOOL computeOnly) {
2629     const WCHAR *ptr;
2630     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2631     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
2632
2633     BOOL escape_pct = FALSE;
2634
2635     if(!data->path) {
2636         uri->path_start = -1;
2637         uri->path_len = 0;
2638         return TRUE;
2639     }
2640
2641     uri->path_start = uri->canon_len;
2642
2643     /* Check if a '/' needs to be appended for the file scheme. */
2644     if(is_file) {
2645         if(data->path_len > 1 && is_alpha(*(data->path)) &&
2646            *(data->path+1) == ':') {
2647             if(!computeOnly)
2648                 uri->canon_uri[uri->canon_len] = '/';
2649             uri->canon_len++;
2650             escape_pct = TRUE;
2651         }
2652     }
2653
2654     for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
2655         if(*ptr == '%') {
2656             const WCHAR *tmp = ptr;
2657             WCHAR val;
2658
2659             /* Check if the % represents a valid encoded char, or if it needs encoded. */
2660             BOOL force_encode = !check_pct_encoded(&tmp) && is_file;
2661             val = decode_pct_val(ptr);
2662
2663             if(force_encode || escape_pct) {
2664                 /* Escape the percent sign in the file URI. */
2665                 if(!computeOnly)
2666                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2667                 uri->canon_len += 3;
2668             } else if((is_unreserved(val) && known_scheme) ||
2669                       (is_file && (is_unreserved(val) || is_reserved(val)))) {
2670                 if(!computeOnly)
2671                     uri->canon_uri[uri->canon_len] = val;
2672                 ++uri->canon_len;
2673
2674                 ptr += 2;
2675                 continue;
2676             } else {
2677                 if(!computeOnly)
2678                     uri->canon_uri[uri->canon_len] = *ptr;
2679                 ++uri->canon_len;
2680             }
2681         } else if(*ptr == '\\' && known_scheme) {
2682             if(!computeOnly)
2683                 uri->canon_uri[uri->canon_len] = '/';
2684             ++uri->canon_len;
2685         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2686                   (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
2687             /* Escape the forbidden character. */
2688             if(!computeOnly)
2689                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2690             uri->canon_len += 3;
2691         } else {
2692             if(!computeOnly)
2693                 uri->canon_uri[uri->canon_len] = *ptr;
2694             ++uri->canon_len;
2695         }
2696     }
2697
2698     uri->path_len = uri->canon_len - uri->path_start;
2699
2700     /* Removing the dot segments only happens when it's not in
2701      * computeOnly mode and it's not a wildcard scheme.
2702      */
2703     if(!computeOnly && data->scheme_type != URL_SCHEME_WILDCARD) {
2704         if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
2705             /* Remove the dot segments (if any) and reset everything to the new
2706              * correct length.
2707              */
2708             DWORD new_len = remove_dot_segments(uri->canon_uri+uri->path_start, uri->path_len);
2709             uri->canon_len -= uri->path_len-new_len;
2710             uri->path_len = new_len;
2711         }
2712     }
2713
2714     if(!computeOnly)
2715         TRACE("Canonicalized path %s len=%d\n",
2716             debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
2717             uri->path_len);
2718
2719     return TRUE;
2720 }
2721
2722 /* Attempts to canonicalize the path for an opaque URI.
2723  *
2724  * For known scheme types:
2725  *  1)  forbidden characters are percent encoded if
2726  *      NO_ENCODE_FORBIDDEN_CHARACTERS isn't set.
2727  *
2728  *  2)  Percent encoded, unreserved characters are decoded
2729  *      to their actual values, for known scheme types.
2730  *
2731  *  3)  '\\' are changed to '/' for known scheme types
2732  *      except for mailto schemes.
2733  */
2734 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2735     const WCHAR *ptr;
2736     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2737
2738     if(!data->path) {
2739         uri->path_start = -1;
2740         uri->path_len = 0;
2741         return TRUE;
2742     }
2743
2744     uri->path_start = uri->canon_len;
2745
2746     /* Windows doesn't allow a "//" to appear after the scheme
2747      * of a URI, if it's an opaque URI.
2748      */
2749     if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') {
2750         /* So it inserts a "/." before the "//" if it exists. */
2751         if(!computeOnly) {
2752             uri->canon_uri[uri->canon_len] = '/';
2753             uri->canon_uri[uri->canon_len+1] = '.';
2754         }
2755
2756         uri->canon_len += 2;
2757     }
2758
2759     for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
2760         if(*ptr == '%' && known_scheme) {
2761             WCHAR val = decode_pct_val(ptr);
2762
2763             if(is_unreserved(val)) {
2764                 if(!computeOnly)
2765                     uri->canon_uri[uri->canon_len] = val;
2766                 ++uri->canon_len;
2767
2768                 ptr += 2;
2769                 continue;
2770             } else {
2771                 if(!computeOnly)
2772                     uri->canon_uri[uri->canon_len] = *ptr;
2773                 ++uri->canon_len;
2774             }
2775         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2776                   !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2777             if(!computeOnly)
2778                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2779             uri->canon_len += 3;
2780         } else {
2781             if(!computeOnly)
2782                 uri->canon_uri[uri->canon_len] = *ptr;
2783             ++uri->canon_len;
2784         }
2785     }
2786
2787     uri->path_len = uri->canon_len - uri->path_start;
2788
2789     TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
2790         debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
2791     return TRUE;
2792 }
2793
2794 /* Determines how the URI represented by the parse_data should be canonicalized.
2795  *
2796  * Essentially, if the parse_data represents an hierarchical URI then it calls
2797  * canonicalize_authority and the canonicalization functions for the path. If the
2798  * URI is opaque it canonicalizes the path of the URI.
2799  */
2800 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2801     if(!data->is_opaque) {
2802         /* "//" is only added for non-wildcard scheme types. */
2803         if(data->scheme_type != URL_SCHEME_WILDCARD) {
2804             if(!computeOnly) {
2805                 INT pos = uri->canon_len;
2806
2807                 uri->canon_uri[pos] = '/';
2808                 uri->canon_uri[pos+1] = '/';
2809            }
2810            uri->canon_len += 2;
2811         }
2812
2813         if(!canonicalize_authority(data, uri, flags, computeOnly))
2814             return FALSE;
2815
2816         /* TODO: Canonicalize the path of the URI. */
2817         if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly))
2818             return FALSE;
2819
2820     } else {
2821         /* Opaque URI's don't have an authority. */
2822         uri->userinfo_start = uri->userinfo_split = -1;
2823         uri->userinfo_len = 0;
2824         uri->host_start = -1;
2825         uri->host_len = 0;
2826         uri->host_type = Uri_HOST_UNKNOWN;
2827         uri->has_port = FALSE;
2828         uri->authority_start = -1;
2829         uri->authority_len = 0;
2830         uri->domain_offset = -1;
2831
2832         if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
2833             return FALSE;
2834     }
2835
2836     if(uri->path_start > -1 && !computeOnly)
2837         /* Finding file extensions happens for both types of URIs. */
2838         uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len);
2839     else
2840         uri->extension_offset = -1;
2841
2842     return TRUE;
2843 }
2844
2845 /* Attempts to canonicalize the query string of the URI.
2846  *
2847  * Things that happen:
2848  *  1)  For known scheme types forbidden characters
2849  *      are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set
2850  *      or NO_ENCODE_FORBIDDEN_CHARACTERS is set.
2851  *
2852  *  2)  For known scheme types, percent encoded, unreserved characters
2853  *      are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set.
2854  */
2855 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2856     const WCHAR *ptr, *end;
2857     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2858
2859     if(!data->query) {
2860         uri->query_start = -1;
2861         uri->query_len = 0;
2862         return TRUE;
2863     }
2864
2865     uri->query_start = uri->canon_len;
2866
2867     end = data->query+data->query_len;
2868     for(ptr = data->query; ptr < end; ++ptr) {
2869         if(*ptr == '%') {
2870             if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2871                 WCHAR val = decode_pct_val(ptr);
2872                 if(is_unreserved(val)) {
2873                     if(!computeOnly)
2874                         uri->canon_uri[uri->canon_len] = val;
2875                     ++uri->canon_len;
2876
2877                     ptr += 2;
2878                     continue;
2879                 }
2880             }
2881         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
2882             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2883                !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2884                 if(!computeOnly)
2885                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2886                 uri->canon_len += 3;
2887                 continue;
2888             }
2889         }
2890
2891         if(!computeOnly)
2892             uri->canon_uri[uri->canon_len] = *ptr;
2893         ++uri->canon_len;
2894     }
2895
2896     uri->query_len = uri->canon_len - uri->query_start;
2897
2898     if(!computeOnly)
2899         TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags,
2900             computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len),
2901             uri->query_len);
2902     return TRUE;
2903 }
2904
2905 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2906     const WCHAR *ptr, *end;
2907     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2908
2909     if(!data->fragment) {
2910         uri->fragment_start = -1;
2911         uri->fragment_len = 0;
2912         return TRUE;
2913     }
2914
2915     uri->fragment_start = uri->canon_len;
2916
2917     end = data->fragment + data->fragment_len;
2918     for(ptr = data->fragment; ptr < end; ++ptr) {
2919         if(*ptr == '%') {
2920             if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2921                 WCHAR val = decode_pct_val(ptr);
2922                 if(is_unreserved(val)) {
2923                     if(!computeOnly)
2924                         uri->canon_uri[uri->canon_len] = val;
2925                     ++uri->canon_len;
2926
2927                     ptr += 2;
2928                     continue;
2929                 }
2930             }
2931         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
2932             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2933                !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2934                 if(!computeOnly)
2935                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2936                 uri->canon_len += 3;
2937                 continue;
2938             }
2939         }
2940
2941         if(!computeOnly)
2942             uri->canon_uri[uri->canon_len] = *ptr;
2943         ++uri->canon_len;
2944     }
2945
2946     uri->fragment_len = uri->canon_len - uri->fragment_start;
2947
2948     if(!computeOnly)
2949         TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags,
2950             computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len),
2951             uri->fragment_len);
2952     return TRUE;
2953 }
2954
2955 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
2956 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2957     uri->scheme_start = -1;
2958     uri->scheme_len = 0;
2959
2960     if(!data->scheme) {
2961         /* The only type of URI that doesn't have to have a scheme is a relative
2962          * URI.
2963          */
2964         if(!data->is_relative) {
2965             FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data,
2966                     uri, flags, debugstr_w(data->uri));
2967             return FALSE;
2968         }
2969     } else {
2970         if(!computeOnly) {
2971             DWORD i;
2972             INT pos = uri->canon_len;
2973
2974             for(i = 0; i < data->scheme_len; ++i) {
2975                 /* Scheme name must be lower case after canonicalization. */
2976                 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]);
2977             }
2978
2979             uri->canon_uri[i + pos] = ':';
2980             uri->scheme_start = pos;
2981
2982             TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags,
2983                     debugstr_wn(uri->canon_uri,  uri->scheme_len), data->scheme_len);
2984         }
2985
2986         /* This happens in both computation modes. */
2987         uri->canon_len += data->scheme_len + 1;
2988         uri->scheme_len = data->scheme_len;
2989     }
2990     return TRUE;
2991 }
2992
2993 /* Compute's what the length of the URI specified by the parse_data will be
2994  * after canonicalization occurs using the specified flags.
2995  *
2996  * This function will return a non-zero value indicating the length of the canonicalized
2997  * URI, or -1 on error.
2998  */
2999 static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
3000     Uri uri;
3001
3002     memset(&uri, 0, sizeof(Uri));
3003
3004     TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags,
3005             debugstr_w(data->uri));
3006
3007     if(!canonicalize_scheme(data, &uri, flags, TRUE)) {
3008         ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags);
3009         return -1;
3010     }
3011
3012     if(!canonicalize_hierpart(data, &uri, flags, TRUE)) {
3013         ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags);
3014         return -1;
3015     }
3016
3017     if(!canonicalize_query(data, &uri, flags, TRUE)) {
3018         ERR("(%p %x): Failed to compute query string length.\n", data, flags);
3019         return -1;
3020     }
3021
3022     if(!canonicalize_fragment(data, &uri, flags, TRUE)) {
3023         ERR("(%p %x): Failed to compute fragment length.\n", data, flags);
3024         return -1;
3025     }
3026
3027     TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len);
3028
3029     return uri.canon_len;
3030 }
3031
3032 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the
3033  * canonicalization succeededs it will store all the canonicalization information
3034  * in the pointer to the Uri.
3035  *
3036  * To canonicalize a URI this function first computes what the length of the URI
3037  * specified by the parse_data will be. Once this is done it will then perfom the actual
3038  * canonicalization of the URI.
3039  */
3040 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
3041     INT len;
3042
3043     uri->canon_uri = NULL;
3044     len = uri->canon_size = uri->canon_len = 0;
3045
3046     TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri));
3047
3048     /* First try to compute the length of the URI. */
3049     len = compute_canonicalized_length(data, flags);
3050     if(len == -1) {
3051         ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags,
3052                 debugstr_w(data->uri));
3053         return E_INVALIDARG;
3054     }
3055
3056     uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR));
3057     if(!uri->canon_uri)
3058         return E_OUTOFMEMORY;
3059
3060     uri->canon_size = len;
3061     if(!canonicalize_scheme(data, uri, flags, FALSE)) {
3062         ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
3063         heap_free(uri->canon_uri);
3064         return E_INVALIDARG;
3065     }
3066     uri->scheme_type = data->scheme_type;
3067
3068     if(!canonicalize_hierpart(data, uri, flags, FALSE)) {
3069         ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags);
3070         heap_free(uri->canon_uri);
3071         return E_INVALIDARG;
3072     }
3073
3074     if(!canonicalize_query(data, uri, flags, FALSE)) {
3075         ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n",
3076             data, uri, flags);
3077         return E_INVALIDARG;
3078     }
3079
3080     if(!canonicalize_fragment(data, uri, flags, FALSE)) {
3081         ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n",
3082             data, uri, flags);
3083         return E_INVALIDARG;
3084     }
3085
3086     /* There's a possibility we didn't use all the space we allocated
3087      * earlier.
3088      */
3089     if(uri->canon_len < uri->canon_size) {
3090         /* This happens if the URI is hierarchical and dot
3091          * segments were removed from it's path.
3092          */
3093         WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR));
3094         if(!tmp)
3095             return E_OUTOFMEMORY;
3096
3097         uri->canon_uri = tmp;
3098         uri->canon_size = uri->canon_len;
3099     }
3100
3101     uri->canon_uri[uri->canon_len] = '\0';
3102     TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
3103
3104     return S_OK;
3105 }
3106
/* Yield the interface pointer of an object: the address of the object's
 * vtbl pointer member, cast to the interface type.
 */
#define URI(x)         ((IUri*)  &(x)->lpIUriVtbl)
#define URIBUILDER(x)  ((IUriBuilder*)  &(x)->lpIUriBuilderVtbl)

/* Used by the IUri methods to get from the interface pointer they receive
 * back to the Uri object.
 */
#define URI_THIS(iface) DEFINE_THIS(Uri, IUri, iface)
3111
3112 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv)
3113 {
3114     Uri *This = URI_THIS(iface);
3115
3116     if(IsEqualGUID(&IID_IUnknown, riid)) {
3117         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
3118         *ppv = URI(This);
3119     }else if(IsEqualGUID(&IID_IUri, riid)) {
3120         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
3121         *ppv = URI(This);
3122     }else if(IsEqualGUID(&IID_IUriObj, riid)) {
3123         TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv);
3124         *ppv = This;
3125         return S_OK;
3126     }else {
3127         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
3128         *ppv = NULL;
3129         return E_NOINTERFACE;
3130     }
3131
3132     IUnknown_AddRef((IUnknown*)*ppv);
3133     return S_OK;
3134 }
3135
3136 static ULONG WINAPI Uri_AddRef(IUri *iface)
3137 {
3138     Uri *This = URI_THIS(iface);
3139     LONG ref = InterlockedIncrement(&This->ref);
3140
3141     TRACE("(%p) ref=%d\n", This, ref);
3142
3143     return ref;
3144 }
3145
3146 static ULONG WINAPI Uri_Release(IUri *iface)
3147 {
3148     Uri *This = URI_THIS(iface);
3149     LONG ref = InterlockedDecrement(&This->ref);
3150
3151     TRACE("(%p) ref=%d\n", This, ref);
3152
3153     if(!ref) {
3154         SysFreeString(This->raw_uri);
3155         heap_free(This->canon_uri);
3156         heap_free(This);
3157     }
3158
3159     return ref;
3160 }
3161
3162 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags)
3163 {
3164     Uri *This = URI_THIS(iface);
3165     HRESULT hres;
3166     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3167
3168     if(!pbstrProperty)
3169         return E_POINTER;
3170
3171     if(uriProp > Uri_PROPERTY_STRING_LAST) {
3172         /* Windows allocates an empty BSTR for invalid Uri_PROPERTY's. */
3173         *pbstrProperty = SysAllocStringLen(NULL, 0);
3174         if(!(*pbstrProperty))
3175             return E_OUTOFMEMORY;
3176
3177         /* It only returns S_FALSE for the ZONE property... */
3178         if(uriProp == Uri_PROPERTY_ZONE)
3179             return S_FALSE;
3180         else
3181             return S_OK;
3182     }
3183
3184     /* Don't have support for flags yet. */
3185     if(dwFlags) {
3186         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3187         return E_NOTIMPL;
3188     }
3189
3190     switch(uriProp) {
3191     case Uri_PROPERTY_ABSOLUTE_URI:
3192         *pbstrProperty = SysAllocString(This->canon_uri);
3193
3194         if(!(*pbstrProperty))
3195             hres = E_OUTOFMEMORY;
3196         else
3197             hres = S_OK;
3198
3199         break;
3200     case Uri_PROPERTY_AUTHORITY:
3201         if(This->authority_start > -1) {
3202             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len);
3203             hres = S_OK;
3204         } else {
3205             *pbstrProperty = SysAllocStringLen(NULL, 0);
3206             hres = S_FALSE;
3207         }
3208
3209         if(!(*pbstrProperty))
3210             hres = E_OUTOFMEMORY;
3211
3212         break;
3213     case Uri_PROPERTY_DISPLAY_URI:
3214         /* The Display URI contains everything except for the userinfo for known
3215          * scheme types.
3216          */
3217         if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) {
3218             *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len);
3219
3220             if(*pbstrProperty) {
3221                 /* Copy everything before the userinfo over. */
3222                 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
3223                 /* Copy everything after the userinfo over. */
3224                 memcpy(*pbstrProperty+This->userinfo_start,
3225                    This->canon_uri+This->userinfo_start+This->userinfo_len+1,
3226                    (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR));
3227             }
3228         } else
3229             *pbstrProperty = SysAllocString(This->canon_uri);
3230
3231         if(!(*pbstrProperty))
3232             hres = E_OUTOFMEMORY;
3233         else
3234             hres = S_OK;
3235
3236         break;
3237     case Uri_PROPERTY_DOMAIN:
3238         if(This->domain_offset > -1) {
3239             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset,
3240                                                This->host_len-This->domain_offset);
3241             hres = S_OK;
3242         } else {
3243             *pbstrProperty = SysAllocStringLen(NULL, 0);
3244             hres = S_FALSE;
3245         }
3246
3247         if(!(*pbstrProperty))
3248             hres = E_OUTOFMEMORY;
3249
3250         break;
3251     case Uri_PROPERTY_EXTENSION:
3252         if(This->extension_offset > -1) {
3253             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset,
3254                                                This->path_len-This->extension_offset);
3255             hres = S_OK;
3256         } else {
3257             *pbstrProperty = SysAllocStringLen(NULL, 0);
3258             hres = S_FALSE;
3259         }
3260
3261         if(!(*pbstrProperty))
3262             hres = E_OUTOFMEMORY;
3263
3264         break;
3265     case Uri_PROPERTY_FRAGMENT:
3266         if(This->fragment_start > -1) {
3267             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len);
3268             hres = S_OK;
3269         } else {
3270             *pbstrProperty = SysAllocStringLen(NULL, 0);
3271             hres = S_FALSE;
3272         }
3273
3274         if(!(*pbstrProperty))
3275             hres = E_OUTOFMEMORY;
3276
3277         break;
3278     case Uri_PROPERTY_HOST:
3279         if(This->host_start > -1) {
3280             /* The '[' and ']' aren't included for IPv6 addresses. */
3281             if(This->host_type == Uri_HOST_IPV6)
3282                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2);
3283             else
3284                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len);
3285
3286             hres = S_OK;
3287         } else {
3288             *pbstrProperty = SysAllocStringLen(NULL, 0);
3289             hres = S_FALSE;
3290         }
3291
3292         if(!(*pbstrProperty))
3293             hres = E_OUTOFMEMORY;
3294
3295         break;
3296     case Uri_PROPERTY_PASSWORD:
3297         if(This->userinfo_split > -1) {
3298             *pbstrProperty = SysAllocStringLen(
3299                 This->canon_uri+This->userinfo_start+This->userinfo_split+1,
3300                 This->userinfo_len-This->userinfo_split-1);
3301             hres = S_OK;
3302         } else {
3303             *pbstrProperty = SysAllocStringLen(NULL, 0);
3304             hres = S_FALSE;
3305         }
3306
3307         if(!(*pbstrProperty))
3308             return E_OUTOFMEMORY;
3309
3310         break;
3311     case Uri_PROPERTY_PATH:
3312         if(This->path_start > -1) {
3313             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len);
3314             hres = S_OK;
3315         } else {
3316             *pbstrProperty = SysAllocStringLen(NULL, 0);
3317             hres = S_FALSE;
3318         }
3319
3320         if(!(*pbstrProperty))
3321             hres = E_OUTOFMEMORY;
3322
3323         break;
3324     case Uri_PROPERTY_PATH_AND_QUERY:
3325         if(This->path_start > -1) {
3326             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len);
3327             hres = S_OK;
3328         } else if(This->query_start > -1) {
3329             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
3330             hres = S_OK;
3331         } else {
3332             *pbstrProperty = SysAllocStringLen(NULL, 0);
3333             hres = S_FALSE;
3334         }
3335
3336         if(!(*pbstrProperty))
3337             hres = E_OUTOFMEMORY;
3338
3339         break;
3340     case Uri_PROPERTY_QUERY:
3341         if(This->query_start > -1) {
3342             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
3343             hres = S_OK;
3344         } else {
3345             *pbstrProperty = SysAllocStringLen(NULL, 0);
3346             hres = S_FALSE;
3347         }
3348
3349         if(!(*pbstrProperty))
3350             hres = E_OUTOFMEMORY;
3351
3352         break;
3353     case Uri_PROPERTY_RAW_URI:
3354         *pbstrProperty = SysAllocString(This->raw_uri);
3355         if(!(*pbstrProperty))
3356             hres = E_OUTOFMEMORY;
3357         else
3358             hres = S_OK;
3359         break;
3360     case Uri_PROPERTY_SCHEME_NAME:
3361         if(This->scheme_start > -1) {
3362             *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len);
3363             hres = S_OK;
3364         } else {
3365             *pbstrProperty = SysAllocStringLen(NULL, 0);
3366             hres = S_FALSE;
3367         }
3368
3369         if(!(*pbstrProperty))
3370             hres = E_OUTOFMEMORY;
3371
3372         break;
3373     case Uri_PROPERTY_USER_INFO:
3374         if(This->userinfo_start > -1) {
3375             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len);
3376             hres = S_OK;
3377         } else {
3378             *pbstrProperty = SysAllocStringLen(NULL, 0);
3379             hres = S_FALSE;
3380         }
3381
3382         if(!(*pbstrProperty))
3383             hres = E_OUTOFMEMORY;
3384
3385         break;
3386     case Uri_PROPERTY_USER_NAME:
3387         if(This->userinfo_start > -1) {
3388             /* If userinfo_split is set, that means a password exists
3389              * so the username is only from userinfo_start to userinfo_split.
3390              */
3391             if(This->userinfo_split > -1) {
3392                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split);
3393                 hres = S_OK;
3394             } else {
3395                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len);
3396                 hres = S_OK;
3397             }
3398         } else {
3399             *pbstrProperty = SysAllocStringLen(NULL, 0);
3400             hres = S_FALSE;
3401         }
3402
3403         if(!(*pbstrProperty))
3404             return E_OUTOFMEMORY;
3405
3406         break;
3407     default:
3408         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3409         hres = E_NOTIMPL;
3410     }
3411
3412     return hres;
3413 }
3414
3415 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
3416 {
3417     Uri *This = URI_THIS(iface);
3418     HRESULT hres;
3419     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3420
3421     if(!pcchProperty)
3422         return E_INVALIDARG;
3423
3424     /* Can only return a length for a property if it's a string. */
3425     if(uriProp > Uri_PROPERTY_STRING_LAST)
3426         return E_INVALIDARG;
3427
3428     /* Don't have support for flags yet. */
3429     if(dwFlags) {
3430         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3431         return E_NOTIMPL;
3432     }
3433
3434     switch(uriProp) {
3435     case Uri_PROPERTY_ABSOLUTE_URI:
3436         *pcchProperty = This->canon_len;
3437         hres = S_OK;
3438         break;
3439     case Uri_PROPERTY_AUTHORITY:
3440         *pcchProperty = This->authority_len;
3441         hres = (This->authority_start > -1) ? S_OK : S_FALSE;
3442         break;
3443     case Uri_PROPERTY_DISPLAY_URI:
3444         if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1)
3445             *pcchProperty = This->canon_len-This->userinfo_len-1;
3446         else
3447             *pcchProperty = This->canon_len;
3448
3449         hres = S_OK;
3450         break;
3451     case Uri_PROPERTY_DOMAIN:
3452         if(This->domain_offset > -1)
3453             *pcchProperty = This->host_len - This->domain_offset;
3454         else
3455             *pcchProperty = 0;
3456
3457         hres = (This->domain_offset > -1) ? S_OK : S_FALSE;
3458         break;
3459     case Uri_PROPERTY_EXTENSION:
3460         if(This->extension_offset > -1) {
3461             *pcchProperty = This->path_len - This->extension_offset;
3462             hres = S_OK;
3463         } else {
3464             *pcchProperty = 0;
3465             hres = S_FALSE;
3466         }
3467
3468         break;
3469     case Uri_PROPERTY_FRAGMENT:
3470         *pcchProperty = This->fragment_len;
3471         hres = (This->fragment_start > -1) ? S_OK : S_FALSE;
3472         break;
3473     case Uri_PROPERTY_HOST:
3474         *pcchProperty = This->host_len;
3475
3476         /* '[' and ']' aren't included in the length. */
3477         if(This->host_type == Uri_HOST_IPV6)
3478             *pcchProperty -= 2;
3479
3480         hres = (This->host_start > -1) ? S_OK : S_FALSE;
3481         break;
3482     case Uri_PROPERTY_PASSWORD:
3483         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0;
3484         hres = (This->userinfo_split > -1) ? S_OK : S_FALSE;
3485         break;
3486     case Uri_PROPERTY_PATH:
3487         *pcchProperty = This->path_len;
3488         hres = (This->path_start > -1) ? S_OK : S_FALSE;
3489         break;
3490     case Uri_PROPERTY_PATH_AND_QUERY:
3491         *pcchProperty = This->path_len+This->query_len;
3492         hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE;
3493         break;
3494     case Uri_PROPERTY_QUERY:
3495         *pcchProperty = This->query_len;
3496         hres = (This->query_start > -1) ? S_OK : S_FALSE;
3497         break;
3498     case Uri_PROPERTY_RAW_URI:
3499         *pcchProperty = SysStringLen(This->raw_uri);
3500         hres = S_OK;
3501         break;
3502     case Uri_PROPERTY_SCHEME_NAME:
3503         *pcchProperty = This->scheme_len;
3504         hres = (This->scheme_start > -1) ? S_OK : S_FALSE;
3505         break;
3506     case Uri_PROPERTY_USER_INFO:
3507         *pcchProperty = This->userinfo_len;
3508         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
3509         break;
3510     case Uri_PROPERTY_USER_NAME:
3511         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len;
3512         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
3513         break;
3514     default:
3515         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3516         hres = E_NOTIMPL;
3517     }
3518
3519     return hres;
3520 }
3521
3522 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
3523 {
3524     Uri *This = URI_THIS(iface);
3525     HRESULT hres;
3526
3527     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3528
3529     if(!pcchProperty)
3530         return E_INVALIDARG;
3531
3532     /* Microsoft's implementation for the ZONE property of a URI seems to be lacking...
3533      * From what I can tell, instead of checking which URLZONE the URI belongs to it
3534      * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone
3535      * function.
3536      */
3537     if(uriProp == Uri_PROPERTY_ZONE) {
3538         *pcchProperty = URLZONE_INVALID;
3539         return E_NOTIMPL;
3540     }
3541
3542     if(uriProp < Uri_PROPERTY_DWORD_START) {
3543         *pcchProperty = 0;
3544         return E_INVALIDARG;
3545     }
3546
3547     switch(uriProp) {
3548     case Uri_PROPERTY_HOST_TYPE:
3549         *pcchProperty = This->host_type;
3550         hres = S_OK;
3551         break;
3552     case Uri_PROPERTY_PORT:
3553         if(!This->has_port) {
3554             *pcchProperty = 0;
3555             hres = S_FALSE;
3556         } else {
3557             *pcchProperty = This->port;
3558             hres = S_OK;
3559         }
3560
3561         break;
3562     case Uri_PROPERTY_SCHEME:
3563         *pcchProperty = This->scheme_type;
3564         hres = S_OK;
3565         break;
3566     default:
3567         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
3568         hres = E_NOTIMPL;
3569     }
3570
3571     return hres;
3572 }
3573
3574 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty)
3575 {
3576     Uri *This = URI_THIS(iface);
3577     TRACE("(%p)->(%d %p)\n", This, uriProp, pfHasProperty);
3578
3579     if(!pfHasProperty)
3580         return E_INVALIDARG;
3581
3582     switch(uriProp) {
3583     case Uri_PROPERTY_ABSOLUTE_URI:
3584         *pfHasProperty = TRUE;
3585         break;
3586     case Uri_PROPERTY_AUTHORITY:
3587         *pfHasProperty = This->authority_start > -1;
3588         break;
3589     case Uri_PROPERTY_DISPLAY_URI:
3590         *pfHasProperty = TRUE;
3591         break;
3592     case Uri_PROPERTY_DOMAIN:
3593         *pfHasProperty = This->domain_offset > -1;
3594         break;
3595     case Uri_PROPERTY_EXTENSION:
3596         *pfHasProperty = This->extension_offset > -1;
3597         break;
3598     case Uri_PROPERTY_FRAGMENT:
3599         *pfHasProperty = This->fragment_start > -1;
3600         break;
3601     case Uri_PROPERTY_HOST:
3602         *pfHasProperty = This->host_start > -1;
3603         break;
3604     case Uri_PROPERTY_PASSWORD:
3605         *pfHasProperty = This->userinfo_split > -1;
3606         break;
3607     case Uri_PROPERTY_PATH:
3608         *pfHasProperty = This->path_start > -1;
3609         break;
3610     case Uri_PROPERTY_PATH_AND_QUERY:
3611         *pfHasProperty = (This->path_start > -1 || This->query_start > -1);
3612         break;
3613     case Uri_PROPERTY_QUERY:
3614         *pfHasProperty = This->query_start > -1;
3615         break;
3616     case Uri_PROPERTY_RAW_URI:
3617         *pfHasProperty = TRUE;
3618         break;
3619     case Uri_PROPERTY_SCHEME_NAME:
3620         *pfHasProperty = This->scheme_start > -1;
3621         break;
3622     case Uri_PROPERTY_USER_INFO:
3623     case Uri_PROPERTY_USER_NAME:
3624         *pfHasProperty = This->userinfo_start > -1;
3625         break;
3626     case Uri_PROPERTY_HOST_TYPE:
3627         *pfHasProperty = TRUE;
3628         break;
3629     case Uri_PROPERTY_PORT:
3630         *pfHasProperty = This->has_port;
3631         break;
3632     case Uri_PROPERTY_SCHEME:
3633         *pfHasProperty = TRUE;
3634         break;
3635     case Uri_PROPERTY_ZONE:
3636         *pfHasProperty = FALSE;
3637         break;
3638     default:
3639         FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty);
3640         return E_NOTIMPL;
3641     }
3642
3643     return S_OK;
3644 }
3645
3646 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri)
3647 {
3648     TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri);
3649     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0);
3650 }
3651
3652 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority)
3653 {
3654     TRACE("(%p)->(%p)\n", iface, pstrAuthority);
3655     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0);
3656 }
3657
3658 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri)
3659 {
3660     TRACE("(%p)->(%p)\n", iface, pstrDisplayUri);
3661     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0);
3662 }
3663
3664 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain)
3665 {
3666     TRACE("(%p)->(%p)\n", iface, pstrDomain);
3667     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0);
3668 }
3669
3670 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension)
3671 {
3672     TRACE("(%p)->(%p)\n", iface, pstrExtension);
3673     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0);
3674 }
3675
3676 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment)
3677 {
3678     TRACE("(%p)->(%p)\n", iface, pstrFragment);
3679     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0);
3680 }
3681
3682 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost)
3683 {
3684     TRACE("(%p)->(%p)\n", iface, pstrHost);
3685     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0);
3686 }
3687
3688 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword)
3689 {
3690     TRACE("(%p)->(%p)\n", iface, pstrPassword);
3691     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0);
3692 }
3693
3694 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath)
3695 {
3696     TRACE("(%p)->(%p)\n", iface, pstrPath);
3697     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0);
3698 }
3699
3700 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery)
3701 {
3702     TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery);
3703     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0);
3704 }
3705
3706 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery)
3707 {
3708     TRACE("(%p)->(%p)\n", iface, pstrQuery);
3709     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0);
3710 }
3711
3712 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri)
3713 {
3714     Uri *This = URI_THIS(iface);
3715     TRACE("(%p)->(%p)\n", This, pstrRawUri);
3716
3717     /* Just forward the call to GetPropertyBSTR. */
3718     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0);
3719 }
3720
3721 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName)
3722 {
3723     Uri *This = URI_THIS(iface);
3724     TRACE("(%p)->(%p)\n", This, pstrSchemeName);
3725     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0);
3726 }
3727
3728 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo)
3729 {
3730     TRACE("(%p)->(%p)\n", iface, pstrUserInfo);
3731     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0);
3732 }
3733
3734 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName)
3735 {
3736     TRACE("(%p)->(%p)\n", iface, pstrUserName);
3737     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0);
3738 }
3739
3740 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType)
3741 {
3742     TRACE("(%p)->(%p)\n", iface, pdwHostType);
3743     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0);
3744 }
3745
3746 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort)
3747 {
3748     TRACE("(%p)->(%p)\n", iface, pdwPort);
3749     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0);
3750 }
3751
3752 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme)
3753 {
3754     Uri *This = URI_THIS(iface);
3755     TRACE("(%p)->(%p)\n", This, pdwScheme);
3756     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0);
3757 }
3758
3759 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone)
3760 {
3761     TRACE("(%p)->(%p)\n", iface, pdwZone);
3762     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0);
3763 }
3764
3765 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties)
3766 {
3767     Uri *This = URI_THIS(iface);
3768     TRACE("(%p)->(%p)\n", This, pdwProperties);
3769
3770     if(!pdwProperties)
3771         return E_INVALIDARG;
3772
3773     /* All URIs have these. */
3774     *pdwProperties = Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|
3775                      Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE;
3776
3777     if(This->scheme_start > -1)
3778         *pdwProperties |= Uri_HAS_SCHEME_NAME;
3779
3780     if(This->authority_start > -1) {
3781         *pdwProperties |= Uri_HAS_AUTHORITY;
3782         if(This->userinfo_start > -1)
3783             *pdwProperties |= Uri_HAS_USER_INFO|Uri_HAS_USER_NAME;
3784         if(This->userinfo_split > -1)
3785             *pdwProperties |= Uri_HAS_PASSWORD;
3786         if(This->host_start > -1)
3787             *pdwProperties |= Uri_HAS_HOST;
3788         if(This->domain_offset > -1)
3789             *pdwProperties |= Uri_HAS_DOMAIN;
3790         if(This->has_port)
3791             *pdwProperties |= Uri_HAS_PORT;
3792     }
3793
3794     if(This->path_start > -1)
3795         *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY;
3796     if(This->query_start > -1)
3797         *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY;
3798
3799     if(This->extension_offset > -1)
3800         *pdwProperties |= Uri_HAS_EXTENSION;
3801
3802     if(This->fragment_start > -1)
3803         *pdwProperties |= Uri_HAS_FRAGMENT;
3804
3805     return S_OK;
3806 }
3807
3808 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual)
3809 {
3810     Uri *This = URI_THIS(iface);
3811     Uri *other;
3812
3813     TRACE("(%p)->(%p %p)\n", This, pUri, pfEqual);
3814
3815     if(!pfEqual)
3816         return E_POINTER;
3817
3818     if(!pUri) {
3819         *pfEqual = FALSE;
3820
3821         /* For some reason Windows returns S_OK here... */
3822         return S_OK;
3823     }
3824
3825     /* Try to convert it to a Uri (allows for a more simple comparison). */
3826     if((other = get_uri_obj(pUri)))
3827         *pfEqual = are_equal_simple(This, other);
3828     else {
3829         /* Do it the hard way. */
3830         FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual);
3831         return E_NOTIMPL;
3832     }
3833
3834     return S_OK;
3835 }
3836
3837 #undef URI_THIS
3838
3839 static const IUriVtbl UriVtbl = {
3840     Uri_QueryInterface,
3841     Uri_AddRef,
3842     Uri_Release,
3843     Uri_GetPropertyBSTR,
3844     Uri_GetPropertyLength,
3845     Uri_GetPropertyDWORD,
3846     Uri_HasProperty,
3847     Uri_GetAbsoluteUri,
3848     Uri_GetAuthority,
3849     Uri_GetDisplayUri,
3850     Uri_GetDomain,
3851     Uri_GetExtension,
3852     Uri_GetFragment,
3853     Uri_GetHost,
3854     Uri_GetPassword,
3855     Uri_GetPath,
3856     Uri_GetPathAndQuery,
3857     Uri_GetQuery,
3858     Uri_GetRawUri,
3859     Uri_GetSchemeName,
3860     Uri_GetUserInfo,
3861     Uri_GetUserName,
3862     Uri_GetHostType,
3863     Uri_GetPort,
3864     Uri_GetScheme,
3865     Uri_GetZone,
3866     Uri_GetProperties,
3867     Uri_IsEqual
3868 };
3869
3870 /***********************************************************************
3871  *           CreateUri (urlmon.@)
3872  *
3873  * Creates a new IUri object using the URI represented by pwzURI. This function
3874  * parses and validates the components of pwzURI and then canonicalizes the
3875  * parsed components.
3876  *
3877  * PARAMS
3878  *  pwzURI      [I] The URI to parse, validate, and canonicalize.
3879  *  dwFlags     [I] Flags which can affect how the parsing/canonicalization is performed.
3880  *  dwReserved  [I] Reserved (not used).
3881  *  ppURI       [O] The resulting IUri after parsing/canonicalization occurs.
3882  *
3883  * RETURNS
3884  *  Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri.
3885  *  Failure: E_INVALIDARG if there's invalid flag combinations in dwFlags, or an
3886  *           invalid parameters, or pwzURI doesn't represnt a valid URI.
3887  *           E_OUTOFMEMORY if any memory allocation fails.
3888  *
3889  * NOTES
3890  *  Default flags:
3891  *      Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES,
3892  *      Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS.
3893  */
3894 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI)
3895 {
3896     const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME|
3897         Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE|
3898         Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES|
3899         Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI|
3900         Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS;
3901     Uri *ret;
3902     HRESULT hr;
3903     parse_data data;
3904
3905     TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI);
3906
3907     if(!ppURI)
3908         return E_INVALIDARG;
3909
3910     if(!pwzURI) {
3911         *ppURI = NULL;
3912         return E_INVALIDARG;
3913     }
3914
3915     /* Check for invalid flags. */
3916     if((dwFlags & Uri_CREATE_DECODE_EXTRA_INFO && dwFlags & Uri_CREATE_NO_DECODE_EXTRA_INFO) ||
3917        (dwFlags & Uri_CREATE_CANONICALIZE && dwFlags & Uri_CREATE_NO_CANONICALIZE) ||
3918        (dwFlags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && dwFlags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) ||
3919        (dwFlags & Uri_CREATE_PRE_PROCESS_HTML_URI && dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) ||
3920        (dwFlags & Uri_CREATE_IE_SETTINGS && dwFlags & Uri_CREATE_NO_IE_SETTINGS)) {
3921         *ppURI = NULL;
3922         return E_INVALIDARG;
3923     }
3924
3925     /* Currently unsupported. */
3926     if(dwFlags & ~supported_flags)
3927         FIXME("Ignoring unsupported flags %x\n", dwFlags & ~supported_flags);
3928
3929     ret = heap_alloc(sizeof(Uri));
3930     if(!ret)
3931         return E_OUTOFMEMORY;
3932
3933     ret->lpIUriVtbl = &UriVtbl;
3934     ret->ref = 1;
3935
3936     /* Pre process the URI, unless told otherwise. */
3937     if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI))
3938         ret->raw_uri = pre_process_uri(pwzURI);
3939     else
3940         ret->raw_uri = SysAllocString(pwzURI);
3941
3942     if(!ret->raw_uri) {
3943         heap_free(ret);
3944         return E_OUTOFMEMORY;
3945     }
3946
3947     memset(&data, 0, sizeof(parse_data));
3948     data.uri = ret->raw_uri;
3949
3950     /* Validate and parse the URI into it's components. */
3951     if(!parse_uri(&data, dwFlags)) {
3952         /* Encountered an unsupported or invalid URI */
3953         SysFreeString(ret->raw_uri);
3954         heap_free(ret);
3955         *ppURI = NULL;
3956         return E_INVALIDARG;
3957     }
3958
3959     /* Canonicalize the URI. */
3960     hr = canonicalize_uri(&data, ret, dwFlags);
3961     if(FAILED(hr)) {
3962         SysFreeString(ret->raw_uri);
3963         heap_free(ret);
3964         *ppURI = NULL;
3965         return hr;
3966     }
3967
3968     *ppURI = URI(ret);
3969     return S_OK;
3970 }
3971
3972 /***********************************************************************
3973  *           CreateUriWithFragment (urlmon.@)
3974  *
3975  * Creates a new IUri object. This is almost the same as CreateUri, expect that
3976  * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI.
3977  *
3978  * PARAMS
3979  *  pwzURI      [I] The URI to parse and perform canonicalization on.
3980  *  pwzFragment [I] The explict fragment string which should be added to pwzURI.
3981  *  dwFlags     [I] The flags which will be passed to CreateUri.
3982  *  dwReserved  [I] Reserved (not used).
3983  *  ppURI       [O] The resulting IUri after parsing/canonicalization.
3984  *
3985  * RETURNS
3986  *  Success: S_OK. ppURI contains the pointer to the newly allocated IUri.
3987  *  Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment
3988  *           isn't NULL. Will also return E_INVALIDARG for the same reasons as
3989  *           CreateUri will. E_OUTOFMEMORY if any allocations fail.
3990  */
3991 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags,
3992                                      DWORD_PTR dwReserved, IUri **ppURI)
3993 {
3994     HRESULT hres;
3995     TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI);
3996
3997     if(!ppURI)
3998         return E_INVALIDARG;
3999
4000     if(!pwzURI) {
4001         *ppURI = NULL;
4002         return E_INVALIDARG;
4003     }
4004
4005     /* Check if a fragment should be appended to the URI string. */
4006     if(pwzFragment) {
4007         WCHAR *uriW;
4008         DWORD uri_len, frag_len;
4009         BOOL add_pound;
4010
4011         /* Check if the original URI already has a fragment component. */
4012         if(StrChrW(pwzURI, '#')) {
4013             *ppURI = NULL;
4014             return E_INVALIDARG;
4015         }
4016
4017         uri_len = lstrlenW(pwzURI);
4018         frag_len = lstrlenW(pwzFragment);
4019
4020         /* If the fragment doesn't start with a '#', one will be added. */
4021         add_pound = *pwzFragment != '#';
4022
4023         if(add_pound)
4024             uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR));
4025         else
4026             uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR));
4027
4028         if(!uriW)
4029             return E_OUTOFMEMORY;
4030
4031         memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR));
4032         if(add_pound)
4033             uriW[uri_len++] = '#';
4034         memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR));
4035
4036         hres = CreateUri(uriW, dwFlags, 0, ppURI);
4037
4038         heap_free(uriW);
4039     } else
4040         /* A fragment string wasn't specified, so just forward the call. */
4041         hres = CreateUri(pwzURI, dwFlags, 0, ppURI);
4042
4043     return hres;
4044 }
4045
4046 #define URIBUILDER_THIS(iface) DEFINE_THIS(UriBuilder, IUriBuilder, iface)
4047
4048 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv)
4049 {
4050     UriBuilder *This = URIBUILDER_THIS(iface);
4051
4052     if(IsEqualGUID(&IID_IUnknown, riid)) {
4053         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
4054         *ppv = URIBUILDER(This);
4055     }else if(IsEqualGUID(&IID_IUriBuilder, riid)) {
4056         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
4057         *ppv = URIBUILDER(This);
4058     }else {
4059         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
4060         *ppv = NULL;
4061         return E_NOINTERFACE;
4062     }
4063
4064     IUnknown_AddRef((IUnknown*)*ppv);
4065     return S_OK;
4066 }
4067
4068 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface)
4069 {
4070     UriBuilder *This = URIBUILDER_THIS(iface);
4071     LONG ref = InterlockedIncrement(&This->ref);
4072
4073     TRACE("(%p) ref=%d\n", This, ref);
4074
4075     return ref;
4076 }
4077
4078 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface)
4079 {
4080     UriBuilder *This = URIBUILDER_THIS(iface);
4081     LONG ref = InterlockedDecrement(&This->ref);
4082
4083     TRACE("(%p) ref=%d\n", This, ref);
4084
4085     if(!ref) {
4086         if(This->uri) IUri_Release(This->uri);
4087         heap_free(This);
4088     }
4089
4090     return ref;
4091 }
4092
4093 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface,
4094                                                  DWORD        dwAllowEncodingPropertyMask,
4095                                                  DWORD_PTR    dwReserved,
4096                                                  IUri       **ppIUri)
4097 {
4098     UriBuilder *This = URIBUILDER_THIS(iface);
4099     TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4100
4101     if(!ppIUri)
4102         return E_POINTER;
4103
4104     /* Acts the same way as CreateUri. */
4105     if(dwAllowEncodingPropertyMask && !This->uri) {
4106         *ppIUri = NULL;
4107         return E_NOTIMPL;
4108     }
4109
4110     if(!This->uri) {
4111         *ppIUri = NULL;
4112         return INET_E_INVALID_URL;
4113     }
4114
4115     FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4116     return E_NOTIMPL;
4117 }
4118
4119 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface,
4120                                            DWORD        dwCreateFlags,
4121                                            DWORD        dwAllowEncodingPropertyMask,
4122                                            DWORD_PTR    dwReserved,
4123                                            IUri       **ppIUri)
4124 {
4125     UriBuilder *This = URIBUILDER_THIS(iface);
4126     TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4127
4128     if(!ppIUri)
4129         return E_POINTER;
4130
4131     /* The only time it doesn't return E_NOTIMPL when the dwAllow parameter
4132      * has flags set, is when the IUriBuilder has a IUri set and it hasn't
4133      * been modified (a call to a "Set*" hasn't been performed).
4134      *
4135      * TODO: Check if the IUriBuilder's properties have been modified.
4136      */
4137     if(dwAllowEncodingPropertyMask && !This->uri) {
4138         *ppIUri = NULL;
4139         return E_NOTIMPL;
4140     }
4141
4142     if(!This->uri) {
4143         *ppIUri = NULL;
4144         return INET_E_INVALID_URL;
4145     }
4146
4147     FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4148     return E_NOTIMPL;
4149 }
4150
4151 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface,
4152                                          DWORD        dwCreateFlags,
4153                                          DWORD        dwUriBuilderFlags,
4154                                          DWORD        dwAllowEncodingPropertyMask,
4155                                          DWORD_PTR    dwReserved,
4156                                          IUri       **ppIUri)
4157 {
4158     UriBuilder *This = URIBUILDER_THIS(iface);
4159     TRACE("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
4160         dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4161
4162     if(!ppIUri)
4163         return E_POINTER;
4164
4165     /* Same as CreateUri. */
4166     if(dwAllowEncodingPropertyMask && !This->uri) {
4167         *ppIUri = NULL;
4168         return E_NOTIMPL;
4169     }
4170
4171     if(!This->uri) {
4172         *ppIUri = NULL;
4173         return INET_E_INVALID_URL;
4174     }
4175
4176     FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
4177         dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4178     return E_NOTIMPL;
4179 }
4180
4181 static HRESULT WINAPI  UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri)
4182 {
4183     UriBuilder *This = URIBUILDER_THIS(iface);
4184     FIXME("(%p)->(%p)\n", This, ppIUri);
4185     return E_NOTIMPL;
4186 }
4187
4188 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri)
4189 {
4190     UriBuilder *This = URIBUILDER_THIS(iface);
4191     FIXME("(%p)->(%p)\n", This, pIUri);
4192     return E_NOTIMPL;
4193 }
4194
4195 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment)
4196 {
4197     UriBuilder *This = URIBUILDER_THIS(iface);
4198     TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
4199
4200     if(!pcchFragment) {
4201         if(ppwzFragment)
4202             *ppwzFragment = NULL;
4203         return E_POINTER;
4204     }
4205
4206     if(!ppwzFragment) {
4207         *pcchFragment = 0;
4208         return E_POINTER;
4209     }
4210
4211     FIXME("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
4212     return E_NOTIMPL;
4213 }
4214
4215 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost)
4216 {
4217     UriBuilder *This = URIBUILDER_THIS(iface);
4218     TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
4219
4220     if(!pcchHost) {
4221         if(ppwzHost)
4222             *ppwzHost = NULL;
4223         return E_POINTER;
4224     }
4225
4226     if(!ppwzHost) {
4227         *pcchHost = 0;
4228         return E_POINTER;
4229     }
4230
4231     FIXME("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
4232     return E_NOTIMPL;
4233 }
4234
4235 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword)
4236 {
4237     UriBuilder *This = URIBUILDER_THIS(iface);
4238     TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
4239
4240     if(!pcchPassword) {
4241         if(ppwzPassword)
4242             *ppwzPassword = NULL;
4243         return E_POINTER;
4244     }
4245
4246     if(!ppwzPassword) {
4247         *pcchPassword = 0;
4248         return E_POINTER;
4249     }
4250
4251     FIXME("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
4252     return E_NOTIMPL;
4253 }
4254
4255 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath)
4256 {
4257     UriBuilder *This = URIBUILDER_THIS(iface);
4258     FIXME("(%p)->(%p %p)\n", This, pcchPath, ppwzPath);
4259     return E_NOTIMPL;
4260 }
4261
4262 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort)
4263 {
4264     UriBuilder *This = URIBUILDER_THIS(iface);
4265     FIXME("(%p)->(%p %p)\n", This, pfHasPort, pdwPort);
4266     return E_NOTIMPL;
4267 }
4268
4269 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery)
4270 {
4271     UriBuilder *This = URIBUILDER_THIS(iface);
4272     FIXME("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery);
4273     return E_NOTIMPL;
4274 }
4275
4276 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName)
4277 {
4278     UriBuilder *This = URIBUILDER_THIS(iface);
4279     FIXME("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName);
4280     return E_NOTIMPL;
4281 }
4282
4283 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName)
4284 {
4285     UriBuilder *This = URIBUILDER_THIS(iface);
4286     FIXME("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName);
4287     return E_NOTIMPL;
4288 }
4289
4290 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue)
4291 {
4292     UriBuilder *This = URIBUILDER_THIS(iface);
4293     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4294     return E_NOTIMPL;
4295 }
4296
4297 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue)
4298 {
4299     UriBuilder *This = URIBUILDER_THIS(iface);
4300     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4301     return E_NOTIMPL;
4302 }
4303
4304 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue)
4305 {
4306     UriBuilder *This = URIBUILDER_THIS(iface);
4307     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4308     return E_NOTIMPL;
4309 }
4310
4311 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue)
4312 {
4313     UriBuilder *This = URIBUILDER_THIS(iface);
4314     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4315     return E_NOTIMPL;
4316 }
4317
4318 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue)
4319 {
4320     UriBuilder *This = URIBUILDER_THIS(iface);
4321     FIXME("(%p)->(%d %d)\n", This, fHasPort, dwNewValue);
4322     return E_NOTIMPL;
4323 }
4324
4325 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue)
4326 {
4327     UriBuilder *This = URIBUILDER_THIS(iface);
4328     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4329     return E_NOTIMPL;
4330 }
4331
4332 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue)
4333 {
4334     UriBuilder *This = URIBUILDER_THIS(iface);
4335     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4336     return E_NOTIMPL;
4337 }
4338
4339 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue)
4340 {
4341     UriBuilder *This = URIBUILDER_THIS(iface);
4342     FIXME("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
4343     return E_NOTIMPL;
4344 }
4345
4346 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask)
4347 {
4348     UriBuilder *This = URIBUILDER_THIS(iface);
4349     FIXME("(%p)->(0x%08x)\n", This, dwPropertyMask);
4350     return E_NOTIMPL;
4351 }
4352
4353 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified)
4354 {
4355     UriBuilder *This = URIBUILDER_THIS(iface);
4356     FIXME("(%p)->(%p)\n", This, pfModified);
4357     return E_NOTIMPL;
4358 }
4359
4360 #undef URIBUILDER_THIS
4361
4362 static const IUriBuilderVtbl UriBuilderVtbl = {
4363     UriBuilder_QueryInterface,
4364     UriBuilder_AddRef,
4365     UriBuilder_Release,
4366     UriBuilder_CreateUriSimple,
4367     UriBuilder_CreateUri,
4368     UriBuilder_CreateUriWithFlags,
4369     UriBuilder_GetIUri,
4370     UriBuilder_SetIUri,
4371     UriBuilder_GetFragment,
4372     UriBuilder_GetHost,
4373     UriBuilder_GetPassword,
4374     UriBuilder_GetPath,
4375     UriBuilder_GetPort,
4376     UriBuilder_GetQuery,
4377     UriBuilder_GetSchemeName,
4378     UriBuilder_GetUserName,
4379     UriBuilder_SetFragment,
4380     UriBuilder_SetHost,
4381     UriBuilder_SetPassword,
4382     UriBuilder_SetPath,
4383     UriBuilder_SetPort,
4384     UriBuilder_SetQuery,
4385     UriBuilder_SetSchemeName,
4386     UriBuilder_SetUserName,
4387     UriBuilder_RemoveProperties,
4388     UriBuilder_HasBeenModified,
4389 };
4390
4391 /***********************************************************************
4392  *           CreateIUriBuilder (urlmon.@)
4393  */
4394 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder)
4395 {
4396     UriBuilder *ret;
4397
4398     TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
4399
4400     if(!ppIUriBuilder)
4401         return E_POINTER;
4402
4403     ret = heap_alloc(sizeof(UriBuilder));
4404     if(!ret)
4405         return E_OUTOFMEMORY;
4406
4407     ret->lpIUriBuilderVtbl = &UriBuilderVtbl;
4408     ret->ref = 1;
4409
4410     ret->uri = pIUri;
4411     if(pIUri)
4412         IUri_AddRef(pIUri);
4413
4414     *ppIUriBuilder = URIBUILDER(ret);
4415     return S_OK;
4416 }