* '"a b"' -> 'a b'
* - escaped quotes must be converted back to '"'
* '\"' -> '"'
- * - an odd number of '\'s followed by '"' correspond to half that number
- * of '\' followed by a '"' (extension of the above)
- * '\\\"' -> '\"'
- * '\\\\\"' -> '\\"'
- * - an even number of '\'s followed by a '"' correspond to half that number
- * of '\', plus a regular quote serving as an argument delimiter (which
- * means it does not appear in the result)
- * 'a\\"b c"' -> 'a\b c'
- * 'a\\\\"b c"' -> 'a\\b c'
- * - '\' that are not followed by a '"' are copied literally
+ * - consecutive backslashes preceding a quote see their number halved with
+ * the remainder escaping the quote:
+ * 2n backslashes + quote -> n backslashes + quote as an argument delimiter
+ * 2n+1 backslashes + quote -> n backslashes + literal quote
+ * - backslashes that are not followed by a quote are copied literally:
* 'a\b' -> 'a\b'
* 'a\\b' -> 'a\\b'
+ * - in quoted strings, consecutive quotes see their number divided by three
+ * with the remainder modulo 3 deciding whether to close the string or not.
+ * Note that the opening quote must be counted in the consecutive quotes,
+ * that's the (1+) below:
+ * (1+) 3n quotes -> n quotes
+ * (1+) 3n+1 quotes -> n quotes plus closes the quoted string
+ * (1+) 3n+2 quotes -> n+1 quotes plus closes the quoted string
+ * - in unquoted strings, the first quote opens the quoted string and the
+ * remaining consecutive quotes follow the above rule.
*/
LPWSTR* WINAPI CommandLineToArgvW(LPCWSTR lpCmdline, int* numargs)
{
LPCWSTR s;
LPWSTR d;
LPWSTR cmdline;
- int in_quotes,bcount;
+ int qcount,bcount;
if(!numargs)
{
/* --- First count the arguments */
argc=1;
- bcount=0;
- in_quotes=0;
s=lpCmdline;
+ /* The first argument, the executable path, follows special rules */
+ if (*s=='"')
+ {
+ /* The executable path ends at the next quote, no matter what */
+ s++;
+ while (*s)
+ if (*s++=='"')
+ break;
+ }
+ else
+ {
+ /* The executable path ends at the next space, no matter what */
+ while (*s && *s!=' ' && *s!='\t')
+ s++;
+ }
+ /* skip to the first argument, if any */
+ while (*s==' ' || *s=='\t')
+ s++;
+ if (*s)
+ argc++;
+
+ /* Analyze the remaining arguments */
+ qcount=bcount=0;
while (*s)
{
- if (((*s==' ' || *s=='\t') && !in_quotes))
+ if ((*s==' ' || *s=='\t') && qcount==0)
{
/* skip to the next argument and count it if any */
while (*s==' ' || *s=='\t')
if (*s)
argc++;
bcount=0;
- continue;
}
else if (*s=='\\')
{
/* '\', count them */
bcount++;
+ s++;
}
- else if ((*s=='"') && ((bcount & 1)==0))
+ else if (*s=='"')
{
- /* unescaped '"' */
- in_quotes=!in_quotes;
+ /* '"' */
+ if ((bcount & 1)==0)
+ qcount++; /* unescaped '"' */
+ s++;
bcount=0;
+ /* consecutive quotes, see comment in copying code below */
+ while (*s=='"')
+ {
+ qcount++;
+ s++;
+ }
+ qcount=qcount % 3;
+ if (qcount==2)
+ qcount=0;
}
else
{
/* a regular character */
bcount=0;
+ s++;
}
- s++;
}
/* Allocate in a single lump, the string array, and the strings that go
strcpyW(cmdline, lpCmdline);
/* --- Then split and copy the arguments */
+ argv[0]=d=cmdline;
argc=1;
- bcount=0;
- in_quotes=0;
- s=argv[0]=d=cmdline;
+ /* The first argument, the executable path, follows special rules */
+ if (*d=='"')
+ {
+ /* The executable path ends at the next quote, no matter what */
+ s=d+1;
+ while (*s)
+ {
+ if (*s=='"')
+ {
+ s++;
+ break;
+ }
+ *d++=*s++;
+ }
+ }
+ else
+ {
+ /* The executable path ends at the next space, no matter what */
+ while (*d && *d!=' ' && *d!='\t')
+ d++;
+ s=d;
+ if (*s)
+ s++;
+ }
+ /* close the argument */
+ *d++=0;
+ /* skip to the first argument and initialize it if any */
+ while (*s==' ' || *s=='\t')
+ s++;
+ if (*s)
+ argv[argc++]=d;
+
+ /* Split and copy the remaining arguments */
+ qcount=bcount=0;
while (*s)
{
- if ((*s==' ' || *s=='\t') && !in_quotes)
+ if ((*s==' ' || *s=='\t') && qcount==0)
{
/* close the argument */
*d++=0;
* number of '\', plus a quote which we erase.
*/
d-=bcount/2;
- in_quotes=!in_quotes;
- s++;
+ qcount++;
}
else
{
*/
d=d-bcount/2-1;
*d++='"';
- s++;
}
+ s++;
bcount=0;
+ /* Now count the number of consecutive quotes. Note that qcount
+ * already takes into account the opening quote if any, as well as
+ * the quote that led us here.
+ */
+ while (*s=='"')
+ {
+ if (++qcount==3)
+ {
+ *d++='"';
+ qcount=0;
+ }
+ s++;
+ }
+ if (qcount==2)
+ qcount=0;
}
else
{
{"exe", "twoquotes", "next", NULL}, 0},
{"exe three\"\"\"quotes next",
- {"exe", "three\"quotes", "next", NULL}, 0x21},
+ {"exe", "three\"quotes", "next", NULL}, 0},
{"exe four\"\"\"\" quotes\" next 4%3=1",
- {"exe", "four\" quotes", "next", "4%3=1", NULL}, 0x61},
+ {"exe", "four\" quotes", "next", "4%3=1", NULL}, 0},
{"exe five\"\"\"\"\"quotes next",
- {"exe", "five\"quotes", "next", NULL}, 0x21},
+ {"exe", "five\"quotes", "next", NULL}, 0},
{"exe six\"\"\"\"\"\"quotes next",
- {"exe", "six\"\"quotes", "next", NULL}, 0x20},
+ {"exe", "six\"\"quotes", "next", NULL}, 0},
{"exe seven\"\"\"\"\"\"\" quotes\" next 7%3=1",
- {"exe", "seven\"\" quotes", "next", "7%3=1", NULL}, 0x20},
+ {"exe", "seven\"\" quotes", "next", "7%3=1", NULL}, 0},
{"exe twelve\"\"\"\"\"\"\"\"\"\"\"\"quotes next",
- {"exe", "twelve\"\"\"\"quotes", "next", NULL}, 0x20},
+ {"exe", "twelve\"\"\"\"quotes", "next", NULL}, 0},
{"exe thirteen\"\"\"\"\"\"\"\"\"\"\"\"\" quotes\" next 13%3=1",
- {"exe", "thirteen\"\"\"\" quotes", "next", "13%3=1", NULL}, 0x20},
+ {"exe", "thirteen\"\"\"\" quotes", "next", "13%3=1", NULL}, 0},
/* Inside a quoted string the opening quote is added to the set of
* consecutive quotes to get the effective quotes count. This gives:
* 1+3n+2 quotes -> n+1 quotes plus closes the quoted string
*/
{"exe \"two\"\"quotes next",
- {"exe", "two\"quotes", "next", NULL}, 0x21},
+ {"exe", "two\"quotes", "next", NULL}, 0},
{"exe \"two\"\" next",
- {"exe", "two\"", "next", NULL}, 0x21},
+ {"exe", "two\"", "next", NULL}, 0},
{"exe \"three\"\"\" quotes\" next 4%3=1",
- {"exe", "three\" quotes", "next", "4%3=1", NULL}, 0x61},
+ {"exe", "three\" quotes", "next", "4%3=1", NULL}, 0},
{"exe \"four\"\"\"\"quotes next",
- {"exe", "four\"quotes", "next", NULL}, 0x21},
+ {"exe", "four\"quotes", "next", NULL}, 0},
{"exe \"five\"\"\"\"\"quotes next",
- {"exe", "five\"\"quotes", "next", NULL}, 0x20},
+ {"exe", "five\"\"quotes", "next", NULL}, 0},
{"exe \"six\"\"\"\"\"\" quotes\" next 7%3=1",
- {"exe", "six\"\" quotes", "next", "7%3=1", NULL}, 0x20},
+ {"exe", "six\"\" quotes", "next", "7%3=1", NULL}, 0},
{"exe \"eleven\"\"\"\"\"\"\"\"\"\"\"quotes next",
- {"exe", "eleven\"\"\"\"quotes", "next", NULL}, 0x20},
+ {"exe", "eleven\"\"\"\"quotes", "next", NULL}, 0},
{"exe \"twelve\"\"\"\"\"\"\"\"\"\"\"\" quotes\" next 13%3=1",
- {"exe", "twelve\"\"\"\" quotes", "next", "13%3=1", NULL}, 0x20},
+ {"exe", "twelve\"\"\"\" quotes", "next", "13%3=1", NULL}, 0},
/* Escaped consecutive quotes are fun */
{"exe \"the crazy \\\\\"\"\"\\\\\" quotes",
- {"exe", "the crazy \\\"\\", "quotes", NULL}, 0x21},
+ {"exe", "the crazy \\\"\\", "quotes", NULL}, 0},
/* The executable path has its own rules!!!
* - Backslashes have no special meaning.
* argument, the latter is parsed using the regular rules.
*/
{"exe\"file\"path arg1",
- {"exe\"file\"path", "arg1", NULL}, 0x10},
+ {"exe\"file\"path", "arg1", NULL}, 0},
{"exe\"file\"path\targ1",
- {"exe\"file\"path", "arg1", NULL}, 0x10},
+ {"exe\"file\"path", "arg1", NULL}, 0},
{"exe\"path\\ arg1",
- {"exe\"path\\", "arg1", NULL}, 0x31},
+ {"exe\"path\\", "arg1", NULL}, 0},
{"\\\"exe \"arg one\"",
- {"\\\"exe", "arg one", NULL}, 0x10},
+ {"\\\"exe", "arg one", NULL}, 0},
{"\"spaced exe\" \"next arg\"",
{"spaced exe", "next arg", NULL}, 0},
{"spaced exe", "next arg", NULL}, 0},
{"\"exe\"arg\" one\" argtwo",
- {"exe", "arg one", "argtwo", NULL}, 0x31},
+ {"exe", "arg one", "argtwo", NULL}, 0},
{"\"spaced exe\\\"arg1 arg2",
- {"spaced exe\\", "arg1", "arg2", NULL}, 0x11},
+ {"spaced exe\\", "arg1", "arg2", NULL}, 0},
{"\"two\"\" arg1 ",
- {"two", " arg1 ", NULL}, 0x11},
+ {"two", " arg1 ", NULL}, 0},
{"\"three\"\"\" arg2",
- {"three", "", "arg2", NULL}, 0x61},
+ {"three", "", "arg2", NULL}, 0},
{"\"four\"\"\"\"arg1",
- {"four", "\"arg1", NULL}, 0x11},
+ {"four", "\"arg1", NULL}, 0},
/* If the first character is a space then the executable path is empty */
{" \"arg\"one argtwo",
/* Only (double-)quotes have a special meaning. */
{"Params23456", "'p2 p3` p4\\ $even", 0x40,
{" \"'p2\" \"p3`\" \"p4\\\" \"$even\" \"\"",
- {"", "'p2", "p3`", "p4\" $even \"", NULL}, 0x80}},
+ {"", "'p2", "p3`", "p4\" $even \"", NULL}, 0}},
{"Params23456", "p=2 p-3 p4\tp4\rp4\np4", 0x1c2,
{" \"p=2\" \"p-3\" \"p4\tp4\rp4\np4\" \"\" \"\"",
{"Params23456789", "three\"\"\"quotes \"p four\" three\"\"\"quotes p6", 0xff3,
{" \"three\"\" \"quotes\" \"p four\" \"three\"\" \"quotes\" \"p6\" \"\" \"\"",
- {"", "three\"", "quotes", "p four", "three\"", "quotes", "p6", "", "", NULL}, 0x7e1}},
+ {"", "three\"", "quotes", "p four", "three\"", "quotes", "p6", "", "", NULL}, 0}},
{"Params23456789", "four\"\"\"\"quotes \"p three\" four\"\"\"\"quotes p5", 0xf3,
{" \"four\"\"quotes\" \"p three\" \"four\"\"quotes\" \"p5\" \"\" \"\" \"\" \"\"",
- {"", "four\"quotes p", "three fourquotes p5 \"", "", "", "", NULL}, 0xde1}},
+ {"", "four\"quotes p", "three fourquotes p5 \"", "", "", "", NULL}, 0}},
/* Quoted strings cannot be continued by tacking on a non space character
* either.
{"Params23456789", "\"three q\"\"\"uotes \"p four\" \"three q\"\"\"uotes p7", 0xff3,
{" \"three q\"\" \"uotes\" \"p four\" \"three q\"\" \"uotes\" \"p7\" \"\" \"\"",
- {"", "three q\"", "uotes", "p four", "three q\"", "uotes", "p7", "", "", NULL}, 0x7e1}},
+ {"", "three q\"", "uotes", "p four", "three q\"", "uotes", "p7", "", "", NULL}, 0}},
{"Params23456789", "\"four \"\"\"\" quotes\" \"p three\" \"four \"\"\"\" quotes\" p5", 0xff3,
{" \"four \"\" quotes\" \"p three\" \"four \"\" quotes\" \"p5\" \"\" \"\" \"\" \"\"",
- {"", "four \"", "quotes p", "three four", "", "quotes p5 \"", "", "", "", NULL}, 0x3e0}},
+ {"", "four \"", "quotes p", "three four", "", "quotes p5 \"", "", "", "", NULL}, 0}},
/* The quoted string rules also apply to consecutive quotes at the start
* of a parameter but don't count the opening quote!
{"Params23456789", "\"\"\"three quotes\" \"p three\" \"\"\"three quotes\" p5", 0x6f3,
{" \"\"three quotes\" \"p three\" \"\"three quotes\" \"p5\" \"\" \"\" \"\" \"\"",
- {"", "three", "quotes p", "three \"three", "quotes p5 \"", "", "", "", NULL}, 0x181}},
+ {"", "three", "quotes p", "three \"three", "quotes p5 \"", "", "", "", NULL}, 0}},
{"Params23456789", "\"\"\"\"fourquotes \"p four\" \"\"\"\"fourquotes p7", 0xbf3,
{" \"\"\" \"fourquotes\" \"p four\" \"\"\" \"fourquotes\" \"p7\" \"\" \"\"",
- {"", "\"", "fourquotes", "p four", "\"", "fourquotes", "p7", "", "", NULL}, 0x7e1}},
+ {"", "\"", "fourquotes", "p four", "\"", "fourquotes", "p7", "", "", NULL}, 0}},
/* An unclosed quoted string gets lost! */
{"Params23456", "p2 \"p3\" \"p4 is lost", 0x1c3,