diff options
author | Masen Furer <m_github@0x26.net> | 2023-01-25 12:23:25 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-25 12:23:25 -0800 |
commit | ea12bf43b990ef3a7df087edb8b45d6514585d76 (patch) | |
tree | c6c6952f83562fdf85a1543f49b1bf65d5b1c74f | |
parent | 8736549a48c8467045ea2a56edddc9d4b17a4546 (diff) | |
download | tox-git-ea12bf43b990ef3a7df087edb8b45d6514585d76.tar.gz |
Windows shlex fix (#2895)
-rw-r--r-- | docs/changelog/2635.bugfix.rst | 13 | ||||
-rw-r--r-- | docs/config.rst | 16 | ||||
-rw-r--r-- | src/tox/config/loader/ini/replace.py | 19 | ||||
-rw-r--r-- | src/tox/config/loader/str_convert.py | 36 | ||||
-rw-r--r-- | tests/config/loader/ini/replace/test_replace_env_var.py | 14 | ||||
-rw-r--r-- | tests/config/loader/test_str_convert.py | 126 |
6 files changed, 208 insertions, 16 deletions
diff --git a/docs/changelog/2635.bugfix.rst b/docs/changelog/2635.bugfix.rst new file mode 100644 index 00000000..a125880d --- /dev/null +++ b/docs/changelog/2635.bugfix.rst @@ -0,0 +1,13 @@ +When parsing command lines, use ``shlex(..., posix=True)``, even on windows platforms, since non-POSIX mode does not +handle escape characters and quoting like a shell would. This improves cross-platform configurations without hacks or +esoteric quoting. + +To make this transition easier, on Windows, the backslash path separator will not treated as an escape character unless +it preceeds a quote, whitespace, or another backslash chracter. This allows paths to mostly be written in single or +double backslash style. + +Note that **double-backslash will no longer be escaped to a single backslash in substitutions**, instead the double +backslash will be consumed as part of command splitting, on either posix or windows platforms. + +In some instances superfluous double or single quote characters may be stripped from arg arrays in ways that do not +occur in the default windows ``cmd.exe`` shell - by :user:`masenf`. diff --git a/docs/config.rst b/docs/config.rst index 7b702f3f..b392ee5b 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -433,6 +433,16 @@ Run .. note:: + ``shlex`` POSIX-mode quoting rules are used to split the command line into arguments on all + supported platforms as of tox 4.4.0. + + The backslash ``\`` character can be used to escape quotes, whitespace, itself, and + other characters (except on Windows, where a backslash in a path will not be interpreted as an escape). + Unescaped single quote will disable the backslash escape until closed by another unescaped single quote. + For more details, please see :doc:`shlex parsing rules <python:library/shlex>`. + + .. note:: + Inline scripts can be used, however note these are discovered from the project root directory, and is not influenced by :ref:`change_dir` (this only affects the runtime current working directory). To make this behaviour explicit we recommend that you make inline scripts absolute paths by prepending ``{tox_root}``, instead of @@ -795,7 +805,7 @@ through the ``{...}`` string-substitution pattern. The string inside the curly braces may reference a global or per-environment config key as described above. -The backslash character ``\`` will act as an escape for a following: ``\``, +In substitutions, the backslash character ``\`` will act as an escape when preceeding ``{``, ``}``, ``:``, ``[``, or ``]``, otherwise the backslash will be reproduced literally:: @@ -803,6 +813,10 @@ reproduced literally:: python -c 'print("\{posargs} = \{}".format("{posargs}"))' python -c 'print("host: \{}".format("{env:HOSTNAME:host\: not set}")' +Note that any backslashes remaining after substitution may be processed by ``shlex`` during command parsing. On POSIX +platforms, the backslash will escape any following character; on windows, the backslash will escape any following quote, +whitespace, or backslash character (since it normally acts as a path delimiter). + Special substitutions that accept additional colon-delimited ``:`` parameters cannot have a space after the ``:`` at the beginning of line (e.g. ``{posargs: magic}`` would be parsed as factorial ``{posargs``, having value magic). diff --git a/src/tox/config/loader/ini/replace.py b/src/tox/config/loader/ini/replace.py index 40730ca5..41672780 100644 --- a/src/tox/config/loader/ini/replace.py +++ b/src/tox/config/loader/ini/replace.py @@ -30,7 +30,7 @@ LOGGER = logging.getLogger(__name__) ARG_DELIMITER = ":" REPLACE_START = "{" REPLACE_END = "}" -BACKSLASH_ESCAPE_CHARS = ["\\", ARG_DELIMITER, REPLACE_START, REPLACE_END, "[", "]"] +BACKSLASH_ESCAPE_CHARS = [ARG_DELIMITER, REPLACE_START, REPLACE_END, "[", "]"] MAX_REPLACE_DEPTH = 100 @@ -115,11 +115,18 @@ class MatchExpression: pos = 0 while pos < len(value): - if len(value) > pos + 1 and value[pos] == "\\" and value[pos + 1] in BACKSLASH_ESCAPE_CHARS: - # backslash escapes the next character from a special set - last_arg.append(value[pos + 1]) - pos += 2 - continue + if len(value) > pos + 1 and value[pos] == "\\": + if value[pos + 1] in BACKSLASH_ESCAPE_CHARS: + # backslash escapes the next character from a special set + last_arg.append(value[pos + 1]) + pos += 2 + continue + if value[pos + 1] == "\\": + # backlash doesn't escape a backslash, but does prevent it from affecting the next char + # a subsequent `shlex` pass will eat the double backslash during command splitting + last_arg.append(value[pos : pos + 2]) + pos += 2 + continue fragment = value[pos:] if terminator and fragment.startswith(terminator): pos += len(terminator) diff --git a/src/tox/config/loader/str_convert.py b/src/tox/config/loader/str_convert.py index f07545f5..ad7ffa5d 100644 --- a/src/tox/config/loader/str_convert.py +++ b/src/tox/config/loader/str_convert.py @@ -46,10 +46,42 @@ class StrConvert(Convert[str]): raise TypeError(f"dictionary lines must be of form key=value, found {row!r}") @staticmethod + def _win32_process_path_backslash(value: str, escape: str, special_chars: str) -> str: + """ + Escape backslash in value that is not followed by a special character. + + This allows windows paths to be written without double backslash, while + retaining the POSIX backslash escape semantics for quotes and escapes. + """ + result = [] + for ix, char in enumerate(value): + result.append(char) + if char == escape: + last_char = value[ix - 1 : ix] + if last_char == escape: + continue + next_char = value[ix + 1 : ix + 2] + if next_char not in (escape, *special_chars): + result.append(escape) # escape escapes that are not themselves escaping a special character + return "".join(result) + + @staticmethod def to_command(value: str) -> Command: - is_win = sys.platform == "win32" + """ + At this point, ``value`` has already been substituted out, and all punctuation / escapes are final. + + Value will typically be stripped of whitespace when coming from an ini file. + """ value = value.replace(r"\#", "#") - splitter = shlex.shlex(value, posix=not is_win) + is_win = sys.platform == "win32" + if is_win: # pragma: win32 cover + s = shlex.shlex(posix=True) + value = StrConvert._win32_process_path_backslash( + value, + escape=s.escape, + special_chars=s.quotes + s.whitespace, + ) + splitter = shlex.shlex(value, posix=True) splitter.whitespace_split = True splitter.commenters = "" # comments handled earlier, and the shlex does not know escaped comment characters args: list[str] = [] diff --git a/tests/config/loader/ini/replace/test_replace_env_var.py b/tests/config/loader/ini/replace/test_replace_env_var.py index 54438b0d..c5aca717 100644 --- a/tests/config/loader/ini/replace/test_replace_env_var.py +++ b/tests/config/loader/ini/replace/test_replace_env_var.py @@ -17,24 +17,24 @@ def test_replace_env_set(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> N def test_replace_env_set_double_bs(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None: - """Double backslash should escape to single backslash and not affect surrounding replacements.""" + """Double backslash should remain but not affect surrounding replacements.""" monkeypatch.setenv("MAGIC", "something good") result = replace_one(r"{env:MAGIC}\\{env:MAGIC}") - assert result == r"something good\something good" + assert result == r"something good\\something good" def test_replace_env_set_triple_bs(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None: - """Triple backslash should escape to single backslash also escape subsequent replacement.""" + """Triple backslash should retain two slashes with the third escaping subsequent replacement.""" monkeypatch.setenv("MAGIC", "something good") result = replace_one(r"{env:MAGIC}\\\{env:MAGIC}") - assert result == r"something good\{env:MAGIC}" + assert result == r"something good\\{env:MAGIC}" def test_replace_env_set_quad_bs(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None: - """Quad backslash should escape to two backslashes and not affect surrounding replacements.""" + """Quad backslash should remain but not affect surrounding replacements.""" monkeypatch.setenv("MAGIC", "something good") - result = replace_one(r"\\{env:MAGIC}\\\\{env:MAGIC}\\") - assert result == r"\something good\\something good" + "\\" + result = replace_one(r"\\{env:MAGIC}\\\\{env:MAGIC}" + "\\") + assert result == r"\\something good\\\\something good" + "\\" def test_replace_env_when_value_is_backslash(replace_one: ReplaceOne, monkeypatch: MonkeyPatch) -> None: diff --git a/tests/config/loader/test_str_convert.py b/tests/config/loader/test_str_convert.py index 4fe0aaff..1619056d 100644 --- a/tests/config/loader/test_str_convert.py +++ b/tests/config/loader/test_str_convert.py @@ -2,12 +2,14 @@ from __future__ import annotations import sys from pathlib import Path +from textwrap import dedent from typing import Any, Dict, List, Optional, Set, TypeVar, Union import pytest from tox.config.loader.str_convert import StrConvert from tox.config.types import Command, EnvList +from tox.pytest import MonkeyPatch, SubRequest, ToxProjectCreator if sys.version_info >= (3, 8): # pragma: no cover (py38+) from typing import Literal @@ -80,3 +82,127 @@ def test_str_convert_nok(raw: str, of_type: type[Any], msg: str, exc_type: type[ def test_invalid_shell_expression(value: str, expected: list[str]) -> None: result = StrConvert().to_command(value).args assert result == expected + + +SIMPLE_ARGS = [ + ('foo "bar baz"', ["foo", "bar baz"]), + ('foo "bar baz"ext', ["foo", "bar bazext"]), + ('foo="bar baz"', ["foo=bar baz"]), + ("foo 'bar baz'", ["foo", "bar baz"]), + ("foo 'bar baz'ext", ["foo", "bar bazext"]), + ("foo='bar baz'", ["foo=bar baz"]), + (r"foo=\"bar baz\"", ['foo="bar', 'baz"']), + (r'foo="bar baz\"', ['foo="bar baz\\"']), + ("foo='bar baz' quuc", ["foo=bar baz", "quuc"]), + (r"foo='bar baz\' quuc", ["foo=bar baz\\", "quuc"]), + (r"foo=\"bar baz\' quuc", ['foo="bar', "baz'", "quuc"]), + (r"foo=\\\"bar baz\"", ['foo=\\"bar', 'baz"']), + (r'foo=\\"bar baz\"', [r'foo=\\"bar baz\"']), +] +NEWLINE_ARGS = [ + ('foo\n"bar\nbaz"', ["foo", "bar\nbaz"]), +] +INI_CONFIG_NEWLINE_ARGS = [ + ('foo\\\n "bar\\\n baz"', ["foobarbaz"]), # behavior change from tox 3 + ('foo\\\n "bar \\\n baz"', ["foobar baz"]), # behavior change from tox 3 + ('foo \\\n "bar\\\n baz"', ["foo", "barbaz"]), + ('foo \\\n "bar \\\n baz"', ["foo", "bar baz"]), + ('foo \\\n \\"bar \\\n baz"', ["foo", '"bar', 'baz"']), + ("foo \\\n bar \\\n baz", ["foo", "bar", "baz"]), +] +WINDOWS_PATH_ARGS = [ + (r"SPECIAL:\foo\bar --quuz='baz atan'", [r"SPECIAL:\foo\bar", "--quuz=baz atan"]), + (r"X:\\foo\\bar --quuz='baz atan'", [r"X:\foo\bar", "--quuz=baz atan"]), + ("/foo/bar --quuz='baz atan'", ["/foo/bar", "--quuz=baz atan"]), + ('cc --arg "C:\\\\Users\\""', ["cc", "--arg", 'C:\\Users"']), + ('cc --arg "C:\\\\Users\\"', ["cc", "--arg", '"C:\\\\Users\\"']), + ('cc --arg "C:\\\\Users"', ["cc", "--arg", "C:\\Users"]), + ('cc --arg \\"C:\\\\Users"', ["cc", "--arg", '\\"C:\\\\Users"']), + ('cc --arg "C:\\\\Users\\ "', ["cc", "--arg", "C:\\Users\\ "]), + # ('cc --arg "C:\\\\Users\\ ', ["cc", "--arg", '"C:\\\\Users\\ ']), + ('cc --arg "C:\\\\Users\\\\"', ["cc", "--arg", "C:\\Users\\"]), + ('cc --arg "C:\\\\Users\\\\ "', ["cc", "--arg", "C:\\Users\\ "]), + # ('cc --arg "C:\\\\Users\\\\ ', ["cc", "--arg", '"C:\\\\Users\\\\ ']), + ( + r'cc --arg C:\\Users\\ --arg2 "SPECIAL:\Temp\f o o" --arg3="\\FOO\share\Path name" --arg4 SPECIAL:\Temp\ '[:-1], + [ + "cc", + "--arg", + "C:\\Users\\", + "--arg2", + "SPECIAL:\\Temp\\f o o", + "--arg3=\\FOO\\share\\Path name", + "--arg4", + "SPECIAL:\\Temp\\", + ], + ), +] +WACKY_SLASH_ARGS = [ + ("\\\\\\", ["\\\\\\"]), + (" \\'\\'\\ '", [" \\'\\'\\ '"]), + ("\\'\\'\\ ", ["'' "]), + ("\\'\\ \\\\", ["' \\"]), + ("\\'\\ ", ["' "]), + ('''"\\'\\"''', ['"\\\'\\"']), + ("'\\' \\\\", ["\\", "\\"]), + ('"\\\\" \\\\', ["\\", "\\"]), +] + + +@pytest.fixture(params=["win32", "linux2"]) +def sys_platform(request: SubRequest, monkeypatch: MonkeyPatch) -> str: + monkeypatch.setattr(sys, "platform", request.param) + return str(request.param) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + *SIMPLE_ARGS, + *NEWLINE_ARGS, + *WINDOWS_PATH_ARGS, + *WACKY_SLASH_ARGS, + ], +) +def test_shlex_platform_specific(sys_platform: str, value: str, expected: list[str]) -> None: + if sys_platform != "win32" and value.startswith("SPECIAL:"): + # on non-windows platform, backslash is always an escape, not path separator + expected = [exp.replace("\\", "") for exp in expected] + result = StrConvert().to_command(value).args + assert result == expected + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + *SIMPLE_ARGS, + *INI_CONFIG_NEWLINE_ARGS, + *WINDOWS_PATH_ARGS, + # *WACKY_SLASH_ARGS, + ], +) +def test_shlex_platform_specific_ini( + tox_project: ToxProjectCreator, + sys_platform: str, + value: str, + expected: list[str], +) -> None: + if sys_platform != "win32" and value.startswith("SPECIAL:"): + # on non-windows platform, backslash is always an escape, not path separator + expected = [exp.replace("\\", "") for exp in expected] + project = tox_project( + { + "tox.ini": dedent( + """ + [testenv] + commands = + %s""", + ) + % value, + }, + ) + outcome = project.run("c") + outcome.assert_success() + env_config = outcome.env_conf("py") + result = env_config["commands"] + assert result == [Command(args=expected)] |