diff --git a/changelog/12365.bugfix.rst b/changelog/12365.bugfix.rst
new file mode 100644
index 00000000000..d0ef5624944
--- /dev/null
+++ b/changelog/12365.bugfix.rst
@@ -0,0 +1 @@
+Fixed ANSI escape codes (for example from syntax-highlighted assertion diffs produced under ``--color=yes``) leaking into JUnit XML reports and other plain-text outputs. :class:`~_pytest._code.code.TerminalRepr` now strips all ANSI escape sequences from its string representation.
diff --git a/src/_pytest/_code/code.py b/src/_pytest/_code/code.py
index 3c453b15dd7..464857165d5 100644
--- a/src/_pytest/_code/code.py
+++ b/src/_pytest/_code/code.py
@@ -1246,6 +1246,14 @@ def repr_excinfo(self, excinfo: ExceptionInfo[BaseException]) -> ExceptionChainR
         return ExceptionChainRepr(repr_chain)
 
 
+# Matches ECMA-48 CSI sequences (SGR colour codes ending in "m", but also
+# cursor moves and line clears such as "\x1b[K" / "\x1b[2K") and two-character
+# Fe escapes. OSC/DCS string payloads are not consumed, only their introducer.
+# Plain-text consumers (JUnit XML, pytest-xdist serialization, resultlog) must
+# never receive raw escape codes. See #12365.
+_ANSI_ESCAPE_RE: Final = re.compile(r"\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
+
+
 @dataclasses.dataclass(eq=False)
 class TerminalRepr:
     """Base class for terminal representations -- pieces of data that display
@@ -1257,7 +1265,10 @@ def __str__(self) -> str:
         io = StringIO()
         tw = TerminalWriter(file=io)
         self.toterminal(tw)
-        return io.getvalue().strip()
+        # Remove ANSI escape codes pre-baked into the repr data (e.g. by
+        # Pygments-highlighted assertion diffs under --color=yes) *before*
+        # stripping, so whitespace shielded by an escape is trimmed too. See #12365.
+        return _ANSI_ESCAPE_RE.sub("", io.getvalue()).strip()
 
     def __repr__(self) -> str:
         return f"<{self.__class__} instance at {id(self):0x}>"
diff --git a/testing/code/test_excinfo.py b/testing/code/test_excinfo.py
index 883a7c5f9b0..f57b5bf6b50 100644
--- a/testing/code/test_excinfo.py
+++ b/testing/code/test_excinfo.py
@@ -2057,3 +2057,20 @@ def test_check_error_notes_failure(
     with pytest.raises(AssertionError):
         with pytest.raises(type(error), match=match):
             raise error
+
+
+def test_terminalrepr_strips_all_ansi_escape_codes() -> None:
+    r"""TerminalRepr.__str__ must strip every ANSI escape code, not just SGR.
+
+    Regression guard for #12365: a narrow ``\x1b[...m`` regex let non-SGR CSI
+    sequences (cursor moves, line clears) leak into plain-text consumers such
+    as JUnit XML.
+    """
+    from _pytest._code.code import TerminalRepr
+
+    class _Repr(TerminalRepr):
+        def toterminal(self, tw: TerminalWriter) -> None:
+            # SGR colours, line-clear, cursor-up, and whitespace shielded by a reset.
+            tw.write("\x1b[31mred\x1b[0m \x1b[2Kcleared \x1b[1Aup \x1b[0m")
+
+    assert str(_Repr()) == "red cleared up"
diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py
index 1018b858413..ff470b3f9ab 100644
--- a/testing/test_junitxml.py
+++ b/testing/test_junitxml.py
@@ -1752,7 +1752,10 @@ def test_esc(my_setup):
     node = dom.get_first_by_tag("testcase")
     snode = node.get_first_by_tag("error")
     assert "#x1B[31mred#x1B[m" in snode["message"]
-    assert "#x1B[31mred#x1B[m" in snode.text
+    # TerminalRepr.__str__ strips ANSI escape codes before bin_xml_escape
+    # runs, so the text body contains the bare word without escape sequences.
+    assert "red" in snode.text
+    assert "#x1B" not in snode.text
 
 
 @parametrize_families
@@ -1837,3 +1840,30 @@ def test_no_message_quiet(pytester: Pytester) -> None:
 
     result = pytester.runpytest("--junitxml=pytest.xml", "--quiet")
     result.stdout.no_fnmatch_line("* generated xml file: *")
+
+
+def test_ansi_escape_codes_stripped_from_junitxml(
+    pytester: Pytester, run_and_parse: RunAndParse
+) -> None:
+    """ANSI escape codes from assertion diffs must not leak into JUnit XML.
+
+    Pygments-highlighted assertion diffs pre-bake ANSI escape sequences into
+    the repr data. TerminalRepr.__str__ now strips them so that plain-text
+    consumers (JUnit XML, pytest-xdist serialization) never see raw escape
+    codes. See #12365.
+    """
+    pytester.makepyfile(
+        """
+        def test_fail():
+            assert "hello" == "world"
+        """
+    )
+    _, dom = run_and_parse("--color=yes")
+    node = dom.get_first_by_tag("testcase")
+    fnode = node.get_first_by_tag("failure")
+    # The failure text must not contain raw ANSI escape sequences.
+    assert "\x1b[" not in fnode.text
+    assert "#x1B" not in fnode.text
+    # But the actual assertion content should still be present.
+    assert "hello" in fnode.text
+    assert "world" in fnode.text