From a055130da51056960c62acd178defa885e943e0e Mon Sep 17 00:00:00 2001 From: Uchenna <110002944+I-am-Uchenna@users.noreply.github.com> Date: Mon, 15 Jun 2026 15:44:32 +0100 Subject: [PATCH 1/6] Strip ANSI escape codes from TerminalRepr.__str__ output Pygments-highlighted assertion diffs pre-bake ANSI escape sequences into ReprEntry.lines at test time. When TerminalRepr.__str__ is called for plain-text consumers (JUnit XML via --junitxml, pytest-xdist serialization), these raw escape codes leak into the output. Add a compiled regex that strips SGR sequences (ESC[...m) from the string returned by __str__. Values rendered by assertion introspection are unaffected because saferepr() escapes real 0x1B bytes to their literal text representation; raw escape sequences embedded directly in an exception message are stripped from the repr text as well. Fixes #12365. --- src/_pytest/_code/code.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/_pytest/_code/code.py b/src/_pytest/_code/code.py index 3c453b15dd7..ef226f8f4e6 100644 --- a/src/_pytest/_code/code.py +++ b/src/_pytest/_code/code.py @@ -1246,6 +1246,9 @@ def repr_excinfo(self, excinfo: ExceptionInfo[BaseException]) -> ExceptionChainR return ExceptionChainRepr(repr_chain) +_ANSI_ESCAPE_RE: Final = re.compile(r"\x1b\[[0-9;]*m") + + @dataclasses.dataclass(eq=False) class TerminalRepr: """Base class for terminal representations -- pieces of data that display @@ -1257,7 +1260,13 @@ def __str__(self) -> str: io = StringIO() tw = TerminalWriter(file=io) self.toterminal(tw) - return io.getvalue().strip() + text = io.getvalue().strip() + # Strip ANSI escape sequences that may have been pre-baked into the + # repr data by Pygments-highlighted assertion diffs or by a + # TerminalWriter running with forced markup (PY_COLORS / FORCE_COLOR). + # Plain-text consumers such as JUnit XML and pytest-xdist serialization + # should never see raw escape codes. See #12365. 
+ return _ANSI_ESCAPE_RE.sub("", text) def __repr__(self) -> str: return f"<{self.__class__} instance at {id(self):0x}>" From 132ecf82c78baabd5cca6f3321c660cbfad1accf Mon Sep 17 00:00:00 2001 From: Uchenna <110002944+I-am-Uchenna@users.noreply.github.com> Date: Mon, 15 Jun 2026 15:46:22 +0100 Subject: [PATCH 2/6] Add tests for ANSI escape code stripping in JUnit XML Update test_escaped_setup_teardown_error to reflect that TerminalRepr.__str__ now strips ANSI codes before bin_xml_escape runs. The message attribute (from reprcrash.message) is unaffected since it bypasses __str__. Add test_ansi_escape_codes_stripped_from_junitxml to verify that --color=yes assertion diffs do not leak ANSI sequences into the XML failure body. --- testing/test_junitxml.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index 1018b858413..36834739b60 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -1752,7 +1752,10 @@ def test_esc(my_setup): node = dom.get_first_by_tag("testcase") snode = node.get_first_by_tag("error") assert "#x1B[31mred#x1B[m" in snode["message"] - assert "#x1B[31mred#x1B[m" in snode.text + # TerminalRepr.__str__ strips ANSI escape codes before bin_xml_escape + # runs, so the text body contains the bare word without escape sequences. + assert "red" in snode.text + assert "#x1B" not in snode.text @parametrize_families @@ -1837,3 +1840,31 @@ def test_no_message_quiet(pytester: Pytester) -> None: result = pytester.runpytest("--junitxml=pytest.xml", "--quiet") result.stdout.no_fnmatch_line("* generated xml file: *") + + + +def test_ansi_escape_codes_stripped_from_junitxml( + pytester: Pytester, run_and_parse: RunAndParse +) -> None: + """ANSI escape codes from assertion diffs must not leak into JUnit XML. + + Pygments-highlighted assertion diffs pre-bake ANSI escape sequences into + the repr data. 
TerminalRepr.__str__ now strips them so that plain-text + consumers (JUnit XML, pytest-xdist serialization) never see raw escape + codes. See #12365. + """ + pytester.makepyfile( + """ + def test_fail(): + assert "hello" == "world" + """ + ) + _, dom = run_and_parse("--color=yes") + node = dom.get_first_by_tag("testcase") + fnode = node.get_first_by_tag("failure") + # The failure text must not contain raw ANSI escape sequences. + assert "\x1b[" not in fnode.text + assert "#x1B" not in fnode.text + # But the actual assertion content should still be present. + assert "hello" in fnode.text + assert "world" in fnode.text From f447ff2433b578c0957fc592e27324624f598cb6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jun 2026 14:49:41 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- testing/test_junitxml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index 36834739b60..ff470b3f9ab 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -1842,7 +1842,6 @@ def test_no_message_quiet(pytester: Pytester) -> None: result.stdout.no_fnmatch_line("* generated xml file: *") - def test_ansi_escape_codes_stripped_from_junitxml( pytester: Pytester, run_and_parse: RunAndParse ) -> None: From 05947712998b5b12fda20a7a72fbaac65cfce6b5 Mon Sep 17 00:00:00 2001 From: Uchenna <110002944+I-am-Uchenna@users.noreply.github.com> Date: Sun, 21 Jun 2026 06:38:59 +0200 Subject: [PATCH 4/6] Broaden ANSI escape regex to strip all CSI sequences (#12365) --- src/_pytest/_code/code.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/_pytest/_code/code.py b/src/_pytest/_code/code.py index ef226f8f4e6..464857165d5 100644 --- a/src/_pytest/_code/code.py +++ b/src/_pytest/_code/code.py @@ -1246,7 +1246,12 @@ def repr_excinfo(self, excinfo: 
ExceptionInfo[BaseException]) -> ExceptionChainR return ExceptionChainRepr(repr_chain) -_ANSI_ESCAPE_RE: Final = re.compile(r"\x1b\[[0-9;]*m") +# Matches all ANSI escape sequences per ECMA-48: CSI sequences (SGR colour +# codes ending in "m", but also cursor moves and line clears such as +# "\x1b[K" / "\x1b[2K") and two-character escapes. Plain-text consumers +# (JUnit XML, pytest-xdist serialization) must never receive raw +# escape codes. See #12365. +_ANSI_ESCAPE_RE: Final = re.compile(r"\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") @dataclasses.dataclass(eq=False) @@ -1260,13 +1265,10 @@ def __str__(self) -> str: io = StringIO() tw = TerminalWriter(file=io) self.toterminal(tw) - text = io.getvalue().strip() - # Strip ANSI escape sequences that may have been pre-baked into the - # repr data by Pygments-highlighted assertion diffs or by a - # TerminalWriter running with forced markup (PY_COLORS / FORCE_COLOR). - # Plain-text consumers such as JUnit XML and pytest-xdist serialization - # should never see raw escape codes. See #12365. - return _ANSI_ESCAPE_RE.sub("", text) + # Strip ANSI escape codes that may have been pre-baked into the repr + # data (e.g. by Pygments-highlighted assertion diffs under + # --color=yes) so plain-text consumers never see them. See #12365. 
+ return _ANSI_ESCAPE_RE.sub("", io.getvalue().strip()) def __repr__(self) -> str: return f"<{self.__class__} instance at {id(self):0x}>" From edc0e9718a7af2be5894682a91c450bb695e73b3 Mon Sep 17 00:00:00 2001 From: Uchenna <110002944+I-am-Uchenna@users.noreply.github.com> Date: Sun, 21 Jun 2026 06:40:04 +0200 Subject: [PATCH 5/6] Add regression test for stripping non-SGR ANSI codes (#12365) --- testing/code/test_excinfo.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/testing/code/test_excinfo.py b/testing/code/test_excinfo.py index 883a7c5f9b0..f57b5bf6b50 100644 --- a/testing/code/test_excinfo.py +++ b/testing/code/test_excinfo.py @@ -2057,3 +2057,20 @@ def test_check_error_notes_failure( with pytest.raises(AssertionError): with pytest.raises(type(error), match=match): raise error + + +def test_terminalrepr_strips_all_ansi_escape_codes() -> None: + r"""TerminalRepr.__str__ must strip every ANSI escape code, not just SGR. + + Regression guard for #12365: a narrow ``\x1b[...m`` regex let non-SGR CSI + sequences (cursor moves, line clears) leak into plain-text consumers such + as JUnit XML. + """ + from _pytest._code.code import TerminalRepr + + class _Repr(TerminalRepr): + def toterminal(self, tw: TerminalWriter) -> None: + # SGR colour codes plus a line-clear and a cursor-up move. 
+ tw.write("\x1b[31mred\x1b[0m \x1b[2Kcleared \x1b[1Aup") + + assert str(_Repr()) == "red cleared up" From ad508d41dbba68c1a2a456f17927619af84d6e0e Mon Sep 17 00:00:00 2001 From: Uchenna <110002944+I-am-Uchenna@users.noreply.github.com> Date: Sun, 21 Jun 2026 06:40:49 +0200 Subject: [PATCH 6/6] Add changelog fragment for #12365 --- changelog/12365.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/12365.bugfix.rst diff --git a/changelog/12365.bugfix.rst b/changelog/12365.bugfix.rst new file mode 100644 index 00000000000..d0ef5624944 --- /dev/null +++ b/changelog/12365.bugfix.rst @@ -0,0 +1 @@ +Fixed ANSI escape codes (for example from syntax-highlighted assertion diffs produced under ``--color=yes``) leaking into JUnit XML reports and other plain-text outputs. :class:`~_pytest._code.code.TerminalRepr` now strips all ANSI escape sequences from its string representation.