From f4d67ec974cb16307cb787e63ffc4d26ce31e582 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Mon, 12 Oct 2020 11:29:42 -0400 Subject: [PATCH 1/5] Ensure unclosed script tags are parsed correctly Provides a workaround for https://bugs.python.org/issue41989 Related to #1036. --- markdown/htmlparser.py | 7 +++ tests/test_syntax/blocks/test_html_blocks.py | 47 ++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index f83ddeace..a698814db 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -72,6 +72,13 @@ def reset(self): def close(self): """Handle any buffered data.""" super().close() + if len(self.rawdata): + # Temp fix for https://bugs.python.org/issue41989 + # TODO: remove this when the bug is fixed in all supported Python versions. + if self.convert_charrefs and not self.cdata_elem: + self.handle_data(unescape(self.rawdata)) + else: + self.handle_data(self.rawdata) # Handle any unclosed tags. if len(self._cache): self.cleandoc.append(self.md.htmlStash.store(''.join(self._cache))) diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py index 0a2092d3f..11884e186 100644 --- a/tests/test_syntax/blocks/test_html_blocks.py +++ b/tests/test_syntax/blocks/test_html_blocks.py @@ -1317,3 +1317,50 @@ def text_invalid_tags(self): """ ) ) + + def test_script_tags(self): + self.assertMarkdownRenders( + self.dedent( + """ + + + + """ + ), + self.dedent( + """ + + + + """ + ) + ) + + def test_unclosed_script_tag(self): + # Ensure we have a working fix for https://bugs.python.org/issue41989 + self.assertMarkdownRenders( + self.dedent( + """ + ` tag. + """ + ), + self.dedent( + """ +

Text <script> more text.

+
+ *foo* +
+ +
+ + bar + +
+ +

A new paragraph with a closing </script> tag.

+ """ + ) ) \ No newline at end of file From c99ace1f4059f864ab97767f4219d31a713c5845 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Mon, 12 Oct 2020 13:45:17 -0400 Subject: [PATCH 3/5] release notes --- docs/change_log/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 554864480..3b2eea59c 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -5,6 +5,7 @@ Python-Markdown Change Log Under development: version 3.3.1 (a bug-fix release). +* Correctly parse raw `script` and `style` tags (#1036). * Ensure consistent class handling by `fenced_code` and `codehilite` (#1032). Oct 6, 2020: version 3.3 ([Notes](release-3.3.md)). From 310d506b8369893c494fb81623d97d20e53d14ea Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Mon, 12 Oct 2020 13:59:39 -0400 Subject: [PATCH 4/5] cleanup --- markdown/htmlparser.py | 10 +++++----- tests/test_syntax/blocks/test_html_blocks.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 795e29ba1..a362fd981 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -76,7 +76,7 @@ def close(self): # Temp fix for https://bugs.python.org/issue41989 # TODO: remove this when the bug is fixed in all supported Python versions. if self.convert_charrefs and not self.cdata_elem: - self.handle_data(unescape(self.rawdata)) + self.handle_data(htmlparser.unescape(self.rawdata)) else: self.handle_data(self.rawdata) # Handle any unclosed tags. @@ -215,12 +215,12 @@ def unknown_decl(self, data): # As __startag_text is private, all references to it must be in this subclass. # The last few lines of parse_starttag are reversed so that handle_starttag # can override cdata_mode in certain situations (in a code span). - __starttag_text = None + __starttag_text = None def get_starttag_text(self): """Return full source of start tag: '<...>'.""" return self.__starttag_text - + def parse_starttag(self, i): self.__starttag_text = None endpos = self.check_for_whole_start_tag(i) @@ -243,7 +243,7 @@ def parse_starttag(self, i): if not rest: attrvalue = None elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue[:1] == '"' == attrvalue[-1:]: # noqa: E127 attrvalue = attrvalue[1:-1] if attrvalue: attrvalue = htmlparser.unescape(attrvalue) @@ -256,7 +256,7 @@ def parse_starttag(self, i): if "\n" in self.__starttag_text: lineno = lineno + self.__starttag_text.count("\n") offset = len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") + - self.__starttag_text.rfind("\n") # noqa: E127 else: offset = offset + len(self.__starttag_text) self.handle_data(rawdata[i:endpos]) diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py index 3b4ad3643..3fea76675 100644 --- a/tests/test_syntax/blocks/test_html_blocks.py +++ b/tests/test_syntax/blocks/test_html_blocks.py @@ -1325,7 +1325,7 @@ def test_script_tags(self): - + @@ -1336,7 +1336,7 @@ def test_script_tags(self): - + @@ -1351,7 +1351,7 @@ def test_unclosed_script_tag(self): """ ` tag. """ ), @@ -1397,8 +1397,8 @@ def test_inline_script_tags(self): bar - +

A new paragraph with a closing </script> tag.

""" ) - ) \ No newline at end of file + ) From 09c5eb60fb063e7cff30b6f847c13e9e7213f781 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Mon, 12 Oct 2020 14:07:53 -0400 Subject: [PATCH 5/5] coverage --- markdown/htmlparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index a362fd981..6776d340f 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -75,7 +75,7 @@ def close(self): if len(self.rawdata): # Temp fix for https://bugs.python.org/issue41989 # TODO: remove this when the bug is fixed in all supported Python versions. - if self.convert_charrefs and not self.cdata_elem: + if self.convert_charrefs and not self.cdata_elem: # pragma: no cover self.handle_data(htmlparser.unescape(self.rawdata)) else: self.handle_data(self.rawdata) @@ -221,7 +221,7 @@ def get_starttag_text(self): """Return full source of start tag: '<...>'.""" return self.__starttag_text - def parse_starttag(self, i): + def parse_starttag(self, i): # pragma: no cover self.__starttag_text = None endpos = self.check_for_whole_start_tag(i) if endpos < 0: