From f4d67ec974cb16307cb787e63ffc4d26ce31e582 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Mon, 12 Oct 2020 11:29:42 -0400
Subject: [PATCH 1/5] Ensure unclosed script tags are parsed correctly

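Provides a workaround for https://bugs.python.org/issue41989: after
calling the base class `close()`, any text still left in `rawdata` (for
example, the body of an unclosed `<script>` tag) is passed to
`handle_data()`.
Related to #1036.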
---
 markdown/htmlparser.py                       |  7 +++
 tests/test_syntax/blocks/test_html_blocks.py | 47 ++++++++++++++++++++
 2 files changed, 54 insertions(+)
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
index f83ddeace..a698814db 100644
--- a/markdown/htmlparser.py
+++ b/markdown/htmlparser.py
@@ -72,6 +72,13 @@ def reset(self):
     def close(self):
         """Handle any buffered data."""
         super().close()
+        if len(self.rawdata):
+            # Temp fix for https://bugs.python.org/issue41989
+            # TODO: remove this when the bug is fixed in all supported Python versions.
+            if self.convert_charrefs and not self.cdata_elem:
+                self.handle_data(unescape(self.rawdata))
+            else:
+                self.handle_data(self.rawdata)
         # Handle any unclosed tags.
         if len(self._cache):
             self.cleandoc.append(self.md.htmlStash.store(''.join(self._cache)))
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
index 0a2092d3f..11884e186 100644
--- a/tests/test_syntax/blocks/test_html_blocks.py
+++ b/tests/test_syntax/blocks/test_html_blocks.py
@@ -1317,3 +1317,50 @@ def text_invalid_tags(self):
                 """
             )
         )
+
+    def test_script_tags(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                <script>
+                *random stuff* <div> &amp;
+                </script>
+                
+                <style>
+                **more stuff**
+                </style>
+                """
+            ),
+            self.dedent(
+                """
+                <script>
+                *random stuff* <div> &amp;
+                </script>
+                
+                <style>
+                **more stuff**
+                </style>
+                """
+            )
+        )
+
+    def test_unclosed_script_tag(self):
+        # Ensure we have a working fix for https://bugs.python.org/issue41989
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                <script>
+                *random stuff* <div> &amp;
+                
+                Still part of the *script* tag
+                """
+            ),
+            self.dedent(
+                """
+                <script>
+                *random stuff* <div> &amp;
+                
+                Still part of the *script* tag
+                """
+            )
+        )
\ No newline at end of file

From 961b20509c8224c1492195ef7a8c8259b76427e5 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Mon, 12 Oct 2020 13:10:47 -0400
Subject: [PATCH 2/5] Avoid cdata_mode outside of HTML blocks.

Fixes #1036.
---
 markdown/htmlparser.py                       | 63 ++++++++++++++++++++
 tests/test_syntax/blocks/test_html_blocks.py | 38 ++++++++++++
 2 files changed, 101 insertions(+)

diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
index a698814db..795e29ba1 100644
--- a/markdown/htmlparser.py
+++ b/markdown/htmlparser.py
@@ -131,6 +131,9 @@ def handle_starttag(self, tag, attrs):
             self._cache.append(text)
         else:
             self.cleandoc.append(text)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                # This is presumably a standalone tag in a code span (see #1036).
+                self.clear_cdata_mode()
 
     def handle_endtag(self, tag):
         text = self.get_endtag_text(tag)
@@ -207,3 +210,63 @@ def handle_pi(self, data):
     def unknown_decl(self, data):
         end = ']]>' if data.startswith('CDATA[') else ']>'
         self.handle_empty_tag('<![{}{}'.format(data, end), is_block=True)
+
+    # The rest has been copied from base class in standard lib to address #1036.
+    # As __startag_text is private, all references to it must be in this subclass.
+    # The last few lines of parse_starttag are reversed so that handle_starttag
+    # can override cdata_mode in certain situations (in a code span).
+        __starttag_text = None
+
+    def get_starttag_text(self):
+        """Return full source of start tag: '<...>'."""
+        return self.__starttag_text
+        
+    def parse_starttag(self, i):
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and j into a tag and attrs
+        attrs = []
+        match = htmlparser.tagfind_tolerant.match(rawdata, i+1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = match.group(1).lower()
+        while k < endpos:
+            m = htmlparser.attrfind_tolerant.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                attrvalue = attrvalue[1:-1]
+            if attrvalue:
+                attrvalue = htmlparser.unescape(attrvalue)
+            attrs.append((attrname.lower(), attrvalue))
+            k = m.end()
+
+        end = rawdata[k:endpos].strip()
+        if end not in (">", "/>"):
+            lineno, offset = self.getpos()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + self.__starttag_text.count("\n")
+                offset = len(self.__starttag_text) \
+                         - self.__starttag_text.rfind("\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            self.handle_data(rawdata[i:endpos])
+            return endpos
+        if end.endswith('/>'):
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            # *** set cdata_mode first so we can override it in handle_starttag (see #1036) ***
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)
+            self.handle_starttag(tag, attrs)
+        return endpos
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
index 11884e186..3b4ad3643 100644
--- a/tests/test_syntax/blocks/test_html_blocks.py
+++ b/tests/test_syntax/blocks/test_html_blocks.py
@@ -1363,4 +1363,42 @@ def test_unclosed_script_tag(self):
                 Still part of the *script* tag
                 """
             )
+        )
+    
+    def test_inline_script_tags(self):
+        # Ensure inline script tags doesn't cause the parser to eat content (see #1036).
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                Text `<script>` more *text*.
+                
+                <div>
+                *foo*
+                </div>
+
+                <div>
+
+                bar
+
+                </div>
+                                
+                A new paragraph with a closing `</script>` tag.
+                """
+            ),
+            self.dedent(
+                """
+                <p>Text <code>&lt;script&gt;</code> more <em>text</em>.</p>
+                <div>
+                *foo*
+                </div>
+
+                <div>
+
+                bar
+
+                </div>
+                
+                <p>A new paragraph with a closing <code>&lt;/script&gt;</code> tag.</p>
+                """
+            )
         )
\ No newline at end of file

From c99ace1f4059f864ab97767f4219d31a713c5845 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Mon, 12 Oct 2020 13:45:17 -0400
Subject: [PATCH 3/5] Add release notes for the script/style tag fix

---
 docs/change_log/index.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/change_log/index.md b/docs/change_log/index.md
index 554864480..3b2eea59c 100644
--- a/docs/change_log/index.md
+++ b/docs/change_log/index.md
@@ -5,6 +5,7 @@ Python-Markdown Change Log
 
 Under development: version 3.3.1 (a bug-fix release).
 
+* Correctly parse raw `script` and `style` tags (#1036).
 * Ensure consistent class handling by `fenced_code` and `codehilite` (#1032).
 
 Oct 6, 2020: version 3.3 ([Notes](release-3.3.md)).

From 310d506b8369893c494fb81623d97d20e53d14ea Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Mon, 12 Oct 2020 13:59:39 -0400
Subject: [PATCH 4/5] Fix unescape reference, indentation, and lint warnings

---
 markdown/htmlparser.py                       | 10 +++++-----
 tests/test_syntax/blocks/test_html_blocks.py | 18 +++++++++---------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
index 795e29ba1..a362fd981 100644
--- a/markdown/htmlparser.py
+++ b/markdown/htmlparser.py
@@ -76,7 +76,7 @@ def close(self):
             # Temp fix for https://bugs.python.org/issue41989
             # TODO: remove this when the bug is fixed in all supported Python versions.
             if self.convert_charrefs and not self.cdata_elem:
-                self.handle_data(unescape(self.rawdata))
+                self.handle_data(htmlparser.unescape(self.rawdata))
             else:
                 self.handle_data(self.rawdata)
         # Handle any unclosed tags.
@@ -215,12 +215,12 @@ def unknown_decl(self, data):
     # As __startag_text is private, all references to it must be in this subclass.
     # The last few lines of parse_starttag are reversed so that handle_starttag
     # can override cdata_mode in certain situations (in a code span).
-        __starttag_text = None
+    __starttag_text = None
 
     def get_starttag_text(self):
         """Return full source of start tag: '<...>'."""
         return self.__starttag_text
-        
+
     def parse_starttag(self, i):
         self.__starttag_text = None
         endpos = self.check_for_whole_start_tag(i)
@@ -243,7 +243,7 @@ def parse_starttag(self, i):
             if not rest:
                 attrvalue = None
             elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
-                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                 attrvalue[:1] == '"' == attrvalue[-1:]:  # noqa: E127
                 attrvalue = attrvalue[1:-1]
             if attrvalue:
                 attrvalue = htmlparser.unescape(attrvalue)
@@ -256,7 +256,7 @@ def parse_starttag(self, i):
             if "\n" in self.__starttag_text:
                 lineno = lineno + self.__starttag_text.count("\n")
                 offset = len(self.__starttag_text) \
-                         - self.__starttag_text.rfind("\n")
+                         - self.__starttag_text.rfind("\n")  # noqa: E127
             else:
                 offset = offset + len(self.__starttag_text)
             self.handle_data(rawdata[i:endpos])
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
index 3b4ad3643..3fea76675 100644
--- a/tests/test_syntax/blocks/test_html_blocks.py
+++ b/tests/test_syntax/blocks/test_html_blocks.py
@@ -1325,7 +1325,7 @@ def test_script_tags(self):
                 <script>
                 *random stuff* <div> &amp;
                 </script>
-                
+
                 <style>
                 **more stuff**
                 </style>
@@ -1336,7 +1336,7 @@ def test_script_tags(self):
                 <script>
                 *random stuff* <div> &amp;
                 </script>
-                
+
                 <style>
                 **more stuff**
                 </style>
@@ -1351,7 +1351,7 @@ def test_unclosed_script_tag(self):
                 """
                 <script>
                 *random stuff* <div> &amp;
-                
+
                 Still part of the *script* tag
                 """
             ),
@@ -1359,19 +1359,19 @@ def test_unclosed_script_tag(self):
                 """
                 <script>
                 *random stuff* <div> &amp;
-                
+
                 Still part of the *script* tag
                 """
             )
         )
-    
+
     def test_inline_script_tags(self):
         # Ensure inline script tags doesn't cause the parser to eat content (see #1036).
         self.assertMarkdownRenders(
             self.dedent(
                 """
                 Text `<script>` more *text*.
-                
+
                 <div>
                 *foo*
                 </div>
@@ -1381,7 +1381,7 @@ def test_inline_script_tags(self):
                 bar
 
                 </div>
-                                
+
                 A new paragraph with a closing `</script>` tag.
                 """
             ),
@@ -1397,8 +1397,8 @@ def test_inline_script_tags(self):
                 bar
 
                 </div>
-                
+
                 <p>A new paragraph with a closing <code>&lt;/script&gt;</code> tag.</p>
                 """
             )
-        )
\ No newline at end of file
+        )

From 09c5eb60fb063e7cff30b6f847c13e9e7213f781 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Mon, 12 Oct 2020 14:07:53 -0400
Subject: [PATCH 5/5] Exclude workaround code from coverage reporting

---
 markdown/htmlparser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
index a362fd981..6776d340f 100644
--- a/markdown/htmlparser.py
+++ b/markdown/htmlparser.py
@@ -75,7 +75,7 @@ def close(self):
         if len(self.rawdata):
             # Temp fix for https://bugs.python.org/issue41989
             # TODO: remove this when the bug is fixed in all supported Python versions.
-            if self.convert_charrefs and not self.cdata_elem:
+            if self.convert_charrefs and not self.cdata_elem:  # pragma: no cover
                 self.handle_data(htmlparser.unescape(self.rawdata))
             else:
                 self.handle_data(self.rawdata)
@@ -221,7 +221,7 @@ def get_starttag_text(self):
         """Return full source of start tag: '<...>'."""
         return self.__starttag_text
 
-    def parse_starttag(self, i):
+    def parse_starttag(self, i):  # pragma: no cover
         self.__starttag_text = None
         endpos = self.check_for_whole_start_tag(i)
         if endpos < 0: