From c6f9c5df8c05e211c35e43c14f19cf27dfd9f447 Mon Sep 17 00:00:00 2001 From: Stephen Altamirano Date: Thu, 4 Mar 2021 17:39:03 -0800 Subject: [PATCH 1/2] Support non-http schemes in url refs These url schemes were already valid for inline linking. This adds the ability to use them as url references as well. --- tests/test_getRefs.py | 10 ++++++++++ textile/core.py | 10 ++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/test_getRefs.py b/tests/test_getRefs.py index f6e0ae4f..d3cfcd72 100644 --- a/tests/test_getRefs.py +++ b/tests/test_getRefs.py @@ -9,3 +9,13 @@ def test_getRefs(): result = t.urlrefs expect = {'Google': 'http://www.google.com'} assert result == expect + + t2 = Textile() + + result = t2.getRefs("my ftp [ftp]ftp://example.com") + expect = 'my ftp ' + assert result == expect + + result = t2.urlrefs + expect = {'ftp': 'ftp://example.com'} + assert result == expect diff --git a/textile/core.py b/textile/core.py index 3053969d..4ccb83f9 100644 --- a/textile/core.py +++ b/textile/core.py @@ -612,8 +612,14 @@ def glyphs(self, text): def getRefs(self, text): """Capture and store URL references in self.urlrefs.""" - pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', - re.U) + all_schemes = '|'.join([ + '(?:{0})'.format(scheme) + for scheme in self.url_schemes + ]) + pattern = re.compile( + r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:{0}:\/\/|\/)\S+)(?=\s|$)'.format(all_schemes), + re.U + ) text = pattern.sub(self.refs, text) return text From 6b787c951b65b5850f598ff348ef3b5402048177 Mon Sep 17 00:00:00 2001 From: Stephen Altamirano Date: Sun, 7 Mar 2021 14:33:32 -0800 Subject: [PATCH 2/2] Lift urlref regex creation to Textile constructor --- textile/core.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/textile/core.py b/textile/core.py index 4ccb83f9..7b66af02 100644 --- a/textile/core.py +++ b/textile/core.py @@ -212,6 +212,15 @@ def __init__(self, 
restricted=False, lite=False, noimage=False, else: self.url_schemes = self.unrestricted_url_schemes + all_schemes_re_s = '|'.join([ + '(?:{0})'.format(scheme) + for scheme in self.url_schemes + ]) + self.url_ref_regex = re.compile( + r'(?:(?<=^)|(?<=\s))\[(.+)\]\s?((?:{0}:\/\/|\/)\S+)(?=\s|$)'.format(all_schemes_re_s), + re.U + ) + def parse(self, text, rel=None, sanitize=False): """Parse the input text as textile and return html output.""" self.notes = OrderedDict() @@ -612,16 +621,7 @@ def glyphs(self, text): def getRefs(self, text): """Capture and store URL references in self.urlrefs.""" - all_schemes = '|'.join([ - '(?:{0})'.format(scheme) - for scheme in self.url_schemes - ]) - pattern = re.compile( - r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:{0}:\/\/|\/)\S+)(?=\s|$)'.format(all_schemes), - re.U - ) - text = pattern.sub(self.refs, text) - return text + return self.url_ref_regex.sub(self.refs, text) def refs(self, match): flag, url = match.groups()