From 254c35c45947ae7a6b22c6fe7d07aa96cce50b0c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 9 Feb 2023 23:02:20 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=20NEW:=20extended=20URL=20link=20?= =?UTF-8?q?customisation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `myst_url_schemes` configuration has been extended, in a back-compatible manner, to also allow customisation of the link URL/text, via templates. The `inline_attrs` extension also allows for specific links to be marked as external, using `{.external}`. --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 90 ++++++------- docs/conf.py | 23 ++++ docs/syntax/syntax.md | 70 ++++++++-- myst_parser/_docs.py | 4 +- myst_parser/config/main.py | 125 ++++++++++++++---- myst_parser/mdit_to_docutils/base.py | 113 +++++++++++----- myst_parser/mdit_to_docutils/utils.py | 37 ------ myst_parser/mocking.py | 9 +- myst_parser/parsers/docutils_.py | 69 +++++++++- myst_parser/sphinx_ext/directives.py | 4 +- myst_parser/sphinx_ext/main.py | 4 +- pyproject.toml | 4 +- .../fixtures/mock_include_errors.md | 3 +- tests/test_renderers/fixtures/myst-config.txt | 51 +++++++ .../fixtures/reporter_warnings.md | 2 +- tests/test_renderers/test_myst_config.py | 3 + 17 files changed, 442 insertions(+), 171 deletions(-) delete mode 100644 myst_parser/mdit_to_docutils/utils.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a88e8263..4c4f87a4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,7 +26,7 @@ repos: args: [--py37-plus] - repo: https://github.com/PyCQA/isort - rev: 5.11.4 + rev: 5.12.0 hooks: - id: isort diff --git a/CHANGELOG.md b/CHANGELOG.md index 70bd6e94..a4e31352 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,8 @@ Full Changelog: [v0.18.0...v0.18.1](https://github.com/executablebooks/MyST-Parser/compare/v0.18.0...v0.18.1) -- ⬆️ UPGRADE: docutils 0.19 support (#611) -- ✨ NEW: Add `attrs_image` (experimental) extension 
(#620) +- ⬆️ UPGRADE: docutils 0.19 support in +- ✨ NEW: Add `attrs_image` (experimental) extension in - e.g. `![image](image.png){#id .class width=100px}` - See: [Optional syntax section](docs/syntax/optional.md) - **Important**: This is an experimental extension, and may change in future releases @@ -53,24 +53,24 @@ myst: ### Key PRs -- ♻️📚 Restructure code base and documentation (#566) -- ⬆️ Drop Sphinx 3 and add Sphinx 5 support (#579) -- 🐛 FIX: `parse_directive_text` when body followed by options (#580) -- 🐛 FIX: floor table column widths to integers (#568), thanks to @Jean-Abou-Samra! +- ♻️📚 Restructure code base and documentation () +- ⬆️ Drop Sphinx 3 and add Sphinx 5 support () +- 🐛 FIX: `parse_directive_text` when body followed by options () +- 🐛 FIX: floor table column widths to integers (), thanks to ! ## 0.17.2 - 2022-04-17 Full Changelog: [v0.17.1...v0.17.2](https://github.com/executablebooks/MyST-Parser/compare/v0.17.1...v0.17.2) -- ♻️ REFACTOR: Replace `attrs` by `dataclasses` for configuration (#557) +- ♻️ REFACTOR: Replace `attrs` by `dataclasses` for configuration () ## 0.17.1 - 2022-04-15 Full Changelog: [v0.17.0...v0.17.1](https://github.com/executablebooks/MyST-Parser/compare/v0.17.0...v0.17.1) -- 🐛 FIX: Heading anchor resolution for parallel builds (#525) -- 🔧 MAINTAIN: Move packaging from setuptools to flit (#553) -- 🔧 MAINTAIN: Directly specify attrs dependency (#555) +- 🐛 FIX: Heading anchor resolution for parallel builds () +- 🔧 MAINTAIN: Move packaging from setuptools to flit () +- 🔧 MAINTAIN: Directly specify attrs dependency () ## 0.17.0 - 2022-02-11 @@ -153,18 +153,18 @@ Now the warning is still emitted, but also the heading is rendered as a [rubric] Other internal improvements primarily focused in improving support for the for "docutils-only" use, introduced in `v0.16`: -- ♻️ REFACTOR: `default_parser` -> `create_md_parser` in [#474](https://github.com/executablebooks/MyST-Parser/pull/474) -- 👌 IMPROVE: Add `bullet` attribute to 
`bullet_list` node in [#465](https://github.com/executablebooks/MyST-Parser/pull/465) -- 👌 IMPROVE: Use correct renderer for `state.inline_text` in [#466](https://github.com/executablebooks/MyST-Parser/pull/466) -- 👌 IMPROVE: Docutils parser settings in [#476](https://github.com/executablebooks/MyST-Parser/pull/476) -- 🐛 FIX: front-matter rendering with docutils in [#477](https://github.com/executablebooks/MyST-Parser/pull/477) -- 👌 IMPROVE: Code block highlighting in [#478](https://github.com/executablebooks/MyST-Parser/pull/478) -- 👌 IMPROVE: `note_refname` for docutils internal links in [#481](https://github.com/executablebooks/MyST-Parser/pull/481) -- 🐛 FIX: Ordered list starting number in [#483](https://github.com/executablebooks/MyST-Parser/pull/483) -- 👌 IMPROVE: Propagate enumerated list suffix in [#484](https://github.com/executablebooks/MyST-Parser/pull/484) -- 👌 IMPROVE: `DocutilsRenderer.create_highlighted_code_block` in [#488](https://github.com/executablebooks/MyST-Parser/pull/488) -- 🐛 FIX: Source line reporting for nested parsing in [#490](https://github.com/executablebooks/MyST-Parser/pull/490) -- 🔧 MAINTAIN: Implement `MockInliner.parse` in [#504](https://github.com/executablebooks/MyST-Parser/pull/504) +- ♻️ REFACTOR: `default_parser` -> `create_md_parser` in +- 👌 IMPROVE: Add `bullet` attribute to `bullet_list` node in +- 👌 IMPROVE: Use correct renderer for `state.inline_text` in +- 👌 IMPROVE: Docutils parser settings in +- 🐛 FIX: front-matter rendering with docutils in +- 👌 IMPROVE: Code block highlighting in +- 👌 IMPROVE: `note_refname` for docutils internal links in +- 🐛 FIX: Ordered list starting number in +- 👌 IMPROVE: Propagate enumerated list suffix in +- 👌 IMPROVE: `DocutilsRenderer.create_highlighted_code_block` in +- 🐛 FIX: Source line reporting for nested parsing in +- 🔧 MAINTAIN: Implement `MockInliner.parse` in ## 0.16.1 - 2021-12-16 @@ -215,7 +215,7 @@ which includes no direct install requirements on docutils or sphinx. 
See [MyST with Docutils](docs/docutils.md) for more information. -Thanks to help from [@cpitclaudel](https://github.com/cpitclaudel)! +Thanks to help from ! ### Include MyST files in RST files @@ -295,23 +295,23 @@ See [Tables syntax](docs/syntax/syntax.md#tables) for more information. ### Pull Requests -- 🐛 FIX: Add mandatory attributes on `enumerated_list` by @cpitclaudel in [#418](https://github.com/executablebooks/MyST-Parser/pull/418) -- 📚 DOCS: Add reference to MySTyc in landing page by @astrojuanlu in [#413](https://github.com/executablebooks/MyST-Parser/pull/413) -- ⬆️ UPGRADE: markdown-it-py v2, mdit-py-plugins v0.3 by @chrisjsewell in [#449](https://github.com/executablebooks/MyST-Parser/pull/449) -- 👌 IMPROVE: Table rendering by @chrisjsewell in [#450](https://github.com/executablebooks/MyST-Parser/pull/450) -- 🐛 FIX: Ensure parent files are re-built if `include` file changes by @chrisjsewell in [#451](https://github.com/executablebooks/MyST-Parser/pull/451) -- 🐛 FIX: Convert empty directive option to `None` by @chrisjsewell in [#452](https://github.com/executablebooks/MyST-Parser/pull/452) -- 👌 IMPROVE: Add `\\` for hard-breaks in latex by @chrisjsewell in [#453](https://github.com/executablebooks/MyST-Parser/pull/453) -- 🔧 MAINTAIN: Remove empty "sphinx" extra by @hukkin in [#350](https://github.com/executablebooks/MyST-Parser/pull/350) -- ✨ NEW: Add `fieldlist` extension by @chrisjsewell in [#455](https://github.com/executablebooks/MyST-Parser/pull/455) -- ✨ NEW: Add Docutils MyST config and CLI by @cpitclaudel in [#426](https://github.com/executablebooks/MyST-Parser/pull/426) -- 🔧 MAINTAIN: Add publishing job for `myst-docutils` by @chrisjsewell in [#456](https://github.com/executablebooks/MyST-Parser/pull/456) -- 🧪 TESTS: Add for `gettext_additional_targets` by @jpmckinney in [#459](https://github.com/executablebooks/MyST-Parser/pull/459) +- 🐛 FIX: Add mandatory attributes on `enumerated_list` by in +- 📚 DOCS: Add reference to MySTyc in landing page 
by in +- ⬆️ UPGRADE: markdown-it-py v2, mdit-py-plugins v0.3 by in +- 👌 IMPROVE: Table rendering by in +- 🐛 FIX: Ensure parent files are re-built if `include` file changes by in +- 🐛 FIX: Convert empty directive option to `None` by in +- 👌 IMPROVE: Add `\\` for hard-breaks in latex by in +- 🔧 MAINTAIN: Remove empty "sphinx" extra by in +- ✨ NEW: Add `fieldlist` extension by in +- ✨ NEW: Add Docutils MyST config and CLI by in +- 🔧 MAINTAIN: Add publishing job for `myst-docutils` by in +- 🧪 TESTS: Add for `gettext_additional_targets` by in ### New Contributors -- @cpitclaudel made their first contribution in [#418](https://github.com/executablebooks/MyST-Parser/pull/418) -- @astrojuanlu made their first contribution in [#413](https://github.com/executablebooks/MyST-Parser/pull/413) +- made their first contribution in +- made their first contribution in **Full Changelog**: @@ -352,7 +352,7 @@ Set `myst_url_schemes = None`, to revert to the previous default. Use this option to specify a custom function to auto-generate heading anchors (see [Auto-generated header anchors](docs/syntax/optional.md#auto-generated-header-anchors)). -Thanks to [@jpmckinney](https://github.com/jpmckinney)! +Thanks to ! ## 0.14.0 - 2021-05-04 @@ -433,7 +433,7 @@ A warning (of type `myst.nested_header`) is now emitted when this occurs. - 👌 IMPROVE: Store `rawtext` in AST nodes: We now ensure that the raw text is propagated from the Markdown tokens to the Sphinx AST. In particular, this is required by the `gettext` builder, to generate translation POT templates. - Thanks to [@jpmckinney](https://github.com/jpmckinney)! + Thanks to ! - ✨ NEW: Add warning types `myst.subtype`: All parsing warnings are assigned a type/subtype, and also the messages are appended with them. These warning types can be suppressed with the sphinx `suppress_warnings` config option. @@ -481,7 +481,7 @@ Minor fixes: : Add isort and mypy type checking to code base. 
-(thanks to contributors @akhmerov, @tfiers) +(thanks to contributors , ) ## 0.13.1 - 2020-12-31 @@ -576,7 +576,7 @@ I’m an inline image: ## 0.12.10 - 2020-09-21 🐛 FIX: allow dates to be parsed in frontmatter. -: This fixes a bug that would raise errors at parse time if non-string date objects were in front-matter YAML. See [#253](https://github.com/executablebooks/MyST-Parser/pull/253) +: This fixes a bug that would raise errors at parse time if non-string date objects were in front-matter YAML. See ## 0.12.9 - 2020-09-08 @@ -641,7 +641,7 @@ See the [Definition Lists documentation](https://myst-parser.readthedocs.io/en/l - How-to [include rST files into a Markdown file](https://myst-parser.readthedocs.io/en/latest/using/howto.html#include-rst-files-into-a-markdown-file) - How-to [Use sphinx.ext.autodoc in Markdown files](https://myst-parser.readthedocs.io/en/latest/using/howto.html#use-sphinx-ext-autodoc-in-markdown-files) - Thanks to [@stephenroller](https://github.com/stephenroller) for the contribution 🎉 + Thanks to for the contribution 🎉 ## 0.12.1 - 2020-08-19 @@ -709,9 +709,9 @@ More configuration options for math parsing (see [MyST configuration options](ht ### Improved 👌 -* Support Sphinx version 3 [#197](https://github.com/executablebooks/MyST-Parser/pull/197) ([@chrisjsewell](https://github.com/chrisjsewell)) -* Update Trove Classifiers [#192](https://github.com/executablebooks/MyST-Parser/pull/192) ([@chrisjsewell](https://github.com/chrisjsewell)) -* Add functionality to use docutils specialized role [#189](https://github.com/executablebooks/MyST-Parser/pull/189) ([@chrisjsewell](https://github.com/chrisjsewell)) +* Support Sphinx version 3 in () +* Update Trove Classifiers in () +* Add functionality to use docutils specialized role in () ### Contributors to this release diff --git a/docs/conf.py b/docs/conf.py index 0bcab599..2a931d41 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -102,6 +102,29 @@ "attrs_inline", "inv_link", ] +myst_url_schemes = { + 
"http": None, + "https": None, + "mailto": None, + "ftp": None, + "wiki": "https://en.wikipedia.org/wiki/{{path}}#{{fragment}}", + "doi": "https://doi.org/{{path}}", + "gh-pr": { + "url": "https://github.com/executablebooks/MyST-Parser/pull/{{path}}#{{fragment}}", + "title": "PR #{{path}}", + "classes": ["github"], + }, + "gh-issue": { + "url": "https://github.com/executablebooks/MyST-Parser/issue/{{path}}#{{fragment}}", + "title": "Issue #{{path}}", + "classes": ["github"], + }, + "gh-user": { + "url": "https://github.com/{{path}}", + "title": "@{{path}}", + "classes": ["github"], + }, +} myst_number_code_blocks = ["typescript"] myst_heading_anchors = 2 myst_footnote_transition = True diff --git a/docs/syntax/syntax.md b/docs/syntax/syntax.md index c2f5b277..9da01eb9 100644 --- a/docs/syntax/syntax.md +++ b/docs/syntax/syntax.md @@ -301,20 +301,70 @@ Here are some examples: ::: -### Customising destination resolution +### Customising external URL resolution -You can customise the default destination resolution rules by setting the following [configuration options](../configuration.md): +:::{versionadded} 0.19 +`myst_url_schemes` now allows for customising how the links are converted to URLs, +and the `attrs_inline` extension can be used to specify certain links as external. +::: + +By default, all links which begin with `http:`, `https:`, `ftp:`, or `mailto:` will be treated as external [URL] links. +You can customise this behaviour in a number of ways using [configuration options](../configuration.md). + +Most simply, by setting the `myst_all_links_external` configuration option to `True`, +all links will be treated as external [URL] links. + +To apply selectively to specific links, you can enable the [attrs_inline](optional.md#inline-attributes) extension, +then add an `external` class to the link.\ +For example, `[my-external-link](my-external-link){.external}` becomes [my-external-link](my-external-link){.external}. 
-`myst_all_links_external` (default: `False`) -: If `True`, then all links will be treated as external links. +To specify a custom list of URL schemes, you can set the `myst_url_schemes` configuration option. +By default this is set to `["http", "https", "ftp", "mailto"]`. + +As well as being a list of strings, `myst_url_schemes` can also be a dictionary, +where the keys are the URL schemes, and the values define how the links are converted to URLs. +This allows you to customise the conversion of links to URLs for specific schemes, for example: + +```python +myst_url_schemes = { + "http": None, + "https": None, + "wiki": "https://en.wikipedia.org/wiki/{{path}}#{{fragment}}", + "doi": "https://doi.org/{{path}}", + "gh-issue": { + "url": "https://github.com/executablebooks/MyST-Parser/issue/{{path}}#{{fragment}}", + "title": "Issue #{{path}}", + "classes": ["github"], + }, +} +``` -`myst_url_schemes` (default: `["http", "https", "ftp", "mailto"]`) -: A list of [URL] schemes which will be treated as external links. +Allows for links such as: + +- `[URI](wiki:Uniform_Resource_Identifier#URI_references)` is converted to [URI](wiki:Uniform_Resource_Identifier#URI_references) +- `<doi:10.1186/1471-2105-8-401>` is converted to <doi:10.1186/1471-2105-8-401> +- `<gh-issue:639>` is converted to <gh-issue:639> + +:::{tip} +You can also use the [sphinx-tippy](https://sphinx-tippy.readthedocs.io) extension to add rich "hover" tooltips to links. + +Adding the `github` class above integrates well with the [pydata-sphinx-theme's GitHub link formatting](https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/theme-elements.html#link-shortening-for-git-repository-services) +::: + +The value of each scheme can be: + +- `None`: the link is converted directly to an external URL. +- A string: the link is converted to an external URL using the string as a template. +- A dictionary: the link is converted to an external URL using the dictionary’s `url` key as a template. + - The (optional) `title` key is a template for the link’s implicit title, i.e.
it is used if the link has no explicit title. + - The (optional) `classes` key is a list of classes to add to the link. + +The templates for `url` and `title` can use variables (enclosed by `{{ }}`), which are substituted for the corresponding parts of the link `<scheme>://<netloc>/<path>;<params>?<query>#<fragment>`. +For example: -`myst_ref_domains` (default: `[]`) -: A list of [sphinx domains](inv:sphinx#domain) which will be allowed for internal links. - For example, `myst_ref_domains = ("std", "py")` will only allow cross-references to `std` and `py` domains. - If the list is empty, then all domains will be allowed. +- `scheme`: the URL scheme, e.g. `wiki`. +- `path`: the path part of the URL, e.g. `Uniform_Resource_Identifier`. +- `fragment`: the fragment part of the URL, e.g. `URI_references`. (syntax/inv_links)= ### Cross-project (inventory) links diff --git a/myst_parser/_docs.py b/myst_parser/_docs.py index da4ba41a..85b9e633 100644 --- a/myst_parser/_docs.py +++ b/myst_parser/_docs.py @@ -76,7 +76,7 @@ def run(self): continue # filter by sphinx options - if "sphinx" in self.options and field.metadata.get("docutils_only"): + if "sphinx" in self.options and "sphinx" in field.metadata.get("omit", []): continue if "extensions" in self.options: @@ -98,7 +98,7 @@ def run(self): if field.metadata.get("extension"): description = f"{field.metadata.get('extension')}: {description}" default = self.field_default(value) - ctype = self.field_type(field) + ctype = field.metadata.get("doc_type") or self.field_type(field) text.extend( [ f"* - `{name}`", diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index 98ed71c1..47bd05a5 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -6,13 +6,15 @@ Dict, Iterable, Iterator, + List, Optional, Sequence, + Set, Tuple, Union, - cast, ) +from myst_parser._compat import TypedDict from myst_parser.warnings_ import MystWarnings from .dc_validators import ( deep_iterable, @@ -26,7 +28,7 @@ ) -def check_extensions(_, field: dc.Field, value:
Any): +def check_extensions(inst: "MdParserConfig", field: dc.Field, value: Any) -> None: """Check that the extensions are a list of known strings""" if not isinstance(value, Iterable): raise TypeError(f"'{field.name}' not iterable: {value}") @@ -52,9 +54,68 @@ def check_extensions(_, field: dc.Field, value: Any): ) if diff: raise ValueError(f"'{field.name}' items not recognised: {diff}") + setattr(inst, field.name, set(value)) -def check_sub_delimiters(_, field: dc.Field, value: Any): +class UrlSchemeType(TypedDict, total=False): + """Type of the external schemes dictionary.""" + + url: str # TODO add Required when python version supports it + title: str + classes: List[str] + + +def check_url_schemes(inst: "MdParserConfig", field: dc.Field, value: Any) -> None: + """Check that the external schemes are of the right format.""" + if isinstance(value, (list, tuple)): + if not all(isinstance(v, str) for v in value): + raise TypeError(f"'{field.name}' is not a list of strings: {value!r}") + value = {v: None for v in value} + + if not isinstance(value, dict): + raise TypeError(f"'{field.name}' is not a dictionary: {value!r}") + + new_dict: Dict[str, Optional[UrlSchemeType]] = {} + for key, val in value.items(): + if not isinstance(key, str): + raise TypeError(f"'{field.name}' key is not a string: {key!r}") + if val is None: + new_dict[key] = val + elif isinstance(val, str): + new_dict[key] = {"url": val} + elif isinstance(val, dict): + if not all(isinstance(k, str) for k in val): + raise TypeError(f"'{field.name}[{key}]' keys are not strings: {val!r}") + if "url" not in val: + raise TypeError( + f"'{field.name}[{key}]' does not contain a 'url' key: {val!r}" + ) + if not isinstance(val["url"], str): + raise TypeError( + f"'{field.name}[{key}][url]' is not a string: {val['url']!r}" + ) + if "title" in val and not isinstance(val["title"], str): + raise TypeError( + f"'{field.name}[{key}][title]' is not a string: {val['title']!r}" + ) + if ( + "classes" in val + and not 
isinstance(val["classes"], list) + and not all(isinstance(c, str) for c in val["classes"]) + ): + raise TypeError( + f"'{field.name}[{key}][classes]' is not a list of str: {val['classes']!r}" + ) + new_dict[key] = val # type: ignore + else: + raise TypeError( + f"'{field.name}[{key}]' value is not a string or dict: {val!r}" + ) + + setattr(inst, field.name, new_dict) + + +def check_sub_delimiters(_: "MdParserConfig", field: dc.Field, value: Any) -> None: """Check that the sub_delimiters are a tuple of length 2 of strings of length 1""" if (not isinstance(value, (tuple, list))) or len(value) != 2: raise TypeError(f"'{field.name}' is not a tuple of length 2: {value}") @@ -65,7 +126,7 @@ def check_sub_delimiters(_, field: dc.Field, value: Any): ) -def check_inventories(_, field: dc.Field, value: Any): +def check_inventories(_: "MdParserConfig", field: dc.Field, value: Any) -> None: """Check that the inventories are a dict of {str: (str, Optional[str])}""" if not isinstance(value, dict): raise TypeError(f"'{field.name}' is not a dictionary: {value!r}") @@ -89,6 +150,18 @@ class MdParserConfig: Note in the sphinx configuration these option names are prepended with ``myst_`` """ + def __repr__(self) -> str: + """Return a string representation of the config.""" + # this replicates the auto-generated __repr__, + # but also allows for a repr function to be defined on the field + attributes: List[str] = [] + for name, val, f in self.as_triple(): + if not f.repr: + continue + val_str = f.metadata.get("repr_func", repr)(val) + attributes.append(f"{name}={val_str}") + return f"{self.__class__.__name__}({', '.join(attributes)})" + # TODO replace commonmark_only, gfm_only with a single option commonmark_only: bool = dc.field( @@ -106,8 +179,8 @@ class MdParserConfig: }, ) - enable_extensions: Sequence[str] = dc.field( - default_factory=list, + enable_extensions: Set[str] = dc.field( + default_factory=set, metadata={"validator": check_extensions, "help": "Enable syntax 
extensions"}, ) @@ -127,14 +200,19 @@ class MdParserConfig: }, ) - # see https://en.wikipedia.org/wiki/List_of_URI_schemes - url_schemes: Optional[Iterable[str]] = dc.field( - default=cast(Optional[Iterable[str]], ("http", "https", "mailto", "ftp")), + url_schemes: Dict[str, Optional[UrlSchemeType]] = dc.field( + default_factory=lambda: { + "http": None, + "https": None, + "mailto": None, + "ftp": None, + }, metadata={ - "validator": optional( - deep_iterable(instance_of(str), instance_of((list, tuple))) - ), - "help": "URL scheme prefixes identified as external links", + "validator": check_url_schemes, + "help": "URI schemes that are converted to external links", + "repr_func": lambda v: repr(tuple(v)), + # Note, lists of strings will be coerced to dicts in the validator + "doc_type": "list[str] | dict[str, None | str | dict]", }, ) @@ -145,7 +223,7 @@ class MdParserConfig: deep_iterable(instance_of(str), instance_of((list, tuple))) ), "help": "Sphinx domain names to search in for link references", - "sphinx_only": True, + "omit": ["docutils"], }, ) @@ -179,19 +257,19 @@ class MdParserConfig: "validator": optional(is_callable), "help": "Function for creating heading anchors", "global_only": True, - "sphinx_only": True, # TODO docutils config doesn't handle callables + "omit": ["docutils"], # TODO docutils config doesn't handle callables }, ) html_meta: Dict[str, str] = dc.field( default_factory=dict, - repr=False, metadata={ "validator": deep_mapping( instance_of(str), instance_of(str), instance_of(dict) ), "merge_topmatter": True, "help": "HTML meta tags", + "repr_func": lambda v: f"{{{', '.join(f'{k}: ...' 
for k in v)}}}", }, ) @@ -215,7 +293,6 @@ class MdParserConfig: substitutions: Dict[str, Union[str, int, float]] = dc.field( default_factory=dict, - repr=False, metadata={ "validator": deep_mapping( instance_of(str), instance_of((str, int, float)), instance_of(dict) @@ -223,16 +300,18 @@ class MdParserConfig: "merge_topmatter": True, "help": "Substitutions mapping", "extension": "substitutions", + "repr_func": lambda v: f"{{{', '.join(f'{k}: ...' for k in v)}}}", }, ) sub_delimiters: Tuple[str, str] = dc.field( default=("{", "}"), + repr=False, metadata={ "validator": check_sub_delimiters, "help": "Substitution delimiters", "extension": "substitutions", - "sphinx_only": True, + "omit": ["docutils"], }, ) @@ -285,7 +364,7 @@ class MdParserConfig: "help": "Update sphinx.ext.mathjax configuration to ignore `$` delimiters", "extension": "dollarmath", "global_only": True, - "sphinx_only": True, + "omit": ["docutils"], }, ) @@ -296,7 +375,7 @@ class MdParserConfig: "help": "MathJax classes to add to math HTML", "extension": "dollarmath", "global_only": True, - "sphinx_only": True, + "omit": ["docutils"], }, ) @@ -316,7 +395,7 @@ class MdParserConfig: metadata={ "validator": deep_iterable(instance_of(str), instance_of((list, tuple))), "help": "A list of warning types to suppress warning messages", - "docutils_only": True, + "omit": ["sphinx"], "global_only": True, }, ) @@ -326,7 +405,7 @@ class MdParserConfig: metadata={ "validator": instance_of(bool), "help": "Syntax highlight code blocks with pygments", - "docutils_only": True, + "omit": ["sphinx"], }, ) @@ -336,7 +415,7 @@ class MdParserConfig: metadata={ "validator": check_inventories, "help": "Mapping of key to (url, inv file), for intra-project referencing", - "docutils_only": True, + "omit": ["sphinx"], "global_only": True, }, ) diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py index 93639ed0..d69c614b 100644 --- a/myst_parser/mdit_to_docutils/base.py +++ 
b/myst_parser/mdit_to_docutils/base.py @@ -43,7 +43,7 @@ from myst_parser import inventory from myst_parser._compat import findall -from myst_parser.config.main import MdParserConfig +from myst_parser.config.main import MdParserConfig, UrlSchemeType from myst_parser.mocking import ( MockIncludeDirective, MockingError, @@ -55,7 +55,6 @@ from myst_parser.parsers.directives import MarkupError, parse_directive_text from myst_parser.warnings_ import MystWarnings, create_warning from .html_to_nodes import html_to_nodes -from .utils import is_external_url if TYPE_CHECKING: from sphinx.environment import BuildEnvironment @@ -67,6 +66,14 @@ def make_document(source_path="notset", parser_cls=RSTParser) -> nodes.document: return new_document(source_path, settings=settings) +REGEX_SCHEME = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.-]*):") +"""RFC 7595: A non-empty scheme component followed by a colon (:), +consisting of a sequence of characters beginning with a letter +and followed by any combination of letters, digits, plus (+), period (.), or hyphen (-). +Although schemes are case-insensitive, the canonical form is lowercase +and documents that specify schemes must do so with lowercase letters. +""" +REGEX_URI_TEMPLATE = re.compile(r"{{\s*(scheme|netloc|path|params|query|fragment)\s*}}") REGEX_DIRECTIVE_START = re.compile(r"^[\s]{0,3}([`]{3,10}|[~]{3,10}|[:]{3,10})\{") @@ -774,11 +781,15 @@ def render_heading(self, token: SyntaxTreeNode) -> None: def render_link(self, token: SyntaxTreeNode) -> None: """Parse `` or `[text](link "title")` syntax to docutils AST: - - If `<>` autolink, forward to `render_autolink` - If `myst_all_links_external` is True, forward to `render_external_url` - - If link is an external URL, forward to `render_external_url` - - External URLs start with a scheme (e.g. `http:`) in `myst_url_schemes`, - or any scheme if `myst_url_schemes` is None. 
+ - If the link token has a class attribute containing `external`, + forward to `render_external_url` + - If the link is an id link (e.g. `#id`), forward to `render_id_link` + - If the link has a scheme, and the scheme is in `url_schemes` (e.g. `http:`), + forward to `render_external_url` + - If the link has an `inv:` scheme, and `inv_link` is enabled, + forward to `render_inventory_link` + - If the link is an autolink/linkify type link, forward to `render_external_url` - Otherwise, forward to `render_internal_link` """ if ( @@ -788,29 +799,30 @@ def render_link(self, token: SyntaxTreeNode) -> None: ): return self.render_external_url(token) - href = cast(str, token.attrGet("href") or "") + if "class" in token.attrs and "external" in str(token.attrs["class"]).split(): + return self.render_external_url(token) + href = cast(str, token.attrGet("href") or "") if href.startswith("#"): return self.render_id_link(token) - # TODO ideally whether inv_link is enabled could be precomputed - if "inv_link" in self.md_config.enable_extensions and href.startswith("inv:"): - return self.create_inventory_link(token) + scheme_match = REGEX_SCHEME.match(href) + scheme = None if scheme_match is None else scheme_match.group(1) + if scheme in self.md_config.url_schemes: + return self.render_external_url(token, self.md_config.url_schemes[scheme]) - if token.info == "auto": # handles both autolink and linkify - return self.render_external_url(token) + if scheme == "inv" and "inv_link" in self.md_config.enable_extensions: + return self.render_inventory_link(token) - # Check for external URL - url_scheme = urlparse(href).scheme - allowed_url_schemes = self.md_config.url_schemes - if (allowed_url_schemes is None and url_scheme) or ( - allowed_url_schemes is not None and url_scheme in allowed_url_schemes - ): + if token.info == "auto": + # handles both autolink and linkify, these are currently never internal return self.render_external_url(token) return self.render_internal_link(token) - def
render_external_url(self, token: SyntaxTreeNode) -> None: + def render_external_url( + self, token: SyntaxTreeNode, conversion: None | UrlSchemeType = None + ) -> None: """Render link token (including autolink and linkify), where the link has been identified as an external URL. """ @@ -819,9 +831,44 @@ def render_external_url(self, token: SyntaxTreeNode) -> None: self.copy_attributes( token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"} ) - ref_node["refuri"] = escapeHtml(token.attrGet("href") or "") # type: ignore[arg-type] - with self.current_node_context(ref_node, append=True): - self.render_children(token) + uri = cast(str, token.attrGet("href") or "") + implicit_text: str | None = None + + if conversion is not None: + # implicit_template: str | None = None + # if isinstance(conversion, (list, tuple)): + # href_template, implicit_template = conversion + # else: + # href_template = conversion + # markdown-it encodes unsafe characters with percent-encoding + # we want to get back the original, source input + uri = self.md.normalizeLinkText(uri) + parsed = urlparse(uri) + # Note we specifically do not use jinja2 here, + # to restrict the scope of the templating language, + # so that it can be used in a language agnostic way + uri = re.sub( + REGEX_URI_TEMPLATE, + lambda match: getattr(parsed, match.group(1), ""), + conversion["url"], + ) + uri = self.md.normalizeLink(uri) + if "title" in conversion and (token.info == "auto" or not token.children): + implicit_text = re.sub( + REGEX_URI_TEMPLATE, + lambda match: getattr(parsed, match.group(1), ""), + conversion["title"], + ) + if "classes" in conversion: + ref_node["classes"].extend(conversion["classes"]) + + ref_node["refuri"] = escapeHtml(uri) + if implicit_text is not None: + with self.current_node_context(ref_node, append=True): + self.current_node.append(nodes.Text(implicit_text)) + else: + with self.current_node_context(ref_node, append=True): + self.render_children(token) def 
render_id_link(self, token: SyntaxTreeNode) -> None: """Render link token like `[text](#id)`, to a local target.""" @@ -856,7 +903,7 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None: with self.current_node_context(ref_node, append=True): self.render_children(token) - def create_inventory_link(self, token: SyntaxTreeNode) -> None: + def render_inventory_link(self, token: SyntaxTreeNode) -> None: r"""Create a link to an inventory object. This assumes the href is of the form `:#`. @@ -867,14 +914,12 @@ def create_inventory_link(self, token: SyntaxTreeNode) -> None: `\*` is treated as a plain `*`. """ - # account for autolinks - if token.info == "auto": - # autolinks escape the HTML, which we don't want - href = token.children[0].content - explicit = False - else: - href = cast(str, token.attrGet("href") or "") - explicit = bool(token.children) + # markdown-it encodes unsafe characters with percent-encoding + # we want to get back the original, source input + href = self.md.normalizeLinkText(cast(str, token.attrGet("href") or "")) + + # note if the link had explicit text or not (autolinks are always implicit) + explicit = False if token.info == "auto" else bool(token.children) # split the href up into parts uri_parts = urlparse(href) @@ -994,9 +1039,9 @@ def render_image(self, token: SyntaxTreeNode) -> None: self.add_line_and_source_path(img_node, token) destination = cast(str, token.attrGet("src") or "") - if self.md_env.get("relative-images", None) is not None and not is_external_url( - destination, None, True - ): + if self.md_env.get( + "relative-images", None + ) is not None and not REGEX_SCHEME.match(destination): # make the path relative to an "including" document # this is set when using the `relative-images` option of the MyST `include` directive destination = os.path.normpath( diff --git a/myst_parser/mdit_to_docutils/utils.py b/myst_parser/mdit_to_docutils/utils.py deleted file mode 100644 index 5284e481..00000000 --- 
a/myst_parser/mdit_to_docutils/utils.py +++ /dev/null @@ -1,37 +0,0 @@ -import html -from typing import Iterable, Optional -from urllib.parse import quote, urlparse - - -def escape_url(raw: str) -> str: - """ - Escape urls to prevent code injection craziness. (Hopefully.) - """ - return html.escape(quote(html.unescape(raw), safe="/#:()*?=%@+,&")) - - -def is_external_url( - reference: str, - known_url_schemes: Optional[Iterable[str]], - match_fragment: bool = False, -) -> bool: - """Return if a reference should be recognised as an external URL. - - URLs are of the format: scheme://netloc/path;parameters?query#fragment - - This checks if there is a url scheme (e.g. 'https') and, if so, - if the scheme is is the list of known_url_schemes (if supplied). - - :param known_url_schemes: e.g. ["http", "https", "mailto"] - If None, match all schemes - :param match_fragment: If True and a fragment found, then True will be returned, - irrespective of a scheme match - - """ - url_check = urlparse(reference) - scheme_known = ( - url_check.scheme in known_url_schemes - if known_url_schemes is not None - else bool(url_check.scheme) - ) - return scheme_known or (match_fragment and url_check.fragment != "") diff --git a/myst_parser/mocking.py b/myst_parser/mocking.py index 1e8a7695..563711f1 100644 --- a/myst_parser/mocking.py +++ b/myst_parser/mocking.py @@ -376,12 +376,13 @@ def run(self) -> list[nodes.Element]: # tab_width = self.options.get("tab-width", self.document.settings.tab_width) try: file_content = path.read_text(encoding=encoding, errors=error_handler) + except FileNotFoundError: + raise DirectiveError( + 4, f'Directive "{self.name}": file not found: {str(path)!r}' + ) except Exception as error: raise DirectiveError( - 4, - 'Directive "{}": error reading file: {}\n{}.'.format( - self.name, path, error - ), + 4, f'Directive "{self.name}": error reading file: {path}\n{error}.' 
) # get required section of text diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py index efa2e78d..aeca1897 100644 --- a/myst_parser/parsers/docutils_.py +++ b/myst_parser/parsers/docutils_.py @@ -1,6 +1,17 @@ """MyST Markdown parser for docutils.""" from dataclasses import Field -from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Sequence, + Set, + Tuple, + Union, +) import yaml from docutils import frontend, nodes @@ -16,7 +27,7 @@ ) from myst_parser.mdit_to_docutils.base import DocutilsRenderer from myst_parser.parsers.mdit import create_md_parser -from myst_parser.warnings_ import create_warning +from myst_parser.warnings_ import MystWarnings, create_warning def _validate_int( @@ -26,6 +37,16 @@ def _validate_int( return int(value) +def _validate_comma_separated_set( + setting, value, option_parser, config_parser=None, config_section=None +) -> Set[str]: + """Validate a comma-separated list setting, returning it as a set.""" + value = frontend.validate_comma_separated_list( + setting, value, option_parser, config_parser, config_section + ) + return set(value) + + def _create_validate_tuple(length: int) -> Callable[..., Tuple[str, ...]]: """Create a validator for a tuple of length `length`.""" @@ -73,18 +94,41 @@ def _validate_yaml( output = yaml.safe_load(value) except Exception: raise ValueError("Invalid YAML string") - if "validator" in field.metadata: - field.metadata["validator"](None, field, output) + if not isinstance(output, dict): + raise ValueError("Expecting a YAML dictionary") return output return _validate_yaml +def _validate_url_schemes( + setting, value, option_parser, config_parser=None, config_section=None +): + """Validate a url_schemes setting. + + This is a tricky one, because it can be either a comma-separated list or a YAML dictionary. 
+ """ + try: + output = yaml.safe_load(value) + except Exception: + raise ValueError("Invalid YAML string") + if isinstance(output, str): + output = {k: None for k in output.split(",")} + if not isinstance(output, dict): + raise ValueError("Expecting a comma-delimited str or YAML dictionary") + return output + + def _attr_to_optparse_option(at: Field, default: Any) -> Tuple[dict, str]: """Convert a field into a Docutils optparse options dict. :returns: (option_dict, default) """ + if at.name == "url_schemes": + return { + "metavar": "|", + "validator": _validate_url_schemes, + }, ",".join(default) if at.type is int: return {"metavar": "", "validator": _validate_int}, str(default) if at.type is bool: @@ -110,6 +154,11 @@ def _attr_to_optparse_option(at: Field, default: Any) -> Tuple[dict, str]: "metavar": "", "validator": frontend.validate_comma_separated_list, }, ",".join(default) + if at.type == Set[str]: + return { + "metavar": "", + "validator": _validate_comma_separated_set, + }, ",".join(default) if at.type == Tuple[str, str]: return { "metavar": "", @@ -159,7 +208,7 @@ def create_myst_settings_spec(config_cls=MdParserConfig, prefix: str = "myst_"): return tuple( attr_to_optparse_option(at, getattr(defaults, at.name), prefix) for at in config_cls.get_fields() - if (not at.metadata.get("sphinx_only", False)) + if ("docutils" not in at.metadata.get("omit", [])) ) @@ -171,7 +220,7 @@ def create_myst_config( """Create a configuration instance from the given settings.""" values = {} for attribute in config_cls.get_fields(): - if attribute.metadata.get("sphinx_only", False): + if "docutils" in attribute.metadata.get("omit", []): continue setting = f"{prefix}{attribute.name}" val = getattr(settings, setting, DOCUTILS_UNSET) @@ -226,6 +275,14 @@ def parse(self, inputstring: str, document: nodes.document) -> None: document.append(error) config = MdParserConfig() + if "attrs_image" in config.enable_extensions: + create_warning( + document, + "The `attrs_image` extension 
is deprecated, " + "please use `attrs_inline` instead.", + MystWarnings.DEPRECATED, + ) + # update the global config with the file-level config try: topmatter = read_topmatter(inputstring) diff --git a/myst_parser/sphinx_ext/directives.py b/myst_parser/sphinx_ext/directives.py index bf69e8ca..415c5498 100644 --- a/myst_parser/sphinx_ext/directives.py +++ b/myst_parser/sphinx_ext/directives.py @@ -72,9 +72,7 @@ def run(self) -> List[nodes.Node]: myst_extensions = copy(state._renderer.md_config.enable_extensions) node = nodes.Element() try: - state._renderer.md_config.enable_extensions = list( - state._renderer.md_config.enable_extensions - ) + ["html_image"] + state._renderer.md_config.enable_extensions.add("html_image") state.nested_parse(self.content, self.content_offset, node) finally: state._renderer.md_config.enable_extensions = myst_extensions diff --git a/myst_parser/sphinx_ext/main.py b/myst_parser/sphinx_ext/main.py index c7e2a672..7bb9a169 100644 --- a/myst_parser/sphinx_ext/main.py +++ b/myst_parser/sphinx_ext/main.py @@ -29,7 +29,7 @@ def setup_sphinx(app: Sphinx, load_parser=False): app.add_post_transform(MystReferenceResolver) for name, default, field in MdParserConfig().as_triple(): - if not field.metadata.get("docutils_only", False): + if "sphinx" not in field.metadata.get("omit", []): # TODO add types? 
app.add_config_value(f"myst_{name}", default, "env", types=Any) @@ -51,7 +51,7 @@ def create_myst_config(app): values = { name: app.config[f"myst_{name}"] for name, _, field in MdParserConfig().as_triple() - if not field.metadata.get("docutils_only", False) + if "sphinx" not in field.metadata.get("omit", []) } try: diff --git a/pyproject.toml b/pyproject.toml index 20ee3d27..869c6049 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,7 +119,9 @@ module = ["docutils.*", "yaml.*"] ignore_missing_imports = true [tool.pytest.ini_options] -filterwarnings = [] +filterwarnings = [ + "ignore:.*The default for the setting.*:FutureWarning", +] [tool.coverage.run] omit = ["*/_docs.py"] diff --git a/tests/test_renderers/fixtures/mock_include_errors.md b/tests/test_renderers/fixtures/mock_include_errors.md index c293be1f..6e443cb6 100644 --- a/tests/test_renderers/fixtures/mock_include_errors.md +++ b/tests/test_renderers/fixtures/mock_include_errors.md @@ -11,8 +11,7 @@ Non-existent path: ```{include} other.md ``` . -tmpdir/test.md:1: (SEVERE/4) Directive "include": error reading file: tmpdir/other.md -[Errno 2] No such file or directory: 'tmpdir/other.md'. +tmpdir/test.md:1: (SEVERE/4) Directive "include": file not found: 'tmpdir/other.md' . Error in include file: diff --git a/tests/test_renderers/fixtures/myst-config.txt b/tests/test_renderers/fixtures/myst-config.txt index 94aabb1a..e7728744 100644 --- a/tests/test_renderers/fixtures/myst-config.txt +++ b/tests/test_renderers/fixtures/myst-config.txt @@ -175,6 +175,41 @@ www.commonmark.org/he +. + + + + ab:xyz + + + cd:xyz + + + My text +. + +[url_schemes_list] --myst-url-schemes=ab +. +[](ab:c) [](http:c) +. + + + + + +
+ + Docutils System Messages + <system_message backrefs="problematic-1" ids="system-message-1" level="3" line="1" source="<string>" type="ERROR"> + <paragraph> + Unknown target name: "http:c". + +<string>:1: (ERROR/3) Unknown target name: "http:c". +. + [heading_anchors] --myst-heading-anchors=1 . # My title @@ -210,6 +245,17 @@ text d . +[attrs_image] --myst-enable-extensions=attrs_image +. +a +. +<document source="<string>"> + <paragraph> + a + +<string>:: (WARNING/2) The `attrs_image` extension is deprecated, please use `attrs_inline` instead. [myst.deprecated] +. + [attrs_inline_span] --myst-enable-extensions=attrs_inline . [content]{#id .a .b} @@ -234,6 +280,8 @@ text . <https://example.com>{.a .b} +[hi](abc){.external} + (other)= [text1](https://example.com){#id1 .a .b} @@ -247,6 +295,9 @@ text <paragraph> <reference classes="a b" refuri="https://example.com"> https://example.com + <paragraph> + <reference classes="external" refuri="abc"> + hi <target refid="other"> <paragraph ids="other" names="other"> <reference classes="a b" ids="id1" names="id1" refuri="https://example.com"> diff --git a/tests/test_renderers/fixtures/reporter_warnings.md b/tests/test_renderers/fixtures/reporter_warnings.md index 68b76b1c..8b112b9e 100644 --- a/tests/test_renderers/fixtures/reporter_warnings.md +++ b/tests/test_renderers/fixtures/reporter_warnings.md @@ -61,7 +61,7 @@ myst: --- . <string>:1: (WARNING/2) 'title_to_header' must be of type <class 'bool'> (got 1 that is a <class 'int'>). [myst.topmatter] -<string>:1: (WARNING/2) 'url_schemes[0]' must be of type <class 'str'> (got 1 that is a <class 'int'>). [myst.topmatter] +<string>:1: (WARNING/2) 'url_schemes' is not a list of strings: [1] [myst.topmatter] <string>:1: (WARNING/2) 'substitutions['key']' must be of type (<class 'str'>, <class 'int'>, <class 'float'>) (got [] that is a <class 'list'>). [myst.topmatter] . 
diff --git a/tests/test_renderers/test_myst_config.py b/tests/test_renderers/test_myst_config.py index ae8d9519..ff521208 100644 --- a/tests/test_renderers/test_myst_config.py +++ b/tests/test_renderers/test_myst_config.py @@ -4,6 +4,7 @@ from pathlib import Path import pytest +from docutils import __version_info__ from docutils.core import Publisher, publish_string from pytest_param_files import ParamTestData @@ -16,6 +17,8 @@ @pytest.mark.param_file(FIXTURE_PATH / "myst-config.txt") def test_cmdline(file_params: ParamTestData): """The description is parsed as a docutils commandline""" + if "url_schemes_list" in file_params.title and __version_info__ < (0, 18): + pytest.skip("problematic node ids changed in docutils 0.18") pub = Publisher(parser=Parser()) option_parser = pub.setup_option_parser() try: From 3b10f37adbf4e26f01ed185a7de0807211371f16 Mon Sep 17 00:00:00 2001 From: Chris Sewell <chrisj_sewell@hotmail.com> Date: Sat, 11 Feb 2023 19:24:53 +0100 Subject: [PATCH 2/2] Allow missing `url` key --- docs/syntax/syntax.md | 2 +- myst_parser/config/main.py | 8 ++------ myst_parser/mdit_to_docutils/base.py | 30 +++++++++++++++++++--------- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docs/syntax/syntax.md b/docs/syntax/syntax.md index 9da01eb9..571b9200 100644 --- a/docs/syntax/syntax.md +++ b/docs/syntax/syntax.md @@ -359,7 +359,7 @@ The value of each scheme can be: - The (optional) `title` key is a template for the link’s implicit title, i.e. it is used if the link has no explicit title. - The (optional) `classes` key is a list of classes to add to the link. -The templates for `url` and `title` can use variables (enclosed by `{{ }}`), which are substituted for the corresponding parts of the link `<scheme>://<netloc>/<path>;<params>?<query>#<fragment>`. 
+The templates for `url` and `title` can use variables (enclosed by `{{ }}`), which are substituted for the corresponding parts of the link `<scheme>://<netloc>/<path>;<params>?<query>#<fragment>` (or the full link using `uri`). For example: - `scheme`: the URL scheme, e.g. `wiki`. diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index 47bd05a5..adac9b12 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -60,7 +60,7 @@ def check_extensions(inst: "MdParserConfig", field: dc.Field, value: Any) -> Non class UrlSchemeType(TypedDict, total=False): """Type of the external schemes dictionary.""" - url: str # TODO add Required when python version supports it + url: str title: str classes: List[str] @@ -86,11 +86,7 @@ def check_url_schemes(inst: "MdParserConfig", field: dc.Field, value: Any) -> No elif isinstance(val, dict): if not all(isinstance(k, str) for k in val): raise TypeError(f"'{field.name}[{key}]' keys are not strings: {val!r}") - if "url" not in val: - raise TypeError( - f"'{field.name}[{key}]' does not contain a 'url' key: {val!r}" - ) - if not isinstance(val["url"], str): + if "url" in val and not isinstance(val["url"], str): raise TypeError( f"'{field.name}[{key}][url]' is not a string: {val['url']!r}" ) diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py index d69c614b..a45ebb16 100644 --- a/myst_parser/mdit_to_docutils/base.py +++ b/myst_parser/mdit_to_docutils/base.py @@ -73,7 +73,9 @@ def make_document(source_path="notset", parser_cls=RSTParser) -> nodes.document: Although schemes are case-insensitive, the canonical form is lowercase and documents that specify schemes must do so with lowercase letters. 
""" -REGEX_URI_TEMPLATE = re.compile(r"{{\s*(scheme|netloc|path|params|query|fragment)\s*}}") +REGEX_URI_TEMPLATE = re.compile( + r"{{\s*(uri|scheme|netloc|path|params|query|fragment)\s*}}" +) REGEX_DIRECTIVE_START = re.compile(r"^[\s]{0,3}([`]{3,10}|[~]{3,10}|[:]{3,10})\{") @@ -843,20 +845,30 @@ def render_external_url( # markdown-it encodes unsafe characters with percent-encoding # we want to get back the original, source input uri = self.md.normalizeLinkText(uri) - parsed = urlparse(uri) + _parsed = urlparse(uri) + parsed = { + "uri": uri, + "scheme": _parsed.scheme, + "netloc": _parsed.netloc, + "path": _parsed.path, + "params": _parsed.params, + "query": _parsed.query, + "fragment": _parsed.fragment, + } # Note we specifically do not use jinja2 here, # to restrict the scope of the templating language, # so that it can be used in a language agnostic way - uri = re.sub( - REGEX_URI_TEMPLATE, - lambda match: getattr(parsed, match.group(1), ""), - conversion["url"], - ) - uri = self.md.normalizeLink(uri) + if "url" in conversion: + uri = re.sub( + REGEX_URI_TEMPLATE, + lambda match: parsed.get(match.group(1), ""), + conversion["url"], + ) + uri = self.md.normalizeLink(uri) if "title" in conversion and (token.info == "auto" or not token.children): implicit_text = re.sub( REGEX_URI_TEMPLATE, - lambda match: getattr(parsed, match.group(1), ""), + lambda match: parsed.get(match.group(1), ""), conversion["title"], ) if "classes" in conversion: