From e932c088c80be59e9200967e0bdbfdbe7b0e946a Mon Sep 17 00:00:00 2001 From: Si Jie Date: Tue, 9 Apr 2019 13:54:37 +0800 Subject: [PATCH 1/5] Parser: Support context-aware includes After our parser includes a file, its context is discarded. This means that in subsequent passes of the parser, we do not know where the content has been included from, and included content is treated uniformly as if it were originally part of the host file. The path of the references in the included file may not resolve correctly if it belongs to a different directory from the host file. For example, if /a/b/c.md refers to a file in '../file.css', and /index.md is the host file that includes /a/b/c.md, then /index.md should resolve this path to '/a/file.css'. It cannot retain the original path '../file.css', as that will lead to the wrong css file. To support context-aware references, i.e. the included segments retain awareness of their original include locations, let's add a data-included-from meta whenever we include files. To maintain the existing behaviour and to prevent leaking file structure data, this meta tag is removed before we finish generating the final website. 
--- src/Page.js | 1 + src/lib/markbind/src/parser.js | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/Page.js b/src/Page.js index 07b7bef43a..ccde234a9b 100644 --- a/src/Page.js +++ b/src/Page.js @@ -804,6 +804,7 @@ Page.prototype.generate = function (builtFiles) { .then(() => markbinder.renderFile(this.tempPath, fileConfig)) .then(result => this.postRender(result)) .then(result => this.collectPluginsAssets(result)) + .then(result => markbinder.unwrapIncludeSrc(result)) .then((result) => { this.content = htmlBeautify(result, { indent_size: 2 }); diff --git a/src/lib/markbind/src/parser.js b/src/lib/markbind/src/parser.js index 27c152a90e..79b6a32a07 100644 --- a/src/lib/markbind/src/parser.js +++ b/src/lib/markbind/src/parser.js @@ -341,7 +341,11 @@ Parser.prototype._preprocess = function (node, context, config) { } actualContent = self._rebaseReferenceForStaticIncludes(actualContent, element, config); } - element.children = cheerio.parseHTML(actualContent, true); // the needed content; + const wrapperType = isInline ? 'span' : 'div'; + element.children = cheerio.parseHTML( + `<${wrapperType} data-included-from="${filePath}">${actualContent}`, + true, + ); } else { let actualContent = (fileContent && isTrim) ? fileContent.trim() : fileContent; if (isIncludeSrcMd) { @@ -351,7 +355,11 @@ Parser.prototype._preprocess = function (node, context, config) { actualContent = md.render(actualContent); } } - element.children = cheerio.parseHTML(actualContent, true); + const wrapperType = isInline ? 
'span' : 'div'; + element.children = cheerio.parseHTML( + `<${wrapperType} data-included-from="${filePath}">${actualContent}`, + true, + ); } // The element's children are in the new context @@ -402,6 +410,17 @@ Parser.prototype._preprocess = function (node, context, config) { return element; }; +Parser.prototype.unwrapIncludeSrc = function (html) { + const $ = cheerio.load(html, { + xmlMode: false, + decodeEntities: false, + }); + $('div[data-included-from], span[data-included-from]').each(function () { + $(this).replaceWith($(this).contents()); + }); + return $.html(); +}; + Parser.prototype._parse = function (node, context, config) { const element = node; const self = this; From 593c70af786b45b079cced11d547c21a4a2f51a4 Mon Sep 17 00:00:00 2001 From: Si Jie Date: Tue, 9 Apr 2019 13:55:46 +0800 Subject: [PATCH 2/5] Parser: Support relative references MarkBind does not support relative references if they are written using Markdown syntax. The existing MarkBind parser only processes relative URLs once during the preprocess stage, during which Markdown code has not yet been rendered into HTML. With context-aware includes introduced in the previous commit, we can now store the context from which files are included. We can then use this information to derive the absolute path of a resource referenced by an included file. Let's rewrite the support for relative URLs by processing all such relative URLs after each page has been rendered to HTML. This way, we can convert relative URLs in Markdown code to absolute URLs too. With this new method of processing relative URLs, let's also remove the old implementation. 
--- src/Page.js | 2 + src/lib/markbind/src/parser.js | 84 ++++++++++++++++++++++------------ src/lib/markbind/src/utils.js | 6 +++ 3 files changed, 63 insertions(+), 29 deletions(-) diff --git a/src/Page.js b/src/Page.js index ccde234a9b..18008786b2 100644 --- a/src/Page.js +++ b/src/Page.js @@ -804,6 +804,7 @@ Page.prototype.generate = function (builtFiles) { .then(() => markbinder.renderFile(this.tempPath, fileConfig)) .then(result => this.postRender(result)) .then(result => this.collectPluginsAssets(result)) + .then(result => markbinder.processDynamicResources(this.sourcePath, result)) .then(result => markbinder.unwrapIncludeSrc(result)) .then((result) => { this.content = htmlBeautify(result, { indent_size: 2 }); @@ -981,6 +982,7 @@ Page.prototype.resolveDependency = function (dependency, builtFiles) { baseUrlMap: this.baseUrlMap, rootPath: this.rootPath, })) + .then(result => markbinder.processDynamicResources(file, result)) .then((result) => { // resolve the site base url here const newBaseUrl = calculateNewBaseUrl(file, this.rootPath, this.baseUrlMap); diff --git a/src/lib/markbind/src/parser.js b/src/lib/markbind/src/parser.js index 79b6a32a07..cb09328ed7 100644 --- a/src/lib/markbind/src/parser.js +++ b/src/lib/markbind/src/parser.js @@ -172,24 +172,20 @@ Parser.prototype._preprocess = function (node, context, config) { element.attribs = element.attribs || {}; element.attribs[ATTRIB_CWF] = path.resolve(context.cwf); - const requiresSrc = ['img', 'pic', 'include'].includes(element.name); + const requiresSrc = ['include'].includes(element.name); if (requiresSrc && _.isEmpty(element.attribs.src)) { const error = new Error(`Empty src attribute in ${element.name} in: ${element.attribs[ATTRIB_CWF]}`); this._onError(error); return createErrorNode(element, error); } - const shouldProcessSrc = ['img', 'pic', 'include', 'panel'].includes(element.name); + const shouldProcessSrc = ['include', 'panel'].includes(element.name); const hasSrc = _.hasIn(element.attribs, 
'src'); let isUrl; let includeSrc; let filePath; - let isAbsolutePath; let actualFilePath; if (hasSrc && shouldProcessSrc) { isUrl = utils.isUrl(element.attribs.src); - isAbsolutePath = path.isAbsolute(element.attribs.src) - || element.attribs.src.includes('{{baseUrl}}') - || element.attribs.src.includes('{{hostBaseUrl}}'); includeSrc = url.parse(element.attribs.src); filePath = isUrl ? element.attribs.src @@ -202,7 +198,7 @@ Parser.prototype._preprocess = function (node, context, config) { this.boilerplateIncludeSrc.push({ from: context.cwf, to: actualFilePath }); } const isOptional = element.name === 'include' && _.hasIn(element.attribs, 'optional'); - if (!['img', 'pic'].includes(element.name) && !utils.fileExists(actualFilePath)) { + if (!utils.fileExists(actualFilePath)) { if (isOptional) { return createEmptyNode(); } @@ -215,18 +211,6 @@ Parser.prototype._preprocess = function (node, context, config) { } } - const shouldProcessHref = ['a', 'link'].includes(element.name); - const hasHref = _.hasIn(element.attribs, 'href'); - if (hasHref && shouldProcessHref) { - isUrl = utils.isUrl(element.attribs.href); - isAbsolutePath = path.isAbsolute(element.attribs.href) || element.attribs.href.startsWith('{{'); - includeSrc = url.parse(element.attribs.href); - filePath = isUrl - ? element.attribs.src - : path.resolve(path.dirname(context.cwf), decodeURIComponent(includeSrc.path)); - actualFilePath = filePath; - } - if (element.name === 'include') { const isInline = _.hasIn(element.attribs, 'inline'); const isDynamic = _.hasIn(element.attribs, 'dynamic'); @@ -391,16 +375,6 @@ Parser.prototype._preprocess = function (node, context, config) { if (element.name === 'body') { // eslint-disable-next-line no-console console.warn(` tag found in ${element.attribs[ATTRIB_CWF]}. 
This may cause formatting errors.`); - } else if (['img', 'pic'].includes(element.name)) { - if (!isUrl && !isAbsolutePath) { - const resultPath = path.join('{{hostBaseUrl}}', path.relative(config.rootPath, filePath)); - element.attribs.src = utils.ensurePosix(resultPath); - } - } else if (['a', 'link'].includes(element.name)) { - if (!isUrl && !isAbsolutePath && hasHref) { - const resultPath = path.join('{{hostBaseUrl}}', path.relative(config.rootPath, filePath)); - element.attribs.href = utils.ensurePosix(resultPath); - } } if (element.children && element.children.length > 0) { element.children = element.children.map(e => self._preprocess(e, context, config)); @@ -410,6 +384,58 @@ Parser.prototype._preprocess = function (node, context, config) { return element; }; +Parser.prototype.processDynamicResources = function (context, html) { + const self = this; + const $ = cheerio.load(html, { + xmlMode: false, + decodeEntities: false, + }); + $('img, pic').each(function () { + const elem = $(this); + const resourcePath = utils.ensurePosix(elem.attr('src')); + if (resourcePath === undefined || resourcePath === '') { + // Found empty img/pic resource in resourcePath + return; + } + if (utils.isAbsolutePath(resourcePath) || utils.isUrl(resourcePath)) { + // Do not rewrite. 
+ return; + } + const firstParent = elem.closest('div[data-included-from], span[data-included-from]'); + const originalSrc = utils.ensurePosix(firstParent.attr('data-included-from') || context); + + const originalSrcFolder = path.posix.dirname(originalSrc); + const fullResourcePath = path.posix.join(originalSrcFolder, resourcePath); + const resolvedResourcePath = path.posix.relative(utils.ensurePosix(self.rootPath), fullResourcePath); + const absoluteResourcePath = path.posix.join('{{hostBaseUrl}}', resolvedResourcePath); + + $(this).attr('src', absoluteResourcePath); + }); + $('a, link').each(function () { + const elem = $(this); + const resourcePath = elem.attr('href'); + if (resourcePath === undefined || resourcePath === '') { + // Found empty href resource in resourcePath + return; + } + if (utils.isAbsolutePath(resourcePath) || utils.isUrl(resourcePath) || resourcePath.startsWith('#')) { + // Do not rewrite. + return; + } + + const firstParent = elem.closest('div[data-included-from], span[data-included-from]'); + const originalSrc = utils.ensurePosix(firstParent.attr('data-included-from') || context); + + const originalSrcFolder = path.posix.dirname(originalSrc); + const fullResourcePath = path.posix.join(originalSrcFolder, resourcePath); + const resolvedResourcePath = path.posix.relative(utils.ensurePosix(self.rootPath), fullResourcePath); + const absoluteResourcePath = path.posix.join('{{hostBaseUrl}}', resolvedResourcePath); + + $(this).attr('href', absoluteResourcePath); + }); + return $.html(); +}; + Parser.prototype.unwrapIncludeSrc = function (html) { const $ = cheerio.load(html, { xmlMode: false, diff --git a/src/lib/markbind/src/utils.js b/src/lib/markbind/src/utils.js index 64f2bc42c9..262f74e8d0 100644 --- a/src/lib/markbind/src/utils.js +++ b/src/lib/markbind/src/utils.js @@ -63,6 +63,12 @@ module.exports = { return r.test(filePath); }, + isAbsolutePath(filePath) { + return path.isAbsolute(filePath) + || filePath.includes('{{baseUrl}}') + || 
filePath.includes('{{hostBaseUrl}}'); + }, + createErrorElement(error) { return `
${error.message}
`; }, From bc58bbecf3a0046ff5544601e24b51b875f70c3a Mon Sep 17 00:00:00 2001 From: Si Jie Date: Tue, 9 Apr 2019 15:09:04 +0800 Subject: [PATCH 3/5] Parser.test.js: Update tests to support context-aware includes As parser now supports context-aware includes, we need to update our unit tests for parser to check that our data-included-from tags are correct. Let's update our unit tests in parser.test.js. --- test/unit/parser.test.js | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/test/unit/parser.test.js b/test/unit/parser.test.js index f3e9177dde..b2120f062d 100644 --- a/test/unit/parser.test.js +++ b/test/unit/parser.test.js @@ -45,10 +45,11 @@ test('includeFile replaces with
', async () => { const expected = [ '# Index', - `
`, + `
` + + `
`, '', '# Include', - '
', + '
', '', ].join('\n'); @@ -85,10 +86,11 @@ test('includeFile replaces with
', async const expected = [ '# Index', - `
`, + `
` + + `
`, '', '# Exist', - '
', + '
', '', ].join('\n'); @@ -161,10 +163,11 @@ test('includeFile replaces with
', async const expected = [ '# Index', - `
`, + `
` + + `
`, '', 'existing segment', - '
', + '
', '', ].join('\n'); @@ -205,7 +208,8 @@ test('includeFile replaces with inline const expected = [ '# Index', - `existing segment`, + `` + + `existing segment`, '', ].join('\n'); @@ -245,10 +249,11 @@ test('includeFile replaces with trimmed c const expected = [ '# Index', - `
`, + `
` + + `
`, '', 'existing segment', - '
', + '
', '', ].join('\n'); @@ -332,10 +337,11 @@ test('includeFile replaces with
const expected = [ '# Index', - `
`, + `
` + + `
`, '', 'existing segment', - '
', + '
', '', ].join('\n'); @@ -372,10 +378,11 @@ test('includeFile replaces with const expected = [ '# Index', - `
`, + `
` + + `
`, '', '', - '
', + '
', '', ].join('\n'); From 76a85723058cc89db0b79b23b4ca698d261b749d Mon Sep 17 00:00:00 2001 From: Si Jie Date: Wed, 10 Apr 2019 19:26:22 +0800 Subject: [PATCH 4/5] FilterTags: Adjust relative URL reference With context-aware includes, all relative URLs should now be with respect to the file that contains the relative URLs, instead of being with respect to the files that are expected to include them. FilterTags needs to be updated so that the reference to Hiding Tags is with respect to its location in userGuide/plugins/. --- docs/userGuide/plugins/filterTags.mbdf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/userGuide/plugins/filterTags.mbdf b/docs/userGuide/plugins/filterTags.mbdf index 485e543bd3..4b63f6ce42 100644 --- a/docs/userGuide/plugins/filterTags.mbdf +++ b/docs/userGuide/plugins/filterTags.mbdf @@ -79,7 +79,7 @@ Alternatively, you can specify tags to render for a page in the front matter. ``` -Tags in `site.json` will be merged with the ones in the front matter, and are processed after front matter tags. See [Hiding Tags](userGuide/tweakingThePageStructure.html#hiding-tags) for more information. +Tags in `site.json` will be merged with the ones in the front matter, and are processed after front matter tags. See [Hiding Tags](../tweakingThePageStructure.html#hiding-tags) for more information. #### Advanced Tagging Tips From 8890044e231848b97efaac8f007f4469c86b96f1 Mon Sep 17 00:00:00 2001 From: Si Jie Date: Tue, 16 Apr 2019 13:29:29 +0800 Subject: [PATCH 5/5] Support Relative References: Update User Guide Our user guide does not mention how users should use relative URL references. Now that parser fully supports relative URL references for images and links, let's update our user guide to document MarkBind's behavior towards relative references. 
In particular, the intra-site links section has been updated with an important note on how users should specify relative references, while the images and pictures sections have each been updated with a note telling users that they can now use relative references. --- docs/userGuide/syntax/images.mbdf | 4 ++++ docs/userGuide/syntax/links.mbdf | 10 +++++++--- docs/userGuide/syntax/pictures.mbdf | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/userGuide/syntax/images.mbdf b/docs/userGuide/syntax/images.mbdf index f4a41ed010..3ebcc34d39 100644 --- a/docs/userGuide/syntax/images.mbdf +++ b/docs/userGuide/syntax/images.mbdf @@ -6,6 +6,10 @@ ```markdown ![](https://markbind.org/images/logo-lightbackground.png) ``` + + URLs can be specified as relative references. More info in: Intra-Site Links + + diff --git a/docs/userGuide/syntax/links.mbdf b/docs/userGuide/syntax/links.mbdf index 605a30f5a5..79952d60e1 100644 --- a/docs/userGuide/syntax/links.mbdf +++ b/docs/userGuide/syntax/links.mbdf @@ -56,12 +56,14 @@ Links should start with {{ showBaseUrlCode }} (which represents the root directo 1. Click [here]({{ showBaseUrlCode }}/userGuide/reusingContents.html). 2. `![](`{{ showBaseUrlCode }}`/images/preview.png)` + +To ensure that links in the _markbind/ folder work correctly across the entire site, they should be written as absolute paths, prepended with {{ baseUrl }}. +
Relative paths:
-Links to files can also be specified relative to the file that includes it. {{ icon_example }} Assuming that we have the following folder structure: ``` @@ -83,12 +85,14 @@ Within `index.md`, we can also display the image using ``` or by including `subsite.md`: -``` +```html ``` -To ensure that links in the _markbind/ folder work correctly across the entire site, they should be written as absolute paths, prepended with {{ baseUrl }}. + Relative links to resources (e.g. images, hrefs) should be valid relative to the original, included file. In other words, the links should be accessible when traversing starting from the location of the included file. +
+ In the example above, image.png is in the same directory as subsite.md. When using relative references, the correct path is image.png and not textbook/image.png.
diff --git a/docs/userGuide/syntax/pictures.mbdf b/docs/userGuide/syntax/pictures.mbdf index e797b79c43..23de1d1dd4 100644 --- a/docs/userGuide/syntax/pictures.mbdf +++ b/docs/userGuide/syntax/pictures.mbdf @@ -24,7 +24,7 @@ Name | Type | Default | Description --- | --- | --- | --- alt | `string` | | **This must be specified.**
The alternative text of the image. height | `string` | | The height of the image in pixels. -src | `string` | | **This must be specified.**
The URL of the image. +src | `string` | | **This must be specified.**
The URL of the image.
The URL can be specified as an absolute or a relative reference. More info in: _[Intra-Site Links]({{baseUrl}}/userGuide/formattingContents.html#intraSiteLinks)_ width | `string` | | The width of the image in pixels.
If both width and height are specified, width takes priority over height. It is to maintain the image's aspect ratio.