From a633690cb2586f9e5f14b0d6cf22179320106d49 Mon Sep 17 00:00:00 2001 From: Ze Yu Date: Thu, 25 Jun 2020 20:21:40 +0800 Subject: [PATCH 1/2] Refactor baseUrl into variablePreprocessor BaseUrl processing is done in a separate stage involving repeated and recursive parsing / rendering of the content. This decreases cohesiveness of variable processing, and also performance due to the repeated parsing and rendering. It also necessitates edge-case solutions such as that in #1088 when we need to resolve the baseUrl before the resolveBaseUrl stage has been reached. With a framework for variable processing now, let's move baseUrl processing into it, solving the above said problems. Furthermore, rendering of other variables containing html is dependent on the extra htmlparser call in resolveBaseUrl. Let's formally remove the need for this by using only the unescaped nunjucks environment to render variables. --- src/Page.js | 85 ++++-------- src/Site.js | 15 ++- src/lib/markbind/src/constants.js | 1 - src/lib/markbind/src/parser.js | 127 ++---------------- .../markbind/src/parsers/componentParser.js | 8 ++ .../preprocessors/componentPreprocessor.js | 42 +++--- .../src/preprocessors/variablePreprocessor.js | 2 +- src/lib/markbind/src/utils/index.js | 6 - src/lib/markbind/src/utils/nunjuckUtils.js | 66 +-------- test/functional/test_site/expected/index.html | 2 - .../_markbind/plugins/testSpecialTag.js | 23 +--- .../expected/index.html | 8 -- .../test_site_special_tags/index.md | 9 -- test/unit/Site.test.js | 13 +- test/unit/nunjuckUtils.test.js | 29 ---- test/unit/parser.test.js | 14 +- test/unit/parsers/componentParser.test.js | 1 - 17 files changed, 96 insertions(+), 355 deletions(-) delete mode 100644 test/unit/nunjuckUtils.test.js diff --git a/src/Page.js b/src/Page.js index 09e2ff7eef..7eebda0289 100644 --- a/src/Page.js +++ b/src/Page.js @@ -18,7 +18,6 @@ const logger = require('./util/logger'); const MarkBind = require('./lib/markbind/src/parser'); const md = require('./lib/markbind/src/lib/markdown-it'); const utils = require('./lib/markbind/src/utils'); -const urlUtils = require('./lib/markbind/src/utils/urls'); const CLI_VERSION = require('../package.json').version; @@ -597,7 +596,7 @@ class Page { // Insert content .then(result => njUtil.renderRaw(result, { [LAYOUT_PAGE_BODY_VARIABLE]: pageData, - }, {}, false)); + })); } @@ -711,7 +710,7 @@ class Page { // Add anchor classes and highlight current page's anchor, if any. const currentPageHtmlPath = this.src.replace(/\.(md|mbd)$/, '.html'); - const currentPageRegex = new RegExp(`{{ *baseUrl *}}/${currentPageHtmlPath}`); + const currentPageRegex = new RegExp(`${this.baseUrl}/${currentPageHtmlPath}`); $nav('a[href]').each((i, elem) => { if (currentPageRegex.test($nav(elem).attr('href'))) { $nav(elem).addClass('current'); @@ -862,7 +861,7 @@ class Page { } } - collectHeadFiles(baseUrl, hostBaseUrl) { + collectHeadFiles() { const { head } = this.frontMatter; if (head === FRONT_MATTER_NONE_ATTR) { this.headFileTopContent = ''; @@ -892,16 +891,11 @@ class Page { // Split top and bottom contents const $ = cheerio.load(headFileMappedData, { xmlMode: false }); if ($('head-top').length) { - collectedTopContent.push(njUtil.renderRaw($('head-top').html(), { - baseUrl, - hostBaseUrl, - }).trim().replace(/\n\s*\n/g, '\n').replace(/\n/g, '\n ')); + collectedTopContent.push($('head-top').html().trim().replace(/\n\s*\n/g, '\n') + .replace(/\n/g, '\n ')); $('head-top').remove(); } - collectedBottomContent.push(njUtil.renderRaw($.html(), { - baseUrl, - hostBaseUrl, - }).trim().replace(/\n\s*\n/g, '\n').replace(/\n/g, '\n ')); + collectedBottomContent.push($.html().trim().replace(/\n\s*\n/g, '\n').replace(/\n/g, '\n ')); }); this.headFileTopContent = collectedTopContent.join('\n '); this.headFileBottomContent = collectedBottomContent.join('\n '); @@ -962,6 +956,7 @@ class Page { */ const fileConfig = { baseUrlMap: this.baseUrlMap, + baseUrl: this.baseUrl, rootPath: this.rootPath, headerIdMap: this.headerIdMap, fixedHeader: this.fixedHeader, @@ -981,27 +976,16 @@ class Page { .then(result => this.insertHeaderFile(result, fileConfig)) .then(result => this.insertFooterFile(result)) .then(result => Page.insertTemporaryStyles(result)) - .then(result => markbinder.resolveBaseUrl(result, fileConfig)) .then(result => markbinder.render(result, this.sourcePath, fileConfig)) .then(result => this.postRender(result)) .then(result => this.collectPluginsAssets(result)) - .then(result => markbinder.processDynamicResources(this.sourcePath, result)) + .then(result => MarkBind.processDynamicResources(this.sourcePath, result, fileConfig)) .then(result => MarkBind.unwrapIncludeSrc(result)) .then((result) => { - this.content = result; - - const { relative } = urlUtils.getParentSiteAbsoluteAndRelativePaths(this.sourcePath, this.rootPath, - this.baseUrlMap); - const baseUrl = relative ? `${this.baseUrl}/${utils.ensurePosix(relative)}` : this.baseUrl; - const hostBaseUrl = this.baseUrl; + this.addLayoutScriptsAndStyles(); + this.collectHeadFiles(); - this.addLayoutFiles(); - this.collectHeadFiles(baseUrl, hostBaseUrl); - - this.content = njUtil.renderString(this.content, { - baseUrl, - hostBaseUrl, - }); + this.content = result; this.collectAllPageSections(); this.buildPageNav(); @@ -1089,7 +1073,7 @@ class Page { pageContextSources.forEach((src) => { if (src === undefined || src === '' || utils.isUrl(src)) { return; - } else if (utils.isAbsolutePath(src)) { + } else if (path.isAbsolute(src)) { self.pluginSourceFiles.add(path.resolve(src)); return; } @@ -1121,7 +1105,7 @@ class Page { src = ensurePosix(src); if (src === '' || utils.isUrl(src)) { return; - } else if (utils.isAbsolutePath(src)) { + } else if (path.isAbsolute(src)) { self.pluginSourceFiles.add(path.resolve(src)); return; } @@ -1245,7 +1229,7 @@ class Page { /** * Adds linked layout files to page assets */ - addLayoutFiles() { + addLayoutScriptsAndStyles() { this.asset.layoutScript = path.join(this.layoutsAssetPath, this.frontMatter.layout, 'scripts.js'); this.asset.layoutStyle = path.join(this.layoutsAssetPath, this.frontMatter.layout, 'styles.css'); } @@ -1272,41 +1256,30 @@ class Page { const markbinder = new MarkBind({ variablePreprocessor: this.variablePreprocessor, }); + /** + * @type {FileConfig} + */ + const fileConfig = { + baseUrlMap: this.baseUrlMap, + baseUrl: this.baseUrl, + rootPath: this.rootPath, + headerIdMap: {}, + cwf: file, + }; return fs.readFileAsync(dependency.to, 'utf-8') - .then(result => markbinder.includeFile(dependency.to, result, { - baseUrlMap: this.baseUrlMap, - rootPath: this.rootPath, - cwf: file, - })) + .then(result => markbinder.includeFile(dependency.to, result, fileConfig)) .then(result => Page.removeFrontMatter(result)) .then(result => this.collectPluginSources(result)) .then(result => this.preRender(result)) - .then(result => markbinder.resolveBaseUrl(result, { - baseUrlMap: this.baseUrlMap, - rootPath: this.rootPath, - })) - .then(result => markbinder.render(result, this.sourcePath, { - baseUrlMap: this.baseUrlMap, - rootPath: this.rootPath, - headerIdMap: {}, - })) + .then(result => markbinder.render(result, this.sourcePath, fileConfig)) .then(result => this.postRender(result)) .then(result => this.collectPluginsAssets(result)) - .then(result => markbinder.processDynamicResources(file, result)) + .then(result => MarkBind.processDynamicResources(file, result, fileConfig)) .then(result => MarkBind.unwrapIncludeSrc(result)) .then((result) => { - // resolve the site base url here - const { relative } = urlUtils.getParentSiteAbsoluteAndRelativePaths(file, this.rootPath, - this.baseUrlMap); - const baseUrl = relative ? `${this.baseUrl}/${utils.ensurePosix(relative)}` : this.baseUrl; - const hostBaseUrl = this.baseUrl; - const content = njUtil.renderString(result, { - baseUrl, - hostBaseUrl, - }); const outputContentHTML = this.disableHtmlBeautify - ? content - : htmlBeautify(content, Page.htmlBeautifyOptions); + ? result + : htmlBeautify(result, Page.htmlBeautifyOptions); return fs.outputFileAsync(resultPath, outputContentHTML); }) .then(() => { diff --git a/src/Site.js b/src/Site.js index fd4aca7b31..e2fd601cba 100644 --- a/src/Site.js +++ b/src/Site.js @@ -11,6 +11,7 @@ const njUtil = require('./lib/markbind/src/utils/nunjuckUtils'); const injectHtmlParser2SpecialTags = require('./lib/markbind/src/patches/htmlparser2'); const injectMarkdownItSpecialTags = require( './lib/markbind/src/lib/markdown-it/markdown-it-escape-special-tags'); +const utils = require('./lib/markbind/src/utils'); const _ = {}; _.difference = require('lodash/difference'); @@ -524,8 +525,6 @@ class Site { * Collects the user defined variables map in the site/subsites */ collectUserDefinedVariablesMap() { - // The key is the base directory of the site/subsites, - // while the value is a mapping of user defined variables this.variablePreprocessor.resetUserDefinedVariablesMap(); this.baseUrlMap.forEach((base) => { @@ -540,13 +539,17 @@ class Site { } /* - This is to prevent the first nunjuck call from converting {{baseUrl}} to an empty string - and let the baseUrl value be injected later. + We retrieve the baseUrl of the (sub)site by appending the relative to the configured base url + i.e. We ignore the configured baseUrl of the sub sites. */ - this.variablePreprocessor.addUserDefinedVariable(base, 'baseUrl', '{{baseUrl}}'); + const siteRelativePathFromRoot = utils.ensurePosix(path.relative(this.rootPath, base)); + const siteBaseUrl = siteRelativePathFromRoot === '' + ? this.siteConfig.baseUrl + : path.posix.join(this.siteConfig.baseUrl || '/', siteRelativePathFromRoot); + this.variablePreprocessor.addUserDefinedVariable(base, 'baseUrl', siteBaseUrl); this.variablePreprocessor.addUserDefinedVariable(base, 'MarkBind', MARKBIND_LINK_HTML); - const $ = cheerio.load(content); + const $ = cheerio.load(content, { decodeEntities: false }); $('variable,span').each((index, element) => { const name = $(element).attr('name') || $(element).attr('id'); const variableSource = $(element).attr('from'); diff --git a/src/lib/markbind/src/constants.js b/src/lib/markbind/src/constants.js index 3ff5b152fc..2d8dd96aa0 100644 --- a/src/lib/markbind/src/constants.js +++ b/src/lib/markbind/src/constants.js @@ -1,6 +1,5 @@ module.exports = { // src/lib/markbind/src/parser.js - ATTRIB_INCLUDE_PATH: 'include-path', ATTRIB_CWF: 'cwf', BOILERPLATE_FOLDER_NAME: '_markbind/boilerplates', diff --git a/src/lib/markbind/src/parser.js b/src/lib/markbind/src/parser.js index 30a83b3303..d1a22391ca 100644 --- a/src/lib/markbind/src/parser.js +++ b/src/lib/markbind/src/parser.js @@ -3,11 +3,9 @@ const htmlparser = require('htmlparser2'); require('./patches/htmlparser2'); const path = require('path'); const Promise = require('bluebird'); const slugify = require('@sindresorhus/slugify'); -const ensurePosix = require('ensure-posix-path'); const componentParser = require('./parsers/componentParser'); const componentPreprocessor = require('./preprocessors/componentPreprocessor'); const logger = require('../../../util/logger'); -const njUtil = require('./utils/nunjuckUtils'); const _ = {}; _.clone = require('lodash/clone'); @@ -18,16 +16,10 @@ _.isEmpty = require('lodash/isEmpty'); const md = require('./lib/markdown-it'); const utils = require('./utils'); -const urlUtils = require('./utils/urls'); cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities -const { - ATTRIB_INCLUDE_PATH, - ATTRIB_CWF, -} = require('./constants'); - class Parser { constructor(config) { this.variablePreprocessor = config.variablePreprocessor; @@ -48,20 +40,16 @@ class Parser { return _.clone(this.missingIncludeSrc); } - processDynamicResources(context, html) { - const $ = cheerio.load(html, { - xmlMode: false, - decodeEntities: false, - }); + static processDynamicResources(context, html, config) { + const $ = cheerio.load(html, { xmlMode: false }); - const { rootPath } = this; function getAbsoluteResourcePath(elem, relativeResourcePath) { const firstParent = elem.closest('div[data-included-from], span[data-included-from]'); const originalSrc = utils.ensurePosix(firstParent.attr('data-included-from') || context); const originalSrcFolder = path.posix.dirname(originalSrc); const fullResourcePath = path.posix.join(originalSrcFolder, relativeResourcePath); - const resolvedResourcePath = path.posix.relative(utils.ensurePosix(rootPath), fullResourcePath); - return path.posix.join('{{hostBaseUrl}}', resolvedResourcePath); + const resolvedResourcePath = path.posix.relative(utils.ensurePosix(config.rootPath), fullResourcePath); + return path.posix.join(config.baseUrl || '/', resolvedResourcePath); } $('img, pic, thumbnail').each(function () { @@ -70,7 +58,7 @@ class Parser { return; } const resourcePath = utils.ensurePosix(elem.attr('src')); - if (utils.isAbsolutePath(resourcePath) || utils.isUrl(resourcePath)) { + if (path.isAbsolute(resourcePath) || utils.isUrl(resourcePath)) { // Do not rewrite. return; } @@ -84,7 +72,7 @@ class Parser { // Found empty href resource in resourcePath return; } - if (utils.isAbsolutePath(resourcePath) || utils.isUrl(resourcePath) || resourcePath.startsWith('#')) { + if (path.isAbsolute(resourcePath) || utils.isUrl(resourcePath) || resourcePath.startsWith('#')) { // Do not rewrite. return; } @@ -95,10 +83,7 @@ class Parser { } static unwrapIncludeSrc(html) { - const $ = cheerio.load(html, { - xmlMode: false, - decodeEntities: false, - }); + const $ = cheerio.load(html, { xmlMode: false }); $('div[data-included-from], span[data-included-from]').each(function () { $(this).replaceWith($(this).contents()); }); @@ -149,22 +134,6 @@ class Parser { node.children = cheerio.parseHTML(md.render(cheerio.html(node.children)), true); cheerio.prototype.options.xmlMode = true; break; - case 'panel': { - if (!_.hasIn(node.attribs, 'src')) { // dynamic panel - break; - } - const fileExists = utils.fileExists(node.attribs.src) - || utils.fileExists(urlUtils.calculateBoilerplateFilePath(node.attribs.boilerplate, - node.attribs.src, config)); - if (fileExists) { - const { src, fragment } = node.attribs; - const resultDir = path.dirname(path.join('{{hostBaseUrl}}', path.relative(config.rootPath, src))); - const resultPath = path.join(resultDir, utils.setExtension(path.basename(src), '._include_.html')); - node.attribs.src = utils.ensurePosix(fragment ? `${resultPath}#${fragment}` : resultPath); - } - delete node.attribs.boilerplate; - break; - } default: break; } @@ -230,10 +199,7 @@ class Parser { }); resolve(cheerio.html(nodes)); }); - const parser = new htmlparser.Parser(handler, { - xmlMode: true, - decodeEntities: true, - }); + const parser = new htmlparser.Parser(handler, { xmlMode: true }); const renderedContent = this.variablePreprocessor.renderPage(file, content, additionalVariables); @@ -276,10 +242,7 @@ class Parser { resolve(cheerio.html(nodes)); cheerio.prototype.options.xmlMode = true; }); - const parser = new htmlparser.Parser(handler, { - xmlMode: true, - decodeEntities: false, - }); + const parser = new htmlparser.Parser(handler, { xmlMode: true }); const fileExt = utils.getExt(filePath); if (utils.isMarkdownFileExt(fileExt)) { const renderedContent = md.render(content); @@ -293,78 +256,6 @@ class Parser { }); } - resolveBaseUrl(pageData, config) { - const { baseUrlMap, rootPath } = config; - this.baseUrlMap = baseUrlMap; - this.rootPath = rootPath; - - return new Promise((resolve, reject) => { - const handler = new htmlparser.DomHandler((error, dom) => { - if (error) { - reject(error); - return; - } - const nodes = dom.map(d => this._rebaseReference(d)); - cheerio.prototype.options.xmlMode = false; - resolve(cheerio.html(nodes)); - cheerio.prototype.options.xmlMode = true; - }); - const parser = new htmlparser.Parser(handler, { - xmlMode: true, - decodeEntities: true, - }); - parser.parseComplete(pageData); - }); - } - - /** - * Pre-renders the baseUrl of the provided node and its children according to - * the site they belong to, such that the final call to render baseUrl correctly - * resolves baseUrl according to the said site. - */ - _rebaseReference(node) { - if (_.isArray(node)) { - return node.map(el => this._rebaseReference(el)); - } - if (Parser.isText(node)) { - return node; - } - - // Rebase children elements - node.children.forEach(el => this._rebaseReference(el)); - - const includeSourceFile = node.attribs[ATTRIB_CWF]; - const includedSourceFile = node.attribs[ATTRIB_INCLUDE_PATH]; - delete node.attribs[ATTRIB_CWF]; - delete node.attribs[ATTRIB_INCLUDE_PATH]; - - // Skip rebasing for non-includes - if (!includedSourceFile || !node.children) { - return node; - } - - // The site at which the file with the include element was pre-processed - const currentBase = urlUtils.getParentSiteAbsolutePath(includeSourceFile, this.rootPath, this.baseUrlMap); - // The site of the included file - const newBase = urlUtils.getParentSiteAbsoluteAndRelativePaths(includedSourceFile, this.rootPath, - this.baseUrlMap); - - // Only re-render if include src and content are from different sites - if (currentBase !== newBase.absolute) { - cheerio.prototype.options.xmlMode = false; - const rendered = njUtil.renderRaw(cheerio.html(node.children), { - // This is to prevent the nunjuck call from converting {{hostBaseUrl}} to an empty string - // and let the hostBaseUrl value be injected later. - hostBaseUrl: '{{hostBaseUrl}}', - baseUrl: `{{hostBaseUrl}}/${ensurePosix(newBase.relative)}`, - }, { path: includedSourceFile }); - node.children = cheerio.parseHTML(rendered, true); - cheerio.prototype.options.xmlMode = true; - } - - return node; - } - static isText(node) { return node.type === 'text' || node.type === 'comment'; } diff --git a/src/lib/markbind/src/parsers/componentParser.js b/src/lib/markbind/src/parsers/componentParser.js index e4665b59f3..046e91828d 100644 --- a/src/lib/markbind/src/parsers/componentParser.js +++ b/src/lib/markbind/src/parsers/componentParser.js @@ -6,6 +6,10 @@ _.has = require('lodash/has'); const md = require('../lib/markdown-it'); const logger = require('../../../../util/logger'); +const { + ATTRIB_CWF, +} = require('../constants'); + cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities @@ -483,6 +487,10 @@ function postParseComponents(node) { } catch (error) { logger.error(error); } + + if (node.attribs) { + delete node.attribs[ATTRIB_CWF]; + } } diff --git a/src/lib/markbind/src/preprocessors/componentPreprocessor.js b/src/lib/markbind/src/preprocessors/componentPreprocessor.js index a4b7a749fc..bd0d69133b 100644 --- a/src/lib/markbind/src/preprocessors/componentPreprocessor.js +++ b/src/lib/markbind/src/preprocessors/componentPreprocessor.js @@ -13,7 +13,6 @@ _.has = require('lodash/has'); _.isEmpty = require('lodash/isEmpty'); const { - ATTRIB_INCLUDE_PATH, ATTRIB_CWF, } = require('../constants'); @@ -81,19 +80,20 @@ function _getSrcFlagsAndFilePaths(element, context, config) { // We do this even if the src is not a url to get the hash, if any const includeSrc = url.parse(element.attribs.src); - const baseUrlRegex = new RegExp('^{{\\s*baseUrl\\s*}}[/\\\\]'); - let filePath; if (isUrl) { filePath = element.attribs.src; } else { const includePath = decodeURIComponent(includeSrc.path); - if (baseUrlRegex.test(includePath)) { - // The baseUrl has not been resolved during pre-processing, but we need the source file path - const parentSitePath = urlUtils.getParentSiteAbsolutePath(context.cwf, config.rootPath, - config.baseUrlMap); - filePath = path.resolve(parentSitePath, includePath.replace(baseUrlRegex, '')); + if (path.posix.isAbsolute(includePath)) { + /* + If the src starts with the baseUrl (or simply '/' if there is no baseUrl specified), + get the relative path from the rootPath first, + then use it to resolve the absolute path of the referenced file on the filesystem. + */ + const relativePathToFile = path.posix.relative(`${config.baseUrl}/`, includePath); + filePath = path.resolve(config.rootPath, relativePathToFile); } else { filePath = path.resolve(path.dirname(context.cwf), includePath); } @@ -123,16 +123,14 @@ function _getSrcFlagsAndFilePaths(element, context, config) { * and adds the appropriate include. */ function _preProcessPanel(node, context, config, parser) { - const element = node; - - const hasSrc = _.has(element.attribs, 'src'); + const hasSrc = _.has(node.attribs, 'src'); if (!hasSrc) { - if (element.children && element.children.length > 0) { + if (node.children && node.children.length > 0) { // eslint-disable-next-line no-use-before-define - element.children = element.children.map(e => preProcessComponent(e, context, config, parser)); + node.children = node.children.map(e => preProcessComponent(e, context, config, parser)); } - return element; + return node; } const { @@ -140,22 +138,27 @@ function _preProcessPanel(node, context, config, parser) { hash, filePath, actualFilePath, - } = _getSrcFlagsAndFilePaths(element, context, config); + } = _getSrcFlagsAndFilePaths(node, context, config); - const fileExistsNode = _getFileExistsNode(element, context, config, parser, actualFilePath); + const fileExistsNode = _getFileExistsNode(node, context, config, parser, actualFilePath); if (fileExistsNode) { return fileExistsNode; } if (!isUrl && hash) { - element.attribs.fragment = hash.substring(1); + node.attribs.fragment = hash.substring(1); } - element.attribs.src = filePath; + const { fragment } = node.attribs; + const relativePath = utils.setExtension(path.relative(config.rootPath, filePath), '._include_.html'); + const fullResourcePath = path.posix.join(`${config.baseUrl}/`, utils.ensurePosix(relativePath)); + node.attribs.src = fragment ? `${fullResourcePath}#${fragment}` : fullResourcePath; + + delete node.attribs.boilerplate; parser.dynamicIncludeSrc.push({ from: context.cwf, to: actualFilePath, asIfTo: filePath }); - return element; + return node; } @@ -231,7 +234,6 @@ function _preprocessInclude(node, context, config, parser) { const isTrim = _.has(element.attribs, 'trim'); element.name = isInline ? 'span' : 'div'; - element.attribs[ATTRIB_INCLUDE_PATH] = filePath; // No need to process url contents if (isUrl) return element; diff --git a/src/lib/markbind/src/preprocessors/variablePreprocessor.js b/src/lib/markbind/src/preprocessors/variablePreprocessor.js index c260ddeb2f..e77c230ec6 100644 --- a/src/lib/markbind/src/preprocessors/variablePreprocessor.js +++ b/src/lib/markbind/src/preprocessors/variablePreprocessor.js @@ -89,7 +89,7 @@ class VariablePreprocessor { * This is to allow using previously declared site variables in site variables declared later on. */ renderAndAddUserDefinedVariable(site, name, value) { - const renderedVal = njUtil.renderRaw(value, this.userDefinedVariablesMap[site], {}, false); + const renderedVal = njUtil.renderRaw(value, this.userDefinedVariablesMap[site]); this.addUserDefinedVariable(site, name, renderedVal); } diff --git a/src/lib/markbind/src/utils/index.js b/src/lib/markbind/src/utils/index.js index b46bebf05f..506f959498 100644 --- a/src/lib/markbind/src/utils/index.js +++ b/src/lib/markbind/src/utils/index.js @@ -62,12 +62,6 @@ module.exports = { return r.test(filePath); }, - isAbsolutePath(filePath) { - return path.isAbsolute(filePath) - || filePath.includes('{{baseUrl}}') - || filePath.includes('{{hostBaseUrl}}'); - }, - createErrorNode(element, error) { const errorElement = cheerio.parseHTML( `
${error.message}
`, true)[0]; diff --git a/src/lib/markbind/src/utils/nunjuckUtils.js b/src/lib/markbind/src/utils/nunjuckUtils.js index 0f30a61b1b..c9dbacd951 100644 --- a/src/lib/markbind/src/utils/nunjuckUtils.js +++ b/src/lib/markbind/src/utils/nunjuckUtils.js @@ -1,73 +1,13 @@ const nunjucks = require('nunjucks'); const dateFilter = require('../lib/nunjucks-extensions/nunjucks-date'); -const commonEnv = nunjucks.configure().addFilter('date', dateFilter); const unescapedEnv = nunjucks.configure({ autoescape: false }).addFilter('date', dateFilter); -const START_ESCAPE_STR = '{% raw %}'; -const END_ESCAPE_STR = '{% endraw %}'; -const RAW_TAG_REGEX = new RegExp('{% *(end)?raw *%}', 'g'); - -/** - * Pads the outermost {% raw %} {% endraw %} pairs with {% raw %} {% endraw %} again. - * This allows variables and other nunjuck syntax inside {% raw %} {% endraw %} tags - * to be ignored by nunjucks until the final renderString call. - */ -function preEscapeRawTags(pageData) { - // TODO simplify using re.matchAll once node v10 reaches 'eol' - // https://github.com/nodejs/Release#nodejs-release-working-group - const tagMatches = []; - let tagMatch = RAW_TAG_REGEX.exec(pageData); - while (tagMatch !== null) { - tagMatches.push(tagMatch); - tagMatch = RAW_TAG_REGEX.exec(pageData); - } - - const tagInfos = Array.from(tagMatches, match => ({ - isStartTag: !match[0].includes('endraw'), - index: match.index, - content: match[0], - })); - - let numStartRawTags = 0; // nesting level of {% raw %} - let lastTokenEnd = 0; - const tokens = []; - - for (let i = 0; i < tagInfos.length; i += 1) { - const { index, isStartTag, content } = tagInfos[i]; - const currentTokenEnd = index + content.length; - tokens.push(pageData.slice(lastTokenEnd, currentTokenEnd)); - lastTokenEnd = currentTokenEnd; - - if (isStartTag) { - if (numStartRawTags === 0) { - // only pad outermost {% raw %} with an extra {% raw %} - tokens.push(START_ESCAPE_STR); - } - numStartRawTags += 1; - } else { - if (numStartRawTags === 1) { - // only pad outermost {% endraw %} with an extra {% endraw %} - tokens.push(END_ESCAPE_STR); - } - numStartRawTags -= 1; - } - } - // add the last token - tokens.push(pageData.slice(lastTokenEnd)); - - return tokens.join(''); -} - module.exports = { - renderRaw(pageData, variableMap = {}, options = {}, autoescape = true) { - const escapedPage = preEscapeRawTags(pageData); - if (autoescape) return commonEnv.renderString(escapedPage, variableMap, options); - return unescapedEnv.renderString(escapedPage, variableMap, options); - }, - renderString(pageData, variableMap = {}, options = {}, env = commonEnv) { - return env.renderString(pageData, variableMap, options); + renderRaw(pageData, variableMap = {}) { + return unescapedEnv.renderString(pageData, variableMap); }, + compile(src, ...args) { return nunjucks.compile(src, unescapedEnv, ...args); }, diff --git a/test/functional/test_site/expected/index.html b/test/functional/test_site/expected/index.html index cbdfb01dc5..bc0e2f700f 100644 --- a/test/functional/test_site/expected/index.html +++ b/test/functional/test_site/expected/index.html @@ -652,11 +652,9 @@

Markbind Plugin Pre-renderLevel 2 header (inside headingSearchIndex) with no-index attribute should not be indexed

Level 6 header (outside headingSearchIndex) with always-index attribute should be indexed

Test nunjucks raw tags

-

{{ variable interpolation syntax can be used with v-pre }}
{{ nonExistentVariable }}
{{ code elements should automatically be assigned v-pre }} -


    diff --git a/test/functional/test_site_special_tags/_markbind/plugins/testSpecialTag.js b/test/functional/test_site_special_tags/_markbind/plugins/testSpecialTag.js index 0b08040538..7c51e9bc1a 100644 --- a/test/functional/test_site_special_tags/_markbind/plugins/testSpecialTag.js +++ b/test/functional/test_site_special_tags/_markbind/plugins/testSpecialTag.js @@ -1,7 +1,5 @@ const cheerio = module.parent.require('cheerio'); -const ESCAPE_REGEX = new RegExp('{% *raw *%}(.*?){% *endraw *%}', 'gs'); - /* Simple test plugin that whitelists as a special tag. If encountered, it wraps the text node inside with some indication text as to @@ -22,27 +20,8 @@ function preRender(content) { return $.html(); } -/* - Tests that special tags like which would contain a lot of mustache syntax - like {{ }}, we are able to replace them with !success!success success!success! - without interference from other dependencies -*/ -function postRender(content) { - const $ = cheerio.load(content, { xmlMode: false }); - const escapedNunjucks = $('mustache'); - escapedNunjucks.each((index, element) => { - const unwrappedText = $(element).text(); - const unescapedText = unwrappedText.replace(ESCAPE_REGEX, 'raw$1endraw'); - const transformedText = unescapedText.replace(/{/g, '!success').replace(/}/g, 'success!'); - $(element).text(transformedText); - }); - - return $.html(); -} - module.exports = { preRender, - postRender, - getSpecialTags: () => ['testtag', 'testselfclosingtag', 'mustache'], + getSpecialTags: () => ['testtag', 'testselfclosingtag'], }; diff --git a/test/functional/test_site_special_tags/expected/index.html b/test/functional/test_site_special_tags/expected/index.html index ff7b5c984d..9466ef0820 100644 --- a/test/functional/test_site_special_tags/expected/index.html +++ b/test/functional/test_site_special_tags/expected/index.html @@ -44,14 +44,6 @@

    So far as to comply with t

    There should be text between this and the next <hr> tag, since it is a special tag. All text should appear in the browser window as a single line, save for the comment which the browser still interprets. (but will be in the expected output)

    - -raw - -!success!success This should be enclosed in success string success!success! - -!success This should also be enclosed in success strings success!success!success! -endraw - !success diff --git a/test/functional/test_site_special_tags/index.md b/test/functional/test_site_special_tags/index.md index 2bbb3088b2..9a500a16bb 100644 --- a/test/functional/test_site_special_tags/index.md +++ b/test/functional/test_site_special_tags/index.md @@ -29,15 +29,6 @@ There should be text between this and the next `
    ` tag, since it is a special All text should appear in the browser window as a single line, save for the comment which the browser still interprets. (but will be in the expected output) - -{%raw%} - -{{ This should be enclosed in success string }} - -{ This should also be enclosed in success strings }}} -{%endraw%} - - some text diff --git a/test/unit/Site.test.js b/test/unit/Site.test.js index 55c19b3f08..c92754285b 100644 --- a/test/unit/Site.test.js +++ b/test/unit/Site.test.js @@ -359,6 +359,7 @@ test('Site resolves variables referencing other variables', async () => { fs.vol.fromJSON(json, ''); const site = new Site('./', '_site'); + await site.readSiteConfig(); await site.collectBaseUrl(); await site.collectUserDefinedVariablesMap(); @@ -367,7 +368,7 @@ test('Site resolves variables referencing other variables', async () => { // check all variables expect(root.level1).toEqual('variable'); expect(root.level2).toEqual('variable'); - const expectedTextSpan = '<span style="color: blue">Blue text</span>'; + const expectedTextSpan = 'Blue text'; expect(root.level3).toEqual(expectedTextSpan); expect(root.level4).toEqual(expectedTextSpan); }); @@ -389,6 +390,7 @@ test('Site read correct user defined variables', async () => { fs.vol.fromJSON(json, ''); const site = new Site('./', '_site'); + await site.readSiteConfig(); await site.collectBaseUrl(); await site.collectUserDefinedVariablesMap(); @@ -398,11 +400,10 @@ test('Site read correct user defined variables', async () => { const othersub = site.variablePreprocessor.userDefinedVariablesMap[path.resolve('otherSub/sub')]; // check all baseUrls - const baseUrl = '{{baseUrl}}'; - expect(root.baseUrl).toEqual(baseUrl); - expect(sub.baseUrl).toEqual(baseUrl); - expect(subsub.baseUrl).toEqual(baseUrl); - expect(othersub.baseUrl).toEqual(baseUrl); + expect(root.baseUrl).toEqual(''); + expect(sub.baseUrl).toEqual('/sub'); + expect(subsub.baseUrl).toEqual('/sub/sub'); + expect(othersub.baseUrl).toEqual('/otherSub/sub'); // check other variables expect(root.variable).toEqual('variable'); diff --git a/test/unit/nunjuckUtils.test.js b/test/unit/nunjuckUtils.test.js deleted file mode 100644 index e506a44673..0000000000 --- a/test/unit/nunjuckUtils.test.js +++ /dev/null @@ -1,29 +0,0 @@ -const njUtil = require('../../src/lib/markbind/src/utils/nunjuckUtils'); - -test('Escaping nunjucks raw tags', () => { - const escapedString = 'This is a content with escaped data {% raw %} CONTENT {% endraw %}'; - const escapedContent = njUtil.renderRaw(escapedString); - - expect(escapedContent).toBe(escapedString); -}); - -test('Escaping nunjucks with new lines', () => { - const escapedString = 'This is a content with escaped data\n {% raw %} \nCONTENT\n {% endraw %}'; - const escapedContent = njUtil.renderRaw(escapedString); - - expect(escapedContent).toBe(escapedString); -}); - -test('Escaping multiple nunjucks raw tags', () => { - const escapedString = 'Multiple escapes: {% raw %} first {% endraw %} {% raw %} second {% endraw %}'; - const escapedContent = njUtil.renderRaw(escapedString); - - expect(escapedContent).toBe(escapedString); -}); - -test('Escaping nested nunjucks raw tags', () => { - const escapedString = 'Multiple escapes: {% raw %} first {% raw %} inside {% endraw %} second {% endraw %}'; - const escapedContent = njUtil.renderRaw(escapedString); - - expect(escapedContent).toBe(escapedString); -}); diff --git a/test/unit/parser.test.js b/test/unit/parser.test.js index dfdf38ca1e..26d126bd7b 100644 --- a/test/unit/parser.test.js +++ b/test/unit/parser.test.js @@ -47,7 +47,7 @@ test('includeFile replaces with
    ', async () => { const expected = [ '# Index', - `
    ` + `
    ` + `
    `, '', '# Include', @@ -86,7 +86,7 @@ test('includeFile replaces with
    ', async const expected = [ '# Index', - `
    ` + `
    ` + `
    `, '', '# Exist', @@ -159,7 +159,7 @@ test('includeFile replaces with
    ', async const expected = [ '# Index', - `
    ` + `
    ` + `
    `, '', 'existing segment', @@ -202,7 +202,7 @@ test('includeFile replaces with inline const expected = [ '# Index', - `` + `` + `existing segment`, '', ].join('\n'); @@ -241,7 +241,7 @@ test('includeFile replaces with trimmed c const expected = [ '# Index', - `
    ` + `
    ` + `
    `, '', 'existing segment', @@ -321,7 +321,7 @@ test('includeFile replaces with
    const expected = [ '# Index', - `
    ` + `
    ` + `
    `, '', 'existing segment', @@ -360,7 +360,7 @@ test('includeFile replaces with const expected = [ '# Index', - `
    ` + `
    ` + `
    `, '', '', diff --git a/test/unit/parsers/componentParser.test.js b/test/unit/parsers/componentParser.test.js index debeba9417..fa5767445d 100644 --- a/test/unit/parsers/componentParser.test.js +++ b/test/unit/parsers/componentParser.test.js @@ -27,7 +27,6 @@ const parseAndVerifyTemplate = (template, expectedTemplate, postParse = false) = const htmlParser = new htmlparser.Parser(handler, { xmlMode: true, - decodeEntities: false, }); htmlParser.parseComplete(template); From ca8b3300fa777ac49f01cf15e158cfce5b5a8354 Mon Sep 17 00:00:00 2001 From: Ze Yu Date: Wed, 10 Jun 2020 23:11:06 +0800 Subject: [PATCH 2/2] Remove xmlMode reliance Markbind relies on xmlMode parsing of htmlparser2 to parse 'custom self closing tags' such as '', and avoid the auto lower case conversion of attribute names. This introduces very tight coupling between the order and variations of this option across html rendering and parsing calls. In addition, xmlMode is not compliant with various html specs, such as opening tags being able to act as closing tags in certain cases, or more common cases like erroneously expanding
    into
    ...
    . Let's use htmlparser2's recognizeSelfClosing tag and lowerCaseAttributeName options to do the same instead. --- src/Page.js | 15 +++++----- src/Site.js | 2 +- src/lib/markbind/src/parser.js | 15 +++------- .../markbind/src/parsers/componentParser.js | 1 - src/lib/markbind/src/patches/htmlparser2.js | 30 ++++++++++++++++++- src/plugins/algolia.js | 2 +- src/plugins/codeBlockCopyButtons.js | 2 +- .../default/markbind-plugin-anchors.js | 2 +- .../markbind-plugin-footnotes-popovers.js | 2 +- .../default/markbind-plugin-plantuml.js | 2 +- .../markbind-plugin-shorthandSyntax.js | 2 +- src/plugins/filterTags.js | 2 +- .../_markbind/boilerplates/folder/inside.md | 2 +- .../_markbind/plugins/testMarkbindPlugin.js | 2 +- .../_markbind/plugins/testMarkbindPlugin.js | 2 +- test/functional/test_site/expected/index.html | 12 ++++---- .../expected/index.html | 2 ++ test/unit/parser.test.js | 2 +- test/unit/parsers/componentParser.test.js | 4 +-- 19 files changed, 62 insertions(+), 41 deletions(-) diff --git a/src/Page.js b/src/Page.js index 7eebda0289..7416ce8540 100644 --- a/src/Page.js +++ b/src/Page.js @@ -1,4 +1,4 @@ -const cheerio = require('cheerio'); +const cheerio = require('cheerio'); require('./lib/markbind/src/patches/htmlparser2'); const fm = require('fastmatter'); const fs = require('fs-extra-promise'); const htmlBeautify = require('js-beautify').html; @@ -56,7 +56,6 @@ const { TEMP_DROPDOWN_PLACEHOLDER_CLASS, } = require('./constants'); -cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities class Page { @@ -706,7 +705,7 @@ class Page { const siteNavHtml = md.render(navigationElements.length === 0 ? siteNavMappedData.replace(SITE_NAV_EMPTY_LINE_REGEX, '\n') : navigationElements.html().replace(SITE_NAV_EMPTY_LINE_REGEX, '\n')); - const $nav = cheerio.load(siteNavHtml, { xmlMode: false }); + const $nav = cheerio.load(siteNavHtml); // Add anchor classes and highlight current page's anchor, if any. const currentPageHtmlPath = this.src.replace(/\.(md|mbd)$/, '.html'); @@ -844,7 +843,7 @@ class Page { */ buildPageNav() { if (this.isPageNavigationSpecifierValid()) { - const $ = cheerio.load(this.content, { xmlMode: false }); + const $ = cheerio.load(this.content); this.navigableHeadings = {}; this.collectNavigableHeadings($(`#${CONTENT_WRAPPER_ID}`).html()); const pageNavTitleHtml = this.generatePageNavTitleHtml(); @@ -889,7 +888,7 @@ class Page { const headFileMappedData = this.variablePreprocessor.renderSiteVariables(this.sourcePath, headFileContent).trim(); // Split top and bottom contents - const $ = cheerio.load(headFileMappedData, { xmlMode: false }); + const $ = cheerio.load(headFileMappedData); if ($('head-top').length) { collectedTopContent.push($('head-top').html().trim().replace(/\n\s*\n/g, '\n') .replace(/\n/g, '\n ')); @@ -918,7 +917,7 @@ class Page { } collectPageSection(section) { - const $ = cheerio.load(this.content, { xmlMode: false }); + const $ = cheerio.load(this.content); const pageSection = $(section); if (pageSection.length === 0) { return; @@ -1087,7 +1086,7 @@ class Page { } if (domTagSourcesMap) { - const $ = cheerio.load(content, { xmlMode: true }); + const $ = cheerio.load(content); domTagSourcesMap.forEach(([tagName, attrName]) => { if (!_.isString(tagName) || !_.isString(attrName)) { @@ -1166,7 +1165,7 @@ class Page { * @return String html of the element, with the attribute's asset resolved */ getResolvedAssetElement(assetElementHtml, tagName, attrName, plugin, pluginName) { - const $ = cheerio.load(assetElementHtml, { xmlMode: false }); + const $ = cheerio.load(assetElementHtml); const el = $(`${tagName}[${attrName}]`); el.attr(attrName, (i, assetPath) => { diff --git a/src/Site.js b/src/Site.js index e2fd601cba..91335d7ebc 100644 --- a/src/Site.js +++ b/src/Site.js @@ -1,4 +1,4 @@ -const cheerio = require('cheerio'); +const cheerio = require('cheerio'); require('./lib/markbind/src/patches/htmlparser2'); const fs = require('fs-extra-promise'); const ghpages = require('gh-pages'); const ignore = require('ignore'); diff --git a/src/lib/markbind/src/parser.js b/src/lib/markbind/src/parser.js index d1a22391ca..c50574b697 100644 --- a/src/lib/markbind/src/parser.js +++ b/src/lib/markbind/src/parser.js @@ -17,7 +17,6 @@ _.isEmpty = require('lodash/isEmpty'); const md = require('./lib/markdown-it'); const utils = require('./utils'); -cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities class Parser { @@ -41,7 +40,7 @@ class Parser { } static processDynamicResources(context, html, config) { - const $ = cheerio.load(html, { xmlMode: false }); + const $ = cheerio.load(html); function getAbsoluteResourcePath(elem, relativeResourcePath) { const firstParent = elem.closest('div[data-included-from], span[data-included-from]'); @@ -83,7 +82,7 @@ class Parser { } static unwrapIncludeSrc(html) { - const $ = cheerio.load(html, { xmlMode: false }); + const $ = cheerio.load(html); $('div[data-included-from], span[data-included-from]').each(function () { $(this).replaceWith($(this).contents()); }); @@ -124,15 +123,11 @@ class Parser { switch (node.name) { case 'md': node.name = 'span'; - cheerio.prototype.options.xmlMode = false; node.children = cheerio.parseHTML(md.renderInline(cheerio.html(node.children)), true); - cheerio.prototype.options.xmlMode = true; break; case 'markdown': node.name = 'div'; - cheerio.prototype.options.xmlMode = false; node.children = cheerio.parseHTML(md.render(cheerio.html(node.children)), true); - cheerio.prototype.options.xmlMode = true; break; default: break; @@ -199,7 +194,7 @@ class Parser { }); resolve(cheerio.html(nodes)); }); - const parser = new htmlparser.Parser(handler, { xmlMode: true }); + const parser = new htmlparser.Parser(handler); const renderedContent = this.variablePreprocessor.renderPage(file, content, additionalVariables); @@ -238,11 +233,9 @@ class Parser { nodes.forEach((d) => { this._trimNodes(d); }); - cheerio.prototype.options.xmlMode = false; resolve(cheerio.html(nodes)); - cheerio.prototype.options.xmlMode = true; }); - const parser = new htmlparser.Parser(handler, { xmlMode: true }); + const parser = new htmlparser.Parser(handler); const fileExt = utils.getExt(filePath); if (utils.isMarkdownFileExt(fileExt)) { const renderedContent = md.render(content); diff --git a/src/lib/markbind/src/parsers/componentParser.js b/src/lib/markbind/src/parsers/componentParser.js index 046e91828d..1c4735b4c5 100644 --- a/src/lib/markbind/src/parsers/componentParser.js +++ b/src/lib/markbind/src/parsers/componentParser.js @@ -10,7 +10,6 @@ const { ATTRIB_CWF, } = require('../constants'); -cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities /* diff --git a/src/lib/markbind/src/patches/htmlparser2.js b/src/lib/markbind/src/patches/htmlparser2.js index 526e3b7db2..62b5ab360d 100644 --- a/src/lib/markbind/src/patches/htmlparser2.js +++ b/src/lib/markbind/src/patches/htmlparser2.js @@ -1,4 +1,32 @@ -const { Tokenizer } = require('htmlparser2'); +/* + * Four behaviours of htmlparser2 are patched here, the first 2 being 'convenience' patches + * to avoid repeated passing of lowerCaseAttributeNames and recognizeSelfClosing options. + * 1. Defaulting to self closing tag recognition + * 2. Disabling automatic attribute name conversion to lower case + * 3. Recognising text in markdown inline code / code blocks as raw text + * 4. Ability to inject/whitelist certain tags to be parsed like script/style tags do. (special tags) + */ + +const { Tokenizer, Parser } = require('htmlparser2'); + +/* + Enable any self closing tags '' to be parsed. + This allows MarkBind's own components to be recognised as such (e.g. ''). + This is equivalent to the { recognizeSelfClosing: true } option of htmlparser2; + We modify the relevant code to avoid passing it in repeatedly. + */ +Parser.prototype.onselfclosingtag = function () { + this._closeCurrentTag(); +}; + +/* + Disable automatic lower case conversion. + This is equivalent to the { lowerCaseAttributeNames: false } option of htmlparser2; + We modify the relevant code to avoid passing it in repeatedly as well. + */ +Parser.prototype.onattribname = function (name) { + this._attribname = name; +}; const MARKDOWN = Symbol('MARKDOWN'); diff --git a/src/plugins/algolia.js b/src/plugins/algolia.js index 02759870e7..460cda3807 100644 --- a/src/plugins/algolia.js +++ b/src/plugins/algolia.js @@ -22,7 +22,7 @@ function buildAlgoliaInitScript(pluginContext) { } function addNoIndexClasses(content) { - const $ = cheerio.load(content, { xmlMode: false }); + const $ = cheerio.load(content); const noIndexSelectors = [ 'dropdown', 'b-modal', diff --git a/src/plugins/codeBlockCopyButtons.js b/src/plugins/codeBlockCopyButtons.js index 7c11b169d1..c34244cb5c 100644 --- a/src/plugins/codeBlockCopyButtons.js +++ b/src/plugins/codeBlockCopyButtons.js @@ -45,7 +45,7 @@ const copyCodeBlockScript = `