From d2a24ddd484dff903a17cf98eca85de064e40c46 Mon Sep 17 00:00:00 2001 From: Ze Yu Date: Wed, 5 Feb 2020 18:46:11 +0800 Subject: [PATCH] Refactor preprocess and url processing functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parser class houses a large number of functions. This can be daunting for newer developers to the project, and decreases maintainability of the code. Let’s start by modularising the preprocess and url processing functions into separate files. Let’s also enhance some in-code documentation for these functions. Some of these functions can also be abstracted into smaller units of functionality, increasing the maintainability of the code. --- src/lib/markbind/src/parser.js | 459 ++++-------------- .../preprocessors/componentPreprocessor.js | 436 +++++++++++++++++ .../markbind/src/{utils.js => utils/index.js} | 25 +- src/lib/markbind/src/utils/urls.js | 55 +++ 4 files changed, 603 insertions(+), 372 deletions(-) create mode 100644 src/lib/markbind/src/preprocessors/componentPreprocessor.js rename src/lib/markbind/src/{utils.js => utils/index.js} (81%) create mode 100644 src/lib/markbind/src/utils/urls.js diff --git a/src/lib/markbind/src/parser.js b/src/lib/markbind/src/parser.js index 7a8fc3a11c..7ba665129d 100644 --- a/src/lib/markbind/src/parser.js +++ b/src/lib/markbind/src/parser.js @@ -4,10 +4,9 @@ const htmlparser = require('htmlparser2'); require('./patches/htmlparser2'); const nunjucks = require('nunjucks'); const path = require('path'); const Promise = require('bluebird'); -const url = require('url'); -const pathIsInside = require('path-is-inside'); const slugify = require('@sindresorhus/slugify'); const componentParser = require('./parsers/componentParser'); +const componentPreprocessor = require('./preprocessors/componentPreprocessor'); const _ = {}; _.clone = require('lodash/clone'); @@ -15,11 +14,10 @@ _.cloneDeep = require('lodash/cloneDeep'); _.hasIn = require('lodash/hasIn'); _.isArray = require('lodash/isArray'); _.isEmpty = require('lodash/isEmpty'); -_.pick = require('lodash/pick'); -const CyclicReferenceError = require('./handlers/cyclicReferenceError.js'); const md = require('./lib/markdown-it'); const utils = require('./utils'); +const urlUtils = require('./utils/urls'); cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities @@ -27,7 +25,6 @@ cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities const { ATTRIB_INCLUDE_PATH, ATTRIB_CWF, - BOILERPLATE_FOLDER_NAME, IMPORTED_VARIABLE_PREFIX, } = require('./constants'); @@ -129,21 +126,6 @@ class Parser { return _.clone(this.missingIncludeSrc); } - static _preprocessThumbnails(element) { - const isImage = _.hasIn(element.attribs, 'src') && element.attribs.src !== ''; - if (isImage) { - return element; - } - const text = _.hasIn(element.attribs, 'text') ? element.attribs.text : ''; - if (text === '') { - return element; - } - const renderedText = md.renderInline(text); - // eslint-disable-next-line no-param-reassign - element.children = cheerio.parseHTML(renderedText); - return element; - } - _renderIncludeFile(filePath, element, context, config, asIfAt = filePath) { try { this._fileCache[filePath] = this._fileCache[filePath] @@ -153,20 +135,19 @@ class Parser { const missingReferenceErrorMessage = `Missing reference in: ${element.attribs[ATTRIB_CWF]}`; e.message += `\n${missingReferenceErrorMessage}`; this._onError(e); - return Parser.createErrorNode(element, e); + return utils.createErrorNode(element, e); } const fileContent = this._fileCache[filePath]; // cache the file contents to save some I/O - const { parent, relative } = Parser.calculateNewBaseUrls(asIfAt, config.rootPath, config.baseUrlMap); + const { parent, relative } = urlUtils.calculateNewBaseUrls(asIfAt, config.rootPath, config.baseUrlMap); const userDefinedVariables = config.userDefinedVariablesMap[path.resolve(parent, relative)]; // Extract included variables from the PARENT file const includeVariables = Parser.extractIncludeVariables(element, context.variables); // Extract page variables from the CHILD file const pageVariables = this.extractPageVariables(asIfAt, fileContent, userDefinedVariables, includeVariables); - const content - = nunjucks.renderString(fileContent, - { ...pageVariables, ...includeVariables, ...userDefinedVariables }, - { path: filePath }); + const content = nunjucks.renderString(fileContent, + { ...pageVariables, ...includeVariables, ...userDefinedVariables }, + { path: filePath }); const childContext = _.cloneDeep(context); childContext.cwf = asIfAt; childContext.variables = includeVariables; @@ -197,10 +178,7 @@ class Parser { // Render inner file content const { content: renderedContent, childContext, userDefinedVariables } = this._renderIncludeFile(filePath, element, context, config); - if (!Parser.PROCESSED_INNER_VARIABLES.has(filePath)) { - Parser.PROCESSED_INNER_VARIABLES.add(filePath); - this._extractInnerVariables(renderedContent, childContext, config); - } + this.extractInnerVariablesIfNotProcessed(renderedContent, childContext, config, filePath); const innerVariables = this.getImportedVariableMap(filePath); Parser.VARIABLE_LOOKUP.get(filePath).forEach((value, variableName, map) => { map.set(variableName, nunjucks.renderString(value, { ...userDefinedVariables, ...innerVariables })); @@ -208,182 +186,11 @@ class Parser { }); } - _preprocess(node, context, config) { - const element = node; - const self = this; - element.attribs = element.attribs || {}; - element.attribs[ATTRIB_CWF] = path.resolve(context.cwf); - if (element.name === 'thumbnail') { - return Parser._preprocessThumbnails(element); - } - const requiresSrc = ['include'].includes(element.name); - if (requiresSrc && _.isEmpty(element.attribs.src)) { - const error = new Error(`Empty src attribute in ${element.name} in: ${element.attribs[ATTRIB_CWF]}`); - this._onError(error); - return Parser.createErrorNode(element, error); - } - const shouldProcessSrc = ['include', 'panel'].includes(element.name); - const hasSrc = _.hasIn(element.attribs, 'src'); - let isUrl; - let includeSrc; - let filePath; - let actualFilePath; - if (hasSrc && shouldProcessSrc) { - isUrl = utils.isUrl(element.attribs.src); - includeSrc = url.parse(element.attribs.src); - filePath = isUrl - ? element.attribs.src - : path.resolve(path.dirname(context.cwf), decodeURIComponent(includeSrc.path)); - actualFilePath = filePath; - const isBoilerplate = _.hasIn(element.attribs, 'boilerplate'); - if (isBoilerplate) { - element.attribs.boilerplate = element.attribs.boilerplate || path.basename(filePath); - actualFilePath - = Parser.calculateBoilerplateFilePath(element.attribs.boilerplate, filePath, config); - this.boilerplateIncludeSrc.push({ from: context.cwf, to: actualFilePath }); - } - const isOptional = element.name === 'include' && _.hasIn(element.attribs, 'optional'); - if (!utils.fileExists(actualFilePath)) { - if (isOptional) { - return Parser.createEmptyNode(); - } - this.missingIncludeSrc.push({ from: context.cwf, to: actualFilePath }); - const error - = new Error(`No such file: ${actualFilePath}\nMissing reference in ${element.attribs[ATTRIB_CWF]}`); - this._onError(error); - return Parser.createErrorNode(element, error); - } - } - if (element.name === 'include') { - const isInline = _.hasIn(element.attribs, 'inline'); - const isDynamic = _.hasIn(element.attribs, 'dynamic'); - const isOptional = _.hasIn(element.attribs, 'optional'); - const isTrim = _.hasIn(element.attribs, 'trim'); - element.name = isInline ? 'span' : 'div'; - element.attribs[ATTRIB_INCLUDE_PATH] = filePath; - if (isOptional && !includeSrc.hash) { - // optional includes of whole files have been handled, but segments still need to be processed - delete element.attribs.optional; - } - if (isDynamic) { - element.name = 'panel'; - element.attribs.src = filePath; - element.attribs['no-close'] = true; - element.attribs['no-switch'] = true; - if (includeSrc.hash) { - element.attribs.fragment = includeSrc.hash.substring(1); - } - element.attribs.header = element.attribs.name || ''; - delete element.attribs.dynamic; - this.dynamicIncludeSrc.push({ from: context.cwf, to: actualFilePath, asIfTo: element.attribs.src }); - return element; - } - if (isUrl) { - return element; // only keep url path for dynamic - } - this.staticIncludeSrc.push({ from: context.cwf, to: actualFilePath }); - const isIncludeSrcMd = utils.isMarkdownFileExt(utils.getExt(filePath)); - if (isIncludeSrcMd && context.source === 'html') { - // HTML include markdown, use special tag to indicate markdown code. - element.name = 'markdown'; - } - const { content, childContext, userDefinedVariables } - = this._renderIncludeFile(actualFilePath, element, context, config, filePath); - childContext.source = isIncludeSrcMd ? 'md' : 'html'; - childContext.callStack.push(context.cwf); - if (!Parser.PROCESSED_INNER_VARIABLES.has(filePath)) { - Parser.PROCESSED_INNER_VARIABLES.add(filePath); - this._extractInnerVariables(content, childContext, config); - } - const innerVariables = this.getImportedVariableMap(filePath); - const fileContent = nunjucks.renderString(content, { ...userDefinedVariables, ...innerVariables }); - // Delete variable attributes in include - Object.keys(element.attribs).forEach((attribute) => { - if (attribute.startsWith('var-')) { - delete element.attribs[attribute]; - } - }); - delete element.attribs.boilerplate; - delete element.attribs.src; - delete element.attribs.inline; - delete element.attribs.trim; - if (includeSrc.hash) { - // directly get segment from the src - const segmentSrc = cheerio.parseHTML(fileContent, true); - const $ = cheerio.load(segmentSrc); - const hashContent = $(includeSrc.hash).html(); - let actualContent = (hashContent && isTrim) ? hashContent.trim() : hashContent; - if (actualContent === null) { - if (isOptional) { - // set empty content for optional segment include that does not exist - actualContent = ''; - } else { - const error - = new Error(`No such segment '${includeSrc.hash.substring(1)}' in file: ${actualFilePath}` - + `\nMissing reference in ${element.attribs[ATTRIB_CWF]}`); - this._onError(error); - return Parser.createErrorNode(element, error); - } - } - if (isOptional) { - // optional includes of segments have now been handled, so delete the attribute - delete element.attribs.optional; - } - if (isIncludeSrcMd) { - if (context.mode === 'include') { - actualContent = isInline ? actualContent : utils.wrapContent(actualContent, '\n\n', '\n'); - } else { - actualContent = md.render(actualContent); - } - actualContent = Parser._rebaseReferenceForStaticIncludes(actualContent, element, config); - } - const wrapperType = isInline ? 'span' : 'div'; - element.children - = cheerio.parseHTML( - `<${wrapperType} data-included-from="${filePath}">${actualContent}`, - true); - } else { - let actualContent = (fileContent && isTrim) ? fileContent.trim() : fileContent; - if (isIncludeSrcMd) { - if (context.mode === 'include') { - actualContent = isInline ? actualContent : utils.wrapContent(actualContent, '\n\n', '\n'); - } else { - actualContent = md.render(actualContent); - } - } - const wrapperType = isInline ? 'span' : 'div'; - element.children - = cheerio.parseHTML( - `<${wrapperType} data-included-from="${filePath}">${actualContent}`, - true); - } - if (element.children && element.children.length > 0) { - if (childContext.callStack.length > CyclicReferenceError.MAX_RECURSIVE_DEPTH) { - const error = new CyclicReferenceError(childContext.callStack); - this._onError(error); - return Parser.createErrorNode(element, error); - } - element.children = element.children.map(e => self._preprocess(e, childContext, config)); - } - } else if ((element.name === 'panel') && hasSrc) { - if (!isUrl && includeSrc.hash) { - element.attribs.fragment = includeSrc.hash.substring(1); // save hash to fragment attribute - } - element.attribs.src = filePath; - this.dynamicIncludeSrc.push({ from: context.cwf, to: actualFilePath, asIfTo: filePath }); - return element; - } else if (element.name === 'variable' || element.name === 'import') { - return Parser.createEmptyNode(); - } else { - if (element.name === 'body') { - // eslint-disable-next-line no-console - console.warn(` tag found in ${element.attribs[ATTRIB_CWF]}. This may cause formatting errors.`); - } - if (element.children && element.children.length > 0) { - element.children = element.children.map(e => self._preprocess(e, context, config)); - } + extractInnerVariablesIfNotProcessed(content, childContext, config, filePathToExtract) { + if (!Parser.PROCESSED_INNER_VARIABLES.has(filePathToExtract)) { + Parser.PROCESSED_INNER_VARIABLES.add(filePathToExtract); + this._extractInnerVariables(content, childContext, config); } - return element; } processDynamicResources(context, html) { @@ -496,7 +303,7 @@ class Parser { } const fileExists = utils.fileExists(element.attribs.src) || utils.fileExists( - Parser.calculateBoilerplateFilePath( + urlUtils.calculateBoilerplateFilePath( element.attribs.boilerplate, element.attribs.src, config)); if (fileExists) { @@ -546,69 +353,6 @@ class Parser { } } - preprocess(file, pageData, context, config) { - const currentContext = context; - currentContext.mode = 'include'; - currentContext.callStack = []; - - return new Promise((resolve, reject) => { - const handler = new htmlparser.DomHandler((error, dom) => { - if (error) { - reject(error); - return; - } - const nodes = dom.map((d) => { - let processed; - try { - processed = this._preprocess(d, currentContext, config); - } catch (err) { - err.message += `\nError while preprocessing '${file}'`; - this._onError(err); - processed = Parser.createErrorNode(d, err); - } - return processed; - }); - resolve(cheerio.html(nodes)); - }); - - const parser = new htmlparser.Parser(handler, { - xmlMode: true, - decodeEntities: true, - }); - - const { parent, relative } = Parser.calculateNewBaseUrls(file, config.rootPath, config.baseUrlMap); - const userDefinedVariables = config.userDefinedVariablesMap[path.resolve(parent, relative)]; - const { additionalVariables } = config; - const pageVariables = this.extractPageVariables(file, pageData, userDefinedVariables, {}); - - let fileContent = nunjucks.renderString(pageData, - { - ...pageVariables, - ...userDefinedVariables, - ...additionalVariables, - }, - { path: file }); - this._extractInnerVariables(fileContent, currentContext, config); - const innerVariables = this.getImportedVariableMap(currentContext.cwf); - fileContent = nunjucks.renderString(fileContent, { - ...userDefinedVariables, - ...additionalVariables, - ...innerVariables, - }); - const fileExt = utils.getExt(file); - if (utils.isMarkdownFileExt(fileExt)) { - currentContext.source = 'md'; - parser.parseComplete(fileContent.toString()); - } else if (fileExt === 'html') { - currentContext.source = 'html'; - parser.parseComplete(fileContent); - } else { - const error = new Error(`Unsupported File Extension: '${fileExt}'`); - reject(error); - } - }); - } - includeFile(file, config) { const context = {}; context.cwf = config.cwf || file; // current working file @@ -623,11 +367,11 @@ class Parser { const nodes = dom.map((d) => { let processed; try { - processed = this._preprocess(d, context, config); + processed = componentPreprocessor.preProcessComponent(d, context, config, this); } catch (err) { err.message += `\nError while preprocessing '${file}'`; this._onError(err); - processed = Parser.createErrorNode(d, err); + processed = utils.createErrorNode(d, err); } return processed; }); @@ -640,7 +384,7 @@ class Parser { let actualFilePath = file; if (!utils.fileExists(file)) { const boilerplateFilePath - = Parser.calculateBoilerplateFilePath(path.basename(file), file, config); + = urlUtils.calculateBoilerplateFilePath(path.basename(file), file, config); if (utils.fileExists(boilerplateFilePath)) { actualFilePath = boilerplateFilePath; } @@ -652,7 +396,7 @@ class Parser { return; } const { parent, relative } - = Parser.calculateNewBaseUrls(file, config.rootPath, config.baseUrlMap); + = urlUtils.calculateNewBaseUrls(file, config.rootPath, config.baseUrlMap); const userDefinedVariables = config.userDefinedVariablesMap[path.resolve(parent, relative)]; const pageVariables = this.extractPageVariables(file, data, userDefinedVariables, {}); let fileContent @@ -685,15 +429,71 @@ class Parser { return new Promise((resolve, reject) => { let actualFilePath = file; if (!utils.fileExists(file)) { - const boilerplateFilePath = Parser.calculateBoilerplateFilePath(path.basename(file), file, config); + const boilerplateFilePath = urlUtils.calculateBoilerplateFilePath(path.basename(file), file, config); if (utils.fileExists(boilerplateFilePath)) { actualFilePath = boilerplateFilePath; } } - this.preprocess(actualFilePath, pageData, context, config) - .then(resolve) - .catch(reject); + const currentContext = context; + currentContext.mode = 'include'; + currentContext.callStack = []; + + const handler = new htmlparser.DomHandler((error, dom) => { + if (error) { + reject(error); + return; + } + const nodes = dom.map((d) => { + let processed; + try { + processed = componentPreprocessor.preProcessComponent(d, currentContext, config, this); + } catch (err) { + err.message += `\nError while preprocessing '${actualFilePath}'`; + this._onError(err); + processed = utils.createErrorNode(d, err); + } + return processed; + }); + resolve(cheerio.html(nodes)); + }); + + const parser = new htmlparser.Parser(handler, { + xmlMode: true, + decodeEntities: true, + }); + + const { parent, relative } = urlUtils.calculateNewBaseUrls(actualFilePath, + config.rootPath, config.baseUrlMap); + const userDefinedVariables = config.userDefinedVariablesMap[path.resolve(parent, relative)]; + const { additionalVariables } = config; + const pageVariables = this.extractPageVariables(actualFilePath, pageData, userDefinedVariables, {}); + + let fileContent = nunjucks.renderString(pageData, + { + ...pageVariables, + ...userDefinedVariables, + ...additionalVariables, + }, + { path: actualFilePath }); + this._extractInnerVariables(fileContent, currentContext, config); + const innerVariables = this.getImportedVariableMap(currentContext.cwf); + fileContent = nunjucks.renderString(fileContent, { + ...userDefinedVariables, + ...additionalVariables, + ...innerVariables, + }); + const fileExt = utils.getExt(actualFilePath); + if (utils.isMarkdownFileExt(fileExt)) { + currentContext.source = 'md'; + parser.parseComplete(fileContent.toString()); + } else if (fileExt === 'html') { + currentContext.source = 'html'; + parser.parseComplete(fileContent); + } else { + const error = new Error(`Unsupported File Extension: '${fileExt}'`); + reject(error); + } }); } @@ -714,7 +514,7 @@ class Parser { } catch (err) { err.message += `\nError while rendering '${file}'`; this._onError(err); - parsed = Parser.createErrorNode(d, err); + parsed = utils.createErrorNode(d, err); } return parsed; }); @@ -804,34 +604,32 @@ class Parser { // rebase current element if (element.attribs[ATTRIB_INCLUDE_PATH]) { const filePath = element.attribs[ATTRIB_INCLUDE_PATH]; - let newBase = Parser.calculateNewBaseUrls(filePath, this.rootPath, this.baseUrlMap); - if (newBase) { - const { relative, parent } = newBase; + let newBaseUrl = urlUtils.calculateNewBaseUrls(filePath, this.rootPath, this.baseUrlMap); + if (newBaseUrl) { + const { relative, parent } = newBaseUrl; // eslint-disable-next-line no-param-reassign foundBase[parent] = relative; } + // override with parent's base const combinedBases = { ...childrenBase, ...foundBase }; const bases = Object.keys(combinedBases); if (bases.length !== 0) { // need to rebase - newBase = combinedBases[bases[0]]; - const { children } = element; - if (children) { - const currentBase - = Parser.calculateNewBaseUrls(element.attribs[ATTRIB_CWF], this.rootPath, this.baseUrlMap); - if (currentBase) { - if (currentBase.relative !== newBase) { - cheerio.prototype.options.xmlMode = false; - const newBaseUrl = `{{hostBaseUrl}}/${newBase}`; - const rendered = nunjucks.renderString(cheerio.html(children), { - // This is to prevent the nunjuck call from converting {{hostBaseUrl}} to an empty string - // and let the hostBaseUrl value be injected later. - hostBaseUrl: '{{hostBaseUrl}}', - baseUrl: newBaseUrl, - }, { path: filePath }); - element.children = cheerio.parseHTML(rendered, true); - cheerio.prototype.options.xmlMode = true; - } + newBaseUrl = combinedBases[bases[0]]; + if (element.children) { + // ATTRIB_CWF is where the element was preprocessed + const currentBase = urlUtils.calculateNewBaseUrls(element.attribs[ATTRIB_CWF], + this.rootPath, this.baseUrlMap); + if (currentBase && currentBase.relative !== newBaseUrl) { + cheerio.prototype.options.xmlMode = false; + const rendered = nunjucks.renderString(cheerio.html(element.children), { + // This is to prevent the nunjuck call from converting {{hostBaseUrl}} to an empty string + // and let the hostBaseUrl value be injected later. + hostBaseUrl: '{{hostBaseUrl}}', + baseUrl: `{{hostBaseUrl}}/${newBaseUrl}`, + }, { path: filePath }); + element.children = cheerio.parseHTML(rendered, true); + cheerio.prototype.options.xmlMode = true; } } } @@ -841,75 +639,12 @@ class Parser { return element; } - static _rebaseReferenceForStaticIncludes(pageData, element, config) { - if (!config) { - return pageData; - } - if (!pageData.includes('{{baseUrl}}')) { - return pageData; - } - const filePath = element.attribs[ATTRIB_INCLUDE_PATH]; - const fileBase = Parser.calculateNewBaseUrls(filePath, config.rootPath, config.baseUrlMap); - if (!fileBase.relative) { - return pageData; - } - const currentPath = element.attribs[ATTRIB_CWF]; - const currentBase = Parser.calculateNewBaseUrls(currentPath, config.rootPath, config.baseUrlMap); - if (currentBase.relative === fileBase.relative) { - return pageData; - } - const newBase = fileBase.relative; - const newBaseUrl = `{{hostBaseUrl}}/${newBase}`; - return nunjucks.renderString(pageData, { baseUrl: newBaseUrl }, { path: filePath }); - } - static resetVariables() { Parser.VARIABLE_LOOKUP.clear(); Parser.FILE_ALIASES.clear(); Parser.PROCESSED_INNER_VARIABLES.clear(); } - /** - * @throws Will throw an error if a non-absolute path or path outside the root is given - */ - static calculateNewBaseUrls(filePath, root, lookUp) { - if (!path.isAbsolute(filePath)) { - throw new Error(`Non-absolute path given to calculateNewBaseUrls: '${filePath}'`); - } - if (!pathIsInside(filePath, root)) { - throw new Error(`Path given '${filePath}' is not in root '${root}'`); - } - function calculate(file, result) { - if (file === root) { - return { relative: path.relative(root, root), parent: root }; - } - const parent = path.dirname(file); - if (lookUp.has(parent) && result.length === 1) { - return { relative: path.relative(parent, result[0]), parent }; - } else if (lookUp.has(parent)) { - return calculate(parent, [parent]); - } - return calculate(parent, result); - } - - return calculate(filePath, []); - } - - static calculateBoilerplateFilePath(pathInBoilerplates, asIfAt, config) { - const { parent, relative } = Parser.calculateNewBaseUrls(asIfAt, config.rootPath, config.baseUrlMap); - return path.resolve(parent, relative, BOILERPLATE_FOLDER_NAME, pathInBoilerplates); - } - - static createErrorNode(element, error) { - const errorElement = cheerio.parseHTML(utils.createErrorElement(error), true)[0]; - return Object.assign(element, _.pick(errorElement, ['name', 'attribs', 'children'])); - } - - static createEmptyNode() { - const emptyElement = cheerio.parseHTML('
', true)[0]; - return emptyElement; - } - static isText(element) { return element.type === 'text' || element.type === 'comment'; } diff --git a/src/lib/markbind/src/preprocessors/componentPreprocessor.js b/src/lib/markbind/src/preprocessors/componentPreprocessor.js new file mode 100644 index 0000000000..7e3e642a33 --- /dev/null +++ b/src/lib/markbind/src/preprocessors/componentPreprocessor.js @@ -0,0 +1,436 @@ +const cheerio = require('cheerio'); +const nunjucks = require('nunjucks'); +const path = require('path'); +const url = require('url'); + +const CyclicReferenceError = require('../handlers/cyclicReferenceError.js'); + +const md = require('../lib/markdown-it'); +const utils = require('../utils'); +const urlUtils = require('../utils/urls'); + +const _ = {}; +_.has = require('lodash/has'); +_.isEmpty = require('lodash/isEmpty'); + +const { + ATTRIB_INCLUDE_PATH, + ATTRIB_CWF, +} = require('../constants'); + + +/* + * All components + */ + + +function _preProcessAllComponents(node, context) { + const element = node; + + // We do this since element.attribs is undefined if it does not exist + element.attribs = element.attribs || {}; + + element.attribs[ATTRIB_CWF] = path.resolve(context.cwf); +} + + +/* + * Thumbnails + */ + + +// TODO move this to componentParser +function _preProcessThumbnail(node) { + const element = node; + + const isImage = _.has(element.attribs, 'src') && element.attribs.src !== ''; + if (isImage) { + return element; + } + + const text = _.has(element.attribs, 'text') ? element.attribs.text : ''; + if (text === '') { + return element; + } + const renderedText = md.renderInline(text); + element.children = cheerio.parseHTML(renderedText); + + return element; +} + + +/* + * Common panel and include helper functions + */ + + +function _getBoilerplateFilePath(node, config, filePath) { + const element = node; + + const isBoilerplate = _.has(element.attribs, 'boilerplate'); + if (isBoilerplate) { + element.attribs.boilerplate = element.attribs.boilerplate || path.basename(filePath); + + return urlUtils.calculateBoilerplateFilePath(element.attribs.boilerplate, filePath, config); + } + + return undefined; +} + +/** + * Returns either an empty or error node depending on whether the file specified exists + * and whether this file is optional if not. + */ +function _getFileExistsNode(element, context, config, parser, actualFilePath, isOptional = false) { + if (!utils.fileExists(actualFilePath)) { + if (isOptional) { + return utils.createEmptyNode(); + } + + parser.missingIncludeSrc.push({ from: context.cwf, to: actualFilePath }); + const error = new Error( + `No such file: ${actualFilePath}\nMissing reference in ${element.attribs[ATTRIB_CWF]}`); + parser._onError(error); + + return utils.createErrorNode(element, error); + } + + return false; +} + +/** + * Retrieves several flags and file paths from the src attribute specified in the element. + */ +function _getSrcFlagsAndFilePaths(element, context, config) { + const isUrl = utils.isUrl(element.attribs.src); + + // We do this even if the src is not a url to get the hash, if any + const includeSrc = url.parse(element.attribs.src); + + const filePath = isUrl + ? element.attribs.src + : path.resolve(path.dirname(context.cwf), decodeURIComponent(includeSrc.path)); + + const boilerplateFilePath = _getBoilerplateFilePath(element, config, filePath); + const actualFilePath = boilerplateFilePath || filePath; + + return { + isUrl, + hash: includeSrc.hash, + filePath, + boilerplateFilePath, + actualFilePath, + }; +} + + +/* + * Panels + */ + + +/** + * PreProcesses panels with a src attribute specified. + * Replaces the panel with an error node if the src is invalid. + * Otherwise, sets the fragment attribute of the panel as parsed from the src, + * and adds the appropriate include. + */ +function _preProcessPanel(node, context, config, parser) { + const element = node; + + const hasSrc = _.has(element.attribs, 'src'); + if (!hasSrc) { + if (element.children && element.children.length > 0) { + // eslint-disable-next-line no-use-before-define + element.children = element.children.map(e => preProcessComponent(e, context, config, parser)); + } + + return element; + } + + const { + isUrl, + hash, + filePath, + boilerplateFilePath, + actualFilePath, + } = _getSrcFlagsAndFilePaths(element, context, config); + + const fileExistsNode = _getFileExistsNode(element, context, config, parser, actualFilePath); + if (fileExistsNode) { + return fileExistsNode; + } + + if (!isUrl && hash) { + element.attribs.fragment = hash.substring(1); + } + + element.attribs.src = filePath; + + // TODO do we need boilerplateIncludeSrc? + if (boilerplateFilePath) { + parser.boilerplateIncludeSrc.push({ from: context.cwf, to: boilerplateFilePath }); + } + parser.dynamicIncludeSrc.push({ from: context.cwf, to: actualFilePath, asIfTo: filePath }); + + return element; +} + + +/* + * Includes + */ + + +function _rebaseReferenceForStaticIncludes(pageData, element, config) { + if (!config) return pageData; + + if (!pageData.includes('{{baseUrl}}')) return pageData; + + const filePath = element.attribs[ATTRIB_INCLUDE_PATH]; + const fileBase = urlUtils.calculateNewBaseUrls(filePath, config.rootPath, config.baseUrlMap); + + if (!fileBase.relative) return pageData; + + const currentPath = element.attribs[ATTRIB_CWF]; + const currentBase = urlUtils.calculateNewBaseUrls(currentPath, config.rootPath, config.baseUrlMap); + + if (currentBase.relative === fileBase.relative) return pageData; + + const newBase = fileBase.relative; + const newBaseUrl = `{{hostBaseUrl}}/${newBase}`; + + return nunjucks.renderString(pageData, { baseUrl: newBaseUrl }, { path: filePath }); +} + +function _deleteIncludeAttributes(node) { + const element = node; + + // Delete variable attributes in include tags as they are no longer needed + // e.g. '' + Object.keys(element.attribs).forEach((attribute) => { + if (attribute.startsWith('var-')) { + delete element.attribs[attribute]; + } + }); + + delete element.attribs.boilerplate; + delete element.attribs.src; + delete element.attribs.inline; + delete element.attribs.trim; +} + +/** + * Check if the current working file's source type is a html file, but the include source is in markdown. + * Use a special tag to indicate markdown code for parsing later if so, + * as html files are not passed through markdown-it. + * @return Whether the include source is in markdown + */ +function _isHtmlIncludingMarkdown(node, context, filePath) { + const element = node; + const isIncludeSrcMd = utils.isMarkdownFileExt(utils.getExt(filePath)); + if (isIncludeSrcMd && context.source === 'html') { + element.name = 'markdown'; + } + + return isIncludeSrcMd; +} + +function _preprocessDynamicInclude(node, context, parser, hash, filePath, actualFilePath) { + const element = node; + element.name = 'panel'; + element.attribs.src = filePath; + + element.attribs['no-close'] = true; + element.attribs['no-switch'] = true; + element.attribs.header = element.attribs.name || ''; + + if (hash) { + element.attribs.fragment = hash.substring(1); + } + + parser.dynamicIncludeSrc.push({ from: context.cwf, to: actualFilePath, asIfTo: element.attribs.src }); + delete element.attribs.dynamic; + + return element; +} + +/** + * PreProcesses includes. + * Replaces it with an error node if the specified src is invalid, + * or an empty node if the src is invalid but optional. + * Replaces it with a panel with the appropriate content if the dynamic attribute is specified. + */ +function _preprocessInclude(node, context, config, parser) { + const element = node; + + if (_.isEmpty(element.attribs.src)) { + const error = new Error(`Empty src attribute in include in: ${element.attribs[ATTRIB_CWF]}`); + parser._onError(error); + return utils.createErrorNode(element, error); + } + + const { + isUrl, + hash, + filePath, + boilerplateFilePath, + actualFilePath, + } = _getSrcFlagsAndFilePaths(element, context, config); + + const isOptional = _.has(element.attribs, 'optional'); + const fileExistsNode = _getFileExistsNode(element, context, config, parser, actualFilePath, isOptional); + if (fileExistsNode) return fileExistsNode; + + // optional includes of whole files have been handled, + // but segments still need to be processed further down + if (isOptional && !hash) { + delete element.attribs.optional; + } + + // TODO do we need boilerplateIncludeSrc? + if (boilerplateFilePath) { + parser.boilerplateIncludeSrc.push({ from: context.cwf, to: boilerplateFilePath }); + } + + const isInline = _.has(element.attribs, 'inline'); + const isTrim = _.has(element.attribs, 'trim'); + const isDynamic = _.has(element.attribs, 'dynamic'); + + element.name = isInline ? 'span' : 'div'; + element.attribs[ATTRIB_INCLUDE_PATH] = filePath; + + // Use a 'plain' panel for a dynamic include + if (isDynamic) return _preprocessDynamicInclude(element, context, parser, hash, filePath, actualFilePath); + + // No need to process url contents + if (isUrl) return element; + + parser.staticIncludeSrc.push({ from: context.cwf, to: actualFilePath }); + + const isIncludeSrcMd = _isHtmlIncludingMarkdown(element, context, filePath); + + const { content, childContext, userDefinedVariables } + = parser._renderIncludeFile(actualFilePath, element, context, config, filePath); + childContext.source = isIncludeSrcMd ? 'md' : 'html'; + childContext.callStack.push(context.cwf); + parser.extractInnerVariablesIfNotProcessed(content, childContext, config, filePath); + + const innerVariables = parser.getImportedVariableMap(filePath); + const fileContent = nunjucks.renderString(content, { ...userDefinedVariables, ...innerVariables }); + + _deleteIncludeAttributes(element); + + // Process sources with or without hash, retrieving and appending + // the appropriate children to a wrapped include element + + let actualContent; + + if (hash) { + // Keep scripts in the fileContent + const src = cheerio.parseHTML(fileContent, true); + const $ = cheerio.load(src); + const hashContent = $(hash).html(); + + actualContent = (hashContent && isTrim) ? hashContent.trim() : hashContent; + + if (actualContent === null && isOptional) { + // Use empty content if it is optional + actualContent = ''; + } else if (actualContent === null) { + const hashSrcWithoutHash = hash.substring(1); + const error = new Error(`No such segment '${hashSrcWithoutHash}' in file: ${actualFilePath}\n` + + `Missing reference in ${element.attribs[ATTRIB_CWF]}`); + parser._onError(error); + + return utils.createErrorNode(element, error); + } + + // optional includes of segments have now been handled, so delete the attribute + if (isOptional) delete element.attribs.optional; + } else { + actualContent = (fileContent && isTrim) ? fileContent.trim() : fileContent; + } + + if (isIncludeSrcMd) { + actualContent = isInline ? actualContent : `\n\n${actualContent}\n`; + } + actualContent = _rebaseReferenceForStaticIncludes(actualContent, element, config); + + // Flag with a data-included-from flag with the source filePath for calculating + // the file path of dynamic resources ( images, anchors, plugin sources, etc. ) later + const wrapperType = isInline ? 'span' : 'div'; + const childrenHtml = `<${wrapperType} data-included-from="${filePath}">${actualContent}`; + element.children = cheerio.parseHTML(childrenHtml, true); + + if (element.children && element.children.length > 0) { + if (childContext.callStack.length > CyclicReferenceError.MAX_RECURSIVE_DEPTH) { + const error = new CyclicReferenceError(childContext.callStack); + parser._onError(error); + return utils.createErrorNode(element, error); + } + + // eslint-disable-next-line no-use-before-define + element.children = element.children.map(e => preProcessComponent(e, childContext, config, parser)); + } + + return element; +} + + +/* + * Variable and imports + */ + +function _preprocessVariables() { + return utils.createEmptyNode(); +} + + +/* + * Body + */ + + +function _preprocessBody(node) { + // eslint-disable-next-line no-console + console.warn(` tag found in ${node.attribs[ATTRIB_CWF]}. This may cause formatting errors.`); +} + + +/* + * API + */ + + +function preProcessComponent(node, context, config, parser) { + const element = node; + + _preProcessAllComponents(element, context); + + switch (element.name) { + case 'thumbnail': + return _preProcessThumbnail(element); + case 'panel': + return _preProcessPanel(element, context, config, parser); + case 'variable': + case 'import': + return _preprocessVariables(); + case 'include': + return _preprocessInclude(element, context, config, parser); + case 'body': + _preprocessBody(element); + // eslint-disable-next-line no-fallthrough + default: + // preprocess children + if (element.children && element.children.length > 0) { + element.children = element.children.map(e => preProcessComponent(e, context, config, parser)); + } + return element; + } +} + + +module.exports = { + preProcessComponent, +}; diff --git a/src/lib/markbind/src/utils.js b/src/lib/markbind/src/utils/index.js similarity index 81% rename from src/lib/markbind/src/utils.js rename to src/lib/markbind/src/utils/index.js index 4e6eee399f..b46bebf05f 100644 --- a/src/lib/markbind/src/utils.js +++ b/src/lib/markbind/src/utils/index.js @@ -1,7 +1,13 @@ +const cheerio = require('cheerio'); const fs = require('fs'); const path = require('path'); -const { markdownFileExts } = require('./constants'); +const _ = {}; +_.pick = require('lodash/pick'); + +const { + markdownFileExts, +} = require('../constants'); module.exports = { getCurrentDirectoryBase() { @@ -40,13 +46,6 @@ module.exports = { return ext; }, - wrapContent(content, front, tail) { - if (tail === undefined) { - return front + content + front; - } - return front + content + tail; - }, - setExtension(filename, ext) { return path.join( path.dirname(filename), @@ -69,8 +68,14 @@ module.exports = { || filePath.includes('{{hostBaseUrl}}'); }, - createErrorElement(error) { - return `
${error.message}
`; + createErrorNode(element, error) { + const errorElement = cheerio.parseHTML( + `
${error.message}
`, true)[0]; + return Object.assign(element, _.pick(errorElement, ['name', 'attribs', 'children'])); + }, + + createEmptyNode() { + return cheerio.parseHTML('
', true)[0]; }, /** diff --git a/src/lib/markbind/src/utils/urls.js b/src/lib/markbind/src/utils/urls.js new file mode 100644 index 0000000000..8fe193c865 --- /dev/null +++ b/src/lib/markbind/src/utils/urls.js @@ -0,0 +1,55 @@ +const path = require('path'); +const pathIsInside = require('path-is-inside'); + +const _ = {}; +_.pick = require('lodash/pick'); + +const { + BOILERPLATE_FOLDER_NAME, +} = require('../constants'); + +/** + * @param filePath The absolute file path to look up that is nested inside the root directory + * @param root The base directory from which to terminate the look up + * @param lookUp The set of urls representing the sites' base directories + * @return An object containing + * 1. The parent site's path of that the immediate parent site of the specified filePath, + * or the root site's path if there is none + * 2. The relative path from (1) to the immediate parent site of the specified filePath + * @throws If a non-absolute path or path outside the root is given + */ +function calculateNewBaseUrls(filePath, root, lookUp) { + if (!path.isAbsolute(filePath)) { + throw new Error(`Non-absolute path given to calculateNewBaseUrls: '${filePath}'`); + } + if (!pathIsInside(filePath, root)) { + throw new Error(`Path given '${filePath}' is not in root '${root}'`); + } + + function calculate(file, result) { + if (file === root) { + return { relative: '', parent: root }; + } + + const parent = path.dirname(file); + if (lookUp.has(parent)) { + return result + ? { relative: path.relative(parent, result), parent } + : calculate(parent, parent); + } + + return calculate(parent, result); + } + + return calculate(filePath, undefined); +} + +function calculateBoilerplateFilePath(pathInBoilerplates, asIfAt, config) { + const { parent, relative } = calculateNewBaseUrls(asIfAt, config.rootPath, config.baseUrlMap); + return path.resolve(parent, relative, BOILERPLATE_FOLDER_NAME, pathInBoilerplates); +} + +module.exports = { + calculateBoilerplateFilePath, + calculateNewBaseUrls, +};