diff options
author | Michael Catanzaro <mcatanzaro@gnome.org> | 2020-11-18 14:43:20 -0600 |
---|---|---|
committer | Michael Catanzaro <mcatanzaro@gnome.org> | 2020-11-18 14:43:20 -0600 |
commit | fe47931e18b3e2c919803e7dbc1849931295e785 (patch) | |
tree | 8b8d769a83d44affc0c9973ef727e592becd23fa | |
parent | 0c2f7feacd4cc50ac3f9504252e1a2aa552ccd00 (diff) | |
download | epiphany-mcatanzaro/highlight.js-10.4.0.tar.gz |
Update to highlight.js 10.4.0mcatanzaro/highlight.js-10.4.0
-rw-r--r-- | third-party/highlightjs/highlight.js | 4428 |
1 files changed, 2388 insertions, 2040 deletions
diff --git a/third-party/highlightjs/highlight.js b/third-party/highlightjs/highlight.js index 08b11e98c..bb7a3c759 100644 --- a/third-party/highlightjs/highlight.js +++ b/third-party/highlightjs/highlight.js @@ -1,2209 +1,2265 @@ /* - Highlight.js 10.2.0 (519f7798) + Highlight.js 10.4.0 (4055826e) License: BSD-3-Clause Copyright (c) 2006-2020, Ivan Sagalaev */ var hljs = (function () { - 'use strict'; - - // https://github.com/substack/deep-freeze/blob/master/index.js - - function deepFreeze(obj) { - Object.freeze(obj); - - var objIsFunction = typeof obj === 'function'; - - Object.getOwnPropertyNames(obj).forEach(function(prop) { - if (Object.hasOwnProperty.call(obj, prop) - && obj[prop] !== null - && (typeof obj[prop] === "object" || typeof obj[prop] === "function") - // IE11 fix: https://github.com/highlightjs/highlight.js/issues/2318 - // TODO: remove in the future - && (objIsFunction ? prop !== 'caller' && prop !== 'callee' && prop !== 'arguments' : true) - && !Object.isFrozen(obj[prop])) { - deepFreeze(obj[prop]); - } - }); + 'use strict'; + + function deepFreeze(obj) { + if (obj instanceof Map) { + obj.clear = obj.delete = obj.set = function () { + throw new Error('map is read-only'); + }; + } else if (obj instanceof Set) { + obj.add = obj.clear = obj.delete = function () { + throw new Error('set is read-only'); + }; + } - return obj; - } + // Freeze self + Object.freeze(obj); - class Response { - /** - * @param {CompiledMode} mode - */ - constructor(mode) { - // eslint-disable-next-line no-undefined - if (mode.data === undefined) mode.data = {}; + Object.getOwnPropertyNames(obj).forEach(function (name) { + var prop = obj[name]; - this.data = mode.data; - } + // Freeze prop if it is an object + if (typeof prop == 'object' && !Object.isFrozen(prop)) { + deepFreeze(prop); + } + }); - ignoreMatch() { - this.ignore = true; + return obj; } - } - /** - * @param {string} value - * @returns {string} - */ - function escapeHTML(value) { - return value - .replace(/&/g, '&') - .replace(/</g, '<') - .replace(/>/g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); - } + var deepFreezeEs6 = deepFreeze; + var _default = deepFreeze; + deepFreezeEs6.default = _default; - /** - * performs a shallow merge of multiple objects into one - * - * @template T - * @param {T} original - * @param {Record<string,any>[]} objects - * @returns {T} a single new object - */ - function inherit(original, ...objects) { - /** @type Record<string,any> */ - var result = {}; + class Response { + + constructor(mode) { + // eslint-disable-next-line no-undefined + if (mode.data === undefined) mode.data = {}; - for (const key in original) { - result[key] = original[key]; - } - objects.forEach(function(obj) { - for (const key in obj) { - result[key] = obj[key]; + this.data = mode.data; } - }); - return /** @type {T} */ (result); - } - /* Stream merging */ + ignoreMatch() { + this.ignore = true; + } + } - /** - * @typedef Event - * @property {'start'|'stop'} event - * @property {number} offset - * @property {Node} node - */ + /** + * @param {string} value + * @returns {string} + */ + function escapeHTML(value) { + return value + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + } - /** - * @param {Node} node - */ - function tag(node) { - return node.nodeName.toLowerCase(); - } + /** + * performs a shallow merge of multiple objects into one + * + * @template T + * @param {T} original + * @param {Record<string,any>[]} objects + * @returns {T} a single new object + */ + function inherit(original, ...objects) { + /** @type Record<string,any> */ + const result = Object.create(null); - /** - * @param {Node} node - */ - function nodeStream(node) { - /** @type Event[] */ - var result = []; - (function _nodeStream(node, offset) { - for (var child = node.firstChild; child; child = child.nextSibling) { - if (child.nodeType === 3) { - offset += child.nodeValue.length; - } else if (child.nodeType === 1) { - result.push({ - event: 'start', - offset: offset, - node: child - }); - offset = _nodeStream(child, offset); - // Prevent void elements from having an end tag that would actually - // double them in the output. There are more void elements in HTML - // but we list only those realistically expected in code display. - if (!tag(child).match(/br|hr|img|input/)) { - result.push({ - event: 'stop', - offset: offset, - node: child - }); - } - } + for (const key in original) { + result[key] = original[key]; } - return offset; - })(node, 0); - return result; - } + objects.forEach(function(obj) { + for (const key in obj) { + result[key] = obj[key]; + } + }); + return /** @type {T} */ (result); + } - /** - * @param {any} original - the original stream - * @param {any} highlighted - stream of the highlighted source - * @param {string} value - the original source itself - */ - function mergeStreams(original, highlighted, value) { - var processed = 0; - var result = ''; - var nodeStack = []; + /* Stream merging */ - function selectStream() { - if (!original.length || !highlighted.length) { - return original.length ? original : highlighted; - } - if (original[0].offset !== highlighted[0].offset) { - return (original[0].offset < highlighted[0].offset) ? original : highlighted; - } - - /* - To avoid starting the stream just before it should stop the order is - ensured that original always starts first and closes last: - - if (event1 == 'start' && event2 == 'start') - return original; - if (event1 == 'start' && event2 == 'stop') - return highlighted; - if (event1 == 'stop' && event2 == 'start') - return original; - if (event1 == 'stop' && event2 == 'stop') - return highlighted; - - ... which is collapsed to: - */ - return highlighted[0].event === 'start' ? original : highlighted; - } + /** + * @typedef Event + * @property {'start'|'stop'} event + * @property {number} offset + * @property {Node} node + */ /** * @param {Node} node */ - function open(node) { - /** @param {Attr} attr */ - function attr_str(attr) { - return ' ' + attr.nodeName + '="' + escapeHTML(attr.value) + '"'; - } - // @ts-ignore - result += '<' + tag(node) + [].map.call(node.attributes, attr_str).join('') + '>'; + function tag(node) { + return node.nodeName.toLowerCase(); } /** * @param {Node} node */ - function close(node) { - result += '</' + tag(node) + '>'; + function nodeStream(node) { + /** @type Event[] */ + const result = []; + (function _nodeStream(node, offset) { + for (let child = node.firstChild; child; child = child.nextSibling) { + if (child.nodeType === 3) { + offset += child.nodeValue.length; + } else if (child.nodeType === 1) { + result.push({ + event: 'start', + offset: offset, + node: child + }); + offset = _nodeStream(child, offset); + // Prevent void elements from having an end tag that would actually + // double them in the output. There are more void elements in HTML + // but we list only those realistically expected in code display. + if (!tag(child).match(/br|hr|img|input/)) { + result.push({ + event: 'stop', + offset: offset, + node: child + }); + } + } + } + return offset; + })(node, 0); + return result; } /** - * @param {Event} event + * @param {any} original - the original stream + * @param {any} highlighted - stream of the highlighted source + * @param {string} value - the original source itself */ - function render(event) { - (event.event === 'start' ? open : close)(event.node); - } + function mergeStreams(original, highlighted, value) { + let processed = 0; + let result = ''; + const nodeStack = []; + + function selectStream() { + if (!original.length || !highlighted.length) { + return original.length ? original : highlighted; + } + if (original[0].offset !== highlighted[0].offset) { + return (original[0].offset < highlighted[0].offset) ? original : highlighted; + } - while (original.length || highlighted.length) { - var stream = selectStream(); - result += escapeHTML(value.substring(processed, stream[0].offset)); - processed = stream[0].offset; - if (stream === original) { /* - On any opening or closing tag of the original markup we first close - the entire highlighted node stack, then render the original tag along - with all the following original tags at the same offset and then - reopen all the tags on the highlighted stack. + To avoid starting the stream just before it should stop the order is + ensured that original always starts first and closes last: + + if (event1 == 'start' && event2 == 'start') + return original; + if (event1 == 'start' && event2 == 'stop') + return highlighted; + if (event1 == 'stop' && event2 == 'start') + return original; + if (event1 == 'stop' && event2 == 'stop') + return highlighted; + + ... which is collapsed to: */ - nodeStack.reverse().forEach(close); - do { - render(stream.splice(0, 1)[0]); - stream = selectStream(); - } while (stream === original && stream.length && stream[0].offset === processed); - nodeStack.reverse().forEach(open); - } else { - if (stream[0].event === 'start') { - nodeStack.push(stream[0].node); - } else { - nodeStack.pop(); - } - render(stream.splice(0, 1)[0]); + return highlighted[0].event === 'start' ? original : highlighted; } - } - return result + escapeHTML(value.substr(processed)); - } - var utils = /*#__PURE__*/Object.freeze({ - __proto__: null, - escapeHTML: escapeHTML, - inherit: inherit, - nodeStream: nodeStream, - mergeStreams: mergeStreams - }); + /** + * @param {Node} node + */ + function open(node) { + /** @param {Attr} attr */ + function attributeString(attr) { + return ' ' + attr.nodeName + '="' + escapeHTML(attr.value) + '"'; + } + // @ts-ignore + result += '<' + tag(node) + [].map.call(node.attributes, attributeString).join('') + '>'; + } - /** - * @typedef {object} Renderer - * @property {(text: string) => void} addText - * @property {(node: Node) => void} openNode - * @property {(node: Node) => void} closeNode - * @property {() => string} value - */ + /** + * @param {Node} node + */ + function close(node) { + result += '</' + tag(node) + '>'; + } - /** @typedef {{kind?: string, sublanguage?: boolean}} Node */ - /** @typedef {{walk: (r: Renderer) => void}} Tree */ - /** */ + /** + * @param {Event} event + */ + function render(event) { + (event.event === 'start' ? open : close)(event.node); + } - const SPAN_CLOSE = '</span>'; + while (original.length || highlighted.length) { + let stream = selectStream(); + result += escapeHTML(value.substring(processed, stream[0].offset)); + processed = stream[0].offset; + if (stream === original) { + /* + On any opening or closing tag of the original markup we first close + the entire highlighted node stack, then render the original tag along + with all the following original tags at the same offset and then + reopen all the tags on the highlighted stack. + */ + nodeStack.reverse().forEach(close); + do { + render(stream.splice(0, 1)[0]); + stream = selectStream(); + } while (stream === original && stream.length && stream[0].offset === processed); + nodeStack.reverse().forEach(open); + } else { + if (stream[0].event === 'start') { + nodeStack.push(stream[0].node); + } else { + nodeStack.pop(); + } + render(stream.splice(0, 1)[0]); + } + } + return result + escapeHTML(value.substr(processed)); + } - /** - * Determines if a node needs to be wrapped in <span> - * - * @param {Node} node */ - const emitsWrappingTags = (node) => { - return !!node.kind; - }; + var utils = /*#__PURE__*/Object.freeze({ + __proto__: null, + escapeHTML: escapeHTML, + inherit: inherit, + nodeStream: nodeStream, + mergeStreams: mergeStreams + }); - /** @type {Renderer} */ - class HTMLRenderer { /** - * Creates a new HTMLRenderer - * - * @param {Tree} parseTree - the parse tree (must support `walk` API) - * @param {{classPrefix: string}} options + * @typedef {object} Renderer + * @property {(text: string) => void} addText + * @property {(node: Node) => void} openNode + * @property {(node: Node) => void} closeNode + * @property {() => string} value */ - constructor(parseTree, options) { - this.buffer = ""; - this.classPrefix = options.classPrefix; - parseTree.walk(this); - } - /** - * Adds texts to the output stream - * - * @param {string} text */ - addText(text) { - this.buffer += escapeHTML(text); - } + /** @typedef {{kind?: string, sublanguage?: boolean}} Node */ + /** @typedef {{walk: (r: Renderer) => void}} Tree */ + /** */ + + const SPAN_CLOSE = '</span>'; /** - * Adds a node open to the output stream (if needed) + * Determines if a node needs to be wrapped in <span> * * @param {Node} node */ - openNode(node) { - if (!emitsWrappingTags(node)) return; + const emitsWrappingTags = (node) => { + return !!node.kind; + }; - let className = node.kind; - if (!node.sublanguage) { - className = `${this.classPrefix}${className}`; + /** @type {Renderer} */ + class HTMLRenderer { + /** + * Creates a new HTMLRenderer + * + * @param {Tree} parseTree - the parse tree (must support `walk` API) + * @param {{classPrefix: string}} options + */ + constructor(parseTree, options) { + this.buffer = ""; + this.classPrefix = options.classPrefix; + parseTree.walk(this); } - this.span(className); - } - /** - * Adds a node close to the output stream (if needed) - * - * @param {Node} node */ - closeNode(node) { - if (!emitsWrappingTags(node)) return; + /** + * Adds texts to the output stream + * + * @param {string} text */ + addText(text) { + this.buffer += escapeHTML(text); + } - this.buffer += SPAN_CLOSE; - } + /** + * Adds a node open to the output stream (if needed) + * + * @param {Node} node */ + openNode(node) { + if (!emitsWrappingTags(node)) return; - /** - * returns the accumulated buffer - */ - value() { - return this.buffer; - } + let className = node.kind; + if (!node.sublanguage) { + className = `${this.classPrefix}${className}`; + } + this.span(className); + } - // helpers + /** + * Adds a node close to the output stream (if needed) + * + * @param {Node} node */ + closeNode(node) { + if (!emitsWrappingTags(node)) return; - /** - * Builds a span element - * - * @param {string} className */ - span(className) { - this.buffer += `<span class="${className}">`; - } - } + this.buffer += SPAN_CLOSE; + } - /** @typedef {{kind?: string, sublanguage?: boolean, children: Node[]} | string} Node */ - /** @typedef {{kind?: string, sublanguage?: boolean, children: Node[]} } DataNode */ - /** */ + /** + * returns the accumulated buffer + */ + value() { + return this.buffer; + } - class TokenTree { - constructor() { - /** @type DataNode */ - this.rootNode = { children: [] }; - this.stack = [this.rootNode]; - } + // helpers - get top() { - return this.stack[this.stack.length - 1]; + /** + * Builds a span element + * + * @param {string} className */ + span(className) { + this.buffer += `<span class="${className}">`; + } } - get root() { return this.rootNode; } + /** @typedef {{kind?: string, sublanguage?: boolean, children: Node[]} | string} Node */ + /** @typedef {{kind?: string, sublanguage?: boolean, children: Node[]} } DataNode */ + /** */ - /** @param {Node} node */ - add(node) { - this.top.children.push(node); - } + class TokenTree { + constructor() { + /** @type DataNode */ + this.rootNode = { children: [] }; + this.stack = [this.rootNode]; + } - /** @param {string} kind */ - openNode(kind) { - /** @type Node */ - const node = { kind, children: [] }; - this.add(node); - this.stack.push(node); - } + get top() { + return this.stack[this.stack.length - 1]; + } + + get root() { return this.rootNode; } - closeNode() { - if (this.stack.length > 1) { - return this.stack.pop(); + /** @param {Node} node */ + add(node) { + this.top.children.push(node); } - // eslint-disable-next-line no-undefined - return undefined; - } - closeAllNodes() { - while (this.closeNode()); - } + /** @param {string} kind */ + openNode(kind) { + /** @type Node */ + const node = { kind, children: [] }; + this.add(node); + this.stack.push(node); + } - toJSON() { - return JSON.stringify(this.rootNode, null, 4); - } + closeNode() { + if (this.stack.length > 1) { + return this.stack.pop(); + } + // eslint-disable-next-line no-undefined + return undefined; + } - /** - * @typedef { import("./html_renderer").Renderer } Renderer - * @param {Renderer} builder - */ - walk(builder) { - // this does not - return this.constructor._walk(builder, this.rootNode); - // this works - // return TokenTree._walk(builder, this.rootNode); + closeAllNodes() { + while (this.closeNode()); + } + + toJSON() { + return JSON.stringify(this.rootNode, null, 4); + } + + /** + * @typedef { import("./html_renderer").Renderer } Renderer + * @param {Renderer} builder + */ + walk(builder) { + // this does not + return this.constructor._walk(builder, this.rootNode); + // this works + // return TokenTree._walk(builder, this.rootNode); + } + + /** + * @param {Renderer} builder + * @param {Node} node + */ + static _walk(builder, node) { + if (typeof node === "string") { + builder.addText(node); + } else if (node.children) { + builder.openNode(node); + node.children.forEach((child) => this._walk(builder, child)); + builder.closeNode(node); + } + return builder; + } + + /** + * @param {Node} node + */ + static _collapse(node) { + if (typeof node === "string") return; + if (!node.children) return; + + if (node.children.every(el => typeof el === "string")) { + // node.text = node.children.join(""); + // delete node.children; + node.children = [node.children.join("")]; + } else { + node.children.forEach((child) => { + TokenTree._collapse(child); + }); + } + } } /** - * @param {Renderer} builder - * @param {Node} node - */ - static _walk(builder, node) { - if (typeof node === "string") { - builder.addText(node); - } else if (node.children) { - builder.openNode(node); - node.children.forEach((child) => this._walk(builder, child)); - builder.closeNode(node); - } - return builder; - } + Currently this is all private API, but this is the minimal API necessary + that an Emitter must implement to fully support the parser. + + Minimal interface: + + - addKeyword(text, kind) + - addText(text) + - addSublanguage(emitter, subLanguageName) + - finalize() + - openNode(kind) + - closeNode() + - closeAllNodes() + - toHTML() + + */ /** - * @param {Node} node + * @implements {Emitter} */ - static _collapse(node) { - if (typeof node === "string") return; - if (!node.children) return; - - if (node.children.every(el => typeof el === "string")) { - // node.text = node.children.join(""); - // delete node.children; - node.children = [node.children.join("")]; - } else { - node.children.forEach((child) => { - TokenTree._collapse(child); - }); + class TokenTreeEmitter extends TokenTree { + /** + * @param {*} options + */ + constructor(options) { + super(); + this.options = options; } - } - } - /** - Currently this is all private API, but this is the minimal API necessary - that an Emitter must implement to fully support the parser. + /** + * @param {string} text + * @param {string} kind + */ + addKeyword(text, kind) { + if (text === "") { return; } - Minimal interface: + this.openNode(kind); + this.addText(text); + this.closeNode(); + } - - addKeyword(text, kind) - - addText(text) - - addSublanguage(emitter, subLanguageName) - - finalize() - - openNode(kind) - - closeNode() - - closeAllNodes() - - toHTML() + /** + * @param {string} text + */ + addText(text) { + if (text === "") { return; } - */ + this.add(text); + } - /** - * @implements {Emitter} - */ - class TokenTreeEmitter extends TokenTree { - /** - * @param {*} options - */ - constructor(options) { - super(); - this.options = options; + /** + * @param {Emitter & {root: DataNode}} emitter + * @param {string} name + */ + addSublanguage(emitter, name) { + /** @type DataNode */ + const node = emitter.root; + node.kind = name; + node.sublanguage = true; + this.add(node); + } + + toHTML() { + const renderer = new HTMLRenderer(this, this.options); + return renderer.value(); + } + + finalize() { + return true; + } } /** - * @param {string} text - * @param {string} kind - */ - addKeyword(text, kind) { - if (text === "") { return; } - - this.openNode(kind); - this.addText(text); - this.closeNode(); + * @param {string} value + * @returns {RegExp} + * */ + function escape(value) { + return new RegExp(value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm'); } /** - * @param {string} text + * @param {RegExp | string } re + * @returns {string} */ - addText(text) { - if (text === "") { return; } + function source(re) { + if (!re) return null; + if (typeof re === "string") return re; - this.add(text); + return re.source; } /** - * @param {Emitter & {root: DataNode}} emitter - * @param {string} name + * @param {...(RegExp | string) } args + * @returns {string} */ - addSublanguage(emitter, name) { - /** @type DataNode */ - const node = emitter.root; - node.kind = name; - node.sublanguage = true; - this.add(node); + function concat(...args) { + const joined = args.map((x) => source(x)).join(""); + return joined; } - toHTML() { - const renderer = new HTMLRenderer(this, this.options); - return renderer.value(); + /** + * @param {RegExp} re + * @returns {number} + */ + function countMatchGroups(re) { + return (new RegExp(re.toString() + '|')).exec('').length - 1; } - finalize() { - return true; + /** + * Does lexeme start with a regular expression match at the beginning + * @param {RegExp} re + * @param {string} lexeme + */ + function startsWith(re, lexeme) { + const match = re && re.exec(lexeme); + return match && match.index === 0; } - } - - /** - * @param {string} value - * @returns {RegExp} - * */ - function escape(value) { - return new RegExp(value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm'); - } - - /** - * @param {RegExp | string } re - * @returns {string} - */ - function source(re) { - if (!re) return null; - if (typeof re === "string") return re; - - return re.source; - } - - /** - * @param {...(RegExp | string) } args - * @returns {string} - */ - function concat(...args) { - const joined = args.map((x) => source(x)).join(""); - return joined; - } - - /** - * @param {RegExp} re - * @returns {number} - */ - function countMatchGroups(re) { - return (new RegExp(re.toString() + '|')).exec('').length - 1; - } - - /** - * Does lexeme start with a regular expression match at the beginning - * @param {RegExp} re - * @param {string} lexeme - */ - function startsWith(re, lexeme) { - var match = re && re.exec(lexeme); - return match && match.index === 0; - } - // join logically computes regexps.join(separator), but fixes the - // backreferences so they continue to match. - // it also places each individual regular expression into it's own - // match group, keeping track of the sequencing of those match groups - // is currently an exercise for the caller. :-) - /** - * @param {(string | RegExp)[]} regexps - * @param {string} separator - * @returns {string} - */ - function join(regexps, separator = "|") { - // backreferenceRe matches an open parenthesis or backreference. To avoid - // an incorrect parse, it additionally matches the following: - // - [...] elements, where the meaning of parentheses and escapes change - // - other escape sequences, so we do not misparse escape sequences as - // interesting elements - // - non-matching or lookahead parentheses, which do not capture. These - // follow the '(' with a '?'. - var backreferenceRe = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./; - var numCaptures = 0; - var ret = ''; - for (var i = 0; i < regexps.length; i++) { - numCaptures += 1; - var offset = numCaptures; - var re = source(regexps[i]); - if (i > 0) { - ret += separator; - } - ret += "("; - while (re.length > 0) { - var match = backreferenceRe.exec(re); - if (match == null) { - ret += re; - break; - } - ret += re.substring(0, match.index); - re = re.substring(match.index + match[0].length); - if (match[0][0] === '\\' && match[1]) { - // Adjust the backreference. - ret += '\\' + String(Number(match[1]) + offset); - } else { - ret += match[0]; - if (match[0] === '(') { - numCaptures++; + // join logically computes regexps.join(separator), but fixes the + // backreferences so they continue to match. + // it also places each individual regular expression into it's own + // match group, keeping track of the sequencing of those match groups + // is currently an exercise for the caller. :-) + /** + * @param {(string | RegExp)[]} regexps + * @param {string} separator + * @returns {string} + */ + function join(regexps, separator = "|") { + // backreferenceRe matches an open parenthesis or backreference. To avoid + // an incorrect parse, it additionally matches the following: + // - [...] elements, where the meaning of parentheses and escapes change + // - other escape sequences, so we do not misparse escape sequences as + // interesting elements + // - non-matching or lookahead parentheses, which do not capture. These + // follow the '(' with a '?'. + const backreferenceRe = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./; + let numCaptures = 0; + let ret = ''; + for (let i = 0; i < regexps.length; i++) { + numCaptures += 1; + const offset = numCaptures; + let re = source(regexps[i]); + if (i > 0) { + ret += separator; + } + ret += "("; + while (re.length > 0) { + const match = backreferenceRe.exec(re); + if (match == null) { + ret += re; + break; + } + ret += re.substring(0, match.index); + re = re.substring(match.index + match[0].length); + if (match[0][0] === '\\' && match[1]) { + // Adjust the backreference. + ret += '\\' + String(Number(match[1]) + offset); + } else { + ret += match[0]; + if (match[0] === '(') { + numCaptures++; + } } } + ret += ")"; } - ret += ")"; + return ret; } - return ret; - } - // Common regexps - const IDENT_RE = '[a-zA-Z]\\w*'; - const UNDERSCORE_IDENT_RE = '[a-zA-Z_]\\w*'; - const NUMBER_RE = '\\b\\d+(\\.\\d+)?'; - const C_NUMBER_RE = '(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float - const BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b... - const RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~'; + // Common regexps + const IDENT_RE = '[a-zA-Z]\\w*'; + const UNDERSCORE_IDENT_RE = '[a-zA-Z_]\\w*'; + const NUMBER_RE = '\\b\\d+(\\.\\d+)?'; + const C_NUMBER_RE = '(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float + const BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b... + const RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~'; - /** - * @param { Partial<Mode> & {binary?: string | RegExp} } opts - */ - const SHEBANG = (opts = {}) => { - const beginShebang = /^#![ ]*\//; - if (opts.binary) { - opts.begin = concat( - beginShebang, - /.*\b/, - opts.binary, - /\b.*/); - } - return inherit({ - className: 'meta', - begin: beginShebang, - end: /$/, - relevance: 0, - /** @type {ModeCallback} */ - "on:begin": (m, resp) => { - if (m.index !== 0) resp.ignoreMatch(); - } - }, opts); - }; - - // Common modes - const BACKSLASH_ESCAPE = { - begin: '\\\\[\\s\\S]', relevance: 0 - }; - const APOS_STRING_MODE = { - className: 'string', - begin: '\'', - end: '\'', - illegal: '\\n', - contains: [BACKSLASH_ESCAPE] - }; - const QUOTE_STRING_MODE = { - className: 'string', - begin: '"', - end: '"', - illegal: '\\n', - contains: [BACKSLASH_ESCAPE] - }; - const PHRASAL_WORDS_MODE = { - begin: /\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|they|like|more)\b/ - }; - /** - * Creates a comment mode - * - * @param {string | RegExp} begin - * @param {string | RegExp} end - * @param {Mode | {}} [modeOptions] - * @returns {Partial<Mode>} - */ - const COMMENT = function(begin, end, modeOptions = {}) { - var mode = inherit( - { - className: 'comment', - begin, - end, - contains: [] - }, - modeOptions - ); - mode.contains.push(PHRASAL_WORDS_MODE); - mode.contains.push({ - className: 'doctag', - begin: '(?:TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):', - relevance: 0 - }); - return mode; - }; - const C_LINE_COMMENT_MODE = COMMENT('//', '$'); - const C_BLOCK_COMMENT_MODE = COMMENT('/\\*', '\\*/'); - const HASH_COMMENT_MODE = COMMENT('#', '$'); - const NUMBER_MODE = { - className: 'number', - begin: NUMBER_RE, - relevance: 0 - }; - const C_NUMBER_MODE = { - className: 'number', - begin: C_NUMBER_RE, - relevance: 0 - }; - const BINARY_NUMBER_MODE = { - className: 'number', - begin: BINARY_NUMBER_RE, - relevance: 0 - }; - const CSS_NUMBER_MODE = { - className: 'number', - begin: NUMBER_RE + '(' + - '%|em|ex|ch|rem' + - '|vw|vh|vmin|vmax' + - '|cm|mm|in|pt|pc|px' + - '|deg|grad|rad|turn' + - '|s|ms' + - '|Hz|kHz' + - '|dpi|dpcm|dppx' + - ')?', - relevance: 0 - }; - const REGEXP_MODE = { - // this outer rule makes sure we actually have a WHOLE regex and not simply - // an expression such as: - // - // 3 / something - // - // (which will then blow up when regex's `illegal` sees the newline) - begin: /(?=\/[^/\n]*\/)/, - contains: [{ - className: 'regexp', - begin: /\//, - end: /\/[gimuy]*/, - illegal: /\n/, - contains: [ - BACKSLASH_ESCAPE, - { - begin: /\[/, - end: /\]/, - relevance: 0, - contains: [BACKSLASH_ESCAPE] + /** + * @param { Partial<Mode> & {binary?: string | RegExp} } opts + */ + const SHEBANG = (opts = {}) => { + const beginShebang = /^#![ ]*\//; + if (opts.binary) { + opts.begin = concat( + beginShebang, + /.*\b/, + opts.binary, + /\b.*/); + } + return inherit({ + className: 'meta', + begin: beginShebang, + end: /$/, + relevance: 0, + /** @type {ModeCallback} */ + "on:begin": (m, resp) => { + if (m.index !== 0) resp.ignoreMatch(); } - ] - }] - }; - const TITLE_MODE = { - className: 'title', - begin: IDENT_RE, - relevance: 0 - }; - const UNDERSCORE_TITLE_MODE = { - className: 'title', - begin: UNDERSCORE_IDENT_RE, - relevance: 0 - }; - const METHOD_GUARD = { - // excludes method names from keyword processing - begin: '\\.\\s*' + UNDERSCORE_IDENT_RE, - relevance: 0 - }; + }, opts); + }; - /** - * Adds end same as begin mechanics to a mode - * - * Your mode must include at least a single () match group as that first match - * group is what is used for comparison - * @param {Partial<Mode>} mode - */ - const END_SAME_AS_BEGIN = function(mode) { - return Object.assign(mode, - { - /** @type {ModeCallback} */ - 'on:begin': (m, resp) => { resp.data._beginMatch = m[1]; }, - /** @type {ModeCallback} */ - 'on:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch(); } + // Common modes + const BACKSLASH_ESCAPE = { + begin: '\\\\[\\s\\S]', relevance: 0 + }; + const APOS_STRING_MODE = { + className: 'string', + begin: '\'', + end: '\'', + illegal: '\\n', + contains: [BACKSLASH_ESCAPE] + }; + const QUOTE_STRING_MODE = { + className: 'string', + begin: '"', + end: '"', + illegal: '\\n', + contains: [BACKSLASH_ESCAPE] + }; + const PHRASAL_WORDS_MODE = { + begin: /\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|they|like|more)\b/ + }; + /** + * Creates a comment mode + * + * @param {string | RegExp} begin + * @param {string | RegExp} end + * @param {Mode | {}} [modeOptions] + * @returns {Partial<Mode>} + */ + const COMMENT = function(begin, end, modeOptions = {}) { + const mode = inherit( + { + className: 'comment', + begin, + end, + contains: [] + }, + modeOptions + ); + mode.contains.push(PHRASAL_WORDS_MODE); + mode.contains.push({ + className: 'doctag', + begin: '(?:TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):', + relevance: 0 }); - }; - - var MODES = /*#__PURE__*/Object.freeze({ - __proto__: null, - IDENT_RE: IDENT_RE, - UNDERSCORE_IDENT_RE: UNDERSCORE_IDENT_RE, - NUMBER_RE: NUMBER_RE, - C_NUMBER_RE: C_NUMBER_RE, - BINARY_NUMBER_RE: BINARY_NUMBER_RE, - RE_STARTERS_RE: RE_STARTERS_RE, - SHEBANG: SHEBANG, - BACKSLASH_ESCAPE: BACKSLASH_ESCAPE, - APOS_STRING_MODE: APOS_STRING_MODE, - QUOTE_STRING_MODE: QUOTE_STRING_MODE, - PHRASAL_WORDS_MODE: PHRASAL_WORDS_MODE, - COMMENT: COMMENT, - C_LINE_COMMENT_MODE: C_LINE_COMMENT_MODE, - C_BLOCK_COMMENT_MODE: C_BLOCK_COMMENT_MODE, - HASH_COMMENT_MODE: HASH_COMMENT_MODE, - NUMBER_MODE: NUMBER_MODE, - C_NUMBER_MODE: C_NUMBER_MODE, - BINARY_NUMBER_MODE: BINARY_NUMBER_MODE, - CSS_NUMBER_MODE: CSS_NUMBER_MODE, - REGEXP_MODE: REGEXP_MODE, - TITLE_MODE: TITLE_MODE, - UNDERSCORE_TITLE_MODE: UNDERSCORE_TITLE_MODE, - METHOD_GUARD: METHOD_GUARD, - END_SAME_AS_BEGIN: END_SAME_AS_BEGIN - }); - - // keywords that should have no default relevance value - var COMMON_KEYWORDS = 'of and for in not or if then'.split(' '); - - // compilation + return mode; + }; + const C_LINE_COMMENT_MODE = COMMENT('//', '$'); + const C_BLOCK_COMMENT_MODE = COMMENT('/\\*', '\\*/'); + const HASH_COMMENT_MODE = COMMENT('#', '$'); + const NUMBER_MODE = { + className: 'number', + begin: NUMBER_RE, + relevance: 0 + }; + const C_NUMBER_MODE = { + className: 'number', + begin: C_NUMBER_RE, + relevance: 0 + }; + const BINARY_NUMBER_MODE = { + className: 'number', + begin: BINARY_NUMBER_RE, + relevance: 0 + }; + const CSS_NUMBER_MODE = { + className: 'number', + begin: NUMBER_RE + '(' + + '%|em|ex|ch|rem' + + '|vw|vh|vmin|vmax' + + '|cm|mm|in|pt|pc|px' + + '|deg|grad|rad|turn' + + '|s|ms' + + '|Hz|kHz' + + '|dpi|dpcm|dppx' + + ')?', + relevance: 0 + }; + const REGEXP_MODE = { + // this outer rule makes sure we actually have a WHOLE regex and not simply + // an expression such as: + // + // 3 / something + // + // (which will then blow up when regex's `illegal` sees the newline) + begin: /(?=\/[^/\n]*\/)/, + contains: [{ + className: 'regexp', + begin: /\//, + end: /\/[gimuy]*/, + illegal: /\n/, + contains: [ + BACKSLASH_ESCAPE, + { + begin: /\[/, + end: /\]/, + relevance: 0, + contains: [BACKSLASH_ESCAPE] + } + ] + }] + }; + const TITLE_MODE = { + className: 'title', + begin: IDENT_RE, + relevance: 0 + }; + const UNDERSCORE_TITLE_MODE = { + className: 'title', + begin: UNDERSCORE_IDENT_RE, + relevance: 0 + }; + const METHOD_GUARD = { + // excludes method names from keyword processing + begin: '\\.\\s*' + UNDERSCORE_IDENT_RE, + relevance: 0 + }; - /** - * Compiles a language definition result - * - * Given the raw result of a language definition (Language), compiles this so - * that it is ready for highlighting code. - * @param {Language} language - * @returns {CompiledLanguage} - */ - function compileLanguage(language) { /** - * Builds a regex with the case sensativility of the current language + * Adds end same as begin mechanics to a mode * - * @param {RegExp | string} value - * @param {boolean} [global] + * Your mode must include at least a single () match group as that first match + * group is what is used for comparison + * @param {Partial<Mode>} mode */ - function langRe(value, global) { - return new RegExp( - source(value), - 'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '') - ); - } + const END_SAME_AS_BEGIN = function(mode) { + return Object.assign(mode, + { + /** @type {ModeCallback} */ + 'on:begin': (m, resp) => { resp.data._beginMatch = m[1]; }, + /** @type {ModeCallback} */ + 'on:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch(); } + }); + }; + + var MODES = /*#__PURE__*/Object.freeze({ + __proto__: null, + IDENT_RE: IDENT_RE, + UNDERSCORE_IDENT_RE: UNDERSCORE_IDENT_RE, + NUMBER_RE: NUMBER_RE, + C_NUMBER_RE: C_NUMBER_RE, + BINARY_NUMBER_RE: BINARY_NUMBER_RE, + RE_STARTERS_RE: RE_STARTERS_RE, + SHEBANG: SHEBANG, + BACKSLASH_ESCAPE: BACKSLASH_ESCAPE, + APOS_STRING_MODE: APOS_STRING_MODE, + QUOTE_STRING_MODE: QUOTE_STRING_MODE, + PHRASAL_WORDS_MODE: PHRASAL_WORDS_MODE, + COMMENT: COMMENT, + C_LINE_COMMENT_MODE: C_LINE_COMMENT_MODE, + C_BLOCK_COMMENT_MODE: C_BLOCK_COMMENT_MODE, + HASH_COMMENT_MODE: HASH_COMMENT_MODE, + NUMBER_MODE: NUMBER_MODE, + C_NUMBER_MODE: C_NUMBER_MODE, + BINARY_NUMBER_MODE: BINARY_NUMBER_MODE, + CSS_NUMBER_MODE: CSS_NUMBER_MODE, + REGEXP_MODE: REGEXP_MODE, + TITLE_MODE: TITLE_MODE, + UNDERSCORE_TITLE_MODE: UNDERSCORE_TITLE_MODE, + METHOD_GUARD: METHOD_GUARD, + END_SAME_AS_BEGIN: END_SAME_AS_BEGIN + }); + + // keywords that should have no default relevance value + const COMMON_KEYWORDS = [ + 'of', + 'and', + 'for', + 'in', + 'not', + 'or', + 'if', + 'then', + 'parent', // common variable name + 'list', // common variable name + 'value' // common variable name + ]; + + // compilation /** - Stores multiple regular expressions and allows you to quickly search for - them all in a string simultaneously - returning the first match. It does - this by creating a huge (a|b|c) regex - each individual item wrapped with () - and joined by `|` - using match groups to track position. When a match is - found checking which position in the array has content allows us to figure - out which of the original regexes / match groups triggered the match. - - The match object itself (the result of `Regex.exec`) is returned but also - enhanced by merging in any meta-data that was registered with the regex. - This is how we keep track of which mode matched, and what type of rule - (`illegal`, `begin`, end, etc). - */ - class MultiRegex { - constructor() { - this.matchIndexes = {}; - // @ts-ignore - this.regexes = []; - this.matchAt = 1; - this.position = 0; + * Compiles a language definition result + * + * Given the raw result of a language definition (Language), compiles this so + * that it is ready for highlighting code. + * @param {Language} language + * @returns {CompiledLanguage} + */ + function compileLanguage(language) { + /** + * Builds a regex with the case sensativility of the current language + * + * @param {RegExp | string} value + * @param {boolean} [global] + */ + function langRe(value, global) { + return new RegExp( + source(value), + 'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '') + ); } - // @ts-ignore - addRule(re, opts) { - opts.position = this.position++; - // @ts-ignore - this.matchIndexes[this.matchAt] = opts; - this.regexes.push([opts, re]); - this.matchAt += countMatchGroups(re) + 1; - } + /** + Stores multiple regular expressions and allows you to quickly search for + them all in a string simultaneously - returning the first match. It does + this by creating a huge (a|b|c) regex - each individual item wrapped with () + and joined by `|` - using match groups to track position. When a match is + found checking which position in the array has content allows us to figure + out which of the original regexes / match groups triggered the match. + + The match object itself (the result of `Regex.exec`) is returned but also + enhanced by merging in any meta-data that was registered with the regex. + This is how we keep track of which mode matched, and what type of rule + (`illegal`, `begin`, end, etc). + */ + class MultiRegex { + constructor() { + this.matchIndexes = {}; + // @ts-ignore + this.regexes = []; + this.matchAt = 1; + this.position = 0; + } - compile() { - if (this.regexes.length === 0) { - // avoids the need to check length every time exec is called + // @ts-ignore + addRule(re, opts) { + opts.position = this.position++; // @ts-ignore - this.exec = () => null; + this.matchIndexes[this.matchAt] = opts; + this.regexes.push([opts, re]); + this.matchAt += countMatchGroups(re) + 1; } - const terminators = this.regexes.map(el => el[1]); - this.matcherRe = langRe(join(terminators), true); - this.lastIndex = 0; - } - /** @param {string} s */ - exec(s) { - this.matcherRe.lastIndex = this.lastIndex; - const match = this.matcherRe.exec(s); - if (!match) { return null; } + compile() { + if (this.regexes.length === 0) { + // avoids the need to check length every time exec is called + // @ts-ignore + this.exec = () => null; + } + const terminators = this.regexes.map(el => el[1]); + this.matcherRe = langRe(join(terminators), true); + this.lastIndex = 0; + } - // eslint-disable-next-line no-undefined - const i = match.findIndex((el, i) => i > 0 && el !== undefined); - // @ts-ignore - const matchData = this.matchIndexes[i]; - // trim off any earlier non-relevant match groups (ie, the other regex - // match groups that make up the multi-matcher) - match.splice(0, i); + /** @param {string} s */ + exec(s) { + this.matcherRe.lastIndex = this.lastIndex; + const match = this.matcherRe.exec(s); + if (!match) { return null; } - return Object.assign(match, matchData); + // eslint-disable-next-line no-undefined + const i = match.findIndex((el, i) => i > 0 && el !== undefined); + // @ts-ignore + const matchData = this.matchIndexes[i]; + // trim off any earlier non-relevant match groups (ie, the other regex + // match groups that make up the multi-matcher) + match.splice(0, i); + + return Object.assign(match, matchData); + } } - } - /* - Created to solve the key deficiently with MultiRegex - there is no way to - test for multiple matches at a single location. Why would we need to do - that? In the future a more dynamic engine will allow certain matches to be - ignored. An example: if we matched say the 3rd regex in a large group but - decided to ignore it - we'd need to started testing again at the 4th - regex... but MultiRegex itself gives us no real way to do that. + /* + Created to solve the key deficiently with MultiRegex - there is no way to + test for multiple matches at a single location. Why would we need to do + that? In the future a more dynamic engine will allow certain matches to be + ignored. An example: if we matched say the 3rd regex in a large group but + decided to ignore it - we'd need to started testing again at the 4th + regex... but MultiRegex itself gives us no real way to do that. - So what this class creates MultiRegexs on the fly for whatever search - position they are needed. + So what this class creates MultiRegexs on the fly for whatever search + position they are needed. - NOTE: These additional MultiRegex objects are created dynamically. For most - grammars most of the time we will never actually need anything more than the - first MultiRegex - so this shouldn't have too much overhead. + NOTE: These additional MultiRegex objects are created dynamically. For most + grammars most of the time we will never actually need anything more than the + first MultiRegex - so this shouldn't have too much overhead. - Say this is our search group, and we match regex3, but wish to ignore it. + Say this is our search group, and we match regex3, but wish to ignore it. - regex1 | regex2 | regex3 | regex4 | regex5 ' ie, startAt = 0 + regex1 | regex2 | regex3 | regex4 | regex5 ' ie, startAt = 0 - What we need is a new MultiRegex that only includes the remaining - possibilities: + What we need is a new MultiRegex that only includes the remaining + possibilities: - regex4 | regex5 ' ie, startAt = 3 + regex4 | regex5 ' ie, startAt = 3 - This class wraps all that complexity up in a simple API... `startAt` decides - where in the array of expressions to start doing the matching. It - auto-increments, so if a match is found at position 2, then startAt will be - set to 3. If the end is reached startAt will return to 0. + This class wraps all that complexity up in a simple API... `startAt` decides + where in the array of expressions to start doing the matching. It + auto-increments, so if a match is found at position 2, then startAt will be + set to 3. If the end is reached startAt will return to 0. + + MOST of the time the parser will be setting startAt manually to 0. + */ + class ResumableMultiRegex { + constructor() { + // @ts-ignore + this.rules = []; + // @ts-ignore + this.multiRegexes = []; + this.count = 0; + + this.lastIndex = 0; + this.regexIndex = 0; + } - MOST of the time the parser will be setting startAt manually to 0. - */ - class ResumableMultiRegex { - constructor() { // @ts-ignore - this.rules = []; + getMatcher(index) { + if (this.multiRegexes[index]) return this.multiRegexes[index]; + + const matcher = new MultiRegex(); + this.rules.slice(index).forEach(([re, opts]) => matcher.addRule(re, opts)); + matcher.compile(); + this.multiRegexes[index] = matcher; + return matcher; + } + + resumingScanAtSamePosition() { + return this.regexIndex !== 0; + } + + considerAll() { + this.regexIndex = 0; + } + // @ts-ignore - this.multiRegexes = []; - this.count = 0; + addRule(re, opts) { + this.rules.push([re, opts]); + if (opts.type === "begin") this.count++; + } - this.lastIndex = 0; - this.regexIndex = 0; - } + /** @param {string} s */ + exec(s) { + const m = this.getMatcher(this.regexIndex); + m.lastIndex = this.lastIndex; + let result = m.exec(s); + + // The following is because we have no easy way to say "resume scanning at the + // existing position but also skip the current rule ONLY". What happens is + // all prior rules are also skipped which can result in matching the wrong + // thing. Example of matching "booger": + + // our matcher is [string, "booger", number] + // + // ....booger.... + + // if "booger" is ignored then we'd really need a regex to scan from the + // SAME position for only: [string, number] but ignoring "booger" (if it + // was the first match), a simple resume would scan ahead who knows how + // far looking only for "number", ignoring potential string matches (or + // future "booger" matches that might be valid.) + + // So what we do: We execute two matchers, one resuming at the same + // position, but the second full matcher starting at the position after: + + // /--- resume first regex match here (for [number]) + // |/---- full match here for [string, "booger", number] + // vv + // ....booger.... + + // Which ever results in a match first is then used. So this 3-4 step + // process essentially allows us to say "match at this position, excluding + // a prior rule that was ignored". + // + // 1. Match "booger" first, ignore. Also proves that [string] does non match. + // 2. Resume matching for [number] + // 3. Match at index + 1 for [string, "booger", number] + // 4. If #2 and #3 result in matches, which came first? + if (this.resumingScanAtSamePosition()) { + if (result && result.index === this.lastIndex) ; else { // use the second matcher result + const m2 = this.getMatcher(0); + m2.lastIndex = this.lastIndex + 1; + result = m2.exec(s); + } + } - // @ts-ignore - getMatcher(index) { - if (this.multiRegexes[index]) return this.multiRegexes[index]; + if (result) { + this.regexIndex += result.position + 1; + if (this.regexIndex === this.count) { + // wrap-around to considering all matches again + this.considerAll(); + } + } - const matcher = new MultiRegex(); - this.rules.slice(index).forEach(([re, opts]) => matcher.addRule(re, opts)); - matcher.compile(); - this.multiRegexes[index] = matcher; - return matcher; + return result; + } } - resumingScanAtSamePosition() { - return this.regexIndex !== 0; - } + /** + * Given a mode, builds a huge ResumableMultiRegex that can be used to walk + * the content and find matches. + * + * @param {CompiledMode} mode + * @returns {ResumableMultiRegex} + */ + function buildModeRegex(mode) { + const mm = new ResumableMultiRegex(); + + mode.contains.forEach(term => mm.addRule(term.begin, { rule: term, type: "begin" })); - considerAll() { - this.regexIndex = 0; + if (mode.terminator_end) { + mm.addRule(mode.terminator_end, { type: "end" }); + } + if (mode.illegal) { + mm.addRule(mode.illegal, { type: "illegal" }); + } + + return mm; } - // @ts-ignore - addRule(re, opts) { - this.rules.push([re, opts]); - if (opts.type === "begin") this.count++; + // TODO: We need negative look-behind support to do this properly + /** + * Skip a match if it has a preceding dot + * + * This is used for `beginKeywords` to prevent matching expressions such as + * `bob.keyword.do()`. The mode compiler automatically wires this up as a + * special _internal_ 'on:begin' callback for modes with `beginKeywords` + * @param {RegExpMatchArray} match + * @param {CallbackResponse} response + */ + function skipIfhasPrecedingDot(match, response) { + const before = match.input[match.index - 1]; + if (before === ".") { + response.ignoreMatch(); + } } - /** @param {string} s */ - exec(s) { - const m = this.getMatcher(this.regexIndex); - m.lastIndex = this.lastIndex; - let result = m.exec(s); + /** skip vs abort vs ignore + * + * @skip - The mode is still entered and exited normally (and contains rules apply), + * but all content is held and added to the parent buffer rather than being + * output when the mode ends. Mostly used with `sublanguage` to build up + * a single large buffer than can be parsed by sublanguage. + * + * - The mode begin ands ends normally. + * - Content matched is added to the parent mode buffer. + * - The parser cursor is moved forward normally. + * + * @abort - A hack placeholder until we have ignore. Aborts the mode (as if it + * never matched) but DOES NOT continue to match subsequent `contains` + * modes. Abort is bad/suboptimal because it can result in modes + * farther down not getting applied because an earlier rule eats the + * content but then aborts. + * + * - The mode does not begin. + * - Content matched by `begin` is added to the mode buffer. + * - The parser cursor is moved forward accordingly. + * + * @ignore - Ignores the mode (as if it never matched) and continues to match any + * subsequent `contains` modes. Ignore isn't technically possible with + * the current parser implementation. + * + * - The mode does not begin. + * - Content matched by `begin` is ignored. + * - The parser cursor is not moved forward. + */ - // The following is because we have no easy way to say "resume scanning at the - // existing position but also skip the current rule ONLY". What happens is - // all prior rules are also skipped which can result in matching the wrong - // thing. Example of matching "booger": + /** + * Compiles an individual mode + * + * This can raise an error if the mode contains certain detectable known logic + * issues. + * @param {Mode} mode + * @param {CompiledMode | null} [parent] + * @returns {CompiledMode | never} + */ + function compileMode(mode, parent) { + const cmode = /** @type CompiledMode */ (mode); + if (mode.compiled) return cmode; + mode.compiled = true; - // our matcher is [string, "booger", number] - // - // ....booger.... + // __beforeBegin is considered private API, internal use only + mode.__beforeBegin = null; - // if "booger" is ignored then we'd really need a regex to scan from the - // SAME position for only: [string, number] but ignoring "booger" (if it - // was the first match), a simple resume would scan ahead who knows how - // far looking only for "number", ignoring potential string matches (or - // future "booger" matches that might be valid.) + mode.keywords = mode.keywords || mode.beginKeywords; - // So what we do: We execute two matchers, one resuming at the same - // position, but the second full matcher starting at the position after: + let keywordPattern = null; + if (typeof mode.keywords === "object") { + keywordPattern = mode.keywords.$pattern; + delete mode.keywords.$pattern; + } - // /--- resume first regex match here (for [number]) - // |/---- full match here for [string, "booger", number] - // vv - // ....booger.... + if (mode.keywords) { + mode.keywords = compileKeywords(mode.keywords, language.case_insensitive); + } - // Which ever results in a match first is then used. So this 3-4 step - // process essentially allows us to say "match at this position, excluding - // a prior rule that was ignored". - // - // 1. Match "booger" first, ignore. Also proves that [string] does non match. - // 2. Resume matching for [number] - // 3. Match at index + 1 for [string, "booger", number] - // 4. If #2 and #3 result in matches, which came first? - if (this.resumingScanAtSamePosition()) { - if (result && result.index === this.lastIndex) ; else { // use the second matcher result - const m2 = this.getMatcher(0); - m2.lastIndex = this.lastIndex + 1; - result = m2.exec(s); - } + // both are not allowed + if (mode.lexemes && keywordPattern) { + throw new Error("ERR: Prefer `keywords.$pattern` to `mode.lexemes`, BOTH are not allowed. (see mode reference) "); } - if (result) { - this.regexIndex += result.position + 1; - if (this.regexIndex === this.count) { - // wrap-around to considering all matches again - this.considerAll(); + // `mode.lexemes` was the old standard before we added and now recommend + // using `keywords.$pattern` to pass the keyword pattern + cmode.keywordPatternRe = langRe(mode.lexemes || keywordPattern || /\w+/, true); + + if (parent) { + if (mode.beginKeywords) { + // for languages with keywords that include non-word characters checking for + // a word boundary is not sufficient, so instead we check for a word boundary + // or whitespace - this does no harm in any case since our keyword engine + // doesn't allow spaces in keywords anyways and we still check for the boundary + // first + mode.begin = '\\b(' + mode.beginKeywords.split(' ').join('|') + ')(?!\\.)(?=\\b|\\s)'; + mode.__beforeBegin = skipIfhasPrecedingDot; + } + if (!mode.begin) mode.begin = /\B|\b/; + cmode.beginRe = langRe(mode.begin); + if (mode.endSameAsBegin) mode.end = mode.begin; + if (!mode.end && !mode.endsWithParent) mode.end = /\B|\b/; + if (mode.end) cmode.endRe = langRe(mode.end); + cmode.terminator_end = source(mode.end) || ''; + if (mode.endsWithParent && parent.terminator_end) { + cmode.terminator_end += (mode.end ? '|' : '') + parent.terminator_end; } } + if (mode.illegal) cmode.illegalRe = langRe(mode.illegal); + // eslint-disable-next-line no-undefined + if (mode.relevance === undefined) mode.relevance = 1; + if (!mode.contains) mode.contains = []; - return result; - } - } - - /** - * Given a mode, builds a huge ResumableMultiRegex that can be used to walk - * the content and find matches. - * - * @param {CompiledMode} mode - * @returns {ResumableMultiRegex} - */ - function buildModeRegex(mode) { - const mm = new ResumableMultiRegex(); + mode.contains = [].concat(...mode.contains.map(function(c) { + return expandOrCloneMode(c === 'self' ? mode : c); + })); + mode.contains.forEach(function(c) { compileMode(/** @type Mode */ (c), cmode); }); - mode.contains.forEach(term => mm.addRule(term.begin, { rule: term, type: "begin" })); + if (mode.starts) { + compileMode(mode.starts, parent); + } - if (mode.terminator_end) { - mm.addRule(mode.terminator_end, { type: "end" }); + cmode.matcher = buildModeRegex(cmode); + return cmode; } - if (mode.illegal) { - mm.addRule(mode.illegal, { type: "illegal" }); + + // self is not valid at the top-level + if (language.contains && language.contains.includes('self')) { + throw new Error("ERR: contains `self` is not supported at the top-level of a language. See documentation."); } - return mm; - } + // we need a null object, which inherit will guarantee + language.classNameAliases = inherit(language.classNameAliases || {}); - // TODO: We need negative look-behind support to do this properly - /** - * Skip a match if it has a preceding or trailing dot - * - * This is used for `beginKeywords` to prevent matching expressions such as - * `bob.keyword.do()`. The mode compiler automatically wires this up as a - * special _internal_ 'on:begin' callback for modes with `beginKeywords` - * @param {RegExpMatchArray} match - * @param {CallbackResponse} response - */ - function skipIfhasPrecedingOrTrailingDot(match, response) { - const before = match.input[match.index - 1]; - const after = match.input[match.index + match[0].length]; - if (before === "." || after === ".") { - response.ignoreMatch(); - } + return compileMode(/** @type Mode */ (language)); } - /** skip vs abort vs ignore - * - * @skip - The mode is still entered and exited normally (and contains rules apply), - * but all content is held and added to the parent buffer rather than being - * output when the mode ends. Mostly used with `sublanguage` to build up - * a single large buffer than can be parsed by sublanguage. - * - * - The mode begin ands ends normally. - * - Content matched is added to the parent mode buffer. - * - The parser cursor is moved forward normally. - * - * @abort - A hack placeholder until we have ignore. Aborts the mode (as if it - * never matched) but DOES NOT continue to match subsequent `contains` - * modes. Abort is bad/suboptimal because it can result in modes - * farther down not getting applied because an earlier rule eats the - * content but then aborts. - * - * - The mode does not begin. - * - Content matched by `begin` is added to the mode buffer. - * - The parser cursor is moved forward accordingly. + /** + * Determines if a mode has a dependency on it's parent or not * - * @ignore - Ignores the mode (as if it never matched) and continues to match any - * subsequent `contains` modes. Ignore isn't technically possible with - * the current parser implementation. + * If a mode does have a parent dependency then often we need to clone it if + * it's used in multiple places so that each copy points to the correct parent, + * where-as modes without a parent can often safely be re-used at the bottom of + * a mode chain. * - * - The mode does not begin. - * - Content matched by `begin` is ignored. - * - The parser cursor is not moved forward. - */ + * @param {Mode | null} mode + * @returns {boolean} - is there a dependency on the parent? + * */ + function dependencyOnParent(mode) { + if (!mode) return false; + + return mode.endsWithParent || dependencyOnParent(mode.starts); + } /** - * Compiles an individual mode + * Expands a mode or clones it if necessary + * + * This is necessary for modes with parental dependenceis (see notes on + * `dependencyOnParent`) and for nodes that have `variants` - which must then be + * exploded into their own individual modes at compile time. * - * This can raise an error if the mode contains certain detectable known logic - * issues. * @param {Mode} mode - * @param {CompiledMode | null} [parent] - * @returns {CompiledMode | never} - */ - function compileMode(mode, parent) { - const cmode = /** @type CompiledMode */ (mode); - if (mode.compiled) return cmode; - mode.compiled = true; - - // __beforeBegin is considered private API, internal use only - mode.__beforeBegin = null; - - mode.keywords = mode.keywords || mode.beginKeywords; - - let kw_pattern = null; - if (typeof mode.keywords === "object") { - kw_pattern = mode.keywords.$pattern; - delete mode.keywords.$pattern; - } - - if (mode.keywords) { - mode.keywords = compileKeywords(mode.keywords, language.case_insensitive); + * @returns {Mode | Mode[]} + * */ + function expandOrCloneMode(mode) { + if (mode.variants && !mode.cached_variants) { + mode.cached_variants = mode.variants.map(function(variant) { + return inherit(mode, { variants: null }, variant); + }); } - // both are not allowed - if (mode.lexemes && kw_pattern) { - throw new Error("ERR: Prefer `keywords.$pattern` to `mode.lexemes`, BOTH are not allowed. (see mode reference) "); + // EXPAND + // if we have variants then essentially "replace" the mode with the variants + // this happens in compileMode, where this function is called from + if (mode.cached_variants) { + return mode.cached_variants; } - // `mode.lexemes` was the old standard before we added and now recommend - // using `keywords.$pattern` to pass the keyword pattern - cmode.keywordPatternRe = langRe(mode.lexemes || kw_pattern || /\w+/, true); - - if (parent) { - if (mode.beginKeywords) { - // for languages with keywords that include non-word characters checking for - // a word boundary is not sufficient, so instead we check for a word boundary - // or whitespace - this does no harm in any case since our keyword engine - // doesn't allow spaces in keywords anyways and we still check for the boundary - // first - mode.begin = '\\b(' + mode.beginKeywords.split(' ').join('|') + ')(?=\\b|\\s)'; - mode.__beforeBegin = skipIfhasPrecedingOrTrailingDot; - } - if (!mode.begin) mode.begin = /\B|\b/; - cmode.beginRe = langRe(mode.begin); - if (mode.endSameAsBegin) mode.end = mode.begin; - if (!mode.end && !mode.endsWithParent) mode.end = /\B|\b/; - if (mode.end) cmode.endRe = langRe(mode.end); - cmode.terminator_end = source(mode.end) || ''; - if (mode.endsWithParent && parent.terminator_end) { - cmode.terminator_end += (mode.end ? '|' : '') + parent.terminator_end; - } + // CLONE + // if we have dependencies on parents then we need a unique + // instance of ourselves, so we can be reused with many + // different parents without issue + if (dependencyOnParent(mode)) { + return inherit(mode, { starts: mode.starts ? inherit(mode.starts) : null }); } - if (mode.illegal) cmode.illegalRe = langRe(mode.illegal); - // eslint-disable-next-line no-undefined - if (mode.relevance === undefined) mode.relevance = 1; - if (!mode.contains) mode.contains = []; - - mode.contains = [].concat(...mode.contains.map(function(c) { - return expand_or_clone_mode(c === 'self' ? mode : c); - })); - mode.contains.forEach(function(c) { compileMode(/** @type Mode */ (c), cmode); }); - if (mode.starts) { - compileMode(mode.starts, parent); + if (Object.isFrozen(mode)) { + return inherit(mode); } - cmode.matcher = buildModeRegex(cmode); - return cmode; + // no special dependency issues, just return ourselves + return mode; } - // self is not valid at the top-level - if (language.contains && language.contains.includes('self')) { - throw new Error("ERR: contains `self` is not supported at the top-level of a language. See documentation."); - } - return compileMode(/** @type Mode */ (language)); - } + /*********************************************** + Keywords + ***********************************************/ - /** - * Determines if a mode has a dependency on it's parent or not - * - * If a mode does have a parent dependency then often we need to clone it if - * it's used in multiple places so that each copy points to the correct parent, - * where-as modes without a parent can often safely be re-used at the bottom of - * a mode chain. - * - * @param {Mode | null} mode - * @returns {boolean} - is there a dependency on the parent? - * */ - function dependencyOnParent(mode) { - if (!mode) return false; - - return mode.endsWithParent || dependencyOnParent(mode.starts); - } - - /** - * Expands a mode or clones it if necessary - * - * This is necessary for modes with parental dependenceis (see notes on - * `dependencyOnParent`) and for nodes that have `variants` - which must then be - * exploded into their own individual modes at compile time. - * - * @param {Mode} mode - * @returns {Mode | Mode[]} - * */ - function expand_or_clone_mode(mode) { - if (mode.variants && !mode.cached_variants) { - mode.cached_variants = mode.variants.map(function(variant) { - return inherit(mode, { variants: null }, variant); - }); - } - - // EXPAND - // if we have variants then essentially "replace" the mode with the variants - // this happens in compileMode, where this function is called from - if (mode.cached_variants) { - return mode.cached_variants; - } - - // CLONE - // if we have dependencies on parents then we need a unique - // instance of ourselves, so we can be reused with many - // different parents without issue - if (dependencyOnParent(mode)) { - return inherit(mode, { starts: mode.starts ? inherit(mode.starts) : null }); - } - - if (Object.isFrozen(mode)) { - return inherit(mode); - } + /** + * Given raw keywords from a language definition, compile them. + * + * @param {string | Record<string,string>} rawKeywords + * @param {boolean} caseInsensitive + */ + function compileKeywords(rawKeywords, caseInsensitive) { + /** @type KeywordDict */ + const compiledKeywords = {}; - // no special dependency issues, just return ourselves - return mode; - } + if (typeof rawKeywords === 'string') { // string + splitAndCompile('keyword', rawKeywords); + } else { + Object.keys(rawKeywords).forEach(function(className) { + splitAndCompile(className, rawKeywords[className]); + }); + } + return compiledKeywords; - /*********************************************** - Keywords - ***********************************************/ + // --- - /** - * Given raw keywords from a language definition, compile them. - * - * @param {string | Record<string,string>} rawKeywords - * @param {boolean} case_insensitive - */ - function compileKeywords(rawKeywords, case_insensitive) { - /** @type KeywordDict */ - var compiled_keywords = {}; - - if (typeof rawKeywords === 'string') { // string - splitAndCompile('keyword', rawKeywords); - } else { - Object.keys(rawKeywords).forEach(function(className) { - splitAndCompile(className, rawKeywords[className]); - }); + /** + * Compiles an individual list of keywords + * + * Ex: "for if when while|5" + * + * @param {string} className + * @param {string} keywordList + */ + function splitAndCompile(className, keywordList) { + if (caseInsensitive) { + keywordList = keywordList.toLowerCase(); + } + keywordList.split(' ').forEach(function(keyword) { + const pair = keyword.split('|'); + compiledKeywords[pair[0]] = [className, scoreForKeyword(pair[0], pair[1])]; + }); + } } - return compiled_keywords; - - // --- /** - * Compiles an individual list of keywords - * - * Ex: "for if when while|5" + * Returns the proper score for a given keyword * - * @param {string} className - * @param {string} keywordList + * Also takes into account comment keywords, which will be scored 0 UNLESS + * another score has been manually assigned. + * @param {string} keyword + * @param {string} [providedScore] */ - function splitAndCompile(className, keywordList) { - if (case_insensitive) { - keywordList = keywordList.toLowerCase(); + function scoreForKeyword(keyword, providedScore) { + // manual scores always win over common keywords + // so you can force a score of 1 if you really insist + if (providedScore) { + return Number(providedScore); } - keywordList.split(' ').forEach(function(keyword) { - var pair = keyword.split('|'); - compiled_keywords[pair[0]] = [className, scoreForKeyword(pair[0], pair[1])]; - }); - } - } - - /** - * Returns the proper score for a given keyword - * - * Also takes into account comment keywords, which will be scored 0 UNLESS - * another score has been manually assigned. - * @param {string} keyword - * @param {string} [providedScore] - */ - function scoreForKeyword(keyword, providedScore) { - // manual scores always win over common keywords - // so you can force a score of 1 if you really insist - if (providedScore) { - return Number(providedScore); - } - - return commonKeyword(keyword) ? 0 : 1; - } - - /** - * Determines if a given keyword is common or not - * - * @param {string} keyword */ - function commonKeyword(keyword) { - return COMMON_KEYWORDS.includes(keyword.toLowerCase()); - } - - var version = "10.2.0"; - // @ts-nocheck - - function hasValueOrEmptyAttribute(value) { - return Boolean(value || value === ""); - } - - const Component = { - props: ["language", "code", "autodetect"], - data: function() { - return { - detectedLanguage: "", - unknownLanguage: false - }; - }, - computed: { - className() { - if (this.unknownLanguage) return ""; - - return "hljs " + this.detectedLanguage; - }, - highlighted() { - // no idea what language to use, return raw code - if (!this.autoDetect && !hljs.getLanguage(this.language)) { - console.warn(`The language "${this.language}" you specified could not be found.`); - this.unknownLanguage = true; - return escapeHTML(this.code); - } - - let result; - if (this.autoDetect) { - result = hljs.highlightAuto(this.code); - this.detectedLanguage = result.language; - } else { - result = hljs.highlight(this.language, this.code, this.ignoreIllegals); - this.detectectLanguage = this.language; - } - return result.value; - }, - autoDetect() { - return !this.language || hasValueOrEmptyAttribute(this.autodetect); - }, - ignoreIllegals() { - return true; - } - }, - // this avoids needing to use a whole Vue compilation pipeline just - // to build Highlight.js - render(createElement) { - return createElement("pre", {}, [ - createElement("code", { - class: this.className, - domProps: { innerHTML: this.highlighted }}) - ]); + return commonKeyword(keyword) ? 0 : 1; } - // template: `<pre><code :class="className" v-html="highlighted"></code></pre>` - }; - const VuePlugin = { - install(Vue) { - Vue.component('highlightjs', Component); + /** + * Determines if a given keyword is common or not + * + * @param {string} keyword */ + function commonKeyword(keyword) { + return COMMON_KEYWORDS.includes(keyword.toLowerCase()); } - }; - - /* - Syntax highlighting with language autodetection. - https://highlightjs.org/ - */ - const escape$1 = escapeHTML; - const inherit$1 = inherit; + var version = "10.4.0"; - const { nodeStream: nodeStream$1, mergeStreams: mergeStreams$1 } = utils; - const NO_MATCH = Symbol("nomatch"); + // @ts-nocheck - /** - * @param {any} hljs - object that is extended (legacy) - * @returns {HLJSApi} - */ - const HLJS = function(hljs) { - // Convenience variables for build-in objects - /** @type {unknown[]} */ - var ArrayProto = []; - - // Global internal variables used within the highlight.js library. - /** @type {Record<string, Language>} */ - var languages = Object.create(null); - /** @type {Record<string, string>} */ - var aliases = Object.create(null); - /** @type {HLJSPlugin[]} */ - var plugins = []; - - // safe/production mode - swallows more errors, tries to keep running - // even if a single syntax or parse hits a fatal error - var SAFE_MODE = true; - var fixMarkupRe = /(^(<[^>]+>|\t|)+|\n)/gm; - var LANGUAGE_NOT_FOUND = "Could not find the language '{}', did you forget to load/include a language module?"; - /** @type {Language} */ - const PLAINTEXT_LANGUAGE = { disableAutodetect: true, name: 'Plain text', contains: [] }; - - // Global options used when within external APIs. This is modified when - // calling the `hljs.configure` function. - /** @type HLJSOptions */ - var options = { - noHighlightRe: /^(no-?highlight)$/i, - languageDetectRe: /\blang(?:uage)?-([\w-]+)\b/i, - classPrefix: 'hljs-', - tabReplace: null, - useBR: false, - languages: null, - // beta configuration options, subject to change, welcome to discuss - // https://github.com/highlightjs/highlight.js/issues/1086 - __emitter: TokenTreeEmitter - }; - - /* Utility functions */ - - /** - * Tests a language name to see if highlighting should be skipped - * @param {string} languageName - */ - function shouldNotHighlight(languageName) { - return options.noHighlightRe.test(languageName); + function hasValueOrEmptyAttribute(value) { + return Boolean(value || value === ""); } - /** - * @param {HighlightedHTMLElement} block - the HTML element to determine language for - */ - function blockLanguage(block) { - var classes = block.className + ' '; - - classes += block.parentNode ? block.parentNode.className : ''; - - // language-* takes precedence over non-prefixed class names. - const match = options.languageDetectRe.exec(classes); - if (match) { - var language = getLanguage(match[1]); - if (!language) { - console.warn(LANGUAGE_NOT_FOUND.replace("{}", match[1])); - console.warn("Falling back to no-highlight mode for this block.", block); + function BuildVuePlugin(hljs) { + const Component = { + props: ["language", "code", "autodetect"], + data: function() { + return { + detectedLanguage: "", + unknownLanguage: false + }; + }, + computed: { + className() { + if (this.unknownLanguage) return ""; + + return "hljs " + this.detectedLanguage; + }, + highlighted() { + // no idea what language to use, return raw code + if (!this.autoDetect && !hljs.getLanguage(this.language)) { + console.warn(`The language "${this.language}" you specified could not be found.`); + this.unknownLanguage = true; + return escapeHTML(this.code); + } + + let result; + if (this.autoDetect) { + result = hljs.highlightAuto(this.code); + this.detectedLanguage = result.language; + } else { + result = hljs.highlight(this.language, this.code, this.ignoreIllegals); + this.detectedLanguage = this.language; + } + return result.value; + }, + autoDetect() { + return !this.language || hasValueOrEmptyAttribute(this.autodetect); + }, + ignoreIllegals() { + return true; + } + }, + // this avoids needing to use a whole Vue compilation pipeline just + // to build Highlight.js + render(createElement) { + return createElement("pre", {}, [ + createElement("code", { + class: this.className, + domProps: { innerHTML: this.highlighted }}) + ]); } - return language ? match[1] : 'no-highlight'; - } + // template: `<pre><code :class="className" v-html="highlighted"></code></pre>` + }; + + const VuePlugin = { + install(Vue) { + Vue.component('highlightjs', Component); + } + }; - return classes - .split(/\s+/) - .find((_class) => shouldNotHighlight(_class) || getLanguage(_class)); + return { Component, VuePlugin }; } - /** - * Core highlighting function. - * - * @param {string} languageName - the language to use for highlighting - * @param {string} code - the code to highlight - * @param {boolean} [ignoreIllegals] - whether to ignore illegal matches, default is to bail - * @param {Mode} [continuation] - current continuation mode, if any - * - * @returns {HighlightResult} Result - an object that represents the result - * @property {string} language - the language name - * @property {number} relevance - the relevance score - * @property {string} value - the highlighted HTML code - * @property {string} code - the original raw code - * @property {Mode} top - top of the current mode stack - * @property {boolean} illegal - indicates whether any illegal matches were found + /* + Syntax highlighting with language autodetection. + https://highlightjs.org/ */ - function highlight(languageName, code, ignoreIllegals, continuation) { - /** @type {{ code: string, language: string, result?: any }} */ - var context = { - code, - language: languageName - }; - // the plugin can change the desired language or the code to be highlighted - // just be changing the object it was passed - fire("before:highlight", context); - // a before plugin can usurp the result completely by providing it's own - // in which case we don't even need to call highlight - var result = context.result ? - context.result : - _highlight(context.language, context.code, ignoreIllegals, continuation); + const escape$1 = escapeHTML; + const inherit$1 = inherit; - result.code = context.code; - // the plugin can change anything in result to suite it - fire("after:highlight", result); - - return result; - } + const { nodeStream: nodeStream$1, mergeStreams: mergeStreams$1 } = utils; + const NO_MATCH = Symbol("nomatch"); /** - * private highlight that's used internally and does not fire callbacks - * - * @param {string} languageName - the language to use for highlighting - * @param {string} code - the code to highlight - * @param {boolean} [ignoreIllegals] - whether to ignore illegal matches, default is to bail - * @param {Mode} [continuation] - current continuation mode, if any - */ - function _highlight(languageName, code, ignoreIllegals, continuation) { - var codeToHighlight = code; + * @param {any} hljs - object that is extended (legacy) + * @returns {HLJSApi} + */ + const HLJS = function(hljs) { + // Convenience variables for build-in objects + /** @type {unknown[]} */ + const ArrayProto = []; + + // Global internal variables used within the highlight.js library. + /** @type {Record<string, Language>} */ + const languages = Object.create(null); + /** @type {Record<string, string>} */ + const aliases = Object.create(null); + /** @type {HLJSPlugin[]} */ + const plugins = []; + + // safe/production mode - swallows more errors, tries to keep running + // even if a single syntax or parse hits a fatal error + let SAFE_MODE = true; + const fixMarkupRe = /(^(<[^>]+>|\t|)+|\n)/gm; + const LANGUAGE_NOT_FOUND = "Could not find the language '{}', did you forget to load/include a language module?"; + /** @type {Language} */ + const PLAINTEXT_LANGUAGE = { disableAutodetect: true, name: 'Plain text', contains: [] }; + + // Global options used when within external APIs. This is modified when + // calling the `hljs.configure` function. + /** @type HLJSOptions */ + let options = { + noHighlightRe: /^(no-?highlight)$/i, + languageDetectRe: /\blang(?:uage)?-([\w-]+)\b/i, + classPrefix: 'hljs-', + tabReplace: null, + useBR: false, + languages: null, + // beta configuration options, subject to change, welcome to discuss + // https://github.com/highlightjs/highlight.js/issues/1086 + __emitter: TokenTreeEmitter + }; + + /* Utility functions */ /** - * Return keyword data if a match is a keyword - * @param {CompiledMode} mode - current mode - * @param {RegExpMatchArray} match - regexp match data - * @returns {KeywordData | false} + * Tests a language name to see if highlighting should be skipped + * @param {string} languageName */ - function keywordData(mode, match) { - var matchText = language.case_insensitive ? match[0].toLowerCase() : match[0]; - return Object.prototype.hasOwnProperty.call(mode.keywords, matchText) && mode.keywords[matchText]; + function shouldNotHighlight(languageName) { + return options.noHighlightRe.test(languageName); } - function processKeywords() { - if (!top.keywords) { - emitter.addText(mode_buffer); - return; - } - - let last_index = 0; - top.keywordPatternRe.lastIndex = 0; - let match = top.keywordPatternRe.exec(mode_buffer); - let buf = ""; - - while (match) { - buf += mode_buffer.substring(last_index, match.index); - const data = keywordData(top, match); - if (data) { - const [kind, keywordRelevance] = data; - emitter.addText(buf); - buf = ""; - - relevance += keywordRelevance; - emitter.addKeyword(match[0], kind); - } else { - buf += match[0]; + /** + * @param {HighlightedHTMLElement} block - the HTML element to determine language for + */ + function blockLanguage(block) { + let classes = block.className + ' '; + + classes += block.parentNode ? block.parentNode.className : ''; + + // language-* takes precedence over non-prefixed class names. + const match = options.languageDetectRe.exec(classes); + if (match) { + const language = getLanguage(match[1]); + if (!language) { + console.warn(LANGUAGE_NOT_FOUND.replace("{}", match[1])); + console.warn("Falling back to no-highlight mode for this block.", block); } - last_index = top.keywordPatternRe.lastIndex; - match = top.keywordPatternRe.exec(mode_buffer); + return language ? match[1] : 'no-highlight'; } - buf += mode_buffer.substr(last_index); - emitter.addText(buf); + + return classes + .split(/\s+/) + .find((_class) => shouldNotHighlight(_class) || getLanguage(_class)); } - function processSubLanguage() { - if (mode_buffer === "") return; - /** @type HighlightResult */ - var result = null; + /** + * Core highlighting function. + * + * @param {string} languageName - the language to use for highlighting + * @param {string} code - the code to highlight + * @param {boolean} [ignoreIllegals] - whether to ignore illegal matches, default is to bail + * @param {CompiledMode} [continuation] - current continuation mode, if any + * + * @returns {HighlightResult} Result - an object that represents the result + * @property {string} language - the language name + * @property {number} relevance - the relevance score + * @property {string} value - the highlighted HTML code + * @property {string} code - the original raw code + * @property {CompiledMode} top - top of the current mode stack + * @property {boolean} illegal - indicates whether any illegal matches were found + */ + function highlight(languageName, code, ignoreIllegals, continuation) { + /** @type {{ code: string, language: string, result?: any }} */ + const context = { + code, + language: languageName + }; + // the plugin can change the desired language or the code to be highlighted + // just be changing the object it was passed + fire("before:highlight", context); - if (typeof top.subLanguage === 'string') { - if (!languages[top.subLanguage]) { - emitter.addText(mode_buffer); - return; - } - result = _highlight(top.subLanguage, mode_buffer, true, continuations[top.subLanguage]); - continuations[top.subLanguage] = result.top; - } else { - result = highlightAuto(mode_buffer, top.subLanguage.length ? top.subLanguage : null); - } + // a before plugin can usurp the result completely by providing it's own + // in which case we don't even need to call highlight + const result = context.result ? + context.result : + _highlight(context.language, context.code, ignoreIllegals, continuation); - // Counting embedded language score towards the host language may be disabled - // with zeroing the containing mode relevance. Use case in point is Markdown that - // allows XML everywhere and makes every XML snippet to have a much larger Markdown - // score. - if (top.relevance > 0) { - relevance += result.relevance; - } - emitter.addSublanguage(result.emitter, result.language); - } + result.code = context.code; + // the plugin can change anything in result to suite it + fire("after:highlight", result); - function processBuffer() { - if (top.subLanguage != null) { - processSubLanguage(); - } else { - processKeywords(); - } - mode_buffer = ''; + return result; } /** - * @param {Mode} mode - new mode to start - */ - function startNewMode(mode) { - if (mode.className) { - emitter.openNode(mode.className); + * private highlight that's used internally and does not fire callbacks + * + * @param {string} languageName - the language to use for highlighting + * @param {string} code - the code to highlight + * @param {boolean} [ignoreIllegals] - whether to ignore illegal matches, default is to bail + * @param {CompiledMode} [continuation] - current continuation mode, if any + * @returns {HighlightResult} - result of the highlight operation + */ + function _highlight(languageName, code, ignoreIllegals, continuation) { + const codeToHighlight = code; + + /** + * Return keyword data if a match is a keyword + * @param {CompiledMode} mode - current mode + * @param {RegExpMatchArray} match - regexp match data + * @returns {KeywordData | false} + */ + function keywordData(mode, match) { + const matchText = language.case_insensitive ? match[0].toLowerCase() : match[0]; + return Object.prototype.hasOwnProperty.call(mode.keywords, matchText) && mode.keywords[matchText]; } - top = Object.create(mode, { parent: { value: top } }); - return top; - } - /** - * @param {CompiledMode } mode - the mode to potentially end - * @param {RegExpMatchArray} match - the latest match - * @param {string} matchPlusRemainder - match plus remainder of content - * @returns {CompiledMode | void} - the next mode, or if void continue on in current mode - */ - function endOfMode(mode, match, matchPlusRemainder) { - let matched = startsWith(mode.endRe, matchPlusRemainder); - - if (matched) { - if (mode["on:end"]) { - const resp = new Response(mode); - mode["on:end"](match, resp); - if (resp.ignore) matched = false; + function processKeywords() { + if (!top.keywords) { + emitter.addText(modeBuffer); + return; } - if (matched) { - while (mode.endsParent && mode.parent) { - mode = mode.parent; + let lastIndex = 0; + top.keywordPatternRe.lastIndex = 0; + let match = top.keywordPatternRe.exec(modeBuffer); + let buf = ""; + + while (match) { + buf += modeBuffer.substring(lastIndex, match.index); + const data = keywordData(top, match); + if (data) { + const [kind, keywordRelevance] = data; + emitter.addText(buf); + buf = ""; + + relevance += keywordRelevance; + const cssClass = language.classNameAliases[kind] || kind; + emitter.addKeyword(match[0], cssClass); + } else { + buf += match[0]; } - return mode; + lastIndex = top.keywordPatternRe.lastIndex; + match = top.keywordPatternRe.exec(modeBuffer); } + buf += modeBuffer.substr(lastIndex); + emitter.addText(buf); } - // even if on:end fires an `ignore` it's still possible - // that we might trigger the end node because of a parent mode - if (mode.endsWithParent) { - return endOfMode(mode.parent, match, matchPlusRemainder); - } - } - /** - * Handle matching but then ignoring a sequence of text - * - * @param {string} lexeme - string containing full match text - */ - function doIgnore(lexeme) { - if (top.matcher.regexIndex === 0) { - // no more regexs to potentially match here, so we move the cursor forward one - // space - mode_buffer += lexeme[0]; - return 1; - } else { - // no need to move the cursor, we still have additional regexes to try and - // match at this very spot - resumeScanAtSamePosition = true; - return 0; - } - } + function processSubLanguage() { + if (modeBuffer === "") return; + /** @type HighlightResult */ + let result = null; - /** - * Handle the start of a new potential mode match - * - * @param {EnhancedMatch} match - the current match - * @returns {number} how far to advance the parse cursor - */ - function doBeginMatch(match) { - var lexeme = match[0]; - var new_mode = match.rule; + if (typeof top.subLanguage === 'string') { + if (!languages[top.subLanguage]) { + emitter.addText(modeBuffer); + return; + } + result = _highlight(top.subLanguage, modeBuffer, true, continuations[top.subLanguage]); + continuations[top.subLanguage] = /** @type {CompiledMode} */ (result.top); + } else { + result = highlightAuto(modeBuffer, top.subLanguage.length ? top.subLanguage : null); + } - const resp = new Response(new_mode); - // first internal before callbacks, then the public ones - const beforeCallbacks = [new_mode.__beforeBegin, new_mode["on:begin"]]; - for (const cb of beforeCallbacks) { - if (!cb) continue; - cb(match, resp); - if (resp.ignore) return doIgnore(lexeme); + // Counting embedded language score towards the host language may be disabled + // with zeroing the containing mode relevance. Use case in point is Markdown that + // allows XML everywhere and makes every XML snippet to have a much larger Markdown + // score. + if (top.relevance > 0) { + relevance += result.relevance; + } + emitter.addSublanguage(result.emitter, result.language); } - if (new_mode && new_mode.endSameAsBegin) { - new_mode.endRe = escape(lexeme); + function processBuffer() { + if (top.subLanguage != null) { + processSubLanguage(); + } else { + processKeywords(); + } + modeBuffer = ''; } - if (new_mode.skip) { - mode_buffer += lexeme; - } else { - if (new_mode.excludeBegin) { - mode_buffer += lexeme; - } - processBuffer(); - if (!new_mode.returnBegin && !new_mode.excludeBegin) { - mode_buffer = lexeme; + /** + * @param {Mode} mode - new mode to start + */ + function startNewMode(mode) { + if (mode.className) { + emitter.openNode(language.classNameAliases[mode.className] || mode.className); } + top = Object.create(mode, { parent: { value: top } }); + return top; } - startNewMode(new_mode); - // if (mode["after:begin"]) { - // let resp = new Response(mode); - // mode["after:begin"](match, resp); - // } - return new_mode.returnBegin ? 0 : lexeme.length; - } - /** - * Handle the potential end of mode - * - * @param {RegExpMatchArray} match - the current match - */ - function doEndMatch(match) { - var lexeme = match[0]; - var matchPlusRemainder = codeToHighlight.substr(match.index); + /** + * @param {CompiledMode } mode - the mode to potentially end + * @param {RegExpMatchArray} match - the latest match + * @param {string} matchPlusRemainder - match plus remainder of content + * @returns {CompiledMode | void} - the next mode, or if void continue on in current mode + */ + function endOfMode(mode, match, matchPlusRemainder) { + let matched = startsWith(mode.endRe, matchPlusRemainder); - var end_mode = endOfMode(top, match, matchPlusRemainder); - if (!end_mode) { return NO_MATCH; } + if (matched) { + if (mode["on:end"]) { + const resp = new Response(mode); + mode["on:end"](match, resp); + if (resp.ignore) matched = false; + } - var origin = top; - if (origin.skip) { - mode_buffer += lexeme; - } else { - if (!(origin.returnEnd || origin.excludeEnd)) { - mode_buffer += lexeme; + if (matched) { + while (mode.endsParent && mode.parent) { + mode = mode.parent; + } + return mode; + } } - processBuffer(); - if (origin.excludeEnd) { - mode_buffer = lexeme; + // even if on:end fires an `ignore` it's still possible + // that we might trigger the end node because of a parent mode + if (mode.endsWithParent) { + return endOfMode(mode.parent, match, matchPlusRemainder); } } - do { - if (top.className) { - emitter.closeNode(); + + /** + * Handle matching but then ignoring a sequence of text + * + * @param {string} lexeme - string containing full match text + */ + function doIgnore(lexeme) { + if (top.matcher.regexIndex === 0) { + // no more regexs to potentially match here, so we move the cursor forward one + // space + modeBuffer += lexeme[0]; + return 1; + } else { + // no need to move the cursor, we still have additional regexes to try and + // match at this very spot + resumeScanAtSamePosition = true; + return 0; } - if (!top.skip && !top.subLanguage) { - relevance += top.relevance; + } + + /** + * Handle the start of a new potential mode match + * + * @param {EnhancedMatch} match - the current match + * @returns {number} how far to advance the parse cursor + */ + function doBeginMatch(match) { + const lexeme = match[0]; + const newMode = match.rule; + + const resp = new Response(newMode); + // first internal before callbacks, then the public ones + const beforeCallbacks = [newMode.__beforeBegin, newMode["on:begin"]]; + for (const cb of beforeCallbacks) { + if (!cb) continue; + cb(match, resp); + if (resp.ignore) return doIgnore(lexeme); } - top = top.parent; - } while (top !== end_mode.parent); - if (end_mode.starts) { - if (end_mode.endSameAsBegin) { - end_mode.starts.endRe = end_mode.endRe; + + if (newMode && newMode.endSameAsBegin) { + newMode.endRe = escape(lexeme); } - startNewMode(end_mode.starts); + + if (newMode.skip) { + modeBuffer += lexeme; + } else { + if (newMode.excludeBegin) { + modeBuffer += lexeme; + } + processBuffer(); + if (!newMode.returnBegin && !newMode.excludeBegin) { + modeBuffer = lexeme; + } + } + startNewMode(newMode); + // if (mode["after:begin"]) { + // let resp = new Response(mode); + // mode["after:begin"](match, resp); + // } + return newMode.returnBegin ? 0 : lexeme.length; } - return origin.returnEnd ? 0 : lexeme.length; - } - function processContinuations() { - var list = []; - for (var current = top; current !== language; current = current.parent) { - if (current.className) { - list.unshift(current.className); + /** + * Handle the potential end of mode + * + * @param {RegExpMatchArray} match - the current match + */ + function doEndMatch(match) { + const lexeme = match[0]; + const matchPlusRemainder = codeToHighlight.substr(match.index); + + const endMode = endOfMode(top, match, matchPlusRemainder); + if (!endMode) { return NO_MATCH; } + + const origin = top; + if (origin.skip) { + modeBuffer += lexeme; + } else { + if (!(origin.returnEnd || origin.excludeEnd)) { + modeBuffer += lexeme; + } + processBuffer(); + if (origin.excludeEnd) { + modeBuffer = lexeme; + } + } + do { + if (top.className) { + emitter.closeNode(); + } + if (!top.skip && !top.subLanguage) { + relevance += top.relevance; + } + top = top.parent; + } while (top !== endMode.parent); + if (endMode.starts) { + if (endMode.endSameAsBegin) { + endMode.starts.endRe = endMode.endRe; + } + startNewMode(endMode.starts); } + return origin.returnEnd ? 0 : lexeme.length; } - list.forEach(item => emitter.openNode(item)); - } - /** @type {{type?: MatchType, index?: number, rule?: Mode}}} */ - var lastMatch = {}; + function processContinuations() { + const list = []; + for (let current = top; current !== language; current = current.parent) { + if (current.className) { + list.unshift(current.className); + } + } + list.forEach(item => emitter.openNode(item)); + } - /** - * Process an individual match - * - * @param {string} textBeforeMatch - text preceeding the match (since the last match) - * @param {EnhancedMatch} [match] - the match itself - */ - function processLexeme(textBeforeMatch, match) { - var lexeme = match && match[0]; + /** @type {{type?: MatchType, index?: number, rule?: Mode}}} */ + let lastMatch = {}; - // add non-matched text to the current mode buffer - mode_buffer += textBeforeMatch; + /** + * Process an individual match + * + * @param {string} textBeforeMatch - text preceeding the match (since the last match) + * @param {EnhancedMatch} [match] - the match itself + */ + function processLexeme(textBeforeMatch, match) { + const lexeme = match && match[0]; - if (lexeme == null) { - processBuffer(); - return 0; - } + // add non-matched text to the current mode buffer + modeBuffer += textBeforeMatch; - // we've found a 0 width match and we're stuck, so we need to advance - // this happens when we have badly behaved rules that have optional matchers to the degree that - // sometimes they can end up matching nothing at all - // Ref: https://github.com/highlightjs/highlight.js/issues/2140 - if (lastMatch.type === "begin" && match.type === "end" && lastMatch.index === match.index && lexeme === "") { - // spit the "skipped" character that our regex choked on back into the output sequence - mode_buffer += codeToHighlight.slice(match.index, match.index + 1); - if (!SAFE_MODE) { + if (lexeme == null) { + processBuffer(); + return 0; + } + + // we've found a 0 width match and we're stuck, so we need to advance + // this happens when we have badly behaved rules that have optional matchers to the degree that + // sometimes they can end up matching nothing at all + // Ref: https://github.com/highlightjs/highlight.js/issues/2140 + if (lastMatch.type === "begin" && match.type === "end" && lastMatch.index === match.index && lexeme === "") { + // spit the "skipped" character that our regex choked on back into the output sequence + modeBuffer += codeToHighlight.slice(match.index, match.index + 1); + if (!SAFE_MODE) { + /** @type {AnnotatedError} */ + const err = new Error('0 width match regex'); + err.languageName = languageName; + err.badRule = lastMatch.rule; + throw err; + } + return 1; + } + lastMatch = match; + + if (match.type === "begin") { + return doBeginMatch(match); + } else if (match.type === "illegal" && !ignoreIllegals) { + // illegal match, we do not continue processing /** @type {AnnotatedError} */ - const err = new Error('0 width match regex'); - err.languageName = languageName; - err.badRule = lastMatch.rule; + const err = new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '<unnamed>') + '"'); + err.mode = top; throw err; + } else if (match.type === "end") { + const processed = doEndMatch(match); + if (processed !== NO_MATCH) { + return processed; + } } - return 1; - } - lastMatch = match; - - if (match.type === "begin") { - return doBeginMatch(match); - } else if (match.type === "illegal" && !ignoreIllegals) { - // illegal match, we do not continue processing - /** @type {AnnotatedError} */ - const err = new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '<unnamed>') + '"'); - err.mode = top; - throw err; - } else if (match.type === "end") { - var processed = doEndMatch(match); - if (processed !== NO_MATCH) { - return processed; + + // edge case for when illegal matches $ (end of line) which is technically + // a 0 width match but not a begin/end match so it's not caught by the + // first handler (when ignoreIllegals is true) + if (match.type === "illegal" && lexeme === "") { + // advance so we aren't stuck in an infinite loop + return 1; } - } - // edge case for when illegal matches $ (end of line) which is technically - // a 0 width match but not a begin/end match so it's not caught by the - // first handler (when ignoreIllegals is true) - if (match.type === "illegal" && lexeme === "") { - // advance so we aren't stuck in an infinite loop - return 1; - } + // infinite loops are BAD, this is a last ditch catch all. if we have a + // decent number of iterations yet our index (cursor position in our + // parsing) still 3x behind our index then something is very wrong + // so we bail + if (iterations > 100000 && iterations > match.index * 3) { + const err = new Error('potential infinite loop, way more iterations than matches'); + throw err; + } + + /* + Why might be find ourselves here? Only one occasion now. An end match that was + triggered but could not be completed. When might this happen? When an `endSameasBegin` + rule sets the end rule to a specific match. Since the overall mode termination rule that's + being used to scan the text isn't recompiled that means that any match that LOOKS like + the end (but is not, because it is not an exact match to the beginning) will + end up here. A definite end match, but when `doEndMatch` tries to "reapply" + the end rule and fails to match, we wind up here, and just silently ignore the end. + + This causes no real harm other than stopping a few times too many. + */ - // infinite loops are BAD, this is a last ditch catch all. if we have a - // decent number of iterations yet our index (cursor position in our - // parsing) still 3x behind our index then something is very wrong - // so we bail - if (iterations > 100000 && iterations > match.index * 3) { - const err = new Error('potential infinite loop, way more iterations than matches'); - throw err; + modeBuffer += lexeme; + return lexeme.length; } - /* - Why might be find ourselves here? Only one occasion now. An end match that was - triggered but could not be completed. When might this happen? When an `endSameasBegin` - rule sets the end rule to a specific match. Since the overall mode termination rule that's - being used to scan the text isn't recompiled that means that any match that LOOKS like - the end (but is not, because it is not an exact match to the beginning) will - end up here. A definite end match, but when `doEndMatch` tries to "reapply" - the end rule and fails to match, we wind up here, and just silently ignore the end. - - This causes no real harm other than stopping a few times too many. - */ + const language = getLanguage(languageName); + if (!language) { + console.error(LANGUAGE_NOT_FOUND.replace("{}", languageName)); + throw new Error('Unknown language: "' + languageName + '"'); + } - mode_buffer += lexeme; - return lexeme.length; - } - - var language = getLanguage(languageName); - if (!language) { - console.error(LANGUAGE_NOT_FOUND.replace("{}", languageName)); - throw new Error('Unknown language: "' + languageName + '"'); - } - - var md = compileLanguage(language); - var result = ''; - /** @type {CompiledMode} */ - var top = continuation || md; - /** @type Record<string,Mode> */ - var continuations = {}; // keep continuations for sub-languages - var emitter = new options.__emitter(options); - processContinuations(); - var mode_buffer = ''; - var relevance = 0; - var index = 0; - var iterations = 0; - var resumeScanAtSamePosition = false; - - try { - top.matcher.considerAll(); - - for (;;) { - iterations++; - if (resumeScanAtSamePosition) { - // only regexes not matched previously will now be - // considered for a potential match - resumeScanAtSamePosition = false; - } else { - top.matcher.considerAll(); - } - top.matcher.lastIndex = index; + const md = compileLanguage(language); + let result = ''; + /** @type {CompiledMode} */ + let top = continuation || md; + /** @type Record<string,CompiledMode> */ + const continuations = {}; // keep continuations for sub-languages + const emitter = new options.__emitter(options); + processContinuations(); + let modeBuffer = ''; + let relevance = 0; + let index = 0; + let iterations = 0; + let resumeScanAtSamePosition = false; + + try { + top.matcher.considerAll(); + + for (;;) { + iterations++; + if (resumeScanAtSamePosition) { + // only regexes not matched previously will now be + // considered for a potential match + resumeScanAtSamePosition = false; + } else { + top.matcher.considerAll(); + } + top.matcher.lastIndex = index; - const match = top.matcher.exec(codeToHighlight); - // console.log("match", match[0], match.rule && match.rule.begin) + const match = top.matcher.exec(codeToHighlight); + // console.log("match", match[0], match.rule && match.rule.begin) - if (!match) break; + if (!match) break; - const beforeMatch = codeToHighlight.substring(index, match.index); - const processedCount = processLexeme(beforeMatch, match); - index = match.index + processedCount; - } - processLexeme(codeToHighlight.substr(index)); - emitter.closeAllNodes(); - emitter.finalize(); - result = emitter.toHTML(); + const beforeMatch = codeToHighlight.substring(index, match.index); + const processedCount = processLexeme(beforeMatch, match); + index = match.index + processedCount; + } + processLexeme(codeToHighlight.substr(index)); + emitter.closeAllNodes(); + emitter.finalize(); + result = emitter.toHTML(); - return { - relevance: relevance, - value: result, - language: languageName, - illegal: false, - emitter: emitter, - top: top - }; - } catch (err) { - if (err.message && err.message.includes('Illegal')) { - return { - illegal: true, - illegalBy: { - msg: err.message, - context: codeToHighlight.slice(index - 100, index + 100), - mode: err.mode - }, - sofar: result, - relevance: 0, - value: escape$1(codeToHighlight), - emitter: emitter - }; - } else if (SAFE_MODE) { return { + relevance: relevance, + value: result, + language: languageName, illegal: false, - relevance: 0, - value: escape$1(codeToHighlight), emitter: emitter, - language: languageName, - top: top, - errorRaised: err + top: top }; - } else { - throw err; + } catch (err) { + if (err.message && err.message.includes('Illegal')) { + return { + illegal: true, + illegalBy: { + msg: err.message, + context: codeToHighlight.slice(index - 100, index + 100), + mode: err.mode + }, + sofar: result, + relevance: 0, + value: escape$1(codeToHighlight), + emitter: emitter + }; + } else if (SAFE_MODE) { + return { + illegal: false, + relevance: 0, + value: escape$1(codeToHighlight), + emitter: emitter, + language: languageName, + top: top, + errorRaised: err + }; + } else { + throw err; + } } } - } - /** - * returns a valid highlight result, without actually doing any actual work, - * auto highlight starts with this and it's possible for small snippets that - * auto-detection may not find a better match - * @param {string} code - * @returns {HighlightResult} - */ - function justTextHighlightResult(code) { - const result = { - relevance: 0, - emitter: new options.__emitter(options), - value: escape$1(code), - illegal: false, - top: PLAINTEXT_LANGUAGE - }; - result.emitter.addText(code); - return result; - } + /** + * returns a valid highlight result, without actually doing any actual work, + * auto highlight starts with this and it's possible for small snippets that + * auto-detection may not find a better match + * @param {string} code + * @returns {HighlightResult} + */ + function justTextHighlightResult(code) { + const result = { + relevance: 0, + emitter: new options.__emitter(options), + value: escape$1(code), + illegal: false, + top: PLAINTEXT_LANGUAGE + }; + result.emitter.addText(code); + return result; + } - /** - Highlighting with language detection. Accepts a string with the code to - highlight. Returns an object with the following properties: - - - language (detected language) - - relevance (int) - - value (an HTML string with highlighting markup) - - second_best (object with the same structure for second-best heuristically - detected language, may be absent) - - @param {string} code - @param {Array<string>} [languageSubset] - @returns {AutoHighlightResult} - */ - function highlightAuto(code, languageSubset) { - languageSubset = languageSubset || options.languages || Object.keys(languages); - var result = justTextHighlightResult(code); - var secondBest = result; - languageSubset.filter(getLanguage).filter(autoDetection).forEach(function(name) { - var current = _highlight(name, code, false); - current.language = name; - if (current.relevance > secondBest.relevance) { - secondBest = current; - } - if (current.relevance > result.relevance) { - secondBest = result; - result = current; - } - }); - if (secondBest.language) { - // second_best (with underscore) is the expected API + /** + Highlighting with language detection. Accepts a string with the code to + highlight. Returns an object with the following properties: + + - language (detected language) + - relevance (int) + - value (an HTML string with highlighting markup) + - second_best (object with the same structure for second-best heuristically + detected language, may be absent) + + @param {string} code + @param {Array<string>} [languageSubset] + @returns {AutoHighlightResult} + */ + function highlightAuto(code, languageSubset) { + languageSubset = languageSubset || options.languages || Object.keys(languages); + const plaintext = justTextHighlightResult(code); + + const results = languageSubset.filter(getLanguage).filter(autoDetection).map(name => + _highlight(name, code, false) + ); + results.unshift(plaintext); // plaintext is always an option + + const sorted = results.sort((a, b) => { + // sort base on relevance + if (a.relevance !== b.relevance) return b.relevance - a.relevance; + + // always award the tie to the base language + // ie if C++ and Arduino are tied, it's more likely to be C++ + if (a.language && b.language) { + if (getLanguage(a.language).supersetOf === b.language) { + return 1; + } else if (getLanguage(b.language).supersetOf === a.language) { + return -1; + } + } + + // otherwise say they are equal, which has the effect of sorting on + // relevance while preserving the original ordering - which is how ties + // have historically been settled, ie the language that comes first always + // wins in the case of a tie + return 0; + }); + + const [best, secondBest] = sorted; + + /** @type {AutoHighlightResult} */ + const result = best; result.second_best = secondBest; + + return result; } - return result; - } - /** - Post-processing of the highlighted markup: + /** + Post-processing of the highlighted markup: - - replace TABs with something more useful - - replace real line-breaks with '<br>' for non-pre containers + - replace TABs with something more useful + - replace real line-breaks with '<br>' for non-pre containers - @param {string} html - @returns {string} - */ - function fixMarkup(html) { - if (!(options.tabReplace || options.useBR)) { - return html; + @param {string} html + @returns {string} + */ + function fixMarkup(html) { + if (!(options.tabReplace || options.useBR)) { + return html; + } + + return html.replace(fixMarkupRe, match => { + if (match === '\n') { + return options.useBR ? '<br>' : match; + } else if (options.tabReplace) { + return match.replace(/\t/g, options.tabReplace); + } + return match; + }); } - return html.replace(fixMarkupRe, match => { - if (match === '\n') { - return options.useBR ? '<br>' : match; - } else if (options.tabReplace) { - return match.replace(/\t/g, options.tabReplace); + /** + * Builds new class name for block given the language name + * + * @param {string} prevClassName + * @param {string} [currentLang] + * @param {string} [resultLang] + */ + function buildClassName(prevClassName, currentLang, resultLang) { + const language = currentLang ? aliases[currentLang] : resultLang; + const result = [prevClassName.trim()]; + + if (!prevClassName.match(/\bhljs\b/)) { + result.push('hljs'); } - return match; - }); - } - /** - * Builds new class name for block given the language name - * - * @param {string} prevClassName - * @param {string} [currentLang] - * @param {string} [resultLang] - */ - function buildClassName(prevClassName, currentLang, resultLang) { - var language = currentLang ? aliases[currentLang] : resultLang; - var result = [prevClassName.trim()]; + if (!prevClassName.includes(language)) { + result.push(language); + } - if (!prevClassName.match(/\bhljs\b/)) { - result.push('hljs'); + return result.join(' ').trim(); } - if (!prevClassName.includes(language)) { - result.push(language); - } + /** + * Applies highlighting to a DOM node containing code. Accepts a DOM node and + * two optional parameters for fixMarkup. + * + * @param {HighlightedHTMLElement} element - the HTML element to highlight + */ + function highlightBlock(element) { + /** @type HTMLElement */ + let node = null; + const language = blockLanguage(element); - return result.join(' ').trim(); - } + if (shouldNotHighlight(language)) return; - /** - * Applies highlighting to a DOM node containing code. Accepts a DOM node and - * two optional parameters for fixMarkup. - * - * @param {HighlightedHTMLElement} element - the HTML element to highlight - */ - function highlightBlock(element) { - /** @type HTMLElement */ - let node = null; - const language = blockLanguage(element); + fire("before:highlightBlock", + { block: element, language: language }); - if (shouldNotHighlight(language)) return; + if (options.useBR) { + node = document.createElement('div'); + node.innerHTML = element.innerHTML.replace(/\n/g, '').replace(/<br[ /]*>/g, '\n'); + } else { + node = element; + } + const text = node.textContent; + const result = language ? highlight(language, text, true) : highlightAuto(text); + + const originalStream = nodeStream$1(node); + if (originalStream.length) { + const resultNode = document.createElement('div'); + resultNode.innerHTML = result.value; + result.value = mergeStreams$1(originalStream, nodeStream$1(resultNode), text); + } + result.value = fixMarkup(result.value); - fire("before:highlightBlock", - { block: element, language: language }); + fire("after:highlightBlock", { block: element, result: result }); - if (options.useBR) { - node = document.createElement('div'); - node.innerHTML = element.innerHTML.replace(/\n/g, '').replace(/<br[ /]*>/g, '\n'); - } else { - node = element; + element.innerHTML = result.value; + element.className = buildClassName(element.className, language, result.language); + element.result = { + language: result.language, + // TODO: remove with version 11.0 + re: result.relevance, + relavance: result.relevance + }; + if (result.second_best) { + element.second_best = { + language: result.second_best.language, + // TODO: remove with version 11.0 + re: result.second_best.relevance, + relavance: result.second_best.relevance + }; + } } - const text = node.textContent; - const result = language ? highlight(language, text, true) : highlightAuto(text); - const originalStream = nodeStream$1(node); - if (originalStream.length) { - const resultNode = document.createElement('div'); - resultNode.innerHTML = result.value; - result.value = mergeStreams$1(originalStream, nodeStream$1(resultNode), text); + /** + * Updates highlight.js global options with the passed options + * + * @param {Partial<HLJSOptions>} userOptions + */ + function configure(userOptions) { + if (userOptions.useBR) { + console.warn("'useBR' option is deprecated and will be removed entirely in v11.0"); + console.warn("Please see https://github.com/highlightjs/highlight.js/issues/2559"); + } + options = inherit$1(options, userOptions); } - result.value = fixMarkup(result.value); - fire("after:highlightBlock", { block: element, result: result }); + /** + * Highlights to all <pre><code> blocks on a page + * + * @type {Function & {called?: boolean}} + */ + const initHighlighting = () => { + if (initHighlighting.called) return; + initHighlighting.called = true; - element.innerHTML = result.value; - element.className = buildClassName(element.className, language, result.language); - element.result = { - language: result.language, - // TODO: remove with version 11.0 - re: result.relevance, - relavance: result.relevance + const blocks = document.querySelectorAll('pre code'); + ArrayProto.forEach.call(blocks, highlightBlock); }; - if (result.second_best) { - element.second_best = { - language: result.second_best.language, - // TODO: remove with version 11.0 - re: result.second_best.relevance, - relavance: result.second_best.relevance - }; - } - } - - /** - * Updates highlight.js global options with the passed options - * - * @param {{}} userOptions - */ - function configure(userOptions) { - options = inherit$1(options, userOptions); - } - /** - * Highlights to all <pre><code> blocks on a page - * - * @type {Function & {called?: boolean}} - */ - const initHighlighting = () => { - if (initHighlighting.called) return; - initHighlighting.called = true; + // Higlights all when DOMContentLoaded fires + function initHighlightingOnLoad() { + // @ts-ignore + window.addEventListener('DOMContentLoaded', initHighlighting, false); + } - var blocks = document.querySelectorAll('pre code'); - ArrayProto.forEach.call(blocks, highlightBlock); - }; + /** + * Register a language grammar module + * + * @param {string} languageName + * @param {LanguageFn} languageDefinition + */ + function registerLanguage(languageName, languageDefinition) { + let lang = null; + try { + lang = languageDefinition(hljs); + } catch (error) { + console.error("Language definition for '{}' could not be registered.".replace("{}", languageName)); + // hard or soft error + if (!SAFE_MODE) { throw error; } else { console.error(error); } + // languages that have serious errors are replaced with essentially a + // "plaintext" stand-in so that the code blocks will still get normal + // css classes applied to them - and one bad language won't break the + // entire highlighter + lang = PLAINTEXT_LANGUAGE; + } + // give it a temporary name if it doesn't have one in the meta-data + if (!lang.name) lang.name = languageName; + languages[languageName] = lang; + lang.rawDefinition = languageDefinition.bind(null, hljs); - // Higlights all when DOMContentLoaded fires - function initHighlightingOnLoad() { - // @ts-ignore - window.addEventListener('DOMContentLoaded', initHighlighting, false); - } + if (lang.aliases) { + registerAliases(lang.aliases, { languageName }); + } + } - /** - * Register a language grammar module - * - * @param {string} languageName - * @param {LanguageFn} languageDefinition - */ - function registerLanguage(languageName, languageDefinition) { - var lang = null; - try { - lang = languageDefinition(hljs); - } catch (error) { - console.error("Language definition for '{}' could not be registered.".replace("{}", languageName)); - // hard or soft error - if (!SAFE_MODE) { throw error; } else { console.error(error); } - // languages that have serious errors are replaced with essentially a - // "plaintext" stand-in so that the code blocks will still get normal - // css classes applied to them - and one bad language won't break the - // entire highlighter - lang = PLAINTEXT_LANGUAGE; - } - // give it a temporary name if it doesn't have one in the meta-data - if (!lang.name) lang.name = languageName; - languages[languageName] = lang; - lang.rawDefinition = languageDefinition.bind(null, hljs); - - if (lang.aliases) { - registerAliases(lang.aliases, { languageName }); + /** + * @returns {string[]} List of language internal names + */ + function listLanguages() { + return Object.keys(languages); } - } - /** - * @returns {string[]} List of language internal names - */ - function listLanguages() { - return Object.keys(languages); - } + /** + intended usage: When one language truly requires another - /** - intended usage: When one language truly requires another + Unlike `getLanguage`, this will throw when the requested language + is not available. - Unlike `getLanguage`, this will throw when the requested language - is not available. + @param {string} name - name of the language to fetch/require + @returns {Language | never} + */ + function requireLanguage(name) { + console.warn("requireLanguage is deprecated and will be removed entirely in the future."); + console.warn("Please see https://github.com/highlightjs/highlight.js/pull/2844"); - @param {string} name - name of the language to fetch/require - @returns {Language | never} - */ - function requireLanguage(name) { - var lang = getLanguage(name); - if (lang) { return lang; } + const lang = getLanguage(name); + if (lang) { return lang; } - var err = new Error('The \'{}\' language is required, but not loaded.'.replace('{}', name)); - throw err; - } + const err = new Error('The \'{}\' language is required, but not loaded.'.replace('{}', name)); + throw err; + } - /** - * @param {string} name - name of the language to retrieve - * @returns {Language | undefined} - */ - function getLanguage(name) { - name = (name || '').toLowerCase(); - return languages[name] || languages[aliases[name]]; - } + /** + * @param {string} name - name of the language to retrieve + * @returns {Language | undefined} + */ + function getLanguage(name) { + name = (name || '').toLowerCase(); + return languages[name] || languages[aliases[name]]; + } - /** - * - * @param {string|string[]} aliasList - single alias or list of aliases - * @param {{languageName: string}} opts - */ - function registerAliases(aliasList, { languageName }) { - if (typeof aliasList === 'string') { - aliasList = [aliasList]; + /** + * + * @param {string|string[]} aliasList - single alias or list of aliases + * @param {{languageName: string}} opts + */ + function registerAliases(aliasList, { languageName }) { + if (typeof aliasList === 'string') { + aliasList = [aliasList]; + } + aliasList.forEach(alias => { aliases[alias] = languageName; }); } - aliasList.forEach(alias => { aliases[alias] = languageName; }); - } - /** - * Determines if a given language has auto-detection enabled - * @param {string} name - name of the language - */ - function autoDetection(name) { - var lang = getLanguage(name); - return lang && !lang.disableAutodetect; - } + /** + * Determines if a given language has auto-detection enabled + * @param {string} name - name of the language + */ + function autoDetection(name) { + const lang = getLanguage(name); + return lang && !lang.disableAutodetect; + } - /** - * @param {HLJSPlugin} plugin - */ - function addPlugin(plugin) { - plugins.push(plugin); - } + /** + * @param {HLJSPlugin} plugin + */ + function addPlugin(plugin) { + plugins.push(plugin); + } - /** - * - * @param {PluginEvent} event - * @param {any} args - */ - function fire(event, args) { - var cb = event; - plugins.forEach(function(plugin) { - if (plugin[cb]) { - plugin[cb](args); - } - }); - } + /** + * + * @param {PluginEvent} event + * @param {any} args + */ + function fire(event, args) { + const cb = event; + plugins.forEach(function(plugin) { + if (plugin[cb]) { + plugin[cb](args); + } + }); + } - /* fixMarkup is deprecated and will be removed entirely in v11 */ - function deprecate_fixMarkup(arg) { - console.warn("fixMarkup is deprecated and will be removed entirely in v11.0"); - console.warn("Please see https://github.com/highlightjs/highlight.js/issues/2534"); + /** + Note: fixMarkup is deprecated and will be removed entirely in v11 - return fixMarkup(arg) - } + @param {string} arg + @returns {string} + */ + function deprecateFixMarkup(arg) { + console.warn("fixMarkup is deprecated and will be removed entirely in v11.0"); + console.warn("Please see https://github.com/highlightjs/highlight.js/issues/2534"); - /* Interface definition */ - Object.assign(hljs, { - highlight, - highlightAuto, - fixMarkup: deprecate_fixMarkup, - highlightBlock, - configure, - initHighlighting, - initHighlightingOnLoad, - registerLanguage, - listLanguages, - getLanguage, - registerAliases, - requireLanguage, - autoDetection, - inherit: inherit$1, - addPlugin, - // plugins for frameworks - vuePlugin: VuePlugin - }); + return fixMarkup(arg); + } - hljs.debugMode = function() { SAFE_MODE = false; }; - hljs.safeMode = function() { SAFE_MODE = true; }; - hljs.versionString = version; + /* Interface definition */ + Object.assign(hljs, { + highlight, + highlightAuto, + fixMarkup: deprecateFixMarkup, + highlightBlock, + configure, + initHighlighting, + initHighlightingOnLoad, + registerLanguage, + listLanguages, + getLanguage, + registerAliases, + requireLanguage, + autoDetection, + inherit: inherit$1, + addPlugin, + // plugins for frameworks + vuePlugin: BuildVuePlugin(hljs).VuePlugin + }); + + hljs.debugMode = function() { SAFE_MODE = false; }; + hljs.safeMode = function() { SAFE_MODE = true; }; + hljs.versionString = version; - for (const key in MODES) { - // @ts-ignore - if (typeof MODES[key] === "object") { + for (const key in MODES) { // @ts-ignore - deepFreeze(MODES[key]); + if (typeof MODES[key] === "object") { + // @ts-ignore + deepFreezeEs6(MODES[key]); + } } - } - // merge all the modes/regexs into our main object - Object.assign(hljs, MODES); + // merge all the modes/regexs into our main object + Object.assign(hljs, MODES); - return hljs; - }; + return hljs; + }; - // export an "instance" of the highlighter - var highlight = HLJS({}); + // export an "instance" of the highlighter + var highlight = HLJS({}); - return highlight; + return highlight; }()); if (typeof exports === 'object' && typeof module !== 'undefined') { module.exports = hljs; } @@ -2258,10 +2314,10 @@ hljs.registerLanguage('css', function () { }; var AT_IDENTIFIER = '@[a-z-]+'; // @font-face var AT_MODIFIERS = "and or not only"; - var AT_PROPERTY_RE = /@\-?\w[\w]*(\-\w+)*/; // @-webkit-keyframes + var AT_PROPERTY_RE = /@-?\w[\w]*(-\w+)*/; // @-webkit-keyframes var IDENT_RE = '[a-zA-Z-][a-zA-Z0-9_-]*'; var RULE = { - begin: /(?:[A-Z\_\.\-]+|--[a-zA-Z0-9_-]+)\s*:/, returnBegin: true, end: ';', endsWithParent: true, + begin: /([*]\s?)?(?:[A-Z_.\-\\]+|--[a-zA-Z0-9_-]+)\s*(\/\*\*\/)?:/, returnBegin: true, end: ';', endsWithParent: true, contains: [ ATTRIBUTE ] @@ -2270,14 +2326,14 @@ hljs.registerLanguage('css', function () { return { name: 'CSS', case_insensitive: true, - illegal: /[=\/|'\$]/, + illegal: /[=|'\$]/, contains: [ hljs.C_BLOCK_COMMENT_MODE, { className: 'selector-id', begin: /#[A-Za-z0-9_-]+/ }, { - className: 'selector-class', begin: /\.[A-Za-z0-9_-]+/ + className: 'selector-class', begin: '\\.' + IDENT_RE }, { className: 'selector-attr', @@ -2290,7 +2346,7 @@ hljs.registerLanguage('css', function () { }, { className: 'selector-pseudo', - begin: /:(:)?[a-zA-Z0-9\_\-\+\(\)"'.]+/ + begin: /:(:)?[a-zA-Z0-9_+()"'.-]+/ }, // matching these here allows us to treat them more like regular CSS // rules so everything between the {} gets regular rule highlighting, @@ -2333,10 +2389,11 @@ hljs.registerLanguage('css', function () { relevance: 0 }, { - begin: '{', end: '}', + begin: /\{/, end: /\}/, illegal: /\S/, contains: [ hljs.C_BLOCK_COMMENT_MODE, + { begin: /;/ }, // empty ; rule RULE, ] } @@ -2531,68 +2588,174 @@ hljs.registerLanguage('javascript', function () { Website: https://developer.mozilla.org/en-US/docs/Web/JavaScript */ + /** @type LanguageFn */ function javascript(hljs) { - var IDENT_RE$1 = IDENT_RE; - var FRAGMENT = { + /** + * Takes a string like "<Booger" and checks to see + * if we can find a matching "</Booger" later in the + * content. + * @param {RegExpMatchArray} match + * @param {{after:number}} param1 + */ + const hasClosingTag = (match, { after }) => { + const tag = "</" + match[0].slice(1); + const pos = match.input.indexOf(tag, after); + return pos !== -1; + }; + + const IDENT_RE$1 = IDENT_RE; + const FRAGMENT = { begin: '<>', end: '</>' }; - var XML_TAG = { + const XML_TAG = { begin: /<[A-Za-z0-9\\._:-]+/, - end: /\/[A-Za-z0-9\\._:-]+>|\/>/ + end: /\/[A-Za-z0-9\\._:-]+>|\/>/, + /** + * @param {RegExpMatchArray} match + * @param {CallbackResponse} response + */ + isTrulyOpeningTag: (match, response) => { + const afterMatchIndex = match[0].length + match.index; + const nextChar = match.input[afterMatchIndex]; + // nested type? + // HTML should not include another raw `<` inside a tag + // But a type might: `<Array<Array<number>>`, etc. + if (nextChar === "<") { + response.ignoreMatch(); + return; + } + // <something> + // This is now either a tag or a type. + if (nextChar === ">") { + // if we cannot find a matching closing tag, then we + // will ignore it + if (!hasClosingTag(match, { after: afterMatchIndex })) { + response.ignoreMatch(); + } + } + } }; - var KEYWORDS$1 = { + const KEYWORDS$1 = { $pattern: IDENT_RE, keyword: KEYWORDS.join(" "), literal: LITERALS.join(" "), built_in: BUILT_INS.join(" ") }; - var NUMBER = { + + // https://tc39.es/ecma262/#sec-literals-numeric-literals + const decimalDigits = '[0-9](_?[0-9])*'; + const frac = `\\.(${decimalDigits})`; + // DecimalIntegerLiteral, including Annex B NonOctalDecimalIntegerLiteral + // https://tc39.es/ecma262/#sec-additional-syntax-numeric-literals + const decimalInteger = `0|[1-9](_?[0-9])*|0[0-7]*[89][0-9]*`; + const NUMBER = { className: 'number', variants: [ - { begin: '\\b(0[bB][01]+)n?' }, - { begin: '\\b(0[oO][0-7]+)n?' }, - { begin: hljs.C_NUMBER_RE + 'n?' } + // DecimalLiteral + { begin: `(\\b(${decimalInteger})((${frac})|\\.)?|(${frac}))` + + `[eE][+-]?(${decimalDigits})\\b` }, + { begin: `\\b(${decimalInteger})\\b((${frac})\\b|\\.)?|(${frac})\\b` }, + + // DecimalBigIntegerLiteral + { begin: `\\b(0|[1-9](_?[0-9])*)n\\b` }, + + // NonDecimalIntegerLiteral + { begin: "\\b0[xX][0-9a-fA-F](_?[0-9a-fA-F])*n?\\b" }, + { begin: "\\b0[bB][0-1](_?[0-1])*n?\\b" }, + { begin: "\\b0[oO][0-7](_?[0-7])*n?\\b" }, + + // LegacyOctalIntegerLiteral (does not include underscore separators) + // https://tc39.es/ecma262/#sec-additional-syntax-numeric-literals + { begin: "\\b0[0-7]+n?\\b" }, ], relevance: 0 }; - var SUBST = { + + const SUBST = { className: 'subst', - begin: '\\$\\{', end: '\\}', + begin: '\\$\\{', + end: '\\}', keywords: KEYWORDS$1, - contains: [] // defined later + contains: [] // defined later }; - var HTML_TEMPLATE = { - begin: 'html`', end: '', + const HTML_TEMPLATE = { + begin: 'html`', + end: '', starts: { - end: '`', returnEnd: false, + end: '`', + returnEnd: false, contains: [ hljs.BACKSLASH_ESCAPE, SUBST ], - subLanguage: 'xml', + subLanguage: 'xml' } }; - var CSS_TEMPLATE = { - begin: 'css`', end: '', + const CSS_TEMPLATE = { + begin: 'css`', + end: '', starts: { - end: '`', returnEnd: false, + end: '`', + returnEnd: false, contains: [ hljs.BACKSLASH_ESCAPE, SUBST ], - subLanguage: 'css', + subLanguage: 'css' } }; - var TEMPLATE_STRING = { + const TEMPLATE_STRING = { className: 'string', - begin: '`', end: '`', + begin: '`', + end: '`', contains: [ hljs.BACKSLASH_ESCAPE, SUBST ] }; - SUBST.contains = [ + const JSDOC_COMMENT = hljs.COMMENT( + '/\\*\\*', + '\\*/', + { + relevance: 0, + contains: [ + { + className: 'doctag', + begin: '@[A-Za-z]+', + contains: [ + { + className: 'type', + begin: '\\{', + end: '\\}', + relevance: 0 + }, + { + className: 'variable', + begin: IDENT_RE$1 + '(?=\\s*(-)|$)', + endsParent: true, + relevance: 0 + }, + // eat spaces (not newlines) so we can find + // types or variables + { + begin: /(?=[^\n])\s/, + relevance: 0 + } + ] + } + ] + } + ); + const COMMENT = { + className: "comment", + variants: [ + JSDOC_COMMENT, + hljs.C_BLOCK_COMMENT_MODE, + hljs.C_LINE_COMMENT_MODE + ] + }; + const SUBST_INTERNALS = [ hljs.APOS_STRING_MODE, hljs.QUOTE_STRING_MODE, HTML_TEMPLATE, @@ -2601,32 +2764,52 @@ hljs.registerLanguage('javascript', function () { NUMBER, hljs.REGEXP_MODE ]; - var PARAMS_CONTAINS = SUBST.contains.concat([ + SUBST.contains = SUBST_INTERNALS + .concat({ + // we need to pair up {} inside our subst to prevent + // it from ending too early by matching another } + begin: /\{/, + end: /\}/, + keywords: KEYWORDS$1, + contains: [ + "self" + ].concat(SUBST_INTERNALS) + }); + const SUBST_AND_COMMENTS = [].concat(COMMENT, SUBST.contains); + const PARAMS_CONTAINS = SUBST_AND_COMMENTS.concat([ // eat recursive parens in sub expressions - { begin: /\(/, end: /\)/, - contains: ["self"].concat(SUBST.contains, [hljs.C_BLOCK_COMMENT_MODE, hljs.C_LINE_COMMENT_MODE]) - }, - hljs.C_BLOCK_COMMENT_MODE, - hljs.C_LINE_COMMENT_MODE + { + begin: /\(/, + end: /\)/, + keywords: KEYWORDS$1, + contains: ["self"].concat(SUBST_AND_COMMENTS) + } ]); - var PARAMS = { + const PARAMS = { className: 'params', - begin: /\(/, end: /\)/, + begin: /\(/, + end: /\)/, excludeBegin: true, excludeEnd: true, + keywords: KEYWORDS$1, contains: PARAMS_CONTAINS }; return { - name: 'JavaScript', + name: 'Javascript', aliases: ['js', 'jsx', 'mjs', 'cjs'], keywords: KEYWORDS$1, + // this will be extended by TypeScript + exports: { PARAMS_CONTAINS }, + illegal: /#(?![$_A-z])/, contains: [ hljs.SHEBANG({ + label: "shebang", binary: "node", relevance: 5 }), { + label: "use_strict", className: 'meta', relevance: 10, begin: /^\s*['"]use (strict|asm)['"]/ @@ -2636,41 +2819,7 @@ hljs.registerLanguage('javascript', function () { HTML_TEMPLATE, CSS_TEMPLATE, TEMPLATE_STRING, - hljs.C_LINE_COMMENT_MODE, - hljs.COMMENT( - '/\\*\\*', - '\\*/', - { - relevance : 0, - contains : [ - { - className : 'doctag', - begin : '@[A-Za-z]+', - contains : [ - { - className: 'type', - begin: '\\{', - end: '\\}', - relevance: 0 - }, - { - className: 'variable', - begin: IDENT_RE$1 + '(?=\\s*(-)|$)', - endsParent: true, - relevance: 0 - }, - // eat spaces (not newlines) so we can find - // types or variables - { - begin: /(?=[^\n])\s/, - relevance: 0 - }, - ] - } - ] - } - ), - hljs.C_BLOCK_COMMENT_MODE, + COMMENT, NUMBER, { // object attr container begin: concat(/[{,\n]\s*/, @@ -2687,42 +2836,44 @@ hljs.registerLanguage('javascript', function () { lookahead(concat( // we also need to allow for multiple possible comments inbetween // the first key:value pairing - /(((\/\/.*$)|(\/\*(.|\n)*\*\/))\s*)*/, + /(((\/\/.*$)|(\/\*(\*[^/]|[^*])*\*\/))\s*)*/, IDENT_RE$1 + '\\s*:'))), relevance: 0, contains: [ { className: 'attr', begin: IDENT_RE$1 + lookahead('\\s*:'), - relevance: 0, - }, + relevance: 0 + } ] }, { // "value" container begin: '(' + hljs.RE_STARTERS_RE + '|\\b(case|return|throw)\\b)\\s*', keywords: 'return throw case', contains: [ - hljs.C_LINE_COMMENT_MODE, - hljs.C_BLOCK_COMMENT_MODE, + COMMENT, hljs.REGEXP_MODE, { className: 'function', // we have to count the parens to make sure we actually have the // correct bounding ( ) before the =>. There could be any number of // sub-expressions inside also surrounded by parens. - begin: '(\\([^(]*' + - '(\\([^(]*' + - '(\\([^(]*' + - '\\))?' + - '\\))?' + - '\\)|' + hljs.UNDERSCORE_IDENT_RE + ')\\s*=>', returnBegin: true, + begin: '(\\(' + + '[^()]*(\\(' + + '[^()]*(\\(' + + '[^()]*' + + '\\))*[^()]*' + + '\\))*[^()]*' + + '\\)|' + hljs.UNDERSCORE_IDENT_RE + ')\\s*=>', + returnBegin: true, end: '\\s*=>', contains: [ { className: 'params', variants: [ { - begin: hljs.UNDERSCORE_IDENT_RE + begin: hljs.UNDERSCORE_IDENT_RE, + relevance: 0 }, { className: null, @@ -2730,8 +2881,10 @@ hljs.registerLanguage('javascript', function () { skip: true }, { - begin: /\(/, end: /\)/, - excludeBegin: true, excludeEnd: true, + begin: /\(/, + end: /\)/, + excludeBegin: true, + excludeEnd: true, keywords: KEYWORDS$1, contains: PARAMS_CONTAINS } @@ -2740,69 +2893,120 @@ hljs.registerLanguage('javascript', function () { ] }, { // could be a comma delimited list of params to a function call - begin: /,/, relevance: 0, + begin: /,/, relevance: 0 }, { className: '', begin: /\s/, end: /\s*/, - skip: true, + skip: true }, { // JSX variants: [ { begin: FRAGMENT.begin, end: FRAGMENT.end }, - { begin: XML_TAG.begin, end: XML_TAG.end } + { + begin: XML_TAG.begin, + // we carefully check the opening tag to see if it truly + // is a tag and not a false positive + 'on:begin': XML_TAG.isTrulyOpeningTag, + end: XML_TAG.end + } ], subLanguage: 'xml', contains: [ { - begin: XML_TAG.begin, end: XML_TAG.end, skip: true, + begin: XML_TAG.begin, + end: XML_TAG.end, + skip: true, contains: ['self'] } ] - }, + } ], relevance: 0 }, { className: 'function', - beginKeywords: 'function', end: /\{/, excludeEnd: true, + beginKeywords: 'function', + end: /[{;]/, + excludeEnd: true, + keywords: KEYWORDS$1, contains: [ - hljs.inherit(hljs.TITLE_MODE, {begin: IDENT_RE$1}), + 'self', + hljs.inherit(hljs.TITLE_MODE, { begin: IDENT_RE$1 }), PARAMS ], - illegal: /\[|%/ + illegal: /%/ }, { - begin: /\$[(.]/ // relevance booster for a pattern common to JS libs: `$(something)` and `$.something` + // prevent this from getting swallowed up by function + // since they appear "function like" + beginKeywords: "while if switch catch for" + }, + { + className: 'function', + // we have to count the parens to make sure we actually have the correct + // bounding ( ). There could be any number of sub-expressions inside + // also surrounded by parens. + begin: hljs.UNDERSCORE_IDENT_RE + + '\\(' + // first parens + '[^()]*(\\(' + + '[^()]*(\\(' + + '[^()]*' + + '\\))*[^()]*' + + '\\))*[^()]*' + + '\\)\\s*\\{', // end parens + returnBegin:true, + contains: [ + PARAMS, + hljs.inherit(hljs.TITLE_MODE, { begin: IDENT_RE$1 }), + ] + }, + // hack: prevents detection of keywords in some circumstances + // .keyword() + // $keyword = x + { + variants: [ + { begin: '\\.' + IDENT_RE$1 }, + { begin: '\\$' + IDENT_RE$1 } + ], + relevance: 0 }, - - hljs.METHOD_GUARD, { // ES6 class className: 'class', - beginKeywords: 'class', end: /[{;=]/, excludeEnd: true, - illegal: /[:"\[\]]/, + beginKeywords: 'class', + end: /[{;=]/, + excludeEnd: true, + illegal: /[:"[\]]/, contains: [ - {beginKeywords: 'extends'}, + { beginKeywords: 'extends' }, hljs.UNDERSCORE_TITLE_MODE ] }, { - beginKeywords: 'constructor', end: /\{/, excludeEnd: true + begin: /\b(?=constructor)/, + end: /[{;]/, + excludeEnd: true, + contains: [ + hljs.inherit(hljs.TITLE_MODE, { begin: IDENT_RE$1 }), + 'self', + PARAMS + ] }, { begin: '(get|set)\\s+(?=' + IDENT_RE$1 + '\\()', - end: /{/, + end: /\{/, keywords: "get set", contains: [ - hljs.inherit(hljs.TITLE_MODE, {begin: IDENT_RE$1}), + hljs.inherit(hljs.TITLE_MODE, { begin: IDENT_RE$1 }), { begin: /\(\)/ }, // eat to avoid empty params PARAMS ] - + }, + { + begin: /\$[(.]/ // relevance booster for a pattern common to JS libs: `$(something)` and `$.something` } - ], - illegal: /#(?!!)/ + ] }; } @@ -2815,32 +3019,95 @@ hljs.registerLanguage('javascript', function () { hljs.registerLanguage('xml', function () { 'use strict'; + /** + * @param {string} value + * @returns {RegExp} + * */ + + /** + * @param {RegExp | string } re + * @returns {string} + */ + function source(re) { + if (!re) return null; + if (typeof re === "string") return re; + + return re.source; + } + + /** + * @param {RegExp | string } re + * @returns {string} + */ + function lookahead(re) { + return concat('(?=', re, ')'); + } + + /** + * @param {RegExp | string } re + * @returns {string} + */ + function optional(re) { + return concat('(', re, ')?'); + } + + /** + * @param {...(RegExp | string) } args + * @returns {string} + */ + function concat(...args) { + const joined = args.map((x) => source(x)).join(""); + return joined; + } + + /** + * Any of the passed expresssions may match + * + * Creates a huge this | this | that | that match + * @param {(RegExp | string)[] } args + * @returns {string} + */ + function either(...args) { + const joined = '(' + args.map((x) => source(x)).join("|") + ")"; + return joined; + } + /* Language: HTML, XML Website: https://www.w3.org/XML/ Category: common */ + /** @type LanguageFn */ function xml(hljs) { - var XML_IDENT_RE = '[A-Za-z0-9\\._:-]+'; - var XML_ENTITIES = { + // Element names can contain letters, digits, hyphens, underscores, and periods + const TAG_NAME_RE = concat(/[A-Z_]/, optional(/[A-Z0-9_.-]+:/), /[A-Z0-9_.-]*/); + const XML_IDENT_RE = '[A-Za-z0-9\\._:-]+'; + const XML_ENTITIES = { className: 'symbol', begin: '&[a-z]+;|&#[0-9]+;|&#x[a-f0-9]+;' }; - var XML_META_KEYWORDS = { - begin: '\\s', - contains:[ - { - className: 'meta-keyword', - begin: '#?[a-z_][a-z1-9_-]+', - illegal: '\\n', - } - ] + const XML_META_KEYWORDS = { + begin: '\\s', + contains: [ + { + className: 'meta-keyword', + begin: '#?[a-z_][a-z1-9_-]+', + illegal: '\\n' + } + ] }; - var XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {begin: '\\(', end: '\\)'}); - var APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, {className: 'meta-string'}); - var QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, {className: 'meta-string'}); - var TAG_INTERNALS = { + const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, { + begin: '\\(', + end: '\\)' + }); + const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, { + className: 'meta-string' + }); + const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, { + className: 'meta-string' + }); + const TAG_INTERNALS = { endsWithParent: true, illegal: /</, relevance: 0, @@ -2858,9 +3125,19 @@ hljs.registerLanguage('xml', function () { className: 'string', endsParent: true, variants: [ - {begin: /"/, end: /"/, contains: [XML_ENTITIES]}, - {begin: /'/, end: /'/, contains: [XML_ENTITIES]}, - {begin: /[^\s"'=<>`]+/} + { + begin: /"/, + end: /"/, + contains: [ XML_ENTITIES ] + }, + { + begin: /'/, + end: /'/, + contains: [ XML_ENTITIES ] + }, + { + begin: /[^\s"'=<>`]+/ + } ] } ] @@ -2869,34 +3146,48 @@ hljs.registerLanguage('xml', function () { }; return { name: 'HTML, XML', - aliases: ['html', 'xhtml', 'rss', 'atom', 'xjb', 'xsd', 'xsl', 'plist', 'wsf', 'svg'], + aliases: [ + 'html', + 'xhtml', + 'rss', + 'atom', + 'xjb', + 'xsd', + 'xsl', + 'plist', + 'wsf', + 'svg' + ], case_insensitive: true, contains: [ { className: 'meta', - begin: '<![a-z]', end: '>', + begin: '<![a-z]', + end: '>', relevance: 10, contains: [ - XML_META_KEYWORDS, - QUOTE_META_STRING_MODE, - APOS_META_STRING_MODE, - XML_META_PAR_KEYWORDS, - { - begin: '\\[', end: '\\]', - contains:[ - { - className: 'meta', - begin: '<![a-z]', end: '>', - contains: [ - XML_META_KEYWORDS, - XML_META_PAR_KEYWORDS, - QUOTE_META_STRING_MODE, - APOS_META_STRING_MODE - ] - } - ] - } - ] + XML_META_KEYWORDS, + QUOTE_META_STRING_MODE, + APOS_META_STRING_MODE, + XML_META_PAR_KEYWORDS, + { + begin: '\\[', + end: '\\]', + contains: [ + { + className: 'meta', + begin: '<![a-z]', + end: '>', + contains: [ + XML_META_KEYWORDS, + XML_META_PAR_KEYWORDS, + QUOTE_META_STRING_MODE, + APOS_META_STRING_MODE + ] + } + ] + } + ] }, hljs.COMMENT( '<!--', @@ -2906,13 +3197,16 @@ hljs.registerLanguage('xml', function () { } ), { - begin: '<\\!\\[CDATA\\[', end: '\\]\\]>', + begin: '<!\\[CDATA\\[', + end: '\\]\\]>', relevance: 10 }, XML_ENTITIES, { className: 'meta', - begin: /<\?xml/, end: /\?>/, relevance: 10 + begin: /<\?xml/, + end: /\?>/, + relevance: 10 }, { className: 'tag', @@ -2922,33 +3216,87 @@ hljs.registerLanguage('xml', function () { ending braket. The '$' is needed for the lexeme to be recognized by hljs.subMode() that tests lexemes outside the stream. */ - begin: '<style(?=\\s|>)', end: '>', - keywords: {name: 'style'}, - contains: [TAG_INTERNALS], + begin: '<style(?=\\s|>)', + end: '>', + keywords: { + name: 'style' + }, + contains: [ TAG_INTERNALS ], starts: { - end: '</style>', returnEnd: true, - subLanguage: ['css', 'xml'] + end: '</style>', + returnEnd: true, + subLanguage: [ + 'css', + 'xml' + ] } }, { className: 'tag', // See the comment in the <style tag about the lookahead pattern - begin: '<script(?=\\s|>)', end: '>', - keywords: {name: 'script'}, - contains: [TAG_INTERNALS], + begin: '<script(?=\\s|>)', + end: '>', + keywords: { + name: 'script' + }, + contains: [ TAG_INTERNALS ], starts: { - end: '\<\/script\>', returnEnd: true, - subLanguage: ['javascript', 'handlebars', 'xml'] + end: /<\/script>/, + returnEnd: true, + subLanguage: [ + 'javascript', + 'handlebars', + 'xml' + ] } }, + // we need this for now for jSX { className: 'tag', - begin: '</?', end: '/?>', + begin: /<>|<\/>/ + }, + // open tag + { + className: 'tag', + begin: concat( + /</, + lookahead(concat( + TAG_NAME_RE, + // <tag/> + // <tag> + // <tag ... + either(/\/>/, />/, /\s/) + )) + ), + end: /\/?>/, contains: [ { - className: 'name', begin: /[^\/><\s]+/, relevance: 0 + className: 'name', + begin: TAG_NAME_RE, + relevance: 0, + starts: TAG_INTERNALS + } + ] + }, + // close tag + { + className: 'tag', + begin: concat( + /<\//, + lookahead(concat( + TAG_NAME_RE, />/ + )) + ), + contains: [ + { + className: 'name', + begin: TAG_NAME_RE, + relevance: 0 }, - TAG_INTERNALS + { + begin: />/, + relevance: 0 + } ] } ] |