From fe572dcec9182f998a1961bc99531f39aa88f041 Mon Sep 17 00:00:00 2001 From: Mestery Date: Sun, 18 Jul 2021 10:36:31 +0200 Subject: Update javascript lexer (#1814) * make ts extends js lexer * add regex's d flag for js lexers cf. https://v8.dev/features/regexp-match-indices * update js builtins, operators, exceptions * fixup! update js builtins, operators, exceptions * add typescript override keyword * Update _mapping.py --- pygments/lexers/javascript.py | 199 ++++++++++++++++-------------------------- 1 file changed, 77 insertions(+), 122 deletions(-) (limited to 'pygments/lexers/javascript.py') diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index f04d42d4..485fc4cf 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -10,16 +10,16 @@ import re -from pygments.lexer import RegexLexer, include, bygroups, default, using, \ +from pygments.lexer import RegexLexer, include, bygroups, default, inherit, using, \ this, words, combined from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Other from pygments.util import get_bool_opt import pygments.unistring as uni -__all__ = ['JavascriptLexer', 'KalLexer', 'LiveScriptLexer', 'DartLexer', - 'TypeScriptLexer', 'LassoLexer', 'ObjectiveJLexer', - 'CoffeeScriptLexer', 'MaskLexer', 'EarlGreyLexer', 'JuttleLexer'] +__all__ = ['JavascriptLexer', 'TypeScriptLexer', 'KalLexer', 'LiveScriptLexer', + 'DartLexer', 'LassoLexer', 'ObjectiveJLexer', 'CoffeeScriptLexer', + 'MaskLexer', 'EarlGreyLexer', 'JuttleLexer'] JS_IDENT_START = ('(?:[$_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + ']|\\\\u[a-fA-F0-9]{4})') @@ -52,7 +52,7 @@ class JavascriptLexer(RegexLexer): 'slashstartsregex': [ include('commentsandwhitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' - r'([gimuys]+\b|\B)', String.Regex, '#pop'), + r'([gimuysd]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), default('#pop') ], @@ -75,23 +75,43 @@ 
class JavascriptLexer(RegexLexer): (r'(\.[0-9]+|[0-9]+\.[0-9]*|[0-9]+)([eE][-+]?[0-9]+)?', Number.Float), (r'\.\.\.|=>', Punctuation), - (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' - r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), + (r'\+\+|--|~|\?\?=?|\?|:|\\(?=\n)|' + r'(<<|>>>?|==?|!=?|(?:\*\*|\|\||&&|[-<>+*%&|^/]))=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), + + (r'(typeof|instanceof|in|void|delete|new)\b', Operator.Word, 'slashstartsregex'), + + # Match stuff like: constructor + (r'\b(constructor|from|as)\b', Keyword.Reserved), + (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|' - r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|await|async|' - r'this|of|static|export|import|from|as|debugger|extends|super)\b', Keyword, 'slashstartsregex'), + r'throw|try|catch|finally|yield|await|async|this|of|static|export|' + r'import|debugger|extends|super)\b', Keyword, 'slashstartsregex'), (r'(var|let|const|with|function|class)\b', Keyword.Declaration, 'slashstartsregex'), - (r'(abstract|boolean|byte|char|double|enum|final|float|goto' - r'implements|int|interface|long|native|package|private|protected' + + (r'(abstract|boolean|byte|char|double|enum|final|float|goto|' + r'implements|int|interface|long|native|package|private|protected|' r'public|short|synchronized|throws|transient|volatile)\b', Keyword.Reserved), (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant), - (r'(Array|Boolean|Date|BigInt|Error|Function|Math|' + + (r'(Array|Boolean|Date|BigInt|Function|Math|ArrayBuffer|' r'Number|Object|RegExp|String|Promise|Proxy|decodeURI|' r'decodeURIComponent|encodeURI|encodeURIComponent|' - r'Error|eval|isFinite|isNaN|isSafeInteger|parseFloat|parseInt|' - r'document|this|window|globalThis|Symbol)\b', Name.Builtin), + r'eval|isFinite|isNaN|parseFloat|parseInt|DataView|' + r'document|window|globalThis|global|Symbol|Intl|' + 
r'WeakSet|WeakMap|Set|Map|Reflect|JSON|Atomics|' + r'Int(?:8|16|32)Array|BigInt64Array|Float32Array|Float64Array|' + r'Uint8ClampedArray|Uint(?:8|16|32)Array|BigUint64Array)\b', Name.Builtin), + + (r'((?:Eval|Internal|Range|Reference|Syntax|Type|URI)?Error)\b', Name.Exception), + + # Match stuff like: super(argument, list) + (r'(super)(\s*)(\([\w,?.$\s]+\s*\))', + bygroups(Keyword, Text), 'slashstartsregex'), + # Match stuff like: function() {...} + (r'([a-zA-Z_?.$][\w?.$]*)(?=\(\) \{)', Name.Other, 'slashstartsregex'), + (JS_IDENT, Name.Other), (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), @@ -112,6 +132,43 @@ class JavascriptLexer(RegexLexer): } +class TypeScriptLexer(JavascriptLexer): + """ + For `TypeScript `_ source code. + + .. versionadded:: 1.6 + """ + + name = 'TypeScript' + aliases = ['typescript', 'ts'] + filenames = ['*.ts'] + mimetypes = ['application/x-typescript', 'text/x-typescript'] + + # Higher priority than the TypoScriptLexer, as TypeScript is far more + # common these days + priority = 0.5 + + tokens = { + 'root': [ + (r'(abstract|implements|private|protected|public|readonly)\b', + Keyword, 'slashstartsregex'), + (r'(enum|interface|override)\b', Keyword.Declaration, 'slashstartsregex'), + (r'\b(declare|type)\b', Keyword.Reserved), + # Match variable type keywords + (r'\b(string|boolean|number)\b', Keyword.Type), + # Match stuff like: module name {...} + (r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)', + bygroups(Keyword.Reserved, Text, Name.Other, Text), 'slashstartsregex'), + # Match stuff like: (function: return type) + (r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)', + bygroups(Name.Other, Text, Keyword.Type)), + # Match stuff like: Decorators + (r'@' + JS_IDENT, Keyword.Declaration), + inherit, + ], + } + + class KalLexer(RegexLexer): """ For `Kal`_ source code. @@ -157,7 +214,7 @@ class KalLexer(RegexLexer): 'root': [ include('commentsandwhitespace'), (r'/(?! 
)(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' - r'([gimuys]+\b|\B)', String.Regex), + r'([gimuysd]+\b|\B)', String.Regex), (r'\?|:|_(?=\n)|==?|!=|-(?!>)|[<>+*/-]=?', Operator), (r'\b(and|or|isnt|is|not|but|bitwise|mod|\^|xor|exists|' @@ -250,7 +307,7 @@ class LiveScriptLexer(RegexLexer): ], 'multilineregex': [ include('commentsandwhitespace'), - (r'//([gimuys]+\b|\B)', String.Regex, '#pop'), + (r'//([gimuysd]+\b|\B)', String.Regex, '#pop'), (r'/', String.Regex), (r'[^/#]+', String.Regex) ], @@ -258,7 +315,7 @@ class LiveScriptLexer(RegexLexer): include('commentsandwhitespace'), (r'//', String.Regex, ('#pop', 'multilineregex')), (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' - r'([gimuys]+\b|\B)', String.Regex, '#pop'), + r'([gimuysd]+\b|\B)', String.Regex, '#pop'), (r'/', Operator, '#pop'), default('#pop'), ], @@ -441,108 +498,6 @@ class DartLexer(RegexLexer): } -class TypeScriptLexer(RegexLexer): - """ - For `TypeScript `_ source code. - - .. versionadded:: 1.6 - """ - - name = 'TypeScript' - aliases = ['typescript', 'ts'] - filenames = ['*.ts', '*.tsx'] - mimetypes = ['text/x-typescript'] - - flags = re.DOTALL | re.MULTILINE - - # Higher priority than the TypoScriptLexer, as TypeScript is far more - # common these days - priority = 0.5 - - tokens = { - 'commentsandwhitespace': [ - (r'\s+', Text), - (r'