diff options
Diffstat (limited to 'lib/internal/test_runner/tap_lexer.js')
-rw-r--r-- | lib/internal/test_runner/tap_lexer.js | 540 |
1 files changed, 0 insertions, 540 deletions
// lib/internal/test_runner/tap_lexer.js
// Contents of blob 8af5453b28, i.e. the file that the diff above removes.
'use strict';

const {
  ArrayPrototypePop,
  ArrayPrototypePush,
  MathMax,
  SafeSet,
  StringPrototypeCodePointAt,
  StringPrototypeTrim,
} = primordials;
const {
  codes: { ERR_TAP_LEXER_ERROR },
} = require('internal/errors');

const { isZeroWidthCodePoint } = require('internal/util/inspect');

// Both synthetic tokens (end-of-line and end-of-file) carry an empty value;
// they are told apart by their `kind` only.
const kEOL = '';
const kEOF = '';

/**
 * Names of every token kind the lexer can emit.
 */
const TokenKind = {
  EOF: 'EOF',
  EOL: 'EOL',
  NEWLINE: 'NewLine',
  NUMERIC: 'Numeric',
  LITERAL: 'Literal',
  KEYWORD: 'Keyword',
  WHITESPACE: 'Whitespace',
  COMMENT: 'Comment',
  DASH: 'Dash',
  PLUS: 'Plus',
  HASH: 'Hash',
  ESCAPE: 'Escape',
  UNKNOWN: 'Unknown',

  // TAP tokens
  TAP: 'TAPKeyword',
  TAP_VERSION: 'VersionKeyword',
  TAP_PLAN: 'PlanKeyword',
  TAP_TEST_POINT: 'TestPointKeyword',
  TAP_SUBTEST_POINT: 'SubTestPointKeyword',
  TAP_TEST_OK: 'TestOkKeyword',
  TAP_TEST_NOTOK: 'TestNotOkKeyword',
  TAP_YAML_START: 'YamlStartKeyword',
  TAP_YAML_END: 'YamlEndKeyword',
  TAP_YAML_BLOCK: 'YamlKeyword',
  TAP_PRAGMA: 'PragmaKeyword',
  TAP_BAIL_OUT: 'BailOutKeyword',
};

/**
 * A lexed token together with its position in the input.
 *
 * The location is derived from the stream state at construction time, so a
 * token must be created right after its last character has been consumed.
 */
class Token {
  /**
   * @param {object} options
   * @param {string} options.kind One of the `TokenKind` values.
   * @param {string} options.value Source text of the token ('' for EOL/EOF).
   * @param {InputStream} options.stream Stream positioned just past the token.
   */
  constructor({ kind, value, stream }) {
    const valueLength = ('' + value).length;
    this.kind = kind;
    this.value = value;
    this.location = {
      line: stream.line,
      column: MathMax(stream.column - valueLength + 1, 1), // 1 based
      start: MathMax(stream.pos - valueLength, 0), // zero based
      end: stream.pos - (value === '' ? 0 : 1), // zero based
    };

    // NOTE(review): there used to be an "EOF special case" here guarded by
    // `value === TokenKind.EOF`. EOF tokens carry an empty value, so the guard
    // never matched them; it only matched a literal word "EOF" in the input
    // and overwrote that literal's location with end-of-input positions.
    // The guard is dropped: the generic computation above already places the
    // empty-valued EOF token at `stream.pos`, one past the last character.
  }
}

/**
 * Character cursor over the input that tracks offset, line and column.
 */
class InputStream {
  /**
   * @param {string} input Text to read.
   */
  constructor(input) {
    this.input = input;
    this.pos = 0; // Zero-based offset of the next unread character
    this.column = 0; // Characters consumed on the current line
    this.line = 1; // 1 based
  }

  /**
   * @returns {boolean} `true` once there is nothing left to read.
   */
  eof() {
    return this.peek() === undefined;
  }

  /**
   * Look at a character without consuming it.
   * @param {number} [offset] Distance from the next unread character; may be
   *   negative to look at characters that were already consumed.
   * @returns {string|undefined} The character, or `undefined` out of range.
   */
  peek(offset = 0) {
    return this.input[this.pos + offset];
  }

  /**
   * Consume one character and update the position counters.
   * @returns {string|undefined} The consumed character, or `undefined` at the
   *   end of the input.
   */
  next() {
    const char = this.peek();
    if (char === undefined) {
      return undefined;
    }

    this.pos++;
    this.column++;
    if (char === '\n') {
      this.line++;
      this.column = 0;
    }

    return char;
  }
}

/**
 * Splits TAP text into tokens, grouped into one chunk per input line.
 */
class TapLexer {
  static Keywords = new SafeSet([
    'TAP',
    'version',
    'ok',
    'not',
    '...',
    '---',
    '..',
    'pragma',
    '-',
    '+',

    // NOTE: "Skip", "Todo" and "Bail out!" literals are deferred to the parser
  ]);

  #isComment = false;
  #source = null;
  #escapeStack = [];
  #lastScannedToken = null;

  /**
   * @param {string} source TAP text to tokenize.
   */
  constructor(source) {
    this.#source = new InputStream(source);
    // Start as if an end-of-line had just been seen, so that keywords which
    // are only valid at the beginning of a line are recognized on line one.
    this.#lastScannedToken = new Token({
      kind: TokenKind.EOL,
      value: kEOL,
      stream: this.#source,
    });
  }

  /**
   * Tokenize the whole input.
   * @returns {Token[][]} One array of tokens per line. A line ends with its
   *   NEWLINE token, or with a synthetic EOL token when the input does not
   *   end with a newline. The last chunk always holds the single EOF token.
   */
  scan() {
    const tokens = [];
    let chunk = [];
    while (!this.eof()) {
      const token = this.#scanToken();

      // Remember the last scanned token (except for whitespace)
      if (token.kind !== TokenKind.WHITESPACE) {
        this.#lastScannedToken = token;
      }

      ArrayPrototypePush(chunk, token);
      if (token.kind === TokenKind.NEWLINE) {
        // Store the current chunk + NEWLINE token
        ArrayPrototypePush(tokens, chunk);
        chunk = [];
      }
    }

    if (chunk.length > 0) {
      ArrayPrototypePush(chunk, this.#scanEOL());
      ArrayPrototypePush(tokens, chunk);
    }

    // send EOF as a separate chunk
    ArrayPrototypePush(tokens, [this.#scanEOF()]);

    return tokens;
  }

  /**
   * Consume and return the next character of the input.
   * @returns {string|undefined}
   */
  next() {
    return this.#source.next();
  }

  /**
   * @returns {boolean} `true` when the whole input has been consumed.
   */
  eof() {
    return this.#source.eof();
  }

  /**
   * Raise a lexer error.
   *
   * This used to forward to `this.#source.error()`, a method `InputStream`
   * does not have, so every call failed with a `TypeError`.
   * NOTE(review): the message layout below is a choice made here; adjust it
   * if callers rely on a particular format.
   * @param {string} message Description of the problem.
   * @param {Token} [token] Offending token; its location is appended.
   * @param {string} [expected] What was expected instead, if known.
   * @throws {ERR_TAP_LEXER_ERROR} Always.
   */
  error(message, token, expected = '') {
    let details = message;
    if (token?.location) {
      const { line, column } = token.location;
      details += ` at line ${line}, column ${column}`;
    }
    if (expected) {
      details += `, expected ${expected}`;
    }

    throw new ERR_TAP_LEXER_ERROR(details);
  }

  // Consume one character and dispatch on it to the matching scanner.
  #scanToken() {
    const char = this.next();

    if (this.#isEOFSymbol(char)) {
      return this.#scanEOF();
    } else if (this.#isNewLineSymbol(char)) {
      return this.#scanNewLine(char);
    } else if (this.#isNumericSymbol(char)) {
      return this.#scanNumeric(char);
    } else if (this.#isDashSymbol(char)) {
      return this.#scanDash(char);
    } else if (this.#isPlusSymbol(char)) {
      return this.#scanPlus(char);
    } else if (this.#isHashSymbol(char)) {
      return this.#scanHash(char);
    } else if (this.#isEscapeSymbol(char)) {
      return this.#scanEscapeSymbol(char);
    } else if (this.#isWhitespaceSymbol(char)) {
      return this.#scanWhitespace(char);
    } else if (this.#isLiteralSymbol(char)) {
      return this.#scanLiteral(char);
    }

    // Report the position tracked by the input stream. The lexer previously
    // read its own line/column fields here, which were never updated and so
    // always reported "line 1, column 0".
    throw new ERR_TAP_LEXER_ERROR(
      `Unexpected character: ${char} at line ${this.#source.line}, column ${
        this.#source.column
      }`,
    );
  }

  #scanNewLine(char) {
    // In case of odd number of ESCAPE symbols, we need to clear the remaining
    // escape chars from the stack and start fresh for the next line.
    this.#escapeStack = [];

    // We also need to reset the comment flag
    this.#isComment = false;

    return new Token({
      kind: TokenKind.NEWLINE,
      value: char,
      stream: this.#source,
    });
  }

  #scanEOL() {
    return new Token({
      kind: TokenKind.EOL,
      value: kEOL,
      stream: this.#source,
    });
  }

  #scanEOF() {
    this.#isComment = false;

    return new Token({
      kind: TokenKind.EOF,
      value: kEOF,
      stream: this.#source,
    });
  }

  #scanEscapeSymbol(char) {
    // If the escape symbol has been escaped (by previous symbol),
    // or if the next symbol is a whitespace symbol,
    // then consume it as a literal.
    // The escape symbol itself has already been consumed, so `peek()` is the
    // character right after it. (This used to compare `peek(1)` against the
    // string 'Whitespace', which can never be equal to a single character.)
    if (
      this.#hasTheCurrentCharacterBeenEscaped() ||
      this.#isWhitespaceSymbol(this.#source.peek())
    ) {
      ArrayPrototypePop(this.#escapeStack);
      return new Token({
        kind: TokenKind.LITERAL,
        value: char,
        stream: this.#source,
      });
    }

    // Otherwise, consume the escape symbol as an escape symbol that should be ignored by the parser
    // we also need to push the escape symbol to the escape stack
    // and consume the next character as a literal (done in the next turn)
    ArrayPrototypePush(this.#escapeStack, char);
    return new Token({
      kind: TokenKind.ESCAPE,
      value: char,
      stream: this.#source,
    });
  }

  #scanWhitespace(char) {
    return new Token({
      kind: TokenKind.WHITESPACE,
      value: char,
      stream: this.#source,
    });
  }

  #scanDash(char) {
    // Combine the dash with the next 2 characters and check if it's a YAML
    // start marker
    const marker = char + this.#source.peek() + this.#source.peek(1);

    if (this.#isYamlStartSymbol(marker)) {
      this.next(); // consume second -
      this.next(); // consume third -

      return new Token({
        kind: TokenKind.TAP_YAML_START,
        value: marker,
        stream: this.#source,
      });
    }

    return new Token({
      kind: TokenKind.DASH,
      value: char,
      stream: this.#source,
    });
  }

  #scanPlus(char) {
    return new Token({
      kind: TokenKind.PLUS,
      value: char,
      stream: this.#source,
    });
  }

  #scanHash(char) {
    // The hash itself has been consumed already, so offset -1 is the hash and
    // offset -2 is the character in front of it.
    const lastCharacter = this.#source.peek(-2);
    const nextToken = this.#source.peek();

    // If we encounter a hash symbol at the beginning of a line,
    // we consider it as a comment
    if (!lastCharacter || this.#isNewLineSymbol(lastCharacter)) {
      this.#isComment = true;
      return new Token({
        kind: TokenKind.COMMENT,
        value: char,
        stream: this.#source,
      });
    }

    // The only valid case where a hash symbol is considered as a hash token
    // is when it's preceded by a whitespace symbol and followed by a non-hash symbol
    if (
      this.#isWhitespaceSymbol(lastCharacter) &&
      !this.#isHashSymbol(nextToken)
    ) {
      return new Token({
        kind: TokenKind.HASH,
        value: char,
        stream: this.#source,
      });
    }

    const charHasBeenEscaped = this.#hasTheCurrentCharacterBeenEscaped();
    if (this.#isComment || charHasBeenEscaped) {
      if (charHasBeenEscaped) {
        ArrayPrototypePop(this.#escapeStack);
      }

      return new Token({
        kind: TokenKind.LITERAL,
        value: char,
        stream: this.#source,
      });
    }

    // As a fallback, we consume the hash symbol as a literal
    return new Token({
      kind: TokenKind.LITERAL,
      value: char,
      stream: this.#source,
    });
  }

  #scanLiteral(char) {
    // Greedily collect the run of literal characters starting at `char`.
    let word = char;
    while (!this.#source.eof()) {
      const nextChar = this.#source.peek();
      if (this.#isLiteralSymbol(nextChar)) {
        word += this.#source.next();
      } else {
        break;
      }
    }

    word = StringPrototypeTrim(word);

    if (TapLexer.Keywords.has(word)) {
      // A keyword is only a keyword in the right context; otherwise it is
      // handled like any other word below.
      const token = this.#scanTAPKeyword(word);
      if (token) {
        return token;
      }
    }

    if (this.#isYamlEndSymbol(word)) {
      return new Token({
        kind: TokenKind.TAP_YAML_END,
        value: word,
        stream: this.#source,
      });
    }

    return new Token({
      kind: TokenKind.LITERAL,
      value: word,
      stream: this.#source,
    });
  }

  // Returns the keyword token for `word` if the previously scanned
  // (non-whitespace) token allows it at this position, otherwise `null`.
  #scanTAPKeyword(word) {
    const isLastScannedTokenEOLorNewLine =
      TokenKind.EOL === this.#lastScannedToken.kind ||
      TokenKind.NEWLINE === this.#lastScannedToken.kind;

    if (word === 'TAP' && isLastScannedTokenEOLorNewLine) {
      return new Token({
        kind: TokenKind.TAP,
        value: word,
        stream: this.#source,
      });
    }

    if (word === 'version' && this.#lastScannedToken.kind === TokenKind.TAP) {
      return new Token({
        kind: TokenKind.TAP_VERSION,
        value: word,
        stream: this.#source,
      });
    }

    if (word === '..' && this.#lastScannedToken.kind === TokenKind.NUMERIC) {
      return new Token({
        kind: TokenKind.TAP_PLAN,
        value: word,
        stream: this.#source,
      });
    }

    if (word === 'not' && isLastScannedTokenEOLorNewLine) {
      return new Token({
        kind: TokenKind.TAP_TEST_NOTOK,
        value: word,
        stream: this.#source,
      });
    }

    if (
      word === 'ok' &&
      (this.#lastScannedToken.kind === TokenKind.TAP_TEST_NOTOK ||
        isLastScannedTokenEOLorNewLine)
    ) {
      return new Token({
        kind: TokenKind.TAP_TEST_OK,
        value: word,
        stream: this.#source,
      });
    }

    if (word === 'pragma' && isLastScannedTokenEOLorNewLine) {
      return new Token({
        kind: TokenKind.TAP_PRAGMA,
        value: word,
        stream: this.#source,
      });
    }

    return null;
  }

  #scanNumeric(char) {
    // Greedily collect the run of digits starting at `char`.
    let number = char;
    while (!this.#source.eof()) {
      const nextChar = this.#source.peek();
      if (this.#isNumericSymbol(nextChar)) {
        number += nextChar;
        this.#source.next();
      } else {
        break;
      }
    }
    return new Token({
      kind: TokenKind.NUMERIC,
      value: number,
      stream: this.#source,
    });
  }

  #hasTheCurrentCharacterBeenEscaped() {
    // Use the escapeStack to keep track of the escape characters
    return this.#escapeStack.length > 0;
  }

  #isNumericSymbol(char) {
    return char >= '0' && char <= '9';
  }

  #isLiteralSymbol(char) {
    if (typeof char !== 'string') return false;
    const charCode = StringPrototypeCodePointAt(char);

    if (isZeroWidthCodePoint(charCode)) return false;
    if (this.#isWhitespaceSymbol(char)) return false;
    const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
    // Allow all non-latin characters.
    if (charCode > MAX_ASCII_CHAR_CODE) return true;
    const ZERO = 48; // 0
    const NINE = 57; // 9
    // Disallow numeric values. Compare code points on both sides: the upper
    // bound used to compare the character string itself against 58, which
    // only rejected digits through implicit string-to-number coercion.
    if (charCode >= ZERO && charCode <= NINE) return false;

    // Disallow characters with special meaning in TAP
    const HASH = 35; // #
    const BACKSLASH = 92; // \
    const PLUS = 43; // +
    const DASH = 45; // -

    return charCode !== HASH && charCode !== BACKSLASH &&
           charCode !== PLUS && charCode !== DASH;
  }

  #isWhitespaceSymbol(char) {
    return char === ' ' || char === '\t';
  }

  #isEOFSymbol(char) {
    return char === undefined;
  }

  #isNewLineSymbol(char) {
    return char === '\n' || char === '\r';
  }

  #isHashSymbol(char) {
    return char === '#';
  }

  #isDashSymbol(char) {
    return char === '-';
  }

  #isPlusSymbol(char) {
    return char === '+';
  }

  #isEscapeSymbol(char) {
    return char === '\\' || char === '\x1b';
  }

  #isYamlStartSymbol(char) {
    return char === '---';
  }

  #isYamlEndSymbol(char) {
    return char === '...';
  }
}

module.exports = { TapLexer, TokenKind };