summary | refs | log | tree | commit | diff
path: root/lib/internal/test_runner/tap_lexer.js
diff options
context:
space:
mode:
Diffstat (limited to 'lib/internal/test_runner/tap_lexer.js')
-rw-r--r-- lib/internal/test_runner/tap_lexer.js 540
1 files changed, 0 insertions, 540 deletions
diff --git a/lib/internal/test_runner/tap_lexer.js b/lib/internal/test_runner/tap_lexer.js
deleted file mode 100644
index 8af5453b28..0000000000
--- a/lib/internal/test_runner/tap_lexer.js
+++ /dev/null
@@ -1,540 +0,0 @@
-'use strict';
-
-const {
- ArrayPrototypePop,
- ArrayPrototypePush,
- MathMax,
- SafeSet,
- StringPrototypeCodePointAt,
- StringPrototypeTrim,
-} = primordials;
-const {
- codes: { ERR_TAP_LEXER_ERROR },
-} = require('internal/errors');
-
-const { isZeroWidthCodePoint } = require('internal/util/inspect');
-
-const kEOL = ''; // Value given to synthetic EOL tokens; empty because creating one consumes no input.
-const kEOF = ''; // Value given to EOF tokens; likewise empty.
-
-const TokenKind = { // Token kinds; each value is the string stored in a token's `kind` field.
- EOF: 'EOF', // End of input; scan() emits it as a final chunk of its own.
- EOL: 'EOL', // Synthetic end-of-line appended by scan() when the last line has no newline.
- NEWLINE: 'NewLine', // A '\n' or '\r' character.
- NUMERIC: 'Numeric', // A run of ASCII digits.
- LITERAL: 'Literal', // A run of literal characters, or an escaped/demoted special character.
- KEYWORD: 'Keyword', // Not produced by the lexer in this file.
- WHITESPACE: 'Whitespace', // A single space or tab.
- COMMENT: 'Comment', // '#' at the very start of the input or right after a newline.
- DASH: 'Dash', // '-' that does not begin a '---' marker.
- PLUS: 'Plus', // '+'.
- HASH: 'Hash', // '#' preceded by whitespace and not followed by another '#'.
- ESCAPE: 'Escape', // '\' or ESC (\x1b) that starts an escape sequence.
- UNKNOWN: 'Unknown', // Not produced by the lexer in this file.
-
- // TAP-specific tokens. Keyword kinds are only assigned in the right context (see #scanTAPKeyword).
- TAP: 'TAPKeyword', // 'TAP' as the first non-whitespace token of a line.
- TAP_VERSION: 'VersionKeyword', // 'version' directly after a TAP token.
- TAP_PLAN: 'PlanKeyword', // '..' directly after a Numeric token.
- TAP_TEST_POINT: 'TestPointKeyword', // Not produced by the lexer in this file.
- TAP_SUBTEST_POINT: 'SubTestPointKeyword', // Not produced by the lexer in this file.
- TAP_TEST_OK: 'TestOkKeyword', // 'ok' at the start of a line or directly after 'not'.
- TAP_TEST_NOTOK: 'TestNotOkKeyword', // 'not' as the first non-whitespace token of a line.
- TAP_YAML_START: 'YamlStartKeyword', // The '---' marker.
- TAP_YAML_END: 'YamlEndKeyword', // The '...' marker.
- TAP_YAML_BLOCK: 'YamlKeyword', // Not produced by the lexer in this file.
- TAP_PRAGMA: 'PragmaKeyword', // 'pragma' as the first non-whitespace token of a line.
- TAP_BAIL_OUT: 'BailOutKeyword', // Not produced by the lexer in this file ("Bail out!" is deferred to the parser).
-};
-
-/**
- * A single lexical token together with its position in the input.
- *
- * `location.line` and `location.column` are 1-based; `location.start` and
- * `location.end` are 0-based offsets into the input string. Positions are
- * derived from the stream cursor at construction time, so the cursor must
- * sit just past the last character of `value` when the token is created.
- */
-class Token {
-  /**
-   * @param {object} options
-   * @param {string} options.kind One of the `TokenKind` values.
-   * @param {string} options.value Raw text of the token ('' for EOL and EOF).
-   * @param {object} options.stream Cursor exposing `input`, `pos`, `column`
-   *   and `line`.
-   */
-  constructor({ kind, value, stream }) {
-    this.kind = kind;
-    this.value = value;
-
-    // EOF is a special case. It must be detected through `kind`: the value of
-    // an EOF token is the empty string, so comparing `value` against
-    // `TokenKind.EOF` could never match and left this branch unreachable.
-    if (kind === TokenKind.EOF) {
-      // We consider EOF to be outside the stream
-      const eofPosition = stream.input.length + 1;
-      this.location = {
-        line: stream.line,
-        column: stream.column + 1, // 1 based
-        start: eofPosition,
-        end: eofPosition,
-      };
-      return;
-    }
-
-    const valueLength = `${value}`.length;
-    this.location = {
-      line: stream.line,
-      column: MathMax(stream.column - valueLength + 1, 1), // 1 based
-      start: MathMax(stream.pos - valueLength, 0), // zero based
-      // An empty value (EOL) occupies no character, so `end` is not moved
-      // back onto a last character.
-      end: stream.pos - (value === '' ? 0 : 1), // zero based
-    };
-  }
-}
-
-/**
- * Forward-only cursor over a string that keeps track of the current offset,
- * line and column while characters are consumed.
- */
-class InputStream {
-  /**
-   * @param {string} input Text to read from.
-   */
-  constructor(input) {
-    this.input = input;
-    this.pos = 0; // Zero-based offset of the next unread character.
-    this.column = 0; // Characters consumed so far on the current line.
-    this.line = 1; // One-based number of the current line.
-  }
-
-  /**
-   * @returns {boolean} `true` once there is nothing left to read.
-   */
-  eof() {
-    return typeof this.peek() === 'undefined';
-  }
-
-  /**
-   * Looks at a character without consuming it.
-   * @param {number} [offset] Distance from the cursor; may be negative to
-   *   look at characters that were already consumed.
-   * @returns {string|undefined} The character, or `undefined` when the
-   *   position falls outside the input.
-   */
-  peek(offset = 0) {
-    const index = this.pos + offset;
-    return this.input[index];
-  }
-
-  /**
-   * Consumes one character and advances the cursor.
-   * @returns {string|undefined} The consumed character, or `undefined` at
-   *   the end of the input (in which case the cursor does not move).
-   */
-  next() {
-    const current = this.peek();
-
-    if (current !== undefined) {
-      this.pos += 1;
-      if (current === '\n') {
-        // A line feed starts a new line and resets the column counter.
-        this.line += 1;
-        this.column = 0;
-      } else {
-        this.column += 1;
-      }
-    }
-
-    return current;
-  }
-}
-
-/**
- * Lexer for TAP (Test Anything Protocol) text.
- *
- * `scan()` turns the source string into an array of chunks, one per input
- * line, where each chunk is an array of `Token`s. Keyword recognition is
- * context sensitive: it depends on the last non-whitespace token scanned.
- */
-class TapLexer {
-  // Words that may be promoted from a plain literal to a TAP keyword token.
-  static Keywords = new SafeSet([
-    'TAP',
-    'version',
-    'ok',
-    'not',
-    '...',
-    '---',
-    '..',
-    'pragma',
-    '-',
-    '+',
-
-    // NOTE: "Skip", "Todo" and "Bail out!" literals are deferred to the parser
-  ]);
-
-  // True while scanning a line that started with '#'.
-  #isComment = false;
-  // The InputStream being read.
-  #source = null;
-  // Pending escape symbols on the current line; non-empty means the next
-  // escapable character has been escaped.
-  #escapeStack = [];
-  // Last non-whitespace token produced; drives keyword recognition.
-  #lastScannedToken = null;
-
-  /**
-   * @param {string} source TAP text to tokenize.
-   */
-  constructor(source) {
-    this.#source = new InputStream(source);
-    // Start as if a line had just ended so that line-initial keywords
-    // ('TAP', 'ok', 'not', 'pragma') are recognised on the first line.
-    this.#lastScannedToken = new Token({
-      kind: TokenKind.EOL,
-      value: kEOL,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Tokenizes the whole input.
-   * @returns {Token[][]} One chunk per line. A chunk ends with a NewLine
-   *   token, or with a synthetic EOL token when the input does not end with
-   *   a newline. The last chunk always holds a single EOF token.
-   * @throws {ERR_TAP_LEXER_ERROR} When a character cannot be classified.
-   */
-  scan() {
-    const tokens = [];
-    let chunk = [];
-    while (!this.eof()) {
-      const token = this.#scanToken();
-
-      // Remember the last scanned token (except for whitespace)
-      if (token.kind !== TokenKind.WHITESPACE) {
-        this.#lastScannedToken = token;
-      }
-
-      ArrayPrototypePush(chunk, token);
-      if (token.kind === TokenKind.NEWLINE) {
-        // Store the current chunk + NEWLINE token
-        ArrayPrototypePush(tokens, chunk);
-        chunk = [];
-      }
-    }
-
-    if (chunk.length > 0) {
-      ArrayPrototypePush(chunk, this.#scanEOL());
-      ArrayPrototypePush(tokens, chunk);
-    }
-
-    // send EOF as a separate chunk
-    ArrayPrototypePush(tokens, [this.#scanEOF()]);
-
-    return tokens;
-  }
-
-  /**
-   * Consumes and returns the next character of the input.
-   * @returns {string|undefined} `undefined` at the end of the input.
-   */
-  next() {
-    return this.#source.next();
-  }
-
-  /**
-   * @returns {boolean} `true` when the whole input has been consumed.
-   */
-  eof() {
-    return this.#source.eof();
-  }
-
-  /**
-   * Reports a lexing error.
-   *
-   * This used to delegate to `this.#source.error()`, a method InputStream
-   * does not define, so every call failed with an unrelated TypeError. The
-   * error is now raised here directly.
-   * @param {string} message Description of the problem.
-   * @param {Token} [token] Offending token; its location is appended to the
-   *   message when available.
-   * @param {string} [expected] What was expected instead, if known.
-   * @throws {ERR_TAP_LEXER_ERROR} Always.
-   */
-  error(message, token, expected = '') {
-    let details = `${message}`;
-    if (token && token.location) {
-      const { line, column } = token.location;
-      details += ` at line ${line}, column ${column}`;
-    }
-    if (expected) {
-      details += ` (expected ${expected})`;
-    }
-    throw new ERR_TAP_LEXER_ERROR(details);
-  }
-
-  /**
-   * Consumes one character and dispatches to the matching scanner. The order
-   * of the checks matters: special symbols are tested before literals.
-   * @returns {Token}
-   * @throws {ERR_TAP_LEXER_ERROR} When no scanner accepts the character.
-   */
-  #scanToken() {
-    const char = this.next();
-
-    if (this.#isEOFSymbol(char)) {
-      return this.#scanEOF();
-    } else if (this.#isNewLineSymbol(char)) {
-      return this.#scanNewLine(char);
-    } else if (this.#isNumericSymbol(char)) {
-      return this.#scanNumeric(char);
-    } else if (this.#isDashSymbol(char)) {
-      return this.#scanDash(char);
-    } else if (this.#isPlusSymbol(char)) {
-      return this.#scanPlus(char);
-    } else if (this.#isHashSymbol(char)) {
-      return this.#scanHash(char);
-    } else if (this.#isEscapeSymbol(char)) {
-      return this.#scanEscapeSymbol(char);
-    } else if (this.#isWhitespaceSymbol(char)) {
-      return this.#scanWhitespace(char);
-    } else if (this.#isLiteralSymbol(char)) {
-      return this.#scanLiteral(char);
-    }
-
-    // Report the live position tracked by the stream. The offending
-    // character has already been consumed, so `column` is its 1-based column.
-    throw new ERR_TAP_LEXER_ERROR(
-      `Unexpected character: ${char} at line ${this.#source.line}, column ${
-        this.#source.column
-      }`,
-    );
-  }
-
-  /**
-   * Emits a NewLine token and resets all per-line state.
-   * @param {string} char The newline character that was consumed.
-   * @returns {Token}
-   */
-  #scanNewLine(char) {
-    // In case of odd number of ESCAPE symbols, we need to clear the remaining
-    // escape chars from the stack and start fresh for the next line.
-    this.#escapeStack = [];
-
-    // We also need to reset the comment flag
-    this.#isComment = false;
-
-    return new Token({
-      kind: TokenKind.NEWLINE,
-      value: char,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Emits a synthetic end-of-line token without consuming input.
-   * @returns {Token}
-   */
-  #scanEOL() {
-    return new Token({
-      kind: TokenKind.EOL,
-      value: kEOL,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Emits the end-of-file token and clears the comment flag.
-   * @returns {Token}
-   */
-  #scanEOF() {
-    this.#isComment = false;
-
-    return new Token({
-      kind: TokenKind.EOF,
-      value: kEOF,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Scans an escape symbol ('\' or ESC).
-   * @param {string} char The escape character that was consumed.
-   * @returns {Token} A Literal when the symbol is itself escaped or is
-   *   followed by whitespace, otherwise an Escape token.
-   */
-  #scanEscapeSymbol(char) {
-    // If the escape symbol has been escaped (by previous symbol),
-    // or if the next symbol is a whitespace symbol,
-    // then consume it as a literal.
-    //
-    // The cursor already sits on the character following the escape symbol,
-    // so `peek()` (offset 0) is "the next symbol". The previous check
-    // compared `peek(1)` with the string 'Whitespace' and never matched.
-    if (
-      this.#hasTheCurrentCharacterBeenEscaped() ||
-      this.#isWhitespaceSymbol(this.#source.peek())
-    ) {
-      // Popping an empty stack is a harmless no-op.
-      ArrayPrototypePop(this.#escapeStack);
-      return new Token({
-        kind: TokenKind.LITERAL,
-        value: char,
-        stream: this.#source,
-      });
-    }
-
-    // Otherwise, consume the escape symbol as an escape symbol that should be
-    // ignored by the parser. We also need to push the escape symbol to the
-    // escape stack and consume the next character as a literal (done in the
-    // next turn).
-    ArrayPrototypePush(this.#escapeStack, char);
-    return new Token({
-      kind: TokenKind.ESCAPE,
-      value: char,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Emits a Whitespace token for a single space or tab.
-   * @param {string} char
-   * @returns {Token}
-   */
-  #scanWhitespace(char) {
-    return new Token({
-      kind: TokenKind.WHITESPACE,
-      value: char,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Scans '-', promoting '---' to a YAML start marker.
-   * @param {string} char The dash that was consumed.
-   * @returns {Token}
-   */
-  #scanDash(char) {
-    // Combine the consumed dash with the next two characters. Near the end
-    // of the input the peeks yield `undefined`, which simply makes the
-    // marker comparison fail.
-    const marker = char + this.#source.peek() + this.#source.peek(1);
-
-    if (this.#isYamlStartSymbol(marker)) {
-      this.next(); // consume second -
-      this.next(); // consume third -
-
-      return new Token({
-        kind: TokenKind.TAP_YAML_START,
-        value: marker,
-        stream: this.#source,
-      });
-    }
-
-    return new Token({
-      kind: TokenKind.DASH,
-      value: char,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Emits a Plus token.
-   * @param {string} char
-   * @returns {Token}
-   */
-  #scanPlus(char) {
-    return new Token({
-      kind: TokenKind.PLUS,
-      value: char,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Scans '#', which is a Comment marker, a Hash (directive separator) or a
-   * plain Literal depending on its surroundings.
-   * @param {string} char The hash that was consumed.
-   * @returns {Token}
-   */
-  #scanHash(char) {
-    // The cursor is one past the '#', so offset -2 is the character that
-    // precedes it and offset 0 is the character that follows it.
-    const lastCharacter = this.#source.peek(-2);
-    const nextCharacter = this.#source.peek();
-
-    // If we encounter a hash symbol at the beginning of a line,
-    // we consider it as a comment
-    if (!lastCharacter || this.#isNewLineSymbol(lastCharacter)) {
-      this.#isComment = true;
-      return new Token({
-        kind: TokenKind.COMMENT,
-        value: char,
-        stream: this.#source,
-      });
-    }
-
-    // The only valid case where a hash symbol is considered as a hash token
-    // is when it's preceded by a whitespace symbol and followed by a
-    // non-hash symbol
-    if (
-      this.#isWhitespaceSymbol(lastCharacter) &&
-      !this.#isHashSymbol(nextCharacter)
-    ) {
-      return new Token({
-        kind: TokenKind.HASH,
-        value: char,
-        stream: this.#source,
-      });
-    }
-
-    // Every remaining case (inside a comment, escaped, or anything else)
-    // yields a literal; an escaped hash additionally uses up its escape.
-    if (this.#hasTheCurrentCharacterBeenEscaped()) {
-      ArrayPrototypePop(this.#escapeStack);
-    }
-
-    return new Token({
-      kind: TokenKind.LITERAL,
-      value: char,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Scans a run of literal characters and classifies the resulting word as a
-   * TAP keyword, a YAML end marker or a plain Literal.
-   * @param {string} char First character of the word (already consumed).
-   * @returns {Token}
-   */
-  #scanLiteral(char) {
-    let word = char;
-    while (!this.#source.eof()) {
-      const nextChar = this.#source.peek();
-      if (this.#isLiteralSymbol(nextChar)) {
-        word += this.#source.next();
-      } else {
-        break;
-      }
-    }
-
-    word = StringPrototypeTrim(word);
-
-    if (TapLexer.Keywords.has(word)) {
-      // A keyword is only honoured in the right context; otherwise fall
-      // through and treat the word as ordinary text.
-      const token = this.#scanTAPKeyword(word);
-      if (token) {
-        return token;
-      }
-    }
-
-    if (this.#isYamlEndSymbol(word)) {
-      return new Token({
-        kind: TokenKind.TAP_YAML_END,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    return new Token({
-      kind: TokenKind.LITERAL,
-      value: word,
-      stream: this.#source,
-    });
-  }
-
-  /**
-   * Promotes a keyword candidate to its TAP token when the previously
-   * scanned non-whitespace token puts it in a valid position.
-   * @param {string} word A member of `TapLexer.Keywords`.
-   * @returns {Token|null} `null` when the word is not a keyword here.
-   */
-  #scanTAPKeyword(word) {
-    const isLastScannedTokenEOLorNewLine =
-      TokenKind.EOL === this.#lastScannedToken.kind ||
-      TokenKind.NEWLINE === this.#lastScannedToken.kind;
-
-    if (word === 'TAP' && isLastScannedTokenEOLorNewLine) {
-      return new Token({
-        kind: TokenKind.TAP,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    if (word === 'version' && this.#lastScannedToken.kind === TokenKind.TAP) {
-      return new Token({
-        kind: TokenKind.TAP_VERSION,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    if (word === '..' && this.#lastScannedToken.kind === TokenKind.NUMERIC) {
-      return new Token({
-        kind: TokenKind.TAP_PLAN,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    if (word === 'not' && isLastScannedTokenEOLorNewLine) {
-      return new Token({
-        kind: TokenKind.TAP_TEST_NOTOK,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    if (
-      word === 'ok' &&
-      (this.#lastScannedToken.kind === TokenKind.TAP_TEST_NOTOK ||
-        isLastScannedTokenEOLorNewLine)
-    ) {
-      return new Token({
-        kind: TokenKind.TAP_TEST_OK,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    if (word === 'pragma' && isLastScannedTokenEOLorNewLine) {
-      return new Token({
-        kind: TokenKind.TAP_PRAGMA,
-        value: word,
-        stream: this.#source,
-      });
-    }
-
-    return null;
-  }
-
-  /**
-   * Scans a run of ASCII digits.
-   * @param {string} char First digit (already consumed).
-   * @returns {Token} Numeric token whose value is the digit string.
-   */
-  #scanNumeric(char) {
-    let number = char;
-    while (!this.#source.eof()) {
-      const nextChar = this.#source.peek();
-      if (this.#isNumericSymbol(nextChar)) {
-        number += nextChar;
-        this.#source.next();
-      } else {
-        break;
-      }
-    }
-    return new Token({
-      kind: TokenKind.NUMERIC,
-      value: number,
-      stream: this.#source,
-    });
-  }
-
-  #hasTheCurrentCharacterBeenEscaped() {
-    // Use the escapeStack to keep track of the escape characters
-    return this.#escapeStack.length > 0;
-  }
-
-  #isNumericSymbol(char) {
-    return char >= '0' && char <= '9';
-  }
-
-  /**
-   * Tells whether a character may be part of a literal word.
-   * @param {*} char
-   * @returns {boolean}
-   */
-  #isLiteralSymbol(char) {
-    if (typeof char !== 'string') return false;
-    const charCode = StringPrototypeCodePointAt(char, 0);
-
-    if (isZeroWidthCodePoint(charCode)) return false;
-    if (this.#isWhitespaceSymbol(char)) return false;
-    const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
-    // Allow all non-latin characters.
-    if (charCode > MAX_ASCII_CHAR_CODE) return true;
-    const ZERO = 48; // 0
-    const NINE = 57; // 9
-    // Disallow numeric values. Both bounds are compared as code points: the
-    // previous check compared the character string with the number 58 and
-    // only excluded ':' (code 58) thanks to string-to-number coercion.
-    if (charCode >= ZERO && charCode <= NINE) return false;
-
-    // Disallow characters with special meaning in TAP
-    const HASH = 35; // #
-    const BACKSLASH = 92; // \
-    const PLUS = 43; // +
-    const DASH = 45; // -
-
-    return charCode !== HASH && charCode !== BACKSLASH &&
-           charCode !== PLUS && charCode !== DASH;
-  }
-
-  #isWhitespaceSymbol(char) {
-    return char === ' ' || char === '\t';
-  }
-
-  #isEOFSymbol(char) {
-    return char === undefined;
-  }
-
-  #isNewLineSymbol(char) {
-    return char === '\n' || char === '\r';
-  }
-
-  #isHashSymbol(char) {
-    return char === '#';
-  }
-
-  #isDashSymbol(char) {
-    return char === '-';
-  }
-
-  #isPlusSymbol(char) {
-    return char === '+';
-  }
-
-  #isEscapeSymbol(char) {
-    return char === '\\' || char === '\x1b';
-  }
-
-  #isYamlStartSymbol(char) {
-    return char === '---';
-  }
-
-  #isYamlEndSymbol(char) {
-    return char === '...';
-  }
-}
-
-module.exports = { TapLexer, TokenKind };