diff options
Diffstat (limited to 'src/scanner.c')
-rw-r--r-- | src/scanner.c | 452 |
1 files changed, 438 insertions, 14 deletions
diff --git a/src/scanner.c b/src/scanner.c
index 530bd91..2a555d0 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -484,6 +484,110 @@
 #include <assert.h>
 
 /*
+ * Ensure that the buffer contains the required number of characters.
+ * Return 1 on success, 0 on failure (reader error or memory error).
+ */
+
+#define UPDATE(parser,length)   \
+    ((parser)->unread >= (length)   \
+        ? 1                     \
+        : yaml_parser_update_buffer((parser), (length)))
+
+/*
+ * Check the octet at the specified position.
+ *
+ * NOTE(review): offsets are relative to the start of parser->buffer; confirm
+ * that this is the current read position.
+ */
+
+#define CHECK_AT(parser,octet,offset)   \
+    ((parser)->buffer[(offset)] == (yaml_char_t)(octet))
+
+/*
+ * Check the current octet in the buffer.
+ */
+
+#define CHECK(parser,octet) CHECK_AT(parser,(octet),0)
+
+/*
+ * Check if the character at the specified position is NUL.
+ */
+
+#define IS_Z_AT(parser,offset)    CHECK_AT(parser,'\0',(offset))
+
+#define IS_Z(parser)    IS_Z_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is space.
+ */
+
+#define IS_SPACE_AT(parser,offset)  CHECK_AT(parser,' ',(offset))
+
+#define IS_SPACE(parser)    IS_SPACE_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is tab.
+ */
+
+#define IS_TAB_AT(parser,offset)    CHECK_AT(parser,'\t',(offset))
+
+#define IS_TAB(parser)  IS_TAB_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is blank (space or tab).
+ */
+
+#define IS_BLANK_AT(parser,offset)  \
+    (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset)))
+
+#define IS_BLANK(parser)    IS_BLANK_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is a line break.
+ */
+
+#define IS_BREAK_AT(parser,offset)                                      \
+    (CHECK_AT(parser,'\r',(offset))                 /* CR (#xD) */      \
+     || CHECK_AT(parser,'\n',(offset))              /* LF (#xA) */      \
+     || (CHECK_AT(parser,'\xC2',(offset))                               \
+         && CHECK_AT(parser,'\x85',(offset)+1))     /* NEL (#x85) */    \
+     || (CHECK_AT(parser,'\xE2',(offset))                               \
+         && CHECK_AT(parser,'\x80',(offset)+1)                          \
+         && CHECK_AT(parser,'\xA8',(offset)+2))     /* LS (#x2028) */   \
+     || (CHECK_AT(parser,'\xE2',(offset))                               \
+         && CHECK_AT(parser,'\x80',(offset)+1)                          \
+         && CHECK_AT(parser,'\xA9',(offset)+2)))    /* PS (#x2029) */
+
+#define IS_BREAK(parser)    IS_BREAK_AT(parser,0)
+
+/*
+ * Check if the character is a line break or NUL.
+ */
+
+#define IS_BREAKZ_AT(parser,offset) \
+    (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset)))
+
+#define IS_BREAKZ(parser)   IS_BREAKZ_AT(parser,0)
+
+/*
+ * Check if the character is a line break, space, or NUL.
+ */
+
+#define IS_SPACEZ_AT(parser,offset) \
+    (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
+
+#define IS_SPACEZ(parser)   IS_SPACEZ_AT(parser,0)
+
+/*
+ * Check if the character is a line break, space, tab, or NUL.
+ */
+
+#define IS_BLANKZ_AT(parser,offset) \
+    (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
+
+#define IS_BLANKZ(parser)   IS_BLANKZ_AT(parser,0)
+
+/*
  * Public API declarations.
  */
 
@@ -494,6 +598,17 @@ YAML_DECLARE(yaml_token_t *)
 yaml_parser_peek_token(yaml_parser_t *parser);
 
 /*
+ * Error handling.
+ */
+
+static int
+yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
+        yaml_mark_t context_mark, const char *problem);
+
+static yaml_mark_t
+yaml_parser_get_mark(yaml_parser_t *parser);
+
+/*
  * High-level token API.
  */
 
@@ -521,10 +636,10 @@ yaml_parser_remove_simple_key(yaml_parser_t *parser);
  */
 
 static int
-yaml_parser_add_indent(yaml_parser_t *parser);
+yaml_parser_roll_indent(yaml_parser_t *parser, int column);
 
 static int
-yaml_parser_remove_indent(yaml_parser_t *parser);
+yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
 
 /*
  * Token fetchers.
@@ -591,21 +706,9 @@ static int
 yaml_parser_fetch_tag(yaml_parser_t *parser);
 
 static int
-yaml_parser_fetch_literal_scalar(yaml_parser_t *parser);
-
-static int
-yaml_parser_fetch_folded_scalar(yaml_parser_t *parser);
-
-static int
 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
 
 static int
-yaml_parser_fetch_single_quoted_scalar(yaml_parser_t *parser);
-
-static int
-yaml_parser_fetch_double_quoted_scalar(yaml_parser_t *parser);
-
-static int
 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
 
 static int
@@ -665,3 +768,324 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
 static yaml_token_t *
 yaml_parser_scan_plain_scalar(yaml_parser_t *parser);
 
+/*
+ * Get the next token and remove it from the tokens queue.
+ *
+ * Returns the token, or NULL if fetching more tokens fails.
+ */
+
+YAML_DECLARE(yaml_token_t *)
+yaml_parser_get_token(yaml_parser_t *parser)
+{
+    yaml_token_t *token;
+
+    assert(parser); /* Non-NULL parser object is expected. */
+    assert(!parser->stream_end_produced);   /* No tokens after STREAM-END. */
+
+    /* Ensure that the tokens queue contains enough tokens. */
+
+    if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
+
+    /* Fetch the next token from the queue. */
+
+    token = parser->tokens[parser->tokens_head];
+
+    /* Move the queue head. */
+
+    parser->tokens[parser->tokens_head++] = NULL;
+    if (parser->tokens_head == parser->tokens_size)
+        parser->tokens_head = 0;
+
+    parser->tokens_parsed++;
+
+    return token;
+}
+
+/*
+ * Get the next token, but don't remove it from the queue.
+ *
+ * Returns the head token, or NULL if fetching more tokens fails.
+ */
+
+YAML_DECLARE(yaml_token_t *)
+yaml_parser_peek_token(yaml_parser_t *parser)
+{
+    assert(parser); /* Non-NULL parser object is expected. */
+    assert(!parser->stream_end_produced);   /* No tokens after STREAM-END. */
+
+    /* Ensure that the tokens queue contains enough tokens. */
+
+    if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
+
+    /* Fetch the next token from the queue. */
+
+    return parser->tokens[parser->tokens_head];
+}
+
+/*
+ * Set the scanner error and return 0.
+ *
+ * The problem mark is the mark of the current buffer position.
+ */
+
+static int
+yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
+        yaml_mark_t context_mark, const char *problem)
+{
+    parser->error = YAML_SCANNER_ERROR;
+    parser->context = context;
+    parser->context_mark = context_mark;
+    parser->problem = problem;
+    parser->problem_mark = yaml_parser_get_mark(parser);
+
+    return 0;
+}
+
+/*
+ * Get the mark for the current buffer position.
+ */
+
+static yaml_mark_t
+yaml_parser_get_mark(yaml_parser_t *parser)
+{
+    yaml_mark_t mark = { parser->index, parser->line, parser->column };
+
+    return mark;
+}
+
+
+/*
+ * Ensure that the tokens queue contains at least one token which can be
+ * returned to the Parser.
+ *
+ * Returns 1 on success, 0 if fetching the next token fails.
+ */
+
+static int
+yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
+{
+    int need_more_tokens;
+    int k;
+
+    /* While we need more tokens to fetch, do it. */
+
+    while (1)
+    {
+        /*
+         * Check if we really need to fetch more tokens.
+         */
+
+        need_more_tokens = 0;
+
+        if (parser->tokens_head == parser->tokens_tail)
+        {
+            /* Queue is empty. */
+
+            need_more_tokens = 1;
+        }
+        else
+        {
+            /* Check if any potential simple key may occupy the head position. */
+
+            for (k = 0; k <= parser->flow_level; k++) {
+                yaml_simple_key_t *simple_key = parser->simple_keys[k];
+                if (simple_key
+                        && (simple_key->token_number == parser->tokens_parsed)) {
+                    need_more_tokens = 1;
+                    break;
+                }
+            }
+        }
+
+        /* We are finished. */
+
+        if (!need_more_tokens)
+            break;
+
+        /* Fetch the next token. */
+
+        if (!yaml_parser_fetch_next_token(parser))
+            return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * The dispatcher for token fetchers.
+ *
+ * Returns the result of the selected fetcher, or 0 on error.
+ */
+
+static int
+yaml_parser_fetch_next_token(yaml_parser_t *parser)
+{
+    /* Ensure that the buffer is initialized. */
+
+    if (!UPDATE(parser, 1))
+        return 0;
+
+    /* Check if we just started scanning.  Fetch STREAM-START then. */
+
+    if (!parser->stream_start_produced)
+        return yaml_parser_fetch_stream_start(parser);
+
+    /* Eat whitespaces and comments until we reach the next token. */
+
+    if (!yaml_parser_scan_to_next_token(parser))
+        return 0;
+
+    /* Check the indentation level against the current column. */
+
+    if (!yaml_parser_unroll_indent(parser, parser->column))
+        return 0;
+
+    /*
+     * Ensure that the buffer contains at least 4 characters.  4 is the length
+     * of the longest indicators ('--- ' and '... ').
+     */
+
+    if (!UPDATE(parser, 4))
+        return 0;
+
+    /* Is it the end of the stream? */
+
+    if (IS_Z(parser))
+        return yaml_parser_fetch_stream_end(parser);
+
+    /* Is it a directive? */
+
+    if (parser->column == 0 && CHECK(parser, '%'))
+        return yaml_parser_fetch_directive(parser);
+
+    /* Is it the document start indicator? */
+
+    if (parser->column == 0
+            && CHECK_AT(parser, '-', 0)
+            && CHECK_AT(parser, '-', 1)
+            && CHECK_AT(parser, '-', 2)
+            && IS_BLANKZ_AT(parser, 3))
+        return yaml_parser_fetch_document_start(parser);
+
+    /* Is it the document end indicator? */
+
+    if (parser->column == 0
+            && CHECK_AT(parser, '.', 0)
+            && CHECK_AT(parser, '.', 1)
+            && CHECK_AT(parser, '.', 2)
+            && IS_BLANKZ_AT(parser, 3))
+        return yaml_parser_fetch_document_end(parser);
+
+    /* Is it the flow sequence start indicator? */
+
+    if (CHECK(parser, '['))
+        return yaml_parser_fetch_flow_sequence_start(parser);
+
+    /* Is it the flow mapping start indicator? */
+
+    if (CHECK(parser, '{'))
+        return yaml_parser_fetch_flow_mapping_start(parser);
+
+    /* Is it the flow sequence end indicator? */
+
+    if (CHECK(parser, ']'))
+        return yaml_parser_fetch_flow_sequence_end(parser);
+
+    /* Is it the flow mapping end indicator? */
+
+    if (CHECK(parser, '}'))
+        return yaml_parser_fetch_flow_mapping_end(parser);
+
+    /* Is it the flow entry indicator? */
+
+    if (CHECK(parser, ','))
+        return yaml_parser_fetch_flow_entry(parser);
+
+    /* Is it the block entry indicator? */
+
+    if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
+        return yaml_parser_fetch_block_entry(parser);
+
+    /* Is it the key indicator? */
+
+    if (CHECK(parser, '?') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_key(parser);
+
+    /* Is it the value indicator? */
+
+    if (CHECK(parser, ':') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_value(parser);
+
+    /* Is it an alias? */
+
+    if (CHECK(parser, '*'))
+        return yaml_parser_fetch_alias(parser);
+
+    /* Is it an anchor? */
+
+    if (CHECK(parser, '&'))
+        return yaml_parser_fetch_anchor(parser);
+
+    /* Is it a tag? */
+
+    if (CHECK(parser, '!'))
+        return yaml_parser_fetch_tag(parser);
+
+    /* Is it a literal scalar? */
+
+    if (CHECK(parser, '|') && !parser->flow_level)
+        return yaml_parser_fetch_block_scalar(parser, 1);
+
+    /* Is it a folded scalar? */
+
+    if (CHECK(parser, '>') && !parser->flow_level)
+        return yaml_parser_fetch_block_scalar(parser, 0);
+
+    /* Is it a single-quoted scalar? */
+
+    if (CHECK(parser, '\''))
+        return yaml_parser_fetch_flow_scalar(parser, 1);
+
+    /* Is it a double-quoted scalar? */
+
+    if (CHECK(parser, '"'))
+        return yaml_parser_fetch_flow_scalar(parser, 0);
+
+    /*
+     * Is it a plain scalar?
+     *
+     * A plain scalar may start with any non-blank characters except
+     *
+     *      '-', '?', ':', ',', '[', ']', '{', '}',
+     *      '#', '&', '*', '!', '|', '>', '\'', '\"',
+     *      '%', '@', '`'.
+     *
+     * In the block context, it may also start with the characters
+     *
+     *      '-', '?', ':'
+     *
+     * if it is followed by a non-space character.
+     *
+     * The last rule is more restrictive than the specification requires.
+     */
+
+    if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
+                || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
+                || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
+                || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
+                || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
+                || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
+                || CHECK(parser, '@') || CHECK(parser, '`')) ||
+            (!parser->flow_level &&
+             (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
+             !IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_plain_scalar(parser);
+
+    /*
+     * If we don't determine the token type so far, it is an error.
+     */
+
+    return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
+            yaml_parser_get_mark(parser), "found character that cannot start any token");
+}
+