From fbfe60b4d3cd7a86fe7aa6eeeb73951a1e9a0a58 Mon Sep 17 00:00:00 2001 From: xi Date: Wed, 7 Jun 2006 20:30:22 +0000 Subject: Implementing Scanner: macros and high-level functions. git-svn-id: http://svn.pyyaml.org/libyaml/trunk@185 18f92427-320e-0410-9341-c67f048884a3 --- include/yaml/yaml.h | 20 ++- src/api.c | 66 ++++++-- src/scanner.c | 437 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 494 insertions(+), 29 deletions(-) diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 7ee6b28..6acbb54 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -537,9 +537,6 @@ typedef struct { */ typedef struct { - /** Is a simple key possible? */ - int possible; - /** Is a simple key required? */ int required; @@ -585,6 +582,15 @@ typedef struct { /** The problematic value (@c -1 is none). */ int problem_value; + /** The problem position. */ + yaml_mark_t problem_mark; + + /** The error context. */ + const char *context; + + /** The context position. */ + yaml_mark_t context_mark; + /** * @} */ @@ -661,7 +667,7 @@ typedef struct { int flow_level; /** The tokens queue, which contains the current produced tokens. */ - yaml_token_t *tokens; + yaml_token_t **tokens; /** The size of the tokens queue. */ size_t tokens_size; @@ -691,7 +697,7 @@ typedef struct { int simple_key_allowed; /** The stack of potential simple keys. */ - yaml_simple_key_t *simple_keys; + yaml_simple_key_t **simple_keys; /** The size of the simple keys stack. */ size_t simple_keys_size; @@ -853,6 +859,10 @@ yaml_realloc(void *ptr, size_t size); YAML_DECLARE(void) yaml_free(void *ptr); +/** The initial size for various buffers. */ + +#define YAML_DEFAULT_SIZE 16 + /** The size of the raw buffer. */ #define YAML_RAW_BUFFER_SIZE 16384 diff --git a/src/api.c b/src/api.c index 50f118a..c63da45 100644 --- a/src/api.c +++ b/src/api.c @@ -49,33 +49,76 @@ yaml_parser_new(void) /* Allocate the parser structure. 
*/ parser = yaml_malloc(sizeof(yaml_parser_t)); - if (!parser) return NULL; + if (!parser) goto error; memset(parser, 0, sizeof(yaml_parser_t)); /* Allocate the raw buffer. */ parser->raw_buffer = yaml_malloc(YAML_RAW_BUFFER_SIZE); - if (!parser->raw_buffer) { - yaml_free(parser); - return NULL; - } + if (!parser->raw_buffer) goto error; + memset(parser->raw_buffer, 0, YAML_RAW_BUFFER_SIZE); + parser->raw_pointer = parser->raw_buffer; parser->raw_unread = 0; /* Allocate the character buffer. */ parser->buffer = yaml_malloc(YAML_BUFFER_SIZE); - if (!parser->buffer) { - yaml_free(parser->raw_buffer); - yaml_free(parser); - return NULL; - } + if (!parser->buffer) goto error; + memset(parser->buffer, 0, YAML_BUFFER_SIZE); + parser->buffer_end = parser->buffer; parser->pointer = parser->buffer; parser->unread = 0; + /* Allocate the tokens queue. */ + + parser->tokens = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_token_t *)); + if (!parser->tokens) goto error; + memset(parser->tokens, 0, YAML_DEFAULT_SIZE*sizeof(yaml_token_t *)); + + parser->tokens_size = YAML_DEFAULT_SIZE; + parser->tokens_head = 0; + parser->tokens_tail = 0; + parser->tokens_parsed = 0; + + /* Allocate the indents stack. */ + + parser->indents = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(int)); + if (!parser->indents) goto error; + memset(parser->indents, 0, YAML_DEFAULT_SIZE*sizeof(int)); + + parser->indents_size = YAML_DEFAULT_SIZE; + parser->indents_length = 0; + + /* Allocate the stack of potential simple keys. */ + + parser->simple_keys = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *)); + if (!parser->simple_keys) goto error; + memset(parser->simple_keys, 0, YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *)); + + parser->simple_keys_size = YAML_DEFAULT_SIZE; + + /* Done. */ + return parser; + + /* On error, free allocated buffers. 
*/
+
+error:
+
+    if (!parser) return NULL;
+
+    yaml_free(parser->simple_keys);
+    yaml_free(parser->indents);
+    yaml_free(parser->tokens);
+    yaml_free(parser->buffer);
+    yaml_free(parser->raw_buffer);
+
+    yaml_free(parser);
+
+    return NULL;
 }

 /*
@@ -87,6 +130,9 @@ yaml_parser_delete(yaml_parser_t *parser)
 {
     assert(parser); /* Non-NULL parser object expected. */

+    yaml_free(parser->simple_keys);
+    yaml_free(parser->indents);
+    yaml_free(parser->tokens);
     yaml_free(parser->buffer);
     yaml_free(parser->raw_buffer);

diff --git a/src/scanner.c b/src/scanner.c
index 530bd91..2a555d0 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -483,6 +483,107 @@

 #include

+/*
+ * Ensure that the buffer contains the required number of characters.
+ * Return 1 on success, 0 on failure (reader error or memory error).
+ *
+ * yaml_parser_update_buffer() is called only when fewer than `length`
+ * characters are currently unread; otherwise the macro yields 1 directly.
+ * Note that `length` is evaluated twice on the slow path.
+ */
+
+#define UPDATE(parser,length)   \
+    ((parser)->unread >= (length)   \
+        ? 1 \
+        : yaml_parser_update_buffer((parser), (length)))
+
+/*
+ * Check the octet at the specified offset from the current read position.
+ *
+ * The comparison is made relative to parser->pointer (the read cursor), not
+ * parser->buffer (the start of the allocation); the two differ as soon as
+ * any character has been consumed.  No bounds check is made here, so the
+ * caller is expected to have requested enough characters with UPDATE first.
+ */
+
+#define CHECK_AT(parser,octet,offset)   \
+    ((parser)->pointer[offset] == (yaml_char_t)(octet))
+
+/*
+ * Check the current octet in the buffer.
+ */
+
+#define CHECK(parser,octet) CHECK_AT(parser,(octet),0)
+
+/*
+ * Check if the character at the specified position is NUL.
+ */
+
+#define IS_Z_AT(parser,offset)    CHECK_AT(parser,'\0',(offset))
+
+#define IS_Z(parser)    IS_Z_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is space.
+ */
+
+#define IS_SPACE_AT(parser,offset)  CHECK_AT(parser,' ',(offset))
+
+#define IS_SPACE(parser)    IS_SPACE_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is tab.
+ */
+
+#define IS_TAB_AT(parser,offset)    CHECK_AT(parser,'\t',(offset))
+
+#define IS_TAB(parser)  IS_TAB_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is blank (space or tab).
+ */
+
+#define IS_BLANK_AT(parser,offset)  \
+    (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset)))
+
+#define IS_BLANK(parser)    IS_BLANK_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is a line break.
+ *
+ * The multi-octet breaks are matched as their UTF-8 encodings: C2 85 for
+ * NEL, E2 80 A8 for LS and E2 80 A9 for PS.  Up to three octets starting at
+ * `offset` may be inspected, and `offset` is evaluated several times, so it
+ * must not have side effects.
+ */
+
+#define IS_BREAK_AT(parser,offset)  \
+    (CHECK_AT(parser,'\r',(offset))                 /* CR (#xD) */      \
+     || CHECK_AT(parser,'\n',(offset))              /* LF (#xA) */      \
+     || (CHECK_AT(parser,'\xC2',(offset))                               \
+         && CHECK_AT(parser,'\x85',(offset)+1))     /* NEL (#x85) */    \
+     || (CHECK_AT(parser,'\xE2',(offset))                               \
+         && CHECK_AT(parser,'\x80',(offset)+1)                          \
+         && CHECK_AT(parser,'\xA8',(offset)+2))     /* LS (#x2028) */   \
+     || (CHECK_AT(parser,'\xE2',(offset))                               \
+         && CHECK_AT(parser,'\x80',(offset)+1)                          \
+         && CHECK_AT(parser,'\xA9',(offset)+2)))    /* PS (#x2029) */
+
+#define IS_BREAK(parser)    IS_BREAK_AT(parser,0)
+
+/*
+ * Check if the character is a line break or NUL.
+ */
+
+#define IS_BREAKZ_AT(parser,offset) \
+    (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset)))
+
+#define IS_BREAKZ(parser)   IS_BREAKZ_AT(parser,0)
+
+/*
+ * Check if the character is a line break, space, or NUL.
+ */
+
+#define IS_SPACEZ_AT(parser,offset) \
+    (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
+
+#define IS_SPACEZ(parser)   IS_SPACEZ_AT(parser,0)
+
+/*
+ * Check if the character is a line break, space, tab, or NUL.
+ */
+
+#define IS_BLANKZ_AT(parser,offset) \
+    (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
+
+#define IS_BLANKZ(parser)   IS_BLANKZ_AT(parser,0)
+
 /*
  * Public API declarations.
  */
@@ -493,6 +594,17 @@ yaml_parser_get_token(yaml_parser_t *parser);
 YAML_DECLARE(yaml_token_t *)
 yaml_parser_peek_token(yaml_parser_t *parser);

+/*
+ * Error handling.
+ */
+
+static int
+yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
+        yaml_mark_t context_mark, const char *problem);
+
+static yaml_mark_t
+yaml_parser_get_mark(yaml_parser_t *parser);
+
 /*
  * High-level token API.
*/
@@ -521,10 +633,10 @@ yaml_parser_remove_simple_key(yaml_parser_t *parser);
  */

 static int
-yaml_parser_add_indent(yaml_parser_t *parser);
+yaml_parser_roll_indent(yaml_parser_t *parser, int column);

 static int
-yaml_parser_remove_indent(yaml_parser_t *parser);
+yaml_parser_unroll_indent(yaml_parser_t *parser, int column);

 /*
  * Token fetchers.
@@ -590,21 +702,9 @@ yaml_parser_fetch_anchor(yaml_parser_t *parser);
 static int
 yaml_parser_fetch_tag(yaml_parser_t *parser);

-static int
-yaml_parser_fetch_literal_scalar(yaml_parser_t *parser);
-
-static int
-yaml_parser_fetch_folded_scalar(yaml_parser_t *parser);
-
 static int
 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);

-static int
-yaml_parser_fetch_single_quoted_scalar(yaml_parser_t *parser);
-
-static int
-yaml_parser_fetch_double_quoted_scalar(yaml_parser_t *parser);
-
 static int
 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);

@@ -665,3 +765,312 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
 static yaml_token_t *
 yaml_parser_scan_plain_scalar(yaml_parser_t *parser);

+/*
+ * Pop the token at the head of the tokens queue and hand it to the caller.
+ *
+ * Returns the token, or NULL when yaml_parser_fetch_more_tokens() reports
+ * a failure.
+ */
+
+YAML_DECLARE(yaml_token_t *)
+yaml_parser_get_token(yaml_parser_t *parser)
+{
+    yaml_token_t *head_token;
+
+    /* A parser object is mandatory, and nothing may follow STREAM-END. */
+
+    assert(parser);
+    assert(!parser->stream_end_produced);
+
+    /* Make sure there is a token to take before touching the queue. */
+
+    if (!yaml_parser_fetch_more_tokens(parser)) {
+        return NULL;
+    }
+
+    /* Take the head token and clear the slot it occupied. */
+
+    head_token = parser->tokens[parser->tokens_head];
+    parser->tokens[parser->tokens_head] = NULL;
+
+    /* Step the head forward, wrapping to 0 at the end of the array. */
+
+    parser->tokens_head += 1;
+    if (parser->tokens_head == parser->tokens_size) {
+        parser->tokens_head = 0;
+    }
+
+    /* One more token has been handed out. */
+
+    parser->tokens_parsed += 1;
+
+    return head_token;
+}
+
+/*
+ * Get the next token, but don't remove it from the queue.
+ */
+
+YAML_DECLARE(yaml_token_t *)
+yaml_parser_peek_token(yaml_parser_t *parser)
+{
+    assert(parser); /* Non-NULL parser object is expected. */
+    assert(!parser->stream_end_produced);   /* No tokens after STREAM-END. */
+
+    /* Ensure that the tokens queue contains enough tokens. */
+
+    if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
+
+    /* Fetch the next token from the queue. */
+
+    return parser->tokens[parser->tokens_head];
+}
+
+/*
+ * Set the scanner error and return 0.
+ *
+ * Records the error type, the context string and mark supplied by the
+ * caller, the problem string, and the current buffer position as the
+ * problem mark.  The strings are stored by pointer, not copied.
+ *
+ * Always returns 0 so that callers can write
+ * `return yaml_parser_set_scanner_error(...)`.
+ */
+
+static int
+yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
+        yaml_mark_t context_mark, const char *problem)
+{
+    parser->error = YAML_SCANNER_ERROR;
+    parser->context = context;
+    parser->context_mark = context_mark;
+    parser->problem = problem;
+    parser->problem_mark = yaml_parser_get_mark(parser);
+
+    /* The function is declared int and its result is returned by callers. */
+
+    return 0;
+}
+
+/*
+ * Get the mark for the current buffer position.
+ *
+ * The mark is built from parser->index, parser->line and parser->column,
+ * in that order, and returned by value.
+ */
+
+static yaml_mark_t
+yaml_parser_get_mark(yaml_parser_t *parser)
+{
+    yaml_mark_t mark = { parser->index, parser->line, parser->column };
+
+    return mark;
+}
+
+
+/*
+ * Ensure that the tokens queue contains at least one token which can be
+ * returned to the Parser.
+ *
+ * Tokens are fetched while the queue is empty, or while a recorded
+ * potential simple key refers to the token at the head of the queue.
+ *
+ * Returns 1 on success, 0 if fetching a token failed.
+ */
+
+static int
+yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
+{
+    int need_more_tokens;
+    int k;
+
+    /* While we need more tokens to fetch, do it. */
+
+    while (1)
+    {
+        /*
+         * Check if we really need to fetch more tokens.
+         */
+
+        need_more_tokens = 0;
+
+        if (parser->tokens_head == parser->tokens_tail)
+        {
+            /* Queue is empty. */
+
+            need_more_tokens = 1;
+        }
+        else
+        {
+            /* Check if any potential simple key may occupy the head position. */
+
+            for (k = 0; k <= parser->flow_level; k++) {
+                yaml_simple_key_t *simple_key = parser->simple_keys[k];
+                if (simple_key
+                        && (simple_key->token_number == parser->tokens_parsed)) {
+                    need_more_tokens = 1;
+                    break;
+                }
+            }
+        }
+
+        /* We are finished. */
+
+        if (!need_more_tokens)
+            break;
+
+        /* Fetch the next token. */
+
+        if (!yaml_parser_fetch_next_token(parser))
+            return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * The dispatcher for token fetchers.
+ *
+ * Skips to the next token, unrolls the indentation for the current column,
+ * then looks at up to four characters to decide which fetcher to call.
+ *
+ * Returns the result of the selected fetcher, or 0 after recording a
+ * scanner error if no token can start with the current character.
+ */
+
+static int
+yaml_parser_fetch_next_token(yaml_parser_t *parser)
+{
+    /* Ensure that the buffer is initialized. */
+
+    if (!UPDATE(parser, 1))
+        return 0;
+
+    /* Check if we just started scanning.  Fetch STREAM-START then. */
+
+    if (!parser->stream_start_produced)
+        return yaml_parser_fetch_stream_start(parser);
+
+    /* Eat whitespaces and comments until we reach the next token. */
+
+    if (!yaml_parser_scan_to_next_token(parser))
+        return 0;
+
+    /* Check the indentation level against the current column. */
+
+    if (!yaml_parser_unroll_indent(parser, parser->column))
+        return 0;
+
+    /*
+     * Ensure that the buffer contains at least 4 characters.  4 is the length
+     * of the longest indicators ('--- ' and '... ').
+     */
+
+    if (!UPDATE(parser, 4))
+        return 0;
+
+    /* Is it the end of the stream? */
+
+    if (IS_Z(parser))
+        return yaml_parser_fetch_stream_end(parser);
+
+    /* Is it a directive? */
+
+    if (parser->column == 0 && CHECK(parser, '%'))
+        return yaml_parser_fetch_directive(parser);
+
+    /* Is it the document start indicator? */
+
+    if (parser->column == 0
+            && CHECK_AT(parser, '-', 0)
+            && CHECK_AT(parser, '-', 1)
+            && CHECK_AT(parser, '-', 2)
+            && IS_BLANKZ_AT(parser, 3))
+        return yaml_parser_fetch_document_start(parser);
+
+    /*
+     * Is it the document end indicator?
+     *
+     * NOTE(review): '...' must not be reported as a document start.  This
+     * assumes yaml_parser_fetch_document_end() is declared with the other
+     * token fetchers, which are outside this hunk -- confirm.
+     */
+
+    if (parser->column == 0
+            && CHECK_AT(parser, '.', 0)
+            && CHECK_AT(parser, '.', 1)
+            && CHECK_AT(parser, '.', 2)
+            && IS_BLANKZ_AT(parser, 3))
+        return yaml_parser_fetch_document_end(parser);
+
+    /* Is it the flow sequence start indicator? */
+
+    if (CHECK(parser, '['))
+        return yaml_parser_fetch_flow_sequence_start(parser);
+
+    /* Is it the flow mapping start indicator? */
+
+    if (CHECK(parser, '{'))
+        return yaml_parser_fetch_flow_mapping_start(parser);
+
+    /* Is it the flow sequence end indicator? */
+
+    if (CHECK(parser, ']'))
+        return yaml_parser_fetch_flow_sequence_end(parser);
+
+    /* Is it the flow mapping end indicator? */
+
+    if (CHECK(parser, '}'))
+        return yaml_parser_fetch_flow_mapping_end(parser);
+
+    /* Is it the flow entry indicator? */
+
+    if (CHECK(parser, ','))
+        return yaml_parser_fetch_flow_entry(parser);
+
+    /* Is it the block entry indicator? */
+
+    if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
+        return yaml_parser_fetch_block_entry(parser);
+
+    /*
+     * Is it the key indicator?
+     *
+     * In the flow context '?' is always an indicator; in the block context
+     * it is one only when followed by a blank, so that '?x' can still start
+     * a plain scalar below.
+     */
+
+    if (CHECK(parser, '?') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_key(parser);
+
+    /* Is it the value indicator?  The same rule as for '?' applies. */
+
+    if (CHECK(parser, ':') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_value(parser);
+
+    /* Is it an alias? */
+
+    if (CHECK(parser, '*'))
+        return yaml_parser_fetch_alias(parser);
+
+    /* Is it an anchor? */
+
+    if (CHECK(parser, '&'))
+        return yaml_parser_fetch_anchor(parser);
+
+    /* Is it a tag? */
+
+    if (CHECK(parser, '!'))
+        return yaml_parser_fetch_tag(parser);
+
+    /* Is it a literal scalar? */
+
+    if (CHECK(parser, '|') && !parser->flow_level)
+        return yaml_parser_fetch_block_scalar(parser, 1);
+
+    /* Is it a folded scalar? */
+
+    if (CHECK(parser, '>') && !parser->flow_level)
+        return yaml_parser_fetch_block_scalar(parser, 0);
+
+    /* Is it a single-quoted scalar? */
+
+    if (CHECK(parser, '\''))
+        return yaml_parser_fetch_flow_scalar(parser, 1);
+
+    /* Is it a double-quoted scalar? */
+
+    if (CHECK(parser, '"'))
+        return yaml_parser_fetch_flow_scalar(parser, 0);
+
+    /*
+     * Is it a plain scalar?
+     *
+     * A plain scalar may start with any non-blank characters except
+     *
+     *      '-', '?', ':', ',', '[', ']', '{', '}',
+     *      '#', '&', '*', '!', '|', '>', '\'', '\"',
+     *      '%', '@', '`'.
+     *
+     * In the block context, it may also start with the characters
+     *
+     *      '-', '?', ':'
+     *
+     * if it is followed by a non-space character.
+     *
+     * The last rule is more restrictive than the specification requires.
+     */
+
+    if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
+                || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
+                || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
+                || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
+                || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
+                || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
+                || CHECK(parser, '@') || CHECK(parser, '`')) ||
+            (!parser->flow_level &&
+             (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
+             !IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_plain_scalar(parser);
+
+    /*
+     * If we don't determine the token type so far, it is an error.
+     */
+
+    return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
+            yaml_parser_get_mark(parser), "found character that cannot start any token");
+}
+
-- 
cgit v1.2.1