summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorxi <xi@18f92427-320e-0410-9341-c67f048884a3>2006-06-07 20:30:22 +0000
committerxi <xi@18f92427-320e-0410-9341-c67f048884a3>2006-06-07 20:30:22 +0000
commitfbfe60b4d3cd7a86fe7aa6eeeb73951a1e9a0a58 (patch)
tree877ff441e7e0a908cd184321c71f9aa6817f178c
parent429cd7f8dd08c81d4fcfae59196fadd82d2fc7d6 (diff)
downloadlibyaml-fbfe60b4d3cd7a86fe7aa6eeeb73951a1e9a0a58.tar.gz
Implementing Scanner: macros and high-level functions.
git-svn-id: http://svn.pyyaml.org/libyaml/trunk@185 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r--include/yaml/yaml.h20
-rw-r--r--src/api.c66
-rw-r--r--src/scanner.c437
3 files changed, 494 insertions, 29 deletions
diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h
index 7ee6b28..6acbb54 100644
--- a/include/yaml/yaml.h
+++ b/include/yaml/yaml.h
@@ -537,9 +537,6 @@ typedef struct {
*/
typedef struct {
- /** Is a simple key possible? */
- int possible;
-
/** Is a simple key required? */
int required;
@@ -585,6 +582,15 @@ typedef struct {
/** The problematic value (@c -1 is none). */
int problem_value;
+ /** The problem position. */
+ yaml_mark_t problem_mark;
+
+ /** The error context. */
+ const char *context;
+
+ /** The context position. */
+ yaml_mark_t context_mark;
+
/**
* @}
*/
@@ -661,7 +667,7 @@ typedef struct {
int flow_level;
/** The tokens queue, which contains the current produced tokens. */
- yaml_token_t *tokens;
+ yaml_token_t **tokens;
/** The size of the tokens queue. */
size_t tokens_size;
@@ -691,7 +697,7 @@ typedef struct {
int simple_key_allowed;
/** The stack of potential simple keys. */
- yaml_simple_key_t *simple_keys;
+ yaml_simple_key_t **simple_keys;
/** The size of the simple keys stack. */
size_t simple_keys_size;
@@ -853,6 +859,10 @@ yaml_realloc(void *ptr, size_t size);
YAML_DECLARE(void)
yaml_free(void *ptr);
+/** The initial size for various buffers. */
+
+#define YAML_DEFAULT_SIZE 16
+
/** The size of the raw buffer. */
#define YAML_RAW_BUFFER_SIZE 16384
diff --git a/src/api.c b/src/api.c
index 50f118a..c63da45 100644
--- a/src/api.c
+++ b/src/api.c
@@ -49,33 +49,76 @@ yaml_parser_new(void)
/* Allocate the parser structure. */
parser = yaml_malloc(sizeof(yaml_parser_t));
- if (!parser) return NULL;
+ if (!parser) goto error;
memset(parser, 0, sizeof(yaml_parser_t));
/* Allocate the raw buffer. */
parser->raw_buffer = yaml_malloc(YAML_RAW_BUFFER_SIZE);
- if (!parser->raw_buffer) {
- yaml_free(parser);
- return NULL;
- }
+ if (!parser->raw_buffer) goto error;
+ memset(parser->raw_buffer, 0, YAML_RAW_BUFFER_SIZE);
+
parser->raw_pointer = parser->raw_buffer;
parser->raw_unread = 0;
/* Allocate the character buffer. */
parser->buffer = yaml_malloc(YAML_BUFFER_SIZE);
- if (!parser->buffer) {
- yaml_free(parser->raw_buffer);
- yaml_free(parser);
- return NULL;
- }
+ if (!parser->buffer) goto error;
+ memset(parser->buffer, 0, YAML_BUFFER_SIZE);
+
parser->buffer_end = parser->buffer;
parser->pointer = parser->buffer;
parser->unread = 0;
+ /* Allocate the tokens queue. */
+
+ parser->tokens = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_token_t *));
+ if (!parser->tokens) goto error;
+ memset(parser->tokens, 0, YAML_DEFAULT_SIZE*sizeof(yaml_token_t *));
+
+ parser->tokens_size = YAML_DEFAULT_SIZE;
+ parser->tokens_head = 0;
+ parser->tokens_tail = 0;
+ parser->tokens_parsed = 0;
+
+ /* Allocate the indents stack. */
+
+ parser->indents = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(int));
+ if (!parser->indents) goto error;
+ memset(parser->indents, 0, YAML_DEFAULT_SIZE*sizeof(int));
+
+ parser->indents_size = YAML_DEFAULT_SIZE;
+ parser->indents_length = 0;
+
+ /* Allocate the stack of potential simple keys. */
+
+ parser->simple_keys = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *));
+ if (!parser->simple_keys) goto error;
+ memset(parser->simple_keys, 0, YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *));
+
+ parser->simple_keys_size = YAML_DEFAULT_SIZE;
+
+ /* Done. */
+
return parser;
+
+ /* On error, free allocated buffers. */
+
+error:
+
+ if (!parser) return NULL;
+
+ yaml_free(parser->simple_keys);
+ yaml_free(parser->indents);
+ yaml_free(parser->tokens);
+ yaml_free(parser->buffer);
+ yaml_free(parser->raw_buffer);
+
+ yaml_free(parser);
+
+ return NULL;
}
/*
@@ -87,6 +130,9 @@ yaml_parser_delete(yaml_parser_t *parser)
{
assert(parser); /* Non-NULL parser object expected. */
+ yaml_free(parser->simple_keys);
+ yaml_free(parser->indents);
+ yaml_free(parser->tokens);
yaml_free(parser->buffer);
yaml_free(parser->raw_buffer);
diff --git a/src/scanner.c b/src/scanner.c
index 530bd91..2a555d0 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -484,6 +484,107 @@
#include <assert.h>
/*
+ * Ensure that the buffer contains the required number of characters.
+ * Return 1 on success, 0 on failure (reader error or memory error).
+ */
+
+#define UPDATE(parser,length) \
+ (parser->unread >= (length) \
+ ? 1 \
+ : yaml_parser_update_buffer(parser, (length)))
+
+/*
+ * Check the octet at the specified position.
+ */
+
+#define CHECK_AT(parser,octet,offset) \
+ (parser->buffer[offset] == (yaml_char_t)(octet))
+
+/*
+ * Check the current octet in the buffer.
+ */
+
+#define CHECK(parser,octet) CHECK_AT(parser,(octet),0)
+
+/*
+ * Check if the character at the specified position is NUL.
+ */
+
+#define IS_Z_AT(parser,offset) CHECK_AT(parser,'\0',(offset))
+
+#define IS_Z(parser) IS_Z_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is space.
+ */
+
+#define IS_SPACE_AT(parser,offset) CHECK_AT(parser,' ',(offset))
+
+#define IS_SPACE(parser) IS_SPACE_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is tab.
+ */
+
+#define IS_TAB_AT(parser,offset) CHECK_AT(parser,'\t',(offset))
+
+#define IS_TAB(parser) IS_TAB_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is blank (space or tab).
+ */
+
+#define IS_BLANK_AT(parser,offset) \
+ (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset)))
+
+#define IS_BLANK(parser) IS_BLANK_AT(parser,0)
+
+/*
+ * Check if the character at the specified position is a line break.
+ */
+
+#define IS_BREAK_AT(parser,offset) \
+ (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \
+ || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \
+ || (CHECK_AT(parser,'\xC2',(offset)) \
+ && CHECK_AT(parser,'\x85',(offset+1))) /* NEL (#x85) */ \
+ || (CHECK_AT(parser,'\xE2',(offset)) \
+ && CHECK_AT(parser,'\x80',(offset+1)) \
+ && CHECK_AT(parser,'\xA8',(offset+2))) /* LS (#x2028) */ \
+ || (CHECK_AT(parser,'\xE2',(offset)) \
+ && CHECK_AT(parser,'\x80',(offset+1)) \
+ && CHECK_AT(parser,'\xA9',(offset+2)))) /* LS (#x2029) */
+
+#define IS_BREAK(parser) IS_BREAK_AT(parser,0)
+
+/*
+ * Check if the character is a line break or NUL.
+ */
+
+#define IS_BREAKZ_AT(parser,offset) \
+ (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset)))
+
+#define IS_BREAKZ(parser) IS_BREAKZ_AT(parser,0)
+
+/*
+ * Check if the character is a line break, space, or NUL.
+ */
+
+#define IS_SPACEZ_AT(parser,offset) \
+ (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
+
+#define IS_SPACEZ(parser) IS_SPACEZ_AT(parser,0)
+
+/*
+ * Check if the character is a line break, space, tab, or NUL.
+ */
+
+#define IS_BLANKZ_AT(parser,offset) \
+ (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset)))
+
+#define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0)
+
+/*
* Public API declarations.
*/
@@ -494,6 +595,17 @@ YAML_DECLARE(yaml_token_t *)
yaml_parser_peek_token(yaml_parser_t *parser);
/*
+ * Error handling.
+ */
+
+static int
+yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
+ yaml_mark_t context_mark, const char *problem);
+
+static yaml_mark_t
+yaml_parser_get_mark(yaml_parser_t *parser);
+
+/*
* High-level token API.
*/
@@ -521,10 +633,10 @@ yaml_parser_remove_simple_key(yaml_parser_t *parser);
*/
static int
-yaml_parser_add_indent(yaml_parser_t *parser);
+yaml_parser_roll_indent(yaml_parser_t *parser, int column);
static int
-yaml_parser_remove_indent(yaml_parser_t *parser);
+yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
/*
* Token fetchers.
@@ -591,21 +703,9 @@ static int
yaml_parser_fetch_tag(yaml_parser_t *parser);
static int
-yaml_parser_fetch_literal_scalar(yaml_parser_t *parser);
-
-static int
-yaml_parser_fetch_folded_scalar(yaml_parser_t *parser);
-
-static int
yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
static int
-yaml_parser_fetch_single_quoted_scalar(yaml_parser_t *parser);
-
-static int
-yaml_parser_fetch_double_quoted_scalar(yaml_parser_t *parser);
-
-static int
yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
static int
@@ -665,3 +765,312 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single);
static yaml_token_t *
yaml_parser_scan_plain_scalar(yaml_parser_t *parser);
+/*
+ * Get the next token and remove it from the tokens queue.
+ */
+
+YAML_DECLARE(yaml_token_t *)
+yaml_parser_get_token(yaml_parser_t *parser)
+{
+ yaml_token_t *token;
+
+ assert(parser); /* Non-NULL parser object is expected. */
+ assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
+
+ /* Ensure that the tokens queue contains enough tokens. */
+
+ if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
+
+ /* Fetch the next token from the queue. */
+
+ token = parser->tokens[parser->tokens_head];
+
+ /* Move the queue head. */
+
+ parser->tokens[parser->tokens_head++] = NULL;
+ if (parser->tokens_head == parser->tokens_size)
+ parser->tokens_head = 0;
+
+ parser->tokens_parsed++;
+
+ return token;
+}
+
+/*
+ * Get the next token, but don't remove it from the queue.
+ */
+
+YAML_DECLARE(yaml_token_t *)
+yaml_parser_peek_token(yaml_parser_t *parser)
+{
+ assert(parser); /* Non-NULL parser object is expected. */
+ assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */
+
+ /* Ensure that the tokens queue contains enough tokens. */
+
+ if (!yaml_parser_fetch_more_tokens(parser)) return NULL;
+
+ /* Fetch the next token from the queue. */
+
+ return parser->tokens[parser->tokens_head];
+}
+
+/*
+ * Set the scanner error and return 0.
+ */
+
+static int
+yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
+ yaml_mark_t context_mark, const char *problem)
+{
+ parser->error = YAML_SCANNER_ERROR;
+ parser->context = context;
+ parser->context_mark = context_mark;
+ parser->problem = problem;
+ parser->problem_mark = yaml_parser_get_mark(parser);
+}
+
+/*
+ * Get the mark for the current buffer position.
+ */
+
+static yaml_mark_t
+yaml_parser_get_mark(yaml_parser_t *parser)
+{
+ yaml_mark_t mark = { parser->index, parser->line, parser->column };
+
+ return mark;
+}
+
+
+/*
+ * Ensure that the tokens queue contains at least one token which can be
+ * returned to the Parser.
+ */
+
+static int
+yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
+{
+ int need_more_tokens;
+ int k;
+
+ /* While we need more tokens to fetch, do it. */
+
+ while (1)
+ {
+ /*
+ * Check if we really need to fetch more tokens.
+ */
+
+ need_more_tokens = 0;
+
+ if (parser->tokens_head == parser->tokens_tail)
+ {
+ /* Queue is empty. */
+
+ need_more_tokens = 1;
+ }
+ else
+ {
+ /* Check if any potential simple key may occupy the head position. */
+
+ for (k = 0; k <= parser->flow_level; k++) {
+ yaml_simple_key_t *simple_key = parser->simple_keys[k];
+ if (simple_key
+ && (simple_key->token_number == parser->tokens_parsed)) {
+ need_more_tokens = 1;
+ break;
+ }
+ }
+ }
+
+ /* We are finished. */
+
+ if (!need_more_tokens)
+ break;
+
+ /* Fetch the next token. */
+
+ if (!yaml_parser_fetch_next_token(parser))
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * The dispatcher for token fetchers.
+ */
+
+static int
+yaml_parser_fetch_next_token(yaml_parser_t *parser)
+{
+ /* Ensure that the buffer is initialized. */
+
+ if (!UPDATE(parser, 1))
+ return 0;
+
+ /* Check if we just started scanning. Fetch STREAM-START then. */
+
+ if (!parser->stream_start_produced)
+ return yaml_parser_fetch_stream_start(parser);
+
+ /* Eat whitespaces and comments until we reach the next token. */
+
+ if (!yaml_parser_scan_to_next_token(parser))
+ return 0;
+
+ /* Check the indentation level against the current column. */
+
+ if (!yaml_parser_unroll_indent(parser, parser->column))
+ return 0;
+
+ /*
+ * Ensure that the buffer contains at least 4 characters. 4 is the length
+ * of the longest indicators ('--- ' and '... ').
+ */
+
+ if (!UPDATE(parser, 4))
+ return 0;
+
+ /* Is it the end of the stream? */
+
+ if (IS_Z(parser))
+ return yaml_parser_fetch_stream_end(parser);
+
+ /* Is it a directive? */
+
+ if (parser->column == 0 && CHECK(parser, '%'))
+ return yaml_parser_fetch_directive(parser);
+
+ /* Is it the document start indicator? */
+
+ if (parser->column == 0
+ && CHECK_AT(parser, '-', 0)
+ && CHECK_AT(parser, '-', 1)
+ && CHECK_AT(parser, '-', 2)
+ && IS_BLANKZ_AT(parser, 3))
+ return yaml_parser_fetch_document_start(parser);
+
+ /* Is it the document end indicator? */
+
+ if (parser->column == 0
+ && CHECK_AT(parser, '.', 0)
+ && CHECK_AT(parser, '.', 1)
+ && CHECK_AT(parser, '.', 2)
+ && IS_BLANKZ_AT(parser, 3))
+ return yaml_parser_fetch_document_start(parser);
+
+ /* Is it the flow sequence start indicator? */
+
+ if (CHECK(parser, '['))
+ return yaml_parser_fetch_flow_sequence_start(parser);
+
+ /* Is it the flow mapping start indicator? */
+
+ if (CHECK(parser, '{'))
+ return yaml_parser_fetch_flow_mapping_start(parser);
+
+ /* Is it the flow sequence end indicator? */
+
+ if (CHECK(parser, ']'))
+ return yaml_parser_fetch_flow_sequence_end(parser);
+
+ /* Is it the flow mapping end indicator? */
+
+ if (CHECK(parser, '}'))
+ return yaml_parser_fetch_flow_mapping_end(parser);
+
+ /* Is it the flow entry indicator? */
+
+ if (CHECK(parser, ','))
+ return yaml_parser_fetch_flow_entry(parser);
+
+ /* Is it the block entry indicator? */
+
+ if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
+ return yaml_parser_fetch_block_entry(parser);
+
+ /* Is it the key indicator? */
+
+ if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+ return yaml_parser_fetch_key(parser);
+
+ /* Is it the value indicator? */
+
+ if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+ return yaml_parser_fetch_value(parser);
+
+ /* Is it an alias? */
+
+ if (CHECK(parser, '*'))
+ return yaml_parser_fetch_alias(parser);
+
+ /* Is it an anchor? */
+
+ if (CHECK(parser, '&'))
+ return yaml_parser_fetch_anchor(parser);
+
+ /* Is it a tag? */
+
+ if (CHECK(parser, '!'))
+ return yaml_parser_fetch_tag(parser);
+
+ /* Is it a literal scalar? */
+
+ if (CHECK(parser, '|') && !parser->flow_level)
+ return yaml_parser_fetch_block_scalar(parser, 1);
+
+ /* Is it a folded scalar? */
+
+ if (CHECK(parser, '>') && !parser->flow_level)
+ return yaml_parser_fetch_block_scalar(parser, 0);
+
+ /* Is it a single-quoted scalar? */
+
+ if (CHECK(parser, '\''))
+ return yaml_parser_fetch_flow_scalar(parser, 1);
+
+ /* Is it a double-quoted scalar? */
+
+ if (CHECK(parser, '"'))
+ return yaml_parser_fetch_flow_scalar(parser, 0);
+
+ /*
+ * Is it a plain scalar?
+ *
+ * A plain scalar may start with any non-blank characters except
+ *
+ * '-', '?', ':', ',', '[', ']', '{', '}',
+ * '#', '&', '*', '!', '|', '>', '\'', '\"',
+ * '%', '@', '`'.
+ *
+ * In the block context, it may also start with the characters
+ *
+ * '-', '?', ':'
+ *
+ * if it is followed by a non-space character.
+ *
+ * The last rule is more restrictive than the specification requires.
+ */
+
+ if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
+ || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
+ || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
+ || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
+ || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
+ || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
+ || CHECK(parser, '@') || CHECK(parser, '`')) ||
+ (!parser->flow_level &&
+ (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
+ IS_BLANKZ_AT(parser, 1)))
+ return yaml_parser_fetch_plain_scalar(parser);
+
+ /*
+ * If we don't determine the token type so far, it is an error.
+ */
+
+ return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
+ yaml_parser_get_mark(parser), "found character that cannot start any token");
+}
+