diff options
author | Dave Beckett <dave@dajobe.org> | 2011-05-29 22:45:29 -0700 |
---|---|---|
committer | Dave Beckett <dave@dajobe.org> | 2011-05-30 00:03:47 -0700 |
commit | e86d5734d71859df8805af84de60d6a92d4c218f (patch) | |
tree | 285066eab6d493c92de9aecaf3d3009bb14f89d8 | |
parent | a536e79bb2a7b5335e1c62be23fe7756287d5796 (diff) | |
download | raptor-streaming-parser.tar.gz |
More cleanups to use push parser by default (branch: streaming-parser)
Do not use yy_init_globals in the error handler (turtle_parser_error)
Move TURTLE_PUSH_PARSE to raptor_internal.h
(raptor_turtle_parse_chunk): token never < 0
rapper: Add maintainer-only -k chunked parsing mode for local files.
(rapper_iostream_hex_buffer_write): Added to print a buffer as hex.
(rapper_parser_parse_chunked_buffer): Added to parse with given chunk
size.
(main): If debug, accept -k SIZE arg to use chunked parsing rather
than raptor_parser_parse_file() on a local file.
-rw-r--r-- | src/turtle_parser.y | 100 | ||||
-rw-r--r-- | utils/rapper.c | 117 |
2 files changed, 119 insertions, 98 deletions
diff --git a/src/turtle_parser.y b/src/turtle_parser.y index 5cd999ae..efd618a3 100644 --- a/src/turtle_parser.y +++ b/src/turtle_parser.y @@ -79,8 +79,6 @@ const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval); /* the lexer does not seem to track this */ #undef RAPTOR_TURTLE_USE_ERROR_COLUMNS -#define TURTLE_PUSH_PARSE 1 - /* Prototypes */ int turtle_parser_error(void* rdf_parser, const char *msg); @@ -1134,12 +1132,6 @@ collection: LEFT_ROUND itemList RIGHT_ROUND /* Support functions */ -/* This is declared in turtle_lexer.h but never used, so we always get - * a warning unless this dummy code is here. Used once below as a return. - */ -static int yy_init_globals (yyscan_t yyscanner ) { return 0; }; - - int turtle_parser_error(void* ctx, const char *msg) { @@ -1159,7 +1151,7 @@ turtle_parser_error(void* ctx, const char *msg) raptor_log_error(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, &rdf_parser->locator, msg); - return yy_init_globals(NULL); /* 0 but a way to use yy_init_globals */ + return 0; } @@ -1202,33 +1194,6 @@ turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_ -#ifndef TURTLE_PUSH_PARSE -static int -turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) -{ - raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; - void *buffer; - - if(!string || !*string) - return 0; - - if(turtle_lexer_lex_init(&turtle_parser->scanner)) - return 1; - turtle_parser->scanner_set = 1; - - turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); - buffer = turtle_lexer__scan_bytes(string, length, turtle_parser->scanner); - - turtle_parser_parse(rdf_parser); - - turtle_lexer_lex_destroy(turtle_parser->scanner); - turtle_parser->scanner_set = 0; - - return 0; -} -#endif - - /** * raptor_turtle_parse_init - Initialise the Raptor Turtle parser * @@ -1263,9 +1228,7 @@ raptor_turtle_parse_terminate(raptor_parser *rdf_parser) { 
raptor_namespaces_clear(&turtle_parser->namespaces); if(turtle_parser->scanner_set) { -#ifdef TURTLE_PUSH_PARSE yypstate_delete(turtle_parser->ps); turtle_parser->ps = NULL; -#endif turtle_lexer_lex_destroy(turtle_parser->scanner); turtle_parser->scanner_set = 0; } @@ -1363,16 +1326,11 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser, { raptor_turtle_parser *turtle_parser; int rc = 0; -#ifdef TURTLE_PUSH_PARSE void *buffer; int status; -#else - char *ptr; -#endif turtle_parser = (raptor_turtle_parser*)rdf_parser->context; -#ifdef TURTLE_PUSH_PARSE buffer = turtle_lexer__scan_bytes((const char*)s, len, turtle_parser->scanner); do { @@ -1382,15 +1340,8 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser, memset(&lval, 0, sizeof(YYSTYPE)); token = turtle_lexer_lex(&lval, turtle_parser->scanner); - if(token < 0) { - /* need more input */ - fprintf(stderr, "Turtle lexer needs more input\n"); - rc = 0; - break; - } - #if RAPTOR_DEBUG > 1 - printf("token %s\n", turtle_token_print(world, token, &lval)); + printf("token %s\n", turtle_token_print(rdf_parser->world, token, &lval)); #endif status = yypush_parse(turtle_parser->ps, token, &lval, rdf_parser); @@ -1408,47 +1359,6 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser, /* when len == 0 FALL THROUGH below to handle end of TRIG */ -#else -#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 - RAPTOR_DEBUG2("adding %d bytes to line buffer\n", (int)len); -#endif - - if(len) { - turtle_parser->buffer = (char*)RAPTOR_REALLOC(cstring, turtle_parser->buffer, turtle_parser->buffer_length + len + 1); - if(!turtle_parser->buffer) { - raptor_parser_fatal_error(rdf_parser, "Out of memory"); - return 1; - } - - /* move pointer to end of cdata buffer */ - ptr = turtle_parser->buffer+turtle_parser->buffer_length; - - /* adjust stored length */ - turtle_parser->buffer_length += len; - - /* now write new stuff at end of cdata buffer */ - memcpy(ptr, s, len); - ptr += len; - *ptr = '\0'; - -#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG 
> 1 - RAPTOR_DEBUG3("buffer buffer now '%s' (%d bytes)\n", - turtle_parser->buffer, turtle_parser->buffer_length); -#endif - } - - /* if not end, wait for rest of input */ - if(!is_end) - return rc; - - /* Nothing to do */ - if(!turtle_parser->buffer_length) - return rc; - - turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->buffer_length); -#endif - - if(rdf_parser->emitted_default_graph) { /* for non-TRIG - end default graph after last triple */ raptor_parser_end_graph(rdf_parser, NULL, 0); @@ -1480,7 +1390,6 @@ raptor_turtle_parse_start(raptor_parser *rdf_parser) turtle_parser->lineno = 1; -#ifdef TURTLE_PUSH_PARSE if(turtle_parser->ps) { yypstate_delete(turtle_parser->ps); turtle_parser->ps = NULL; turtle_lexer_lex_destroy(turtle_parser->scanner); @@ -1497,7 +1406,6 @@ raptor_turtle_parse_start(raptor_parser *rdf_parser) turtle_parser->scanner_set = 1; turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); -#endif return 0; } @@ -1793,11 +1701,7 @@ main(int argc, char *argv[]) turtle_parser.error_count = 0; -#ifdef TURTLE_PUSH_PARSE turtle_push_parse(&rdf_parser, string, strlen(string)); -#else - turtle_parse(&rdf_parser, string, strlen(string)); -#endif raptor_turtle_parse_terminate(&rdf_parser); diff --git a/utils/rapper.c b/utils/rapper.c index 08ac1678..97726eef 100644 --- a/utils/rapper.c +++ b/utils/rapper.c @@ -183,8 +183,14 @@ relay_namespaces(void* user_data, raptor_namespace *nspace) #define HELP_PAD "\n " #endif +#define DEFAULT_CHUNK_SIZE 1 +#ifdef RAPTOR_DEBUG +#define GETOPT_STRING "cef:ghi:I:k:o:O:qrtvw" +#else #define GETOPT_STRING "cef:ghi:I:o:O:qrtvw" +#endif + #ifdef HAVE_GETOPT_LONG #define SHOW_NAMESPACES_FLAG 0x100 @@ -200,6 +206,9 @@ static const struct option long_options[] = {"help", 0, 0, 'h'}, {"input", 1, 0, 'i'}, {"input-uri", 1, 0, 'I'}, +#ifdef RAPTOR_DEBUG + {"chunk-size", 1, 0, 'k'}, +#endif {"output", 1, 0, 'o'}, {"output-uri", 1, 0, 'O'}, {"quiet", 0, 0, 'q'}, @@ -303,6 +312,94 @@ typedef struct +#ifdef 
RAPTOR_DEBUG +static void +rapper_iostream_hex_buffer_write(const unsigned char* buffer, size_t len, + raptor_iostream* iostr) +{ + int i; + + for(i = 0; i < (int)len; i++) { + int c = buffer[i]; + raptor_iostream_hexadecimal_write(c, 2, iostr);; + raptor_iostream_write_byte(' ', iostr); + } + raptor_iostream_write_byte('\'', iostr); + raptor_string_ntriples_write(buffer, len, '\'', iostr); + raptor_iostream_write_byte('\'', iostr); +} + + +static int +rapper_parser_parse_chunked_buffer(raptor_parser* rdf_parser, + raptor_uri *uri, + raptor_uri *base_uri, + size_t chunk_size) +{ + raptor_world* world = raptor_parser_get_world(rdf_parser); + const char* filename = NULL; + raptor_iostream* iostr = NULL; + raptor_iostream* out_iostr = NULL; + unsigned char* buffer = NULL; + int free_base_uri = 0; + int rc = 0; + + filename = raptor_uri_uri_string_to_filename(raptor_uri_as_string(uri)); + + if(!base_uri) { + base_uri = raptor_uri_copy(uri); + free_base_uri = 1; + } + + iostr = raptor_new_iostream_from_filename(world, filename); + if(!iostr) { + rc = 1; + goto done; + } + + buffer = (unsigned char*)raptor_alloc_memory(chunk_size + 1); + if(!buffer) { + rc = 1; + goto done; + } + + rc = raptor_parser_parse_start(rdf_parser, base_uri); + if(rc) + goto done; + + out_iostr = raptor_new_iostream_to_file_handle(world, stderr); + + while(!raptor_iostream_read_eof(iostr)) { + int len = raptor_iostream_read_bytes(buffer, 1, chunk_size, iostr); + int is_end = (len < (int)chunk_size); + + raptor_iostream_string_write(program, out_iostr); + raptor_iostream_string_write(": Read ", out_iostr); + raptor_iostream_decimal_write(len, out_iostr); + raptor_iostream_string_write(" bytes: ", out_iostr); + rapper_iostream_hex_buffer_write(buffer, len, out_iostr); + raptor_iostream_write_byte('\n', out_iostr); + + rc = raptor_parser_parse_chunk(rdf_parser, buffer, len, is_end); + if(rc || is_end) + break; + } + + done: + if(iostr) + raptor_free_iostream(iostr); + if(buffer) + 
raptor_free_memory(buffer); + if(free_base_uri) + raptor_free_uri(base_uri); + if(out_iostr) + raptor_free_iostream(out_iostr); + + return rc; +} +#endif + + int main(int argc, char *argv[]) { @@ -335,6 +432,9 @@ main(int argc, char *argv[]) int rc; int usage = 0; int help = 0; +#ifdef RAPTOR_DEBUG + int chunk_size = DEFAULT_CHUNK_SIZE; +#endif char *p; program = argv[0]; @@ -524,6 +624,15 @@ main(int argc, char *argv[]) help = 1; break; +#ifdef RAPTOR_DEBUG + case 'k': + if(optarg) + chunk_size = atoi(optarg); + else + chunk_size = DEFAULT_CHUNK_SIZE; + break; +#endif + case 't': trace = 1; break; @@ -707,6 +816,9 @@ main(int argc, char *argv[]) puts(HELP_TEXT("g", "guess ", "Guess the input syntax (same as -i guess)")); puts(HELP_TEXT("h", "help ", "Print this help, then exit")); puts(HELP_TEXT("m MODE", "mode MODE ", "Set parser mode - 'lax' (default) or 'strict'")); +#ifdef RAPTOR_DEBUG + puts(HELP_TEXT("k CHUNK-SIZE", "chunk-size CHUNK-SIZE ", "Set parse chunk size")); +#endif puts(HELP_TEXT("q", "quiet ", "No extra information messages")); puts(HELP_TEXT("r", "replace-newlines", "Replace newlines with spaces in literals")); #ifdef SHOW_GRAPHS_FLAG @@ -913,11 +1025,16 @@ main(int argc, char *argv[]) */ rc = 0; if(!uri || filename) { +#ifdef DEFAULT_CHUNK_SIZE + rc = rapper_parser_parse_chunked_buffer(rdf_parser, uri, base_uri, + chunk_size); +#else if(raptor_parser_parse_file(rdf_parser, uri, base_uri)) { fprintf(stderr, "%s: Failed to parse file %s %s content\n", program, FILENAME_LABEL(filename), syntax_name); rc = 1; } +#endif } else { if(raptor_parser_parse_uri(rdf_parser, uri, base_uri)) { fprintf(stderr, "%s: Failed to parse URI %s %s content\n", |