summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dave Beckett <dave@dajobe.org> 2011-05-29 22:45:29 -0700
committer: Dave Beckett <dave@dajobe.org> 2011-05-30 00:03:47 -0700
commit: e86d5734d71859df8805af84de60d6a92d4c218f (patch)
tree: 285066eab6d493c92de9aecaf3d3009bb14f89d8
parent: a536e79bb2a7b5335e1c62be23fe7756287d5796 (diff)
download: raptor-streaming-parser.tar.gz
More cleanups to use push parser by default [branch: streaming-parser]

Do not use yy_init_globals in an error.
Move TURTLE_PUSH_PARSE to raptor_internal.h.
(raptor_turtle_parse_chunk): token never < 0.

rapper: Add maintainer only -k chunked parsing mode for local files.
(rapper_iostream_hex_buffer_write): Added to print a buffer as hex.
(rapper_parser_parse_chunked_buffer): Added to parse with given chunk size.
(main): If debug, accept -k SIZE arg to use chunked parsing rather than raptor_parser_parse_file() on a local file.
-rw-r--r--src/turtle_parser.y100
-rw-r--r--utils/rapper.c117
2 files changed, 119 insertions, 98 deletions
diff --git a/src/turtle_parser.y b/src/turtle_parser.y
index 5cd999ae..efd618a3 100644
--- a/src/turtle_parser.y
+++ b/src/turtle_parser.y
@@ -79,8 +79,6 @@ const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval);
/* the lexer does not seem to track this */
#undef RAPTOR_TURTLE_USE_ERROR_COLUMNS
-#define TURTLE_PUSH_PARSE 1
-
/* Prototypes */
int turtle_parser_error(void* rdf_parser, const char *msg);
@@ -1134,12 +1132,6 @@ collection: LEFT_ROUND itemList RIGHT_ROUND
/* Support functions */
-/* This is declared in turtle_lexer.h but never used, so we always get
- * a warning unless this dummy code is here. Used once below as a return.
- */
-static int yy_init_globals (yyscan_t yyscanner ) { return 0; };
-
-
int
turtle_parser_error(void* ctx, const char *msg)
{
@@ -1159,7 +1151,7 @@ turtle_parser_error(void* ctx, const char *msg)
raptor_log_error(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR,
&rdf_parser->locator, msg);
- return yy_init_globals(NULL); /* 0 but a way to use yy_init_globals */
+ return 0;
}
@@ -1202,33 +1194,6 @@ turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_
-#ifndef TURTLE_PUSH_PARSE
-static int
-turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length)
-{
- raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
- void *buffer;
-
- if(!string || !*string)
- return 0;
-
- if(turtle_lexer_lex_init(&turtle_parser->scanner))
- return 1;
- turtle_parser->scanner_set = 1;
-
- turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner);
- buffer = turtle_lexer__scan_bytes(string, length, turtle_parser->scanner);
-
- turtle_parser_parse(rdf_parser);
-
- turtle_lexer_lex_destroy(turtle_parser->scanner);
- turtle_parser->scanner_set = 0;
-
- return 0;
-}
-#endif
-
-
/**
* raptor_turtle_parse_init - Initialise the Raptor Turtle parser
*
@@ -1263,9 +1228,7 @@ raptor_turtle_parse_terminate(raptor_parser *rdf_parser) {
raptor_namespaces_clear(&turtle_parser->namespaces);
if(turtle_parser->scanner_set) {
-#ifdef TURTLE_PUSH_PARSE
yypstate_delete(turtle_parser->ps); turtle_parser->ps = NULL;
-#endif
turtle_lexer_lex_destroy(turtle_parser->scanner);
turtle_parser->scanner_set = 0;
}
@@ -1363,16 +1326,11 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser,
{
raptor_turtle_parser *turtle_parser;
int rc = 0;
-#ifdef TURTLE_PUSH_PARSE
void *buffer;
int status;
-#else
- char *ptr;
-#endif
turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
-#ifdef TURTLE_PUSH_PARSE
buffer = turtle_lexer__scan_bytes((const char*)s, len, turtle_parser->scanner);
do {
@@ -1382,15 +1340,8 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser,
memset(&lval, 0, sizeof(YYSTYPE));
token = turtle_lexer_lex(&lval, turtle_parser->scanner);
- if(token < 0) {
- /* need more input */
- fprintf(stderr, "Turtle lexer needs more input\n");
- rc = 0;
- break;
- }
-
#if RAPTOR_DEBUG > 1
- printf("token %s\n", turtle_token_print(world, token, &lval));
+ printf("token %s\n", turtle_token_print(rdf_parser->world, token, &lval));
#endif
status = yypush_parse(turtle_parser->ps, token, &lval, rdf_parser);
@@ -1408,47 +1359,6 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser,
/* when len == 0 FALL THROUGH below to handle end of TRIG */
-#else
-#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- RAPTOR_DEBUG2("adding %d bytes to line buffer\n", (int)len);
-#endif
-
- if(len) {
- turtle_parser->buffer = (char*)RAPTOR_REALLOC(cstring, turtle_parser->buffer, turtle_parser->buffer_length + len + 1);
- if(!turtle_parser->buffer) {
- raptor_parser_fatal_error(rdf_parser, "Out of memory");
- return 1;
- }
-
- /* move pointer to end of cdata buffer */
- ptr = turtle_parser->buffer+turtle_parser->buffer_length;
-
- /* adjust stored length */
- turtle_parser->buffer_length += len;
-
- /* now write new stuff at end of cdata buffer */
- memcpy(ptr, s, len);
- ptr += len;
- *ptr = '\0';
-
-#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- RAPTOR_DEBUG3("buffer buffer now '%s' (%d bytes)\n",
- turtle_parser->buffer, turtle_parser->buffer_length);
-#endif
- }
-
- /* if not end, wait for rest of input */
- if(!is_end)
- return rc;
-
- /* Nothing to do */
- if(!turtle_parser->buffer_length)
- return rc;
-
- turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->buffer_length);
-#endif
-
-
if(rdf_parser->emitted_default_graph) {
/* for non-TRIG - end default graph after last triple */
raptor_parser_end_graph(rdf_parser, NULL, 0);
@@ -1480,7 +1390,6 @@ raptor_turtle_parse_start(raptor_parser *rdf_parser)
turtle_parser->lineno = 1;
-#ifdef TURTLE_PUSH_PARSE
if(turtle_parser->ps) {
yypstate_delete(turtle_parser->ps); turtle_parser->ps = NULL;
turtle_lexer_lex_destroy(turtle_parser->scanner);
@@ -1497,7 +1406,6 @@ raptor_turtle_parse_start(raptor_parser *rdf_parser)
turtle_parser->scanner_set = 1;
turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner);
-#endif
return 0;
}
@@ -1793,11 +1701,7 @@ main(int argc, char *argv[])
turtle_parser.error_count = 0;
-#ifdef TURTLE_PUSH_PARSE
turtle_push_parse(&rdf_parser, string, strlen(string));
-#else
- turtle_parse(&rdf_parser, string, strlen(string));
-#endif
raptor_turtle_parse_terminate(&rdf_parser);
diff --git a/utils/rapper.c b/utils/rapper.c
index 08ac1678..97726eef 100644
--- a/utils/rapper.c
+++ b/utils/rapper.c
@@ -183,8 +183,14 @@ relay_namespaces(void* user_data, raptor_namespace *nspace)
#define HELP_PAD "\n "
#endif
+#define DEFAULT_CHUNK_SIZE 1
+#ifdef RAPTOR_DEBUG
+#define GETOPT_STRING "cef:ghi:I:k:o:O:qrtvw"
+#else
#define GETOPT_STRING "cef:ghi:I:o:O:qrtvw"
+#endif
+
#ifdef HAVE_GETOPT_LONG
#define SHOW_NAMESPACES_FLAG 0x100
@@ -200,6 +206,9 @@ static const struct option long_options[] =
{"help", 0, 0, 'h'},
{"input", 1, 0, 'i'},
{"input-uri", 1, 0, 'I'},
+#ifdef RAPTOR_DEBUG
+ {"chunk-size", 1, 0, 'k'},
+#endif
{"output", 1, 0, 'o'},
{"output-uri", 1, 0, 'O'},
{"quiet", 0, 0, 'q'},
@@ -303,6 +312,94 @@ typedef struct
+#ifdef RAPTOR_DEBUG
+/*
+ * rapper_iostream_hex_buffer_write:
+ * @buffer: bytes to print
+ * @len: number of bytes in @buffer
+ * @iostr: iostream to write to
+ *
+ * Write @buffer to @iostr as a debugging dump: each byte as 2 hex
+ * digits followed by a space, then the same bytes as an N-Triples
+ * escaped string wrapped in single quotes.
+ */
+static void
+rapper_iostream_hex_buffer_write(const unsigned char* buffer, size_t len,
+                                 raptor_iostream* iostr)
+{
+  size_t i;
+
+  /* Index with size_t so a large @len is not truncated by an int cast */
+  for(i = 0; i < len; i++) {
+    raptor_iostream_hexadecimal_write(buffer[i], 2, iostr);
+    raptor_iostream_write_byte(' ', iostr);
+  }
+
+  raptor_iostream_write_byte('\'', iostr);
+  raptor_string_ntriples_write(buffer, len, '\'', iostr);
+  raptor_iostream_write_byte('\'', iostr);
+}
+
+
+/*
+ * rapper_parser_parse_chunked_buffer:
+ * @rdf_parser: parser to feed
+ * @uri: URI of the local file to read (must not be NULL)
+ * @base_uri: base URI for parsing, or NULL to use a copy of @uri
+ * @chunk_size: number of bytes to read and hand to the parser per call
+ *   (must be greater than 0)
+ *
+ * Debugging helper: read the file named by @uri in pieces of at most
+ * @chunk_size bytes, log each piece to stderr in hex and as an escaped
+ * string, and pass it to raptor_parser_parse_chunk().  The final (short)
+ * read is passed with the is_end flag set.
+ *
+ * Return value: non-0 on failure
+ */
+static int
+rapper_parser_parse_chunked_buffer(raptor_parser* rdf_parser,
+                                   raptor_uri *uri,
+                                   raptor_uri *base_uri,
+                                   size_t chunk_size)
+{
+  raptor_world* world = raptor_parser_get_world(rdf_parser);
+  char* filename = NULL;
+  raptor_iostream* iostr = NULL;
+  raptor_iostream* out_iostr = NULL;
+  unsigned char* buffer = NULL;
+  int free_base_uri = 0;
+  int sent_end = 0;
+  int rc = 0;
+
+  /* No URI means no file to open; a zero chunk size would read 0 bytes
+   * forever without ever reaching end of file.
+   */
+  if(!uri || !chunk_size)
+    return 1;
+
+  /* The returned filename is owned by this function and released at done: */
+  filename = raptor_uri_uri_string_to_filename(raptor_uri_as_string(uri));
+  if(!filename)
+    return 1;
+
+  if(!base_uri) {
+    base_uri = raptor_uri_copy(uri);
+    free_base_uri = 1;
+  }
+
+  iostr = raptor_new_iostream_from_filename(world, filename);
+  if(!iostr) {
+    rc = 1;
+    goto done;
+  }
+
+  buffer = (unsigned char*)raptor_alloc_memory(chunk_size + 1);
+  if(!buffer) {
+    rc = 1;
+    goto done;
+  }
+
+  rc = raptor_parser_parse_start(rdf_parser, base_uri);
+  if(rc)
+    goto done;
+
+  out_iostr = raptor_new_iostream_to_file_handle(world, stderr);
+  if(!out_iostr) {
+    rc = 1;
+    goto done;
+  }
+
+  while(!raptor_iostream_read_eof(iostr)) {
+    int len = raptor_iostream_read_bytes(buffer, 1, chunk_size, iostr);
+    int is_end;
+
+    /* A negative count is a read failure, not a length */
+    if(len < 0) {
+      rc = 1;
+      break;
+    }
+
+    /* A short read means the input is exhausted */
+    is_end = ((size_t)len < chunk_size);
+
+    raptor_iostream_string_write(program, out_iostr);
+    raptor_iostream_string_write(": Read ", out_iostr);
+    raptor_iostream_decimal_write(len, out_iostr);
+    raptor_iostream_string_write(" bytes: ", out_iostr);
+    rapper_iostream_hex_buffer_write(buffer, (size_t)len, out_iostr);
+    raptor_iostream_write_byte('\n', out_iostr);
+
+    rc = raptor_parser_parse_chunk(rdf_parser, buffer, (size_t)len, is_end);
+    if(is_end)
+      sent_end = 1;
+    if(rc || is_end)
+      break;
+  }
+
+  /* If EOF was reported without a preceding short read, the parser has
+   * not yet been told the input is complete.
+   */
+  if(!rc && !sent_end)
+    rc = raptor_parser_parse_chunk(rdf_parser, buffer, 0, 1);
+
+ done:
+  if(iostr)
+    raptor_free_iostream(iostr);
+  if(buffer)
+    raptor_free_memory(buffer);
+  if(free_base_uri)
+    raptor_free_uri(base_uri);
+  if(out_iostr)
+    raptor_free_iostream(out_iostr);
+  if(filename)
+    raptor_free_memory(filename);
+
+  return rc;
+}
+#endif
+
+
int
main(int argc, char *argv[])
{
@@ -335,6 +432,9 @@ main(int argc, char *argv[])
int rc;
int usage = 0;
int help = 0;
+#ifdef RAPTOR_DEBUG
+ int chunk_size = DEFAULT_CHUNK_SIZE;
+#endif
char *p;
program = argv[0];
@@ -524,6 +624,15 @@ main(int argc, char *argv[])
help = 1;
break;
+#ifdef RAPTOR_DEBUG
+ case 'k':
+ if(optarg)
+ chunk_size = atoi(optarg);
+ else
+ chunk_size = DEFAULT_CHUNK_SIZE;
+ break;
+#endif
+
case 't':
trace = 1;
break;
@@ -707,6 +816,9 @@ main(int argc, char *argv[])
puts(HELP_TEXT("g", "guess ", "Guess the input syntax (same as -i guess)"));
puts(HELP_TEXT("h", "help ", "Print this help, then exit"));
puts(HELP_TEXT("m MODE", "mode MODE ", "Set parser mode - 'lax' (default) or 'strict'"));
+#ifdef RAPTOR_DEBUG
+ puts(HELP_TEXT("k CHUNK-SIZE", "chunk-size CHUNK-SIZE ", "Set parse chunk size"));
+#endif
puts(HELP_TEXT("q", "quiet ", "No extra information messages"));
puts(HELP_TEXT("r", "replace-newlines", "Replace newlines with spaces in literals"));
#ifdef SHOW_GRAPHS_FLAG
@@ -913,11 +1025,16 @@ main(int argc, char *argv[])
*/
rc = 0;
if(!uri || filename) {
+#ifdef RAPTOR_DEBUG
+    /* Maintainer-only chunked parsing.  Guarded by RAPTOR_DEBUG because
+     * chunk_size and rapper_parser_parse_chunked_buffer() only exist in
+     * debug builds (DEFAULT_CHUNK_SIZE is always defined).  The chunked
+     * reader needs a URI to turn into a filename, so when uri is NULL
+     * the regular raptor_parser_parse_file() path below is used.
+     */
+    if(uri) {
+      size_t parse_chunk_size;
+
+      /* Reject zero / negative sizes that came from the -k argument */
+      parse_chunk_size = (size_t)(chunk_size > 0 ? chunk_size
+                                                 : DEFAULT_CHUNK_SIZE);
+      if(rapper_parser_parse_chunked_buffer(rdf_parser, uri, base_uri,
+                                            parse_chunk_size)) {
+        fprintf(stderr, "%s: Failed to parse file %s %s content\n",
+                program, FILENAME_LABEL(filename), syntax_name);
+        rc = 1;
+      }
+    } else
+#endif
if(raptor_parser_parse_file(rdf_parser, uri, base_uri)) {
fprintf(stderr, "%s: Failed to parse file %s %s content\n",
program, FILENAME_LABEL(filename), syntax_name);
rc = 1;
}
} else {
if(raptor_parser_parse_uri(rdf_parser, uri, base_uri)) {
fprintf(stderr, "%s: Failed to parse URI %s %s content\n",