summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dave Beckett <dave@dajobe.org> 2001-07-22 20:05:52 +0000
committer Dave Beckett <dave@dajobe.org> 2001-07-22 20:05:52 +0000
commit a61fb179c2a8f185bb33cb37c6a83f6695efcda8 (patch)
tree 130b3ebb33ce6bc5b98a3df4a507788050894b01
parent f39d9113e7a212baf38b5fd3b0e4bac79693c044 (diff)
download raptor-a61fb179c2a8f185bb33cb37c6a83f6695efcda8.tar.gz
Added handling of CR, LF and CR LF line endings.
Handle whitespace before/after the trailing '.'. Various bits of tidying.
-rw-r--r-- src/ntriples_parse.c 234
1 files changed, 129 insertions, 105 deletions
diff --git a/src/ntriples_parse.c b/src/ntriples_parse.c
index 6a31d22c..b6250e54 100644
--- a/src/ntriples_parse.c
+++ b/src/ntriples_parse.c
@@ -295,23 +295,36 @@ raptor_ntriples_generate_statement(raptor_ntriples_parser *parser,
static int
-ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
+raptor_ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer,
+ int len)
{
int i;
char *p;
- char *q=NULL; /* keeps gcc -Wall happy */
+ char *start=NULL; /* keeps gcc -Wall happy */
char *dest;
- char c;
+ char c = '\0';
char *terms[3];
int term_lengths[3];
raptor_ntriples_term_type term_types[3];
int backslash=0;
+
+ /* ASSERTION:
+ * p always points to first char we are considering
+ * p[len-1] always points to last char
+ */
/* Handle empty lines */
if(!len)
return 0;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ LIBRDF_DEBUG3(raptor_ntriples_parse_line,
+ "handling line '%s' (%d bytes)\n",
+ buffer, len);
+#endif
p=buffer;
+
while(len>0 && isspace(*p)) {
p++;
parser->locator.column++;
@@ -327,13 +340,28 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
if(*p == '#')
return 0;
+ /* Remove trailing spaces */
+ while(len>0 && isspace(p[len-1])) {
+ p[len-1]='\0';
+ len--;
+ }
+
+ /* can't be empty now - that would have been caught above */
+
+ /* Check for terminating '.' */
+ if(p[len-1] != '.') {
+ raptor_ntriples_parser_fatal_error(parser, "Missing . at end of line");
+ return 1;
+ }
+
+ p[len-1]='\0';
+ len--;
+
/* Must be triple */
for(i=0; i<3; i++) {
- int tlen;
-
- if(!*p) {
+ if(!len) {
raptor_ntriples_parser_fatal_error(parser, "Unexpected end of line");
return 1;
}
@@ -341,12 +369,12 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
/* Expect either <anonURI> or _:name */
if(i == 2) {
if(*p != '<' && *p != '_' && *p != '"') {
- raptor_ntriples_parser_fatal_error(parser, "Expected <URIref>, _:anonNode or \"literal\"");
+ raptor_ntriples_parser_fatal_error(parser, "Saw '%c', expected <URIref>, _:anonNode or \"literal\"", *p);
return 1;
}
} else {
if(*p != '<' && *p != '_') {
- raptor_ntriples_parser_fatal_error(parser, "Expected <URIref> or _:anonNode");
+ raptor_ntriples_parser_fatal_error(parser, "Saw '%c', expected <URIref> or _:anonNode", *p);
return 1;
}
}
@@ -356,27 +384,45 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
term_types[i]= RAPTOR_NTRIPLES_TERM_TYPE_URI_REF;
p++;
+ len--;
parser->locator.column++;
parser->locator.byte++;
- len--;
- q=p;
- while(*q && *q != '>')
- q++;
+ start=p;
+ while(len > 0 && *p != '>') {
+ p++;
+ len--;
+ parser->locator.column++;
+ parser->locator.byte++;
+ }
+
+ if(!len) {
+ raptor_ntriples_parser_fatal_error(parser, "Missing end > for URI");
+ return 1;
+ }
+
break;
case '"':
term_types[i]= RAPTOR_NTRIPLES_TERM_TYPE_LITERAL;
+ start=p;
+ dest=p;
+
p++;
+ len--;
parser->locator.column++;
parser->locator.byte++;
- len--;
/* find end of string, fixing backslashed characters on the way */
- q=p;
- dest=p;
- while((c = *q++)) {
+ while(len > 0) {
+ c = *p;
+
+ p++;
+ len--;
+ parser->locator.column++;
+ parser->locator.byte++;
+
if(c == '\\') {
if(backslash) {
*dest++='\\';
@@ -400,7 +446,7 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
*dest++='\t';
break;
default:
- raptor_ntriples_parser_fatal_error(parser, "Illegal string escape \\%c in \"%s\"", c, p);
+ raptor_ntriples_parser_fatal_error(parser, "Illegal string escape \\%c in \"%s\"", c, start);
break;
}
backslash=0;
@@ -417,27 +463,44 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
/* otherwise store and move on */
*dest++=c;
} /* end while */
+
+ if(c != '"') {
+ raptor_ntriples_parser_fatal_error(parser, "Missing end \" for literal");
+ return 1;
+ }
break;
+
case '_':
term_types[i]= RAPTOR_NTRIPLES_TERM_TYPE_ANON_NODE;
- q=p;
- q++;
- if(!*q || (*q && *q != ':')) {
- if(*q) {
- parser->locator.column++;
- parser->locator.byte++;
- }
+ /* NOTE: here: start includes _ */
+ start=p;
+
+ p++;
+ len--;
+ parser->locator.column++;
+ parser->locator.byte++;
+
+ if(!len || (len > 0 && *p != ':')) {
raptor_ntriples_parser_fatal_error(parser, "Illegal anonNode _ not followed by :");
return 1;
}
/* Found ':' - move on */
- q++;
- while(*q && isalnum(*q))
- q++;
+ p++;
+ len--;
+ parser->locator.column++;
+ parser->locator.byte++;
+
+ while(len>0 && isalnum(*p)) {
+ p++;
+ len--;
+ parser->locator.column++;
+ parser->locator.byte++;
+ }
+
break;
default:
@@ -446,61 +509,30 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
}
- /* Move locator to last char */
- tlen=q-p-1;
- parser->locator.column +=tlen;
- parser->locator.byte +=tlen;
-
- if(!*q) {
- switch(term_types[i]) {
- case RAPTOR_NTRIPLES_TERM_TYPE_URI_REF:
- raptor_ntriples_parser_fatal_error(parser, "Missing end > for URI");
- break;
- case RAPTOR_NTRIPLES_TERM_TYPE_ANON_NODE:
- raptor_ntriples_parser_fatal_error(parser, "Missing space after anonNode");
- break;
- case RAPTOR_NTRIPLES_TERM_TYPE_LITERAL:
- raptor_ntriples_parser_fatal_error(parser, "Missing end \" for literal");
- break;
- default:
- raptor_ntriples_parser_fatal_error(parser, "Illegal term type %d",
- term_types[i]);
- }
- return 1;
- }
-
-
/* Replace
* end '>' for <URIref>
* whitespace after _:anonNode
* with '\0' to terminate string
+ * and move to char after delimiter
*/
- if(term_types[i] != RAPTOR_NTRIPLES_TERM_TYPE_LITERAL)
- *q='\0';
-
- /* Store term */
- terms[i]=p; term_lengths[i]=tlen+1;
-
- /* move to last char seen
- * for literal and anonNode, is char after end
- * for <URIref> is '>'
- */
- p=q;
-
- /* move to char after delimter for non-literal */
- if(term_types[i] != RAPTOR_NTRIPLES_TERM_TYPE_LITERAL) {
+ if(len>0 && term_types[i] != RAPTOR_NTRIPLES_TERM_TYPE_LITERAL) {
+ *p='\0';
p++;
len--;
parser->locator.column++;
parser->locator.byte++;
}
+ /* Store term */
+ terms[i]=start; term_lengths[i]=(p-start);
+
+
/* Skip whitespace between parts */
while(len>0 && isspace(*p)) {
p++;
+ len--;
parser->locator.column++;
parser->locator.byte++;
- len--;
}
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
@@ -510,21 +542,8 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
#endif
}
- /* Check for terminating '.' */
- if(!*p || (*p && *p != '.')) {
- raptor_ntriples_parser_fatal_error(parser, "Missing . at end of line");
- return 1;
- }
-
- do {
- p++;
- parser->locator.column++;
- parser->locator.byte++;
- len--;
- } while(len>0 && isspace(*p));
-
- if(*p) {
- raptor_ntriples_parser_fatal_error(parser, "Extra junk after .");
+ if(len) {
+ raptor_ntriples_parser_fatal_error(parser, "Extra junk before .: '%s' (%d bytes)", p, len);
return 1;
}
@@ -534,12 +553,6 @@ ntriples_parse_line (raptor_ntriples_parser* parser, char *buffer, int len)
terms[1], term_types[1],
terms[2], term_types[2]);
-#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- fprintf(stderr, "%d: '", parser->locator.line);
- fwrite(p, 1, len, stderr);
- fprintf(stderr, "' (%d bytes)\n", len);
-#endif
-
parser->locator.byte += len;
return 0;
@@ -553,9 +566,10 @@ raptor_ntriples_parse(raptor_ntriples_parser* parser, char *s, int len,
char *buffer;
char *ptr;
char *start;
-
+ char last_nl;
+
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- LIBRDF_DEBUG2(ntriples_parse, "adding %d data bytes\n", len);
+ LIBRDF_DEBUG2(raptor_ntriples_parse, "adding %d bytes to line buffer\n", len);
#endif
buffer=(char*)LIBRDF_MALLOC(cstring, parser->line_length + len + 1);
@@ -583,42 +597,52 @@ raptor_ntriples_parse(raptor_ntriples_parser* parser, char *s, int len,
*ptr = '\0';
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- LIBRDF_DEBUG2(raptor_xml_cdata_handler,
- "line buffer now %d bytes\n",
- parser->line_length);
+ LIBRDF_DEBUG3(raptor_ntriples_parse,
+ "line buffer now '%s' (%d bytes)\n",
+ parser->line, parser->line_length);
#endif
+ last_nl='\n'; /* last newline character - \r triggers check */
+
ptr=buffer+parser->offset;
start=ptr;
while(*ptr) {
- if(*ptr != '\n') {
+ /* skip \n when just seen \r - i.e. \r\n or CR LF */
+ if(last_nl == '\r' && *ptr == '\n') {
ptr++;
- continue;
+ parser->locator.byte++;
}
+
+ while(*ptr && *ptr != '\n' && *ptr != '\r')
+ ptr++;
+
+ /* keep going - no newline yet */
+ if(!*ptr && !is_end)
+ break;
+
+ last_nl=*ptr;
len=ptr-start;
- if(len>0 && ptr[-1] == '\r')
- len--;
parser->locator.column=0;
*ptr='\0';
- if(ntriples_parse_line(parser,start,len))
+ if(raptor_ntriples_parse_line(parser,start,len))
return 1;
- /* go past '\r' in bytes only */
- if(ptr[-1] == '\r')
- parser->locator.byte++;
-
parser->locator.line++;
- /* go past '\n' */
+ /* go past newline */
ptr++;
parser->locator.byte++;
start=ptr;
}
- parser->offset=start-buffer;
+ /* exit now, no more input */
+ if(is_end)
+ return 0;
+
+ parser->offset=start-buffer;
len=parser->line_length - parser->offset;
@@ -626,7 +650,7 @@ raptor_ntriples_parse(raptor_ntriples_parser* parser, char *s, int len,
/* collapse buffer */
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- LIBRDF_DEBUG3(raptor_xml_cdata_handler,
+ LIBRDF_DEBUG3(raptor_ntriples_parse,
"collapsing line buffer from %d to %d bytes\n",
parser->line_length, len);
#endif
@@ -646,7 +670,7 @@ raptor_ntriples_parse(raptor_ntriples_parser* parser, char *s, int len,
parser->offset=0;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
- LIBRDF_DEBUG3(raptor_xml_cdata_handler,
+ LIBRDF_DEBUG3(raptor_ntriples_parse,
"line buffer now '%s' (%d bytes)\n",
parser->line, parser->line_length);
#endif