summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Beckett <dave@dajobe.org>2014-11-01 20:34:48 -0700
committerDave Beckett <dave@dajobe.org>2014-11-01 20:37:28 -0700
commit8aa29de2ea2507c1995bb9777c87935f4618c460 (patch)
treed2693fda3cdcd683cbea0bb451337b35e8c98534
parent49a93332f218e912f64b4fd36f75f0904a2ec4c0 (diff)
downloadraptor-8aa29de2ea2507c1995bb9777c87935f4618c460.tar.gz
(raptor_stringbuffer_append_turtle_string): Enforce URI restrictions.
Add is_uri argument to distinguish. Report type label correctly. URIs may not have \t \b \n \r \f or raw ' ' or \u0020 or \u003C or \u003E Update all raptor_stringbuffer_append_turtle_string callers to pass URI flag.
-rw-r--r--src/raptor_internal.h2
-rw-r--r--src/turtle_common.c98
-rw-r--r--src/turtle_lexer.l12
3 files changed, 68 insertions, 44 deletions
diff --git a/src/raptor_internal.h b/src/raptor_internal.h
index 95433d27..8ec62e9f 100644
--- a/src/raptor_internal.h
+++ b/src/raptor_internal.h
@@ -1232,7 +1232,7 @@ typedef void (*raptor_simple_message_handler)(void *user_data, const char *messa
/* turtle_common.c */
-RAPTOR_INTERNAL_API int raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer, const unsigned char *text, size_t len, int delim, raptor_simple_message_handler error_handler, void *error_data);
+RAPTOR_INTERNAL_API int raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer, const unsigned char *text, size_t len, int delim, raptor_simple_message_handler error_handler, void *error_data, int is_uri);
/* raptor_abbrev.c */
diff --git a/src/turtle_common.c b/src/turtle_common.c
index 97d05cc6..a8625764 100644
--- a/src/turtle_common.c
+++ b/src/turtle_common.c
@@ -64,6 +64,8 @@
*
* Turtle 2013 allows \ with -_~.!$&\'()*+,;=/?#@%
*
+ * URIs may not have \t \b \n \r \f or raw ' ' or \u0020 or \u003C or \u003E
+ *
* Return value: non-0 on failure
**/
int
@@ -71,33 +73,49 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
const unsigned char *text,
size_t len, int delim,
raptor_simple_message_handler error_handler,
- void *error_data)
+ void *error_data,
+ int is_uri)
{
size_t i;
const unsigned char *s;
unsigned char *d;
unsigned char *string = RAPTOR_MALLOC(unsigned char*, len + 1);
-
+ const char* label = (is_uri ? "URI" : "string");
+
if(!string)
return -1;
for(s = text, d = string, i = 0; i < len; s++, i++) {
unsigned char c=*s;
+ if(c == ' ' && is_uri) {
+ error_handler(error_data,
+ "Turtle %s error - character '%c'", label, c);
+ RAPTOR_FREE(char*, string);
+ return 1;
+ }
+
if(c == '\\' ) {
s++; i++;
c = *s;
- if(c == 'n')
- *d++ = '\n';
- else if(c == 'r')
- *d++ = '\r';
- else if(c == 't')
- *d++ = '\t';
- else if(c == 'b')
- *d++ = '\b';
- else if(c == 'f')
- *d++ = '\f';
- else if(c == '\\' || c == delim ||
+ if(c == 'n' || c == 'r' || c == 't' || c == 'b' || c == 'f') {
+ if(is_uri) {
+ error_handler(error_data,
+ "Turtle %s error - illegal URI escape '\\%c'", label, c);
+ RAPTOR_FREE(char*, string);
+ return 1;
+ }
+ if(c == 'n')
+ *d++ = '\n';
+ else if(c == 'r')
+ *d++ = '\r';
+ else if(c == 't')
+ *d++ = '\t';
+ else if(c == 'b')
+ *d++ = '\b';
+ else /* 'f' */
+ *d++ = '\f';
+ } else if(c == '\\' || c == delim ||
c == '-' || c == '_' || c == '~' || c == '.' || c == '!' ||
c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' ||
c == '*' || c == '+' || c == ',' || c == ';' ||c == '=' ||
@@ -113,7 +131,7 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
s++; i++;
if(i+ulen > len) {
error_handler(error_data,
- "Turtle string error - \\%c over end of line", c);
+ "Turtle %s error - \\%c over end of line", label, c);
RAPTOR_FREE(char*, string);
return 1;
}
@@ -122,8 +140,8 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
char cc = s[ii];
if(!isxdigit(RAPTOR_GOOD_CAST(char, cc))) {
error_handler(error_data,
- "Turtle string error - illegal hex digit %c in Unicode escape '%c%s...'",
- cc, c, s);
+ "Turtle %s error - illegal hex digit %c in Unicode escape '%c%s...'",
+ label, cc, c, s);
RAPTOR_FREE(char*, string);
return 1;
}
@@ -132,8 +150,8 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
n = sscanf((const char*)s, ((ulen == 4) ? "%04lx" : "%08lx"), &unichar);
if(n != 1) {
error_handler(error_data,
- "Turtle string error - illegal Unicode escape '%c%s...'",
- c, s);
+ "Turtle %s error - illegal Unicode escape '%c%s...'",
+ label, c, s);
RAPTOR_FREE(char*, string);
return 1;
}
@@ -141,10 +159,16 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
s+= ulen-1;
i+= ulen-1;
+ if(is_uri && (unichar == 0x0020 || unichar == 0x003C || unichar == 0x003E)) {
+ error_handler(error_data,
+ "Turtle %s error - illegal Unicode escape \\u%04lX in URI.", label, unichar);
+ break;
+ }
+
if(unichar > raptor_unicode_max_codepoint) {
error_handler(error_data,
- "Turtle string error - illegal Unicode character with code point #x%lX (max #x%lX).",
- unichar, raptor_unicode_max_codepoint);
+ "Turtle %s error - illegal Unicode character with code point #x%lX (max #x%lX).",
+ label, unichar, raptor_unicode_max_codepoint);
RAPTOR_FREE(char*, string);
return 1;
}
@@ -153,8 +177,8 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
len-(d-string));
if(unichar_width < 0) {
error_handler(error_data,
- "Turtle string error - illegal Unicode character with code point #x%lX.",
- unichar);
+ "Turtle %s error - illegal Unicode character with code point #x%lX.",
+ label, unichar);
RAPTOR_FREE(char*, string);
return 1;
}
@@ -163,8 +187,8 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
} else {
/* don't handle \x where x isn't one of: \t \n \r \\ (delim) */
error_handler(error_data,
- "Turtle string error - illegal escape \\%c (#x%02X) in \"%s\"",
- c, c, text);
+ "Turtle %s error - illegal escape \\%c (#x%02X) in \"%s\"",
+ label, c, c, text);
}
} else
*d++=c;
@@ -182,13 +206,13 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
/**
- * raptor_turtle_expand_name_escapes:
- * @name: turtle name to decode
+ * raptor_turtle_expand_qname_escapes:
+ * @name: turtle qname string to decode
* @len: length of name
* @error_handler: error handling function
* @error_data: error handler data
*
- * Expands Turtle escapes for the given name
+ * Expands Turtle escapes for the given turtle qname string
*
* The passed in string is handled according to the Turtle string
* escape rules giving a UTF-8 encoded output of the Unicode codepoints.
@@ -201,10 +225,10 @@ raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer,
* Return value: new length or 0 on failure
**/
size_t
-raptor_turtle_expand_name_escapes(unsigned char *name,
- size_t len,
- raptor_simple_message_handler error_handler,
- void *error_data)
+raptor_turtle_expand_qname_escapes(unsigned char *name,
+ size_t len,
+ raptor_simple_message_handler error_handler,
+ void *error_data)
{
size_t i;
const unsigned char *s;
@@ -245,7 +269,7 @@ raptor_turtle_expand_name_escapes(unsigned char *name,
s++; i++;
if(i+ulen > len) {
error_handler(error_data,
- "Turtle string error - \\%c over end of line", c);
+ "Turtle name error - \\%c over end of line", c);
return 1;
}
@@ -253,7 +277,7 @@ raptor_turtle_expand_name_escapes(unsigned char *name,
char cc = s[ii];
if(!isxdigit(RAPTOR_GOOD_CAST(char, cc))) {
error_handler(error_data,
- "Turtle string error - illegal hex digit %c in Unicode escape '%c%s...'",
+ "Turtle name error - illegal hex digit %c in Unicode escape '%c%s...'",
cc, c, s);
return 1;
}
@@ -262,7 +286,7 @@ raptor_turtle_expand_name_escapes(unsigned char *name,
n = sscanf((const char*)s, ((ulen == 4) ? "%04lx" : "%08lx"), &unichar);
if(n != 1) {
error_handler(error_data,
- "Turtle string error - illegal Uncode escape '%c%s...'",
+ "Turtle name error - illegal Uncode escape '%c%s...'",
c, s);
return 1;
}
@@ -272,7 +296,7 @@ raptor_turtle_expand_name_escapes(unsigned char *name,
if(unichar > raptor_unicode_max_codepoint) {
error_handler(error_data,
- "Turtle string error - illegal Unicode character with code point #x%lX (max #x%lX).",
+ "Turtle name error - illegal Unicode character with code point #x%lX (max #x%lX).",
unichar, raptor_unicode_max_codepoint);
return 1;
}
@@ -281,7 +305,7 @@ raptor_turtle_expand_name_escapes(unsigned char *name,
len - (d-name));
if(unichar_width < 0) {
error_handler(error_data,
- "Turtle string error - illegal Unicode character with code point #x%lX.",
+ "Turtle name error - illegal Unicode character with code point #x%lX.",
unichar);
return 1;
}
@@ -290,7 +314,7 @@ raptor_turtle_expand_name_escapes(unsigned char *name,
} else {
/* don't handle \x where x isn't one of: \t \n \r \\ (delim) */
error_handler(error_data,
- "Turtle string error - illegal escape \\%c (#x%02X) in \"%s\"",
+ "Turtle name error - illegal escape \\%c (#x%02X) in \"%s\"",
c, c, name);
}
} else
diff --git a/src/turtle_lexer.l b/src/turtle_lexer.l
index 08bdb62b..fb5bb665 100644
--- a/src/turtle_lexer.l
+++ b/src/turtle_lexer.l
@@ -290,7 +290,7 @@ EXPONENT [eE][+-]?[0-9]+
turtle_parser->lineno++;
}
- if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser)) { /* " */
+ if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */
BEGIN(INITIAL);
raptor_free_stringbuffer(turtle_parser->sb);
turtle_parser->sb = NULL;
@@ -353,7 +353,7 @@ EXPONENT [eE][+-]?[0-9]+
turtle_parser->lineno++;
}
- if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser)) { /* " */
+ if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */
BEGIN(INITIAL);
raptor_free_stringbuffer(turtle_parser->sb);
turtle_parser->sb = NULL;
@@ -444,7 +444,7 @@ EXPONENT [eE][+-]?[0-9]+
/* start at yytext + 1 to skip '<' and operate over
* length-2 bytes to skip '<' and '>'
*/
- if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-2, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser)) {
+ if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-2, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) {
raptor_free_stringbuffer(sb);
YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed");
}
@@ -490,7 +490,7 @@ EXPONENT [eE][+-]?[0-9]+
sb = raptor_new_stringbuffer();
if(!sb)
TURTLE_LEXER_OOM();
- if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-1, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser)) {
+ if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-1, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) {
raptor_free_stringbuffer(sb);
YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed");
}
@@ -560,8 +560,8 @@ turtle_copy_string_token(raptor_parser* rdf_parser,
return NULL;
rc = raptor_stringbuffer_append_turtle_string(sb, string, len, delim,
- (raptor_simple_message_handler)turtle_lexer_syntax_error,
- rdf_parser);
+ (raptor_simple_message_handler)turtle_lexer_syntax_error,
+ rdf_parser, 0);
if(rc) {
raptor_free_stringbuffer(sb);
return NULL;