summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dave Beckett <dave@dajobe.org> 2014-11-01 20:33:11 -0700
committer Dave Beckett <dave@dajobe.org> 2014-11-01 20:33:11 -0700
commit 49a93332f218e912f64b4fd36f75f0904a2ec4c0 (patch)
tree a0ab893a2c23e1bc478b47a2f490c82bf6e7c279
parent 80da44fe63872fc48fcac14cd5911e45e3a69bbf (diff)
download raptor-49a93332f218e912f64b4fd36f75f0904a2ec4c0.tar.gz
(raptor_ntriples_parse_term_internal): Enforce URI restrictions.
URIs may not have \t \b \n \r \f or raw ' ' or \u0020 or \u003C or \u003E
-rw-r--r-- src/raptor_ntriples.c 37
1 files changed, 28 insertions, 9 deletions
diff --git a/src/raptor_ntriples.c b/src/raptor_ntriples.c
index 5151ceb7..60fd3aa0 100644
--- a/src/raptor_ntriples.c
+++ b/src/raptor_ntriples.c
@@ -127,6 +127,8 @@ raptor_ntriples_term_valid(unsigned char c, int position,
*
* UTF-8 and the \u and \U escapes are both allowed.
*
+ * URIs may not have \t \b \n \r \f or raw ' ' or \u0020 or \u003C or \u003E
+ *
* Return value: Non 0 on failure
**/
static int
@@ -158,6 +160,11 @@ raptor_ntriples_parse_term_internal(raptor_world* world,
locator->byte++;
}
+ if(term_class == RAPTOR_TERM_CLASS_URI && c == ' ') {
+ raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "URI error - illegal character %d (0x%02X) found.", c, c);
+ return 1;
+ }
+
if(c > 0x7f) {
/* just copy the UTF-8 bytes through */
int unichar_len;
@@ -242,19 +249,25 @@ raptor_ntriples_parse_term_internal(raptor_world* world,
*dest++ = c;
break;
case 'b':
- *dest++ = '\b';
- break;
case 'f':
- *dest++ = '\f';
- break;
case 'n':
- *dest++ = '\n';
- break;
case 'r':
- *dest++ = '\r';
- break;
case 't':
- *dest++ = '\t';
+ if(term_class == RAPTOR_TERM_CLASS_URI) {
+ raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "URI error - illegal URI escape '\\%c'.", c);
+ return 1;
+ }
+
+ if(c == 'b')
+ *dest++ = '\b';
+ else if(c == 'f')
+ *dest++ = '\f';
+ else if(c == 'n')
+ *dest++ = '\n';
+ else if(c == 'r')
+ *dest++ = '\r';
+ else /* 't' */
+ *dest++ = '\t';
break;
case '<':
case '>':
@@ -307,6 +320,12 @@ raptor_ntriples_parse_term_internal(raptor_world* world,
locator->byte += RAPTOR_GOOD_CAST(int, ulen);
}
+ if(term_class == RAPTOR_TERM_CLASS_URI &&
+ (unichar == 0x0020 || unichar == 0x003C || unichar == 0x003E)) {
+ raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "URI error - illegal Unicode escape \\u%04lX in URI.", unichar);
+ break;
+ }
+
if(unichar > raptor_unicode_max_codepoint) {
raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Unicode character with code point #x%lX (max #x%lX).", unichar, raptor_unicode_max_codepoint);
break;