diff options
author | Sebastian Pipping <sebastian@pipping.org> | 2021-02-28 17:33:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-28 17:33:42 +0100 |
commit | e8a338e0c65fd875a46067d711750e4c13e044e7 (patch) | |
tree | 53ca40ab6964b243d8ad03c449e4bfbeb70bd26a | |
parent | 9886535c4db499d0793af75c8e6f1dbc652dbb11 (diff) | |
parent | bdd1ffabb3b2b575661c24e51caf576b93da3ffc (diff) | |
download | uriparser-e8a338e0c65fd875a46067d711750e4c13e044e7.tar.gz |
Merge pull request #97 from uriparser/issue-92
Fix RemoveDotSegmentsEx for URIs like "http://a/.." (fixes #92)
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | THANKS | 1 | ||||
-rw-r--r-- | src/UriCommon.c | 96 | ||||
-rw-r--r-- | src/UriCommon.h | 2 | ||||
-rw-r--r-- | test/test.cpp | 55 |
5 files changed, 130 insertions, 28 deletions
@@ -4,6 +4,10 @@ NOTE: uriparser is looking for help with a few things: 20xx-xx-x -- x.x.x + * Fixed: Fix a bug regarding section "5.2.4. Remove Dot Segments" + of RFC 3986 that affected both normalization and reference resolution + with regard to trailing slashes (GitHub #92) + Thanks to Dan Pape for the report! * Fixed: MinGW: Fix name of static library (GitHub #90) Thanks to SpaceIm for the patch and Sandro Mani for review! * Fixed: Use correct inline marker "__forceinline" for Intel C++ Compiler @@ -7,6 +7,7 @@ Arkadiusz Miskiewicz Blair Sadewitz Chris Hills Cristian Rodriguez +Dan Pape Daniel Chapiesky Daniel Solano Gómez David Demelier diff --git a/src/UriCommon.c b/src/UriCommon.c index 60bd319..7ba92f2 100644 --- a/src/UriCommon.c +++ b/src/UriCommon.c @@ -119,17 +119,6 @@ int URI_FUNC(CompareRange)( -/* Properly removes "." and ".." path segments */ -UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, - UriBool relative, UriMemoryManager * memory) { - if (uri == NULL) { - return URI_TRUE; - } - return URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner, memory); -} - - - UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, UriBool relative, UriBool pathOwned, UriMemoryManager * memory) { URI_TYPE(PathSegment) * walker; @@ -149,7 +138,13 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, URI_TYPE(PathSegment) * const prev = walker->reserved; URI_TYPE(PathSegment) * const nextBackup = walker->next; - /* Is this dot segment essential? */ + /* + * Is this dot segment essential, + * i.e. is there a chance of changing semantics by dropping this dot segment? + * + * For example, changing "./http://foo" into "http://foo" would change semantics + * and hence the dot segment is essential to that case and cannot be removed. + */ removeSegment = URI_TRUE; if (relative && (walker == uri->pathHead) && (walker->next != NULL)) { const URI_CHAR * ch = walker->next->text.first; @@ -162,16 +157,23 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, } if (removeSegment) { + /* .. then let's go remove that segment. */ /* Last segment? */ if (walker->next != NULL) { - /* Not last segment */ + /* Not last segment, i.e. first or middle segment + * OLD: (prev|NULL) <- walker <- next + * NEW: (prev|NULL) <----------- next */ walker->next->reserved = prev; if (prev == NULL) { - /* First but not last segment */ + /* First but not last segment + * OLD: head -> walker -> next + * NEW: head -----------> next */ uri->pathHead = walker->next; } else { - /* Middle segment */ + /* Middle segment + * OLD: prev -> walker -> next + * NEW: prev -----------> next */ prev->next = walker->next; } @@ -220,11 +222,16 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, removeSegment = URI_TRUE; if (relative) { if (prev == NULL) { + /* We cannot remove traversal beyond because the + * URI is relative and may be resolved later. + * So we can simplify "a/../b/d" to "b/d" but + * we cannot simplify "../b/d" (outside of reference resolution). */ removeSegment = URI_FALSE; } else if ((prev != NULL) && ((prev->text.afterLast - prev->text.first) == 2) && ((prev->text.first)[0] == _UT('.')) && ((prev->text.first)[1] == _UT('.'))) { + /* We need to protect against mis-simplifying "a/../../b" to "a/b". */ removeSegment = URI_FALSE; } } @@ -234,9 +241,14 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, /* Not first segment */ prevPrev = prev->reserved; if (prevPrev != NULL) { - /* Not even prev is the first one */ + /* Not even prev is the first one + * OLD: prevPrev -> prev -> walker -> (next|NULL) + * NEW: prevPrev -------------------> (next|NULL) */ prevPrev->next = walker->next; if (walker->next != NULL) { + /* Update parent relationship as well + * OLD: prevPrev <- prev <- walker <- next + * NEW: prevPrev <------------------- next */ walker->next->reserved = prevPrev; } else { /* Last segment -> insert "" segment to represent trailing slash, update tail */ @@ -302,29 +314,58 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, } } else { URI_TYPE(PathSegment) * const anotherNextBackup = walker->next; - /* First segment -> update head pointer */ - uri->pathHead = walker->next; + int freeWalker = URI_TRUE; + + /* First segment */ if (walker->next != NULL) { + /* First segment of multiple -> update head + * OLD: head -> walker -> next + * NEW: head -----------> next */ + uri->pathHead = walker->next; + + /* Update parent link as well + * OLD: head <- walker <- next + * NEW: head <----------- next */ walker->next->reserved = NULL; } else { - /* Last segment -> update tail */ - uri->pathTail = NULL; + if (uri->absolutePath) { + /* First and only segment -> update head + * OLD: head -> walker -> NULL + * NEW: head -----------> NULL */ + uri->pathHead = NULL; + + /* Last segment -> update tail + * OLD: tail -> walker + * NEW: tail -> NULL */ + uri->pathTail = NULL; + } else { + /* Re-use segment for "" path segment to represent trailing slash, + * then update head and tail */ + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + walker->text.first = URI_FUNC(SafeToPointTo); + walker->text.afterLast = URI_FUNC(SafeToPointTo); + freeWalker = URI_FALSE; + } } - if (pathOwned && (walker->text.first != walker->text.afterLast)) { - memory->free(memory, (URI_CHAR *)walker->text.first); + if (freeWalker) { + if (pathOwned && (walker->text.first != walker->text.afterLast)) { + memory->free(memory, (URI_CHAR *)walker->text.first); + } + memory->free(memory, walker); } - memory->free(memory, walker); walker = anotherNextBackup; } } } break; - - } + } /* end of switch */ if (!removeSegment) { + /* .. then let's move to the next element, and start again. */ if (walker->next != NULL) { walker->next->reserved = walker; } else { @@ -344,7 +385,10 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { const UriBool ABSOLUTE = URI_FALSE; - return URI_FUNC(RemoveDotSegments)(uri, ABSOLUTE, memory); + if (uri == NULL) { + return URI_TRUE; + } + return URI_FUNC(RemoveDotSegmentsEx)(uri, ABSOLUTE, uri->owner, memory); } diff --git a/src/UriCommon.h b/src/UriCommon.h index 10bc250..42311dd 100644 --- a/src/UriCommon.h +++ b/src/UriCommon.h @@ -84,8 +84,6 @@ int URI_FUNC(CompareRange)( UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri, UriMemoryManager * memory); -UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, UriBool relative, - UriMemoryManager * memory); UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, UriBool relative, UriBool pathOwned, UriMemoryManager * memory); diff --git a/test/test.cpp b/test/test.cpp index 9a189f9..fd8d6d9 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -1098,6 +1098,19 @@ TEST(UriSuite, TestAddBase) { // Bug related to absolutePath flag set despite presence of host ASSERT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"/", L"http://a/")); ASSERT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"/g/", L"http://a/g/")); + + // GitHub issue #92 + EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/../d;p?q", L"../..", L"http://a/")); + EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/../d;p?q", L"../../", L"http://a/")); + + EXPECT_TRUE(testAddBaseHelper(L"http://a/b/../c/d;p?q", L"../..", L"http://a/")); + EXPECT_TRUE(testAddBaseHelper(L"http://a/b/../c/d;p?q", L"../../", L"http://a/")); + + EXPECT_TRUE(testAddBaseHelper(L"http://a/../b/c/d;p?q", L"../..", L"http://a/")); + EXPECT_TRUE(testAddBaseHelper(L"http://a/../b/c/d;p?q", L"../../", L"http://a/")); + + EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"../../..", L"http://a/")); + EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"../../../", L"http://a/")); } namespace { @@ -1477,6 +1490,48 @@ TEST(UriSuite, TestNormalizeSyntaxComponents) { URI_NORMALIZE_FRAGMENT)); } +TEST(UriSuite, TestNormalizeSyntaxPath) { + // These are from GitHub issue #92 + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"http://a/b/c/../../..", + L"http://a/", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"http://a/b/../c/../..", + L"http://a/", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"http://a/b/c/../../..", + L"http://a/", + URI_NORMALIZE_PATH)); + + // .. and these are related + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"http://a/..", + L"http://a/", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"/..", + L"/", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"http://a/..///", + L"http://a///", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"http://a/..///..", + L"http://a//", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"a/b/c/../../..", + L"", + URI_NORMALIZE_PATH)); + EXPECT_TRUE(testNormalizeSyntaxHelper( + L"a/b/../../c/..", + L"", + URI_NORMALIZE_PATH)); +} + TEST(UriSuite, TestNormalizeCrashBug20080224) { UriParserStateW stateW; int res; |