summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Pipping <sebastian@pipping.org>2021-02-28 17:33:42 +0100
committerGitHub <noreply@github.com>2021-02-28 17:33:42 +0100
commite8a338e0c65fd875a46067d711750e4c13e044e7 (patch)
tree53ca40ab6964b243d8ad03c449e4bfbeb70bd26a
parent9886535c4db499d0793af75c8e6f1dbc652dbb11 (diff)
parentbdd1ffabb3b2b575661c24e51caf576b93da3ffc (diff)
downloaduriparser-e8a338e0c65fd875a46067d711750e4c13e044e7.tar.gz
Merge pull request #97 from uriparser/issue-92
Fix RemoveDotSegmentsEx for URIs like "http://a/.." (fixes #92)
-rw-r--r--ChangeLog4
-rw-r--r--THANKS1
-rw-r--r--src/UriCommon.c96
-rw-r--r--src/UriCommon.h2
-rw-r--r--test/test.cpp55
5 files changed, 130 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index e92a9f0..ad7fc04 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,10 @@ NOTE: uriparser is looking for help with a few things:
20xx-xx-x -- x.x.x
+ * Fixed: Fix a bug regarding section "5.2.4. Remove Dot Segments"
+ of RFC 3986 that affected both normalization and reference resolution
+ with regard to trailing slashes (GitHub #92)
+ Thanks to Dan Pape for the report!
* Fixed: MinGW: Fix name of static library (GitHub #90)
Thanks to SpaceIm for the patch and Sandro Mani for review!
* Fixed: Use correct inline marker "__forceinline" for Intel C++ Compiler
diff --git a/THANKS b/THANKS
index edc56f7..985009d 100644
--- a/THANKS
+++ b/THANKS
@@ -7,6 +7,7 @@ Arkadiusz Miskiewicz
Blair Sadewitz
Chris Hills
Cristian Rodriguez
+Dan Pape
Daniel Chapiesky
Daniel Solano Gómez
David Demelier
diff --git a/src/UriCommon.c b/src/UriCommon.c
index 60bd319..7ba92f2 100644
--- a/src/UriCommon.c
+++ b/src/UriCommon.c
@@ -119,17 +119,6 @@ int URI_FUNC(CompareRange)(
-/* Properly removes "." and ".." path segments */
-UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri,
- UriBool relative, UriMemoryManager * memory) {
- if (uri == NULL) {
- return URI_TRUE;
- }
- return URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner, memory);
-}
-
-
-
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned, UriMemoryManager * memory) {
URI_TYPE(PathSegment) * walker;
@@ -149,7 +138,13 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
URI_TYPE(PathSegment) * const prev = walker->reserved;
URI_TYPE(PathSegment) * const nextBackup = walker->next;
- /* Is this dot segment essential? */
+ /*
+ * Is this dot segment essential,
+ * i.e. is there a chance of changing semantics by dropping this dot segment?
+ *
+ * For example, changing "./http://foo" into "http://foo" would change semantics
+ * and hence the dot segment is essential to that case and cannot be removed.
+ */
removeSegment = URI_TRUE;
if (relative && (walker == uri->pathHead) && (walker->next != NULL)) {
const URI_CHAR * ch = walker->next->text.first;
@@ -162,16 +157,23 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
}
if (removeSegment) {
+ /* .. then let's go remove that segment. */
/* Last segment? */
if (walker->next != NULL) {
- /* Not last segment */
+ /* Not last segment, i.e. first or middle segment
+ * OLD: (prev|NULL) <- walker <- next
+ * NEW: (prev|NULL) <----------- next */
walker->next->reserved = prev;
if (prev == NULL) {
- /* First but not last segment */
+ /* First but not last segment
+ * OLD: head -> walker -> next
+ * NEW: head -----------> next */
uri->pathHead = walker->next;
} else {
- /* Middle segment */
+ /* Middle segment
+ * OLD: prev -> walker -> next
+ * NEW: prev -----------> next */
prev->next = walker->next;
}
@@ -220,11 +222,16 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
removeSegment = URI_TRUE;
if (relative) {
if (prev == NULL) {
+ /* We cannot remove traversal beyond because the
+ * URI is relative and may be resolved later.
+ * So we can simplify "a/../b/d" to "b/d" but
+ * we cannot simplify "../b/d" (outside of reference resolution). */
removeSegment = URI_FALSE;
} else if ((prev != NULL)
&& ((prev->text.afterLast - prev->text.first) == 2)
&& ((prev->text.first)[0] == _UT('.'))
&& ((prev->text.first)[1] == _UT('.'))) {
+ /* We need to protect against mis-simplifying "a/../../b" to "a/b". */
removeSegment = URI_FALSE;
}
}
@@ -234,9 +241,14 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
/* Not first segment */
prevPrev = prev->reserved;
if (prevPrev != NULL) {
- /* Not even prev is the first one */
+ /* Not even prev is the first one
+ * OLD: prevPrev -> prev -> walker -> (next|NULL)
+ * NEW: prevPrev -------------------> (next|NULL) */
prevPrev->next = walker->next;
if (walker->next != NULL) {
+ /* Update parent relationship as well
+ * OLD: prevPrev <- prev <- walker <- next
+ * NEW: prevPrev <------------------- next */
walker->next->reserved = prevPrev;
} else {
/* Last segment -> insert "" segment to represent trailing slash, update tail */
@@ -302,29 +314,58 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
}
} else {
URI_TYPE(PathSegment) * const anotherNextBackup = walker->next;
- /* First segment -> update head pointer */
- uri->pathHead = walker->next;
+ int freeWalker = URI_TRUE;
+
+ /* First segment */
if (walker->next != NULL) {
+ /* First segment of multiple -> update head
+ * OLD: head -> walker -> next
+ * NEW: head -----------> next */
+ uri->pathHead = walker->next;
+
+ /* Update parent link as well
+ * OLD: head <- walker <- next
+ * NEW: head <----------- next */
walker->next->reserved = NULL;
} else {
- /* Last segment -> update tail */
- uri->pathTail = NULL;
+ if (uri->absolutePath) {
+ /* First and only segment -> update head
+ * OLD: head -> walker -> NULL
+ * NEW: head -----------> NULL */
+ uri->pathHead = NULL;
+
+ /* Last segment -> update tail
+ * OLD: tail -> walker
+ * NEW: tail -> NULL */
+ uri->pathTail = NULL;
+ } else {
+ /* Re-use segment for "" path segment to represent trailing slash,
+ * then update head and tail */
+ if (pathOwned && (walker->text.first != walker->text.afterLast)) {
+ memory->free(memory, (URI_CHAR *)walker->text.first);
+ }
+ walker->text.first = URI_FUNC(SafeToPointTo);
+ walker->text.afterLast = URI_FUNC(SafeToPointTo);
+ freeWalker = URI_FALSE;
+ }
}
- if (pathOwned && (walker->text.first != walker->text.afterLast)) {
- memory->free(memory, (URI_CHAR *)walker->text.first);
+ if (freeWalker) {
+ if (pathOwned && (walker->text.first != walker->text.afterLast)) {
+ memory->free(memory, (URI_CHAR *)walker->text.first);
+ }
+ memory->free(memory, walker);
}
- memory->free(memory, walker);
walker = anotherNextBackup;
}
}
}
break;
-
- }
+ } /* end of switch */
if (!removeSegment) {
+ /* .. then let's move to the next element, and start again. */
if (walker->next != NULL) {
walker->next->reserved = walker;
} else {
@@ -344,7 +385,10 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory) {
const UriBool ABSOLUTE = URI_FALSE;
- return URI_FUNC(RemoveDotSegments)(uri, ABSOLUTE, memory);
+ if (uri == NULL) {
+ return URI_TRUE;
+ }
+ return URI_FUNC(RemoveDotSegmentsEx)(uri, ABSOLUTE, uri->owner, memory);
}
diff --git a/src/UriCommon.h b/src/UriCommon.h
index 10bc250..42311dd 100644
--- a/src/UriCommon.h
+++ b/src/UriCommon.h
@@ -84,8 +84,6 @@ int URI_FUNC(CompareRange)(
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory);
-UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, UriBool relative,
- UriMemoryManager * memory);
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned, UriMemoryManager * memory);
diff --git a/test/test.cpp b/test/test.cpp
index 9a189f9..fd8d6d9 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -1098,6 +1098,19 @@ TEST(UriSuite, TestAddBase) {
// Bug related to absolutePath flag set despite presence of host
ASSERT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"/", L"http://a/"));
ASSERT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"/g/", L"http://a/g/"));
+
+ // GitHub issue #92
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/../d;p?q", L"../..", L"http://a/"));
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/../d;p?q", L"../../", L"http://a/"));
+
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/b/../c/d;p?q", L"../..", L"http://a/"));
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/b/../c/d;p?q", L"../../", L"http://a/"));
+
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/../b/c/d;p?q", L"../..", L"http://a/"));
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/../b/c/d;p?q", L"../../", L"http://a/"));
+
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"../../..", L"http://a/"));
+ EXPECT_TRUE(testAddBaseHelper(L"http://a/b/c/d;p?q", L"../../../", L"http://a/"));
}
namespace {
@@ -1477,6 +1490,48 @@ TEST(UriSuite, TestNormalizeSyntaxComponents) {
URI_NORMALIZE_FRAGMENT));
}
+TEST(UriSuite, TestNormalizeSyntaxPath) {
+ // These are from GitHub issue #92
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"http://a/b/c/../../..",
+ L"http://a/",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"http://a/b/../c/../..",
+ L"http://a/",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"http://a/b/c/../../..",
+ L"http://a/",
+ URI_NORMALIZE_PATH));
+
+ // .. and these are related
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"http://a/..",
+ L"http://a/",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"/..",
+ L"/",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"http://a/..///",
+ L"http://a///",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"http://a/..///..",
+ L"http://a//",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"a/b/c/../../..",
+ L"",
+ URI_NORMALIZE_PATH));
+ EXPECT_TRUE(testNormalizeSyntaxHelper(
+ L"a/b/../../c/..",
+ L"",
+ URI_NORMALIZE_PATH));
+}
+
TEST(UriSuite, TestNormalizeCrashBug20080224) {
UriParserStateW stateW;
int res;