summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2021-08-29 07:36:36 -0700
committerGitHub <noreply@github.com>2021-08-29 16:36:36 +0200
commit007221a43e566db08c0c5c00756d80dfd9dccafe (patch)
tree49d81dc0e3697ceffee84c0dd4aee347d990d52b
parent45409518c1cec5ee91d49f69a2f8eb4196d242f0 (diff)
downloadcpython-git-007221a43e566db08c0c5c00756d80dfd9dccafe.tar.gz
bpo-44394: Update libexpat copy to 2.4.1 (GH-26945) (GH-28032)
Update the vendored copy of libexpat to 2.4.1 (from 2.2.8) to get the fix for the CVE-2013-0340 "Billion Laughs" vulnerability. This copy is most used on Windows and macOS. Co-authored-by: Łukasz Langa <lukasz@langa.pl> (cherry picked from commit 3fc5d84046ddbd66abac5b598956ea34605a4e5d) Co-authored-by: Victor Stinner <vstinner@python.org>
-rw-r--r--Doc/library/xml.rst32
-rw-r--r--Misc/NEWS.d/next/Security/2021-06-29-02-45-53.bpo-44394.A220N1.rst3
-rw-r--r--Modules/expat/COPYING2
-rw-r--r--Modules/expat/ascii.h7
-rw-r--r--Modules/expat/asciitab.h4
-rw-r--r--Modules/expat/expat.h44
-rw-r--r--Modules/expat/expat_external.h9
-rw-r--r--Modules/expat/iasciitab.h4
-rw-r--r--Modules/expat/internal.h58
-rw-r--r--Modules/expat/latin1tab.h4
-rw-r--r--Modules/expat/nametab.h4
-rw-r--r--Modules/expat/siphash.h13
-rw-r--r--Modules/expat/utf8tab.h4
-rw-r--r--Modules/expat/winconfig.h19
-rw-r--r--Modules/expat/xmlparse.c1263
-rw-r--r--Modules/expat/xmlrole.c19
-rw-r--r--Modules/expat/xmlrole.h5
-rw-r--r--Modules/expat/xmltok.c48
-rw-r--r--Modules/expat/xmltok.h6
-rw-r--r--Modules/expat/xmltok_impl.c21
-rw-r--r--Modules/expat/xmltok_impl.h3
-rw-r--r--Modules/expat/xmltok_ns.c8
22 files changed, 1389 insertions, 191 deletions
diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst
index 1981cab7cd..e3b3516296 100644
--- a/Doc/library/xml.rst
+++ b/Doc/library/xml.rst
@@ -60,22 +60,26 @@ circumvent firewalls.
The following table gives an overview of the known attacks and whether
the various modules are vulnerable to them.
-========================= ============== =============== ============== ============== ==============
-kind sax etree minidom pulldom xmlrpc
-========================= ============== =============== ============== ============== ==============
-billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
-quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
-external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3)
-`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe
-decompression bomb Safe Safe Safe Safe **Vulnerable**
-========================= ============== =============== ============== ============== ==============
-
-1. :mod:`xml.etree.ElementTree` doesn't expand external entities and raises a
+=========================  ==================  ==================  ==================  ==================  ==================
+kind                       sax                 etree               minidom             pulldom             xmlrpc
+=========================  ==================  ==================  ==================  ==================  ==================
+billion laughs             **Vulnerable** (1)  **Vulnerable** (1)  **Vulnerable** (1)  **Vulnerable** (1)  **Vulnerable** (1)
+quadratic blowup           **Vulnerable** (1)  **Vulnerable** (1)  **Vulnerable** (1)  **Vulnerable** (1)  **Vulnerable** (1)
+external entity expansion  Safe (5)            Safe (2)            Safe (3)            Safe (5)            Safe (4)
+`DTD`_ retrieval           Safe (5)            Safe                Safe                Safe (5)            Safe
+decompression bomb         Safe                Safe                Safe                Safe                **Vulnerable**
+=========================  ==================  ==================  ==================  ==================  ==================
+
+1. Expat 2.4.1 and newer is not vulnerable to the "billion laughs" and
+ "quadratic blowup" vulnerabilities. Items are still listed as vulnerable
+ because Python may be built against a system-provided Expat library that is
+ older than 2.4.1. Check :data:`pyexpat.EXPAT_VERSION` to see which version
+ is in use.
+2. :mod:`xml.etree.ElementTree` doesn't expand external entities and raises a
:exc:`ParseError` when an entity occurs.
-2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
+3. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
the unexpanded entity verbatim.
-3. :mod:`xmlrpclib` doesn't expand external entities and omits them.
-4. Since Python 3.7.1, external general entities are no longer processed by
+4. :mod:`xmlrpc.client` doesn't expand external entities and omits them.
+5. Since Python 3.7.1, external general entities are no longer processed by
default.
diff --git a/Misc/NEWS.d/next/Security/2021-06-29-02-45-53.bpo-44394.A220N1.rst b/Misc/NEWS.d/next/Security/2021-06-29-02-45-53.bpo-44394.A220N1.rst
new file mode 100644
index 0000000000..e32563d253
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-06-29-02-45-53.bpo-44394.A220N1.rst
@@ -0,0 +1,3 @@
+Update the vendored copy of libexpat to 2.4.1 (from 2.2.8) to get the fix
+for the CVE-2013-0340 "Billion Laughs" vulnerability. This copy is mostly
+used on Windows and macOS.
diff --git a/Modules/expat/COPYING b/Modules/expat/COPYING
index 8d288f0f28..3c0142e71c 100644
--- a/Modules/expat/COPYING
+++ b/Modules/expat/COPYING
@@ -1,5 +1,5 @@
Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
-Copyright (c) 2001-2017 Expat maintainers
+Copyright (c) 2001-2019 Expat maintainers
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/Modules/expat/ascii.h b/Modules/expat/ascii.h
index c3587e5733..1f594d2e54 100644
--- a/Modules/expat/ascii.h
+++ b/Modules/expat/ascii.h
@@ -6,8 +6,11 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 1999-2000 Thai Open Source Software Center Ltd
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2007 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/asciitab.h b/Modules/expat/asciitab.h
index 63b1d1b448..af766fb247 100644
--- a/Modules/expat/asciitab.h
+++ b/Modules/expat/asciitab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
index 6c8eb1fda4..b7d6d35480 100644
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -7,7 +7,14 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
+ Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
+ Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -115,7 +122,11 @@ enum XML_Error {
XML_ERROR_RESERVED_PREFIX_XMLNS,
XML_ERROR_RESERVED_NAMESPACE_URI,
/* Added in 2.2.1. */
- XML_ERROR_INVALID_ARGUMENT
+ XML_ERROR_INVALID_ARGUMENT,
+ /* Added in 2.3.0. */
+ XML_ERROR_NO_BUFFER,
+ /* Added in 2.4.0. */
+ XML_ERROR_AMPLIFICATION_LIMIT_BREACH
};
enum XML_Content_Type {
@@ -318,7 +329,7 @@ typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
For internal entities (<!ENTITY foo "bar">), value will
be non-NULL and systemId, publicID, and notationName will be NULL.
- The value string is NOT nul-terminated; the length is provided in
+ The value string is NOT null-terminated; the length is provided in
the value_length argument. Since it is legal to have zero-length
values, do not use this argument to test for internal entities.
@@ -513,7 +524,7 @@ typedef struct {
Otherwise it must return XML_STATUS_ERROR.
If info does not describe a suitable encoding, then the parser will
- return an XML_UNKNOWN_ENCODING error.
+ return an XML_ERROR_UNKNOWN_ENCODING error.
*/
typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData,
const XML_Char *name,
@@ -707,7 +718,7 @@ XML_GetBase(XML_Parser parser);
/* Returns the number of the attribute/value pairs passed in last call
to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus
- this correspondds to an index into the atts array passed to the
+ this corresponds to an index into the atts array passed to the
XML_StartElementHandler. Returns -1 if parser == NULL.
*/
XMLPARSEAPI(int)
@@ -716,7 +727,7 @@ XML_GetSpecifiedAttributeCount(XML_Parser parser);
/* Returns the index of the ID attribute passed in the last call to
XML_StartElementHandler, or -1 if there is no ID attribute or
parser == NULL. Each attribute/value pair counts as 2; thus this
- correspondds to an index into the atts array passed to the
+ corresponds to an index into the atts array passed to the
XML_StartElementHandler.
*/
XMLPARSEAPI(int)
@@ -997,7 +1008,10 @@ enum XML_FeatureEnum {
XML_FEATURE_SIZEOF_XML_LCHAR,
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
- XML_FEATURE_ATTR_INFO
+ XML_FEATURE_ATTR_INFO,
+ /* Added in Expat 2.4.0. */
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
/* Additional features must be added to the end of this enum. */
};
@@ -1010,12 +1024,24 @@ typedef struct {
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
+#ifdef XML_DTD
+/* Added in Expat 2.4.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ XML_Parser parser, float maximumAmplificationFactor);
+
+/* Added in Expat 2.4.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ XML_Parser parser, unsigned long long activationThresholdBytes);
+#endif
+
/* Expat follows the semantic versioning convention.
See http://semver.org.
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 2
-#define XML_MICRO_VERSION 8
+#define XML_MINOR_VERSION 4
+#define XML_MICRO_VERSION 1
#ifdef __cplusplus
}
diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h
index f2b75dda8e..12c560e147 100644
--- a/Modules/expat/expat_external.h
+++ b/Modules/expat/expat_external.h
@@ -7,7 +7,14 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2000-2004 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
+ Copyright (c) 2016-2019 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
+ Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/iasciitab.h b/Modules/expat/iasciitab.h
index ea97cfcf67..5d8646f2a3 100644
--- a/Modules/expat/iasciitab.h
+++ b/Modules/expat/iasciitab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h
index 60913dab76..444eba0fb0 100644
--- a/Modules/expat/internal.h
+++ b/Modules/expat/internal.h
@@ -25,8 +25,12 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
+ Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -101,22 +105,58 @@
# endif
#endif
+#include <limits.h> // ULONG_MAX
+
+#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO)
+# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u"
+# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u"
+# else
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
+# endif
+#else
+# define EXPAT_FMT_ULL(midpart) "%" midpart "llu"
+# if ! defined(ULONG_MAX)
+# error Compiler did not define ULONG_MAX for us
+# elif ULONG_MAX == 18446744073709551615u // 2^64-1
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu"
+# else
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
+# endif
+#endif
+
#ifndef UNUSED_P
# define UNUSED_P(p) (void)p
#endif
+/* NOTE BEGIN If you ever patch these defaults to greater values
+ for non-attack XML payload in your environment,
+ please file a bug report with libexpat. Thank you!
+*/
+#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \
+ 100.0f
+#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \
+ 8388608 // 8 MiB, 2^23
+/* NOTE END */
+
+#include "expat.h" // so we can use type XML_Parser below
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifdef XML_ENABLE_VISIBILITY
-# if XML_ENABLE_VISIBILITY
-__attribute__((visibility("default")))
-# endif
+void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
+ const char **fromLimRef);
+
+#if defined(XML_DTD)
+unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
+unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
+const char *unsignedCharToPrintable(unsigned char c);
#endif
-void
-_INTERNAL_trim_to_complete_utf8_characters(const char *from,
- const char **fromLimRef);
#ifdef __cplusplus
}
diff --git a/Modules/expat/latin1tab.h b/Modules/expat/latin1tab.h
index 6f91604135..b681d278af 100644
--- a/Modules/expat/latin1tab.h
+++ b/Modules/expat/latin1tab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/nametab.h b/Modules/expat/nametab.h
index 3681df348e..63485446b9 100644
--- a/Modules/expat/nametab.h
+++ b/Modules/expat/nametab.h
@@ -6,8 +6,8 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h
index bfee65a332..e5406d7ee9 100644
--- a/Modules/expat/siphash.h
+++ b/Modules/expat/siphash.h
@@ -11,6 +11,9 @@
* --------------------------------------------------------------------------
* HISTORY:
*
+ * 2020-10-03 (Sebastian Pipping)
+ * - Drop support for Visual Studio 9.0/2008 and earlier
+ *
* 2019-08-03 (Sebastian Pipping)
* - Mark part of sip24_valid as to be excluded from clang-format
* - Re-format code using clang-format 9
@@ -96,15 +99,7 @@
#define SIPHASH_H
#include <stddef.h> /* size_t */
-
-#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600)
-/* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
-typedef unsigned __int8 uint8_t;
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-#else
-# include <stdint.h> /* uint64_t uint32_t uint8_t */
-#endif
+#include <stdint.h> /* uint64_t uint32_t uint8_t */
/*
* Workaround to not require a C++11 compiler for using ULL suffix
diff --git a/Modules/expat/utf8tab.h b/Modules/expat/utf8tab.h
index a22986acbb..88efcf91cc 100644
--- a/Modules/expat/utf8tab.h
+++ b/Modules/expat/utf8tab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/winconfig.h b/Modules/expat/winconfig.h
index 562a4a82dc..2ecd61b5b9 100644
--- a/Modules/expat/winconfig.h
+++ b/Modules/expat/winconfig.h
@@ -6,8 +6,10 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2005 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -40,17 +42,4 @@
#include <memory.h>
#include <string.h>
-#if defined(HAVE_EXPAT_CONFIG_H) /* e.g. MinGW */
-# include <expat_config.h>
-#else /* !defined(HAVE_EXPAT_CONFIG_H) */
-
-# define XML_NS 1
-# define XML_DTD 1
-# define XML_CONTEXT_BYTES 1024
-
-/* we will assume all Windows platforms are little endian */
-# define BYTEORDER 1234
-
-#endif /* !defined(HAVE_EXPAT_CONFIG_H) */
-
#endif /* ndef WINCONFIG_H */
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
index e740f0e19c..5ba56eaea6 100644
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -1,4 +1,4 @@
-/* f2d0ab6d1d4422a08cf1cf3bbdfba96b49dea42fb5ff4615e03a2a25c306e769 (2.2.8+)
+/* 8539b9040d9d901366a62560a064af7cb99811335784b363abc039c5b0ebc416 (2.4.1+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -7,7 +7,31 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
+ Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
+ Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
+ Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
+ Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
+ Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
+ Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk>
+ Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
+ Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
+ Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
+ Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
+ Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
+ Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
+ Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
+ Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
+ Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
+ Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
+ Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
+ Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -36,7 +60,9 @@
#ifdef _WIN32
/* force stdlib to define rand_s() */
-# define _CRT_RAND_S
+# if ! defined(_CRT_RAND_S)
+# define _CRT_RAND_S
+# endif
#endif
#include <stddef.h>
@@ -45,6 +71,8 @@
#include <limits.h> /* UINT_MAX */
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* getenv, rand_s */
+#include <stdint.h> /* uintptr_t */
+#include <math.h> /* isnan */
#ifdef _WIN32
# define getpid GetCurrentProcessId
@@ -60,9 +88,9 @@
#ifdef _WIN32
# include "winconfig.h"
-#elif defined(HAVE_EXPAT_CONFIG_H)
-# include <expat_config.h>
-#endif /* ndef _WIN32 */
+#endif
+
+#include <expat_config.h>
#include "ascii.h"
#include "expat.h"
@@ -97,14 +125,14 @@
enabled. For end user security, that is probably not what you want. \
\
Your options include: \
- * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
- * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
+ * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
+ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
* BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
- * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
+ * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
- * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
- * Windows (rand_s): _WIN32. \
+ * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
+ * Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
XML_POOR_ENTROPY; you have been warned. \
@@ -119,9 +147,7 @@
# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
# define XmlEncode XmlUtf16Encode
-/* Using pointer subtraction to convert to integer type. */
-# define MUST_CONVERT(enc, s) \
- (! (enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1))
+# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
@@ -371,6 +397,31 @@ typedef struct open_internal_entity {
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
} OPEN_INTERNAL_ENTITY;
+enum XML_Account {
+ XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
+ XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
+ expansion */
+ XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
+};
+
+#ifdef XML_DTD
+typedef unsigned long long XmlBigCount;
+typedef struct accounting {
+ XmlBigCount countBytesDirect;
+ XmlBigCount countBytesIndirect;
+ int debugLevel;
+ float maximumAmplificationFactor; // >=1.0
+ unsigned long long activationThresholdBytes;
+} ACCOUNTING;
+
+typedef struct entity_stats {
+ unsigned int countEverOpened;
+ unsigned int currentDepth;
+ unsigned int maximumDepthSeen;
+ int debugLevel;
+} ENTITY_STATS;
+#endif /* XML_DTD */
+
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
const char *end, const char **endPtr);
@@ -401,16 +452,18 @@ static enum XML_Error initializeEncoding(XML_Parser parser);
static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end, int tok,
const char *next, const char **nextPtr,
- XML_Bool haveMore, XML_Bool allowClosingDoctype);
+ XML_Bool haveMore, XML_Bool allowClosingDoctype,
+ enum XML_Account account);
static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
XML_Bool betweenDecl);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
- XML_Bool haveMore);
+ XML_Bool haveMore, enum XML_Account account);
static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
const char **startPtr, const char *end,
- const char **nextPtr, XML_Bool haveMore);
+ const char **nextPtr, XML_Bool haveMore,
+ enum XML_Account account);
#ifdef XML_DTD
static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
const char **startPtr, const char *end,
@@ -420,7 +473,8 @@ static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
static void freeBindings(XML_Parser parser, BINDING *bindings);
static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
const char *s, TAG_NAME *tagNamePtr,
- BINDING **bindingsPtr);
+ BINDING **bindingsPtr,
+ enum XML_Account account);
static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
const ATTRIBUTE_ID *attId, const XML_Char *uri,
BINDING **bindingsPtr);
@@ -429,15 +483,18 @@ static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
XML_Parser parser);
static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
XML_Bool isCdata, const char *,
- const char *, STRING_POOL *);
+ const char *, STRING_POOL *,
+ enum XML_Account account);
static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
XML_Bool isCdata, const char *,
- const char *, STRING_POOL *);
+ const char *, STRING_POOL *,
+ enum XML_Account account);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
- const char *start, const char *end);
+ const char *start, const char *end,
+ enum XML_Account account);
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int reportComment(XML_Parser parser, const ENCODING *enc,
@@ -501,6 +558,34 @@ static XML_Parser parserCreate(const XML_Char *encodingName,
static void parserInit(XML_Parser parser, const XML_Char *encodingName);
+#ifdef XML_DTD
+static float accountingGetCurrentAmplification(XML_Parser rootParser);
+static void accountingReportStats(XML_Parser originParser, const char *epilog);
+static void accountingOnAbort(XML_Parser originParser);
+static void accountingReportDiff(XML_Parser rootParser,
+ unsigned int levelsAwayFromRootParser,
+ const char *before, const char *after,
+ ptrdiff_t bytesMore, int source_line,
+ enum XML_Account account);
+static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
+ const char *before, const char *after,
+ int source_line,
+ enum XML_Account account);
+
+static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
+ const char *action, int sourceLine);
+static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
+ int sourceLine);
+static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
+ int sourceLine);
+
+static XML_Parser getRootParserOf(XML_Parser parser,
+ unsigned int *outLevelDiff);
+#endif /* XML_DTD */
+
+static unsigned long getDebugLevel(const char *variableName,
+ unsigned long defaultDebugLevel);
+
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
@@ -614,6 +699,10 @@ struct XML_ParserStruct {
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
+#ifdef XML_DTD
+ ACCOUNTING m_accounting;
+ ENTITY_STATS m_entity_stats;
+#endif
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -734,6 +823,15 @@ writeRandomBytes_arc4random(void *target, size_t count) {
#ifdef _WIN32
+/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
+ as it didn't declare it in its header prior to version 5.3.0 of its
+ runtime package (mingwrt, containing stdlib.h). The upstream fix
+ was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
+# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
+ && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
+__declspec(dllimport) int rand_s(unsigned int *);
+# endif
+
/* Obtain entropy on Windows using the rand_s() function which
* generates cryptographically secure random numbers. Internally it
* uses RtlGenRandom API which is present in Windows XP and later.
@@ -789,9 +887,8 @@ gather_time_entropy(void) {
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
- const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG");
- if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) {
- fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
+ if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
+ fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
(int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy));
}
return entropy;
@@ -1053,6 +1150,18 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
parser->m_hash_secret_salt = 0;
+
+#ifdef XML_DTD
+ memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
+ parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
+ parser->m_accounting.maximumAmplificationFactor
+ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
+ parser->m_accounting.activationThresholdBytes
+ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
+
+ memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
+ parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
+#endif
}
/* moves list of bindings to m_freeBindingList */
@@ -1399,6 +1508,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
parser->m_useForeignDTD = useDTD;
return XML_ERROR_NONE;
#else
+ UNUSED_P(useDTD);
return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
#endif
}
@@ -1780,7 +1890,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
int nLeftOver;
enum XML_Status result;
/* Detect overflow (a+b > MAX <==> b > MAX-a) */
- if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
+ if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
parser->m_processor = errorProcessor;
@@ -1872,6 +1982,12 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
parser->m_errorCode = XML_ERROR_FINISHED;
return XML_STATUS_ERROR;
case XML_INITIALIZED:
+ /* Has someone called XML_GetBuffer successfully before? */
+ if (! parser->m_bufferPtr) {
+ parser->m_errorCode = XML_ERROR_NO_BUFFER;
+ return XML_STATUS_ERROR;
+ }
+
if (parser->m_parentParser == NULL && ! startParsing(parser)) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
return XML_STATUS_ERROR;
@@ -2155,7 +2271,7 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
(void)offset;
(void)size;
#endif /* defined XML_CONTEXT_BYTES */
- return (char *)0;
+ return (const char *)0;
}
XML_Size XMLCALL
@@ -2316,6 +2432,14 @@ XML_ErrorString(enum XML_Error code) {
/* Added in 2.2.5. */
case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
return XML_L("invalid argument");
+ /* Added in 2.3.0. */
+ case XML_ERROR_NO_BUFFER:
+ return XML_L(
+ "a successful prior call to function XML_GetBuffer is required");
+ /* Added in 2.4.0. */
+ case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
+ return XML_L(
+ "limit on input amplification factor (from DTD and entities) breached");
}
return NULL;
}
@@ -2352,41 +2476,75 @@ XML_ExpatVersionInfo(void) {
const XML_Feature *XMLCALL
XML_GetFeatureList(void) {
- static const XML_Feature features[]
- = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
- sizeof(XML_Char)},
- {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
- sizeof(XML_LChar)},
+ static const XML_Feature features[] = {
+ {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
+ sizeof(XML_Char)},
+ {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
+ sizeof(XML_LChar)},
#ifdef XML_UNICODE
- {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
+ {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
#endif
#ifdef XML_UNICODE_WCHAR_T
- {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
+ {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
#endif
#ifdef XML_DTD
- {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
+ {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
#ifdef XML_CONTEXT_BYTES
- {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
- XML_CONTEXT_BYTES},
+ {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
+ XML_CONTEXT_BYTES},
#endif
#ifdef XML_MIN_SIZE
- {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
+ {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
#endif
#ifdef XML_NS
- {XML_FEATURE_NS, XML_L("XML_NS"), 0},
+ {XML_FEATURE_NS, XML_L("XML_NS"), 0},
#endif
#ifdef XML_LARGE_SIZE
- {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
+ {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
#endif
#ifdef XML_ATTR_INFO
- {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
+ {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
+#endif
+#ifdef XML_DTD
+ /* Added in Expat 2.4.0. */
+ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
+ XML_L("XML_BLAP_MAX_AMP"),
+ (long int)
+ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
+ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
+ XML_L("XML_BLAP_ACT_THRES"),
+ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
#endif
- {XML_FEATURE_END, NULL, 0}};
+ {XML_FEATURE_END, NULL, 0}};
return features;
}
+#ifdef XML_DTD
+XML_Bool XMLCALL /* returns XML_TRUE if the factor was stored, XML_FALSE if rejected */
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ XML_Parser parser, float maximumAmplificationFactor) {
+ if ((parser == NULL) || (parser->m_parentParser != NULL)
+ || isnan(maximumAmplificationFactor)
+ || (maximumAmplificationFactor < 1.0f)) {
+ return XML_FALSE; /* rejected: NULL parser, parser with a parent, NaN, or factor < 1.0 */
+ }
+ parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
+ return XML_TRUE; /* factor stored in this parser's m_accounting */
+}
+
+XML_Bool XMLCALL /* returns XML_TRUE if the threshold was stored, XML_FALSE if rejected */
+XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ XML_Parser parser, unsigned long long activationThresholdBytes) {
+ if ((parser == NULL) || (parser->m_parentParser != NULL)) {
+ return XML_FALSE; /* rejected: NULL parser or parser with a parent */
+ }
+ parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
+ return XML_TRUE; /* any byte count is accepted; no range check is applied */
+}
+#endif /* XML_DTD */
+
/* Initially tag->rawName always points into the parse buffer;
for those TAG instances opened while the current parse buffer was
processed, and not yet closed, we need to store tag->rawName in a more
@@ -2439,9 +2597,9 @@ storeRawNames(XML_Parser parser) {
static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
- enum XML_Error result
- = doContent(parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ enum XML_Error result = doContent(
+ parser, 0, parser->m_encoding, start, end, endPtr,
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2466,6 +2624,14 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start,
int tok = XmlContentTok(parser->m_encoding, start, end, &next);
switch (tok) {
case XML_TOK_BOM:
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif /* XML_DTD */
+
/* If we are at the end of the buffer, this would cause the next stage,
i.e. externalEntityInitProcessor3, to pass control directly to
doContent (by detecting XML_TOK_NONE) without processing any xml text
@@ -2503,6 +2669,10 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start,
const char *next = start; /* XmlContentTok doesn't always set the last arg */
parser->m_eventPtr = start;
tok = XmlContentTok(parser->m_encoding, start, end, &next);
+ /* Note: These bytes are accounted later in:
+ - processXmlDecl
+ - externalEntityContentProcessor
+ */
parser->m_eventEndPtr = next;
switch (tok) {
@@ -2544,7 +2714,8 @@ externalEntityContentProcessor(XML_Parser parser, const char *start,
const char *end, const char **endPtr) {
enum XML_Error result
= doContent(parser, 1, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_ENTITY_EXPANSION);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2555,7 +2726,7 @@ externalEntityContentProcessor(XML_Parser parser, const char *start,
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
const char *s, const char *end, const char **nextPtr,
- XML_Bool haveMore) {
+ XML_Bool haveMore, enum XML_Account account) {
/* save one level of indirection */
DTD *const dtd = parser->m_dtd;
@@ -2573,6 +2744,17 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
for (;;) {
const char *next = s; /* XmlContentTok doesn't always set the last arg */
int tok = XmlContentTok(enc, s, end, &next);
+#ifdef XML_DTD
+ const char *accountAfter
+ = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
+ ? (haveMore ? s /* i.e. 0 bytes */ : end)
+ : next;
+ if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
+ account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_TRAILING_CR:
@@ -2628,6 +2810,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
+#ifdef XML_DTD
+ /* NOTE: We are replacing 4-6 characters of original input with 1
+ * character, so there is no amplification; hence we record the bytes
+ * without enforcing the protection (the return value is ignored). */
+ accountingDiffTolerated(parser, tok, (char *)&ch,
+ ((char *)&ch) + sizeof(XML_Char), __LINE__,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#endif /* XML_DTD */
if (parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
else if (parser->m_defaultHandler)
@@ -2746,7 +2936,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
}
tag->name.str = (XML_Char *)tag->buf;
*toPtr = XML_T('\0');
- result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
+ result
+ = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
if (result)
return result;
if (parser->m_startElementHandler)
@@ -2770,7 +2961,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
if (! name.str)
return XML_ERROR_NO_MEMORY;
poolFinish(&parser->m_tempPool);
- result = storeAtts(parser, enc, s, &name, &bindings);
+ result = storeAtts(parser, enc, s, &name, &bindings,
+ XML_ACCOUNT_NONE /* token spans whole start tag */);
if (result != XML_ERROR_NONE) {
freeBindings(parser, bindings);
return result;
@@ -2905,7 +3097,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
/* END disabled code */
else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
- result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
+ result
+ = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
if (result != XML_ERROR_NONE)
return result;
else if (! next) {
@@ -3034,7 +3227,8 @@ freeBindings(XML_Parser parser, BINDING *bindings) {
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
- TAG_NAME *tagNamePtr, BINDING **bindingsPtr) {
+ TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
ELEMENT_TYPE *elementType;
int nDefaultAtts;
@@ -3144,7 +3338,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
/* normalize the attribute value */
result = storeAttributeValue(
parser, enc, isCdata, parser->m_atts[i].valuePtr,
- parser->m_atts[i].valueEnd, &parser->m_tempPool);
+ parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
if (result)
return result;
appAtts[attIndex] = poolStart(&parser->m_tempPool);
@@ -3533,9 +3727,9 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
- enum XML_Error result
- = doCdataSection(parser, parser->m_encoding, &start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ enum XML_Error result = doCdataSection(
+ parser, parser->m_encoding, &start, end, endPtr,
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
if (result != XML_ERROR_NONE)
return result;
if (start) {
@@ -3555,7 +3749,8 @@ cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
- const char *end, const char **nextPtr, XML_Bool haveMore) {
+ const char *end, const char **nextPtr, XML_Bool haveMore,
+ enum XML_Account account) {
const char *s = *startPtr;
const char **eventPP;
const char **eventEndPP;
@@ -3571,8 +3766,16 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*startPtr = NULL;
for (;;) {
- const char *next;
+ const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
int tok = XmlCdataSectionTok(enc, s, end, &next);
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#else
+ UNUSED_P(account);
+#endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_CDATA_SECT_CLOSE:
@@ -3689,7 +3892,7 @@ ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
static enum XML_Error
doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
const char *end, const char **nextPtr, XML_Bool haveMore) {
- const char *next;
+ const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
int tok;
const char *s = *startPtr;
const char **eventPP;
@@ -3717,6 +3920,13 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*eventPP = s;
*startPtr = NULL;
tok = XmlIgnoreSectionTok(enc, s, end, &next);
+# ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+# endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_IGNORE_SECT:
@@ -3801,6 +4011,15 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
const char *versionend;
const XML_Char *storedversion = NULL;
int standalone = -1;
+
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
+
if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
&version, &versionend, &encodingName, &newEncoding, &standalone)) {
@@ -3950,6 +4169,10 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
tok = XmlPrologTok(parser->m_encoding, start, end, &next);
+ /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
+ - storeEntityValue
+ - processXmlDecl
+ */
parser->m_eventEndPtr = next;
if (tok <= 0) {
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
@@ -3968,7 +4191,8 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, parser->m_encoding, s, end);
+ return storeEntityValue(parser, parser->m_encoding, s, end,
+ XML_ACCOUNT_DIRECT);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -3995,6 +4219,14 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
*/
else if (tok == XML_TOK_BOM && next == end
&& ! parser->m_parsingStatus.finalBuffer) {
+# ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+# endif
+
*nextPtr = next;
return XML_ERROR_NONE;
}
@@ -4037,16 +4269,24 @@ externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
}
/* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
However, when parsing an external subset, doProlog will not accept a BOM
- as valid, and report a syntax error, so we have to skip the BOM
+ as valid, and report a syntax error, so we have to skip the BOM, and
+ account for the BOM bytes.
*/
else if (tok == XML_TOK_BOM) {
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+
s = next;
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
}
parser->m_processor = prologProcessor;
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
}
static enum XML_Error PTRCALL
@@ -4059,6 +4299,9 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
tok = XmlPrologTok(enc, start, end, &next);
+ /* Note: These bytes are accounted later in:
+ - storeEntityValue
+ */
if (tok <= 0) {
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
*nextPtr = s;
@@ -4076,7 +4319,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
}
start = next;
}
@@ -4090,13 +4333,14 @@ prologProcessor(XML_Parser parser, const char *s, const char *end,
const char *next = s;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
}
static enum XML_Error
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
- XML_Bool allowClosingDoctype) {
+ XML_Bool allowClosingDoctype, enum XML_Account account) {
#ifdef XML_DTD
static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
#endif /* XML_DTD */
@@ -4123,6 +4367,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
+#ifndef XML_DTD
+ UNUSED_P(account);
+#endif
+
/* save one level of indirection */
DTD *const dtd = parser->m_dtd;
@@ -4187,6 +4435,19 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
}
role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
+#ifdef XML_DTD
+ switch (role) {
+ case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
+ case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
+ case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
+ break;
+ default:
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+ }
+#endif
switch (role) {
case XML_ROLE_XML_DECL: {
enum XML_Error result = processXmlDecl(parser, 0, s, next);
@@ -4462,7 +4723,8 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
const XML_Char *attVal;
enum XML_Error result = storeAttributeValue(
parser, enc, parser->m_declAttributeIsCdata,
- s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool);
+ s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
+ XML_ACCOUNT_NONE);
if (result)
return result;
attVal = poolStart(&dtd->pool);
@@ -4495,8 +4757,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
break;
case XML_ROLE_ENTITY_VALUE:
if (dtd->keepProcessing) {
- enum XML_Error result = storeEntityValue(
- parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
+ enum XML_Error result
+ = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
+ next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -4886,12 +5149,15 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (parser->m_externalEntityRefHandler) {
dtd->paramEntityRead = XML_FALSE;
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
if (! parser->m_externalEntityRefHandler(
parser->m_externalEntityRefHandlerArg, 0, entity->base,
entity->systemId, entity->publicId)) {
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
}
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
handleDefault = XML_FALSE;
if (! dtd->paramEntityRead) {
@@ -5089,6 +5355,13 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
const char *next = NULL;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
parser->m_eventEndPtr = next;
switch (tok) {
/* report partial linebreak - it might be the last token */
@@ -5162,6 +5435,9 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+#ifdef XML_DTD
+ entityTrackingOnOpen(parser, entity, __LINE__);
+#endif
entity->processed = 0;
openEntity->next = parser->m_openInternalEntities;
parser->m_openInternalEntities = openEntity;
@@ -5170,8 +5446,8 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
openEntity->betweenDecl = betweenDecl;
openEntity->internalEventPtr = NULL;
openEntity->internalEventEndPtr = NULL;
- textStart = (char *)entity->textPtr;
- textEnd = (char *)(entity->textPtr + entity->textLen);
+ textStart = (const char *)entity->textPtr;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
/* Set a safe default value in case 'next' does not get set */
next = textStart;
@@ -5180,17 +5456,22 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
int tok
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE);
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
} else
#endif /* XML_DTD */
result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE);
+ textStart, textEnd, &next, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
if (result == XML_ERROR_NONE) {
if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
entity->processed = (int)(next - textStart);
parser->m_processor = internalEntityProcessor;
} else {
+#ifdef XML_DTD
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif /* XML_DTD */
entity->open = XML_FALSE;
parser->m_openInternalEntities = openEntity->next;
/* put openEntity back in list of free instances */
@@ -5213,8 +5494,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
return XML_ERROR_UNEXPECTED_STATE;
entity = openEntity->entity;
- textStart = ((char *)entity->textPtr) + entity->processed;
- textEnd = (char *)(entity->textPtr + entity->textLen);
+ textStart = ((const char *)entity->textPtr) + entity->processed;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
/* Set a safe default value in case 'next' does not get set */
next = textStart;
@@ -5223,20 +5504,24 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
int tok
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE);
+ tok, next, &next, XML_FALSE, XML_TRUE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
} else
#endif /* XML_DTD */
result = doContent(parser, openEntity->startTagLevel,
parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE);
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
if (result != XML_ERROR_NONE)
return result;
else if (textEnd != next
&& parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - (char *)entity->textPtr);
+ entity->processed = (int)(next - (const char *)entity->textPtr);
return result;
} else {
+#ifdef XML_DTD
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
entity->open = XML_FALSE;
parser->m_openInternalEntities = openEntity->next;
/* put openEntity back in list of free instances */
@@ -5250,7 +5535,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
parser->m_processor = prologProcessor;
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
} else
#endif /* XML_DTD */
{
@@ -5258,7 +5544,8 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
/* see externalEntityContentProcessor vs contentProcessor */
return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
}
}
@@ -5273,9 +5560,10 @@ errorProcessor(XML_Parser parser, const char *s, const char *end,
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
- const char *ptr, const char *end, STRING_POOL *pool) {
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account) {
enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
+ = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5287,11 +5575,23 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
- const char *ptr, const char *end, STRING_POOL *pool) {
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
+#ifndef XML_DTD
+ UNUSED_P(account);
+#endif
+
for (;;) {
- const char *next;
+ const char *next
+ = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
switch (tok) {
case XML_TOK_NONE:
return XML_ERROR_NONE;
@@ -5351,6 +5651,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
+#ifdef XML_DTD
+ /* NOTE: We are replacing 4-6 characters of original input with 1
+ * character, so there is no amplification; hence we record the bytes
+ * without enforcing the protection (the return value is ignored). */
+ accountingDiffTolerated(parser, tok, (char *)&ch,
+ ((char *)&ch) + sizeof(XML_Char), __LINE__,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#endif /* XML_DTD */
if (! poolAppendChar(pool, ch))
return XML_ERROR_NO_MEMORY;
break;
@@ -5428,9 +5736,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
enum XML_Error result;
const XML_Char *textEnd = entity->textPtr + entity->textLen;
entity->open = XML_TRUE;
+#ifdef XML_DTD
+ entityTrackingOnOpen(parser, entity, __LINE__);
+#endif
result = appendAttributeValue(parser, parser->m_internalEncoding,
- isCdata, (char *)entity->textPtr,
- (char *)textEnd, pool);
+ isCdata, (const char *)entity->textPtr,
+ (const char *)textEnd, pool,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#ifdef XML_DTD
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
entity->open = XML_FALSE;
if (result)
return result;
@@ -5460,13 +5775,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
- const char *entityTextPtr, const char *entityTextEnd) {
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
#ifdef XML_DTD
int oldInEntityValue = parser->m_prologState.inEntityValue;
parser->m_prologState.inEntityValue = 1;
+#else
+ UNUSED_P(account);
#endif /* XML_DTD */
/* never return Null for the value argument in EntityDeclHandler,
since this would indicate an external entity; therefore we
@@ -5477,8 +5795,19 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
}
for (;;) {
- const char *next;
+ const char *next
+ = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
+
+#ifdef XML_DTD
+ if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
+ account)) {
+ accountingOnAbort(parser);
+ result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ goto endEntityValue;
+ }
+#endif
+
switch (tok) {
case XML_TOK_PARAM_ENTITY_REF:
#ifdef XML_DTD
@@ -5514,13 +5843,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
if (parser->m_externalEntityRefHandler) {
dtd->paramEntityRead = XML_FALSE;
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
if (! parser->m_externalEntityRefHandler(
parser->m_externalEntityRefHandlerArg, 0, entity->base,
entity->systemId, entity->publicId)) {
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
goto endEntityValue;
}
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
if (! dtd->paramEntityRead)
dtd->keepProcessing = dtd->standalone;
@@ -5528,9 +5860,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
dtd->keepProcessing = dtd->standalone;
} else {
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
result = storeEntityValue(
- parser, parser->m_internalEncoding, (char *)entity->textPtr,
- (char *)(entity->textPtr + entity->textLen));
+ parser, parser->m_internalEncoding, (const char *)entity->textPtr,
+ (const char *)(entity->textPtr + entity->textLen),
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
if (result)
goto endEntityValue;
@@ -6485,7 +6820,7 @@ hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
iter->p = table->v;
- iter->end = iter->p + table->size;
+ iter->end = iter->p ? iter->p + table->size : NULL;
}
static NAMED *FASTCALL
@@ -6891,3 +7226,755 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
memcpy(result, s, charsRequired * sizeof(XML_Char));
return result;
}
+
+#ifdef XML_DTD
+
+static float
+accountingGetCurrentAmplification(XML_Parser rootParser) {
+ const XmlBigCount countBytesOutput
+ = rootParser->m_accounting.countBytesDirect
+ + rootParser->m_accounting.countBytesIndirect;
+ const float amplificationFactor
+ = rootParser->m_accounting.countBytesDirect
+ ? (countBytesOutput
+ / (float)(rootParser->m_accounting.countBytesDirect))
+ : 1.0f;
+ assert(! rootParser->m_parentParser);
+ return amplificationFactor;
+}
+
+static void
+accountingReportStats(XML_Parser originParser, const char *epilog) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ if (rootParser->m_accounting.debugLevel < 1) {
+ return;
+ }
+
+ const float amplificationFactor
+ = accountingGetCurrentAmplification(rootParser);
+ fprintf(stderr,
+ "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
+ "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
+ (void *)rootParser, rootParser->m_accounting.countBytesDirect,
+ rootParser->m_accounting.countBytesIndirect,
+ (double)amplificationFactor, epilog);
+}
+
+static void
+accountingOnAbort(XML_Parser originParser) {
+ accountingReportStats(originParser, " ABORTING\n");
+}
+
+static void
+accountingReportDiff(XML_Parser rootParser,
+ unsigned int levelsAwayFromRootParser, const char *before,
+ const char *after, ptrdiff_t bytesMore, int source_line,
+ enum XML_Account account) {
+ assert(! rootParser->m_parentParser);
+
+ fprintf(stderr,
+ " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
+ bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
+ levelsAwayFromRootParser, source_line, 10, "");
+
+ const char ellipis[] = "[..]";
+ const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
+ const unsigned int contextLength = 10;
+
+ /* Note: Performance is of no concern here */
+ const char *walker = before;
+ if ((rootParser->m_accounting.debugLevel >= 3)
+ || (after - before)
+ <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
+ for (; walker < after; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ } else {
+ for (; walker < before + contextLength; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ fprintf(stderr, ellipis);
+ walker = after - contextLength;
+ for (; walker < after; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ }
+ fprintf(stderr, "\"\n");
+}
+
+static XML_Bool
+accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
+ const char *after, int source_line,
+ enum XML_Account account) {
+ /* Note: We need to check the token type *first* to be sure that
+ * we can even access variable <after>, safely.
+ * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
+ switch (tok) {
+ case XML_TOK_INVALID:
+ case XML_TOK_PARTIAL:
+ case XML_TOK_PARTIAL_CHAR:
+ case XML_TOK_NONE:
+ return XML_TRUE;
+ }
+
+ if (account == XML_ACCOUNT_NONE)
+ return XML_TRUE; /* because these bytes have been accounted for, already */
+
+ unsigned int levelsAwayFromRootParser;
+ const XML_Parser rootParser
+ = getRootParserOf(originParser, &levelsAwayFromRootParser);
+ assert(! rootParser->m_parentParser);
+
+ const int isDirect
+ = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
+ const ptrdiff_t bytesMore = after - before;
+
+ XmlBigCount *const additionTarget
+ = isDirect ? &rootParser->m_accounting.countBytesDirect
+ : &rootParser->m_accounting.countBytesIndirect;
+
+ /* Detect and avoid integer overflow */
+ if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
+ return XML_FALSE;
+ *additionTarget += bytesMore;
+
+ const XmlBigCount countBytesOutput
+ = rootParser->m_accounting.countBytesDirect
+ + rootParser->m_accounting.countBytesIndirect;
+ const float amplificationFactor
+ = accountingGetCurrentAmplification(rootParser);
+ const XML_Bool tolerated
+ = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
+ || (amplificationFactor
+ <= rootParser->m_accounting.maximumAmplificationFactor);
+
+ if (rootParser->m_accounting.debugLevel >= 2) {
+ accountingReportStats(rootParser, "");
+ accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
+ bytesMore, source_line, account);
+ }
+
+ return tolerated;
+}
+
+unsigned long long
+testingAccountingGetCountBytesDirect(XML_Parser parser) {
+ if (! parser)
+ return 0;
+ return parser->m_accounting.countBytesDirect;
+}
+
+unsigned long long
+testingAccountingGetCountBytesIndirect(XML_Parser parser) {
+ if (! parser)
+ return 0;
+ return parser->m_accounting.countBytesIndirect;
+}
+
+static void
+entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
+ const char *action, int sourceLine) {
+ assert(! rootParser->m_parentParser);
+ if (rootParser->m_entity_stats.debugLevel < 1)
+ return;
+
+# if defined(XML_UNICODE)
+ const char *const entityName = "[..]";
+# else
+ const char *const entityName = entity->name;
+# endif
+
+ fprintf(
+ stderr,
+ "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
+ (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
+ rootParser->m_entity_stats.currentDepth,
+ rootParser->m_entity_stats.maximumDepthSeen,
+ (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
+ entity->is_param ? "%" : "&", entityName, action, entity->textLen,
+ sourceLine);
+}
+
+static void
+entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ rootParser->m_entity_stats.countEverOpened++;
+ rootParser->m_entity_stats.currentDepth++;
+ if (rootParser->m_entity_stats.currentDepth
+ > rootParser->m_entity_stats.maximumDepthSeen) {
+ rootParser->m_entity_stats.maximumDepthSeen++;
+ }
+
+ entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
+}
+
+static void
+entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
+ rootParser->m_entity_stats.currentDepth--;
+}
+
+static XML_Parser
+getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
+ XML_Parser rootParser = parser;
+ unsigned int stepsTakenUpwards = 0;
+ while (rootParser->m_parentParser) {
+ rootParser = rootParser->m_parentParser;
+ stepsTakenUpwards++;
+ }
+ assert(! rootParser->m_parentParser);
+ if (outLevelDiff != NULL) {
+ *outLevelDiff = stepsTakenUpwards;
+ }
+ return rootParser;
+}
+
+const char *
+unsignedCharToPrintable(unsigned char c) {
+ switch (c) {
+ case 0:
+ return "\\0";
+ case 1:
+ return "\\x1";
+ case 2:
+ return "\\x2";
+ case 3:
+ return "\\x3";
+ case 4:
+ return "\\x4";
+ case 5:
+ return "\\x5";
+ case 6:
+ return "\\x6";
+ case 7:
+ return "\\x7";
+ case 8:
+ return "\\x8";
+ case 9:
+ return "\\t";
+ case 10:
+ return "\\n";
+ case 11:
+ return "\\xB";
+ case 12:
+ return "\\xC";
+ case 13:
+ return "\\r";
+ case 14:
+ return "\\xE";
+ case 15:
+ return "\\xF";
+ case 16:
+ return "\\x10";
+ case 17:
+ return "\\x11";
+ case 18:
+ return "\\x12";
+ case 19:
+ return "\\x13";
+ case 20:
+ return "\\x14";
+ case 21:
+ return "\\x15";
+ case 22:
+ return "\\x16";
+ case 23:
+ return "\\x17";
+ case 24:
+ return "\\x18";
+ case 25:
+ return "\\x19";
+ case 26:
+ return "\\x1A";
+ case 27:
+ return "\\x1B";
+ case 28:
+ return "\\x1C";
+ case 29:
+ return "\\x1D";
+ case 30:
+ return "\\x1E";
+ case 31:
+ return "\\x1F";
+ case 32:
+ return " ";
+ case 33:
+ return "!";
+ case 34:
+ return "\\\"";
+ case 35:
+ return "#";
+ case 36:
+ return "$";
+ case 37:
+ return "%";
+ case 38:
+ return "&";
+ case 39:
+ return "'";
+ case 40:
+ return "(";
+ case 41:
+ return ")";
+ case 42:
+ return "*";
+ case 43:
+ return "+";
+ case 44:
+ return ",";
+ case 45:
+ return "-";
+ case 46:
+ return ".";
+ case 47:
+ return "/";
+ case 48:
+ return "0";
+ case 49:
+ return "1";
+ case 50:
+ return "2";
+ case 51:
+ return "3";
+ case 52:
+ return "4";
+ case 53:
+ return "5";
+ case 54:
+ return "6";
+ case 55:
+ return "7";
+ case 56:
+ return "8";
+ case 57:
+ return "9";
+ case 58:
+ return ":";
+ case 59:
+ return ";";
+ case 60:
+ return "<";
+ case 61:
+ return "=";
+ case 62:
+ return ">";
+ case 63:
+ return "?";
+ case 64:
+ return "@";
+ case 65:
+ return "A";
+ case 66:
+ return "B";
+ case 67:
+ return "C";
+ case 68:
+ return "D";
+ case 69:
+ return "E";
+ case 70:
+ return "F";
+ case 71:
+ return "G";
+ case 72:
+ return "H";
+ case 73:
+ return "I";
+ case 74:
+ return "J";
+ case 75:
+ return "K";
+ case 76:
+ return "L";
+ case 77:
+ return "M";
+ case 78:
+ return "N";
+ case 79:
+ return "O";
+ case 80:
+ return "P";
+ case 81:
+ return "Q";
+ case 82:
+ return "R";
+ case 83:
+ return "S";
+ case 84:
+ return "T";
+ case 85:
+ return "U";
+ case 86:
+ return "V";
+ case 87:
+ return "W";
+ case 88:
+ return "X";
+ case 89:
+ return "Y";
+ case 90:
+ return "Z";
+ case 91:
+ return "[";
+ case 92:
+ return "\\\\";
+ case 93:
+ return "]";
+ case 94:
+ return "^";
+ case 95:
+ return "_";
+ case 96:
+ return "`";
+ case 97:
+ return "a";
+ case 98:
+ return "b";
+ case 99:
+ return "c";
+ case 100:
+ return "d";
+ case 101:
+ return "e";
+ case 102:
+ return "f";
+ case 103:
+ return "g";
+ case 104:
+ return "h";
+ case 105:
+ return "i";
+ case 106:
+ return "j";
+ case 107:
+ return "k";
+ case 108:
+ return "l";
+ case 109:
+ return "m";
+ case 110:
+ return "n";
+ case 111:
+ return "o";
+ case 112:
+ return "p";
+ case 113:
+ return "q";
+ case 114:
+ return "r";
+ case 115:
+ return "s";
+ case 116:
+ return "t";
+ case 117:
+ return "u";
+ case 118:
+ return "v";
+ case 119:
+ return "w";
+ case 120:
+ return "x";
+ case 121:
+ return "y";
+ case 122:
+ return "z";
+ case 123:
+ return "{";
+ case 124:
+ return "|";
+ case 125:
+ return "}";
+ case 126:
+ return "~";
+ case 127:
+ return "\\x7F";
+ case 128:
+ return "\\x80";
+ case 129:
+ return "\\x81";
+ case 130:
+ return "\\x82";
+ case 131:
+ return "\\x83";
+ case 132:
+ return "\\x84";
+ case 133:
+ return "\\x85";
+ case 134:
+ return "\\x86";
+ case 135:
+ return "\\x87";
+ case 136:
+ return "\\x88";
+ case 137:
+ return "\\x89";
+ case 138:
+ return "\\x8A";
+ case 139:
+ return "\\x8B";
+ case 140:
+ return "\\x8C";
+ case 141:
+ return "\\x8D";
+ case 142:
+ return "\\x8E";
+ case 143:
+ return "\\x8F";
+ case 144:
+ return "\\x90";
+ case 145:
+ return "\\x91";
+ case 146:
+ return "\\x92";
+ case 147:
+ return "\\x93";
+ case 148:
+ return "\\x94";
+ case 149:
+ return "\\x95";
+ case 150:
+ return "\\x96";
+ case 151:
+ return "\\x97";
+ case 152:
+ return "\\x98";
+ case 153:
+ return "\\x99";
+ case 154:
+ return "\\x9A";
+ case 155:
+ return "\\x9B";
+ case 156:
+ return "\\x9C";
+ case 157:
+ return "\\x9D";
+ case 158:
+ return "\\x9E";
+ case 159:
+ return "\\x9F";
+ case 160:
+ return "\\xA0";
+ case 161:
+ return "\\xA1";
+ case 162:
+ return "\\xA2";
+ case 163:
+ return "\\xA3";
+ case 164:
+ return "\\xA4";
+ case 165:
+ return "\\xA5";
+ case 166:
+ return "\\xA6";
+ case 167:
+ return "\\xA7";
+ case 168:
+ return "\\xA8";
+ case 169:
+ return "\\xA9";
+ case 170:
+ return "\\xAA";
+ case 171:
+ return "\\xAB";
+ case 172:
+ return "\\xAC";
+ case 173:
+ return "\\xAD";
+ case 174:
+ return "\\xAE";
+ case 175:
+ return "\\xAF";
+ case 176:
+ return "\\xB0";
+ case 177:
+ return "\\xB1";
+ case 178:
+ return "\\xB2";
+ case 179:
+ return "\\xB3";
+ case 180:
+ return "\\xB4";
+ case 181:
+ return "\\xB5";
+ case 182:
+ return "\\xB6";
+ case 183:
+ return "\\xB7";
+ case 184:
+ return "\\xB8";
+ case 185:
+ return "\\xB9";
+ case 186:
+ return "\\xBA";
+ case 187:
+ return "\\xBB";
+ case 188:
+ return "\\xBC";
+ case 189:
+ return "\\xBD";
+ case 190:
+ return "\\xBE";
+ case 191:
+ return "\\xBF";
+ case 192:
+ return "\\xC0";
+ case 193:
+ return "\\xC1";
+ case 194:
+ return "\\xC2";
+ case 195:
+ return "\\xC3";
+ case 196:
+ return "\\xC4";
+ case 197:
+ return "\\xC5";
+ case 198:
+ return "\\xC6";
+ case 199:
+ return "\\xC7";
+ case 200:
+ return "\\xC8";
+ case 201:
+ return "\\xC9";
+ case 202:
+ return "\\xCA";
+ case 203:
+ return "\\xCB";
+ case 204:
+ return "\\xCC";
+ case 205:
+ return "\\xCD";
+ case 206:
+ return "\\xCE";
+ case 207:
+ return "\\xCF";
+ case 208:
+ return "\\xD0";
+ case 209:
+ return "\\xD1";
+ case 210:
+ return "\\xD2";
+ case 211:
+ return "\\xD3";
+ case 212:
+ return "\\xD4";
+ case 213:
+ return "\\xD5";
+ case 214:
+ return "\\xD6";
+ case 215:
+ return "\\xD7";
+ case 216:
+ return "\\xD8";
+ case 217:
+ return "\\xD9";
+ case 218:
+ return "\\xDA";
+ case 219:
+ return "\\xDB";
+ case 220:
+ return "\\xDC";
+ case 221:
+ return "\\xDD";
+ case 222:
+ return "\\xDE";
+ case 223:
+ return "\\xDF";
+ case 224:
+ return "\\xE0";
+ case 225:
+ return "\\xE1";
+ case 226:
+ return "\\xE2";
+ case 227:
+ return "\\xE3";
+ case 228:
+ return "\\xE4";
+ case 229:
+ return "\\xE5";
+ case 230:
+ return "\\xE6";
+ case 231:
+ return "\\xE7";
+ case 232:
+ return "\\xE8";
+ case 233:
+ return "\\xE9";
+ case 234:
+ return "\\xEA";
+ case 235:
+ return "\\xEB";
+ case 236:
+ return "\\xEC";
+ case 237:
+ return "\\xED";
+ case 238:
+ return "\\xEE";
+ case 239:
+ return "\\xEF";
+ case 240:
+ return "\\xF0";
+ case 241:
+ return "\\xF1";
+ case 242:
+ return "\\xF2";
+ case 243:
+ return "\\xF3";
+ case 244:
+ return "\\xF4";
+ case 245:
+ return "\\xF5";
+ case 246:
+ return "\\xF6";
+ case 247:
+ return "\\xF7";
+ case 248:
+ return "\\xF8";
+ case 249:
+ return "\\xF9";
+ case 250:
+ return "\\xFA";
+ case 251:
+ return "\\xFB";
+ case 252:
+ return "\\xFC";
+ case 253:
+ return "\\xFD";
+ case 254:
+ return "\\xFE";
+ case 255:
+ return "\\xFF";
+ default:
+ assert(0); /* never gets here */
+ return "dead code";
+ }
+ assert(0); /* never gets here */
+}
+
+#endif /* XML_DTD */
+
+static unsigned long
+getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
+ const char *const valueOrNull = getenv(variableName);
+ if (valueOrNull == NULL) {
+ return defaultDebugLevel;
+ }
+ const char *const value = valueOrNull;
+
+ errno = 0;
+ char *afterValue = (char *)value;
+ unsigned long debugLevel = strtoul(value, &afterValue, 10);
+ if ((errno != 0) || (afterValue[0] != '\0')) {
+ errno = 0;
+ return defaultDebugLevel;
+ }
+
+ return debugLevel;
+}
diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c
index 4d3e3e86e9..08173b0fd5 100644
--- a/Modules/expat/xmlrole.c
+++ b/Modules/expat/xmlrole.c
@@ -7,7 +7,14 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
+ Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -34,11 +41,9 @@
#ifdef _WIN32
# include "winconfig.h"
-#else
-# ifdef HAVE_EXPAT_CONFIG_H
-# include <expat_config.h>
-# endif
-#endif /* ndef _WIN32 */
+#endif
+
+#include <expat_config.h>
#include "expat_external.h"
#include "internal.h"
@@ -1220,6 +1225,8 @@ common(PROLOG_STATE *state, int tok) {
#ifdef XML_DTD
if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
return XML_ROLE_INNER_PARAM_ENTITY_REF;
+#else
+ UNUSED_P(tok);
#endif
state->handler = error;
return XML_ROLE_ERROR;
diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h
index 036aba64fd..d6e1fa150a 100644
--- a/Modules/expat/xmlrole.h
+++ b/Modules/expat/xmlrole.h
@@ -7,7 +7,10 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c
index 54cfedb85c..f2b6b40606 100644
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@@ -7,7 +7,19 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
+ Copyright (c) 2016 Don Lewis <truckman@apache.org>
+ Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
+ Copyright (c) 2017 Alexander Bluhm <alexander.bluhm@gmx.net>
+ Copyright (c) 2017 Benbuck Nason <bnason@netflix.com>
+ Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
+ Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -30,26 +42,16 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifdef _WIN32
-# include "winconfig.h"
-#else
-# ifdef HAVE_EXPAT_CONFIG_H
-# include <expat_config.h>
-# endif
-#endif /* ndef _WIN32 */
-
#include <stddef.h>
#include <string.h> /* memcpy */
+#include <stdbool.h>
-#if defined(_MSC_VER) && (_MSC_VER <= 1700)
-/* for vs2012/11.0/1700 and earlier Visual Studio compilers */
-# define bool int
-# define false 0
-# define true 1
-#else
-# include <stdbool.h>
+#ifdef _WIN32
+# include "winconfig.h"
#endif
+#include <expat_config.h>
+
#include "expat_external.h"
#include "internal.h"
#include "xmltok.h"
@@ -269,8 +271,14 @@ sb_byteToAscii(const ENCODING *enc, const char *p) {
#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
-#define IS_INVALID_CHAR(enc, p, n) \
- (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
+#ifdef XML_MIN_SIZE
+# define IS_INVALID_CHAR(enc, p, n) \
+ (AS_NORMAL_ENCODING(enc)->isInvalid##n \
+ && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
+#else
+# define IS_INVALID_CHAR(enc, p, n) \
+ (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
+#endif
#ifdef XML_MIN_SIZE
# define IS_NAME_CHAR_MINBPC(enc, p) \
@@ -589,13 +597,13 @@ static const struct normal_encoding ascii_encoding
static int PTRFASTCALL
unicode_byte_type(char hi, char lo) {
switch ((unsigned char)hi) {
- /* 0xD800–0xDBFF first 16-bit code unit or high surrogate (W1) */
+ /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
return BT_LEAD4;
- /* 0xDC00–0xDFFF second 16-bit code unit or low surrogate (W2) */
+ /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
case 0xDC:
case 0xDD:
case 0xDE:
diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h
index 2adbf5307b..6f630c2f9b 100644
--- a/Modules/expat/xmltok.h
+++ b/Modules/expat/xmltok.h
@@ -7,7 +7,11 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2002-2005 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c
index c209221cd7..0430591b42 100644
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -1,4 +1,4 @@
-/* This file is included!
+/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -7,7 +7,15 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
+ Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
+ Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
+ Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
+ Copyright (c) 2020 Boris Kolpackov <boris@codesynthesis.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -32,7 +40,7 @@
#ifdef XML_TOK_IMPL_C
-# ifndef IS_INVALID_CHAR
+# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
# define IS_INVALID_CHAR(enc, ptr, n) (0)
# endif
@@ -1768,13 +1776,14 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
# define LEAD_CASE(n) \
case BT_LEAD##n: \
ptr += n; \
+ pos->columnNumber++; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_LF:
- pos->columnNumber = (XML_Size)-1;
+ pos->columnNumber = 0;
pos->lineNumber++;
ptr += MINBPC(enc);
break;
@@ -1783,13 +1792,13 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
- pos->columnNumber = (XML_Size)-1;
+ pos->columnNumber = 0;
break;
default:
ptr += MINBPC(enc);
+ pos->columnNumber++;
break;
}
- pos->columnNumber++;
}
}
diff --git a/Modules/expat/xmltok_impl.h b/Modules/expat/xmltok_impl.h
index e925dbc7e2..c518aada01 100644
--- a/Modules/expat/xmltok_impl.h
+++ b/Modules/expat/xmltok_impl.h
@@ -7,7 +7,8 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2017-2019 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/xmltok_ns.c b/Modules/expat/xmltok_ns.c
index e1b46a7c04..5fd8392235 100644
--- a/Modules/expat/xmltok_ns.c
+++ b/Modules/expat/xmltok_ns.c
@@ -7,7 +7,11 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
+ Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
+ Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
+ Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
+ Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -89,7 +93,7 @@ NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
static const ENCODING *
NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) {
# define ENCODING_MAX 128
- char buf[ENCODING_MAX] = {0};
+ char buf[ENCODING_MAX];
char *p = buf;
int i;
XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);