Introduce the subset of svn_cstring_ functions into APR as the apr_cstr_

family of functions. Requires the introduction of APR_ERANGE. Solves apr_cstr_casecmp[n] for ASCII and EBCDIC and borrows from the work of jim, ylavic and wrowe, see r1715401 forwards in; http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util.c?view=log&pathrev=1722194 More performance review is needed against different compilers, so went ahead and borrowed original svn implementation as a basis, and we can port whichever performance enhancements test out more efficiently. Proposing for APR 1.6.0 which is the thought behind the initial @since tags. These must be changed if not accepted for backport. Note that the svn_cstring_join function could not be implemented as-is due to the absence of svn's counted string functionality. It deserves an implementation if not already present, or should be removed before tagging or backporting. No whitespace/formatting cleanup on this pass in order to preserve as much of the svn attributions as possible. This can happen in a later pass to conform to APR's style conventions. While renaming the functions, declarations themsel git-svn-id: http://svn.apache.org/repos/asf/apr/apr/trunk@1726928 13f79535-47bb-0310-9956-ffa450edef68
author: wrowe <wrowe@13f79535-47bb-0310-9956-ffa450edef68> 2016-01-27 00:08:59 +0000
committer: wrowe <wrowe@13f79535-47bb-0310-9956-ffa450edef68> 2016-01-27 00:08:59 +0000
commit: ec963a3b9647f20e6655bf4a9a3714117aee76e3 (patch)
tree: bf41a078c0249d7cd6b867326b1fd75e4af54257 /include/apr_cstr.h
parent: 03fc502eb7e015477e466e29a6d8ca5119906724 (diff)
download: libapr-ec963a3b9647f20e6655bf4a9a3714117aee76e3.tar.gz
1 files changed, 282 insertions, 0 deletions
diff --git a/include/apr_cstr.h b/include/apr_cstr.h
new file mode 100644
index 000000000..3a7506f35
--- /dev/null
+++ b/include/apr_cstr.h
@@ -0,0 +1,282 @@
+/* ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+/**
+ * @file apr_cstr.h
+ * @brief C string goodies.
+ */
+
+#ifndef APR_CSTR_H
+#define APR_CSTR_H
+
+#include <apr.h>          /* for apr_size_t */
+#include <apr_pools.h>    /* for apr_pool_t */
+#include <apr_tables.h>   /* for apr_array_header_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @defgroup apr_cstr C (POSIX) locale string functions
+ * @ingroup apr_strings
+ *
+ * The apr_cstr_* functions provide traditional C char * string text handling,
+ * and notabilty they treat all text in the C (a.k.a. POSIX) locale using the
+ * minimal POSIX character set, represented in either ASCII or a corresponding
+ * EBCDIC subset.
+ *
+ * Character values outside of that set are treated as opaque bytes, and all
+ * multi-byte character sequences are handled as individual distinct octets.
+ *
+ * Multi-byte characters sequences whose octets fall in the ASCII range cause
+ * unexpected results, such as in the ISO-2022-JP code page where ASCII octets
+ * occur within both shift-state and multibyte sequences.
+ *
+ * In the case of the UTF-8 encoding, all multibyte characters all fall outside
+ * of the C/POSIX range of characters, so these functions are generally safe
+ * to use on UTF-8 strings. The programmer must be aware that each octet may
+ * not represent a distinct printable character in such encodings.
+ *
+ * The standard C99/POSIX string functions, rather than apr_cstr, should be
+ * used in all cases where the current locale and encoding of the text is
+ * significant.
+ * @{
+ */
+
+
+/** Divide @a input into substrings, interpreting any char from @a sep
+ * as a token separator.
+ *
+ * Return an array of copies of those substrings (plain const char*),
+ * allocating both the array and the copies in @a pool.
+ *
+ * None of the elements added to the array contain any of the
+ * characters in @a sep_chars, and none of the new elements are empty
+ * (thus, it is possible that the returned array will have length
+ * zero).
+ *
+ * If @a chop_whitespace is TRUE, then remove leading and trailing
+ * whitespace from the returned strings.
+ *
+ * @since New in 1.6
+ */
+apr_array_header_t * apr_cstr_split(const char *input,
+                                    const char *sep_chars,
+                                    int chop_whitespace,
+                                    apr_pool_t *pool);
+
+/** Like apr_cstr_split(), but append to existing @a array instead of
+ * creating a new one.  Allocate the copied substrings in @a pool
+ * (i.e., caller decides whether or not to pass @a array->pool as @a pool).
+ *
+ * @since New in 1.6
+ */
+void apr_cstr_split_append(apr_array_header_t *array,
+                           const char *input,
+                           const char *sep_chars,
+                           int chop_whitespace,
+                           apr_pool_t *pool);
+
+
+/** Return @c TRUE iff @a str matches any of the elements of @a list, a list
+ * of zero or more glob patterns.
+ *
+ * @since New in 1.6
+ */
+int apr_cstr_match_glob_list(const char *str, const apr_array_header_t *list);
+
+/** Return @c TRUE iff @a str exactly matches any of the elements of @a list.
+ *
+ * @since New in 1.6
+ */
+int apr_cstr_match_list(const char *str, const apr_array_header_t *list);
+
+/**
+ * Get the next token from @a *str interpreting any char from @a sep as a
+ * token separator.  Separators at the beginning of @a str will be skipped.
+ * Returns a pointer to the beginning of the first token in @a *str or NULL
+ * if no token is left.  Modifies @a str such that the next call will return
+ * the next token.
+ *
+ * @note The content of @a *str may be modified by this function.
+ *
+ * @since New in 1.6.
+ */
+char * apr_cstr_tokenize(const char *sep, char **str);
+
+/**
+ * Return the number of line breaks in @a msg, allowing any kind of newline
+ * termination (CR, LF, CRLF, or LFCR), even inconsistent.
+ *
+ * @since New in 1.6.
+ */
+int apr_cstr_count_newlines(const char *msg);
+
+#if 0 /* XXX: stringbuf logic is not present in APR */
+/**
+ * Return a cstring which is the concatenation of @a strings (an array
+ * of char *) each followed by @a separator (that is, @a separator
+ * will also end the resulting string).  Allocate the result in @a pool.
+ * If @a strings is empty, then return the empty string.
+ *
+ * @since New in 1.6.
+ */
+char * apr_cstr_join(const apr_array_header_t *strings,
+                     const char *separator,
+                     apr_pool_t *pool);
+#endif
+
+/**
+ * Compare two strings @a atr1 and @a atr2, treating case-equivalent
+ * unaccented Latin (C/POSIX ASCII subset) alpha characters as equal.
+ *
+ * Returns in integer greater than, equal to, or less than 0,
+ * according to whether @a str1 is considered greater than, equal to,
+ * or less than @a str2.
+ *
+ * @since New in 1.6.
+ */
+int apr_cstr_casecmp(const char *str1, const char *str2);
+
+/**
+ * Compare two strings @a atr1 and @a atr2 of @n (or shorter) length,
+ * treating case-equivalent unaccented Latin (C/POSIX ASCII subset)
+ * alpha characters as equal.
+ *
+ * Returns in integer greater than, equal to, or less than 0,
+ * according to whether @a str1 is considered greater than, equal to,
+ * or less than @a str2.
+ *
+ * @since New in 1.6.
+ */
+int apr_cstr_casecmpn(const char *str1, const char *str2, apr_size_t n);
+
+/**
+ * Parse the C string @a str into a 64 bit number, and return it in @a *n.
+ * Assume that the number is represented in base @a base.
+ * Raise an error if conversion fails (e.g. due to overflow), or if the
+ * converted number is smaller than @a minval or larger than @a maxval.
+ *
+ * Leading whitespace in @a str is skipped in a locale-dependent way.
+ * After that, the string may contain an optional '+' (positive, default)
+ * or '-' (negative) character, followed by an optional '0x' prefix if
+ * @a base is 0 or 16, followed by numeric digits appropriate for the base.
+ * If there are any more characters after the numeric digits, an error is
+ * returned.
+ *
+ * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal,
+ * else a leading '0' means octal (implemented, though not documented, in
+ * apr_strtoi64() in APR 0.9.0 through 1.5.0), else use base ten.
+ *
+ * @since New in 1.6.
+ */
+apr_status_t apr_cstr_strtoi64(apr_int64_t *n, const char *str,
+                               apr_int64_t minval, apr_int64_t maxval,
+                               int base);
+
+/**
+ * Parse the C string @a str into a 64 bit number, and return it in @a *n.
+ * Assume that the number is represented in base 10.
+ * Raise an error if conversion fails (e.g. due to overflow).
+ *
+ * The behaviour otherwise is as described for apr_cstr_strtoi64().
+ *
+ * @since New in 1.6.
+ */
+apr_status_t apr_cstr_atoi64(apr_int64_t *n, const char *str);
+
+/**
+ * Parse the C string @a str into a 32 bit number, and return it in @a *n.
+ * Assume that the number is represented in base 10.
+ * Raise an error if conversion fails (e.g. due to overflow).
+ *
+ * The behaviour otherwise is as described for apr_cstr_strtoi64().
+ *
+ * @since New in 1.6.
+ */
+apr_status_t apr_cstr_atoi(int *n, const char *str);
+
+/**
+ * Parse the C string @a str into an unsigned 64 bit number, and return
+ * it in @a *n. Assume that the number is represented in base @a base.
+ * Raise an error if conversion fails (e.g. due to overflow), or if the
+ * converted number is smaller than @a minval or larger than @a maxval.
+ *
+ * Leading whitespace in @a str is skipped in a locale-dependent way.
+ * After that, the string may contain an optional '+' (positive, default)
+ * or '-' (negative) character, followed by an optional '0x' prefix if
+ * @a base is 0 or 16, followed by numeric digits appropriate for the base.
+ * If there are any more characters after the numeric digits, an error is
+ * returned.
+ *
+ * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal,
+ * else a leading '0' means octal (as implemented, though not documented, in
+ * apr_strtoi64(), else use base ten.
+ *
+ * @warning The implementation returns APR_ERANGE if the parsed number
+ * is greater than APR_INT64_MAX, even if it is not greater than @a maxval.
+ *
+ * @since New in 1.6.
+ */
+apr_status_t apr_cstr_strtoui64(apr_uint64_t *n, const char *str,
+                                apr_uint64_t minval, apr_uint64_t maxval,
+                                int base);
+
+/**
+ * Parse the C string @a str into an unsigned 64 bit number, and return
+ * it in @a *n. Assume that the number is represented in base 10.
+ * Raise an error if conversion fails (e.g. due to overflow).
+ *
+ * The behaviour otherwise is as described for apr_cstr_strtoui64(),
+ * including the upper limit of APR_INT64_MAX.
+ *
+ * @since New in 1.6.
+ */
+apr_status_t apr_cstr_atoui64(apr_uint64_t *n, const char *str);
+
+/**
+ * Parse the C string @a str into an unsigned 32 bit number, and return
+ * it in @a *n. Assume that the number is represented in base 10.
+ * Raise an error if conversion fails (e.g. due to overflow).
+ *
+ * The behaviour otherwise is as described for apr_cstr_strtoui64(),
+ * including the upper limit of APR_INT64_MAX.
+ *
+ * @since New in 1.6.
+ */
+apr_status_t apr_cstr_atoui(unsigned int *n, const char *str);
+
+/**
+ * Skip the common prefix @a prefix from the C string @a str, and return
+ * a pointer to the next character after the prefix.
+ * Return @c NULL if @a str does not start with @a prefix.
+ *
+ * @since New in 1.6.
+ */
+const char * apr_cstr_skip_prefix(const char *str, const char *prefix);
+
+/** @} */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif  /* SVN_STRING_H */
author	wrowe <wrowe@13f79535-47bb-0310-9956-ffa450edef68>	2016-01-27 00:08:59 +0000
committer	wrowe <wrowe@13f79535-47bb-0310-9956-ffa450edef68>	2016-01-27 00:08:59 +0000
commit	ec963a3b9647f20e6655bf4a9a3714117aee76e3 (patch)
tree	bf41a078c0249d7cd6b867326b1fd75e4af54257 /include/apr_cstr.h
parent	03fc502eb7e015477e466e29a6d8ca5119906724 (diff)
download	libapr-ec963a3b9647f20e6655bf4a9a3714117aee76e3.tar.gz