diff options
-rw-r--r-- | configure.in | 5 | ||||
-rw-r--r-- | i18n/unix/Makefile.in | 45 | ||||
-rw-r--r-- | i18n/unix/xlate.c | 282 | ||||
-rw-r--r-- | include/apr.h.in | 1 | ||||
-rw-r--r-- | include/apr.hw | 1 | ||||
-rw-r--r-- | include/apr_xlate.h | 171 | ||||
-rw-r--r-- | test/ab_apr.c | 63 |
7 files changed, 560 insertions, 8 deletions
diff --git a/configure.in b/configure.in index 159072313..bf6b5af30 100644 --- a/configure.in +++ b/configure.in @@ -15,7 +15,7 @@ dnl # and which files are to be configured. # These added to allow default directories to be used... DEFAULT_OSDIR="unix" echo "(Default will be ${DEFAULT_OSDIR})" -MODULES="file_io network_io threadproc misc locks time mmap shmem dso" +MODULES="file_io network_io threadproc misc locks time mmap shmem dso i18n" dnl Process this file with autoconf to produce a configure script. AC_INIT(configure.in) @@ -124,11 +124,13 @@ AC_CHECK_FUNC(inet_addr, [ inet_addr="1" ], [ inet_addr="0" ]) AC_CHECK_FUNC(inet_network, [ inet_network="1" ], [ inet_network="0" ]) AC_CHECK_FUNC(_getch) AC_CHECK_FUNCS(gmtime_r localtime_r) +AC_CHECK_FUNCS(iconv, [ iconv="1" ], [ iconv="0" ]) AC_SUBST(sendfile) AC_SUBST(fork) AC_SUBST(inet_addr) AC_SUBST(inet_network) AC_SUBST(have_sigaction) +AC_SUBST(iconv) dnl #----------------------------- Checks for Any required Headers @@ -176,6 +178,7 @@ AC_CHECK_HEADERS(unix.h) AC_CHECK_HEADERS(arpa/inet.h) AC_CHECK_HEADERS(netinet/in.h, netinet_inh="1", netinet_inh="0") AC_CHECK_HEADERS(netinet/tcp.h) +AC_CHECK_HEADERS(iconv.h) AC_CHECK_HEADERS(sys/file.h) AC_CHECK_HEADERS(sys/ioctl.h) diff --git a/i18n/unix/Makefile.in b/i18n/unix/Makefile.in new file mode 100644 index 000000000..9762d996c --- /dev/null +++ b/i18n/unix/Makefile.in @@ -0,0 +1,45 @@ + +RM=@RM@ +CC=@CC@ +RANLIB=@RANLIB@ +CFLAGS=@CFLAGS@ @OPTIM@ +LIBS=@LIBS@ +LDFLAGS=@LDFLAGS@ $(LIBS) +INCDIR=../../include +INCLUDES=-I$(INCDIR) -I. + +OBJS=xlate.o + +.c.o: + $(CC) $(CFLAGS) -c $(INCLUDES) $< + +all: $(OBJS) + +clean: + $(RM) -f *.o *.a *.so + +distclean: clean + -$(RM) -f Makefile + +$(OBJS): Makefile + +# +# We really don't expect end users to use this rule. It works only with +# gcc, and rebuilds Makefile.tmpl. You have to re-run Configure after +# using it. 
+# +depend: + cp Makefile.in Makefile.in.bak \ + && sed -ne '1,/^# DO NOT REMOVE/p' Makefile.in > Makefile.new \ + && gcc -MM $(INCLUDES) $(CFLAGS) *.c >> Makefile.new \ + && sed -e '1,$$s: $(INCDIR)/: $$(INCDIR)/:g' \ + -e '1,$$s: $(OSDIR)/: $$(OSDIR)/:g' Makefile.new \ + > Makefile.in \ + && rm Makefile.new + +# DO NOT REMOVE +xlate.o: xlate.c $(INCDIR)/apr_config.h $(INCDIR)/apr_lib.h \ + $(INCDIR)/apr_general.h $(INCDIR)/apr.h \ + $(INCDIR)/apr_errno.h $(INCDIR)/apr_file_io.h \ + $(INCDIR)/apr_time.h $(INCDIR)/apr_thread_proc.h \ + $(INCDIR)/apr_xlate.h diff --git a/i18n/unix/xlate.c b/i18n/unix/xlate.c new file mode 100644 index 000000000..9b69d169f --- /dev/null +++ b/i18n/unix/xlate.c @@ -0,0 +1,282 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. 
The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#include "apr_config.h" + +#include "apr_lib.h" +#include "apr_xlate.h" + +/* If no implementation is available, don't generate code here since + * apr_xlate.h emitted macros which return APR_ENOTIMPL. + */ + +#if APR_HAS_XLATE + +#ifdef HAVE_ICONV_H +#include <iconv.h> +#endif + +#ifndef min +#define min(x,y) ((x) <= (y) ? 
(x) : (y)) +#endif + +struct ap_xlate_t { + ap_pool_t *pool; + char *frompage; + char *topage; + char *sbcs_table; +#ifdef HAVE_ICONV + iconv_t ich; +#endif +}; + +/* get_default_codepage() + * + * simple heuristic to determine codepage of source code so that + * literal strings (e.g., "GET /\r\n") in source code can be translated + * properly + * + * If appropriate, a symbol can be set at configure time to determine + * this. On EBCDIC platforms, it will be important how the code was + * unpacked. + */ + +static const char *get_default_codepage(void) +{ +#ifdef __MVS__ + #ifdef __CODESET__ + return __CODESET__; + #else + return "IBM-1047"; + #endif +#endif + + if ('}' == 0xD0) { + return "IBM-1047"; + } + + if ('{' == 0xFB) { + return "EDF04"; + } + + if ('A' == 0xC1) { + return "EBCDIC"; /* not useful */ + } + + if ('A' == 0x41) { + return "ISO8859-1"; /* not necessarily true */ + } + + return "unknown"; +} + +static ap_status_t ap_xlate_cleanup(void *convset) +{ +#ifdef HAVE_ICONV + ap_xlate_t *old = convset; + + if (old->ich != (iconv_t)-1) { + if (iconv_close(old->ich)) { + return errno; + } + } +#endif + return APR_SUCCESS; +} + +#ifdef HAVE_ICONV +static void check_sbcs(ap_xlate_t *convset) +{ + char inbuf[256], outbuf[256]; + char *inbufptr = inbuf, *outbufptr = outbuf; + size_t inbytes_left, outbytes_left; + int i; + size_t translated; + + for (i = 0; i < sizeof(inbuf); i++) { + inbuf[i] = i; + } + + inbytes_left = outbytes_left = sizeof(inbuf); + translated = iconv(convset->ich, (const char **)&inbufptr, + &inbytes_left, &outbufptr, &outbytes_left); + if (translated != (size_t) -1 && + inbytes_left == 0 && + outbytes_left == 0) { + /* hurray... 
this is simple translation; save the table, + * close the iconv descriptor + */ + + convset->sbcs_table = ap_palloc(convset->pool, sizeof(outbuf)); + memcpy(convset->sbcs_table, outbuf, sizeof(outbuf)); + iconv_close(convset->ich); + convset->ich = (iconv_t)-1; + + /* TODO: add the table to the cache */ + } +} +#endif /* HAVE_ICONV */ + +ap_status_t ap_xlate_open(ap_xlate_t **convset, const char *topage, + const char *frompage, ap_pool_t *pool) +{ + ap_status_t status; + ap_xlate_t *new; + int found = 0; + + *convset = NULL; + + if (!topage) { + topage = get_default_codepage(); + } + + if (!frompage) { + frompage = get_default_codepage(); + } + + new = (ap_xlate_t *)ap_palloc(pool, sizeof(ap_xlate_t)); + if (!new) { + return APR_ENOMEM; + } + + new->pool = pool; + new->topage = ap_pstrdup(pool, topage); + new->frompage = ap_pstrdup(pool, frompage); + if (!new->topage || !new->frompage) { + return APR_ENOMEM; + } + +#ifdef TODO + /* search cache of codepage pairs; we may be able to avoid the + * expensive iconv_open() + */ + + set found to non-zero if found in the cache +#endif + +#ifdef HAVE_ICONV + if (!found) { + new->ich = iconv_open(topage, frompage); + if (new->ich == (iconv_t)-1) { + return errno; + } + found = 1; + check_sbcs(new); + } +#endif /* HAVE_ICONV */ + + if (found) { + *convset = new; + ap_register_cleanup(pool, (void *)new, ap_xlate_cleanup, + ap_null_cleanup); + status = APR_SUCCESS; + } + else { + status = EINVAL; /* same as what iconv() would return if it + couldn't handle the pair */ + } + + return status; +} + +ap_status_t ap_xlate_conv_buffer(ap_xlate_t *convset, const char *inbuf, + ap_size_t *inbytes_left, char *outbuf, + ap_size_t *outbytes_left) +{ + ap_status_t status = APR_SUCCESS; +#ifdef HAVE_ICONV + size_t translated; + + if (convset->ich != (iconv_t)-1) { + char *inbufptr = (char *)inbuf; + char *outbufptr = outbuf; + + translated = iconv(convset->ich, (const char **)&inbufptr, + inbytes_left, &outbufptr, outbytes_left); + if 
(translated == (size_t)-1) { + return errno; + } + } + else +#endif + { + int to_convert = min(*inbytes_left, *outbytes_left); + int converted = to_convert; + char *table = convset->sbcs_table; + + while (to_convert) { + *outbuf = table[(unsigned char)*inbuf]; + ++outbuf; + ++inbuf; + --to_convert; + } + *inbytes_left -= converted; + *outbytes_left -= converted; + } + + return status; +} + +ap_status_t ap_xlate_close(ap_xlate_t *convset) +{ + ap_status_t status; + + if ((status = ap_xlate_cleanup(convset)) == APR_SUCCESS) { + ap_kill_cleanup(convset->pool, convset, ap_xlate_cleanup); + } + + return status; +} + +#endif /* APR_HAS_XLATE */ diff --git a/include/apr.h.in b/include/apr.h.in index 23753b3a7..ce4e59775 100644 --- a/include/apr.h.in +++ b/include/apr.h.in @@ -62,6 +62,7 @@ #define APR_HAS_MMAP @mmap@ #define APR_HAS_FORK @fork@ #define APR_HAS_RANDOM @rand@ +#define APR_HAS_XLATE @iconv@ /* Typedefs that APR needs. */ diff --git a/include/apr.hw b/include/apr.hw index aa0d3a476..031bdc4c8 100644 --- a/include/apr.hw +++ b/include/apr.hw @@ -111,6 +111,7 @@ #define APR_HAS_SENDFILE 1 #define APR_HAS_MMAP 0 #define APR_HAS_RANDOM 1 +#define APR_HAS_XLATE 0 /* Typedefs that APR needs. */ diff --git a/include/apr_xlate.h b/include/apr_xlate.h new file mode 100644 index 000000000..39c891da7 --- /dev/null +++ b/include/apr_xlate.h @@ -0,0 +1,171 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +#ifndef APR_XLATE_H +#define APR_XLATE_H + +#include "apr.h" +#include "apr_general.h" +#include "apr_time.h" +#include "apr_errno.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* APR_HAS_XLATE determines whether or not useful implementations of + * ap_xlate_open() et al are provided. + * + * If APR_HAS_XLATE is zero, ap_xlate_open() et al will all return + * APR_ENOTIMPL at run-time. + */ + +#if ! APR_HAS_XLATE + +typedef void ap_xlate_t; + +/* For platforms where we don't bother with translating between charsets, + * these are macros which always return failure. + */ + +#define ap_xlate_open(convset, topage, frompage, pool) APR_ENOTIMPL + +#define ap_xlate_conv_buffer(convset, inbuf, inbytes_left, outbuf, \ + outbytes_left) APR_ENOTIMPL + +/* The purpose of ap_xlate_conv_char is to translate one character + * at a time. This needs to be written carefully so that it works + * with double-byte character sets. + */ +#define ap_xlate_conv_char(convset, inchar, outchar) APR_ENOTIMPL + +#define ap_xlate_close(convset) APR_ENOTIMPL + +#else /* ! APR_HAS_XLATE */ + +typedef struct ap_xlate_t ap_xlate_t; + +/* + +=head1 ap_status_t ap_xlate_open(ap_xlate_t **convset, const char *topage, const char *frompage, ap_pool_t *pool) + +B<Set up for converting text from one charset to another.> + + arg 1) The handle to be filled in by this function + arg 2) The name of the target charset + arg 3) The name of the source charset + arg 4) The pool to use + +B<NOTE>: Specify APR_DEFAULT_CHARSET for one of the charset + names to indicate the charset of the source code at + compile time. 
This is useful if there are literal + strings in the source code which must be translated + according to the charset of the source code. + APR_DEFAULT_CHARSET is not useful if the source code + of the caller was not encoded in the same charset as + APR at compile time. + +=cut + */ +ap_status_t ap_xlate_open(ap_xlate_t **convset, const char *topage, + const char *frompage, ap_pool_t *pool); + +#define APR_DEFAULT_CHARSET NULL + +/* + +=head1 ap_status_t ap_xlate_conv_buffer(ap_xlate_t *convset, const char *inbuf, ap_size_t *inbytes_left, char *outbuf, ap_size_t *outbytes_left) + +B<Convert a buffer of text from one codepage to another.> + + arg 1) The handle allocated by ap_xlate_open, specifying the parameters + of conversion + arg 2) The address of the source buffer + arg 3) Input: the amount of input data to be translated + Output: the amount of input data not yet translated + arg 4) The address of the destination buffer + arg 5) Input: the size of the output buffer + Output: the amount of the output buffer not yet used + +=cut + */ +ap_status_t ap_xlate_conv_buffer(ap_xlate_t *convset, const char *inbuf, + ap_size_t *inbytes_left, char *outbuf, + ap_size_t *outbytes_left); + +/* The purpose of ap_xlate_conv_char is to translate one character + * at a time. This needs to be written carefully so that it works + * with double-byte character sets. + */ +ap_status_t ap_xlate_conv_char(ap_xlate_t *convset, char inchar, char outchar); + +/* + +=head1 ap_status_t ap_xlate_close(ap_xlate_t *convset) + +B<Close a codepage translation handle.> + + arg 1) The codepage translation handle to close + +=cut + */ +ap_status_t ap_xlate_close(ap_xlate_t *convset); + +#endif /* ! APR_HAS_XLATE */ + +#ifdef __cplusplus +} +#endif + +#endif /* ! 
APR_XLATE_H */ diff --git a/test/ab_apr.c b/test/ab_apr.c index 3544d30ee..2379a0006 100644 --- a/test/ab_apr.c +++ b/test/ab_apr.c @@ -97,6 +97,14 @@ /* -------------------------------------------------------------------- */ +#if 'A' != 0x41 +/* Hmmm... This source code isn't being compiled in ASCII. + * In order for data that flows over the network to make + * sense, we need to translate to/from ASCII. + */ +#define NOT_ASCII +#endif + /* affects include files on Solaris */ #define BSD_COMP @@ -104,6 +112,9 @@ #include "apr_file_io.h" #include "apr_time.h" #include "apr_getopt.h" +#ifdef NOT_ASCII +#include "apr_xlate.h" +#endif #include <string.h> #include <stdio.h> #include <stdlib.h> @@ -193,6 +204,9 @@ struct data *stats; /* date for each request */ ap_pool_t *cntxt; ap_pollfd_t *readbits; +#ifdef NOT_ASCII +ap_xlate_t *fromascii, *toascii; +#endif /* --------------------------------------------------------- */ @@ -538,11 +552,19 @@ static void read_connection(struct connection *c) int l = 4; int space = CBUFFSIZE - c->cbx - 1; /* -1 to allow for 0 terminator */ int tocopy = (space < r) ? 
space : r; -#ifndef CHARSET_EBCDIC +#ifdef NOT_ASCII + ap_size_t inbytes_left = space, outbytes_left = space; + + status = ap_xlate_conv_buffer(fromascii, buffer, &inbytes_left, + c->cbuff + c->cbx, &outbytes_left); + if (status || inbytes_left || outbytes_left) { + fprintf(stderr, "only simple translation is supported (%d/%u/%u)\n", + status, inbytes_left, outbytes_left); + exit(1); + } +#else memcpy(c->cbuff + c->cbx, buffer, space); -#else /*CHARSET_EBCDIC */ - ascii2ebcdic(c->cbuff + c->cbx, buffer, space); -#endif /*CHARSET_EBCDIC */ +#endif /*NOT_ASCII */ c->cbx += tocopy; space -= tocopy; c->cbuff[c->cbx] = 0; /* terminate for benefit of strstr */ @@ -671,6 +693,10 @@ static void test(void) ap_interval_time_t timeout; ap_int16_t rv; int i; +#ifdef NOT_ASCII + ap_status_t status; + ap_size_t inbytes_left, outbytes_left; +#endif if (!use_html) { printf("Benchmarking %s (be patient)...", hostname); @@ -719,9 +745,16 @@ static void test(void) reqlen = strlen(request); -#ifdef CHARSET_EBCDIC - ebcdic2ascii(request, request, reqlen); -#endif /*CHARSET_EBCDIC */ +#ifdef NOT_ASCII + inbytes_left = outbytes_left = reqlen; + status = ap_xlate_conv_buffer(toascii, request, &inbytes_left, + request, &outbytes_left); + if (status || inbytes_left || outbytes_left) { + fprintf(stderr, "only simple translation is supported (%d/%u/%u)\n", + status, inbytes_left, outbytes_left); + exit(1); + } +#endif /*NOT_ASCII */ /* ok - lets start */ start = ap_now(); @@ -886,6 +919,9 @@ static int open_postfile(char *pfile) int main(int argc, char **argv) { int c, r; +#ifdef NOT_ASCII + ap_status_t status; +#endif /* ap_table_t defaults */ tablestring = ""; @@ -896,6 +932,19 @@ int main(int argc, char **argv) atexit(ap_terminate); ap_create_pool(&cntxt, NULL); +#ifdef NOT_ASCII + status = ap_xlate_open(&toascii, "ISO8859-1", APR_DEFAULT_CHARSET, cntxt); + if (status) { + fprintf(stderr, "ap_xlate_open(to ASCII)->%d\n", status); + exit(1); + } + status = ap_xlate_open(&fromascii, 
APR_DEFAULT_CHARSET, "ISO8859-1", cntxt); + if (status) { + fprintf(stderr, "ap_xlate_open(from ASCII)->%d\n", status); + exit(1); + } +#endif + ap_optind = 1; while (ap_getopt(argc, argv, "n:c:t:T:p:v:kVhwx:y:z:", &c, cntxt) == APR_SUCCESS) { switch (c) { |