summary | refs | log | tree | commit | diff
path: root/modules
diff options
context:
space:
mode:
author: Yann Ylavic <ylavic@apache.org> 2018-01-13 15:57:11 +0000
committer: Yann Ylavic <ylavic@apache.org> 2018-01-13 15:57:11 +0000
commit: bf9f2a71aebfdc54d77f53c381da58af066bdf3e (patch)
tree: 9fe785d4f30f51442ee3e6809aa4965f0c217aa9 /modules
parent: 8e3695a7048cf83ccef314998cb9c1dde363bc81 (diff)
download: httpd-bf9f2a71aebfdc54d77f53c381da58af066bdf3e.tar.gz
Merge r1599012, r1601559, r1818624, r1819969, r1819970 from trunk:

mod_proxy_html: skip documents < 4 bytes
PR 56286
Micha Lenk

follow up r1599012: C99 fix

Fix some style

mod_proxy_html: follow up to r1599012.

To determine whether or not HTML data are lower than 4 bytes, use a retain
buffer rather than assuming that all should be contained in a single bucket
with the next one being EOS (if any).

mod_proxy_html: don't depend on NUL terminated bucket data.

ap_regexec() wants NUL terminated strings, so use ap_regexec_len() instead.

Submitted by: niq, takashi, jailletc36, ylavic, ylavic
Reviewed by: jim, ylavic, icing

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1821073 13f79535-47bb-0310-9956-ffa450edef68

Diffstat (limited to 'modules')
-rw-r--r-- modules/filters/mod_proxy_html.c | 72
1 files changed, 60 insertions, 12 deletions
diff --git a/modules/filters/mod_proxy_html.c b/modules/filters/mod_proxy_html.c
index 1ea005bf28..78ebdab34e 100644
--- a/modules/filters/mod_proxy_html.c
+++ b/modules/filters/mod_proxy_html.c
@@ -108,6 +108,9 @@ typedef struct {
size_t avail;
const char *encoding;
urlmap *map;
+ char rbuf[4];
+ apr_size_t rlen;
+ apr_size_t rmin;
} saxctxt;
@@ -638,7 +641,7 @@ static void pstartElement(void *ctxt, const xmlChar *uname,
}
}
-static meta *metafix(request_rec *r, const char *buf)
+static meta *metafix(request_rec *r, const char *buf, apr_size_t len)
{
meta *ret = NULL;
size_t offs = 0;
@@ -649,7 +652,8 @@ static meta *metafix(request_rec *r, const char *buf)
ap_regmatch_t pmatch[2];
char delim;
- while (!ap_regexec(seek_meta, buf+offs, 2, pmatch, 0)) {
+ while (offs < len &&
+ !ap_regexec_len(seek_meta, buf + offs, len - offs, 2, pmatch, 0)) {
header = NULL;
content = NULL;
p = buf+offs+pmatch[1].rm_eo;
@@ -844,6 +848,17 @@ static saxctxt *check_filter_init (ap_filter_t *f)
return f->ctx;
}
+static void prepend_rbuf(saxctxt *ctxt, apr_bucket_brigade *bb)
+{
+ if (ctxt->rlen) {
+ apr_bucket *b = apr_bucket_transient_create(ctxt->rbuf,
+ ctxt->rlen,
+ bb->bucket_alloc);
+ APR_BRIGADE_INSERT_HEAD(bb, b);
+ ctxt->rlen = 0;
+ }
+}
+
static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
apr_bucket* b;
@@ -865,11 +880,15 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb)
if (APR_BUCKET_IS_METADATA(b)) {
if (APR_BUCKET_IS_EOS(b)) {
if (ctxt->parser != NULL) {
- consume_buffer(ctxt, buf, 0, 1);
+ consume_buffer(ctxt, "", 0, 1);
+ }
+ else {
+ prepend_rbuf(ctxt, ctxt->bb);
}
APR_BRIGADE_INSERT_TAIL(ctxt->bb,
- apr_bucket_eos_create(ctxt->bb->bucket_alloc));
+ apr_bucket_eos_create(ctxt->bb->bucket_alloc));
ap_pass_brigade(ctxt->f->next, ctxt->bb);
+ apr_brigade_cleanup(ctxt->bb);
}
else if (APR_BUCKET_IS_FLUSH(b)) {
/* pass on flush, except at start where it would cause
@@ -884,11 +903,30 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb)
== APR_SUCCESS) {
if (ctxt->parser == NULL) {
const char *cenc;
+
+ /* For documents smaller than four bytes, there is no reason to do
+ * HTML rewriting. The URL schema (i.e. 'http') needs four bytes alone.
+ * And the HTML parser needs at least four bytes to initialise correctly.
+ */
+ ctxt->rmin += bytes;
+ if (ctxt->rmin < sizeof(ctxt->rbuf)) {
+ memcpy(ctxt->rbuf + ctxt->rlen, buf, bytes);
+ ctxt->rlen += bytes;
+ continue;
+ }
+ if (ctxt->rlen && ctxt->rlen < sizeof(ctxt->rbuf)) {
+ apr_size_t rem = sizeof(ctxt->rbuf) - ctxt->rlen;
+ memcpy(ctxt->rbuf + ctxt->rlen, buf, rem);
+ ctxt->rlen += rem;
+ buf += rem;
+ bytes -= rem;
+ }
+
if (!xml2enc_charset ||
(xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
if (!xml2enc_charset)
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01422)
- "No i18n support found. Install mod_xml2enc if required");
+ "No i18n support found. Install mod_xml2enc if required");
enc = XML_CHAR_ENCODING_NONE;
ap_set_content_type(f->r, "text/html;charset=utf-8");
}
@@ -910,15 +948,25 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb)
}
ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype);
- ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf,
- 4, 0, enc);
- buf += 4;
- bytes -= 4;
+
+ if (ctxt->rlen) {
+ ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt,
+ ctxt->rbuf,
+ ctxt->rlen,
+ NULL, enc);
+ }
+ else {
+ ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4,
+ NULL, enc);
+ buf += 4;
+ bytes -= 4;
+ }
if (ctxt->parser == NULL) {
- apr_status_t rv = ap_pass_brigade(f->next, bb);
+ prepend_rbuf(ctxt, bb);
ap_remove_output_filter(f);
- return rv;
+ return ap_pass_brigade(f->next, bb);
}
+ ctxt->rlen = 0;
apr_pool_cleanup_register(f->r->pool, ctxt->parser,
(int(*)(void*))htmlFreeParserCtxt,
apr_pool_cleanup_null);
@@ -928,7 +976,7 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb)
"Unsupported parser opts %x", xmlopts);
#endif
if (ctxt->cfg->metafix)
- m = metafix(f->r, buf);
+ m = metafix(f->r, buf, bytes);
if (m) {
consume_buffer(ctxt, buf, m->start, 0);
consume_buffer(ctxt, buf+m->end, bytes-m->end, 0);