summaryrefslogtreecommitdiff
path: root/fuzz/html.c
blob: a2bd97a383493fd8b29ff025e7567cdf509a7e7e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * html.c: a libFuzzer target to test several HTML parser interfaces.
 *
 * See Copyright for the status of this software.
 */

#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/catalog.h>
#include "fuzz.h"

/*
 * LLVMFuzzerInitialize:
 * @argc:  unused
 * @argv:  unused
 *
 * One-time setup hook of the fuzz target. Runs the fuzzing memory setup,
 * initializes the parser, initializes the catalog when catalog support
 * is compiled in, and registers xmlFuzzErrorFunc as the generic error
 * handler.
 *
 * Always returns 0.
 */
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
                     char ***argv ATTRIBUTE_UNUSED) {
    /* The memory setup is deliberately the first call made. */
    xmlFuzzMemSetup();
    xmlInitParser();

#if defined(LIBXML_CATALOG_ENABLED)
    xmlInitializeCatalog();
#endif

    /* NOTE(review): xmlFuzzErrorFunc presumably discards messages - confirm in fuzz.h */
    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);

    return(0);
}

/*
 * fuzzHtmlPull:
 * @buf:  document bytes
 * @len:  number of bytes in @buf
 * @opts:  parser options
 *
 * Parse the whole buffer in one go with htmlReadMemory, serialize the
 * result and free it.
 */
static void
fuzzHtmlPull(const char *buf, size_t len, int opts) {
    htmlDocPtr tree;
    xmlOutputBufferPtr sink;

    tree = htmlReadMemory(buf, len, NULL, NULL, opts);

    /*
     * The serializer is exercised as well. htmlDocContentDumpOutput is
     * called with a buffer of our own so that the output is not encoded:
     * the HTML encoding is excruciatingly slow (see
     * htmlEntityValueLookup).
     *
     * NOTE(review): neither tree nor sink is checked for NULL before
     * being passed on; this relies on the callees accepting NULL -
     * confirm.
     */
    sink = xmlAllocOutputBuffer(NULL);
    htmlDocContentDumpOutput(sink, tree, NULL);
    xmlOutputBufferClose(sink);

    xmlFreeDoc(tree);
}

/*
 * fuzzHtmlPush:
 * @buf:  document bytes
 * @len:  number of bytes in @buf
 * @opts:  parser options
 * @chunkLimit:  largest number of bytes fed to the parser per call
 *
 * Feed the buffer to a push parser context piece by piece, terminate
 * the parse, then free the resulting document and the context. Does
 * nothing if the context cannot be created.
 */
static void
fuzzHtmlPush(const char *buf, size_t len, int opts, size_t chunkLimit) {
    htmlParserCtxtPtr pushCtxt;
    size_t offset = 0;

    pushCtxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
                                        XML_CHAR_ENCODING_NONE);
    if (pushCtxt == NULL)
        return;

    htmlCtxtUseOptions(pushCtxt, opts);

    while (offset < len) {
        size_t remaining = len - offset;
        size_t step = (remaining > chunkLimit) ? chunkLimit : remaining;

        htmlParseChunk(pushCtxt, buf + offset, step, 0);
        offset += step;
    }

    /* An empty chunk with the terminate flag set ends the parse. */
    htmlParseChunk(pushCtxt, NULL, 0, 1);

    xmlFreeDoc(pushCtxt->myDoc);
    htmlFreeParserCtxt(pushCtxt);
}

/*
 * LLVMFuzzerTestOneInput:
 * @data:  raw fuzzer input
 * @size:  number of bytes in @data
 *
 * The input is consumed as follows: a 4-byte integer used as parser
 * options, a 4-byte integer which, reduced modulo (size + 1), is handed
 * to xmlFuzzMemSetLimit, and the remaining bytes, which form the HTML
 * document. The document is run through the pull parser and the
 * serializer, then through the push parser in chunks of at most 128
 * bytes.
 *
 * Always returns 0.
 */
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
    static const size_t chunkLimit = 128;
    const char *html;
    size_t memLimit, htmlLen;
    int parseOpts;

    xmlFuzzDataInit(data, size);
    parseOpts = (int) xmlFuzzReadInt(4);
    memLimit = xmlFuzzReadInt(4) % (size + 1);
    html = xmlFuzzReadRemaining(&htmlLen);

    /* No document to work on: release the fuzz data and stop here. */
    if (html == NULL) {
        xmlFuzzDataCleanup();
        return(0);
    }

    /* Pull parser, followed by the serializer */

    xmlFuzzMemSetLimit(memLimit);
    fuzzHtmlPull(html, htmlLen, parseOpts);

    /* Push parser; the same limit is set again beforehand */

    xmlFuzzMemSetLimit(memLimit);
    fuzzHtmlPush(html, htmlLen, parseOpts, chunkLimit);

    /* Cleanup */

    /* NOTE(review): a limit of 0 presumably disables the limit - confirm in fuzz.c */
    xmlFuzzMemSetLimit(0);
    xmlFuzzDataCleanup();
    xmlResetLastError();

    return(0);
}