diff options
| author | Pierre Quentel <pierre.quentel@gmail.com> | 2017-05-08 14:08:34 +0200 | 
|---|---|---|
| committer | Senthil Kumaran <skumaran@gatech.edu> | 2017-05-08 05:08:34 -0700 | 
| commit | cc3fa204d357be5fafc10eb8c2a80fe0bca998f1 (patch) | |
| tree | bd84defa311575c96461db01238cd231d94c439f /Lib/cgi.py | |
| parent | f34c6850203a2406c4950af7a9c8a134145df4ea (diff) | |
| download | cpython-git-cc3fa204d357be5fafc10eb8c2a80fe0bca998f1.tar.gz | |
bpo-29979: Rewrite cgi.parse_multipart to make it consistent with FieldStorage (#991)
Diffstat (limited to 'Lib/cgi.py')
| -rwxr-xr-x | Lib/cgi.py | 105 | 
1 files changed, 14 insertions, 91 deletions
| diff --git a/Lib/cgi.py b/Lib/cgi.py index 14d15a692b..f5e85aa263 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):           DeprecationWarning, 2)      return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) -def parse_multipart(fp, pdict): +def parse_multipart(fp, pdict, encoding="utf-8"):      """Parse multipart input.      Arguments:      fp   : input file      pdict: dictionary containing other parameters of content-type header +    encoding: request encoding      Returns a dictionary just like parse_qs(): keys are the field names, each -    value is a list of values for that field.  This is easy to use but not -    much good if you are expecting megabytes to be uploaded -- in that case, -    use the FieldStorage class instead which is much more flexible.  Note -    that content-type is the raw, unparsed contents of the content-type -    header. - -    XXX This does not parse nested multipart parts -- use FieldStorage for -    that. - -    XXX This should really be subsumed by FieldStorage altogether -- no -    point in having two implementations of the same parsing algorithm. -    Also, FieldStorage protects itself better against certain DoS attacks -    by limiting the size of the data read in one chunk.  The API here -    does not support that kind of protection.  This also affects parse() -    since it can call parse_multipart(). - +    value is a list of values for that field. For non-file fields, the value +    is a list of strings.      """ -    import http.client - -    boundary = b"" -    if 'boundary' in pdict: -        boundary = pdict['boundary'] -    if not valid_boundary(boundary): -        raise ValueError('Invalid boundary in multipart form: %r' -                            % (boundary,)) - -    nextpart = b"--" + boundary -    lastpart = b"--" + boundary + b"--" -    partdict = {} -    terminator = b"" - -    while terminator != lastpart: -        bytes = -1 -        data = None -        if terminator: -            # At start of next part.  Read headers first. -            headers = http.client.parse_headers(fp) -            clength = headers.get('content-length') -            if clength: -                try: -                    bytes = int(clength) -                except ValueError: -                    pass -            if bytes > 0: -                if maxlen and bytes > maxlen: -                    raise ValueError('Maximum content length exceeded') -                data = fp.read(bytes) -            else: -                data = b"" -        # Read lines until end of part. -        lines = [] -        while 1: -            line = fp.readline() -            if not line: -                terminator = lastpart # End outer loop -                break -            if line.startswith(b"--"): -                terminator = line.rstrip() -                if terminator in (nextpart, lastpart): -                    break -            lines.append(line) -        # Done with part. -        if data is None: -            continue -        if bytes < 0: -            if lines: -                # Strip final line terminator -                line = lines[-1] -                if line[-2:] == b"\r\n": -                    line = line[:-2] -                elif line[-1:] == b"\n": -                    line = line[:-1] -                lines[-1] = line -                data = b"".join(lines) -        line = headers['content-disposition'] -        if not line: -            continue -        key, params = parse_header(line) -        if key != 'form-data': -            continue -        if 'name' in params: -            name = params['name'] -        else: -            continue -        if name in partdict: -            partdict[name].append(data) -        else: -            partdict[name] = [data] - -    return partdict - +    # RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always +    # represented as 7bit US-ASCII. +    boundary = pdict['boundary'].decode('ascii') +    ctype = "multipart/form-data; boundary={}".format(boundary) +    headers = Message() +    headers.set_type(ctype) +    headers['Content-Length'] = pdict['CONTENT-LENGTH'] +    fs = FieldStorage(fp, headers=headers, encoding=encoding, +        environ={'REQUEST_METHOD': 'POST'}) +    return {k: fs.getlist(k) for k in fs}  def _parseparam(s):      while s[:1] == ';': | 
