diff options
| author | Nikolay Kim <fafhrd91@gmail.com> | 2013-03-11 21:38:01 -0700 |
|---|---|---|
| committer | Nikolay Kim <fafhrd91@gmail.com> | 2013-03-11 21:38:01 -0700 |
| commit | 5b92017fa1dd0cb2f2b93968273f8a5a96567f93 (patch) | |
| tree | c94cf941ec3b2f4612e8c75a7f981a6a27e7c123 | |
| parent | c18bc54447d723621586b842c4453a6950a52a5f (diff) | |
| download | trollius-5b92017fa1dd0cb2f2b93968273f8a5a96567f93.tar.gz | |
read_headers helper for http protocol
| -rw-r--r-- | srv.py | 16 | ||||
| -rw-r--r-- | tests/http_protocol_test.py | 61 | ||||
| -rw-r--r-- | tulip/http/http_client.py | 24 | ||||
| -rw-r--r-- | tulip/http/protocol.py | 65 |
4 files changed, 140 insertions, 26 deletions
@@ -44,18 +44,10 @@ class HttpServer(tulip.Protocol): self.transport.close() return - lines = [] - while True: - line = yield from self.reader.readline() - print('header line', line) - if not line.strip(b' \t\r\n'): - break - lines.append(line) - if line == b'\r\n': - break - - parser = email.parser.BytesHeaderParser() - parser.parsebytes(b''.join(lines)) + headers = email.message.Message() + for hdr, val in (yield from self.reader.read_headers()): + print(hdr, val) + headers.add_header(hdr, val) write = self.transport.write if isdir and not path.endswith('/'): diff --git a/tests/http_protocol_test.py b/tests/http_protocol_test.py index 61d7ebc..1f3aa65 100644 --- a/tests/http_protocol_test.py +++ b/tests/http_protocol_test.py @@ -140,3 +140,64 @@ class HttpStreamReaderTests(LogTrackingTestCase): tulip.Task(self.stream.read_response_status())) self.assertIn('HTTP/1.1 ttt test', str(cm.exception)) + + def test_read_headers(self): + self.stream.feed_data(b'test: line\r\n' + b' continue\r\n' + b'test2: data\r\n' + b'\r\n') + + headers = self.loop.run_until_complete( + tulip.Task(self.stream.read_headers())) + self.assertEqual(headers, + [('TEST', 'line\r\n continue'), ('TEST2', 'data')]) + + def test_read_headers_size(self): + self.stream.feed_data(b'test: line\r\n') + self.stream.feed_data(b' continue\r\n') + self.stream.feed_data(b'test2: data\r\n') + self.stream.feed_data(b'\r\n') + + self.stream.MAX_HEADERS = 5 + self.assertRaises( + http.client.LineTooLong, + self.loop.run_until_complete, + tulip.Task(self.stream.read_headers())) + + def test_read_headers_invalid_header(self): + self.stream.feed_data(b'test line\r\n') + + with self.assertRaises(ValueError) as cm: + self.loop.run_until_complete( + tulip.Task(self.stream.read_headers())) + + self.assertIn("Invalid header b'test line'", str(cm.exception)) + + def test_read_headers_invalid_name(self): + self.stream.feed_data(b'test[]: line\r\n') + + with self.assertRaises(ValueError) as cm: + self.loop.run_until_complete( + tulip.Task(self.stream.read_headers())) + + self.assertIn("Invalid header name b'TEST[]'", str(cm.exception)) + + def test_read_headers_headers_size(self): + self.stream.MAX_HEADERFIELD_SIZE = 5 + self.stream.feed_data(b'test: line data data\r\ndata\r\n') + + with self.assertRaises(http.client.LineTooLong) as cm: + self.loop.run_until_complete( + tulip.Task(self.stream.read_headers())) + + self.assertIn("limit request headers fields size", str(cm.exception)) + + def test_read_headers_continuation_headers_size(self): + self.stream.MAX_HEADERFIELD_SIZE = 5 + self.stream.feed_data(b'test: line\r\n test\r\n') + + with self.assertRaises(http.client.LineTooLong) as cm: + self.loop.run_until_complete( + tulip.Task(self.stream.read_headers())) + + self.assertIn("limit request headers fields size", str(cm.exception)) diff --git a/tulip/http/http_client.py b/tulip/http/http_client.py index 520d162..fc9365e 100644 --- a/tulip/http/http_client.py +++ b/tulip/http/http_client.py @@ -110,19 +110,15 @@ class HttpClientProtocol: yield from self.event_loop.create_connection( lambda: self, self.host, self.port, ssl=self.ssl) - # TODO: A better mechanism to return all info from the - # status line, all headers, and the buffer, without having - # an N-tuple return value. - version, status, message = yield from self.stream.read_response_status() - - raw_headers = [] - while True: - header = yield from self.stream.readline() - if not header.strip(): - break - raw_headers.append(header) - parser = email.parser.BytesHeaderParser() - headers = parser.parsebytes(b''.join(raw_headers)) + # read response status + version, status, reason = yield from self.stream.read_response_status() + + # read headers + headers = email.message.Message() + for hdr, val in (yield from self.stream.read_headers()): + headers.add_header(hdr, val) + + # read payload content_length = headers.get('content-length') if content_length: content_length = int(content_length) # May raise. @@ -135,7 +131,7 @@ class HttpClientProtocol: stream = protocol.HttpStreamReader() stream.feed_data(body) stream.feed_eof() - sts = '{} {}'.format(self.decode(status), self.decode(message)) + sts = '{} {}'.format(status, reason) return (sts, headers, stream) def encode(self, s): diff --git a/tulip/http/protocol.py b/tulip/http/protocol.py index c6f5a60..dce35a2 100644 --- a/tulip/http/protocol.py +++ b/tulip/http/protocol.py @@ -10,6 +10,8 @@ import tulip METHRE = re.compile('[A-Z0-9$-_.]+') VERSRE = re.compile('HTTP/(\d+).(\d+)') +HDRRE = re.compile(b"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]") +CONTINUATION = (b' ', b'\t') RequestLine = collections.namedtuple( @@ -22,6 +24,9 @@ ResponseStatus = collections.namedtuple( class HttpStreamReader(tulip.StreamReader): + MAX_HEADERS = 32768 + MAX_HEADERFIELD_SIZE = 8190 + @tulip.coroutine def read_request_line(self): """Read request status line. Exception http.client.BadStatusLine @@ -111,3 +116,63 @@ class HttpStreamReader(tulip.StreamReader): raise http.client.BadStatusLine(line) return ResponseStatus(version, status, reason.strip()) + + @tulip.coroutine + def read_headers(self): + """Read and parses RFC2822 headers from a stream. + + Line continuations are supported. Returns list of header name + and value pairs. + """ + size = 0 + headers = [] + + line = yield from self.readline() + + while line not in (b'\r\n', b'\n'): + header_length = len(line) + + # Parse initial header name : value pair. + sep_pos = line.find(b':') + if sep_pos < 0: + raise ValueError('Invalid header %s' % line.strip()) + + name, value = line[:sep_pos], line[sep_pos+1:] + name = name.rstrip(b' \t').upper() + if HDRRE.search(name): + raise ValueError('Invalid header name %s' % name) + + name = name.strip().decode('ascii', 'surrogateescape') + value = [value.lstrip()] + + # next line + line = yield from self.readline() + + # consume continuation lines + continuation = line.startswith(CONTINUATION) + + if continuation: + while continuation: + header_length += len(line) + if header_length > self.MAX_HEADERFIELD_SIZE: + raise http.client.LineTooLong( + 'limit request headers fields size') + value.append(line) + + line = yield from self.readline() + continuation = line.startswith(CONTINUATION) + else: + if header_length > self.MAX_HEADERFIELD_SIZE: + raise http.client.LineTooLong( + 'limit request headers fields size') + + # total headers size + size += header_length + if size >= self.MAX_HEADERS: + raise http.client.LineTooLong('limit request headers fields') + + headers.append( + (name, + b''.join(value).rstrip().decode('ascii', 'surrogateescape'))) + + return headers |
