summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nikolay Kim <fafhrd91@gmail.com> 2013-03-11 21:38:01 -0700
committer Nikolay Kim <fafhrd91@gmail.com> 2013-03-11 21:38:01 -0700
commit 5b92017fa1dd0cb2f2b93968273f8a5a96567f93 (patch)
tree c94cf941ec3b2f4612e8c75a7f981a6a27e7c123
parent c18bc54447d723621586b842c4453a6950a52a5f (diff)
download trollius-5b92017fa1dd0cb2f2b93968273f8a5a96567f93.tar.gz
read_headers helper for http protocol
-rw-r--r-- srv.py 16
-rw-r--r-- tests/http_protocol_test.py 61
-rw-r--r-- tulip/http/http_client.py 24
-rw-r--r-- tulip/http/protocol.py 65
4 files changed, 140 insertions, 26 deletions
diff --git a/srv.py b/srv.py
index 0a7c87a..296e157 100644
--- a/srv.py
+++ b/srv.py
@@ -44,18 +44,10 @@ class HttpServer(tulip.Protocol):
self.transport.close()
return
- lines = []
- while True:
- line = yield from self.reader.readline()
- print('header line', line)
- if not line.strip(b' \t\r\n'):
- break
- lines.append(line)
- if line == b'\r\n':
- break
-
- parser = email.parser.BytesHeaderParser()
- parser.parsebytes(b''.join(lines))
+ headers = email.message.Message()
+ for hdr, val in (yield from self.reader.read_headers()):
+ print(hdr, val)
+ headers.add_header(hdr, val)
write = self.transport.write
if isdir and not path.endswith('/'):
diff --git a/tests/http_protocol_test.py b/tests/http_protocol_test.py
index 61d7ebc..1f3aa65 100644
--- a/tests/http_protocol_test.py
+++ b/tests/http_protocol_test.py
@@ -140,3 +140,64 @@ class HttpStreamReaderTests(LogTrackingTestCase):
tulip.Task(self.stream.read_response_status()))
self.assertIn('HTTP/1.1 ttt test', str(cm.exception))
+
+ def test_read_headers(self):
+ self.stream.feed_data(b'test: line\r\n'
+ b' continue\r\n'
+ b'test2: data\r\n'
+ b'\r\n')
+
+ headers = self.loop.run_until_complete(
+ tulip.Task(self.stream.read_headers()))
+ self.assertEqual(headers,
+ [('TEST', 'line\r\n continue'), ('TEST2', 'data')])
+
+ def test_read_headers_size(self):
+ self.stream.feed_data(b'test: line\r\n')
+ self.stream.feed_data(b' continue\r\n')
+ self.stream.feed_data(b'test2: data\r\n')
+ self.stream.feed_data(b'\r\n')
+
+ self.stream.MAX_HEADERS = 5
+ self.assertRaises(
+ http.client.LineTooLong,
+ self.loop.run_until_complete,
+ tulip.Task(self.stream.read_headers()))
+
+ def test_read_headers_invalid_header(self):
+ self.stream.feed_data(b'test line\r\n')
+
+ with self.assertRaises(ValueError) as cm:
+ self.loop.run_until_complete(
+ tulip.Task(self.stream.read_headers()))
+
+ self.assertIn("Invalid header b'test line'", str(cm.exception))
+
+ def test_read_headers_invalid_name(self):
+ self.stream.feed_data(b'test[]: line\r\n')
+
+ with self.assertRaises(ValueError) as cm:
+ self.loop.run_until_complete(
+ tulip.Task(self.stream.read_headers()))
+
+ self.assertIn("Invalid header name b'TEST[]'", str(cm.exception))
+
+ def test_read_headers_headers_size(self):
+ self.stream.MAX_HEADERFIELD_SIZE = 5
+ self.stream.feed_data(b'test: line data data\r\ndata\r\n')
+
+ with self.assertRaises(http.client.LineTooLong) as cm:
+ self.loop.run_until_complete(
+ tulip.Task(self.stream.read_headers()))
+
+ self.assertIn("limit request headers fields size", str(cm.exception))
+
+ def test_read_headers_continuation_headers_size(self):
+ self.stream.MAX_HEADERFIELD_SIZE = 5
+ self.stream.feed_data(b'test: line\r\n test\r\n')
+
+ with self.assertRaises(http.client.LineTooLong) as cm:
+ self.loop.run_until_complete(
+ tulip.Task(self.stream.read_headers()))
+
+ self.assertIn("limit request headers fields size", str(cm.exception))
diff --git a/tulip/http/http_client.py b/tulip/http/http_client.py
index 520d162..fc9365e 100644
--- a/tulip/http/http_client.py
+++ b/tulip/http/http_client.py
@@ -110,19 +110,15 @@ class HttpClientProtocol:
yield from self.event_loop.create_connection(
lambda: self, self.host, self.port, ssl=self.ssl)
- # TODO: A better mechanism to return all info from the
- # status line, all headers, and the buffer, without having
- # an N-tuple return value.
- version, status, message = yield from self.stream.read_response_status()
-
- raw_headers = []
- while True:
- header = yield from self.stream.readline()
- if not header.strip():
- break
- raw_headers.append(header)
- parser = email.parser.BytesHeaderParser()
- headers = parser.parsebytes(b''.join(raw_headers))
+ # read response status
+ version, status, reason = yield from self.stream.read_response_status()
+
+ # read headers
+ headers = email.message.Message()
+ for hdr, val in (yield from self.stream.read_headers()):
+ headers.add_header(hdr, val)
+
+ # read payload
content_length = headers.get('content-length')
if content_length:
content_length = int(content_length) # May raise.
@@ -135,7 +131,7 @@ class HttpClientProtocol:
stream = protocol.HttpStreamReader()
stream.feed_data(body)
stream.feed_eof()
- sts = '{} {}'.format(self.decode(status), self.decode(message))
+ sts = '{} {}'.format(status, reason)
return (sts, headers, stream)
def encode(self, s):
diff --git a/tulip/http/protocol.py b/tulip/http/protocol.py
index c6f5a60..dce35a2 100644
--- a/tulip/http/protocol.py
+++ b/tulip/http/protocol.py
@@ -10,6 +10,8 @@ import tulip
METHRE = re.compile('[A-Z0-9$-_.]+')
VERSRE = re.compile('HTTP/(\d+).(\d+)')
+HDRRE = re.compile(b"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
+CONTINUATION = (b' ', b'\t')
RequestLine = collections.namedtuple(
@@ -22,6 +24,9 @@ ResponseStatus = collections.namedtuple(
class HttpStreamReader(tulip.StreamReader):
+ MAX_HEADERS = 32768
+ MAX_HEADERFIELD_SIZE = 8190
+
@tulip.coroutine
def read_request_line(self):
"""Read request status line. Exception http.client.BadStatusLine
@@ -111,3 +116,63 @@ class HttpStreamReader(tulip.StreamReader):
raise http.client.BadStatusLine(line)
return ResponseStatus(version, status, reason.strip())
+
+ @tulip.coroutine
+ def read_headers(self):
+ """Read and parses RFC2822 headers from a stream.
+
+ Line continuations are supported. Returns list of header name
+ and value pairs.
+ """
+ size = 0
+ headers = []
+
+ line = yield from self.readline()
+
+ while line not in (b'\r\n', b'\n'):
+ header_length = len(line)
+
+ # Parse initial header name : value pair.
+ sep_pos = line.find(b':')
+ if sep_pos < 0:
+ raise ValueError('Invalid header %s' % line.strip())
+
+ name, value = line[:sep_pos], line[sep_pos+1:]
+ name = name.rstrip(b' \t').upper()
+ if HDRRE.search(name):
+ raise ValueError('Invalid header name %s' % name)
+
+ name = name.strip().decode('ascii', 'surrogateescape')
+ value = [value.lstrip()]
+
+ # next line
+ line = yield from self.readline()
+
+ # consume continuation lines
+ continuation = line.startswith(CONTINUATION)
+
+ if continuation:
+ while continuation:
+ header_length += len(line)
+ if header_length > self.MAX_HEADERFIELD_SIZE:
+ raise http.client.LineTooLong(
+ 'limit request headers fields size')
+ value.append(line)
+
+ line = yield from self.readline()
+ continuation = line.startswith(CONTINUATION)
+ else:
+ if header_length > self.MAX_HEADERFIELD_SIZE:
+ raise http.client.LineTooLong(
+ 'limit request headers fields size')
+
+ # total headers size
+ size += header_length
+ if size >= self.MAX_HEADERS:
+ raise http.client.LineTooLong('limit request headers fields')
+
+ headers.append(
+ (name,
+ b''.join(value).rstrip().decode('ascii', 'surrogateescape')))
+
+ return headers