summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2022-12-05 15:10:10 -0800
committerGitHub <noreply@github.com>2022-12-05 18:10:10 -0500
commitb5bdf6a90d0c2a9c12b671340f7776e233ef625a (patch)
treec75ae02327e0dbdf282e2f6cb3ebb2755d712662
parentb0b590be9597fd5919228d251812dd54145f70a7 (diff)
downloadcpython-git-b5bdf6a90d0c2a9c12b671340f7776e233ef625a.tar.gz
[3.7] gh-100001: Omit control characters in http.server stderr logs. (GH-100002) (GH-100034)
Replace control characters in http.server.BaseHTTPRequestHandler.log_message with an escaped \xHH sequence to avoid causing problems for the terminal the output is printed to. (cherry picked from commit d8ab0a4dfa48f881b4ac9ab857d2e9de42f72828) Co-authored-by: Gregory P. Smith <greg@krypto.org>
-rw-r--r--Doc/library/http.server.rst10
-rw-r--r--Lib/http/server.py12
-rw-r--r--Lib/test/test_httpservers.py21
-rw-r--r--Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst6
4 files changed, 47 insertions, 2 deletions
diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst
index a93362d96f..28e4f41428 100644
--- a/Doc/library/http.server.rst
+++ b/Doc/library/http.server.rst
@@ -481,3 +481,13 @@ Security Considerations
:class:`SimpleHTTPRequestHandler` will follow symbolic links when handling
requests, this makes it possible for files outside of the specified directory
to be served.
+
+Earlier versions of Python did not scrub control characters from the
+log messages emitted to stderr from ``python -m http.server`` or the
+default :class:`BaseHTTPRequestHandler` ``.log_message``
+implementation. This could allow remote clients connecting to your
+server to send nefarious control codes to your terminal.
+
+.. versionadded:: 3.7.16
+ scrubbing control characters from log messages
+
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 7d51fc9976..ba2acbc98b 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -93,6 +93,7 @@ import email.utils
import html
import http.client
import io
+import itertools
import mimetypes
import os
import posixpath
@@ -564,6 +565,11 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
self.log_message(format, *args)
+ # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
+ _control_char_table = str.maketrans(
+ {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
+ _control_char_table[ord('\\')] = r'\\'
+
def log_message(self, format, *args):
"""Log an arbitrary message.
@@ -579,12 +585,16 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
The client ip and current date/time are prefixed to
every message.
+ Unicode control characters are replaced with escaped hex
+ before writing the output to stderr.
+
"""
+ message = format % args
sys.stderr.write("%s - - [%s] %s\n" %
(self.address_string(),
self.log_date_time_string(),
- format%args))
+ message.translate(self._control_char_table)))
def version_string(self):
"""Return the server software version string."""
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index c11855ba22..b3e15c475a 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -24,7 +24,7 @@ import time
import datetime
import threading
from unittest import mock
-from io import BytesIO
+from io import BytesIO, StringIO
import unittest
from test import support
@@ -908,6 +908,25 @@ class BaseHTTPRequestHandlerTestCase(unittest.TestCase):
match = self.HTTPResponseMatch.search(response)
self.assertIsNotNone(match)
+ def test_unprintable_not_logged(self):
+ # We call the method from the class directly as our Socketless
+ # Handler subclass overrode it... nice for everything BUT this test.
+ self.handler.client_address = ('127.0.0.1', 1337)
+ log_message = BaseHTTPRequestHandler.log_message
+ with mock.patch.object(sys, 'stderr', StringIO()) as fake_stderr:
+ log_message(self.handler, '/foo')
+ log_message(self.handler, '/\033bar\000\033')
+ log_message(self.handler, '/spam %s.', 'a')
+ log_message(self.handler, '/spam %s.', '\033\x7f\x9f\xa0beans')
+ stderr = fake_stderr.getvalue()
+ self.assertNotIn('\033', stderr) # non-printable chars are caught.
+ self.assertNotIn('\000', stderr) # non-printable chars are caught.
+ lines = stderr.splitlines()
+ self.assertIn('/foo', lines[0])
+ self.assertIn(r'/\x1bbar\x00\x1b', lines[1])
+ self.assertIn('/spam a.', lines[2])
+ self.assertIn('/spam \\x1b\\x7f\\x9f\xa0beans.', lines[3])
+
def test_http_1_1(self):
result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n')
self.verify_http_server_response(result[0])
diff --git a/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst b/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst
new file mode 100644
index 0000000000..a396e95cd8
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst
@@ -0,0 +1,6 @@
+``python -m http.server`` no longer allows terminal control characters sent
+within a garbage request to be printed to the stderr server log.
+
+This is done by changing the :mod:`http.server` :class:`BaseHTTPRequestHandler`
+``.log_message`` method to replace control characters with a ``\xHH`` hex escape
+before printing.