From e75b0d9afbea8a933f8f5f11d279e661cbfd676b Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Sat, 12 Mar 2022 18:30:30 -0700 Subject: [PATCH 1/7] Add new regular expressions for Chunked Encoding This also moves some regular expressions for QUOTED_PAIR/QUOTED_STRING into this module from utilities so that they may be reused. --- src/waitress/rfc7230.py | 27 ++++++++++++++++++++++++++- src/waitress/utilities.py | 28 +++------------------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/waitress/rfc7230.py b/src/waitress/rfc7230.py index 9b25fbd9..26e64260 100644 --- a/src/waitress/rfc7230.py +++ b/src/waitress/rfc7230.py @@ -5,6 +5,9 @@ import re +HEXDIG = "[0-9a-fA-F]" +DIGIT = "[0-9]" + WS = "[ \t]" OWS = WS + "{0,}?" RWS = WS + "{1,}?" @@ -25,6 +28,12 @@ # ; visible (printing) characters VCHAR = r"\x21-\x7e" +# The '\\' between \x5b and \x5d is needed to escape \x5d (']') +QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]" + +QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])" +QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"' + # header-field = field-name ":" OWS field-value OWS # field-name = token # field-value = *( field-content / obs-fold ) @@ -43,8 +52,24 @@ # Which allows the field value here to just see if there is even a value in the first place FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?" -HEADER_FIELD = re.compile( +# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +# chunk-ext-name = token +# chunk-ext-val = token / quoted-string + +CHUNK_EXT_NAME = TOKEN +CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")" +CHUNK_EXT = ( + "(?:;(?P" + CHUNK_EXT_NAME + ")(?:=(?P" + CHUNK_EXT_VAL + "))?)*" +) + +# Pre-compiled regular expressions for use elsewhere +ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + "+$").encode("latin-1")) +ONLY_DIGIT_RE = re.compile(("^" + DIGIT + "+$").encode("latin-1")) +HEADER_FIELD_RE = re.compile( ( "^(?P" + TOKEN + "):" + OWS + "(?P" + FIELD_VALUE + ")" + OWS + "$" ).encode("latin-1") ) +QUOTED_PAIR_RE = re.compile(QUOTED_PAIR) +QUOTED_STRING_RE = re.compile(QUOTED_STRING) +CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + "$").encode("latin-1")) diff --git a/src/waitress/utilities.py b/src/waitress/utilities.py index 3caaa336..6ae47420 100644 --- a/src/waitress/utilities.py +++ b/src/waitress/utilities.py @@ -22,7 +22,7 @@ import stat import time -from .rfc7230 import OBS_TEXT, VCHAR +from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE logger = logging.getLogger("waitress") queue_logger = logging.getLogger("waitress.queue") @@ -216,32 +216,10 @@ def parse_http_date(d): return retval -# RFC 5234 Appendix B.1 "Core Rules": -# VCHAR = %x21-7E -# ; visible (printing) characters -vchar_re = VCHAR - -# RFC 7230 Section 3.2.6 "Field Value Components": -# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE -# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text -# obs-text = %x80-FF -# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) -obs_text_re = OBS_TEXT - -# The '\\' between \x5b and \x5d is needed to escape \x5d (']') -qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]" - -quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])" -quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"' - -quoted_string = re.compile(quoted_string_re) -quoted_pair = re.compile(quoted_pair_re) - - def undquote(value): if value.startswith('"') and value.endswith('"'): # So it claims to be DQUOTE'ed, let's validate that - matches = quoted_string.match(value) + matches = QUOTED_STRING_RE.match(value) if matches and matches.end() == len(value): # Remove the DQUOTE's from the value @@ -249,7 +227,7 @@ def undquote(value): # Remove all backslashes that are followed by a valid vchar or # obs-text - value = quoted_pair.sub(r"\1", value) + value = QUOTED_PAIR_RE.sub(r"\1", value) return value elif not value.startswith('"') and not value.endswith('"'): From 1f6059f4c4a3a0b256b4027eda64fb9fc311b0a6 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Sat, 12 Mar 2022 18:32:24 -0700 Subject: [PATCH 2/7] Be more strict in parsing Content-Length Validate that we are only parsing digits and nothing else. RFC7230 is explicit in that the Content-Length can only exist of 1*DIGIT and may not include any additional sign information. The Python int() function parses `+10` as `10` which means we were more lenient than the standard intended. --- src/waitress/parser.py | 12 ++++++------ tests/test_parser.py | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/waitress/parser.py b/src/waitress/parser.py index a6e4d982..ff16a402 100644 --- a/src/waitress/parser.py +++ b/src/waitress/parser.py @@ -23,6 +23,7 @@ from waitress.buffers import OverflowableBuffer from waitress.receiver import ChunkedReceiver, FixedStreamReceiver +from waitress.rfc7230 import HEADER_FIELD_RE, ONLY_DIGIT_RE from waitress.utilities import ( BadRequest, RequestEntityTooLarge, @@ -31,8 +32,6 @@ find_double_newline, ) -from .rfc7230 import HEADER_FIELD - def unquote_bytes_to_wsgi(bytestring): return unquote_to_bytes(bytestring).decode("latin-1") @@ -221,7 +220,7 @@ def parse_header(self, header_plus): headers = self.headers for line in lines: - header = HEADER_FIELD.match(line) + header = HEADER_FIELD_RE.match(line) if not header: raise ParsingError("Invalid header") @@ -317,11 +316,12 @@ def parse_header(self, header_plus): self.connection_close = True if not self.chunked: - try: - cl = int(headers.get("CONTENT_LENGTH", 0)) - except ValueError: + cl = headers.get("CONTENT_LENGTH", "0") + + if not ONLY_DIGIT_RE.match(cl.encode("latin-1")): raise ParsingError("Content-Length is invalid") + cl = int(cl) self.content_length = cl if cl > 0: diff --git a/tests/test_parser.py b/tests/test_parser.py index aacef26d..868c1225 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -193,6 +193,26 @@ def test_parse_header_bad_content_length(self): else: # pragma: nocover self.assertTrue(False) + def test_parse_header_bad_content_length_plus(self): + data = b"GET /foobar HTTP/8.4\r\ncontent-length: +10\r\n" + + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Content-Length is invalid", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_bad_content_length_minus(self): + data = b"GET /foobar HTTP/8.4\r\ncontent-length: -10\r\n" + + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Content-Length is invalid", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + def test_parse_header_multiple_content_length(self): data = b"GET /foobar HTTP/8.4\r\ncontent-length: 10\r\ncontent-length: 20\r\n" From 884bed167d09c3d5fdf0730e2ca2564eefdd4534 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Sat, 12 Mar 2022 18:35:01 -0700 Subject: [PATCH 3/7] Update tests to remove invalid chunked encoding chunk-size RFC7230 states the following: chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF chunk-size = 1*HEXDIG Where chunk-ext is: chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) Only if there is a chunk-ext should there be a `;` after the 1*HEXDIG. And a chunk-ext that is empty is invalid. --- tests/test_functional.py | 6 +++--- tests/test_parser.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_functional.py b/tests/test_functional.py index f74a2527..d1366caf 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -322,7 +322,7 @@ def test_chunking_request_without_content(self): self.assertFalse("transfer-encoding" in headers) def test_chunking_request_with_content(self): - control_line = b"20;\r\n" # 20 hex = 32 dec + control_line = b"20\r\n" # 20 hex = 32 dec s = b"This string has 32 characters.\r\n" expected = s * 12 header = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n" @@ -341,7 +341,7 @@ def test_chunking_request_with_content(self): self.assertFalse("transfer-encoding" in headers) def test_broken_chunked_encoding(self): - control_line = b"20;\r\n" # 20 hex = 32 dec + control_line = b"20\r\n" # 20 hex = 32 dec s = b"This string has 32 characters.\r\n" to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n" to_send += control_line + s + b"\r\n" @@ -365,7 +365,7 @@ def test_broken_chunked_encoding(self): self.assertRaises(ConnectionClosed, read_http, fp) def test_broken_chunked_encoding_missing_chunk_end(self): - control_line = b"20;\r\n" # 20 hex = 32 dec + control_line = b"20\r\n" # 20 hex = 32 dec s = b"This string has 32 characters.\r\n" to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n" to_send += control_line + s diff --git a/tests/test_parser.py b/tests/test_parser.py index 868c1225..4461bdea 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -155,7 +155,7 @@ def test_received_chunked_completed_sets_content_length(self): b"Transfer-Encoding: chunked\r\n" b"X-Foo: 1\r\n" b"\r\n" - b"1d;\r\n" + b"1d\r\n" b"This string has 29 characters\r\n" b"0\r\n\r\n" ) From d032a669682838b26d6a1a1b513b9da83b0e0f90 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Sat, 12 Mar 2022 18:42:51 -0700 Subject: [PATCH 4/7] Error when receiving back Chunk Extension Waitress discards chunked extensions and does no further processing on them, however it failed to validate that the chunked encoding extension did not contain invalid data. We now validate that if there are any chunked extensions that they are well-formed, if they are not and contain invalid characters, then Waitress will now correctly return a Bad Request and stop any further processing of the request. --- src/waitress/receiver.py | 11 ++++++++++- tests/test_functional.py | 22 ++++++++++++++++++++++ tests/test_receiver.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/waitress/receiver.py b/src/waitress/receiver.py index 87852808..6289d1a9 100644 --- a/src/waitress/receiver.py +++ b/src/waitress/receiver.py @@ -14,6 +14,7 @@ """Data Chunk Receiver """ +from waitress.rfc7230 import CHUNK_EXT_RE, ONLY_HEXDIG_RE from waitress.utilities import BadRequest, find_double_newline @@ -110,6 +111,7 @@ def received(self, s): s = b"" else: self.chunk_end = b"" + if pos == 0: # Chop off the terminating CR LF from the chunk s = s[2:] @@ -140,7 +142,14 @@ def received(self, s): semi = line.find(b";") if semi >= 0: - # discard extension info. + extinfo = line[semi:] + valid_ext_info = CHUNK_EXT_RE.match(extinfo) + + if not valid_ext_info: + self.error = BadRequest("Invalid chunk extension") + self.all_chunks_received = True + + break line = line[:semi] try: sz = int(line.strip(), 16) # hexadecimal diff --git a/tests/test_functional.py b/tests/test_functional.py index d1366caf..9e33fc0f 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -364,6 +364,28 @@ def test_broken_chunked_encoding(self): self.send_check_error(to_send) self.assertRaises(ConnectionClosed, read_http, fp) + def test_broken_chunked_encoding_invalid_extension(self): + control_line = b"20;invalid=\r\n" # 20 hex = 32 dec + s = b"This string has 32 characters.\r\n" + to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n" + to_send += control_line + s + b"\r\n" + self.connect() + self.sock.send(to_send) + with self.sock.makefile("rb", 0) as fp: + line, headers, response_body = read_http(fp) + self.assertline(line, "400", "Bad Request", "HTTP/1.1") + cl = int(headers["content-length"]) + self.assertEqual(cl, len(response_body)) + self.assertIn(b"Invalid chunk extension", response_body) + self.assertEqual( + sorted(headers.keys()), + ["connection", "content-length", "content-type", "date", "server"], + ) + self.assertEqual(headers["content-type"], "text/plain") + # connection has been closed + self.send_check_error(to_send) + self.assertRaises(ConnectionClosed, read_http, fp) + def test_broken_chunked_encoding_missing_chunk_end(self): control_line = b"20\r\n" # 20 hex = 32 dec s = b"This string has 32 characters.\r\n" diff --git a/tests/test_receiver.py b/tests/test_receiver.py index f55aa68e..a3b6f994 100644 --- a/tests/test_receiver.py +++ b/tests/test_receiver.py @@ -1,5 +1,7 @@ import unittest +import pytest + class TestFixedStreamReceiver(unittest.TestCase): def _makeOne(self, cl, buf): @@ -226,6 +228,41 @@ def test_received_multiple_chunks_split(self): self.assertEqual(inst.error, None) +class TestChunkedReceiverParametrized: + def _makeOne(self, buf): + from waitress.receiver import ChunkedReceiver + + return ChunkedReceiver(buf) + + @pytest.mark.parametrize( + "invalid_extension", [b"\n", b"invalid=", b"\r", b"invalid = true"] + ) + def test_received_invalid_extensions(self, invalid_extension): + from waitress.utilities import BadRequest + + buf = DummyBuffer() + inst = self._makeOne(buf) + data = b"4;" + invalid_extension + b"\r\ntest\r\n" + result = inst.received(data) + assert result == len(data) + assert inst.error.__class__ == BadRequest + assert inst.error.body == "Invalid chunk extension" + + @pytest.mark.parametrize( + "valid_extension", [b"test", b"valid=true", b"valid=true;other=true"] + ) + def test_received_valid_extensions(self, valid_extension): + # While waitress may ignore extensions in Chunked Encoding, we do want + # to make sure that we don't fail when we do encounter one that is + # valid + buf = DummyBuffer() + inst = self._makeOne(buf) + data = b"4;" + valid_extension + b"\r\ntest\r\n" + result = inst.received(data) + assert result == len(data) + assert inst.error == None + + class DummyBuffer: def __init__(self, data=None): if data is None: From d9bdfa0cf210f6daf017d7c5a3cc149bdec8a9a7 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Sat, 12 Mar 2022 18:48:26 -0700 Subject: [PATCH 5/7] Validate chunk size in Chunked Encoding are HEXDIG RFC7230 states that a chunk-size should be 1*HEXDIG, this is now validated before passing the resulting string to int() which would also parse other formats for hex, such as: `0x01` as `1` and `+0x01` as `1`. This would lead to a potential for a frontend proxy server and waitress to disagree on where a chunk started and ended, thereby potentially leading to request smuggling. With the increased validation if the size is not just hex digits, Waitress now returns a Bad Request and stops processing the request. --- src/waitress/receiver.py | 19 ++++++++++++++----- tests/test_functional.py | 22 ++++++++++++++++++++++ tests/test_receiver.py | 12 ++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/waitress/receiver.py b/src/waitress/receiver.py index 6289d1a9..2f0b734a 100644 --- a/src/waitress/receiver.py +++ b/src/waitress/receiver.py @@ -150,12 +150,21 @@ def received(self, s): self.all_chunks_received = True break + line = line[:semi] - try: - sz = int(line.strip(), 16) # hexadecimal - except ValueError: # garbage in input - self.error = BadRequest("garbage in chunked encoding input") - sz = 0 + + # Remove any whitespace + line = line.strip() + + if not ONLY_HEXDIG_RE.match(line): + self.error = BadRequest("Invalid chunk size") + self.all_chunks_received = True + + break + + # Can not fail due to matching against the regular + # expression above + sz = int(line.strip(), 16) # hexadecimal if sz > 0: # Start a new chunk. diff --git a/tests/test_functional.py b/tests/test_functional.py index 9e33fc0f..60eb24a4 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -364,6 +364,28 @@ def test_broken_chunked_encoding(self): self.send_check_error(to_send) self.assertRaises(ConnectionClosed, read_http, fp) + def test_broken_chunked_encoding_invalid_hex(self): + control_line = b"0x20\r\n" # 20 hex = 32 dec + s = b"This string has 32 characters.\r\n" + to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n" + to_send += control_line + s + b"\r\n" + self.connect() + self.sock.send(to_send) + with self.sock.makefile("rb", 0) as fp: + line, headers, response_body = read_http(fp) + self.assertline(line, "400", "Bad Request", "HTTP/1.1") + cl = int(headers["content-length"]) + self.assertEqual(cl, len(response_body)) + self.assertIn(b"Invalid chunk size", response_body) + self.assertEqual( + sorted(headers.keys()), + ["connection", "content-length", "content-type", "date", "server"], + ) + self.assertEqual(headers["content-type"], "text/plain") + # connection has been closed + self.send_check_error(to_send) + self.assertRaises(ConnectionClosed, read_http, fp) + def test_broken_chunked_encoding_invalid_extension(self): control_line = b"20;invalid=\r\n" # 20 hex = 32 dec s = b"This string has 32 characters.\r\n" diff --git a/tests/test_receiver.py b/tests/test_receiver.py index a3b6f994..8e43cc86 100644 --- a/tests/test_receiver.py +++ b/tests/test_receiver.py @@ -262,6 +262,18 @@ def test_received_valid_extensions(self, valid_extension): assert result == len(data) assert inst.error == None + @pytest.mark.parametrize("invalid_size", [b"0x04", b"+0x04", b"x04", b"+04"]) + def test_received_invalid_size(self, invalid_size): + from waitress.utilities import BadRequest + + buf = DummyBuffer() + inst = self._makeOne(buf) + data = invalid_size + b"\r\ntest\r\n" + result = inst.received(data) + assert result == len(data) + assert inst.error.__class__ == BadRequest + assert inst.error.body == "Invalid chunk size" + class DummyBuffer: def __init__(self, data=None): From bd22869c143a3f1284f271399524676efbafa655 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Sat, 12 Mar 2022 19:16:23 -0700 Subject: [PATCH 6/7] Remove extraneous calls to .strip() in Chunked Encoding To be valid chunked encoding we should not be removing any whitespace as the standard does not allow for optional whitespace. If whitespace is encountered in the wrong place, it should lead to a 400 Bad Request instead. --- src/waitress/receiver.py | 6 +----- tests/test_receiver.py | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/waitress/receiver.py b/src/waitress/receiver.py index 2f0b734a..76633552 100644 --- a/src/waitress/receiver.py +++ b/src/waitress/receiver.py @@ -135,7 +135,6 @@ def received(self, s): line = s[:pos] s = s[pos + 2 :] self.control_line = b"" - line = line.strip() if line: # Begin a new chunk. @@ -153,9 +152,6 @@ def received(self, s): line = line[:semi] - # Remove any whitespace - line = line.strip() - if not ONLY_HEXDIG_RE.match(line): self.error = BadRequest("Invalid chunk size") self.all_chunks_received = True @@ -164,7 +160,7 @@ def received(self, s): # Can not fail due to matching against the regular # expression above - sz = int(line.strip(), 16) # hexadecimal + sz = int(line, 16) # hexadecimal if sz > 0: # Start a new chunk. diff --git a/tests/test_receiver.py b/tests/test_receiver.py index 8e43cc86..d160cac4 100644 --- a/tests/test_receiver.py +++ b/tests/test_receiver.py @@ -262,7 +262,9 @@ def test_received_valid_extensions(self, valid_extension): assert result == len(data) assert inst.error == None - @pytest.mark.parametrize("invalid_size", [b"0x04", b"+0x04", b"x04", b"+04"]) + @pytest.mark.parametrize( + "invalid_size", [b"0x04", b"+0x04", b"x04", b"+04", b" 04", b" 0x04"] + ) def test_received_invalid_size(self, invalid_size): from waitress.utilities import BadRequest From b28c9e8bda326ff2f87bf8eb7ea6b110ee0ae6fe Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Wed, 16 Mar 2022 15:25:23 -0600 Subject: [PATCH 7/7] Prep for 2.1.1 --- CHANGES.txt | 26 ++++++++++++++++++++++++++ setup.cfg | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index a1e60fe6..eb7093c5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,29 @@ +2.1.1 +----- + +Security Bugfix +~~~~~~~~~~~~~~~ + +- Waitress now validates that chunked encoding extensions are valid, and don't + contain invalid characters that are not allowed. They are still skipped/not + processed, but if they contain invalid data we no longer continue in and + return a 400 Bad Request. This stops potential HTTP desync/HTTP request + smuggling. Thanks to Zhang Zeyu for reporting this issue. See + https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36 + +- Waitress now validates that the chunk length is only valid hex digits when + parsing chunked encoding, and values such as ``0x01`` and ``+01`` are no + longer supported. This stops potential HTTP desync/HTTP request smuggling. + Thanks to Zhang Zeyu for reporting this issue. See + https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36 + +- Waitress now validates that the Content-Length sent by a remote contains only + digits in accordance with RFC7230 and will return a 400 Bad Request when the + Content-Length header contains invalid data, such as ``+10`` which would + previously get parsed as ``10`` and accepted. This stops potential HTTP + desync/HTTP request smuggling Thanks to Zhang Zeyu for reporting this issue. See + https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36 + 2.1.0 ----- diff --git a/setup.cfg b/setup.cfg index b1d21988..69086dce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = waitress -version = 2.1.0 +version = 2.1.1 description = Waitress WSGI server long_description = file: README.rst, CHANGES.txt long_description_content_type = text/x-rst