Skip to content

Commit

Permalink
Update httpcompression.py
Browse files Browse the repository at this point in the history
Solved version of issue scrapy#4697
  • Loading branch information
ArianAsghari committed Sep 30, 2023
1 parent 9f8a8dc commit 4ed5132
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions scrapy/downloadermiddlewares/httpcompression.py
Expand Up @@ -31,6 +31,7 @@ class HttpCompressionMiddleware:

def __init__(self, stats=None):
    """Keep *stats* and cache whether Brotli is among the accepted encodings.

    :param stats: object stored as ``self.stats``; defaults to ``None``.
    """
    # Evaluated once here so the request/response hooks only read a flag
    # instead of re-checking ACCEPTED_ENCODINGS on every call.
    self._can_decode_br = b"br" in ACCEPTED_ENCODINGS
    self.stats = stats

@classmethod
def from_crawler(cls, crawler):
Expand All @@ -50,13 +51,22 @@ def from_crawler(cls, crawler):
return result

def process_request(self, request, spider):
    """Warn about an undecodable ``br`` request and set a default Accept-Encoding.

    If the request already carries an ``Accept-Encoding`` header that lists
    ``br`` while ``self._can_decode_br`` is false, a warning is logged through
    ``spider.logger``. Afterwards the header is defaulted to the joined
    ``ACCEPTED_ENCODINGS`` (an existing header value is left untouched).

    :param request: request whose ``headers`` are inspected and defaulted.
    :param spider: object providing ``logger`` for the warning.
    :returns: ``None``.
    """
    accept_encoding = request.headers.get("Accept-Encoding")
    if accept_encoding and not self._can_decode_br:
        # Content codings are comma separated with optional whitespace, may
        # carry a ";q=" weight and are case-insensitive, so normalise each
        # token instead of splitting on the literal b", " only.
        # The weight itself is ignored here.
        codings = {
            token.split(b";", 1)[0].strip().lower()
            for token in accept_encoding.split(b",")
        }
        if b"br" in codings:
            spider.logger.warning('Brotli encoding (br) set in Accept-Encoding header, but brotli is not installed.')
    request.headers.setdefault("Accept-Encoding", b", ".join(ACCEPTED_ENCODINGS))

def process_response(self, request, response, spider):
if request.method == "HEAD":
return response
if isinstance(response, Response):
content_encoding = response.headers.getlist("Content-Encoding")
if encoding == b'br':
if self._can_decode_br:
body = brotli.decompress(body)
else:
spider.logger.warning('Received Brotli encoded response, but brotli is not installed. Dropping br from Accept-Encoding.')
response.headers['Content-Encoding'] = [enc for enc in response.headers.getlist('Content-Encoding') if enc.lower() != b'br']
if content_encoding:
encoding = content_encoding.pop()
decoded_body = self._decode(response.body, encoding.lower())
Expand Down Expand Up @@ -99,6 +109,8 @@ def _decode(self, body, encoding):
body = zlib.decompress(body, -15)
if encoding == b"br" and b"br" in ACCEPTED_ENCODINGS:
body = brotli.decompress(body)
else:
raise ValueError('Brotli compression not supported in ACCEPTED_ENCODINGS')
if encoding == b"zstd" and b"zstd" in ACCEPTED_ENCODINGS:
# Using its streaming API since its simple API could handle only cases
# where there is content size data embedded in the frame
Expand Down

0 comments on commit 4ed5132

Please sign in to comment.