Merge pull request #6253 from wRAR/update-tools
Update tool versions, fix some of the pylint problems
wRAR committed Feb 28, 2024
2 parents 415c474 + 4f9dd99 commit 532cc8a
Showing 75 changed files with 156 additions and 156 deletions.
35 changes: 17 additions & 18 deletions .bandit.yml
@@ -1,20 +1,19 @@
skips:
- B101
- B113 # https://github.com/PyCQA/bandit/issues/1010
- B105
- B301
- B303
- B307
- B311
- B320
- B321
- B324
- B402 # https://github.com/scrapy/scrapy/issues/4180
- B403
- B404
- B406
- B410
- B503
- B603
- B605
- B101 # assert_used
- B105 # hardcoded_password_string
- B301 # pickle
- B307 # eval
- B311 # random
- B320 # xml_bad_etree
- B321 # ftplib, https://github.com/scrapy/scrapy/issues/4180
- B324 # hashlib "Use of weak SHA1 hash for security"
- B402 # import_ftplib, https://github.com/scrapy/scrapy/issues/4180
- B403 # import_pickle
- B404 # import_subprocess
- B406 # import_xml_sax
- B410 # import_lxml
- B411 # import_xmlrpclib, https://github.com/PyCQA/bandit/issues/1082
- B503 # ssl_with_bad_defaults
- B603 # subprocess_without_shell_equals_true
- B605 # start_process_with_a_shell
exclude_dirs: ['tests']
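
Note: every suppressed check now carries its bandit name, so the skip list documents itself. This is the file the bandit pre-commit hook below applies through its -r -c .bandit.yml arguments; an equivalent manual run (target path assumed) would be bandit -r -c .bandit.yml scrapy.
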
2 changes: 1 addition & 1 deletion .flake8
@@ -1,7 +1,7 @@
[flake8]

max-line-length = 119
ignore = W503, E203
ignore = E203, E501, E701, E704, W503

exclude =
docs/conf.py
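
The expanded flake8 ignore list is the set of pycodestyle checks that overlap or conflict with black's output: E203 (whitespace before ':'), E501 (line too long), E701/E704 (multiple statements on one line, which black 24's one-line ... stubs would trigger), and W503 (line break before a binary operator). A minimal illustration of the E203 case, with values invented for the example:

    chunk = [0, 1, 2, 3][1 + 1 : 3]  # black's slice spacing; E203 would flag the space before ':'
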
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -1,24 +1,24 @@
repos:
- repo: https://github.com/PyCQA/bandit
rev: 1.7.5
rev: 1.7.7
hooks:
- id: bandit
args: [-r, -c, .bandit.yml]
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
rev: 7.0.0
hooks:
- id: flake8
- repo: https://github.com/psf/black.git
rev: 23.9.1
rev: 24.2.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/adamchainz/blacken-docs
rev: 1.16.0
hooks:
- id: blacken-docs
additional_dependencies:
- black==23.9.1
- black==24.2.0
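
Note that blacken-docs pins the same black version (24.2.0) as the black hook itself, so code samples in the documentation are formatted exactly like the source tree. The whole hook set can be exercised locally with pre-commit run --all-files, or installed into the clone with pre-commit install (both standard pre-commit commands).
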
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -227,7 +227,7 @@
# A list of regular expressions that match URIs that should not be checked when
# doing a linkcheck build.
linkcheck_ignore = [
"http://localhost:\d+",
r"http://localhost:\d+",
"http://hg.scrapy.org",
"http://directory.google.com/",
]
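
The added r prefix makes the pattern a raw string; without it Python treats \d as an unrecognized escape and warns about it (a SyntaxWarning on recent versions, a DeprecationWarning on older ones), even though the resulting string happens to be identical. A quick comparison:

    import re

    unprefixed = "http://localhost:\d+"  # warns: invalid escape sequence '\d'
    pattern = r"http://localhost:\d+"    # raw string: the backslash reaches re verbatim
    assert re.match(pattern, "http://localhost:8080")
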
3 changes: 1 addition & 2 deletions docs/topics/addons.rst
@@ -150,8 +150,7 @@ Access the crawler instance:
def from_crawler(cls, crawler):
return cls(crawler)
def update_settings(self, settings):
...
def update_settings(self, settings): ...
Use a fallback component:

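
The collapsed def update_settings(self, settings): ... reflects black 24's formatting of stub ("dummy") implementations whose body is only ...; the documentation example is simply updated to what black now emits. Before and after, as a sketch:

    # black 23.x
    def update_settings(self, settings):
        ...


    # black 24.x
    def update_settings(self, settings): ...
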
17 changes: 1 addition & 16 deletions pylintrc
@@ -4,21 +4,14 @@ jobs=1 # >1 hides results

[MESSAGES CONTROL]
disable=abstract-method,
anomalous-backslash-in-string,
arguments-differ,
arguments-renamed,
attribute-defined-outside-init,
bad-classmethod-argument,
bad-mcs-classmethod-argument,
bare-except,
broad-except,
broad-exception-raised,
c-extension-no-member,
catching-non-exception,
cell-var-from-loop,
comparison-with-callable,
consider-using-dict-items,
consider-using-in,
consider-using-with,
cyclic-import,
dangerous-default-value,
@@ -32,7 +25,6 @@ disable=abstract-method,
implicit-str-concat,
import-error,
import-outside-toplevel,
import-self,
inconsistent-return-statements,
inherit-non-class,
invalid-name,
@@ -44,15 +36,14 @@ disable=abstract-method,
logging-fstring-interpolation,
logging-not-lazy,
lost-exception,
method-hidden,
missing-docstring,
no-else-raise,
no-else-return,
no-member,
no-method-argument,
no-name-in-module,
no-self-argument,
no-value-for-parameter,
no-value-for-parameter, # https://github.com/pylint-dev/pylint/issues/3268
not-callable,
pointless-exception-statement,
pointless-statement,
@@ -77,23 +68,17 @@ disable=abstract-method,
too-many-public-methods,
too-many-return-statements,
unbalanced-tuple-unpacking,
undefined-variable,
undefined-loop-variable,
unexpected-special-method-signature,
unnecessary-comprehension,
unnecessary-dunder-call,
unnecessary-pass,
unreachable,
unsubscriptable-object,
unused-argument,
unused-import,
unused-private-member,
unused-variable,
unused-wildcard-import,
use-dict-literal,
used-before-assignment,
useless-object-inheritance, # Required for Python 2 support
useless-return,
useless-super-delegation,
wildcard-import,
wrong-import-position
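
A shorter disable list means pylint now enforces the checks that were dropped, and several source changes below line up with that, e.g. the membership test in httpcompression.py (consider-using-in) and the inline disable in headers.py (consider-using-dict-items). When run from the repository root, pylint picks this file up automatically, so a local check is simply pylint scrapy (module path assumed).
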
1 change: 1 addition & 0 deletions scrapy/commands/__init__.py
@@ -1,6 +1,7 @@
"""
Base class for Scrapy commands
"""

import argparse
import os
from pathlib import Path
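
The lone blank line added after the module docstring here, and in many of the files below, appears to come from black 24 as well, which now separates a module docstring from the first statement that follows it:

    """
    Base class for Scrapy commands
    """

    import argparse  # black 24 inserts the blank line above this import
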
1 change: 1 addition & 0 deletions scrapy/commands/shell.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/shell.rst
"""

from argparse import Namespace
from threading import Thread
from typing import List, Type
6 changes: 3 additions & 3 deletions scrapy/core/downloader/handlers/__init__.py
@@ -21,9 +21,9 @@
class DownloadHandlers:
def __init__(self, crawler: "Crawler"):
self._crawler: "Crawler" = crawler
self._schemes: Dict[
str, Union[str, Callable]
] = {} # stores acceptable schemes on instancing
self._schemes: Dict[str, Union[str, Callable]] = (
{}
) # stores acceptable schemes on instancing
self._handlers: Dict[str, Any] = {} # stores instanced handlers for schemes
self._notconfigured: Dict[str, str] = {} # remembers failed handlers
handlers: Dict[str, Union[str, Callable]] = without_none_values(
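
This rewrap is another black 24 change: when an annotated assignment no longer fits on one line, black keeps the annotation intact and wraps the value in parentheses instead of splitting inside the type hint. The same release parenthesizes conditional expressions that have to wrap, which accounts for the scrapy/core/http2/stream.py and Request.to_dict() changes further down. A reduced sketch (names shortened, so imagine the lines exceeding the length limit):

    from typing import Callable, Dict, Union

    # black 23.x split inside the annotation:
    schemes: Dict[
        str, Union[str, Callable]
    ] = {}  # stores acceptable schemes

    # black 24.x parenthesizes the value instead:
    schemes: Dict[str, Union[str, Callable]] = (
        {}
    )  # stores acceptable schemes
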
1 change: 1 addition & 0 deletions scrapy/core/downloader/handlers/http10.py
@@ -1,5 +1,6 @@
"""Download handlers for http and https schemes
"""

from scrapy.utils.misc import build_from_crawler, load_object
from scrapy.utils.python import to_unicode

1 change: 1 addition & 0 deletions scrapy/core/downloader/middleware.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/downloader-middleware.rst
"""

from typing import Any, Callable, Generator, List, Union, cast

from twisted.internet.defer import Deferred, inlineCallbacks
1 change: 1 addition & 0 deletions scrapy/core/engine.py
@@ -4,6 +4,7 @@
For more information see docs/topics/architecture.rst
"""

import logging
from time import time
from typing import (
12 changes: 6 additions & 6 deletions scrapy/core/http2/stream.py
@@ -111,17 +111,17 @@ def __init__(
# Metadata of an HTTP/2 connection stream
# initialized when stream is instantiated
self.metadata: Dict = {
"request_content_length": 0
if self._request.body is None
else len(self._request.body),
"request_content_length": (
0 if self._request.body is None else len(self._request.body)
),
# Flag to keep track whether the stream has initiated the request
"request_sent": False,
# Flag to track whether we have logged about exceeding download warnsize
"reached_warnsize": False,
# Each time we send a data frame, we will decrease value by the amount send.
"remaining_content_length": 0
if self._request.body is None
else len(self._request.body),
"remaining_content_length": (
0 if self._request.body is None else len(self._request.body)
),
# Flag to keep track whether client (self) have closed this stream
"stream_closed_local": False,
# Flag to keep track whether the server has closed the stream
1 change: 1 addition & 0 deletions scrapy/core/scraper.py
@@ -1,5 +1,6 @@
"""This module implements the Scraper component which parses responses and
extracts information from them"""

from __future__ import annotations

import logging
1 change: 1 addition & 0 deletions scrapy/core/spidermw.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/spider-middleware.rst
"""

import logging
from inspect import isasyncgenfunction, iscoroutine
from itertools import islice
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/defaultheaders.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/downloader-middleware.rst
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Iterable, Tuple, Union
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/downloadtimeout.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/downloader-middleware.rst
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Union
2 changes: 1 addition & 1 deletion scrapy/downloadermiddlewares/httpcompression.py
@@ -169,7 +169,7 @@ def _split_encodings(self, content_encoding):
return to_decode, to_keep

def _decode(self, body: bytes, encoding: bytes, max_size: int) -> bytes:
if encoding == b"gzip" or encoding == b"x-gzip":
if encoding in {b"gzip", b"x-gzip"}:
return gunzip(body, max_size=max_size)
if encoding == b"deflate":
return _inflate(body, max_size=max_size)
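
Replacing the chained equality with a membership test is exactly what pylint's consider-using-in check asks for, presumably one of the checks the slimmed-down pylintrc above no longer disables. The two forms are equivalent for this comparison:

    encoding = b"x-gzip"

    if encoding == b"gzip" or encoding == b"x-gzip":  # flagged by consider-using-in
        pass
    if encoding in {b"gzip", b"x-gzip"}:  # preferred; a tuple works just as well
        pass
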
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/retry.py
@@ -9,6 +9,7 @@
Failed pages are collected on the scraping process and rescheduled at the end,
once the spider has finished crawling all regular (non failed) pages.
"""

from __future__ import annotations

import warnings
1 change: 1 addition & 0 deletions scrapy/exceptions.py
@@ -4,6 +4,7 @@
These exceptions are documented in docs/topics/exceptions.rst. Please don't add
new exceptions here without documenting them there.
"""

from typing import Any

# Internal
1 change: 1 addition & 0 deletions scrapy/extension.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/extensions.rst
"""

from scrapy.middleware import MiddlewareManager
from scrapy.utils.conf import build_component_list

1 change: 1 addition & 0 deletions scrapy/extensions/corestats.py
@@ -1,6 +1,7 @@
"""
Extension for collecting core stats like items scraped and start/finish times
"""

from datetime import datetime, timezone

from scrapy import signals
1 change: 1 addition & 0 deletions scrapy/extensions/memusage.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/extensions.rst
"""

import logging
import socket
import sys
1 change: 1 addition & 0 deletions scrapy/extensions/postprocessing.py
@@ -1,6 +1,7 @@
"""
Extension for processing data before they are exported to feeds.
"""

from bz2 import BZ2File
from gzip import GzipFile
from io import IOBase
4 changes: 3 additions & 1 deletion scrapy/http/headers.py
@@ -113,7 +113,9 @@ def items(self) -> Iterable[Tuple[bytes, List[bytes]]]: # type: ignore[override
return ((k, self.getlist(k)) for k in self.keys())

def values(self) -> List[Optional[bytes]]: # type: ignore[override]
return [self[k] for k in self.keys()]
return [
self[k] for k in self.keys() # pylint: disable=consider-using-dict-items
]

def to_string(self) -> bytes:
# cast() can be removed if the headers_dict_to_raw() hint is improved
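
The inline disable is needed now that consider-using-dict-items is no longer switched off globally; the check flags code that iterates over d.keys() and then indexes d[k], suggesting d.items() instead. That rewrite would be wrong here: as the surrounding code shows, Headers.items() yields full value lists via getlist() while self[k] yields a single optional value, so values() must keep indexing. What the check normally suggests, on a plain dict:

    d = {"a": 1, "b": 2}

    for k in d.keys():  # flagged: consider-using-dict-items
        print(k, d[k])

    for k, v in d.items():  # suggested replacement
        print(k, v)
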
17 changes: 11 additions & 6 deletions scrapy/http/request/__init__.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

import inspect
from typing import (
Any,
@@ -231,12 +232,16 @@ def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]
"""
d = {
"url": self.url, # urls are safe (safe_string_url)
"callback": _find_method(spider, self.callback)
if callable(self.callback)
else self.callback,
"errback": _find_method(spider, self.errback)
if callable(self.errback)
else self.errback,
"callback": (
_find_method(spider, self.callback)
if callable(self.callback)
else self.callback
),
"errback": (
_find_method(spider, self.errback)
if callable(self.errback)
else self.errback
),
"headers": dict(self.headers),
}
for attr in self.attributes:
1 change: 1 addition & 0 deletions scrapy/http/request/rpc.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

import xmlrpc.client as xmlrpclib
from typing import Any, Optional

1 change: 1 addition & 0 deletions scrapy/http/response/__init__.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

from ipaddress import IPv4Address, IPv6Address
1 change: 1 addition & 0 deletions scrapy/http/response/text.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

import json