Merge pull request #6253 from wRAR/update-tools
Update tool versions, fix some of the pylint problems
wRAR committed Feb 28, 2024
2 parents 415c474 + 4f9dd99 commit 532cc8a
Showing 75 changed files with 156 additions and 156 deletions.
35 changes: 17 additions & 18 deletions .bandit.yml
@@ -1,20 +1,19 @@
skips:
- B101
- B113 # https://github.com/PyCQA/bandit/issues/1010
- B105
- B301
- B303
- B307
- B311
- B320
- B321
- B324
- B402 # https://github.com/scrapy/scrapy/issues/4180
- B403
- B404
- B406
- B410
- B503
- B603
- B605
- B101 # assert_used
- B105 # hardcoded_password_string
- B301 # pickle
- B307 # eval
- B311 # random
- B320 # xml_bad_etree
- B321 # ftplib, https://github.com/scrapy/scrapy/issues/4180
- B324 # hashlib "Use of weak SHA1 hash for security"
- B402 # import_ftplib, https://github.com/scrapy/scrapy/issues/4180
- B403 # import_pickle
- B404 # import_subprocess
- B406 # import_xml_sax
- B410 # import_lxml
- B411 # import_xmlrpclib, https://github.com/PyCQA/bandit/issues/1082
- B503 # ssl_with_bad_defaults
- B603 # subprocess_without_shell_equals_true
- B605 # start_process_with_a_shell
exclude_dirs: ['tests']
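
Note: every suppressed check now carries its bandit name, so the skip list documents itself. This is the file the bandit pre-commit hook below applies through its -r -c .bandit.yml arguments; an equivalent manual run (target path assumed) would be bandit -r -c .bandit.yml scrapy.
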
2 changes: 1 addition & 1 deletion .flake8
@@ -1,7 +1,7 @@
[flake8]

max-line-length = 119
ignore = W503, E203
ignore = E203, E501, E701, E704, W503

exclude =
docs/conf.py
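
The expanded flake8 ignore list is the set of pycodestyle checks that overlap or conflict with black's output: E203 (whitespace before ':'), E501 (line too long), E701/E704 (multiple statements on one line, which black 24's one-line ... stubs would trigger), and W503 (line break before a binary operator). A minimal illustration of the E203 case, with values invented for the example:

    chunk = [0, 1, 2, 3][1 + 1 : 3]  # black's slice spacing; E203 would flag the space before ':'
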
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -1,24 +1,24 @@
repos:
- repo: https://github.com/PyCQA/bandit
rev: 1.7.5
rev: 1.7.7
hooks:
- id: bandit
args: [-r, -c, .bandit.yml]
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
rev: 7.0.0
hooks:
- id: flake8
- repo: https://github.com/psf/black.git
rev: 23.9.1
rev: 24.2.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/adamchainz/blacken-docs
rev: 1.16.0
hooks:
- id: blacken-docs
additional_dependencies:
- black==23.9.1
- black==24.2.0
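
Note that blacken-docs pins the same black version (24.2.0) as the black hook itself, so code samples in the documentation are formatted exactly like the source tree. The whole hook set can be exercised locally with pre-commit run --all-files, or installed into the clone with pre-commit install (both standard pre-commit commands).
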
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -227,7 +227,7 @@
# A list of regular expressions that match URIs that should not be checked when
# doing a linkcheck build.
linkcheck_ignore = [
"http://localhost:\d+",
r"http://localhost:\d+",
"http://hg.scrapy.org",
"http://directory.google.com/",
]
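
The added r prefix makes the pattern a raw string; without it Python treats \d as an unrecognized escape and warns about it (a SyntaxWarning on recent versions, a DeprecationWarning on older ones), even though the resulting string happens to be identical. A quick comparison:

    import re

    unprefixed = "http://localhost:\d+"  # warns: invalid escape sequence '\d'
    pattern = r"http://localhost:\d+"    # raw string: the backslash reaches re verbatim
    assert re.match(pattern, "http://localhost:8080")
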
3 changes: 1 addition & 2 deletions docs/topics/addons.rst
@@ -150,8 +150,7 @@ Access the crawler instance:
def from_crawler(cls, crawler):
return cls(crawler)
def update_settings(self, settings):
...
def update_settings(self, settings): ...
Use a fallback component:

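
The collapsed def update_settings(self, settings): ... reflects black 24's formatting of stub ("dummy") implementations whose body is only ...; the documentation example is simply updated to what black now emits. Before and after, as a sketch:

    # black 23.x
    def update_settings(self, settings):
        ...


    # black 24.x
    def update_settings(self, settings): ...
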
17 changes: 1 addition & 16 deletions pylintrc
@@ -4,21 +4,14 @@ jobs=1 # >1 hides results

[MESSAGES CONTROL]
disable=abstract-method,
anomalous-backslash-in-string,
arguments-differ,
arguments-renamed,
attribute-defined-outside-init,
bad-classmethod-argument,
bad-mcs-classmethod-argument,
bare-except,
broad-except,
broad-exception-raised,
c-extension-no-member,
catching-non-exception,
cell-var-from-loop,
comparison-with-callable,
consider-using-dict-items,
consider-using-in,
consider-using-with,
cyclic-import,
dangerous-default-value,
@@ -32,7 +25,6 @@ disable=abstract-method,
implicit-str-concat,
import-error,
import-outside-toplevel,
import-self,
inconsistent-return-statements,
inherit-non-class,
invalid-name,
@@ -44,15 +36,14 @@ disable=abstract-method,
logging-fstring-interpolation,
logging-not-lazy,
lost-exception,
method-hidden,
missing-docstring,
no-else-raise,
no-else-return,
no-member,
no-method-argument,
no-name-in-module,
no-self-argument,
no-value-for-parameter,
no-value-for-parameter, # https://github.com/pylint-dev/pylint/issues/3268
not-callable,
pointless-exception-statement,
pointless-statement,
@@ -77,23 +68,17 @@ disable=abstract-method,
too-many-public-methods,
too-many-return-statements,
unbalanced-tuple-unpacking,
undefined-variable,
undefined-loop-variable,
unexpected-special-method-signature,
unnecessary-comprehension,
unnecessary-dunder-call,
unnecessary-pass,
unreachable,
unsubscriptable-object,
unused-argument,
unused-import,
unused-private-member,
unused-variable,
unused-wildcard-import,
use-dict-literal,
used-before-assignment,
useless-object-inheritance, # Required for Python 2 support
useless-return,
useless-super-delegation,
wildcard-import,
wrong-import-position
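
A shorter disable list means pylint now enforces the checks that were dropped, and several source changes below line up with that, e.g. the membership test in httpcompression.py (consider-using-in) and the inline disable in headers.py (consider-using-dict-items). When run from the repository root, pylint picks this file up automatically, so a local check is simply pylint scrapy (module path assumed).
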
1 change: 1 addition & 0 deletions scrapy/commands/__init__.py
@@ -1,6 +1,7 @@
"""
Base class for Scrapy commands
"""

import argparse
import os
from pathlib import Path
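
The lone blank line added after the module docstring here, and in many of the files below, appears to come from black 24 as well, which now separates a module docstring from the first statement that follows it:

    """
    Base class for Scrapy commands
    """

    import argparse  # black 24 inserts the blank line above this import
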
1 change: 1 addition & 0 deletions scrapy/commands/shell.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/shell.rst
"""

from argparse import Namespace
from threading import Thread
from typing import List, Type
6 changes: 3 additions & 3 deletions scrapy/core/downloader/handlers/__init__.py
@@ -21,9 +21,9 @@
class DownloadHandlers:
def __init__(self, crawler: "Crawler"):
self._crawler: "Crawler" = crawler
self._schemes: Dict[
str, Union[str, Callable]
] = {} # stores acceptable schemes on instancing
self._schemes: Dict[str, Union[str, Callable]] = (
{}
) # stores acceptable schemes on instancing
self._handlers: Dict[str, Any] = {} # stores instanced handlers for schemes
self._notconfigured: Dict[str, str] = {} # remembers failed handlers
handlers: Dict[str, Union[str, Callable]] = without_none_values(
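
This rewrap is another black 24 change: when an annotated assignment no longer fits on one line, black keeps the annotation intact and wraps the value in parentheses instead of splitting inside the type hint. The same release parenthesizes conditional expressions that have to wrap, which accounts for the scrapy/core/http2/stream.py and Request.to_dict() changes further down. A reduced sketch (names shortened, so imagine the lines exceeding the length limit):

    from typing import Callable, Dict, Union

    # black 23.x split inside the annotation:
    schemes: Dict[
        str, Union[str, Callable]
    ] = {}  # stores acceptable schemes

    # black 24.x parenthesizes the value instead:
    schemes: Dict[str, Union[str, Callable]] = (
        {}
    )  # stores acceptable schemes
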
1 change: 1 addition & 0 deletions scrapy/core/downloader/handlers/http10.py
@@ -1,5 +1,6 @@
"""Download handlers for http and https schemes
"""

from scrapy.utils.misc import build_from_crawler, load_object
from scrapy.utils.python import to_unicode

1 change: 1 addition & 0 deletions scrapy/core/downloader/middleware.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/downloader-middleware.rst
"""

from typing import Any, Callable, Generator, List, Union, cast

from twisted.internet.defer import Deferred, inlineCallbacks
1 change: 1 addition & 0 deletions scrapy/core/engine.py
@@ -4,6 +4,7 @@
For more information see docs/topics/architecture.rst
"""

import logging
from time import time
from typing import (
12 changes: 6 additions & 6 deletions scrapy/core/http2/stream.py
@@ -111,17 +111,17 @@ def __init__(
# Metadata of an HTTP/2 connection stream
# initialized when stream is instantiated
self.metadata: Dict = {
"request_content_length": 0
if self._request.body is None
else len(self._request.body),
"request_content_length": (
0 if self._request.body is None else len(self._request.body)
),
# Flag to keep track whether the stream has initiated the request
"request_sent": False,
# Flag to track whether we have logged about exceeding download warnsize
"reached_warnsize": False,
# Each time we send a data frame, we will decrease value by the amount send.
"remaining_content_length": 0
if self._request.body is None
else len(self._request.body),
"remaining_content_length": (
0 if self._request.body is None else len(self._request.body)
),
# Flag to keep track whether client (self) have closed this stream
"stream_closed_local": False,
# Flag to keep track whether the server has closed the stream
1 change: 1 addition & 0 deletions scrapy/core/scraper.py
@@ -1,5 +1,6 @@
"""This module implements the Scraper component which parses responses and
extracts information from them"""

from __future__ import annotations

import logging
1 change: 1 addition & 0 deletions scrapy/core/spidermw.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/spider-middleware.rst
"""

import logging
from inspect import isasyncgenfunction, iscoroutine
from itertools import islice
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/defaultheaders.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/downloader-middleware.rst
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Iterable, Tuple, Union
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/downloadtimeout.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/downloader-middleware.rst
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Union
2 changes: 1 addition & 1 deletion scrapy/downloadermiddlewares/httpcompression.py
@@ -169,7 +169,7 @@ def _split_encodings(self, content_encoding):
return to_decode, to_keep

def _decode(self, body: bytes, encoding: bytes, max_size: int) -> bytes:
if encoding == b"gzip" or encoding == b"x-gzip":
if encoding in {b"gzip", b"x-gzip"}:
return gunzip(body, max_size=max_size)
if encoding == b"deflate":
return _inflate(body, max_size=max_size)
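
Replacing the chained equality with a membership test is exactly what pylint's consider-using-in check asks for, presumably one of the checks the slimmed-down pylintrc above no longer disables. The two forms are equivalent for this comparison:

    encoding = b"x-gzip"

    if encoding == b"gzip" or encoding == b"x-gzip":  # flagged by consider-using-in
        pass
    if encoding in {b"gzip", b"x-gzip"}:  # preferred; a tuple works just as well
        pass
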
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/retry.py
@@ -9,6 +9,7 @@
Failed pages are collected on the scraping process and rescheduled at the end,
once the spider has finished crawling all regular (non failed) pages.
"""

from __future__ import annotations

import warnings
1 change: 1 addition & 0 deletions scrapy/exceptions.py
@@ -4,6 +4,7 @@
These exceptions are documented in docs/topics/exceptions.rst. Please don't add
new exceptions here without documenting them there.
"""

from typing import Any

# Internal
1 change: 1 addition & 0 deletions scrapy/extension.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/extensions.rst
"""

from scrapy.middleware import MiddlewareManager
from scrapy.utils.conf import build_component_list

1 change: 1 addition & 0 deletions scrapy/extensions/corestats.py
@@ -1,6 +1,7 @@
"""
Extension for collecting core stats like items scraped and start/finish times
"""

from datetime import datetime, timezone

from scrapy import signals
1 change: 1 addition & 0 deletions scrapy/extensions/memusage.py
@@ -3,6 +3,7 @@
See documentation in docs/topics/extensions.rst
"""

import logging
import socket
import sys
1 change: 1 addition & 0 deletions scrapy/extensions/postprocessing.py
@@ -1,6 +1,7 @@
"""
Extension for processing data before they are exported to feeds.
"""

from bz2 import BZ2File
from gzip import GzipFile
from io import IOBase
4 changes: 3 additions & 1 deletion scrapy/http/headers.py
@@ -113,7 +113,9 @@ def items(self) -> Iterable[Tuple[bytes, List[bytes]]]: # type: ignore[override
return ((k, self.getlist(k)) for k in self.keys())

def values(self) -> List[Optional[bytes]]: # type: ignore[override]
return [self[k] for k in self.keys()]
return [
self[k] for k in self.keys() # pylint: disable=consider-using-dict-items
]

def to_string(self) -> bytes:
# cast() can be removed if the headers_dict_to_raw() hint is improved
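
The inline disable is needed now that consider-using-dict-items is no longer switched off globally; the check flags code that iterates over d.keys() and then indexes d[k], suggesting d.items() instead. That rewrite would be wrong here: as the surrounding code shows, Headers.items() yields full value lists via getlist() while self[k] yields a single optional value, so values() must keep indexing. What the check normally suggests, on a plain dict:

    d = {"a": 1, "b": 2}

    for k in d.keys():  # flagged: consider-using-dict-items
        print(k, d[k])

    for k, v in d.items():  # suggested replacement
        print(k, v)
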
17 changes: 11 additions & 6 deletions scrapy/http/request/__init__.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

import inspect
from typing import (
Any,
@@ -231,12 +232,16 @@ def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]
"""
d = {
"url": self.url, # urls are safe (safe_string_url)
"callback": _find_method(spider, self.callback)
if callable(self.callback)
else self.callback,
"errback": _find_method(spider, self.errback)
if callable(self.errback)
else self.errback,
"callback": (
_find_method(spider, self.callback)
if callable(self.callback)
else self.callback
),
"errback": (
_find_method(spider, self.errback)
if callable(self.errback)
else self.errback
),
"headers": dict(self.headers),
}
for attr in self.attributes:
1 change: 1 addition & 0 deletions scrapy/http/request/rpc.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

import xmlrpc.client as xmlrpclib
from typing import Any, Optional

1 change: 1 addition & 0 deletions scrapy/http/response/__init__.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

from ipaddress import IPv4Address, IPv6Address
1 change: 1 addition & 0 deletions scrapy/http/response/text.py
@@ -4,6 +4,7 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

import json