Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve type hints for copy() and replace() in Request and Response. #6143

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
3 changes: 3 additions & 0 deletions .github/workflows/checks.yml
Expand Up @@ -18,6 +18,9 @@ jobs:
- python-version: 3.8
env:
TOXENV: typing
- python-version: 3.8
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should run it in the latest Python instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also run the typing check on the latest Python then, as I think they check the same thing? In that case we will stop checking (and so supporting) that the type hints work as expected on older Python versions, and I think that's a nice thing to support.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I mean, for users that use Scrapy on older Python versions and run mypy on their code)

env:
TOXENV: typing-tests
- python-version: "3.11" # Keep in sync with .readthedocs.yml
env:
TOXENV: docs
Expand Down
6 changes: 3 additions & 3 deletions scrapy/downloadermiddlewares/httpcompression.py
Expand Up @@ -2,7 +2,7 @@

import io
import zlib
from typing import TYPE_CHECKING, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from scrapy import Request, Spider
from scrapy.crawler import Crawler
Expand Down Expand Up @@ -74,12 +74,12 @@ def process_response(
respcls = responsetypes.from_args(
headers=response.headers, url=response.url, body=decoded_body
)
kwargs = dict(cls=respcls, body=decoded_body)
kwargs: Dict[str, Any] = dict(body=decoded_body)
if issubclass(respcls, TextResponse):
# force recalculating the encoding until we make sure the
# responsetypes guessing is reliable
kwargs["encoding"] = None
response = response.replace(**kwargs)
response = response.replace(cls=respcls, **kwargs)
if not content_encoding:
del response.headers["Content-Encoding"]

Expand Down
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/redirect.py
Expand Up @@ -27,6 +27,7 @@ def _build_redirect_request(
redirect_request = source_request.replace(
url=url,
**kwargs,
cls=None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this line the type of redirect_request is inferred to be Any.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a test for this case? I guess it's **kwargs handling?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's because it cannot know that kwargs passed to _build_redirect_request doesn't contain cls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, what test do you mean?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If kwargs are passed, what's the reveal_type of the result?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added tests that check that the result is Any unless cls was also passed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it's a useful thing to check :)

cookies=None,
)
if "Cookie" in redirect_request.headers:
Expand Down
37 changes: 29 additions & 8 deletions scrapy/http/request/__init__.py
Expand Up @@ -4,8 +4,11 @@

See documentation in docs/topics/request-response.rst
"""
from __future__ import annotations

import inspect
from typing import (
TYPE_CHECKING,
Any,
AnyStr,
Callable,
Expand All @@ -19,7 +22,7 @@
Type,
TypeVar,
Union,
cast,
overload,
)

from w3lib.url import safe_url_string
Expand All @@ -31,6 +34,11 @@
from scrapy.utils.trackref import object_ref
from scrapy.utils.url import escape_ajax

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self


RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")


Expand Down Expand Up @@ -173,23 +181,36 @@ def encoding(self) -> str:
def __repr__(self) -> str:
return f"<{self.method} {self.url}>"

def copy(self) -> "Request":
def copy(self) -> Self:
return self.replace()

def replace(self, *args: Any, **kwargs: Any) -> "Request":
@overload
def replace(
self, *args: Any, cls: Type[RequestTypeVar], **kwargs: Any
) -> RequestTypeVar:
...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self:
...

def replace(
self, *args: Any, cls: Optional[Type[Request]] = None, **kwargs: Any
) -> Request:
"""Create a new Request with the same attributes except for those given new values"""
for x in self.attributes:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop("cls", self.__class__)
return cast(Request, cls(*args, **kwargs))
if cls is None:
cls = self.__class__
return cls(*args, **kwargs)

@classmethod
def from_curl(
cls: Type[RequestTypeVar],
cls,
curl_command: str,
ignore_unknown_options: bool = True,
**kwargs: Any,
) -> RequestTypeVar:
) -> Self:
"""Create a Request object from a string containing a `cURL
<https://curl.haxx.se/>`_ command. It populates the HTTP method, the
URL, the headers, the cookies and the body. It accepts the same
Expand Down Expand Up @@ -221,7 +242,7 @@ def from_curl(
request_kwargs.update(kwargs)
return cls(**request_kwargs)

def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]:
def to_dict(self, *, spider: Optional[scrapy.Spider] = None) -> Dict[str, Any]:
"""Return a dictionary containing the Request's data.

Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.
Expand Down
26 changes: 22 additions & 4 deletions scrapy/http/request/json_request.py
Expand Up @@ -5,12 +5,18 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

import copy
import json
import warnings
from typing import Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, overload

from scrapy.http.request import Request, RequestTypeVar

from scrapy.http.request import Request
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self


class JsonRequest(Request):
Expand Down Expand Up @@ -44,7 +50,19 @@ def __init__(
def dumps_kwargs(self) -> dict:
return self._dumps_kwargs

def replace(self, *args: Any, **kwargs: Any) -> Request:
@overload
def replace(
self, *args: Any, cls: Type[RequestTypeVar], **kwargs: Any
) -> RequestTypeVar:
wRAR marked this conversation as resolved.
Show resolved Hide resolved
...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self:
...

def replace(
self, *args: Any, cls: Optional[Type[Request]] = None, **kwargs: Any
) -> Request:
body_passed = kwargs.get("body", None) is not None
data = kwargs.pop("data", None)
data_passed = data is not None
Expand All @@ -54,7 +72,7 @@ def replace(self, *args: Any, **kwargs: Any) -> Request:
elif not body_passed and data_passed:
kwargs["body"] = self._dumps(data)

return super().replace(*args, **kwargs)
return super().replace(*args, cls=cls, **kwargs)

def _dumps(self, data: dict) -> str:
"""Convert to JSON"""
Expand Down
31 changes: 26 additions & 5 deletions scrapy/http/response/__init__.py
Expand Up @@ -19,8 +19,10 @@
Mapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
overload,
)
from urllib.parse import urljoin

Expand All @@ -33,9 +35,15 @@
from scrapy.utils.trackref import object_ref

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy.selector import SelectorList


ResponseTypeVar = TypeVar("ResponseTypeVar", bound="Response")


class Response(object_ref):
"""An object that represents an HTTP response, which is usually
downloaded (by the Downloader) and fed to the Spiders for processing.
Expand Down Expand Up @@ -132,16 +140,29 @@ def _set_body(self, body: Optional[bytes]) -> None:
def __repr__(self) -> str:
return f"<{self.status} {self.url}>"

def copy(self) -> Response:
def copy(self) -> Self:
"""Return a copy of this Response"""
return self.replace()

def replace(self, *args: Any, **kwargs: Any) -> Response:
@overload
def replace(
self, *args: Any, cls: Type[ResponseTypeVar], **kwargs: Any
) -> ResponseTypeVar:
...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self:
...

def replace(
self, *args: Any, cls: Optional[Type[Response]] = None, **kwargs: Any
) -> Response:
"""Create a new Response with the same attributes except for those given new values"""
for x in self.attributes:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop("cls", self.__class__)
return cast(Response, cls(*args, **kwargs))
if cls is None:
cls = self.__class__
return cls(*args, **kwargs)

def urljoin(self, url: str) -> str:
"""Join this Response's url with a possible relative url to form an
Expand Down
66 changes: 66 additions & 0 deletions tests_typing/test_http_request.mypy-testing
@@ -0,0 +1,66 @@
import pytest

from scrapy import Request
from scrapy.http import JsonRequest


class MyRequest(Request):
    """Request subclass used to check that ``Self``-typed ``copy()``/``replace()``
    hints preserve the subclass type."""

    pass


class MyRequest2(Request):
    """Second Request subclass, used as the ``cls`` argument of ``replace()``
    to check the ``cls``-typed overload."""

    pass


@pytest.mark.mypy_testing
def mypy_test_headers():
    """mypy check: the ``headers`` argument of ``Request`` accepts ``None``,
    mappings, and iterables of 2-tuples (with ``str`` or ``bytes`` keys and
    values), and rejects other types such as ``int``.
    """
    Request("data:,", headers=1) # E: Argument "headers" to "Request" has incompatible type "int"; expected "Union[Mapping[str, Any], Iterable[Tuple[str, Any]], None]"
    Request("data:,", headers=None)
    Request("data:,", headers={})
    Request("data:,", headers=[])
    Request("data:,", headers={"foo": "bar"})
    Request("data:,", headers={b"foo": "bar"})
    Request("data:,", headers={"foo": b"bar"})
    Request("data:,", headers=[("foo", "bar")])
    Request("data:,", headers=[(b"foo", "bar")])
    Request("data:,", headers=[("foo", b"bar")])


@pytest.mark.mypy_testing
def mypy_test_copy():
    """mypy check: ``Request.copy()`` returns ``Self``, so the inferred type
    of the copy matches the (sub)class of the original request.
    """
    req = Request("data:,")
    reveal_type(req) # R: scrapy.http.request.Request
    req_copy = req.copy()
    reveal_type(req_copy) # R: scrapy.http.request.Request

    req = MyRequest("data:,")
    reveal_type(req) # R: __main__.MyRequest
    req_copy = req.copy()
    reveal_type(req_copy) # R: __main__.MyRequest


@pytest.mark.mypy_testing
def mypy_test_replace():
    """mypy check: ``Request.replace()`` without ``cls`` returns ``Self``
    (preserving the subclass), while passing ``cls`` returns an instance of
    that class (the ``Type[RequestTypeVar]`` overload).
    """
    req = Request("data:,")
    reveal_type(req) # R: scrapy.http.request.Request
    req_copy = req.replace(body=b"a")
    reveal_type(req_copy) # R: scrapy.http.request.Request

    req = MyRequest("data:,")
    reveal_type(req) # R: __main__.MyRequest
    req_copy = req.replace(body=b"a")
    reveal_type(req_copy) # R: __main__.MyRequest
    req_copy2 = req.replace(body=b"a", cls=MyRequest2)
    reveal_type(req_copy2) # R: __main__.MyRequest2


@pytest.mark.mypy_testing
def mypy_test_jsonrequest_copy_replace():
    """mypy check: ``JsonRequest`` (which overrides ``replace()``) keeps the
    same ``Self``/``cls`` overload behavior for both ``copy()`` and
    ``replace()``.
    """
    req = JsonRequest("data:,")
    reveal_type(req) # R: scrapy.http.request.json_request.JsonRequest
    req_copy = req.copy()
    reveal_type(req_copy) # R: scrapy.http.request.json_request.JsonRequest
    req_copy = req.replace(body=b"a")
    reveal_type(req_copy) # R: scrapy.http.request.json_request.JsonRequest
    req_copy_my = req.replace(body=b"a", cls=MyRequest)
    reveal_type(req_copy_my) # R: __main__.MyRequest
45 changes: 45 additions & 0 deletions tests_typing/test_http_response.mypy-testing
@@ -0,0 +1,45 @@
import pytest

from scrapy.http import HtmlResponse, Response, TextResponse


@pytest.mark.mypy_testing
def mypy_test_headers():
    """mypy check: the ``headers`` argument of ``Response`` accepts ``None``,
    mappings, and iterables of 2-tuples (with ``str`` or ``bytes`` keys and
    values), and rejects other types such as ``int``.
    """
    Response("data:,", headers=1) # E: Argument "headers" to "Response" has incompatible type "int"; expected "Union[Mapping[str, Any], Iterable[Tuple[str, Any]], None]"
    Response("data:,", headers=None)
    Response("data:,", headers={})
    Response("data:,", headers=[])
    Response("data:,", headers={"foo": "bar"})
    Response("data:,", headers={b"foo": "bar"})
    Response("data:,", headers={"foo": b"bar"})
    Response("data:,", headers=[("foo", "bar")])
    Response("data:,", headers=[(b"foo", "bar")])
    Response("data:,", headers=[("foo", b"bar")])


@pytest.mark.mypy_testing
def mypy_test_copy():
    """mypy check: ``Response.copy()`` returns ``Self``, so the inferred type
    of the copy matches the (sub)class of the original response.
    """
    resp = Response("data:,")
    reveal_type(resp) # R: scrapy.http.response.Response
    resp_copy = resp.copy()
    reveal_type(resp_copy) # R: scrapy.http.response.Response

    resp = HtmlResponse("data:,")
    reveal_type(resp) # R: scrapy.http.response.html.HtmlResponse
    resp_copy = resp.copy()
    reveal_type(resp_copy) # R: scrapy.http.response.html.HtmlResponse


@pytest.mark.mypy_testing
def mypy_test_replace():
    """mypy check: ``Response.replace()`` without ``cls`` returns ``Self``
    (preserving the subclass), while passing ``cls`` returns an instance of
    that class (the ``Type[ResponseTypeVar]`` overload).
    """
    resp = Response("data:,")
    reveal_type(resp) # R: scrapy.http.response.Response
    resp_copy = resp.replace(body=b"a")
    reveal_type(resp_copy) # R: scrapy.http.response.Response

    resp = HtmlResponse("data:,")
    reveal_type(resp) # R: scrapy.http.response.html.HtmlResponse
    resp_copy = resp.replace(body=b"a")
    reveal_type(resp_copy) # R: scrapy.http.response.html.HtmlResponse
    resp_copy2 = resp.replace(body=b"a", cls=TextResponse)
    reveal_type(resp_copy2) # R: scrapy.http.response.text.TextResponse
8 changes: 8 additions & 0 deletions tox.ini
Expand Up @@ -46,6 +46,14 @@ deps =
commands =
mypy {posargs: scrapy tests}

[testenv:typing-tests]
wRAR marked this conversation as resolved.
Show resolved Hide resolved
deps =
-rtests/requirements.txt
{[testenv:typing]deps}
pytest-mypy-testing==0.1.1
commands =
pytest {posargs: tests_typing}

[testenv:pre-commit]
basepython = python3
deps =
Expand Down