Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve type hints for copy() and replace() in Request and Response. #6143

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
6 changes: 3 additions & 3 deletions scrapy/downloadermiddlewares/httpcompression.py
Expand Up @@ -3,7 +3,7 @@
import warnings
from itertools import chain
from logging import getLogger
from typing import TYPE_CHECKING, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
Expand Down Expand Up @@ -138,12 +138,12 @@ def process_response(
respcls = responsetypes.from_args(
headers=response.headers, url=response.url, body=decoded_body
)
kwargs = {"cls": respcls, "body": decoded_body}
kwargs: Dict[str, Any] = {"body": decoded_body}
if issubclass(respcls, TextResponse):
# force recalculating the encoding until we make sure the
# responsetypes guessing is reliable
kwargs["encoding"] = None
response = response.replace(**kwargs)
response = response.replace(cls=respcls, **kwargs)
if not content_encoding:
del response.headers["Content-Encoding"]

Expand Down
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/redirect.py
Expand Up @@ -27,6 +27,7 @@ def _build_redirect_request(
redirect_request = source_request.replace(
url=url,
**kwargs,
cls=None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this line the type of redirect_request is inferred to be Any.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a test for this case? I guess it's **kwargs handling?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's because it cannot know that kwargs passed to _build_redirect_request doesn't contain cls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, what test do you mean?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If kwargs are passed, what's the reveal_type of the result

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added tests that check that the result is Any unless cls was also passed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it's a useful thing to check :)

cookies=None,
)
has_cookie_header = "Cookie" in redirect_request.headers
Expand Down
28 changes: 22 additions & 6 deletions scrapy/http/request/__init__.py
Expand Up @@ -20,8 +20,10 @@
NoReturn,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
overload,
)

from w3lib.url import safe_url_string
Expand All @@ -38,6 +40,9 @@
from typing_extensions import Self


RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")


def NO_CALLBACK(*args: Any, **kwargs: Any) -> NoReturn:
"""When assigned to the ``callback`` parameter of
:class:`~scrapy.http.Request`, it indicates that the request is not meant
Expand Down Expand Up @@ -177,15 +182,26 @@ def encoding(self) -> str:
def __repr__(self) -> str:
return f"<{self.method} {self.url}>"

def copy(self) -> "Request":
def copy(self) -> Self:
return self.replace()

def replace(self, *args: Any, **kwargs: Any) -> "Request":
@overload
def replace(
self, *args: Any, cls: Type[RequestTypeVar], **kwargs: Any
) -> RequestTypeVar: ...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self: ...

def replace(
self, *args: Any, cls: Optional[Type[Request]] = None, **kwargs: Any
) -> Request:
"""Create a new Request with the same attributes except for those given new values"""
for x in self.attributes:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop("cls", self.__class__)
return cast(Request, cls(*args, **kwargs))
if cls is None:
cls = self.__class__
return cls(*args, **kwargs)

@classmethod
def from_curl(
Expand Down Expand Up @@ -225,7 +241,7 @@ def from_curl(
request_kwargs.update(kwargs)
return cls(**request_kwargs)

def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]:
def to_dict(self, *, spider: Optional[scrapy.Spider] = None) -> Dict[str, Any]:
"""Return a dictionary containing the Request's data.

Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.
Expand Down
24 changes: 20 additions & 4 deletions scrapy/http/request/json_request.py
Expand Up @@ -5,12 +5,18 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

import copy
import json
import warnings
from typing import Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, overload

from scrapy.http.request import Request, RequestTypeVar

from scrapy.http.request import Request
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self


class JsonRequest(Request):
Expand Down Expand Up @@ -44,7 +50,17 @@ def __init__(
def dumps_kwargs(self) -> dict:
return self._dumps_kwargs

def replace(self, *args: Any, **kwargs: Any) -> Request:
@overload
def replace(
self, *args: Any, cls: Type[RequestTypeVar], **kwargs: Any
) -> RequestTypeVar: ...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self: ...

def replace(
self, *args: Any, cls: Optional[Type[Request]] = None, **kwargs: Any
) -> Request:
body_passed = kwargs.get("body", None) is not None
data = kwargs.pop("data", None)
data_passed = data is not None
Expand All @@ -54,7 +70,7 @@ def replace(self, *args: Any, **kwargs: Any) -> Request:
elif not body_passed and data_passed:
kwargs["body"] = self._dumps(data)

return super().replace(*args, **kwargs)
return super().replace(*args, cls=cls, **kwargs)

def _dumps(self, data: dict) -> str:
"""Convert to JSON"""
Expand Down
29 changes: 24 additions & 5 deletions scrapy/http/response/__init__.py
Expand Up @@ -20,8 +20,10 @@
Mapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
overload,
)
from urllib.parse import urljoin

Expand All @@ -34,9 +36,15 @@
from scrapy.utils.trackref import object_ref

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy.selector import SelectorList


ResponseTypeVar = TypeVar("ResponseTypeVar", bound="Response")


class Response(object_ref):
"""An object that represents an HTTP response, which is usually
downloaded (by the Downloader) and fed to the Spiders for processing.
Expand Down Expand Up @@ -133,16 +141,27 @@ def _set_body(self, body: Optional[bytes]) -> None:
def __repr__(self) -> str:
return f"<{self.status} {self.url}>"

def copy(self) -> Response:
def copy(self) -> Self:
"""Return a copy of this Response"""
return self.replace()

def replace(self, *args: Any, **kwargs: Any) -> Response:
@overload
def replace(
self, *args: Any, cls: Type[ResponseTypeVar], **kwargs: Any
) -> ResponseTypeVar: ...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self: ...

def replace(
self, *args: Any, cls: Optional[Type[Response]] = None, **kwargs: Any
) -> Response:
"""Create a new Response with the same attributes except for those given new values"""
for x in self.attributes:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop("cls", self.__class__)
return cast(Response, cls(*args, **kwargs))
if cls is None:
cls = self.__class__
return cls(*args, **kwargs)

def urljoin(self, url: str) -> str:
"""Join this Response's url with a possible relative url to form an
Expand Down
80 changes: 80 additions & 0 deletions tests_typing/test_http_request.mypy-testing
@@ -0,0 +1,80 @@
from typing import Any, Dict

import pytest

from scrapy import Request
from scrapy.http import JsonRequest


class MyRequest(Request):
pass


class MyRequest2(Request):
pass


@pytest.mark.mypy_testing
def mypy_test_headers():
Request("data:,", headers=1) # E: Argument "headers" to "Request" has incompatible type "int"; expected "Union[Mapping[str, Any], Iterable[Tuple[str, Any]], None]"
Request("data:,", headers=None)
Request("data:,", headers={})
Request("data:,", headers=[])
Request("data:,", headers={"foo": "bar"})
Request("data:,", headers={b"foo": "bar"})
Request("data:,", headers={"foo": b"bar"})
Request("data:,", headers=[("foo", "bar")])
Request("data:,", headers=[(b"foo", "bar")])
Request("data:,", headers=[("foo", b"bar")])


@pytest.mark.mypy_testing
def mypy_test_copy():
req = Request("data:,")
reveal_type(req) # R: scrapy.http.request.Request
req_copy = req.copy()
reveal_type(req_copy) # R: scrapy.http.request.Request


@pytest.mark.mypy_testing
def mypy_test_copy_subclass():
req = MyRequest("data:,")
reveal_type(req) # R: __main__.MyRequest
req_copy = req.copy()
reveal_type(req_copy) # R: __main__.MyRequest


@pytest.mark.mypy_testing
def mypy_test_replace():
req = Request("data:,")
reveal_type(req) # R: scrapy.http.request.Request
req_copy = req.replace(body=b"a")
reveal_type(req_copy) # R: scrapy.http.request.Request
kwargs: Dict[str, Any] = {}
req_copy2 = req.replace(body=b"a", **kwargs)
reveal_type(req_copy2) # R: Any


@pytest.mark.mypy_testing
def mypy_test_replace_subclass():
req = MyRequest("data:,")
reveal_type(req) # R: __main__.MyRequest
req_copy = req.replace(body=b"a")
reveal_type(req_copy) # R: __main__.MyRequest
req_copy2 = req.replace(body=b"a", cls=MyRequest2)
reveal_type(req_copy2) # R: __main__.MyRequest2
kwargs: Dict[str, Any] = {}
req_copy3 = req.replace(body=b"a", cls=MyRequest2, **kwargs)
reveal_type(req_copy3) # R: __main__.MyRequest2


@pytest.mark.mypy_testing
def mypy_test_jsonrequest_copy_replace():
req = JsonRequest("data:,")
reveal_type(req) # R: scrapy.http.request.json_request.JsonRequest
req_copy = req.copy()
reveal_type(req_copy) # R: scrapy.http.request.json_request.JsonRequest
req_copy = req.replace(body=b"a")
reveal_type(req_copy) # R: scrapy.http.request.json_request.JsonRequest
req_copy_my = req.replace(body=b"a", cls=MyRequest)
reveal_type(req_copy_my) # R: __main__.MyRequest
59 changes: 59 additions & 0 deletions tests_typing/test_http_response.mypy-testing
@@ -0,0 +1,59 @@
from typing import Any, Dict

import pytest

from scrapy.http import HtmlResponse, Response, TextResponse


@pytest.mark.mypy_testing
def mypy_test_headers():
Response("data:,", headers=1) # E: Argument "headers" to "Response" has incompatible type "int"; expected "Union[Mapping[str, Any], Iterable[Tuple[str, Any]], None]"
Response("data:,", headers=None)
Response("data:,", headers={})
Response("data:,", headers=[])
Response("data:,", headers={"foo": "bar"})
Response("data:,", headers={b"foo": "bar"})
Response("data:,", headers={"foo": b"bar"})
Response("data:,", headers=[("foo", "bar")])
Response("data:,", headers=[(b"foo", "bar")])
Response("data:,", headers=[("foo", b"bar")])


@pytest.mark.mypy_testing
def mypy_test_copy():
resp = Response("data:,")
reveal_type(resp) # R: scrapy.http.response.Response
resp_copy = resp.copy()
reveal_type(resp_copy) # R: scrapy.http.response.Response


@pytest.mark.mypy_testing
def mypy_test_copy_subclass():
resp = HtmlResponse("data:,")
reveal_type(resp) # R: scrapy.http.response.html.HtmlResponse
resp_copy = resp.copy()
reveal_type(resp_copy) # R: scrapy.http.response.html.HtmlResponse


@pytest.mark.mypy_testing
def mypy_test_replace():
resp = Response("data:,")
reveal_type(resp) # R: scrapy.http.response.Response
resp_copy = resp.replace(body=b"a")
reveal_type(resp_copy) # R: scrapy.http.response.Response
kwargs: Dict[str, Any] = {}
resp_copy2 = resp.replace(body=b"a", **kwargs)
reveal_type(resp_copy2) # R: Any


@pytest.mark.mypy_testing
def mypy_test_replace_subclass():
resp = HtmlResponse("data:,")
reveal_type(resp) # R: scrapy.http.response.html.HtmlResponse
resp_copy = resp.replace(body=b"a")
reveal_type(resp_copy) # R: scrapy.http.response.html.HtmlResponse
resp_copy2 = resp.replace(body=b"a", cls=TextResponse)
reveal_type(resp_copy2) # R: scrapy.http.response.text.TextResponse
kwargs: Dict[str, Any] = {}
resp_copy3 = resp.replace(body=b"a", cls=TextResponse, **kwargs)
reveal_type(resp_copy3) # R: scrapy.http.response.text.TextResponse