Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve type hints for copy() and replace() in Request and Response. #6143

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
6 changes: 3 additions & 3 deletions scrapy/downloadermiddlewares/httpcompression.py
Expand Up @@ -2,7 +2,7 @@

import io
import zlib
from typing import TYPE_CHECKING, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from scrapy import Request, Spider
from scrapy.crawler import Crawler
Expand Down Expand Up @@ -74,12 +74,12 @@ def process_response(
respcls = responsetypes.from_args(
headers=response.headers, url=response.url, body=decoded_body
)
kwargs = dict(cls=respcls, body=decoded_body)
kwargs: Dict[str, Any] = dict(body=decoded_body)
if issubclass(respcls, TextResponse):
# force recalculating the encoding until we make sure the
# responsetypes guessing is reliable
kwargs["encoding"] = None
response = response.replace(**kwargs)
response = response.replace(cls=respcls, **kwargs)
if not content_encoding:
del response.headers["Content-Encoding"]

Expand Down
1 change: 1 addition & 0 deletions scrapy/downloadermiddlewares/redirect.py
Expand Up @@ -27,6 +27,7 @@ def _build_redirect_request(
redirect_request = source_request.replace(
url=url,
**kwargs,
cls=None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this line the type of redirect_request is inferred to be Any.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a test for this case? I guess it's **kwargs handling?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's because it cannot know that kwargs passed to _build_redirect_request doesn't contain cls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, what test do you mean?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If kwargs are passed, what's the reveal_type of the result

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added tests that check that the result is Any unless cls was also passed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it's a useful thing to check :)

cookies=None,
)
if "Cookie" in redirect_request.headers:
Expand Down
37 changes: 29 additions & 8 deletions scrapy/http/request/__init__.py
Expand Up @@ -4,8 +4,11 @@

See documentation in docs/topics/request-response.rst
"""
from __future__ import annotations

import inspect
from typing import (
TYPE_CHECKING,
Any,
AnyStr,
Callable,
Expand All @@ -19,7 +22,7 @@
Type,
TypeVar,
Union,
cast,
overload,
)

from w3lib.url import safe_url_string
Expand All @@ -31,6 +34,11 @@
from scrapy.utils.trackref import object_ref
from scrapy.utils.url import escape_ajax

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self


RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")


Expand Down Expand Up @@ -173,23 +181,36 @@ def encoding(self) -> str:
def __repr__(self) -> str:
return f"<{self.method} {self.url}>"

def copy(self) -> "Request":
def copy(self) -> Self:
return self.replace()

def replace(self, *args: Any, **kwargs: Any) -> "Request":
@overload
def replace(
self, *args: Any, cls: Type[RequestTypeVar], **kwargs: Any
) -> RequestTypeVar:
...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self:
...

def replace(
self, *args: Any, cls: Optional[Type[Request]] = None, **kwargs: Any
) -> Request:
"""Create a new Request with the same attributes except for those given new values"""
for x in self.attributes:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop("cls", self.__class__)
return cast(Request, cls(*args, **kwargs))
if cls is None:
cls = self.__class__
return cls(*args, **kwargs)

@classmethod
def from_curl(
cls: Type[RequestTypeVar],
cls,
curl_command: str,
ignore_unknown_options: bool = True,
**kwargs: Any,
) -> RequestTypeVar:
) -> Self:
"""Create a Request object from a string containing a `cURL
<https://curl.haxx.se/>`_ command. It populates the HTTP method, the
URL, the headers, the cookies and the body. It accepts the same
Expand Down Expand Up @@ -221,7 +242,7 @@ def from_curl(
request_kwargs.update(kwargs)
return cls(**request_kwargs)

def to_dict(self, *, spider: Optional["scrapy.Spider"] = None) -> Dict[str, Any]:
def to_dict(self, *, spider: Optional[scrapy.Spider] = None) -> Dict[str, Any]:
"""Return a dictionary containing the Request's data.

Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.
Expand Down
26 changes: 22 additions & 4 deletions scrapy/http/request/json_request.py
Expand Up @@ -5,12 +5,18 @@
See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

import copy
import json
import warnings
from typing import Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, overload

from scrapy.http.request import Request, RequestTypeVar

from scrapy.http.request import Request
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self


class JsonRequest(Request):
Expand Down Expand Up @@ -44,7 +50,19 @@ def __init__(
def dumps_kwargs(self) -> dict:
return self._dumps_kwargs

def replace(self, *args: Any, **kwargs: Any) -> Request:
@overload
def replace(
self, *args: Any, cls: Type[RequestTypeVar], **kwargs: Any
) -> RequestTypeVar:
wRAR marked this conversation as resolved.
Show resolved Hide resolved
...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self:
...

def replace(
self, *args: Any, cls: Optional[Type[Request]] = None, **kwargs: Any
) -> Request:
body_passed = kwargs.get("body", None) is not None
data = kwargs.pop("data", None)
data_passed = data is not None
Expand All @@ -54,7 +72,7 @@ def replace(self, *args: Any, **kwargs: Any) -> Request:
elif not body_passed and data_passed:
kwargs["body"] = self._dumps(data)

return super().replace(*args, **kwargs)
return super().replace(*args, cls=cls, **kwargs)

def _dumps(self, data: dict) -> str:
"""Convert to JSON"""
Expand Down
31 changes: 26 additions & 5 deletions scrapy/http/response/__init__.py
Expand Up @@ -19,8 +19,10 @@
Mapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
overload,
)
from urllib.parse import urljoin

Expand All @@ -33,9 +35,15 @@
from scrapy.utils.trackref import object_ref

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy.selector import SelectorList


ResponseTypeVar = TypeVar("ResponseTypeVar", bound="Response")


class Response(object_ref):
"""An object that represents an HTTP response, which is usually
downloaded (by the Downloader) and fed to the Spiders for processing.
Expand Down Expand Up @@ -132,16 +140,29 @@ def _set_body(self, body: Optional[bytes]) -> None:
def __repr__(self) -> str:
return f"<{self.status} {self.url}>"

def copy(self) -> Response:
def copy(self) -> Self:
"""Return a copy of this Response"""
return self.replace()

def replace(self, *args: Any, **kwargs: Any) -> Response:
@overload
def replace(
self, *args: Any, cls: Type[ResponseTypeVar], **kwargs: Any
) -> ResponseTypeVar:
...

@overload
def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self:
...

def replace(
self, *args: Any, cls: Optional[Type[Response]] = None, **kwargs: Any
) -> Response:
"""Create a new Response with the same attributes except for those given new values"""
for x in self.attributes:
kwargs.setdefault(x, getattr(self, x))
cls = kwargs.pop("cls", self.__class__)
return cast(Response, cls(*args, **kwargs))
if cls is None:
cls = self.__class__
return cls(*args, **kwargs)

def urljoin(self, url: str) -> str:
"""Join this Response's url with a possible relative url to form an
Expand Down