Skip to content

Commit

Permalink
Bugfix/1631: Series[Annotated[...]] DataFrameModel types should corre…
Browse files Browse the repository at this point in the history
…ctly create a DataFrameSchema (#1633)

* bugfix: Series[Annotated[...]] types should correctly create dtype

Signed-off-by: cosmicBboy <niels.bantilan@gmail.com>

* fix dangling polars broken tests

Signed-off-by: cosmicBboy <niels.bantilan@gmail.com>

---------

Signed-off-by: cosmicBboy <niels.bantilan@gmail.com>
  • Loading branch information
cosmicBboy committed May 9, 2024
1 parent 63140c9 commit be74271
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 70 deletions.
22 changes: 22 additions & 0 deletions pandera/api/polars/components.py
@@ -1,5 +1,7 @@
"""Schema components for polars."""

from __future__ import annotations

import logging
from typing import Any, Optional

Expand Down Expand Up @@ -153,6 +155,26 @@ def validate(
)
return output

@property
def properties(self) -> dict[str, Any]:
    """Collect this column's settings into a plain dictionary.

    Each key is the name of an attribute on the component and each value
    is whatever that attribute currently holds. Keys appear in the order
    listed below.
    """
    attribute_names = (
        "dtype",
        "parsers",
        "checks",
        "nullable",
        "unique",
        "report_duplicates",
        "coerce",
        "required",
        "name",
        "regex",
        "title",
        "description",
        "default",
        "metadata",
    )
    return {attr: getattr(self, attr) for attr in attribute_names}

@property
def dtype(self):
"""Return the value held in this component's ``_dtype`` attribute."""
return self._dtype
Expand Down
5 changes: 2 additions & 3 deletions pandera/typing/common.py
Expand Up @@ -269,8 +269,8 @@ def _parse_annotation(self, raw_annotation: Type) -> None:
metadata = getattr(raw_annotation, "__metadata__", None)
if metadata:
self.is_annotated_type = True
else:
metadata = getattr(self.arg, "__metadata__", None)
elif metadata := getattr(self.arg, "__metadata__", None):
self.arg = typing_inspect.get_args(self.arg)[0]

self.metadata = metadata
self.literal = typing_inspect.is_literal_type(self.arg)
Expand All @@ -286,5 +286,4 @@ def _parse_annotation(self, raw_annotation: Type) -> None:
else:
# otherwise assume that the annotation is the data type itself.
self.arg = raw_annotation

self.default_dtype = getattr(raw_annotation, "default_dtype", None)
141 changes: 74 additions & 67 deletions tests/core/test_typing.py
Expand Up @@ -322,78 +322,85 @@ class SchemaAnnotatedIntervalDtype(pa.DataFrameModel):
class SchemaAnnotatedIntervalDtype(pa.DataFrameModel): # type: ignore
col: Series[Annotated[pd.IntervalDtype, "int32"]]

class SchemaAnnotatedPeriodDtype(pa.DataFrameModel):
col: Series[Annotated[pd.PeriodDtype, "D"]]

class SchemaAnnotatedSparseDtype(pa.DataFrameModel):
col: Series[Annotated[pd.SparseDtype, np.int32, 0]]
class SchemaAnnotatedPeriodDtype(pa.DataFrameModel):
col: Series[Annotated[pd.PeriodDtype, "D"]]

@pytest.mark.parametrize(
"model, dtype, dtype_kwargs",
[
(
SchemaAnnotatedCategoricalDtype,
pd.CategoricalDtype,
{"categories": ["b", "a"], "ordered": True},
),
(
SchemaAnnotatedDatetimeTZDtype,
pd.DatetimeTZDtype,
{"unit": "ns", "tz": "EST"},
),
(
SchemaAnnotatedIntervalDtype,
pd.IntervalDtype,
(
{"subtype": "int32", "closed": "both"}
if pa.PANDAS_1_3_0_PLUS
else {"subtype": "int32"}
),
),
(SchemaAnnotatedPeriodDtype, pd.PeriodDtype, {"freq": "D"}),

class SchemaAnnotatedSparseDtype(pa.DataFrameModel):
col: Series[Annotated[pd.SparseDtype, np.int32, 0]]


@pytest.mark.parametrize(
"model, dtype, dtype_kwargs",
[
(
SchemaAnnotatedCategoricalDtype,
pd.CategoricalDtype,
{"categories": ["b", "a"], "ordered": True},
),
(
SchemaAnnotatedDatetimeTZDtype,
pd.DatetimeTZDtype,
{"unit": "ns", "tz": "EST"},
),
(
SchemaAnnotatedIntervalDtype,
pd.IntervalDtype,
(
SchemaAnnotatedSparseDtype,
pd.SparseDtype,
{"dtype": np.int32, "fill_value": 0},
{"subtype": "int32", "closed": "both"}
if pa.PANDAS_1_3_0_PLUS
else {"subtype": "int32"}
),
],
),
(SchemaAnnotatedPeriodDtype, pd.PeriodDtype, {"freq": "D"}),
(
SchemaAnnotatedSparseDtype,
pd.SparseDtype,
{"dtype": np.int32, "fill_value": 0},
),
],
)
def test_annotated_dtype(
model: Type[pa.DataFrameModel],
dtype: Type,
dtype_kwargs: Dict[str, Any],
):
"""Test type annotations for parametrized pandas extension dtypes.

Forwards ``model``, ``dtype`` and ``dtype_kwargs`` unchanged to the
``_test_annotated_dtype`` helper, which performs the actual checks.
"""
_test_annotated_dtype(model, dtype, dtype_kwargs)


class SchemaInvalidAnnotatedDtype(pa.DataFrameModel):
# Only one argument ("utc") is given to Annotated here, whereas the error
# message asserted in test_invalid_annotated_dtype names two required
# positional arguments, 'unit' and 'tz'.
col: Series[Annotated[pd.DatetimeTZDtype, "utc"]]


def test_invalid_annotated_dtype():
"""
Test incorrect number of parameters for parametrized pandas extension
dtypes.
"""
err_msg = re.escape(
"Annotation 'DatetimeTZDtype' requires all "
r"positional arguments ['unit', 'tz']."
)
def test_annotated_dtype(
model: Type[pa.DataFrameModel],
dtype: Type,
dtype_kwargs: Dict[str, Any],
):
"""Test type annotations for parametrized pandas extension dtypes."""
_test_annotated_dtype(model, dtype, dtype_kwargs)

class SchemaInvalidAnnotatedDtype(pa.DataFrameModel):
col: Series[Annotated[pd.DatetimeTZDtype, "utc"]]

def test_invalid_annotated_dtype():
"""
Test incorrect number of parameters for parametrized pandas extension
dtypes.
"""
err_msg = re.escape(
"Annotation 'DatetimeTZDtype' requires all "
r"positional arguments ['unit', 'tz']."
)
with pytest.raises(TypeError, match=err_msg):
SchemaInvalidAnnotatedDtype.to_schema()

class SchemaRedundantField(pa.DataFrameModel):
col: Series[Annotated[pd.DatetimeTZDtype, "utc"]] = pa.Field(
dtype_kwargs={"tz": "utc"}
)

def test_pandas_extension_dtype_redundant_field():
"""
Test incorrect number of parameters for parametrized pandas extension
dtypes.
"""
err_msg = r"Cannot specify redundant 'dtype_kwargs' for"
with pytest.raises(TypeError, match=err_msg):
SchemaRedundantField.to_schema()
with pytest.raises(TypeError, match=err_msg):
SchemaInvalidAnnotatedDtype.to_schema()


class SchemaRedundantField(pa.DataFrameModel):
# A dtype argument is supplied through Annotated ("utc") and again through
# Field(dtype_kwargs=...); test_pandas_extension_dtype_redundant_field
# expects to_schema() to reject this combination with a TypeError.
col: Series[Annotated[pd.DatetimeTZDtype, "utc"]] = pa.Field(
dtype_kwargs={"tz": "utc"}
)


def test_pandas_extension_dtype_redundant_field():
"""
Test that ``SchemaRedundantField.to_schema()`` raises a ``TypeError``
whose message reports redundant 'dtype_kwargs'.
"""
err_msg = r"Cannot specify redundant 'dtype_kwargs' for"
with pytest.raises(TypeError, match=err_msg):
SchemaRedundantField.to_schema()


class SchemaInt8Dtype(pa.DataFrameModel):
Expand Down

0 comments on commit be74271

Please sign in to comment.