Skip to content

Commit

Permalink
pd.eval: Series names are now preserved even for "numexpr" engine. (pandas-dev#58437)
Browse files Browse the repository at this point in the history

* Eval: Series names are preserved for numexpr

Series names are now preserved even when using
the numexpr engine, making the behavior consistent
with the python engine.

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/core/computation/align.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/tests/computation/test_eval.py

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
2 people authored and pmhatre1 committed May 7, 2024
1 parent 1feafb5 commit 593a815
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 33 deletions.
19 changes: 13 additions & 6 deletions pandas/core/computation/align.py
Expand Up @@ -160,19 +160,24 @@ def align_terms(terms):
# can't iterate so it must just be a constant or single variable
if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
typ = type(terms.value)
return typ, _zip_axes_from_type(typ, terms.value.axes)
return np.result_type(terms.type), None
name = terms.value.name if isinstance(terms.value, ABCSeries) else None
return typ, _zip_axes_from_type(typ, terms.value.axes), name
return np.result_type(terms.type), None, None

# if all resolved variables are numeric scalars
if all(term.is_scalar for term in terms):
return result_type_many(*(term.value for term in terms)).type, None
return result_type_many(*(term.value for term in terms)).type, None, None

# if all input series have a common name, propagate it to the returned series
names = {term.value.name for term in terms if isinstance(term.value, ABCSeries)}
name = names.pop() if len(names) == 1 else None

# perform the main alignment
typ, axes = _align_core(terms)
return typ, axes
return typ, axes, name


def reconstruct_object(typ, obj, axes, dtype):
def reconstruct_object(typ, obj, axes, dtype, name):
"""
Reconstruct an object given its type, raw value, and possibly empty
(None) axes.
Expand Down Expand Up @@ -200,7 +205,9 @@ def reconstruct_object(typ, obj, axes, dtype):
res_t = np.result_type(obj.dtype, dtype)

if not isinstance(typ, partial) and issubclass(typ, PandasObject):
return typ(obj, dtype=res_t, **axes)
if name is None:
return typ(obj, dtype=res_t, **axes)
return typ(obj, dtype=res_t, name=name, **axes)

# special case for pathological things like ~True/~False
if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/computation/engines.py
Expand Up @@ -54,6 +54,7 @@ def __init__(self, expr) -> None:
self.expr = expr
self.aligned_axes = None
self.result_type = None
self.result_name = None

def convert(self) -> str:
"""
Expand All @@ -76,12 +77,18 @@ def evaluate(self) -> object:
The result of the passed expression.
"""
if not self._is_aligned:
self.result_type, self.aligned_axes = align_terms(self.expr.terms)
self.result_type, self.aligned_axes, self.result_name = align_terms(
self.expr.terms
)

# make sure no names in resolvers and locals/globals clash
res = self._evaluate()
return reconstruct_object(
self.result_type, res, self.aligned_axes, self.expr.terms.return_type
self.result_type,
res,
self.aligned_axes,
self.expr.terms.return_type,
self.result_name,
)

@property
Expand Down
43 changes: 24 additions & 19 deletions pandas/tests/computation/test_eval.py
Expand Up @@ -737,6 +737,17 @@ def test_and_logic_string_match(self):
assert pd.eval(f"{event.str.match('hello').a}")
assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}")

def test_eval_keep_name(self, engine, parser):
df = Series([2, 15, 28], name="a").to_frame()
res = df.eval("a + a", engine=engine, parser=parser)
expected = Series([4, 30, 56], name="a")
tm.assert_series_equal(expected, res)

def test_eval_unmatching_names(self, engine, parser):
variable_name = Series([42], name="series_name")
res = pd.eval("variable_name + 0", engine=engine, parser=parser)
tm.assert_series_equal(variable_name, res)


# -------------------------------------
# gh-12388: Typecasting rules consistency with python
Expand Down Expand Up @@ -1269,14 +1280,12 @@ def test_assignment_explicit(self):
expected["c"] = expected["a"] + expected["b"]
tm.assert_frame_equal(df, expected)

def test_column_in(self):
def test_column_in(self, engine):
# GH 11235
df = DataFrame({"a": [11], "b": [-32]})
result = df.eval("a in [11, -32]")
expected = Series([True])
# TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI
# but cannot reproduce locally
tm.assert_series_equal(result, expected, check_names=False)
result = df.eval("a in [11, -32]", engine=engine)
expected = Series([True], name="a")
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.")
def test_assignment_not_inplace(self):
Expand Down Expand Up @@ -1505,7 +1514,7 @@ def test_date_boolean(self, engine, parser):
parser=parser,
)
expec = df.dates1 < "20130101"
tm.assert_series_equal(res, expec, check_names=False)
tm.assert_series_equal(res, expec)

def test_simple_in_ops(self, engine, parser):
if parser != "python":
Expand Down Expand Up @@ -1620,7 +1629,7 @@ def test_unary_functions(self, fn, engine, parser):
got = self.eval(expr, engine=engine, parser=parser)
with np.errstate(all="ignore"):
expect = getattr(np, fn)(a)
tm.assert_series_equal(got, expect, check_names=False)
tm.assert_series_equal(got, expect)

@pytest.mark.parametrize("fn", _binary_math_ops)
def test_binary_functions(self, fn, engine, parser):
Expand All @@ -1637,7 +1646,7 @@ def test_binary_functions(self, fn, engine, parser):
got = self.eval(expr, engine=engine, parser=parser)
with np.errstate(all="ignore"):
expect = getattr(np, fn)(a, b)
tm.assert_almost_equal(got, expect, check_names=False)
tm.assert_almost_equal(got, expect)

def test_df_use_case(self, engine, parser):
df = DataFrame(
Expand All @@ -1653,8 +1662,8 @@ def test_df_use_case(self, engine, parser):
inplace=True,
)
got = df.e
expect = np.arctan2(np.sin(df.a), df.b)
tm.assert_series_equal(got, expect, check_names=False)
expect = np.arctan2(np.sin(df.a), df.b).rename("e")
tm.assert_series_equal(got, expect)

def test_df_arithmetic_subexpression(self, engine, parser):
df = DataFrame(
Expand All @@ -1665,8 +1674,8 @@ def test_df_arithmetic_subexpression(self, engine, parser):
)
df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True)
got = df.e
expect = np.sin(df.a + df.b)
tm.assert_series_equal(got, expect, check_names=False)
expect = np.sin(df.a + df.b).rename("e")
tm.assert_series_equal(got, expect)

@pytest.mark.parametrize(
"dtype, expect_dtype",
Expand All @@ -1690,10 +1699,10 @@ def test_result_types(self, dtype, expect_dtype, engine, parser):
assert df.a.dtype == dtype
df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True)
got = df.b
expect = np.sin(df.a)
expect = np.sin(df.a).rename("b")
assert expect.dtype == got.dtype
assert expect_dtype == got.dtype
tm.assert_series_equal(got, expect, check_names=False)
tm.assert_series_equal(got, expect)

def test_undefined_func(self, engine, parser):
df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
Expand Down Expand Up @@ -1898,10 +1907,6 @@ def test_equals_various(other):
df = DataFrame({"A": ["a", "b", "c"]}, dtype=object)
result = df.eval(f"A == {other}")
expected = Series([False, False, False], name="A")
if USE_NUMEXPR:
# https://github.com/pandas-dev/pandas/issues/10239
# lose name with numexpr engine. Remove when that's fixed.
expected.name = None
tm.assert_series_equal(result, expected)


Expand Down
16 changes: 10 additions & 6 deletions pandas/tests/frame/test_query_eval.py
Expand Up @@ -58,26 +58,26 @@ def test_query_default(self, df, expected1, expected2):
result = df.query("A>0")
tm.assert_frame_equal(result, expected1)
result = df.eval("A+1")
tm.assert_series_equal(result, expected2, check_names=False)
tm.assert_series_equal(result, expected2)

def test_query_None(self, df, expected1, expected2):
result = df.query("A>0", engine=None)
tm.assert_frame_equal(result, expected1)
result = df.eval("A+1", engine=None)
tm.assert_series_equal(result, expected2, check_names=False)
tm.assert_series_equal(result, expected2)

def test_query_python(self, df, expected1, expected2):
result = df.query("A>0", engine="python")
tm.assert_frame_equal(result, expected1)
result = df.eval("A+1", engine="python")
tm.assert_series_equal(result, expected2, check_names=False)
tm.assert_series_equal(result, expected2)

def test_query_numexpr(self, df, expected1, expected2):
if NUMEXPR_INSTALLED:
result = df.query("A>0", engine="numexpr")
tm.assert_frame_equal(result, expected1)
result = df.eval("A+1", engine="numexpr")
tm.assert_series_equal(result, expected2, check_names=False)
tm.assert_series_equal(result, expected2)
else:
msg = (
r"'numexpr' is not installed or an unsupported version. "
Expand Down Expand Up @@ -194,8 +194,12 @@ def test_using_numpy(self, engine, parser):
df = Series([0.2, 1.5, 2.8], name="a").to_frame()
res = df.eval("@np.floor(a)", engine=engine, parser=parser)
expected = np.floor(df["a"])
if engine == "numexpr":
expected.name = None # See GH 58069
tm.assert_series_equal(expected, res)

def test_eval_simple(self, engine, parser):
df = Series([0.2, 1.5, 2.8], name="a").to_frame()
res = df.eval("a", engine=engine, parser=parser)
expected = df["a"]
tm.assert_series_equal(expected, res)


Expand Down

0 comments on commit 593a815

Please sign in to comment.