Skip to content

Commit

Permalink
handle numpy / pandas for memoize equal check
Browse files Browse the repository at this point in the history
  • Loading branch information
smacke committed Nov 23, 2023
1 parent 41e5812 commit 03982cb
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 16 deletions.
2 changes: 1 addition & 1 deletion core/ipyflow/data_model/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def _maybe_memoize_params(self) -> None:
if sym not in memoized_params:
memoized_params[sym] = (
sym.timestamp.cell_num,
sym.make_memoize_comparable(),
sym.make_memoize_comparable()[0],
)
outputs = {}
for sym in flow().updated_symbols:
Expand Down
52 changes: 41 additions & 11 deletions core/ipyflow/data_model/symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
Generator,
List,
Expand Down Expand Up @@ -1205,30 +1206,59 @@ def refresh(

_MAX_MEMOIZE_COMPARABLE_SIZE = 10**6

@staticmethod
def _equal(obj1: Any, obj2: Any) -> bool:
return obj1 == obj2

@staticmethod
def _array_equal(obj1: Any, obj2: Any) -> bool:
import numpy as np

try:
return np.alltrue(obj1 == obj2)
except: # noqa
return False

@classmethod
def make_memoize_comparable_for_obj(cls, obj: Any) -> Tuple[Any, int]:
def make_memoize_comparable_for_obj(
cls, obj: Any
) -> Tuple[Any, Optional[Callable[[Any, Any], bool]], int]:
if isinstance(obj, (int, str)):
return obj, 1
return obj, cls._equal, 1
elif isinstance(obj, (dict, frozenset, list, set, tuple)):
size = 0
comparable = []
for inner in obj:
inner_comp, inner_size = cls.make_memoize_comparable_for_obj(inner)
if inner_comp is cls.NULL:
return cls.NULL, -1
inner_comp, inner_eq, inner_size = cls.make_memoize_comparable_for_obj(
inner
)
if inner_comp is cls.NULL or inner_eq is not cls._equal:
return cls.NULL, None, -1
size += inner_size + 1
if size > cls._MAX_MEMOIZE_COMPARABLE_SIZE:
return cls.NULL, -1
return cls.NULL, None, -1
comparable.append(inner_comp)
return type(obj)(comparable), size
return type(obj)(comparable), cls._equal, size
else:
return cls.NULL, -1

def make_memoize_comparable(self) -> Any:
# hacks to check if they are arrays or dataframes without explicitly importing these
module = getattr(type(obj), "__module__", "")
if not module.startswith(("modin", "numpy", "pandas")):
return cls.NULL, None, -1
name = getattr(type(obj), "__name__", "")
if name.endswith(("DataFrame", "Series", "ndarray")):
return obj, cls._array_equal, obj.size
return cls.NULL, None, -1

def make_memoize_comparable(
self,
) -> Tuple[Any, Optional[Callable[[Any, Any], bool]]]:
if isinstance(self.stmt_node, ast.FunctionDef):
return astunparse.unparse(self.stmt_node)
obj, eq, size = self.make_memoize_comparable_for_obj(self.obj)
if size > self._MAX_MEMOIZE_COMPARABLE_SIZE:
return self.NULL, None
else:
return self.make_memoize_comparable_for_obj(self.obj)[0]
return obj, eq


if len(_SymbolContainer) == 0:
Expand Down
8 changes: 4 additions & 4 deletions core/ipyflow/shell/interactiveshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,10 +492,10 @@ def before_run_cell(
for param, (ts, comparable_obj) in params.items():
if param.timestamp.cell_num == ts:
continue
elif (
comparable_obj is Symbol.NULL
or comparable_obj != param.make_memoize_comparable()
):
elif comparable_obj is Symbol.NULL:
break
comparable_param, eq = param.make_memoize_comparable()
if not eq(comparable_param, comparable_obj):
break
else:
identical_result_ctr = ctr
Expand Down

0 comments on commit 03982cb

Please sign in to comment.