Skip to content

Commit

Permalink
Merge branch 'dev/richard/wlw' of github.com:whylabs/whylogs into dev…
Browse files Browse the repository at this point in the history
…/richard/wlw
  • Loading branch information
richard-rogers committed Apr 12, 2024
2 parents 6f6c655 + 8bd8b64 commit 375a49f
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 11 deletions.
2 changes: 1 addition & 1 deletion python/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.3.28
current_version = 1.3.29
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
serialize =
Expand Down
2 changes: 1 addition & 1 deletion python/Makefile
Expand Up @@ -5,7 +5,7 @@ src.proto.dir := ../proto/src
src.proto := $(shell find $(src.proto.dir) -type f -name "*.proto")
src.proto.v0.dir := ../proto/v0
src.proto.v0 := $(shell find $(src.proto.v0.dir) -type f -name "*.proto")
version := 1.3.28
version := 1.3.29

dist.dir := dist
egg.dir := .eggs
Expand Down
2 changes: 1 addition & 1 deletion python/docs/conf.py
Expand Up @@ -8,7 +8,7 @@
print("Pandoc is required to build our documentation.")
sys.exit(1)

version = "1.3.28"
version = "1.3.29"

project = "whylogs"
author = "whylogs developers"
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "whylogs"
version = "1.3.28"
version = "1.3.29"
description = "Profile and monitor your ML data pipeline end-to-end"
authors = ["WhyLabs.ai <support@whylabs.ai>"]
license = "Apache-2.0"
Expand Down
19 changes: 19 additions & 0 deletions python/tests/core/test_declarative_schema.py
Expand Up @@ -208,6 +208,25 @@ def test_declarative_schema_with_additional_resolvers(pandas_dataframe):
}.issubset(colset)


def test_declarative_schema_add_multiple_resolvers(pandas_dataframe):
    """Register one resolver by name and one by a set of names, then check each column's metrics.

    "animal" is registered with the cardinality metric only, while "legs" and
    "weight" are registered together (as a set) with the distribution metric
    only. After logging the dataframe, each column must report exactly the
    metric it was registered with.
    """
    declarative = DeclarativeSchema([])
    declarative.add_resolver_spec("animal", None, [MetricSpec(StandardMetric.cardinality.value)])
    declarative.add_resolver_spec({"legs", "weight"}, None, [MetricSpec(StandardMetric.distribution.value)])

    profile_view = why.log(pandas_dataframe, schema=declarative).view()

    # Column name -> exact set of metric names expected on that column.
    expected_by_column = (
        ("animal", {"cardinality"}),
        ("legs", {"distribution"}),
        ("weight", {"distribution"}),
    )
    for column, expected_metrics in expected_by_column:
        metrics = set(profile_view.get_column(column).get_metric_names())
        assert metrics == expected_metrics


def test_invalid_column_name():
    """Passing the integer 42 as the column name must raise ValueError."""
    empty_schema = DeclarativeSchema([])
    not_a_column_name = 42
    with pytest.raises(ValueError):
        empty_schema.add_resolver_spec(not_a_column_name)


def test_additional_metrics_nonexistent(pandas_dataframe):
count_spec = ResolverSpec(
column_name="nonexistent_columns",
Expand Down
7 changes: 7 additions & 0 deletions python/tests/migration/test_uncompound.py
Expand Up @@ -141,3 +141,10 @@ def test_uncompounded_condition_count() -> None:
assert metric.null.value == 0
assert metric.nan.value == 0
assert metric.inf.value == 0
metric = uncompounded._columns[column_name]._metrics["types"]
assert metric.integral.value == 2
assert metric.fractional.value == 0
assert metric.boolean.value == 0
assert metric.string.value == 0
assert metric.object.value == 0
assert metric.tensor.value == 0
2 changes: 1 addition & 1 deletion python/tests/smoketest.py
Expand Up @@ -12,7 +12,7 @@
not a development environment.
"""

current_version = "1.3.28"
current_version = "1.3.29"


def test_package_version() -> None:
Expand Down
13 changes: 11 additions & 2 deletions python/whylogs/core/schema.py
@@ -1,7 +1,7 @@
import logging
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, Dict, List, Mapping, Optional, Tuple, TypeVar
from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, TypeVar, Union

import whylogs.core.resolvers as res
from whylogs.core.datatypes import StandardTypeMapper, TypeMapper
Expand Down Expand Up @@ -232,10 +232,19 @@ def add_resolver(self, resolver_spec: ResolverSpec):

def add_resolver_spec(
    self,
    column_name: Optional[Union[str, Set[str]]] = None,
    column_type: Optional[Any] = None,
    metrics: Optional[List["MetricSpec"]] = None,
):
    """Build ResolverSpec(s) from the arguments and register them via add_resolver.

    Args:
        column_name: A single column name, a set of column names, or None.
            When a set is given, one ResolverSpec is created and added for
            each name in the set (an empty set adds nothing).
        column_type: Passed through unchanged to every ResolverSpec created.
        metrics: Metric specs passed to every ResolverSpec created; None is
            treated as an empty list.

    Raises:
        ValueError: If column_name is neither None, a str, nor a set, or if
            it is a set containing a member that is not a str.
    """
    if column_name is not None and not isinstance(column_name, (str, set)):
        raise ValueError("column_name must be a string or set of strings")

    if isinstance(column_name, set):
        # Check every member before registering anything, so a bad member
        # cannot leave the schema with only some of the requested specs.
        if not all(isinstance(name, str) for name in column_name):
            raise ValueError("column_name must be a string or set of strings")
        for name in column_name:
            spec = ResolverSpec(column_name=name, column_type=column_type, metrics=metrics or [])
            self.add_resolver(spec)
        return

    spec = ResolverSpec(column_name=column_name, column_type=column_type, metrics=metrics or [])
    self.add_resolver(spec)

Expand Down
31 changes: 27 additions & 4 deletions python/whylogs/migration/uncompound.py
Expand Up @@ -4,7 +4,7 @@

from whylogs.core import ColumnProfileView, DatasetProfileView
from whylogs.core.metrics import Metric
from whylogs.core.metrics.column_metrics import ColumnCountsMetric
from whylogs.core.metrics.column_metrics import ColumnCountsMetric, TypeCountersMetric
from whylogs.core.metrics.compound_metric import CompoundMetric
from whylogs.core.metrics.condition_count_metric import ConditionCountMetric
from whylogs.core.metrics.metric_components import IntegralComponent
Expand Down Expand Up @@ -118,6 +118,14 @@ def _uncompound_condition_count(
return dict()

result: Dict[str, ColumnProfileView] = dict()
type_metric = TypeCountersMetric(
integral=metric.total, # total condition evaluations
fractional=IntegralComponent(0),
boolean=IntegralComponent(0),
string=IntegralComponent(0),
object=IntegralComponent(0),
tensor=IntegralComponent(0),
)
for condition_name, count_component in metric.matches.items():
new_col_name = f"{_condition_count_magic_string()}{col_name}.{condition_name}.total"
new_metric = ColumnCountsMetric(
Expand All @@ -126,23 +134,38 @@ def _uncompound_condition_count(
nan=IntegralComponent(0), # unused
inf=IntegralComponent(0), # unused
)
result[new_col_name] = ColumnProfileView({ColumnCountsMetric.get_namespace(): new_metric})
result[new_col_name] = ColumnProfileView(
{
ColumnCountsMetric.get_namespace(): new_metric,
TypeCountersMetric.get_namespace(): type_metric,
}
)
new_col_name = f"{_condition_count_magic_string()}{col_name}.{condition_name}.matches"
new_metric = ColumnCountsMetric(
n=count_component, # count of evaluations that matched condition
null=IntegralComponent(0), # unused
nan=IntegralComponent(0), # unused
inf=IntegralComponent(0), # unused
)
result[new_col_name] = ColumnProfileView({ColumnCountsMetric.get_namespace(): new_metric})
result[new_col_name] = ColumnProfileView(
{
ColumnCountsMetric.get_namespace(): new_metric,
TypeCountersMetric.get_namespace(): type_metric,
}
)
new_col_name = f"{_condition_count_magic_string()}{col_name}.{condition_name}.non_matches"
new_metric = ColumnCountsMetric(
n=IntegralComponent(metric.total.value - count_component.value), # evaluations that didn't match
null=IntegralComponent(0), # unused
nan=IntegralComponent(0), # unused
inf=IntegralComponent(0), # unused
)
result[new_col_name] = ColumnProfileView({ColumnCountsMetric.get_namespace(): new_metric})
result[new_col_name] = ColumnProfileView(
{
ColumnCountsMetric.get_namespace(): new_metric,
TypeCountersMetric.get_namespace(): type_metric,
}
)

return result

Expand Down

0 comments on commit 375a49f

Please sign in to comment.