Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 385 CLI: show beliefs does not handle NaN well #516

Merged
merged 6 commits into from Oct 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion documentation/changelog.rst
Expand Up @@ -15,7 +15,7 @@ New features

Bugfixes
-----------

* The CLI command ``flexmeasures show beliefs`` now supports plotting time series data that includes NaN values, and provides better support for plotting multiple sensors that do not share the same unit [see `PR #516 <http://www.github.com/FlexMeasures/flexmeasures/pull/516>`_]

Infrastructure / Support
----------------------
Expand Down
37 changes: 23 additions & 14 deletions flexmeasures/cli/data_show.py
Expand Up @@ -8,6 +8,7 @@
from flask.cli import with_appcontext
from tabulate import tabulate
from humanize import naturaldelta, naturaltime
import pandas as pd
import uniplot

from flexmeasures.data.models.user import Account, AccountRole, User, Role
Expand All @@ -19,6 +20,8 @@
from flexmeasures.data.schemas.account import AccountIdField
from flexmeasures.data.schemas.sources import DataSourceIdField
from flexmeasures.data.schemas.times import AwareDateTimeField, DurationField
from flexmeasures.data.services.time_series import simplify_index
from flexmeasures.utils.time_utils import determine_minimum_resampling_resolution


@click.group("show")
Expand Down Expand Up @@ -266,7 +269,9 @@ def plot_beliefs(
Show a simple plot of belief data directly in the terminal.
"""
sensors = list(sensors)
min_resolution = min([s.event_resolution for s in sensors])
minimum_resampling_resolution = determine_minimum_resampling_resolution(
[sensor.event_resolution for sensor in sensors]
)

# query data
beliefs_by_sensor = TimedBelief.search(
Expand All @@ -276,7 +281,7 @@ def plot_beliefs(
beliefs_before=belief_time_before,
source=source,
one_deterministic_belief_per_event=True,
resolution=min_resolution,
resolution=minimum_resampling_resolution,
sum_multiple=False,
)
# only keep non-empty
Expand All @@ -288,33 +293,37 @@ def plot_beliefs(
if len(beliefs_by_sensor.keys()) == 0:
click.echo("No data found!")
raise click.Abort()
first_df = beliefs_by_sensor[sensors[0].name]
if all(sensor.unit == sensors[0].unit for sensor in sensors):
shared_unit = sensors[0].unit
else:
shared_unit = ""
click.echo(
"The y-axis shows no unit, because the selected sensors do not share the same unit."
)
df = pd.concat([simplify_index(df) for df in beliefs_by_sensor.values()], axis=1)
df.columns = beliefs_by_sensor.keys()

# Build title
if len(sensors) == 1:
title = f"Beliefs for Sensor '{sensors[0].name}' (Id {sensors[0].id}).\n"
else:
title = f"Beliefs for Sensor(s) [{','.join([s.name for s in sensors])}], (Id(s): [{','.join([str(s.id) for s in sensors])}]).\n"
title = f"Beliefs for Sensor(s) [{', '.join([s.name for s in sensors])}], (Id(s): [{', '.join([str(s.id) for s in sensors])}]).\n"
title += f"Data spans {naturaldelta(duration)} and starts at {start}."
if belief_time_before:
title += f"\nOnly beliefs made before: {belief_time_before}."
if source:
title += f"\nSource: {source.description}"
title += f"\nThe time resolution (x-axis) is {naturaldelta(min_resolution)}."
title += f"\nThe time resolution (x-axis) is {naturaldelta(minimum_resampling_resolution)}."

uniplot.plot(
[
beliefs.event_value
for beliefs in [beliefs_by_sensor[sn] for sn in [s.name for s in sensors]]
],
[df[col] for col in df.columns],
title=title,
color=True,
lines=True,
y_unit=first_df.sensor.unit
if len(beliefs_by_sensor) == 1
or all(sensor.unit == first_df.sensor.unit for sensor in sensors)
else "",
legend_labels=[s.name for s in sensors],
y_unit=shared_unit,
legend_labels=[s.name for s in sensors]
if shared_unit
else [s.name + f" (in {s.unit})" for s in sensors],
)


Expand Down
18 changes: 8 additions & 10 deletions flexmeasures/data/models/generic_assets.py
Expand Up @@ -20,7 +20,10 @@
from flexmeasures.data.queries.annotations import query_asset_annotations
from flexmeasures.auth.policy import AuthModelMixin, EVERY_LOGGED_IN_USER
from flexmeasures.utils import geo_utils
from flexmeasures.utils.time_utils import server_now
from flexmeasures.utils.time_utils import (
determine_minimum_resampling_resolution,
server_now,
)


class GenericAssetType(db.Model):
Expand Down Expand Up @@ -382,18 +385,13 @@ def search_beliefs(
from flexmeasures.data.services.time_series import simplify_index

if sensors:
condition = list(
bdf.event_resolution
for bdf in bdf_dict.values()
if bdf.event_resolution > timedelta(0)
)
minimum_non_zero_resolution = (
min(condition) if any(condition) else timedelta(0)
minimum_resampling_resolution = determine_minimum_resampling_resolution(
[bdf.event_resolution for bdf in bdf_dict.values()]
)
df_dict = {}
for sensor, bdf in bdf_dict.items():
if bdf.event_resolution > timedelta(0):
bdf = bdf.resample_events(minimum_non_zero_resolution)
bdf = bdf.resample_events(minimum_resampling_resolution)
bdf["belief_horizon"] = bdf.belief_horizons.to_numpy()
df = simplify_index(
bdf,
Expand All @@ -406,7 +404,7 @@ def search_beliefs(
else ["belief_time", "source"],
append=True,
)
df["sensor"] = sensor # or some JSONfiable representation
df["sensor"] = sensor # or some JSONifiable representation
df = df.set_index(["sensor"], append=True)
df_dict[sensor.id] = df
df = pd.concat(df_dict.values())
Expand Down
14 changes: 14 additions & 0 deletions flexmeasures/utils/time_utils.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import re
from datetime import datetime, timedelta
from typing import List, Union, Tuple, Optional
Expand Down Expand Up @@ -333,3 +335,15 @@ def duration_isoformat(duration: timedelta):
# at least one component has to be there.
repl = ret and "".join(ret) or "T0H"
return re.sub("%P", repl, "P%P")


def determine_minimum_resampling_resolution(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my PR in flexmeasures-simulate, I have a similar util function, and I decided to let it raise if the resolutions are actually not compatible (if any([resolution % min_resolution != timedelta(hours=0) for resolution in resolutions])

This function could do this, maybe triggered by a parameter flag raise_if_incompatible. Thoughts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my mind, no combination of resolutions should be incompatible. Resampling a (simplified) time series with instantaneous values should be rather straightforward: when downsampling, select the first value; when upsampling, interpolate using some strategy (such as forward fill or linear interpolation), which we could later define as a sensor attribute. I'll address this soon in a timely-beliefs PR.

event_resolutions: list[timedelta],
) -> timedelta:
"""Return minimum non-zero event resolution, or zero resolution if none of the event resolutions is non-zero."""
condition = list(
event_resolution
for event_resolution in event_resolutions
if event_resolution > timedelta(0)
)
return min(condition) if any(condition) else timedelta(0)
3 changes: 2 additions & 1 deletion requirements/app.in
Expand Up @@ -50,7 +50,8 @@ marshmallow>=3
marshmallow-polyfield
marshmallow-sqlalchemy>=0.23.1
webargs
uniplot
# Minimum version that correctly aligns time series that include NaN values
uniplot>=0.7.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't see how you use uniplot differently now. It would probably still work with <0.7.0, just not if NaN is in the data. So, do we technically need to depend on a higher version, or do we like this feature to be around? Always good to add the reasoning as a comment to make this clear for later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like this feature to be around. When selecting multiple sensors for some time period, having NaN data in that set is very common. With previous uniplot versions, the time series will not align right, which is a serious problem to me.

From uniplot's setup.py (version 0.7.0):

python_requires=">=3.5",
install_requires=["numpy>=1.15.0"],

which are older versions than the first ones we started using for this repo as far back as I can tell (10cff9f).

I'll add an inline note to where we define the minimum version requirement.

# flask should be after all the flask plugins, because setup might find they ARE flask
# Minimum here due to Flask-Classful not supporting Werkzeug 2.2.0 yet, see https://github.com/teracyhq/flask-classful/pull/145
flask>=1.0, <=2.1.2
Expand Down
2 changes: 1 addition & 1 deletion requirements/app.txt
Expand Up @@ -339,7 +339,7 @@ typing-extensions==4.3.0
# via
# py-moneyed
# pydantic
uniplot==0.5.0
uniplot==0.7.0
# via -r requirements/app.in
urllib3[socks]==1.26.12
# via
Expand Down