From 668425805ba10f208c498ece4a9a6020e7c533f2 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 13:38:35 +0200 Subject: [PATCH 01/70] feat: revision to add `attributes` column to the `data_source` table Signed-off-by: Victor Garcia Reolid --- ...e0c_add_attribute_column_to_data_source.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py diff --git a/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py b/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py new file mode 100644 index 000000000..2dc59a6c1 --- /dev/null +++ b/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py @@ -0,0 +1,28 @@ +"""add attribute column to data source + +Revision ID: 2ac7fb39ce0c +Revises: d814c0688ae0 +Create Date: 2023-06-05 23:41:31.788961 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "2ac7fb39ce0c" +down_revision = "d814c0688ae0" +branch_labels = None +depends_on = None + + +def upgrade(): + # add the column `attributes`to the table `data_source` + op.add_column( + "data_source", + sa.Column("attributes", sa.JSON(), nullable=True, default={}), + ) + + +def downgrade(): + pass From abfe310870f60bf943774c7d33374bdc06235214 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 13:42:59 +0200 Subject: [PATCH 02/70] feat: add `attributes` column to the DataSource model Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index e0524021d..080dd4be3 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +from sqlalchemy.ext.mutable import MutableDict import timely_beliefs as tb @@ -68,6 +69,8 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): ) user = db.relationship("User", backref=db.backref("data_source", lazy=True)) + attributes = db.Column(MutableDict.as_mutable(db.JSON), nullable=False, default={}) + # The model and version of a script source model = db.Column(db.String(80), nullable=True) version = db.Column( From c5fddad053c246019d77c84deac2a8d138cf4d2d Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 13:43:32 +0200 Subject: [PATCH 03/70] feat: add sensors relationship in DataSource Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 080dd4be3..9550dc5d8 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -78,6 +78,12 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): 
nullable=True, ) + sensors = db.relationship( + "Sensor", + secondary="timed_belief", + backref=db.backref("data_sources", lazy="dynamic"), + ) + def __init__( self, name: str | None = None, From c2de8fc0d62d84ef7b7360130d8746d38ba1a082 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 20 Jun 2023 14:02:29 +0200 Subject: [PATCH 04/70] fix: make sensors relationship viewonly Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 9550dc5d8..baad820e9 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -82,6 +82,7 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): "Sensor", secondary="timed_belief", backref=db.backref("data_sources", lazy="dynamic"), + viewonly=True, ) def __init__( From a26ed8e599f7425d9c377534eaec0d642459b51e Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 12:47:29 +0200 Subject: [PATCH 05/70] feat: add report_config to Reporter class Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/__init__.py | 118 ++++-------------- 1 file changed, 24 insertions(+), 94 deletions(-) diff --git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index 0364dc678..8eb56c8b9 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -1,10 +1,7 @@ from __future__ import annotations -from typing import Optional, Union, Dict -from datetime import datetime, timedelta +from typing import Optional -import pandas as pd -from flexmeasures.data.schemas.reporting import ReporterConfigSchema from flexmeasures.data.models.time_series import Sensor from flexmeasures.data.models.data_sources import DataGeneratorMixin @@ -21,103 +18,38 @@ class Reporter(DataGeneratorMixin): sensor: Sensor = None 
reporter_config: Optional[dict] = None - reporter_config_raw: Optional[dict] = None - schema = ReporterConfigSchema() - data: Dict[str, Union[tb.BeliefsDataFrame, pd.DataFrame]] = None + report_config: Optional[dict] = None - def __init__( - self, sensor: Sensor, reporter_config_raw: Optional[dict] = None - ) -> None: + reporter_config_schema = None + report_config_schema = None + + def __init__(self, sensor: Sensor, reporter_config: dict = {}) -> None: """ Initialize a new Reporter. Attributes: :param sensor: sensor where the output of the reporter will be saved to. - :param reporter_config_raw: dictionary with the serialized configuration of the reporter. + :param reporter_config: dictionary with the serialized configuration of the reporter. """ + self.deserialize_reporter_config(reporter_config) self.sensor = sensor - if not reporter_config_raw: - reporter_config_raw = {} - - self.reporter_config_raw = reporter_config_raw - - def fetch_data( - self, - start: datetime, - end: datetime, - input_resolution: timedelta = None, - belief_time: datetime = None, - ): - """ - Fetches the time_beliefs from the database - """ - - self.data = {} - for tb_query in self.beliefs_search_configs: - _tb_query = tb_query.copy() - # using start / end instead of event_starts_after/event_ends_before when not defined - event_starts_after = _tb_query.pop("event_starts_after", start) - event_ends_before = _tb_query.pop("event_ends_before", end) - resolution = _tb_query.pop("resolution", input_resolution) - belief_time = _tb_query.pop("belief_time", belief_time) - - sensor: Sensor = _tb_query.pop("sensor", None) - alias: str = _tb_query.pop("alias", None) - - bdf = sensor.search_beliefs( - event_starts_after=event_starts_after, - event_ends_before=event_ends_before, - resolution=resolution, - beliefs_before=belief_time, - **_tb_query, - ) - - # store data source as local variable - for source in bdf.sources.unique(): - self.data[f"source_{source.id}"] = source - - # store BeliefsDataFrame 
as local variable - if alias: - self.data[alias] = bdf - else: - self.data[f"sensor_{sensor.id}"] = bdf - def update_attribute(self, attribute, default): if default is not None: setattr(self, attribute, default) - def compute( - self, - start: datetime, - end: datetime, - input_resolution: timedelta | None = None, - belief_time: datetime | None = None, - **kwargs, - ) -> tb.BeliefsDataFrame: + def compute(self, **kwargs) -> tb.BeliefsDataFrame: """This method triggers the creation of a new report. - The same object can generate multiple reports with different start, end, input_resolution + The same object can generate multiple reports with different start, end, resolution and belief_time values. In the future, this function will parse arbitrary input arguments defined in a schema. """ - # deserialize configuration - if self.reporter_config is None: - self.deserialize_config() - - if input_resolution is None: - input_resolution = self.sensor.event_resolution - - # fetch data - self.fetch_data(start, end, input_resolution, belief_time) - # Result - result: tb.BeliefsDataFrame = self._compute( - start, end, input_resolution, belief_time - ) + result: tb.BeliefsDataFrame = self._compute(**kwargs) # checking that the event_resolution of the output BeliefDataFrame is equal to the one of the output sensor assert ( @@ -137,13 +69,7 @@ def compute( return result - def _compute( - self, - start: datetime, - end: datetime, - input_resolution: timedelta = None, - belief_time: datetime = None, - ) -> tb.BeliefsDataFrame: + def _compute(self, **kwargs) -> tb.BeliefsDataFrame: """ Overwrite with the actual computation of your report. @@ -151,9 +77,9 @@ def _compute( """ raise NotImplementedError() - def deserialize_config(self): + def deserialize_reporter_config(self, reporter_config: dict) -> dict: """ - Validate the report config against a Marshmallow Schema. + Validate the reporter config against a Marshmallow Schema. 
Ideas: - Override this method - Call superclass method to apply validation and common variables deserialization (see PandasReporter) @@ -162,9 +88,13 @@ def deserialize_config(self): Raises ValidationErrors or ValueErrors. """ - self.reporter_config = self.schema.load( - self.reporter_config_raw - ) # validate reporter config - self.beliefs_search_configs = self.reporter_config.get( - "beliefs_search_configs" - ) # extracting TimeBelief query configuration parameters + raise NotImplementedError() + + def deserialize_report_config(self, report_config: dict) -> dict: + """_summary_ + + :param report_config: _description_ + :return: _description_ + """ + + raise NotImplementedError() From d9cef3df40e50a5b680ff44f1ead607eb892e584 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 13:20:03 +0200 Subject: [PATCH 06/70] feat: add PandasReporter report and reporter schemas Signed-off-by: Victor Garcia Reolid --- .../data/schemas/reporting/pandas_reporter.py | 85 ++++++++++++++++-- .../data/schemas/tests/test_reporting.py | 89 ++++++++++--------- 2 files changed, 125 insertions(+), 49 deletions(-) diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 3b076ddb2..23fce775c 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -1,7 +1,11 @@ -from marshmallow import Schema, fields, ValidationError, validates_schema +from marshmallow import Schema, fields, ValidationError, validates_schema, validate from inspect import signature -from flexmeasures.data.schemas.reporting import ReporterConfigSchema +from flexmeasures.data.schemas.sensors import SensorIdField +from flexmeasures.data.schemas.sources import DataSourceIdField + +from flexmeasures.data.schemas import AwareDateTimeField, DurationField + from timely_beliefs import BeliefsDataFrame @@ -43,16 +47,43 @@ def validate_method_call(self, data, 
**kwargs): ) -class PandasReporterConfigSchema(ReporterConfigSchema): +class BeliefsSearchConfigSchema(Schema): + """ + This schema implements the required fields to perform a TimedBeliefs search + using the method flexmeasures.data.models.time_series:Sensor.search_beliefs + """ + + sensor = SensorIdField(required=True) + + event_starts_after = AwareDateTimeField() + event_ends_before = AwareDateTimeField() + + belief_time = AwareDateTimeField() + + horizons_at_least = DurationField() + horizons_at_most = DurationField() + + source = DataSourceIdField() + + source_types = fields.List(fields.Str()) + exclude_source_types = fields.List(fields.Str()) + most_recent_beliefs_only = fields.Boolean() + most_recent_events_only = fields.Boolean() + + one_deterministic_belief_per_event = fields.Boolean() + one_deterministic_belief_per_event_per_source = fields.Boolean() + resolution = DurationField() + sum_multiple = fields.Boolean() + + +class PandasReporterReporterConfigSchema(Schema): """ This schema lists fields that can be used to describe sensors in the optimised portfolio Example: { - "input_sensors" : [ - {"sensor" : 1, "alias" : "df1"} - ], + "input_variables" : ["df1"], "transformations" : [ { "df_input" : "df1", @@ -72,6 +103,7 @@ class PandasReporterConfigSchema(ReporterConfigSchema): "final_df_output" : "df2" """ + input_variables = fields.List(fields.Str(), required=True) transformations = fields.List(fields.Nested(PandasMethodCall()), required=True) final_df_output = fields.Str(required=True) @@ -85,8 +117,7 @@ def validate_chaining(self, data, **kwargs): # create dictionary data with objects of the types that is supposed to be generated # loading the initial data, the sensors' data fake_data = dict( - (f"sensor_{s['sensor'].id}", BeliefsDataFrame) - for s in data.get("beliefs_search_configs") + (variable, BeliefsDataFrame) for variable in data.get("input_variables") ) final_df_output = data.get("final_df_output") @@ -101,7 +132,7 @@ def validate_chaining(self, 
data, **kwargs): if df_output == final_df_output: final_df_output_method = transformation.get("method") - if not previous_df and not df_input: + if df_input not in fake_data: raise ValidationError("Cannot find the input DataFrame.") previous_df = df_output # keeping last BeliefsDataFrame calculation @@ -117,3 +148,39 @@ def validate_chaining(self, data, **kwargs): raise ValidationError( "Final output type cannot by of type `Resampler` or `DataFrameGroupBy`" ) + + +class PandasReporterReportConfigSchema(Schema): + input_sensors = fields.Dict( + keys=fields.Str(), + values=fields.Nested(BeliefsSearchConfigSchema()), + required=True, + validator=validate.Length(min=1), + ) + + start = AwareDateTimeField(required=False) + end = AwareDateTimeField(required=False) + + resolution = DurationField(required=False) + belief_time = AwareDateTimeField(required=False) + + @validates_schema + def validate_time_parameters(self, data, **kwargs): + """This method validates that all input sensors have start + and end parameters available. + """ + + # it's enough to provide a common start and end + if ("start" in data) and ("end" in data): + return + + for alias, input_sensor in data.get("input_sensors").items(): + if ("event_starts_after" not in input_sensor) and ("start" not in data): + raise ValidationError( + f"Start parameter not provided for sensor `{alias}` ({input_sensor})." + ) + + if ("event_ends_before" not in input_sensor) and ("end" not in data): + raise ValidationError( + f"End parameter not provided for sensor `{alias}` ({input_sensor})." 
+ ) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index cb5490052..3246df14d 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -2,7 +2,8 @@ from flexmeasures.data.models.generic_assets import GenericAsset, GenericAssetType from flexmeasures.data.schemas.reporting.pandas_reporter import ( - PandasReporterConfigSchema, + PandasReporterReporterConfigSchema, + PandasReporterReportConfigSchema, ) from marshmallow.exceptions import ValidationError @@ -40,13 +41,7 @@ def setup_dummy_sensors(db, app): [ ( { # this checks that the final_df_output dataframe is actually generated at some point of the processing pipeline - "beliefs_search_configs": [ - { - "sensor": 1, - "event_starts_after": "2022-01-01T00:00:00 00:00", - "event_ends_before": "2022-01-01T23:00:00 00:00", - }, - ], + "input_variables": ["sensor_1"], "transformations": [ { "df_output": "final_output", @@ -60,13 +55,7 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that chaining works, applying the method copy on the previous dataframe - "beliefs_search_configs": [ - { - "sensor": 1, - "event_starts_after": "2022-01-01T00:00:00 00:00", - "event_ends_before": "2022-01-01T23:00:00 00:00", - }, - ], + "input_variables": ["sensor_1"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, {"method": "copy"}, @@ -78,18 +67,7 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that resample cannot be the last method being applied - "beliefs_search_configs": [ - { - "sensor": 1, - "event_starts_after": "2022-01-01T00:00:00 00:00", - "event_ends_before": "2022-01-01T23:00:00 00:00", - }, - { - "sensor": 2, - "event_starts_after": "2022-01-01T00:00:00 00:00", - "event_ends_before": "2022-01-01T23:00:00 00:00", - }, - ], + "input_variables": ["sensor_1", "sensor_2"], "transformations": [ {"df_output": "output1", "df_input": 
"sensor_1", "method": "copy"}, {"method": "copy"}, @@ -101,18 +79,7 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that resample cannot be the last method being applied - "beliefs_search_configs": [ - { - "sensor": 1, - "event_starts_after": "2022-01-01T00:00:00 00:00", - "event_ends_before": "2022-01-01T23:00:00 00:00", - }, - { - "sensor": 2, - "event_starts_after": "2022-01-01T00:00:00 00:00", - "event_ends_before": "2022-01-01T23:00:00 00:00", - }, - ], + "input_variables": ["sensor_1", "sensor_2"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, {"method": "copy"}, @@ -129,10 +96,52 @@ def test_pandas_reporter_schema( reporter_config, is_valid, db, app, setup_dummy_sensors ): - schema = PandasReporterConfigSchema() + schema = PandasReporterReporterConfigSchema() if is_valid: schema.load(reporter_config) else: with pytest.raises(ValidationError): schema.load(reporter_config) + + +@pytest.mark.parametrize( + "report_config, is_valid", + [ + ( + { + "input_sensors": {"sensor_1": {"sensor": 1}}, + "start": "2023-06-06T00:00:00+02:00", + "end": "2023-06-06T00:00:00+02:00", + }, + True, + ), + ( + { + "input_sensors": {"sensor_1": {"sensor": 1}}, + }, + False, + ), + ( + { + "input_sensors": { + "sensor_1": { + "sensor": 1, + "event_starts_after": "2023-06-07T00:00:00+02:00", + "event_ends_before": "2023-06-07T00:00:00+02:00", + } + }, + }, + True, + ), + ], +) +def test_pandas_report_schema(report_config, is_valid, db, app, setup_dummy_sensors): + + schema = PandasReporterReportConfigSchema() + + if is_valid: + schema.load(report_config) + else: + with pytest.raises(ValidationError): + schema.load(report_config) From d2ef5900de6b63fcd6f38381f0434d6e23ba1ead Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 13:21:16 +0200 Subject: [PATCH 07/70] fix: update fixture by removing beliefs_search_configs and adding input_variables Signed-off-by: Victor Garcia Reolid --- 
flexmeasures/cli/tests/conftest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flexmeasures/cli/tests/conftest.py b/flexmeasures/cli/tests/conftest.py index 028012edc..f88fce47e 100644 --- a/flexmeasures/cli/tests/conftest.py +++ b/flexmeasures/cli/tests/conftest.py @@ -72,7 +72,7 @@ def setup_dummy_data(db, app): @pytest.fixture(scope="module") @pytest.mark.skip_github -def reporter_config_raw(app, db, setup_dummy_data): +def reporter_config(app, db, setup_dummy_data): """ This reporter_config defines the operations to add up the values of the sensors 1 and 2 and resamples the result to a @@ -81,8 +81,8 @@ def reporter_config_raw(app, db, setup_dummy_data): sensor1, sensor2, report_sensor = setup_dummy_data - reporter_config_raw = dict( - beliefs_search_configs=[dict(sensor=sensor1.id), dict(sensor=sensor2.id)], + reporter_config = dict( + input_variables=["sensor_1", "sensor_2"], transformations=[ dict( df_input="sensor_1", @@ -95,4 +95,4 @@ def reporter_config_raw(app, db, setup_dummy_data): final_df_output="df_agg", ) - return reporter_config_raw + return reporter_config From 54b5ddad8e25ae33410210675d9c5a19ecaf4b06 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 6 Jun 2023 13:21:45 +0200 Subject: [PATCH 08/70] feat: add report config to PandasReporter Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/pandas_reporter.py | 94 +++++++++++++++++-- .../reporting/tests/test_pandas_reporter.py | 81 +++++++++++----- .../models/reporting/tests/test_reporter.py | 40 -------- 3 files changed, 141 insertions(+), 74 deletions(-) delete mode 100644 flexmeasures/data/models/reporting/tests/test_reporter.py diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 1e1e98179..a79a96983 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -1,15 +1,17 @@ from __future__ 
import annotations -from typing import Any +from typing import Any, Union, Dict from datetime import datetime, timedelta from flask import current_app import timely_beliefs as tb - +import pandas as pd from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.schemas.reporting.pandas_reporter import ( - PandasReporterConfigSchema, + PandasReporterReporterConfigSchema, + PandasReporterReportConfigSchema, ) +from flexmeasures.data.models.time_series import Sensor from flexmeasures.utils.time_utils import server_now @@ -18,30 +20,104 @@ class PandasReporter(Reporter): __version__ = "1" __author__ = "Seita" - schema = PandasReporterConfigSchema() + + reporter_config_schema = PandasReporterReporterConfigSchema() + report_config_schema = PandasReporterReportConfigSchema() + + input_variables: list[str] = None transformations: list[dict[str, Any]] = None final_df_output: str = None - def deserialize_config(self): + data: Dict[str, Union[tb.BeliefsDataFrame, pd.DataFrame]] = None + + def deserialize_reporter_config(self, reporter_config): # call super class deserialize_config - super().deserialize_config() + self.reporter_config = self.reporter_config_schema.load(reporter_config) # extract PandasReporter specific fields self.transformations = self.reporter_config.get("transformations") + self.input_variables = self.reporter_config.get("input_variables") self.final_df_output = self.reporter_config.get("final_df_output") - def _compute( + def deserialize_report_config( + self, report_config: dict + ): # TODO: move to Reporter class + self.report_config = self.report_config_schema.load( + report_config + ) # validate reporter configs + + input_sensors = report_config.get("input_sensors") + + # check that all input_variables are provided + for variable in self.input_variables: + assert ( + variable in input_sensors + ), f"Required sensor with alias `{variable}` not provided." 
+ + def fetch_data( self, start: datetime, end: datetime, - input_resolution: timedelta | None = None, + input_sensors: dict, + resolution: timedelta | None = None, belief_time: datetime | None = None, - ) -> tb.BeliefsDataFrame: + ): + """ + Fetches the time_beliefs from the database + """ + + self.data = {} + for alias, tb_query in input_sensors.items(): + _tb_query = tb_query.copy() + + # using start / end instead of event_starts_after/event_ends_before when not defined + event_starts_after = _tb_query.pop("event_starts_after", start) + event_ends_before = _tb_query.pop("event_ends_before", end) + resolution = _tb_query.pop("resolution", resolution) + belief_time = _tb_query.pop("belief_time", belief_time) + + sensor: Sensor = _tb_query.pop("sensor", None) + + bdf = sensor.search_beliefs( + event_starts_after=event_starts_after, + event_ends_before=event_ends_before, + resolution=resolution, + beliefs_before=belief_time, + **_tb_query, + ) + + # store data source as local variable + for source in bdf.sources.unique(): + self.data[f"source_{source.id}"] = source + + # store BeliefsDataFrame as local variable + self.data[alias] = bdf + + def _compute(self, **kwargs) -> tb.BeliefsDataFrame: """ This method applies the transformations and outputs the dataframe defined in `final_df_output` field of the report_config. 
""" + self.report_config = kwargs + + if "report_config" in kwargs: + self.deserialize_report_config(kwargs.get("report_config")) + + # report configuration + start: datetime = self.report_config.get("start") + end: datetime = self.report_config.get("end") + input_sensors: dict = self.report_config.get("input_sensors") + + resolution: timedelta | None = self.report_config.get("resolution", None) + belief_time: datetime | None = self.report_config.get("belief_time", None) + + if resolution is None: + resolution = self.sensor.event_resolution + + # fetch sensor data + self.fetch_data(start, end, input_sensors, resolution, belief_time) + if belief_time is None: belief_time = server_now() diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index 07f7dac9d..3f833561e 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -8,8 +8,8 @@ def test_reporter(app, setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data - reporter_config_raw = dict( - beliefs_search_configs=[dict(sensor=s1.id), dict(sensor=s2.id)], + reporter_config = dict( + input_variables=["sensor_1", "sensor_2"], transformations=[ dict( df_input="sensor_1", @@ -39,11 +39,13 @@ def test_reporter(app, setup_dummy_data): final_df_output="df_merge", ) - reporter = PandasReporter(reporter_sensor, reporter_config_raw=reporter_config_raw) + reporter = PandasReporter(reporter_sensor, reporter_config=reporter_config) start = datetime(2023, 4, 10, tzinfo=utc) end = datetime(2023, 4, 10, 10, tzinfo=utc) - report1 = reporter.compute(start, end) + input_sensors = dict(sensor_1=dict(sensor=s1), sensor_2=dict(sensor=s2)) + + report1 = reporter.compute(start=start, end=end, input_sensors=input_sensors) assert len(report1) == 5 assert str(report1.event_starts[0]) == "2023-04-10 00:00:00+00:00" @@ -60,9 +62,11 @@ def 
test_reporter(app, setup_dummy_data): ) # check data source is assigned # check that calling compute with different parameters changes the result - report3 = reporter.compute(start=datetime(2023, 4, 10, 3, tzinfo=utc), end=end) - assert len(report3) == 4 - assert str(report3.event_starts[0]) == "2023-04-10 02:00:00+00:00" + report2 = reporter.compute( + start=datetime(2023, 4, 10, 3, tzinfo=utc), end=end, input_sensors=input_sensors + ) + assert len(report2) == 4 + assert str(report2.event_starts[0]) == "2023-04-10 02:00:00+00:00" def test_reporter_repeated(setup_dummy_data): @@ -70,19 +74,8 @@ def test_reporter_repeated(setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data - reporter_config_raw = dict( - beliefs_search_configs=[ - dict( - sensor=s1.id, - event_starts_after="2023-04-10T00:00:00 00:00", - event_ends_before="2023-04-10T10:00:00 00:00", - ), - dict( - sensor=s2.id, - event_starts_after="2023-04-10T00:00:00 00:00", - event_ends_before="2023-04-10T10:00:00 00:00", - ), - ], + reporter_config = dict( + input_variables=["sensor_1", "sensor_2"], transformations=[ dict( df_input="sensor_1", @@ -112,11 +105,49 @@ def test_reporter_repeated(setup_dummy_data): final_df_output="df_merge", ) - reporter = PandasReporter(reporter_sensor, reporter_config_raw=reporter_config_raw) - start = datetime(2023, 4, 10, tzinfo=utc) - end = datetime(2023, 4, 10, 10, tzinfo=utc) + report_config = dict( + start="2023-04-10T00:00:00 00:00", + end="2023-04-10T10:00:00 00:00", + input_sensors=dict( + sensor_1=dict(sensor=s1.id), + sensor_2=dict(sensor=s2.id), + ), + ) + + reporter = PandasReporter(reporter_sensor, reporter_config=reporter_config) - report1 = reporter.compute(start=start, end=end) - report2 = reporter.compute(start=start, end=end) + report1 = reporter.compute(report_config=report_config) + report2 = reporter.compute(report_config=report_config) assert all(report2.values == report1.values) + + +def test_reporter_empty(setup_dummy_data): + """check that 
calling compute with missing data returns an empty report""" + s1, s2, reporter_sensor = setup_dummy_data + + reporter_config = dict( + input_variables=["sensor_1"], + transformations=[], + final_df_output="sensor_1", + ) + + reporter = PandasReporter(reporter_sensor, reporter_config=reporter_config) + + # compute report on available data + report = reporter.compute( + start=datetime(2023, 4, 10, tzinfo=utc), + end=datetime(2023, 4, 10, 10, tzinfo=utc), + input_sensors=dict(sensor_1=dict(sensor=s1)), + ) + + assert not report.empty + + # compute report on dates with no data available + report = reporter.compute( + start=datetime(2021, 4, 10, tzinfo=utc), + end=datetime(2021, 4, 10, 10, tzinfo=utc), + input_sensors=dict(sensor_1=dict(sensor=s1)), + ) + + assert report.empty diff --git a/flexmeasures/data/models/reporting/tests/test_reporter.py b/flexmeasures/data/models/reporting/tests/test_reporter.py deleted file mode 100644 index 82bf7a7a3..000000000 --- a/flexmeasures/data/models/reporting/tests/test_reporter.py +++ /dev/null @@ -1,40 +0,0 @@ -from datetime import datetime -from pytz import utc -import timely_beliefs as tb - -from flexmeasures.data.models.reporting import Reporter -from flexmeasures.data.models.time_series import Sensor - - -def test_reporter_empty(setup_dummy_data): - """check that calling compute with missing data returns an empty report""" - - class DummyReporter(Reporter): - def __init__(self, sensor: Sensor, input_sensor: Sensor) -> None: - reporter_config_raw = dict( - beliefs_search_configs=[ - dict(sensor=input_sensor.id, alias="input_sensor") - ] - ) - super().__init__(sensor, reporter_config_raw) - - def _compute(self, *args, **kwargs) -> tb.BeliefsDataFrame: - return self.data["input_sensor"] - - s1, s2, reporter_sensor = setup_dummy_data - - reporter = DummyReporter(reporter_sensor, s1) - - # compute report on available data - report = reporter.compute( - datetime(2023, 4, 10, tzinfo=utc), datetime(2023, 4, 10, 10, tzinfo=utc) - ) - 
- assert not report.empty - - # compute report on dates with no data available - report = reporter.compute( - datetime(2021, 4, 10, tzinfo=utc), datetime(2021, 4, 10, 10, tzinfo=utc) - ) - - assert report.empty From 80284317f2d2f4a8546494e7da2da4721ab38f2d Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Fri, 23 Jun 2023 12:59:31 +0200 Subject: [PATCH 09/70] feat: add helper methods to DataSource Signed-off-by: Victor Garcia Reolid --- .vscode/settings.json | 7 ++++++- flexmeasures/data/models/data_sources.py | 25 +++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 618764c3f..9e744aa83 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -13,5 +13,10 @@ "python.linting.pylintEnabled": false, "python.linting.flake8Enabled": true, "workbench.editor.wrapTabs": true, - "python.formatting.provider": "black" + "python.formatting.provider": "black", + "python.testing.pytestArgs": [ + "flexmeasures" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index baad820e9..f905dc86d 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from sqlalchemy.ext.mutable import MutableDict import timely_beliefs as tb @@ -90,6 +90,7 @@ def __init__( name: str | None = None, type: str | None = None, user: User | None = None, + attributes: dict | None = None, **kwargs, ): if user is not None: @@ -99,6 +100,10 @@ def __init__( elif user is None and type == "user": raise TypeError("A data source cannot have type 'user' but no user set.") self.type = type + + if attributes is not None: + kwargs["attributes"] = attributes + tb.BeliefSourceDBMixin.__init__(self, name=name) 
db.Model.__init__(self, **kwargs) @@ -154,3 +159,21 @@ def to_dict(self) -> dict: type=self.type if self.type in ("forecaster", "scheduler") else "other", description=self.description, ) + + def get_attribute(self, attribute: str, default: Any = None) -> Any: + """Looks for the attribute on the DataSource. + If not found, returns the default. + """ + if hasattr(self, attribute): + return getattr(self, attribute) + if attribute in self.attributes: + return self.attributes[attribute] + + return default + + def has_attribute(self, attribute: str) -> bool: + return attribute in self.attributes + + def set_attribute(self, attribute: str, value): + if self.has_attribute(attribute): + self.attributes[attribute] = value From fd175d4573983709d3b1a622ecafe12a0a27a659 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Fri, 23 Jun 2023 17:32:27 +0200 Subject: [PATCH 10/70] fix: modernize AggregatorReporter Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/aggregator.py | 31 ++++++++++++++----- .../models/reporting/tests/test_aggregator.py | 2 +- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/flexmeasures/data/models/reporting/aggregator.py b/flexmeasures/data/models/reporting/aggregator.py index e74596ae0..131017108 100644 --- a/flexmeasures/data/models/reporting/aggregator.py +++ b/flexmeasures/data/models/reporting/aggregator.py @@ -17,16 +17,23 @@ class AggregatorReporter(Reporter): __version__ = "1" __author__ = "Seita" schema = AggregatorSchema() + + reporter_config_schema = AggregatorSchema() + report_config_schema = None + weights: dict method: str - def deserialize_config(self): - # call Reporter deserialize_config - super().deserialize_config() + def deserialize_reporter_config(self, reporter_config): + self.reporter_config = self.reporter_config_schema.load(reporter_config) # extract AggregatorReporter specific fields self.method = self.reporter_config.get("method") self.weights = self.reporter_config.get("weights", dict()) + 
self.beliefs_search_configs = self.reporter_config.get("beliefs_search_configs") + + def deserialize_report_config(self, report_config: dict): + pass def _compute( self, @@ -43,15 +50,22 @@ def _compute( dataframes = [] - if belief_time is None: - belief_time = server_now() - for belief_search_config in self.beliefs_search_configs: # if alias is not in belief_search_config, using the Sensor id instead column_name = belief_search_config.get( "alias", f"sensor_{belief_search_config['sensor'].id}" ) - data = self.data[column_name].droplevel([1, 2, 3]) + + data = ( + belief_search_config["sensor"] + .search_beliefs( + event_starts_after=start, + event_ends_before=end, + resolution=input_resolution, + beliefs_before=belief_time, + ) + .droplevel([1, 2, 3]) + ) # apply weight if column_name in self.weights: @@ -61,6 +75,9 @@ def _compute( output_df = pd.concat(dataframes, axis=1) + if belief_time is None: + belief_time = server_now() + # apply aggregation method output_df = output_df.aggregate(self.method, axis=1) diff --git a/flexmeasures/data/models/reporting/tests/test_aggregator.py b/flexmeasures/data/models/reporting/tests/test_aggregator.py index 8cd287fa4..e800e6b1b 100644 --- a/flexmeasures/data/models/reporting/tests/test_aggregator.py +++ b/flexmeasures/data/models/reporting/tests/test_aggregator.py @@ -45,7 +45,7 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): ) agg_reporter = AggregatorReporter( - reporter_sensor, reporter_config_raw=reporter_config_raw + reporter_sensor, reporter_config=reporter_config_raw ) result = agg_reporter.compute( From 4567c23f8525f9675cab86017be5843dcd3e804b Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 26 Jun 2023 13:13:33 +0200 Subject: [PATCH 11/70] feat: add attributes hash Signed-off-by: Victor Garcia Reolid --- .../2ac7fb39ce0c_add_attribute_column_to_data_source.py | 8 +++++++- flexmeasures/data/models/data_sources.py | 9 ++++++++- 2 files changed, 15 insertions(+), 2 
deletions(-) diff --git a/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py b/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py index 2dc59a6c1..4ed55d15d 100644 --- a/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py +++ b/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py @@ -17,12 +17,18 @@ def upgrade(): - # add the column `attributes`to the table `data_source` + # add the column `attributes` to the table `data_source` op.add_column( "data_source", sa.Column("attributes", sa.JSON(), nullable=True, default={}), ) + # add the column `attributes_hash` to the table `data_source` + op.add_column( + "data_source", + sa.Column("attributes_hash", sa.LargeBinary(length=256), nullable=True), + ) + def downgrade(): pass diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index f905dc86d..8350830e8 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from typing import TYPE_CHECKING, Any from sqlalchemy.ext.mutable import MutableDict @@ -7,6 +8,7 @@ from flexmeasures.data import db from flask import current_app +import hashlib if TYPE_CHECKING: @@ -71,6 +73,8 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): attributes = db.Column(MutableDict.as_mutable(db.JSON), nullable=False, default={}) + attributes_hash = db.Column(db.LargeBinary(length=256)) + # The model and version of a script source model = db.Column(db.String(80), nullable=True) version = db.Column( @@ -102,7 +106,10 @@ def __init__( self.type = type if attributes is not None: - kwargs["attributes"] = attributes + self.attributes = attributes + self.attributes_hash = hashlib.sha256( + json.dumps(attributes).encode("utf-8") + ).digest() tb.BeliefSourceDBMixin.__init__(self, name=name) 
db.Model.__init__(self, **kwargs) From 773dc9db31810050ab32adc1b390649e259b9190 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 26 Jun 2023 13:14:19 +0200 Subject: [PATCH 12/70] feat: add attributes to the function get_or_create_source Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/services/data_sources.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/flexmeasures/data/services/data_sources.py b/flexmeasures/data/services/data_sources.py index d9787f147..b19c37ca0 100644 --- a/flexmeasures/data/services/data_sources.py +++ b/flexmeasures/data/services/data_sources.py @@ -2,6 +2,9 @@ from flask import current_app +import json +import hashlib + from flexmeasures import User from flexmeasures.data import db from flexmeasures.data.models.data_sources import DataSource @@ -13,6 +16,7 @@ def get_or_create_source( source_type: str | None = None, model: str | None = None, version: str | None = None, + attributes: dict | None = None, flush: bool = True, ) -> DataSource: if is_user(source): @@ -22,6 +26,11 @@ def get_or_create_source( query = query.filter(DataSource.model == model) if version is not None: query = query.filter(DataSource.version == version) + if attributes is not None: + attributes_hash = hashlib.sha256( + json.dumps(attributes).encode("utf-8") + ).digest() + query = query.filter(DataSource.attributes_hash == attributes_hash) if is_user(source): query = query.filter(DataSource.user == source) elif isinstance(source, str): From 6d632b86a2f176d5cfb357177a33fa9ab3a0f4be Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 26 Jun 2023 18:13:09 +0200 Subject: [PATCH 13/70] feat: add attribute hash to get_or_create_source Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/services/data_sources.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/services/data_sources.py b/flexmeasures/data/services/data_sources.py index b19c37ca0..70910df6d 100644 --- 
a/flexmeasures/data/services/data_sources.py +++ b/flexmeasures/data/services/data_sources.py @@ -19,6 +19,7 @@ def get_or_create_source( attributes: dict | None = None, flush: bool = True, ) -> DataSource: + attributes_hash = None if is_user(source): source_type = "user" query = DataSource.query.filter(DataSource.type == source_type) @@ -45,7 +46,12 @@ def get_or_create_source( if source_type is None: raise TypeError("Please specify a source type") _source = DataSource( - name=source, model=model, version=version, type=source_type + name=source, + model=model, + version=version, + type=source_type, + attributes=attributes, + attributes_hash=attributes_hash, ) current_app.logger.info(f"Setting up {_source} as new data source...") db.session.add(_source) From 68eae91443d93345c8cbc5e650b54083079d7608 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 27 Jun 2023 00:30:53 +0200 Subject: [PATCH 14/70] feat: save/fetch data generator to/from data source Signed-off-by: Victor Garcia Reolid --- flexmeasures/app.py | 11 ++- flexmeasures/data/models/data_sources.py | 100 +++++++++++++++++--- flexmeasures/data/tests/conftest.py | 41 ++++++++ flexmeasures/data/tests/test_data_source.py | 50 ++++++++++ flexmeasures/utils/plugin_utils.py | 12 ++- 5 files changed, 199 insertions(+), 15 deletions(-) create mode 100644 flexmeasures/data/tests/test_data_source.py diff --git a/flexmeasures/app.py b/flexmeasures/app.py index e9c2a5554..9fe114d03 100644 --- a/flexmeasures/app.py +++ b/flexmeasures/app.py @@ -5,6 +5,7 @@ from __future__ import annotations import time +from copy import copy from flask import Flask, g, request from flask.cli import load_dotenv @@ -107,8 +108,14 @@ def create( # noqa C901 from flexmeasures.utils.coding_utils import get_classes_module from flexmeasures.data.models import reporting, planning - app.reporters = get_classes_module("flexmeasures.data.models", reporting.Reporter) - app.schedulers = get_classes_module("flexmeasures.data.models", 
planning.Scheduler) + reporters = get_classes_module("flexmeasures.data.models", reporting.Reporter) + schedulers = get_classes_module("flexmeasures.data.models", planning.Scheduler) + + app.reporters = reporters + app.schedulers = schedulers + + app.data_generators = copy(reporters) # use copy to avoid mutating app.reporters + app.data_generators.update(schedulers) # add auth policy diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 8350830e8..fd75cd7e4 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Type from sqlalchemy.ext.mutable import MutableDict import timely_beliefs as tb @@ -10,14 +10,51 @@ from flask import current_app import hashlib +from marshmallow import Schema + if TYPE_CHECKING: from flexmeasures.data.models.user import User -class DataGeneratorMixin: +class DataGenerator: _data_source: DataSource | None = None + _config: dict = None + + _inputs_schema: Type[Schema] | None = None + _config_schema: Type[Schema] | None = None + + def __init__(self, config: dict | None = None, **kwargs) -> None: + if config is None: + _config = kwargs + else: + if self._config_schema: + _config = self._config_schema.load(config) + else: + _config = config + + self._config = _config + + def _compute(self, **kwargs): + raise NotImplementedError() + + def compute(self, inputs: dict = None, **kwargs): + if inputs is None: + _inputs = kwargs + self.validate_deserialized_inputs(_inputs) + else: + if self._inputs_schema: + _inputs = self._inputs_schema.load(inputs) + + else: # skip validation + _inputs = inputs + + return self._compute(**_inputs) + + def validate_deserialized_inputs(self, inputs: dict): + self._inputs_schema.load(self._inputs_schema.dump(inputs)) + @classmethod def get_data_source_info(cls: type) -> dict: """ @@ 
-26,33 +63,35 @@ def get_data_source_info(cls: type) -> dict: See for instance get_data_source_for_job(). """ source_info = dict( - name=current_app.config.get("FLEXMEASURES_DEFAULT_DATASOURCE") + source=current_app.config.get("FLEXMEASURES_DEFAULT_DATASOURCE") ) # default from flexmeasures.data.models.planning import Scheduler from flexmeasures.data.models.reporting import Reporter if issubclass(cls, Reporter): - source_info["type"] = "reporter" + source_info["source_type"] = "reporter" elif issubclass(cls, Scheduler): - source_info["type"] = "scheduler" + source_info["source_type"] = "scheduler" else: - source_info["type"] = "undefined" + source_info["source_type"] = "undefined" + + source_info["model"] = cls.__name__ return source_info @property - def data_source(self): + def data_source(self) -> "DataSource | None": from flexmeasures.data.services.data_sources import get_or_create_source if self._data_source is None: data_source_info = self.get_data_source_info() - - self._data_source = get_or_create_source( - source=data_source_info.get("name"), - source_type=data_source_info.get("type"), + data_source_info["attributes"] = dict( + config=self._config_schema.dump(self._config) ) + self._data_source = get_or_create_source(**data_source_info) + return self._data_source @@ -89,6 +128,8 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): viewonly=True, ) + _data_generator: DataGenerator | None = None + def __init__( self, name: str | None = None, @@ -114,6 +155,43 @@ def __init__( tb.BeliefSourceDBMixin.__init__(self, name=name) db.Model.__init__(self, **kwargs) + @property + def data_generator(self): + if self._data_generator: + return self._data_generator + + data_generator = None + + if self.type not in ["scheduler", "forecaster", "reporter"]: + current_app.logger.warning( + "Only the classes Scheduler, Forecaster and Reporters are DataGenerator's." 
+ ) + return None + + if not self.model: + current_app.logger.warning( + "There's no DataGenerator class defined in this DataSource." + ) + return None + + if self.model not in current_app.data_generators: + current_app.logger.warning( + "DataGenerator `{self.model}` not registered in this FlexMeasures instance." + ) + return None + + # fetch DataGenerator details + data_generator_details = self.attributes.get("data_generator", {}) + config = data_generator_details.get("config", {}) + + # create DataGenerator class and assign the current DataSource (self) as its source + data_generator = current_app.data_generators[self.model](config=config) + data_generator._data_source = self + + self._data_generator = data_generator + + return self._data_generator + @property def label(self): """Human-readable label (preferably not starting with a capital letter, so it can be used in a sentence).""" diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index 98540df7b..db1c6db3f 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -7,7 +7,9 @@ import numpy as np from flask_sqlalchemy import SQLAlchemy from statsmodels.api import OLS +import timely_beliefs as tb +from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.models.annotations import Annotation from flexmeasures.data.models.assets import Asset from flexmeasures.data.models.data_sources import DataSource @@ -232,3 +234,42 @@ def setup_annotations( asset=asset, sensor=sensor, ) + + +@pytest.fixture(scope="module") +def aggregator_reporter_data_source(app, db, add_nearby_weather_sensors): + + sensor = add_nearby_weather_sensors.get("temperature") + + class TestReporter(Reporter): + def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: + start = kwargs.get("start") + end = kwargs.get("end") + resolution = self.sensor.event_resolution + + index = pd.date_range(start=start, end=end, freq=resolution) + + r = pd.DataFrame() + 
r["event_start"] = index + r["belief_time"] = index + r["source"] = self.data_source + r["cumulative_probability"] = 0.5 + r["event_value"] = 0 + + return tb.BeliefsDataFrame(r, sensor=self.sensor) + + app.data_generators.update({"TestReporter": TestReporter}) + + config = dict(sensor=sensor.id) + + ds = DataSource( + name="Test", + model="TestReporter", + type="reporter", + attributes=dict(data_generator=dict(config=config)), + ) + + db.session.add(ds) + db.session.commit() + + return ds diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py new file mode 100644 index 000000000..31ece7d07 --- /dev/null +++ b/flexmeasures/data/tests/test_data_source.py @@ -0,0 +1,50 @@ +import pytest + +from flexmeasures.data.models.reporting import Reporter +from flexmeasures.data.models.data_sources import DataGenerator + +from datetime import datetime +from pytz import UTC + + +def test_get_reporter_from_source(db, app, aggregator_reporter_data_source): + + reporter = aggregator_reporter_data_source.data_generator + + assert isinstance(reporter, Reporter) + assert reporter.__class__.__name__ == "TestReporter" + + print(aggregator_reporter_data_source.data_generator) + + res = reporter.compute( + start=datetime(2023, 1, 1, tzinfo=UTC), end=datetime(2023, 1, 2, tzinfo=UTC) + ) + + assert res.lineage.sources[0] == reporter.data_source + + with pytest.raises(AttributeError): + reporter.compute(start=datetime(2023, 1, 1, tzinfo=UTC), end="not a date") + + +def test_data_source(db, app): + class TestDataGenerator(DataGenerator): + pass + + ds1 = TestDataGenerator(config={"a": "b"}) + + db.session.add(ds1.data_source) + db.session.commit() + + ds2 = TestDataGenerator(config={"a": "b"}) + + assert ds1.data_source == ds2.data_source + assert ds1.data_source.attributes.get("config") == ds2.data_source.attributes.get( + "config" + ) + + ds3 = TestDataGenerator(config={"a": "c"}) + + assert ds3.data_source != ds2.data_source + assert 
ds3.data_source.attributes.get("config") != ds2.data_source.attributes.get( + "config" + ) diff --git a/flexmeasures/utils/plugin_utils.py b/flexmeasures/utils/plugin_utils.py index 19568b48a..2614e146a 100644 --- a/flexmeasures/utils/plugin_utils.py +++ b/flexmeasures/utils/plugin_utils.py @@ -110,8 +110,16 @@ def register_plugins(app: Flask): from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.models.planning import Scheduler - app.reporters.update(get_classes_module(module.__name__, Reporter)) - app.schedulers.update(get_classes_module(module.__name__, Scheduler)) + plugin_reporters = get_classes_module(module.__name__, Reporter) + plugin_schedulers = get_classes_module(module.__name__, Scheduler) + + # for legacy, we keep reporters and schedulers + app.reporters.update(plugin_reporters) + app.schedulers.update(plugin_schedulers) + + # add DataGenerators + app.data_generators.update(plugin_schedulers) + app.data_generators.update(plugin_reporters) app.config["LOADED_PLUGINS"][plugin_name] = plugin_version app.logger.info(f"Loaded plugins: {app.config['LOADED_PLUGINS']}") From 30494160b5e6b0cb25029d60624611f442273d7b Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 27 Jun 2023 00:35:35 +0200 Subject: [PATCH 15/70] refactor: adapt reporters to use new DataGenerator class Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/__init__.py | 63 +++++-------------- .../data/models/reporting/aggregator.py | 41 +++++------- .../data/models/reporting/pandas_reporter.py | 55 +++++----------- .../models/reporting/tests/test_aggregator.py | 9 ++- .../reporting/tests/test_pandas_reporter.py | 19 +++--- .../reporting/tests/test_tibber_reporter.py | 56 ++++++++++------- .../data/schemas/reporting/__init__.py | 33 ++++++---- .../data/schemas/reporting/aggregation.py | 28 ++++++--- .../data/schemas/reporting/pandas_reporter.py | 6 +- 9 files changed, 137 insertions(+), 173 deletions(-) diff --git 
a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index 8eb56c8b9..d9aa16714 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -1,14 +1,17 @@ from __future__ import annotations -from typing import Optional - from flexmeasures.data.models.time_series import Sensor -from flexmeasures.data.models.data_sources import DataGeneratorMixin +from flexmeasures.data.models.data_sources import DataGenerator + +from flexmeasures.data.schemas.reporting import ( + ReporterInputsSchema, + ReporterConfigSchema, +) import timely_beliefs as tb -class Reporter(DataGeneratorMixin): +class Reporter(DataGenerator): """Superclass for all FlexMeasures Reporters.""" __version__ = None @@ -17,29 +20,15 @@ class Reporter(DataGeneratorMixin): sensor: Sensor = None - reporter_config: Optional[dict] = None - report_config: Optional[dict] = None - - reporter_config_schema = None - report_config_schema = None - - def __init__(self, sensor: Sensor, reporter_config: dict = {}) -> None: - """ - Initialize a new Reporter. - - Attributes: - :param sensor: sensor where the output of the reporter will be saved to. - :param reporter_config: dictionary with the serialized configuration of the reporter. - """ + _inputs_schema = ReporterInputsSchema() + _config_schema = ReporterConfigSchema() - self.deserialize_reporter_config(reporter_config) - self.sensor = sensor + def __init__(self, config: dict | None = None, **kwargs) -> None: + super().__init__(config, **kwargs) - def update_attribute(self, attribute, default): - if default is not None: - setattr(self, attribute, default) + self.sensor = self._config["sensor"] - def compute(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute(self, **kwargs) -> tb.BeliefsDataFrame: """This method triggers the creation of a new report. 
The same object can generate multiple reports with different start, end, resolution @@ -49,7 +38,7 @@ def compute(self, **kwargs) -> tb.BeliefsDataFrame: """ # Result - result: tb.BeliefsDataFrame = self._compute(**kwargs) + result: tb.BeliefsDataFrame = self._compute_report(**kwargs) # checking that the event_resolution of the output BeliefDataFrame is equal to the one of the output sensor assert ( @@ -69,32 +58,10 @@ def compute(self, **kwargs) -> tb.BeliefsDataFrame: return result - def _compute(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: """ Overwrite with the actual computation of your report. :returns BeliefsDataFrame: report as a BeliefsDataFrame. """ raise NotImplementedError() - - def deserialize_reporter_config(self, reporter_config: dict) -> dict: - """ - Validate the reporter config against a Marshmallow Schema. - Ideas: - - Override this method - - Call superclass method to apply validation and common variables deserialization (see PandasReporter) - - (Partially) extract the relevant reporter_config parameters into class attributes. - - Raises ValidationErrors or ValueErrors. 
- """ - - raise NotImplementedError() - - def deserialize_report_config(self, report_config: dict) -> dict: - """_summary_ - - :param report_config: _description_ - :return: _description_ - """ - - raise NotImplementedError() diff --git a/flexmeasures/data/models/reporting/aggregator.py b/flexmeasures/data/models/reporting/aggregator.py index 131017108..c1de92409 100644 --- a/flexmeasures/data/models/reporting/aggregator.py +++ b/flexmeasures/data/models/reporting/aggregator.py @@ -6,7 +6,7 @@ import pandas as pd from flexmeasures.data.models.reporting import Reporter -from flexmeasures.data.schemas.reporting.aggregation import AggregatorSchema +from flexmeasures.data.schemas.reporting.aggregation import AggregatorConfigSchema from flexmeasures.utils.time_utils import server_now @@ -16,26 +16,13 @@ class AggregatorReporter(Reporter): __version__ = "1" __author__ = "Seita" - schema = AggregatorSchema() - reporter_config_schema = AggregatorSchema() - report_config_schema = None + _config_schema = AggregatorConfigSchema() weights: dict method: str - def deserialize_reporter_config(self, reporter_config): - self.reporter_config = self.reporter_config_schema.load(reporter_config) - - # extract AggregatorReporter specific fields - self.method = self.reporter_config.get("method") - self.weights = self.reporter_config.get("weights", dict()) - self.beliefs_search_configs = self.reporter_config.get("beliefs_search_configs") - - def deserialize_report_config(self, report_config: dict): - pass - - def _compute( + def _compute_report( self, start: datetime, end: datetime, @@ -48,16 +35,18 @@ def _compute( columns. 
""" + method: str = self._config.get("method") + weights: list = self._config.get("weights", {}) + data: list = self._config.get("data") + dataframes = [] - for belief_search_config in self.beliefs_search_configs: + for d in data: # if alias is not in belief_search_config, using the Sensor id instead - column_name = belief_search_config.get( - "alias", f"sensor_{belief_search_config['sensor'].id}" - ) + column_name = d.get("alias", f"sensor_{d['sensor'].id}") - data = ( - belief_search_config["sensor"] + df = ( + d["sensor"] .search_beliefs( event_starts_after=start, event_ends_before=end, @@ -68,10 +57,10 @@ def _compute( ) # apply weight - if column_name in self.weights: - data *= self.weights[column_name] + if column_name in weights: + df *= weights[column_name] - dataframes.append(data) + dataframes.append(df) output_df = pd.concat(dataframes, axis=1) @@ -79,7 +68,7 @@ def _compute( belief_time = server_now() # apply aggregation method - output_df = output_df.aggregate(self.method, axis=1) + output_df = output_df.aggregate(method, axis=1) # convert BeliefsSeries into a BeliefsDataFrame output_df = output_df.to_frame("event_value") diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index a79a96983..76cb076ea 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -2,14 +2,15 @@ from typing import Any, Union, Dict from datetime import datetime, timedelta +from copy import deepcopy from flask import current_app import timely_beliefs as tb import pandas as pd from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.schemas.reporting.pandas_reporter import ( - PandasReporterReporterConfigSchema, - PandasReporterReportConfigSchema, + PandasReporterConfigSchema, + PandasReporterInputConfigSchema, ) from flexmeasures.data.models.time_series import Sensor from flexmeasures.utils.time_utils import 
server_now @@ -21,8 +22,8 @@ class PandasReporter(Reporter): __version__ = "1" __author__ = "Seita" - reporter_config_schema = PandasReporterReporterConfigSchema() - report_config_schema = PandasReporterReportConfigSchema() + _config_schema = PandasReporterConfigSchema() + _inputs_schema = PandasReporterInputConfigSchema() input_variables: list[str] = None transformations: list[dict[str, Any]] = None @@ -30,30 +31,6 @@ class PandasReporter(Reporter): data: Dict[str, Union[tb.BeliefsDataFrame, pd.DataFrame]] = None - def deserialize_reporter_config(self, reporter_config): - # call super class deserialize_config - self.reporter_config = self.reporter_config_schema.load(reporter_config) - - # extract PandasReporter specific fields - self.transformations = self.reporter_config.get("transformations") - self.input_variables = self.reporter_config.get("input_variables") - self.final_df_output = self.reporter_config.get("final_df_output") - - def deserialize_report_config( - self, report_config: dict - ): # TODO: move to Reporter class - self.report_config = self.report_config_schema.load( - report_config - ) # validate reporter configs - - input_sensors = report_config.get("input_sensors") - - # check that all input_variables are provided - for variable in self.input_variables: - assert ( - variable in input_sensors - ), f"Required sensor with alias `{variable}` not provided." - def fetch_data( self, start: datetime, @@ -93,24 +70,19 @@ def fetch_data( # store BeliefsDataFrame as local variable self.data[alias] = bdf - def _compute(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: """ This method applies the transformations and outputs the dataframe defined in `final_df_output` field of the report_config. 
""" - self.report_config = kwargs - - if "report_config" in kwargs: - self.deserialize_report_config(kwargs.get("report_config")) - # report configuration - start: datetime = self.report_config.get("start") - end: datetime = self.report_config.get("end") - input_sensors: dict = self.report_config.get("input_sensors") + start: datetime = kwargs.get("start") + end: datetime = kwargs.get("end") + input_sensors: dict = kwargs.get("input_sensors") - resolution: timedelta | None = self.report_config.get("resolution", None) - belief_time: datetime | None = self.report_config.get("belief_time", None) + resolution: timedelta | None = kwargs.get("resolution", None) + belief_time: datetime | None = kwargs.get("belief_time", None) if resolution is None: resolution = self.sensor.event_resolution @@ -124,7 +96,7 @@ def _compute(self, **kwargs) -> tb.BeliefsDataFrame: # apply pandas transformations to the dataframes in `self.data` self._apply_transformations() - final_output = self.data[self.final_df_output] + final_output = self.data[self._config.get("final_df_output")] if isinstance(final_output, tb.BeliefsDataFrame): @@ -230,7 +202,8 @@ def _apply_transformations(self): previous_df = None - for transformation in self.transformations: + for _transformation in self._config.get("transformations"): + transformation = deepcopy(_transformation) df_input = transformation.get( "df_input", previous_df ) # default is using the previous transformation output diff --git a/flexmeasures/data/models/reporting/tests/test_aggregator.py b/flexmeasures/data/models/reporting/tests/test_aggregator.py index e800e6b1b..dddf20b6d 100644 --- a/flexmeasures/data/models/reporting/tests/test_aggregator.py +++ b/flexmeasures/data/models/reporting/tests/test_aggregator.py @@ -36,17 +36,16 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): """ s1, s2, reporter_sensor = setup_dummy_data - reporter_config_raw = dict( - beliefs_search_configs=[ + reporter_config = dict( + 
sensor=reporter_sensor.id, + data=[ dict(sensor=s1.id, source=1), dict(sensor=s2.id, source=2), ], method=aggregation_method, ) - agg_reporter = AggregatorReporter( - reporter_sensor, reporter_config=reporter_config_raw - ) + agg_reporter = AggregatorReporter(config=reporter_config) result = agg_reporter.compute( start=datetime(2023, 5, 10, tzinfo=utc), diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index 3f833561e..c3dd01eaf 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -8,7 +8,8 @@ def test_reporter(app, setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data - reporter_config = dict( + config = dict( + sensor=reporter_sensor.id, input_variables=["sensor_1", "sensor_2"], transformations=[ dict( @@ -39,7 +40,7 @@ def test_reporter(app, setup_dummy_data): final_df_output="df_merge", ) - reporter = PandasReporter(reporter_sensor, reporter_config=reporter_config) + reporter = PandasReporter(config=config) start = datetime(2023, 4, 10, tzinfo=utc) end = datetime(2023, 4, 10, 10, tzinfo=utc) @@ -75,6 +76,7 @@ def test_reporter_repeated(setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data reporter_config = dict( + sensor=reporter_sensor.id, input_variables=["sensor_1", "sensor_2"], transformations=[ dict( @@ -105,7 +107,7 @@ def test_reporter_repeated(setup_dummy_data): final_df_output="df_merge", ) - report_config = dict( + inputs = dict( start="2023-04-10T00:00:00 00:00", end="2023-04-10T10:00:00 00:00", input_sensors=dict( @@ -114,10 +116,10 @@ def test_reporter_repeated(setup_dummy_data): ), ) - reporter = PandasReporter(reporter_sensor, reporter_config=reporter_config) + reporter = PandasReporter(config=reporter_config) - report1 = reporter.compute(report_config=report_config) - report2 = reporter.compute(report_config=report_config) + report1 = 
reporter.compute(inputs=inputs) + report2 = reporter.compute(inputs=inputs) assert all(report2.values == report1.values) @@ -126,13 +128,14 @@ def test_reporter_empty(setup_dummy_data): """check that calling compute with missing data returns an empty report""" s1, s2, reporter_sensor = setup_dummy_data - reporter_config = dict( + config = dict( + sensor=reporter_sensor.id, input_variables=["sensor_1"], transformations=[], final_df_output="sensor_1", ) - reporter = PandasReporter(reporter_sensor, reporter_config=reporter_config) + reporter = PandasReporter(config=config) # compute report on available data report = reporter.compute( diff --git a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py index 30171da86..6076ddecf 100644 --- a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py @@ -1,5 +1,7 @@ +from __future__ import annotations import pytest +from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.models.reporting.pandas_reporter import PandasReporter from flexmeasures.data.models.time_series import Sensor, DataSource, TimedBelief from flexmeasures.data.models.generic_assets import GenericAssetType, GenericAsset @@ -67,12 +69,18 @@ ] # cents/kWh -class TibberReporter(PandasReporter): - def __init__(self, sensor) -> None: +class TibberReporter(Reporter): + + _inner_reporter: PandasReporter | None = None + + def __init__(self, config: dict | None = None, **kwargs) -> None: + """This class calculates the price of energy of a tariff indexed to the Day Ahead prices. 
Energy Price = (1 + VAT) x ( EnergyTax + Tiber + DA Prices) """ + super().__init__(config=config, **kwargs) + # search the sensors EnergyTax = Sensor.query.filter(Sensor.name == "EnergyTax").one_or_none() VAT = Sensor.query.filter(Sensor.name == "VAT").one_or_none() @@ -82,46 +90,46 @@ def __init__(self, sensor) -> None: da_prices = Sensor.query.filter(Sensor.name == "DA prices").one_or_none() + self.input_sensors = { + "energy_tax": {"sensor": EnergyTax}, + "VAT": {"sensor": VAT}, + "tariff": {"sensor": tibber_tariff}, + "da_prices": {"sensor": da_prices}, + } + # create the PandasReporter reporter config - reporter_config = dict( - beliefs_search_configs=[ - dict(sensor=EnergyTax.id, alias="energy_tax_df"), - dict(sensor=VAT.id), - dict(sensor=tibber_tariff.id), - dict(sensor=da_prices.id), - ], + pandas_reporter_config = dict( + sensor=self.sensor.id, + input_variables=["energy_tax", "VAT", "tariff", "da_prices"], transformations=[ dict( - df_input="sensor_1", - df_output="VAT", + df_input="VAT", method="droplevel", args=[[1, 2, 3]], ), dict(method="add", args=[1]), # this is to get 1 + VAT dict( - df_input="energy_tax_df", - df_output="EnergyTax", + df_input="energy_tax", method="droplevel", args=[[1, 2, 3]], ), dict( - df_input="sensor_3", - df_output="tibber_tariff", + df_input="tariff", + df_output="tariff", method="droplevel", args=[[1, 2, 3]], ), dict( - df_input="sensor_4", - df_output="da_prices", + df_input="da_prices", method="droplevel", args=[[1, 2, 3]], ), dict( - method="add", args=["@tibber_tariff"] + method="add", args=["@tariff"] ), # da_prices = da_prices + tibber_tariff dict( - method="add", args=["@EnergyTax"] - ), # da_prices = da_prices + EnergyTax + method="add", args=["@energy_tax"] + ), # da_prices = da_prices + energy_tax dict( method="multiply", args=["@VAT"] ), # da_prices = da_price * VAT, VAT @@ -130,7 +138,11 @@ def __init__(self, sensor) -> None: final_df_output="da_prices", ) - super().__init__(sensor, reporter_config) + 
self._inner_reporter = PandasReporter(config=pandas_reporter_config) + + def _compute_report(self, **kwargs): + kwargs["input_sensors"] = self.input_sensors + return self._inner_reporter.compute(**kwargs) def beliefs_from_timeseries(index, values, sensor, source): @@ -245,7 +257,7 @@ def test_tibber_reporter(tibber_test_data): tibber_report_sensor = tibber_test_data - tibber_reporter = TibberReporter(tibber_report_sensor) + tibber_reporter = TibberReporter(sensor=tibber_report_sensor) result = tibber_reporter.compute( start=datetime(2023, 4, 13, tzinfo=utc), diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 7ceaa7152..190ccea93 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -1,4 +1,4 @@ -from marshmallow import Schema, fields, validate +from marshmallow import Schema, fields from flexmeasures.data.schemas.sensors import SensorIdField from flexmeasures.data.schemas.sources import DataSourceIdField @@ -6,6 +6,17 @@ from flexmeasures.data.schemas import AwareDateTimeField, DurationField +class ReporterConfigSchema(Schema): + sensor = SensorIdField(required=True) + + +class ReporterInputsSchema(Schema): + start = AwareDateTimeField(required=True) + end = AwareDateTimeField(required=True) + input_resolution = DurationField() + belief_time = AwareDateTimeField() + + class BeliefsSearchConfigSchema(Schema): """ This schema implements the required fields to perform a TimedBeliefs search @@ -36,14 +47,14 @@ class BeliefsSearchConfigSchema(Schema): sum_multiple = fields.Boolean() -class ReporterConfigSchema(Schema): - """ - This schema is used to validate Reporter class configurations (reporter_config). - Inherit from this to extend this schema with your own parameters. - """ +# class ReporterConfigSchema(Schema): +# """ +# This schema is used to validate Reporter class configurations (reporter_config). 
+# Inherit from this to extend this schema with your own parameters. +# """ - beliefs_search_configs = fields.List( - fields.Nested(BeliefsSearchConfigSchema()), - required=True, - validator=validate.Length(min=1), - ) +# beliefs_search_configs = fields.List( +# fields.Nested(BeliefsSearchConfigSchema()), +# required=True, +# validator=validate.Length(min=1), +# ) diff --git a/flexmeasures/data/schemas/reporting/aggregation.py b/flexmeasures/data/schemas/reporting/aggregation.py index a42c6d7b9..c269e106d 100644 --- a/flexmeasures/data/schemas/reporting/aggregation.py +++ b/flexmeasures/data/schemas/reporting/aggregation.py @@ -1,15 +1,18 @@ -from marshmallow import fields, ValidationError, validates_schema +from marshmallow import fields, ValidationError, validates_schema, validate -from flexmeasures.data.schemas.reporting import ReporterConfigSchema +from flexmeasures.data.schemas.reporting import ( + ReporterConfigSchema, + BeliefsSearchConfigSchema, +) -class AggregatorSchema(ReporterConfigSchema): +class AggregatorConfigSchema(ReporterConfigSchema): """Schema for the reporter_config of the AggregatorReporter Example: .. 
code-block:: json { - "beliefs_search_configs": [ + "data": [ { "sensor": 1, "source" : 1, @@ -31,12 +34,17 @@ class AggregatorSchema(ReporterConfigSchema): method = fields.Str(required=False, dump_default="sum") weights = fields.Dict(fields.Str(), fields.Float(), required=False) + data = fields.List( + fields.Nested(BeliefsSearchConfigSchema()), + required=True, + validator=validate.Length(min=1), + ) @validates_schema def validate_source(self, data, **kwargs): - for beliefs_search_config in data["beliefs_search_configs"]: - if "source" not in beliefs_search_config: + for data in data["data"]: + if "source" not in data: raise ValidationError("`source` is a required field.") @validates_schema @@ -46,13 +54,13 @@ def validate_weights(self, data, **kwargs): # get aliases aliases = [] - for beliefs_search_config in data["beliefs_search_configs"]: - if "alias" in beliefs_search_config: - aliases.append(beliefs_search_config.get("alias")) + for data in data["data"]: + if "alias" in data: + aliases.append(data.get("alias")) # check that the aliases in weights are defined for alias in data.get("weights").keys(): if alias not in aliases: raise ValidationError( - f"alias `{alias}` in `weights` is not defined in `beliefs_search_config`" + f"alias `{alias}` in `weights` is not defined in `data`" ) diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 23fce775c..d9e3eabc4 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -76,13 +76,14 @@ class BeliefsSearchConfigSchema(Schema): sum_multiple = fields.Boolean() -class PandasReporterReporterConfigSchema(Schema): +class PandasReporterConfigSchema(Schema): """ This schema lists fields that can be used to describe sensors in the optimised portfolio Example: { + "sensor" : 1, "input_variables" : ["df1"], "transformations" : [ { @@ -103,6 +104,7 @@ class 
PandasReporterReporterConfigSchema(Schema): "final_df_output" : "df2" """ + sensor = SensorIdField(required=True) input_variables = fields.List(fields.Str(), required=True) transformations = fields.List(fields.Nested(PandasMethodCall()), required=True) final_df_output = fields.Str(required=True) @@ -150,7 +152,7 @@ def validate_chaining(self, data, **kwargs): ) -class PandasReporterReportConfigSchema(Schema): +class PandasReporterInputConfigSchema(Schema): input_sensors = fields.Dict( keys=fields.Str(), values=fields.Nested(BeliefsSearchConfigSchema()), From 76cf1971b0791227bcc453bfd07d99caf257f912 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Wed, 28 Jun 2023 12:43:18 +0200 Subject: [PATCH 16/70] fix: use default method on load Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/reporting/aggregation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/flexmeasures/data/schemas/reporting/aggregation.py b/flexmeasures/data/schemas/reporting/aggregation.py index c269e106d..52d0fa881 100644 --- a/flexmeasures/data/schemas/reporting/aggregation.py +++ b/flexmeasures/data/schemas/reporting/aggregation.py @@ -32,7 +32,7 @@ class AggregatorConfigSchema(ReporterConfigSchema): } """ - method = fields.Str(required=False, dump_default="sum") + method = fields.Str(required=False, dump_default="sum", load_default="sum") weights = fields.Dict(fields.Str(), fields.Float(), required=False) data = fields.List( fields.Nested(BeliefsSearchConfigSchema()), @@ -54,12 +54,12 @@ def validate_weights(self, data, **kwargs): # get aliases aliases = [] - for data in data["data"]: - if "alias" in data: - aliases.append(data.get("alias")) + for d in data["data"]: + if "alias" in d: + aliases.append(d.get("alias")) # check that the aliases in weights are defined - for alias in data.get("weights").keys(): + for alias in data.get("weights", {}).keys(): if alias not in aliases: raise ValidationError( f"alias `{alias}` in `weights` is not 
defined in `data`" From 39f402358d573557af1e3e36ed920f15052d93d3 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Tue, 27 Jun 2023 17:11:14 +0200 Subject: [PATCH 17/70] fix: adapt tests of the schemas Signed-off-by: Victor Garcia Reolid --- .../data/schemas/tests/test_reporting.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 3246df14d..9e4a06edd 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -2,8 +2,8 @@ from flexmeasures.data.models.generic_assets import GenericAsset, GenericAssetType from flexmeasures.data.schemas.reporting.pandas_reporter import ( - PandasReporterReporterConfigSchema, - PandasReporterReportConfigSchema, + PandasReporterConfigSchema, + PandasReporterInputConfigSchema, ) from marshmallow.exceptions import ValidationError @@ -37,10 +37,11 @@ def setup_dummy_sensors(db, app): @pytest.mark.parametrize( - "reporter_config, is_valid", + "config, is_valid", [ ( { # this checks that the final_df_output dataframe is actually generated at some point of the processing pipeline + "sensor": 1, "input_variables": ["sensor_1"], "transformations": [ { @@ -55,6 +56,7 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that chaining works, applying the method copy on the previous dataframe + "sensor": 1, "input_variables": ["sensor_1"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, @@ -67,6 +69,7 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that resample cannot be the last method being applied + "sensor": 1, "input_variables": ["sensor_1", "sensor_2"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, @@ -79,6 +82,7 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that resample cannot be the last method being applied + 
"sensor": 1, "input_variables": ["sensor_1", "sensor_2"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, @@ -92,21 +96,19 @@ def setup_dummy_sensors(db, app): ), ], ) -def test_pandas_reporter_schema( - reporter_config, is_valid, db, app, setup_dummy_sensors -): +def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_sensors): - schema = PandasReporterReporterConfigSchema() + schema = PandasReporterConfigSchema() if is_valid: - schema.load(reporter_config) + schema.load(config) else: with pytest.raises(ValidationError): - schema.load(reporter_config) + schema.load(config) @pytest.mark.parametrize( - "report_config, is_valid", + "inputs, is_valid", [ ( { @@ -136,12 +138,12 @@ def test_pandas_reporter_schema( ), ], ) -def test_pandas_report_schema(report_config, is_valid, db, app, setup_dummy_sensors): +def test_pandas_reporter_inputs_schema(inputs, is_valid, db, app, setup_dummy_sensors): - schema = PandasReporterReportConfigSchema() + schema = PandasReporterInputConfigSchema() if is_valid: - schema.load(report_config) + schema.load(inputs) else: with pytest.raises(ValidationError): - schema.load(report_config) + schema.load(inputs) From d19c2fa02f5ea33b564e75d56e4564fde460c53f Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Wed, 28 Jun 2023 13:13:17 +0200 Subject: [PATCH 18/70] fix: use a DataGenerator with a schema defined Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/tests/test_data_source.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index 31ece7d07..359a85794 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -1,7 +1,6 @@ import pytest from flexmeasures.data.models.reporting import Reporter -from flexmeasures.data.models.data_sources import DataGenerator from datetime import datetime from 
pytz import UTC @@ -14,8 +13,6 @@ def test_get_reporter_from_source(db, app, aggregator_reporter_data_source): assert isinstance(reporter, Reporter) assert reporter.__class__.__name__ == "TestReporter" - print(aggregator_reporter_data_source.data_generator) - res = reporter.compute( start=datetime(2023, 1, 1, tzinfo=UTC), end=datetime(2023, 1, 2, tzinfo=UTC) ) @@ -26,23 +23,22 @@ def test_get_reporter_from_source(db, app, aggregator_reporter_data_source): reporter.compute(start=datetime(2023, 1, 1, tzinfo=UTC), end="not a date") -def test_data_source(db, app): - class TestDataGenerator(DataGenerator): - pass +def test_data_source(db, app, aggregator_reporter_data_source): + TestTeporter = app.data_generators.get("TestReporter") - ds1 = TestDataGenerator(config={"a": "b"}) + ds1 = TestTeporter(config={"sensor": 1}) db.session.add(ds1.data_source) db.session.commit() - ds2 = TestDataGenerator(config={"a": "b"}) + ds2 = TestTeporter(config={"sensor": 1}) assert ds1.data_source == ds2.data_source assert ds1.data_source.attributes.get("config") == ds2.data_source.attributes.get( "config" ) - ds3 = TestDataGenerator(config={"a": "c"}) + ds3 = TestTeporter(config={"sensor": 2}) assert ds3.data_source != ds2.data_source assert ds3.data_source.attributes.get("config") != ds2.data_source.attributes.get( From ef6306762c9ae2c4756d213b7bf1ff98a9dd00aa Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 12:05:34 +0200 Subject: [PATCH 19/70] changing backref from "dynamic" to "select" Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 8350830e8..0fc4c314b 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -85,7 +85,7 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): sensors = db.relationship( "Sensor", 
secondary="timed_belief", - backref=db.backref("data_sources", lazy="dynamic"), + backref=db.backref("data_sources", lazy="select"), viewonly=True, ) From 8ea2702407abd6af307ebb84b42349b0938cbf95 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 12:16:35 +0200 Subject: [PATCH 20/70] feat: add hash_attributes static method Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 0fc4c314b..6d03978e7 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -167,6 +167,10 @@ def to_dict(self) -> dict: description=self.description, ) + @staticmethod + def hash_attributes(attributes: dict) -> str: + return hashlib.sha256(json.dumps(attributes).encode("utf-8")).digest() + def get_attribute(self, attribute: str, default: Any = None) -> Any: """Looks for the attribute on the DataSource. If not found, returns the default. 
From e717a4e43549f9443f694ed60e47cb2f204fe7ff Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 12:18:51 +0200 Subject: [PATCH 21/70] fix: use hash_attributes static method Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/services/data_sources.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/flexmeasures/data/services/data_sources.py b/flexmeasures/data/services/data_sources.py index 70910df6d..74eae3f56 100644 --- a/flexmeasures/data/services/data_sources.py +++ b/flexmeasures/data/services/data_sources.py @@ -2,9 +2,6 @@ from flask import current_app -import json -import hashlib - from flexmeasures import User from flexmeasures.data import db from flexmeasures.data.models.data_sources import DataSource @@ -19,7 +16,6 @@ def get_or_create_source( attributes: dict | None = None, flush: bool = True, ) -> DataSource: - attributes_hash = None if is_user(source): source_type = "user" query = DataSource.query.filter(DataSource.type == source_type) @@ -28,10 +24,9 @@ def get_or_create_source( if version is not None: query = query.filter(DataSource.version == version) if attributes is not None: - attributes_hash = hashlib.sha256( - json.dumps(attributes).encode("utf-8") - ).digest() - query = query.filter(DataSource.attributes_hash == attributes_hash) + query = query.filter( + DataSource.attributes_hash == DataSource.hash_attributes(attributes) + ) if is_user(source): query = query.filter(DataSource.user == source) elif isinstance(source, str): @@ -51,7 +46,6 @@ def get_or_create_source( version=version, type=source_type, attributes=attributes, - attributes_hash=attributes_hash, ) current_app.logger.info(f"Setting up {_source} as new data source...") db.session.add(_source) From ec3b370ebbb94fade62263ce05f1d0bcd4b9b452 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 12:36:04 +0200 Subject: [PATCH 22/70] feat: adding attributes_hash to the DataSource unique constraint list 
Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 6d03978e7..528258ebf 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -60,7 +60,9 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): """Each data source is a data-providing entity.""" __tablename__ = "data_source" - __table_args__ = (db.UniqueConstraint("name", "user_id", "model", "version"),) + __table_args__ = ( + db.UniqueConstraint("name", "user_id", "model", "version", "attributes_hash"), + ) # The type of data source (e.g. user, forecaster or scheduler) type = db.Column(db.String(80), default="") From ab4b0ae956d135f1b1141221a5dbde000123c50a Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 17:14:41 +0200 Subject: [PATCH 23/70] fix: add constraint to migration and downgrade Signed-off-by: Victor Garcia Reolid --- ...e0c_add_attribute_column_to_data_source.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py b/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py index 4ed55d15d..8698bc3a5 100644 --- a/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py +++ b/flexmeasures/data/migrations/versions/2ac7fb39ce0c_add_attribute_column_to_data_source.py @@ -29,6 +29,23 @@ def upgrade(): sa.Column("attributes_hash", sa.LargeBinary(length=256), nullable=True), ) + # remove previous uniqueness constraint and add a new that takes attributes_hash into account + op.drop_constraint(op.f("data_source_name_key"), "data_source", type_="unique") + op.create_unique_constraint( + "data_source_name_key", + "data_source", + ["name", "user_id", "model", "version", 
"attributes_hash"], + ) + def downgrade(): - pass + + op.drop_constraint("data_source_name_key", "data_source", type_="unique") + op.create_unique_constraint( + "data_source_name_key", + "data_source", + ["name", "user_id", "model", "version"], + ) + + op.drop_column("data_source", "attributes") + op.drop_column("data_source", "attributes_hash") From 6b3a585949ab52e0858c5fe73311bb0a003a8e5f Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 17:24:49 +0200 Subject: [PATCH 24/70] fix: only returning keys from the attributes field Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 528258ebf..5f28a95ba 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -174,15 +174,8 @@ def hash_attributes(attributes: dict) -> str: return hashlib.sha256(json.dumps(attributes).encode("utf-8")).digest() def get_attribute(self, attribute: str, default: Any = None) -> Any: - """Looks for the attribute on the DataSource. - If not found, returns the default. 
- """ - if hasattr(self, attribute): - return getattr(self, attribute) - if attribute in self.attributes: - return self.attributes[attribute] - - return default + """Looks for the attribute on the DataSource.""" + return self.attributes.get(attribute) def has_attribute(self, attribute: str) -> bool: return attribute in self.attributes From 3a88e00209f9ddc7602a8418aa2127b3b211f457 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 21:17:18 +0200 Subject: [PATCH 25/70] refactor: rename _inputs_schema to _input_schema Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 8 ++++---- flexmeasures/data/models/reporting/__init__.py | 2 +- flexmeasures/data/models/reporting/pandas_reporter.py | 2 +- flexmeasures/data/schemas/tests/test_reporting.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 0a18dc300..c6cc018a3 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -22,7 +22,7 @@ class DataGenerator: _config: dict = None - _inputs_schema: Type[Schema] | None = None + _input_schema: Type[Schema] | None = None _config_schema: Type[Schema] | None = None def __init__(self, config: dict | None = None, **kwargs) -> None: @@ -44,8 +44,8 @@ def compute(self, inputs: dict = None, **kwargs): _inputs = kwargs self.validate_deserialized_inputs(_inputs) else: - if self._inputs_schema: - _inputs = self._inputs_schema.load(inputs) + if self._input_schema: + _inputs = self._input_schema.load(inputs) else: # skip validation _inputs = inputs @@ -53,7 +53,7 @@ def compute(self, inputs: dict = None, **kwargs): return self._compute(**_inputs) def validate_deserialized_inputs(self, inputs: dict): - self._inputs_schema.load(self._inputs_schema.dump(inputs)) + self._input_schema.load(self._input_schema.dump(inputs)) @classmethod def get_data_source_info(cls: type) -> dict: diff 
--git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index d9aa16714..ad4d26436 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -20,7 +20,7 @@ class Reporter(DataGenerator): sensor: Sensor = None - _inputs_schema = ReporterInputsSchema() + _input_schema = ReporterInputsSchema() _config_schema = ReporterConfigSchema() def __init__(self, config: dict | None = None, **kwargs) -> None: diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 76cb076ea..092a4d195 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -23,7 +23,7 @@ class PandasReporter(Reporter): __author__ = "Seita" _config_schema = PandasReporterConfigSchema() - _inputs_schema = PandasReporterInputConfigSchema() + _input_schema = PandasReporterInputConfigSchema() input_variables: list[str] = None transformations: list[dict[str, Any]] = None diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 9e4a06edd..17337d513 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -138,7 +138,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ), ], ) -def test_pandas_reporter_inputs_schema(inputs, is_valid, db, app, setup_dummy_sensors): +def test_pandas_reporter_input_schema(inputs, is_valid, db, app, setup_dummy_sensors): schema = PandasReporterInputConfigSchema() From ad9d24bb098cf8569cfb5c132f45e70c85b484b2 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 21:17:56 +0200 Subject: [PATCH 26/70] fix: typing Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index c6cc018a3..a52e244a5 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING, Any, Type +from typing import TYPE_CHECKING, Any from sqlalchemy.ext.mutable import MutableDict import timely_beliefs as tb @@ -22,8 +22,8 @@ class DataGenerator: _config: dict = None - _input_schema: Type[Schema] | None = None - _config_schema: Type[Schema] | None = None + _input_schema: Schema | None = None + _config_schema: Schema | None = None def __init__(self, config: dict | None = None, **kwargs) -> None: if config is None: From 33d45229d142525824b2c5d14204a202417ebc49 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 23:16:02 +0200 Subject: [PATCH 27/70] fix: avoid future data leakage Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/reporting/aggregator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flexmeasures/data/models/reporting/aggregator.py b/flexmeasures/data/models/reporting/aggregator.py index c1de92409..8647ef9bb 100644 --- a/flexmeasures/data/models/reporting/aggregator.py +++ b/flexmeasures/data/models/reporting/aggregator.py @@ -41,6 +41,9 @@ def _compute_report( dataframes = [] + if belief_time is None: + belief_time = server_now() + for d in data: # if alias is not in belief_search_config, using the Sensor id instead column_name = d.get("alias", f"sensor_{d['sensor'].id}") @@ -64,9 +67,6 @@ def _compute_report( output_df = pd.concat(dataframes, axis=1) - if belief_time is None: - belief_time = server_now() - # apply aggregation method output_df = output_df.aggregate(method, axis=1) From 1119c1fe3e5399cadad6a3effdd4d3dcf43532b7 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 23:18:46 +0200 Subject: [PATCH 28/70] refactor: rename 
PandasReporterInputConfigSchema to PandasReporterInputSchema Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/reporting/pandas_reporter.py | 4 ++-- flexmeasures/data/schemas/reporting/pandas_reporter.py | 2 +- flexmeasures/data/schemas/tests/test_reporting.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 092a4d195..5263018a0 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -10,7 +10,7 @@ from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.schemas.reporting.pandas_reporter import ( PandasReporterConfigSchema, - PandasReporterInputConfigSchema, + PandasReporterInputSchema, ) from flexmeasures.data.models.time_series import Sensor from flexmeasures.utils.time_utils import server_now @@ -23,7 +23,7 @@ class PandasReporter(Reporter): __author__ = "Seita" _config_schema = PandasReporterConfigSchema() - _input_schema = PandasReporterInputConfigSchema() + _input_schema = PandasReporterInputSchema() input_variables: list[str] = None transformations: list[dict[str, Any]] = None diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index d9e3eabc4..d2ffc9722 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -152,7 +152,7 @@ def validate_chaining(self, data, **kwargs): ) -class PandasReporterInputConfigSchema(Schema): +class PandasReporterInputSchema(Schema): input_sensors = fields.Dict( keys=fields.Str(), values=fields.Nested(BeliefsSearchConfigSchema()), diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 17337d513..f745eebde 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ 
b/flexmeasures/data/schemas/tests/test_reporting.py @@ -3,7 +3,7 @@ from flexmeasures.data.schemas.reporting.pandas_reporter import ( PandasReporterConfigSchema, - PandasReporterInputConfigSchema, + PandasReporterInputSchema, ) from marshmallow.exceptions import ValidationError @@ -140,7 +140,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ) def test_pandas_reporter_input_schema(inputs, is_valid, db, app, setup_dummy_sensors): - schema = PandasReporterInputConfigSchema() + schema = PandasReporterInputSchema() if is_valid: schema.load(inputs) From dc800a4ae055cfea54cb95bd63fed21d96267c6b Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 29 Jun 2023 23:37:27 +0200 Subject: [PATCH 29/70] docs: clarify description of the fake_data mock variable Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/reporting/pandas_reporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index d2ffc9722..cb6cc7d26 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -116,8 +116,8 @@ def validate_chaining(self, data, **kwargs): final_df_output is computed. """ - # create dictionary data with objects of the types that is supposed to be generated - # loading the initial data, the sensors' data + # fake_data mocks the PandasReporter class attribute data. It contains empty BeliefsDataFrame + # to simulate the process of applying the transformations. 
fake_data = dict( (variable, BeliefsDataFrame) for variable in data.get("input_variables") ) From 54b67f79c0dc3bb99e3ff03068aae76e8abaf518 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:40:09 +0200 Subject: [PATCH 30/70] docs: fix docstring Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index a52e244a5..889ee8226 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -252,7 +252,7 @@ def hash_attributes(attributes: dict) -> str: return hashlib.sha256(json.dumps(attributes).encode("utf-8")).digest() def get_attribute(self, attribute: str, default: Any = None) -> Any: - """Looks for the attribute on the DataSource.""" + """Looks for the attribute in the DataSource's attributes column.""" return self.attributes.get(attribute) def has_attribute(self, attribute: str) -> bool: From 1c755411313b19cdba904dec0356a63621a24d62 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:40:32 +0200 Subject: [PATCH 31/70] fix: use default value Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 889ee8226..6cc549d38 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -253,7 +253,7 @@ def hash_attributes(attributes: dict) -> str: def get_attribute(self, attribute: str, default: Any = None) -> Any: """Looks for the attribute in the DataSource's attributes column.""" - return self.attributes.get(attribute) + return self.attributes.get(attribute, default) def has_attribute(self, attribute: str) -> bool: return attribute in self.attributes From 
9211978ef2b755e0c8bbfa2c7f0dbbfa3e24c4bc Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:41:28 +0200 Subject: [PATCH 32/70] fix: allow creating new attributes with the method `set_attribute` Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 6cc549d38..6e54e4d18 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -259,5 +259,4 @@ def has_attribute(self, attribute: str) -> bool: return attribute in self.attributes def set_attribute(self, attribute: str, value): - if self.has_attribute(attribute): - self.attributes[attribute] = value + self.attributes[attribute] = value From dbf01b64f6507e33bd228acd4e27552d88d4cd58 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:43:06 +0200 Subject: [PATCH 33/70] docs: add changelog entry Signed-off-by: Victor Garcia Reolid --- documentation/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index 940ea1ccb..f1264f997 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -12,6 +12,7 @@ New features * Allow deleting multiple sensors with a single call to ``flexmeasures delete sensor`` by passing the ``--id`` option multiple times [see `PR #734 `_] * Make it a lot easier to read off the color legend on the asset page, especially when showing many sensors, as they will now be ordered from top to bottom in the same order as they appear in the chart (as defined in the ``sensors_to_show`` attribute), rather than alphabetically [see `PR #742 `_] * Having percentages within the [0, 100] domain is such a common use case that we now always include it in sensor charts with % units, making it easier to read off individual charts and also to compare across 
charts [see `PR #739 `_] +* DataSource table now allows storing arbitrary attributes as a JSON (without content validation), similar to the Sensor and GenericAsset tables [see `PR #750 `_] Bugfixes ----------- From 95518e015f69f4a210ac35f9422676975c6bb616 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:40:09 +0200 Subject: [PATCH 34/70] docs: fix docstring Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 5f28a95ba..4a260d8f6 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -174,7 +174,7 @@ def hash_attributes(attributes: dict) -> str: return hashlib.sha256(json.dumps(attributes).encode("utf-8")).digest() def get_attribute(self, attribute: str, default: Any = None) -> Any: - """Looks for the attribute on the DataSource.""" + """Looks for the attribute in the DataSource's attributes column.""" return self.attributes.get(attribute) def has_attribute(self, attribute: str) -> bool: From 3cdf91b89f31a132050ec20b7cd75055161cc22f Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:40:32 +0200 Subject: [PATCH 35/70] fix: use default value Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 4a260d8f6..9a49d6583 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -175,7 +175,7 @@ def hash_attributes(attributes: dict) -> str: def get_attribute(self, attribute: str, default: Any = None) -> Any: """Looks for the attribute in the DataSource's attributes column.""" - return self.attributes.get(attribute) + return self.attributes.get(attribute, default) def 
has_attribute(self, attribute: str) -> bool: return attribute in self.attributes From b347cb210406a3c56be0b584c471d48c82873bee Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:41:28 +0200 Subject: [PATCH 36/70] fix: allow creating new attributes with the method `set_attributes` Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 9a49d6583..307b008d0 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -181,5 +181,4 @@ def has_attribute(self, attribute: str) -> bool: return attribute in self.attributes def set_attribute(self, attribute: str, value): - if self.has_attribute(attribute): - self.attributes[attribute] = value + self.attributes[attribute] = value From 6c5589343d2b7792e21abc14f151b2ffda33a0a2 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 2 Jul 2023 22:43:06 +0200 Subject: [PATCH 37/70] docs: add changelog entry Signed-off-by: Victor Garcia Reolid --- documentation/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index 940ea1ccb..f1264f997 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -12,6 +12,7 @@ New features * Allow deleting multiple sensors with a single call to ``flexmeasures delete sensor`` by passing the ``--id`` option multiple times [see `PR #734 `_] * Make it a lot easier to read off the color legend on the asset page, especially when showing many sensors, as they will now be ordered from top to bottom in the same order as they appear in the chart (as defined in the ``sensors_to_show`` attribute), rather than alphabetically [see `PR #742 `_] * Having percentages within the [0, 100] domain is such a common use case that we now always include it in sensor charts with 
% units, making it easier to read off individual charts and also to compare across charts [see `PR #739 `_] +* DataSource table now allows storing arbitrary attributes as a JSON (without content validation), similar to the Sensor and GenericAsset tables [see `PR #750 `_] Bugfixes ----------- From e565198cf6a0385d43ad2ab3c3f9379697115dc5 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 09:53:17 +0200 Subject: [PATCH 38/70] fix: add reporters and schedulers into the data_generators attribute in the app context Signed-off-by: Victor Garcia Reolid --- flexmeasures/app.py | 7 ++- flexmeasures/data/models/data_sources.py | 61 ++++++++++----------- flexmeasures/data/tests/test_data_source.py | 2 +- flexmeasures/utils/plugin_utils.py | 4 +- 4 files changed, 36 insertions(+), 38 deletions(-) diff --git a/flexmeasures/app.py b/flexmeasures/app.py index 5dea0870b..bcbbf22e9 100644 --- a/flexmeasures/app.py +++ b/flexmeasures/app.py @@ -128,8 +128,11 @@ def create( # noqa C901 app.reporters = reporters app.schedulers = schedulers - app.data_generators = copy(reporters) # use copy to avoid mutating app.reporters - app.data_generators.update(schedulers) + app.data_generators = dict() + app.data_generators["reporter"] = copy( + reporters + ) # use copy to avoid mutating app.reporters + app.data_generators["scheduler"] = schedulers # add auth policy diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 6e54e4d18..d5d490743 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -18,6 +18,7 @@ class DataGenerator: + __data_generator_base__: str | None = None _data_source: DataSource | None = None _config: dict = None @@ -27,33 +28,30 @@ class DataGenerator: def __init__(self, config: dict | None = None, **kwargs) -> None: if config is None: - _config = kwargs + self._config = kwargs + DataGenerator.validate_deserialized(self._config, self._config_schema) + elif 
self._config_schema: + self._config = self._config_schema.load(config) else: - if self._config_schema: - _config = self._config_schema.load(config) - else: - _config = config - - self._config = _config + self._config = config def _compute(self, **kwargs): raise NotImplementedError() - def compute(self, inputs: dict = None, **kwargs): - if inputs is None: - _inputs = kwargs - self.validate_deserialized_inputs(_inputs) - else: - if self._input_schema: - _inputs = self._input_schema.load(inputs) - - else: # skip validation - _inputs = inputs + def compute(self, input: dict | None = None, **kwargs): + if input is None: + _input = kwargs + DataGenerator.validate_deserialized(_input, self._input_schema) + elif self._input_schema: + _input = self._input_schema.load(input) + else: # skip validation + _input = input - return self._compute(**_inputs) + return self._compute(**_input) - def validate_deserialized_inputs(self, inputs: dict): - self._input_schema.load(self._input_schema.dump(inputs)) + @staticmethod + def validate_deserialized(values: dict, schema: Schema) -> bool: + schema.load(schema.dump(values)) @classmethod def get_data_source_info(cls: type) -> dict: @@ -66,22 +64,13 @@ def get_data_source_info(cls: type) -> dict: source=current_app.config.get("FLEXMEASURES_DEFAULT_DATASOURCE") ) # default - from flexmeasures.data.models.planning import Scheduler - from flexmeasures.data.models.reporting import Reporter - - if issubclass(cls, Reporter): - source_info["source_type"] = "reporter" - elif issubclass(cls, Scheduler): - source_info["source_type"] = "scheduler" - else: - source_info["source_type"] = "undefined" - + source_info["source_type"] = cls.__data_generator_base__ source_info["model"] = cls.__name__ return source_info @property - def data_source(self) -> "DataSource | None": + def data_source(self) -> "DataSource" | None: from flexmeasures.data.services.data_sources import get_or_create_source if self._data_source is None: @@ -176,7 +165,11 @@ def 
data_generator(self): ) return None - if self.model not in current_app.data_generators: + types = current_app.data_generators + + if all( + [self.model not in current_app.data_generators[_type] for _type in types] + ): current_app.logger.warning( "DataGenerator `{self.model}` not registered in this FlexMeasures instance." ) @@ -187,7 +180,9 @@ def data_generator(self): config = data_generator_details.get("config", {}) # create DataGenerator class and assign the current DataSource (self) as its source - data_generator = current_app.data_generators[self.model](config=config) + data_generator = current_app.data_generators[self.type][self.model]( + config=config + ) data_generator._data_source = self self._data_generator = data_generator diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index 359a85794..ada192240 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -24,7 +24,7 @@ def test_get_reporter_from_source(db, app, aggregator_reporter_data_source): def test_data_source(db, app, aggregator_reporter_data_source): - TestTeporter = app.data_generators.get("TestReporter") + TestTeporter = app.data_generators["reporter"].get("TestReporter") ds1 = TestTeporter(config={"sensor": 1}) diff --git a/flexmeasures/utils/plugin_utils.py b/flexmeasures/utils/plugin_utils.py index 2614e146a..4e9af8817 100644 --- a/flexmeasures/utils/plugin_utils.py +++ b/flexmeasures/utils/plugin_utils.py @@ -118,8 +118,8 @@ def register_plugins(app: Flask): app.schedulers.update(plugin_schedulers) # add DataGenerators - app.data_generators.update(plugin_schedulers) - app.data_generators.update(plugin_reporters) + app.data_generators["scheduler"].update(plugin_schedulers) + app.data_generators["reporter"].update(plugin_reporters) app.config["LOADED_PLUGINS"][plugin_name] = plugin_version app.logger.info(f"Loaded plugins: {app.config['LOADED_PLUGINS']}") From 
1b648adcfe6ee628475e86e64bc6a1b54e385a13 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 09:59:42 +0200 Subject: [PATCH 39/70] fix: raise Exceptions instead of returning None Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index d5d490743..a546714e1 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -154,26 +154,23 @@ def data_generator(self): data_generator = None if self.type not in ["scheduler", "forecaster", "reporter"]: - current_app.logger.warning( + raise NotImplementedError( "Only the classes Scheduler, Forecaster and Reporters are DataGenerator's." ) - return None if not self.model: - current_app.logger.warning( + raise NotImplementedError( "There's no DataGenerator class defined in this DataSource." ) - return None types = current_app.data_generators if all( [self.model not in current_app.data_generators[_type] for _type in types] ): - current_app.logger.warning( + raise NotImplementedError( "DataGenerator `{self.model}` not registered in this FlexMeasures instance." 
) - return None # fetch DataGenerator details data_generator_details = self.attributes.get("data_generator", {}) From b9c0b3ff7a3c6dafe5e5eb2f4d8487dba62462f5 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 10:00:43 +0200 Subject: [PATCH 40/70] fix: move sensor attribute from config to inputs Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/__init__.py | 8 +++---- .../data/models/reporting/aggregator.py | 2 ++ .../models/reporting/tests/test_aggregator.py | 2 +- .../reporting/tests/test_pandas_reporter.py | 21 +++++++++------- .../reporting/tests/test_tibber_reporter.py | 4 ++-- .../data/schemas/reporting/__init__.py | 9 ++++--- .../data/schemas/reporting/pandas_reporter.py | 24 +++++++++---------- .../data/schemas/tests/test_reporting.py | 2 ++ flexmeasures/data/tests/conftest.py | 14 ++++++++--- flexmeasures/data/tests/test_data_source.py | 22 ++++++++++++----- 10 files changed, 68 insertions(+), 40 deletions(-) diff --git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index ad4d26436..eda2ef4b5 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -16,7 +16,7 @@ class Reporter(DataGenerator): __version__ = None __author__ = None - __data_generator_base__ = "Reporter" + __data_generator_base__ = "reporter" sensor: Sensor = None @@ -26,17 +26,15 @@ class Reporter(DataGenerator): def __init__(self, config: dict | None = None, **kwargs) -> None: super().__init__(config, **kwargs) - self.sensor = self._config["sensor"] - def _compute(self, **kwargs) -> tb.BeliefsDataFrame: """This method triggers the creation of a new report. The same object can generate multiple reports with different start, end, resolution and belief_time values. - - In the future, this function will parse arbitrary input arguments defined in a schema. 
""" + self.sensor = kwargs["sensor"] + # Result result: tb.BeliefsDataFrame = self._compute_report(**kwargs) diff --git a/flexmeasures/data/models/reporting/aggregator.py b/flexmeasures/data/models/reporting/aggregator.py index 8647ef9bb..6ab8312ae 100644 --- a/flexmeasures/data/models/reporting/aggregator.py +++ b/flexmeasures/data/models/reporting/aggregator.py @@ -7,6 +7,7 @@ from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.schemas.reporting.aggregation import AggregatorConfigSchema +from flexmeasures.data.models.time_series import Sensor from flexmeasures.utils.time_utils import server_now @@ -24,6 +25,7 @@ class AggregatorReporter(Reporter): def _compute_report( self, + sensor: Sensor, start: datetime, end: datetime, input_resolution: timedelta | None = None, diff --git a/flexmeasures/data/models/reporting/tests/test_aggregator.py b/flexmeasures/data/models/reporting/tests/test_aggregator.py index dddf20b6d..ea6883031 100644 --- a/flexmeasures/data/models/reporting/tests/test_aggregator.py +++ b/flexmeasures/data/models/reporting/tests/test_aggregator.py @@ -37,7 +37,6 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): s1, s2, reporter_sensor = setup_dummy_data reporter_config = dict( - sensor=reporter_sensor.id, data=[ dict(sensor=s1.id, source=1), dict(sensor=s2.id, source=2), @@ -48,6 +47,7 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): agg_reporter = AggregatorReporter(config=reporter_config) result = agg_reporter.compute( + sensor=reporter_sensor, start=datetime(2023, 5, 10, tzinfo=utc), end=datetime(2023, 5, 11, tzinfo=utc), ) diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index c3dd01eaf..ca3083e31 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -9,7 +9,6 @@ def 
test_reporter(app, setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data config = dict( - sensor=reporter_sensor.id, input_variables=["sensor_1", "sensor_2"], transformations=[ dict( @@ -46,7 +45,9 @@ def test_reporter(app, setup_dummy_data): end = datetime(2023, 4, 10, 10, tzinfo=utc) input_sensors = dict(sensor_1=dict(sensor=s1), sensor_2=dict(sensor=s2)) - report1 = reporter.compute(start=start, end=end, input_sensors=input_sensors) + report1 = reporter.compute( + sensor=reporter_sensor, start=start, end=end, input_sensors=input_sensors + ) assert len(report1) == 5 assert str(report1.event_starts[0]) == "2023-04-10 00:00:00+00:00" @@ -64,7 +65,10 @@ def test_reporter(app, setup_dummy_data): # check that calling compute with different parameters changes the result report2 = reporter.compute( - start=datetime(2023, 4, 10, 3, tzinfo=utc), end=end, input_sensors=input_sensors + sensor=reporter_sensor, + start=datetime(2023, 4, 10, 3, tzinfo=utc), + end=end, + input_sensors=input_sensors, ) assert len(report2) == 4 assert str(report2.event_starts[0]) == "2023-04-10 02:00:00+00:00" @@ -76,7 +80,6 @@ def test_reporter_repeated(setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data reporter_config = dict( - sensor=reporter_sensor.id, input_variables=["sensor_1", "sensor_2"], transformations=[ dict( @@ -107,7 +110,8 @@ def test_reporter_repeated(setup_dummy_data): final_df_output="df_merge", ) - inputs = dict( + input = dict( + sensor=reporter_sensor.id, start="2023-04-10T00:00:00 00:00", end="2023-04-10T10:00:00 00:00", input_sensors=dict( @@ -118,8 +122,8 @@ def test_reporter_repeated(setup_dummy_data): reporter = PandasReporter(config=reporter_config) - report1 = reporter.compute(inputs=inputs) - report2 = reporter.compute(inputs=inputs) + report1 = reporter.compute(input=input) + report2 = reporter.compute(input=input) assert all(report2.values == report1.values) @@ -129,7 +133,6 @@ def test_reporter_empty(setup_dummy_data): s1, s2, reporter_sensor = 
setup_dummy_data config = dict( - sensor=reporter_sensor.id, input_variables=["sensor_1"], transformations=[], final_df_output="sensor_1", @@ -139,6 +142,7 @@ def test_reporter_empty(setup_dummy_data): # compute report on available data report = reporter.compute( + sensor=reporter_sensor, start=datetime(2023, 4, 10, tzinfo=utc), end=datetime(2023, 4, 10, 10, tzinfo=utc), input_sensors=dict(sensor_1=dict(sensor=s1)), @@ -148,6 +152,7 @@ def test_reporter_empty(setup_dummy_data): # compute report on dates with no data available report = reporter.compute( + sensor=reporter_sensor, start=datetime(2021, 4, 10, tzinfo=utc), end=datetime(2021, 4, 10, 10, tzinfo=utc), input_sensors=dict(sensor_1=dict(sensor=s1)), diff --git a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py index 6076ddecf..39a6dca63 100644 --- a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py @@ -99,7 +99,6 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: # create the PandasReporter reporter config pandas_reporter_config = dict( - sensor=self.sensor.id, input_variables=["energy_tax", "VAT", "tariff", "da_prices"], transformations=[ dict( @@ -257,9 +256,10 @@ def test_tibber_reporter(tibber_test_data): tibber_report_sensor = tibber_test_data - tibber_reporter = TibberReporter(sensor=tibber_report_sensor) + tibber_reporter = TibberReporter() result = tibber_reporter.compute( + sensor=tibber_report_sensor, start=datetime(2023, 4, 13, tzinfo=utc), end=datetime(2023, 4, 14, tzinfo=utc), ) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 190ccea93..0f76057e5 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -7,14 +7,17 @@ class ReporterConfigSchema(Schema): - sensor = 
SensorIdField(required=True) + pass class ReporterInputsSchema(Schema): + sensor = SensorIdField(required=True) + start = AwareDateTimeField(required=True) end = AwareDateTimeField(required=True) - input_resolution = DurationField() - belief_time = AwareDateTimeField() + + resolution = DurationField(required=False) + belief_time = AwareDateTimeField(required=False) class BeliefsSearchConfigSchema(Schema): diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index cb6cc7d26..9b6a499d2 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -5,7 +5,10 @@ from flexmeasures.data.schemas.sources import DataSourceIdField from flexmeasures.data.schemas import AwareDateTimeField, DurationField - +from flexmeasures.data.schemas.reporting import ( + ReporterConfigSchema, + ReporterInputsSchema, +) from timely_beliefs import BeliefsDataFrame @@ -76,14 +79,13 @@ class BeliefsSearchConfigSchema(Schema): sum_multiple = fields.Boolean() -class PandasReporterConfigSchema(Schema): +class PandasReporterConfigSchema(ReporterConfigSchema): """ This schema lists fields that can be used to describe sensors in the optimised portfolio Example: { - "sensor" : 1, "input_variables" : ["df1"], "transformations" : [ { @@ -104,8 +106,7 @@ class PandasReporterConfigSchema(Schema): "final_df_output" : "df2" """ - sensor = SensorIdField(required=True) - input_variables = fields.List(fields.Str(), required=True) + input_variables = fields.List(fields.Str(), required=True) # expected input aliases transformations = fields.List(fields.Nested(PandasMethodCall()), required=True) final_df_output = fields.Str(required=True) @@ -152,7 +153,12 @@ def validate_chaining(self, data, **kwargs): ) -class PandasReporterInputSchema(Schema): +class PandasReporterInputSchema(ReporterInputsSchema): + # make start and end optional, conditional on providing the time 
parameters + # for the single sensors in `input_sensors` + start = AwareDateTimeField(required=False) + end = AwareDateTimeField(required=False) + input_sensors = fields.Dict( keys=fields.Str(), values=fields.Nested(BeliefsSearchConfigSchema()), @@ -160,12 +166,6 @@ class PandasReporterInputSchema(Schema): validator=validate.Length(min=1), ) - start = AwareDateTimeField(required=False) - end = AwareDateTimeField(required=False) - - resolution = DurationField(required=False) - belief_time = AwareDateTimeField(required=False) - @validates_schema def validate_time_parameters(self, data, **kwargs): """This method validates that all input sensors have start diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index f745eebde..006ac8a90 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -112,6 +112,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se [ ( { + "sensor": 1, "input_sensors": {"sensor_1": {"sensor": 1}}, "start": "2023-06-06T00:00:00+02:00", "end": "2023-06-06T00:00:00+02:00", @@ -126,6 +127,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ), ( { + "sensor": 1, "input_sensors": { "sensor_1": { "sensor": 1, diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index db1c6db3f..c09b04c29 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -21,6 +21,9 @@ ) from flexmeasures.utils.time_utils import as_server_time +from marshmallow import fields +from marshmallow import Schema + @pytest.fixture(scope="module") def setup_test_data( @@ -239,9 +242,14 @@ def setup_annotations( @pytest.fixture(scope="module") def aggregator_reporter_data_source(app, db, add_nearby_weather_sensors): - sensor = add_nearby_weather_sensors.get("temperature") + # sensor = add_nearby_weather_sensors.get("temperature") 
+ + class TestReporterConfigSchema(Schema): + a = fields.Str() class TestReporter(Reporter): + _config_schema = TestReporterConfigSchema() + def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: start = kwargs.get("start") end = kwargs.get("end") @@ -258,9 +266,9 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: return tb.BeliefsDataFrame(r, sensor=self.sensor) - app.data_generators.update({"TestReporter": TestReporter}) + app.data_generators["reporter"].update({"TestReporter": TestReporter}) - config = dict(sensor=sensor.id) + config = dict(a="b") ds = DataSource( name="Test", diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index ada192240..86cec8e49 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -6,39 +6,49 @@ from pytz import UTC -def test_get_reporter_from_source(db, app, aggregator_reporter_data_source): +def test_get_reporter_from_source( + db, app, aggregator_reporter_data_source, add_nearby_weather_sensors +): reporter = aggregator_reporter_data_source.data_generator + reporter_sensor = add_nearby_weather_sensors.get("farther_temperature") + assert isinstance(reporter, Reporter) assert reporter.__class__.__name__ == "TestReporter" res = reporter.compute( - start=datetime(2023, 1, 1, tzinfo=UTC), end=datetime(2023, 1, 2, tzinfo=UTC) + sensor=reporter_sensor, + start=datetime(2023, 1, 1, tzinfo=UTC), + end=datetime(2023, 1, 2, tzinfo=UTC), ) assert res.lineage.sources[0] == reporter.data_source with pytest.raises(AttributeError): - reporter.compute(start=datetime(2023, 1, 1, tzinfo=UTC), end="not a date") + reporter.compute( + sensor=reporter_sensor, + start=datetime(2023, 1, 1, tzinfo=UTC), + end="not a date", + ) def test_data_source(db, app, aggregator_reporter_data_source): TestTeporter = app.data_generators["reporter"].get("TestReporter") - ds1 = TestTeporter(config={"sensor": 1}) + ds1 = TestTeporter(config={"a": "1"}) 
db.session.add(ds1.data_source) db.session.commit() - ds2 = TestTeporter(config={"sensor": 1}) + ds2 = TestTeporter(config={"a": "1"}) assert ds1.data_source == ds2.data_source assert ds1.data_source.attributes.get("config") == ds2.data_source.attributes.get( "config" ) - ds3 = TestTeporter(config={"sensor": 2}) + ds3 = TestTeporter(config={"a": "2"}) assert ds3.data_source != ds2.data_source assert ds3.data_source.attributes.get("config") != ds2.data_source.attributes.get( From f239be48c18c88b6cec08e26ca3fb234d28d7d39 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 10:09:32 +0200 Subject: [PATCH 41/70] fix: use same structure for data generators and add test Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 6 +++--- flexmeasures/data/tests/test_data_source.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index a546714e1..74b10a17c 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -75,9 +75,9 @@ def data_source(self) -> "DataSource" | None: if self._data_source is None: data_source_info = self.get_data_source_info() - data_source_info["attributes"] = dict( - config=self._config_schema.dump(self._config) - ) + data_source_info["attributes"] = { + "data_generator": {"config": self._config_schema.dump(self._config)} + } self._data_source = get_or_create_source(**data_source_info) diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index 86cec8e49..f31c680be 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -44,13 +44,17 @@ def test_data_source(db, app, aggregator_reporter_data_source): ds2 = TestTeporter(config={"a": "1"}) assert ds1.data_source == ds2.data_source - assert ds1.data_source.attributes.get("config") == 
ds2.data_source.attributes.get( + assert ds1.data_source.attributes.get("data_generator").get( "config" - ) + ) == ds2.data_source.attributes.get("data_generator").get("config") ds3 = TestTeporter(config={"a": "2"}) assert ds3.data_source != ds2.data_source - assert ds3.data_source.attributes.get("config") != ds2.data_source.attributes.get( + assert ds3.data_source.attributes.get("data_generator").get( "config" - ) + ) != ds2.data_source.attributes.get("data_generator").get("config") + + ds4 = ds3.data_source.data_generator + + assert ds4._config == ds3._config From eac2d7c3675df206068d52c298f0127a94626dfe Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 10:14:07 +0200 Subject: [PATCH 42/70] refactor: use input_resolution instead of resolution Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/reporting/__init__.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 0f76057e5..09b8bfd4d 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -48,16 +48,3 @@ class BeliefsSearchConfigSchema(Schema): one_deterministic_belief_per_event_per_source = fields.Boolean() resolution = DurationField() sum_multiple = fields.Boolean() - - -# class ReporterConfigSchema(Schema): -# """ -# This schema is used to validate Reporter class configurations (reporter_config). -# Inherit from this to extend this schema with your own parameters. 
-# """ - -# beliefs_search_configs = fields.List( -# fields.Nested(BeliefsSearchConfigSchema()), -# required=True, -# validator=validate.Length(min=1), -# ) From d3038019ce7bcbd9abb00d8822db53b052d6d62a Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 10:28:52 +0200 Subject: [PATCH 43/70] doc: update schema docstring Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/reporting/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 09b8bfd4d..0a739058a 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -7,10 +7,21 @@ class ReporterConfigSchema(Schema): + """ + This schema is used to validate Reporter class configurations (config). + Inherit from this class to extend this schema with your own parameters. + """ + pass class ReporterInputsSchema(Schema): + """ + This schema is used to validate the inputs to the method `compute` of + the Reporter class. + Inherit from this class to extend this schema with your own parameters. 
+ """ + sensor = SensorIdField(required=True) start = AwareDateTimeField(required=True) From 2abbeb6c9b2d9c4b25288183ecfe2a24baa79fa3 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 11:27:23 +0200 Subject: [PATCH 44/70] refactor: rename input_resolution to resolution Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/reporting/aggregator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flexmeasures/data/models/reporting/aggregator.py b/flexmeasures/data/models/reporting/aggregator.py index 6ab8312ae..9fd41601d 100644 --- a/flexmeasures/data/models/reporting/aggregator.py +++ b/flexmeasures/data/models/reporting/aggregator.py @@ -28,7 +28,7 @@ def _compute_report( sensor: Sensor, start: datetime, end: datetime, - input_resolution: timedelta | None = None, + resolution: timedelta | None = None, belief_time: datetime | None = None, ) -> tb.BeliefsDataFrame: """ @@ -55,7 +55,7 @@ def _compute_report( .search_beliefs( event_starts_after=start, event_ends_before=end, - resolution=input_resolution, + resolution=resolution, beliefs_before=belief_time, ) .droplevel([1, 2, 3]) From 9f8bd1118d8181181a80c9d671863a51d3a04f2d Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 11:27:46 +0200 Subject: [PATCH 45/70] fix: remove sensor from config Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/tests/test_pandas_reporter.py | 4 ++-- flexmeasures/data/schemas/tests/test_reporting.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index ca3083e31..7e8acb29f 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -8,7 +8,7 @@ def test_reporter(app, setup_dummy_data): s1, s2, reporter_sensor = setup_dummy_data - config = dict( + 
reporter_config = dict( input_variables=["sensor_1", "sensor_2"], transformations=[ dict( @@ -39,7 +39,7 @@ def test_reporter(app, setup_dummy_data): final_df_output="df_merge", ) - reporter = PandasReporter(config=config) + reporter = PandasReporter(config=reporter_config) start = datetime(2023, 4, 10, tzinfo=utc) end = datetime(2023, 4, 10, 10, tzinfo=utc) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 006ac8a90..6c0804063 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -41,7 +41,6 @@ def setup_dummy_sensors(db, app): [ ( { # this checks that the final_df_output dataframe is actually generated at some point of the processing pipeline - "sensor": 1, "input_variables": ["sensor_1"], "transformations": [ { @@ -56,7 +55,6 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that chaining works, applying the method copy on the previous dataframe - "sensor": 1, "input_variables": ["sensor_1"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, @@ -69,7 +67,6 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that resample cannot be the last method being applied - "sensor": 1, "input_variables": ["sensor_1", "sensor_2"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, @@ -82,7 +79,6 @@ def setup_dummy_sensors(db, app): ), ( { # this checks that resample cannot be the last method being applied - "sensor": 1, "input_variables": ["sensor_1", "sensor_2"], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, From 006661aafea8efc56b0c105c10627dc5922b6530 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 11:32:10 +0200 Subject: [PATCH 46/70] docs: add comment Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/reporting/pandas_reporter.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 9b6a499d2..6ae7b6187 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -160,7 +160,7 @@ class PandasReporterInputSchema(ReporterInputsSchema): end = AwareDateTimeField(required=False) input_sensors = fields.Dict( - keys=fields.Str(), + keys=fields.Str(), # alias values=fields.Nested(BeliefsSearchConfigSchema()), required=True, validator=validate.Length(min=1), From 076a653912cc510107102ac8c26f061bc2d3fe3e Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 11:32:24 +0200 Subject: [PATCH 47/70] fix: remove df_output Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/reporting/tests/test_tibber_reporter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py index 39a6dca63..bf17e2a47 100644 --- a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py @@ -114,7 +114,6 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: ), dict( df_input="tariff", - df_output="tariff", method="droplevel", args=[[1, 2, 3]], ), From 36d3856ce444a1df173fcf742535f06d0e7bfc0d Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 11:35:10 +0200 Subject: [PATCH 48/70] fix:. 
for data in data["data"] haha Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/reporting/aggregation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flexmeasures/data/schemas/reporting/aggregation.py b/flexmeasures/data/schemas/reporting/aggregation.py index 52d0fa881..442f56488 100644 --- a/flexmeasures/data/schemas/reporting/aggregation.py +++ b/flexmeasures/data/schemas/reporting/aggregation.py @@ -43,8 +43,8 @@ class AggregatorConfigSchema(ReporterConfigSchema): @validates_schema def validate_source(self, data, **kwargs): - for data in data["data"]: - if "source" not in data: + for d in data["data"]: + if "source" not in d: raise ValidationError("`source` is a required field.") @validates_schema From 6169cd35678060aa6b48ac11c7547a39e0baec81 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 12:05:42 +0200 Subject: [PATCH 49/70] doc: add docstring to compute and __init__ in DataGenerator Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 40 ++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 74b10a17c..a8c540823 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -27,6 +27,40 @@ class DataGenerator: _config_schema: Schema | None = None def __init__(self, config: dict | None = None, **kwargs) -> None: + """Base class for the Schedulers, Reporters and Forecasters. + + The configuration `config` stores static parameters, parameters that, if + changed, trigger the creation of a new DataSource. Dynamic parameters, such as + the start date, can go into the `inputs`. See docstring of the method `DataGenerator.compute` for + more details. + + + Create a new DataGenerator with a certain configuration. There are two alternatives + to define the parameters: + + 1. Serialized through the keyword argument `config`. + 2. 
Deserialized, passing each parameter as keyword arguments. + + The configuration is validated using the schema `_config_schema`, to be defined. + + Example: + + The configuration requires the parameters start and end, two datetimes. + + Option 1: + dg = DataGenerator(config = { + "start" : "2023-01-01T00:00:00+00:00", + "end" : "2023-01-02T00:00:00+00:00" + }) + + Option 2: + df = DataGenerator(start = datetime(2023, 1, 1, tzinfo = UTC), + end = datetime(2023, 1, 2, tzinfo = UTC)) + + + :param config: serialized `config` parameters, defaults to None + """ + if config is None: self._config = kwargs DataGenerator.validate_deserialized(self._config, self._config_schema) @@ -39,6 +73,12 @@ def _compute(self, **kwargs): raise NotImplementedError() def compute(self, input: dict | None = None, **kwargs): + """The configuration `input` stores dynamic parameters, parameters that, if + changed, DO NOT trigger the creation of a new DataSource. Static parameters, such as + the topology of an energy system, can go into `config`. 
+ + :param input: serialized `input` parameters, defaults to None + """ if input is None: _input = kwargs DataGenerator.validate_deserialized(_input, self._input_schema) From a695d8b9ca7bdb03b24beb78c75cfe3a4fb2c261 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 13:30:28 +0200 Subject: [PATCH 50/70] refactor: rename inputs to input Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- flexmeasures/data/models/reporting/__init__.py | 4 ++-- flexmeasures/data/models/reporting/pandas_reporter.py | 1 + flexmeasures/data/schemas/reporting/__init__.py | 2 +- flexmeasures/data/schemas/reporting/pandas_reporter.py | 4 ++-- flexmeasures/data/schemas/tests/test_reporting.py | 8 ++++---- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index a8c540823..3b01098ed 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -31,7 +31,7 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: The configuration `config` stores static parameters, parameters that, if changed, trigger the creation of a new DataSource. Dynamic parameters, such as - the start date, can go into the `inputs`. See docstring of the method `DataGenerator.compute` for + the start date, can go into the `input`. See docstring of the method `DataGenerator.compute` for more details. 
diff --git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index eda2ef4b5..526ec66a1 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -4,7 +4,7 @@ from flexmeasures.data.models.data_sources import DataGenerator from flexmeasures.data.schemas.reporting import ( - ReporterInputsSchema, + ReporterInputSchema, ReporterConfigSchema, ) @@ -20,7 +20,7 @@ class Reporter(DataGenerator): sensor: Sensor = None - _input_schema = ReporterInputsSchema() + _input_schema = ReporterInputSchema() _config_schema = ReporterConfigSchema() def __init__(self, config: dict | None = None, **kwargs) -> None: diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 5263018a0..9e4398322 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -204,6 +204,7 @@ def _apply_transformations(self): for _transformation in self._config.get("transformations"): transformation = deepcopy(_transformation) + df_input = transformation.get( "df_input", previous_df ) # default is using the previous transformation output diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 0a739058a..434f43c85 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -15,7 +15,7 @@ class ReporterConfigSchema(Schema): pass -class ReporterInputsSchema(Schema): +class ReporterInputSchema(Schema): """ This schema is used to validate the inputs to the method `compute` of the Reporter class. 
diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 6ae7b6187..26e492c1d 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -7,7 +7,7 @@ from flexmeasures.data.schemas import AwareDateTimeField, DurationField from flexmeasures.data.schemas.reporting import ( ReporterConfigSchema, - ReporterInputsSchema, + ReporterInputSchema, ) from timely_beliefs import BeliefsDataFrame @@ -153,7 +153,7 @@ def validate_chaining(self, data, **kwargs): ) -class PandasReporterInputSchema(ReporterInputsSchema): +class PandasReporterInputSchema(ReporterInputSchema): # make start and end optional, conditional on providing the time parameters # for the single sensors in `input_sensors` start = AwareDateTimeField(required=False) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 6c0804063..aaa3ff10a 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -104,7 +104,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se @pytest.mark.parametrize( - "inputs, is_valid", + "input, is_valid", [ ( { @@ -136,12 +136,12 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ), ], ) -def test_pandas_reporter_input_schema(inputs, is_valid, db, app, setup_dummy_sensors): +def test_pandas_reporter_input_schema(input, is_valid, db, app, setup_dummy_sensors): schema = PandasReporterInputSchema() if is_valid: - schema.load(inputs) + schema.load(input) else: with pytest.raises(ValidationError): - schema.load(inputs) + schema.load(input) From 091283b4c297ed0601007d9d59d0b9e70aa1ceb6 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 14:08:41 +0200 Subject: [PATCH 51/70] fix: removing constructor Signed-off-by: Victor Garcia Reolid 
--- flexmeasures/data/models/reporting/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index 526ec66a1..a1ee429d2 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -23,9 +23,6 @@ class Reporter(DataGenerator): _input_schema = ReporterInputSchema() _config_schema = ReporterConfigSchema() - def __init__(self, config: dict | None = None, **kwargs) -> None: - super().__init__(config, **kwargs) - def _compute(self, **kwargs) -> tb.BeliefsDataFrame: """This method triggers the creation of a new report. From 52289f66abd92ab74aa2b6b6ffebb867fe60fee9 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 3 Jul 2023 14:12:53 +0200 Subject: [PATCH 52/70] docs: improve docstring Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 3b01098ed..c8c23c703 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -41,21 +41,24 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: 1. Serialized through the keyword argument `config`. 2. Deserialized, passing each parameter as keyword arguments. - The configuration is validated using the schema `_config_schema`, to be defined. + The configuration is validated using the schema `_config_schema`, to be defined by the subclass. Example: - The configuration requires the parameters start and end, two datetimes. + The configuration requires two parameters for the PV and consumption sensors. 
Option 1: - dg = DataGenerator(config = { - "start" : "2023-01-01T00:00:00+00:00", - "end" : "2023-01-02T00:00:00+00:00" - }) + dg = DataGenerator(config = { + "sensor_pv" : 1, + "sensor_consumption" : 2 + }) Option 2: - df = DataGenerator(start = datetime(2023, 1, 1, tzinfo = UTC), - end = datetime(2023, 1, 2, tzinfo = UTC)) + sensor_pv = Sensor.query.get(1) + sensor_consumption = Sensor.query.get(2) + + dg = DataGenerator(sensor_pv = sensor_pv, + sensor_consumption = sensor_consumption) :param config: serialized `config` parameters, defaults to None From 5520c73eb6dc0c55ae4ce446c828b51419fa549c Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Fri, 7 Jul 2023 14:44:31 +0200 Subject: [PATCH 53/70] test: add data to conftest Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/tests/conftest.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/flexmeasures/data/models/reporting/tests/conftest.py b/flexmeasures/data/models/reporting/tests/conftest.py index a75a1a465..c5ab65a16 100644 --- a/flexmeasures/data/models/reporting/tests/conftest.py +++ b/flexmeasures/data/models/reporting/tests/conftest.py @@ -37,6 +37,11 @@ def setup_dummy_data(db, app): "report sensor", generic_asset=pandas_report, event_resolution="1h" ) db.session.add(report_sensor) + daily_report_sensor = Sensor( + "daily report sensor", generic_asset=pandas_report, event_resolution="1D" + ) + + db.session.add(daily_report_sensor) """ Create 2 DataSources @@ -77,18 +82,31 @@ def setup_dummy_data(db, app): ) ) + # add simple data for testing DST transition + for t in range(24 * 4): # create data for 4 days + # UTC+1 -> UTC+2 + beliefs.append( + TimedBelief( + event_start=datetime(2023, 3, 24, tzinfo=utc) + timedelta(hours=t), + belief_horizon=timedelta(hours=24), + event_value=value, + sensor=sensor, + source=source, + ) + ) + + # UTC+2 -> UTC+1 + beliefs.append( + TimedBelief( + event_start=datetime(2023, 10, 27, tzinfo=utc) +
timedelta(hours=t), + belief_horizon=timedelta(hours=24), + event_value=value, + sensor=sensor, + source=source, + ) + ) + db.session.add_all(beliefs) db.session.commit() yield sensor1, sensor2, report_sensor - - db.session.delete(sensor1) - db.session.delete(sensor2) - - for b in beliefs: - db.session.delete(b) - - db.session.delete(dummy_asset) - db.session.delete(dummy_asset_type) - - db.session.commit() From 3bb13e233b56271035cc36e4be2b333097f9a3c3 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Fri, 7 Jul 2023 14:46:20 +0200 Subject: [PATCH 54/70] test: add test_dst_transition Signed-off-by: Victor Garcia Reolid --- .../models/reporting/tests/test_aggregator.py | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/models/reporting/tests/test_aggregator.py b/flexmeasures/data/models/reporting/tests/test_aggregator.py index ea6883031..13aa65a16 100644 --- a/flexmeasures/data/models/reporting/tests/test_aggregator.py +++ b/flexmeasures/data/models/reporting/tests/test_aggregator.py @@ -3,7 +3,7 @@ from flexmeasures.data.models.reporting.aggregator import AggregatorReporter from datetime import datetime -from pytz import utc +from pytz import utc, timezone @pytest.mark.parametrize( @@ -57,3 +57,37 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): # check that the value is equal to expected_value assert (result == expected_value).all().event_value + + +def test_dst_transition(setup_dummy_data): + s1, _, reporter_sensor = setup_dummy_data + + reporter_config = dict( + data=[ + dict(sensor=s1.id, source=1), + ], + ) + + agg_reporter = AggregatorReporter(config=reporter_config) + + tz = timezone("Europe/Amsterdam") + + # transition from winter (CET) to summer (CEST) + result = agg_reporter.compute( + sensor=reporter_sensor, + start=tz.localize(datetime(2023, 3, 26)), + end=tz.localize(datetime(2023, 3, 27)), + belief_time=tz.localize(datetime(2023, 12, 1)), + ) + + assert 
len(result) == 23 + + # transition from summer (CEST) to winter (CET) + result = agg_reporter.compute( + sensor=reporter_sensor, + start=tz.localize(datetime(2023, 10, 29)), + end=tz.localize(datetime(2023, 10, 30)), + belief_time=tz.localize(datetime(2023, 12, 1)), + ) + + assert len(result) == 25 From d51fc399193b1ccc5bd75b18527edbab946ae0bf Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Fri, 7 Jul 2023 14:56:38 +0200 Subject: [PATCH 55/70] fix: never returning None Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index c8c23c703..f2aef79a0 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -113,7 +113,7 @@ def get_data_source_info(cls: type) -> dict: return source_info @property - def data_source(self) -> "DataSource" | None: + def data_source(self) -> "DataSource": from flexmeasures.data.services.data_sources import get_or_create_source if self._data_source is None: From e9564d9ec55b8e6b8145b883eed0c7c9dd38d8fe Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Sun, 9 Jul 2023 23:18:11 +0200 Subject: [PATCH 56/70] test: add test to check timely-beliefs resampling and calling an aggregator between two dates with different offsets, in this case, daylight saving transitions.
Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/tests/conftest.py | 57 +++++++++++-------- .../models/reporting/tests/test_aggregator.py | 54 ++++++++++++++++-- .../reporting/tests/test_pandas_reporter.py | 18 +++--- 3 files changed, 91 insertions(+), 38 deletions(-) diff --git a/flexmeasures/data/models/reporting/tests/conftest.py b/flexmeasures/data/models/reporting/tests/conftest.py index c5ab65a16..b8e9d74cc 100644 --- a/flexmeasures/data/models/reporting/tests/conftest.py +++ b/flexmeasures/data/models/reporting/tests/conftest.py @@ -33,12 +33,23 @@ def setup_dummy_data(db, app): db.session.add(sensor1) sensor2 = Sensor("sensor 2", generic_asset=dummy_asset, event_resolution="1h") db.session.add(sensor2) + sensor3 = Sensor( + "sensor 3", + generic_asset=dummy_asset, + event_resolution="1h", + timezone="Europe/Amsterdam", + ) + db.session.add(sensor3) + report_sensor = Sensor( "report sensor", generic_asset=pandas_report, event_resolution="1h" ) db.session.add(report_sensor) daily_report_sensor = Sensor( - "daily report sensor", generic_asset=pandas_report, event_resolution="1D" + "daily report sensor", + generic_asset=pandas_report, + event_resolution="1D", + timezone="Europe/Amsterdam", ) db.session.add(daily_report_sensor) @@ -82,31 +93,31 @@ def setup_dummy_data(db, app): ) ) - # add simple data for testing DST transition - for t in range(24 * 4): # create data for 4 days - # UTC+1 -> UTC+2 - beliefs.append( - TimedBelief( - event_start=datetime(2023, 3, 24, tzinfo=utc) + timedelta(hours=t), - belief_horizon=timedelta(hours=24), - event_value=value, - sensor=sensor, - source=source, - ) + # add simple data for testing DST transition + for t in range(24 * 4): # create data for 4 days + # UTC+1 -> UTC+2 + beliefs.append( + TimedBelief( + event_start=datetime(2023, 3, 24, tzinfo=utc) + timedelta(hours=t), + belief_horizon=timedelta(hours=24), + event_value=t, + sensor=sensor3, + source=source1, ) - - # UTC+2 -> UTC+1 - beliefs.append( - 
TimedBelief( - event_start=datetime(2023, 10, 27, tzinfo=utc) + timedelta(hours=t), - belief_horizon=timedelta(hours=24), - event_value=value, - sensor=sensor, - source=source, - ) + ) + + # UTC+2 -> UTC+1 + beliefs.append( + TimedBelief( + event_start=datetime(2023, 10, 27, tzinfo=utc) + timedelta(hours=t), + belief_horizon=timedelta(hours=24), + event_value=t, + sensor=sensor3, + source=source1, ) + ) db.session.add_all(beliefs) db.session.commit() - yield sensor1, sensor2, report_sensor + yield sensor1, sensor2, sensor3, report_sensor, daily_report_sensor diff --git a/flexmeasures/data/models/reporting/tests/test_aggregator.py b/flexmeasures/data/models/reporting/tests/test_aggregator.py index 13aa65a16..02a2a88aa 100644 --- a/flexmeasures/data/models/reporting/tests/test_aggregator.py +++ b/flexmeasures/data/models/reporting/tests/test_aggregator.py @@ -5,6 +5,8 @@ from datetime import datetime from pytz import utc, timezone +import pandas as pd + @pytest.mark.parametrize( "aggregation_method, expected_value", @@ -34,7 +36,7 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): 7) prod: -1 = (1) * (-1) 8) median: even number of elements, mean of the most central elements, 0 = ((1) + (-1))/2 """ - s1, s2, reporter_sensor = setup_dummy_data + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data reporter_config = dict( data=[ @@ -47,7 +49,7 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): agg_reporter = AggregatorReporter(config=reporter_config) result = agg_reporter.compute( - sensor=reporter_sensor, + sensor=report_sensor, start=datetime(2023, 5, 10, tzinfo=utc), end=datetime(2023, 5, 11, tzinfo=utc), ) @@ -60,11 +62,11 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): def test_dst_transition(setup_dummy_data): - s1, _, reporter_sensor = setup_dummy_data + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data reporter_config = dict( data=[ - 
dict(sensor=s1.id, source=1), + dict(sensor=s3.id, source=1), ], ) @@ -74,7 +76,7 @@ def test_dst_transition(setup_dummy_data): # transition from winter (CET) to summer (CEST) result = agg_reporter.compute( - sensor=reporter_sensor, + sensor=report_sensor, start=tz.localize(datetime(2023, 3, 26)), end=tz.localize(datetime(2023, 3, 27)), belief_time=tz.localize(datetime(2023, 12, 1)), @@ -84,10 +86,50 @@ def test_dst_transition(setup_dummy_data): # transition from summer (CEST) to winter (CET) result = agg_reporter.compute( - sensor=reporter_sensor, + sensor=report_sensor, start=tz.localize(datetime(2023, 10, 29)), end=tz.localize(datetime(2023, 10, 30)), belief_time=tz.localize(datetime(2023, 12, 1)), ) assert len(result) == 25 + + +def test_resampling(setup_dummy_data): + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data + + reporter_config = dict( + data=[ + dict(sensor=s3.id, source=1), + ], + ) + + agg_reporter = AggregatorReporter(config=reporter_config) + + tz = timezone("Europe/Amsterdam") + + # transition from winter (CET) to summer (CEST) + result = agg_reporter.compute( + sensor=daily_report_sensor, + start=tz.localize(datetime(2023, 3, 27)), + end=tz.localize(datetime(2023, 3, 28)), + belief_time=tz.localize(datetime(2023, 12, 1)), + resolution=pd.Timedelta("1D"), + ) + + assert result.event_starts[0] == pd.Timestamp( + year=2023, month=3, day=27, tz="Europe/Amsterdam" + ) + + # transition from summer (CEST) to winter (CET) + result = agg_reporter.compute( + sensor=daily_report_sensor, + start=tz.localize(datetime(2023, 10, 29)), + end=tz.localize(datetime(2023, 10, 30)), + belief_time=tz.localize(datetime(2023, 12, 1)), + resolution=pd.Timedelta("1D"), + ) + + assert result.event_starts[0] == pd.Timestamp( + year=2023, month=10, day=29, tz="Europe/Amsterdam" + ) diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index 7e8acb29f..f5be5f47e 100644 
--- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -6,7 +6,7 @@ def test_reporter(app, setup_dummy_data): - s1, s2, reporter_sensor = setup_dummy_data + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data reporter_config = dict( input_variables=["sensor_1", "sensor_2"], @@ -46,13 +46,13 @@ def test_reporter(app, setup_dummy_data): input_sensors = dict(sensor_1=dict(sensor=s1), sensor_2=dict(sensor=s2)) report1 = reporter.compute( - sensor=reporter_sensor, start=start, end=end, input_sensors=input_sensors + sensor=report_sensor, start=start, end=end, input_sensors=input_sensors ) assert len(report1) == 5 assert str(report1.event_starts[0]) == "2023-04-10 00:00:00+00:00" assert ( - report1.sensor == reporter_sensor + report1.sensor == report_sensor ) # check that the output sensor is effectively assigned. data_source_name = app.config.get("FLEXMEASURES_DEFAULT_DATASOURCE") @@ -65,7 +65,7 @@ def test_reporter(app, setup_dummy_data): # check that calling compute with different parameters changes the result report2 = reporter.compute( - sensor=reporter_sensor, + sensor=report_sensor, start=datetime(2023, 4, 10, 3, tzinfo=utc), end=end, input_sensors=input_sensors, @@ -77,7 +77,7 @@ def test_reporter(app, setup_dummy_data): def test_reporter_repeated(setup_dummy_data): """check that calling compute doesn't change the result""" - s1, s2, reporter_sensor = setup_dummy_data + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data reporter_config = dict( input_variables=["sensor_1", "sensor_2"], @@ -111,7 +111,7 @@ def test_reporter_repeated(setup_dummy_data): ) input = dict( - sensor=reporter_sensor.id, + sensor=report_sensor.id, start="2023-04-10T00:00:00 00:00", end="2023-04-10T10:00:00 00:00", input_sensors=dict( @@ -130,7 +130,7 @@ def test_reporter_repeated(setup_dummy_data): def test_reporter_empty(setup_dummy_data): """check that calling compute with 
missing data returns an empty report""" - s1, s2, reporter_sensor = setup_dummy_data + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data config = dict( input_variables=["sensor_1"], @@ -142,7 +142,7 @@ def test_reporter_empty(setup_dummy_data): # compute report on available data report = reporter.compute( - sensor=reporter_sensor, + sensor=report_sensor, start=datetime(2023, 4, 10, tzinfo=utc), end=datetime(2023, 4, 10, 10, tzinfo=utc), input_sensors=dict(sensor_1=dict(sensor=s1)), @@ -152,7 +152,7 @@ def test_reporter_empty(setup_dummy_data): # compute report on dates with no data available report = reporter.compute( - sensor=reporter_sensor, + sensor=report_sensor, start=datetime(2021, 4, 10, tzinfo=utc), end=datetime(2021, 4, 10, 10, tzinfo=utc), input_sensors=dict(sensor_1=dict(sensor=s1)), From 132cf4636d34482f9f8a5032bf10b3fb82077f5c Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 10 Jul 2023 16:27:37 +0200 Subject: [PATCH 57/70] test: change output sensor id Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/schemas/tests/test_reporting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index aaa3ff10a..fa4f211f0 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -108,7 +108,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se [ ( { - "sensor": 1, + "sensor": 2, "input_sensors": {"sensor_1": {"sensor": 1}}, "start": "2023-06-06T00:00:00+02:00", "end": "2023-06-06T00:00:00+02:00", @@ -123,7 +123,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ), ( { - "sensor": 1, + "sensor": 2, "input_sensors": { "sensor_1": { "sensor": 1, From ff2f478b31080d6b29f97536eac6bc89b031c5bc Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 10 Jul 2023 16:32:12 +0200 
Subject: [PATCH 58/70] docs: add docstring to the data_source property of the class DataGenerator. Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index f2aef79a0..f8970e864 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -114,6 +114,13 @@ def get_data_source_info(cls: type) -> dict: @property def data_source(self) -> "DataSource": + """DataSource property derived from the `source_type` (scheduler, forecaster or reporter), `model` (e.g AggregatorReporter) + and `attributes`. This property creates a new data source in case of not finding one in the database that matches + the source_info. + + This property is created once and cached for the rest of the lifetime of the DataGenerator object. + """ + from flexmeasures.data.services.data_sources import get_or_create_source if self._data_source is None: From 0fc8de2e2b628537b56e90abc2ccbc2f61f95c0a Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 10 Jul 2023 16:34:33 +0200 Subject: [PATCH 59/70] docs: edit data_source docstring Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index f8970e864..ca03325ca 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -114,11 +114,9 @@ def get_data_source_info(cls: type) -> dict: @property def data_source(self) -> "DataSource": - """DataSource property derived from the `source_type` (scheduler, forecaster or reporter), `model` (e.g AggregatorReporter) - and `attributes`. This property creates a new data source in case of not finding one in the database that matches - the source_info.
- - This property is created once and cached for the rest of the lifetime of the DataGenerator object. + """DataSource property derived from the `source_info`: `source_type` (scheduler, forecaster or reporter), `model` (e.g AggregatorReporter) + and `attributes`. It looks for a data source in the database the marges the `source_info` and, in case of not finding any, it creates a new one. + This property gets created once and it's cached for the rest of the lifetime of the DataGenerator object. """ from flexmeasures.data.services.data_sources import get_or_create_source From 2c4f8bc1b21865fa844bef7c33fd0b24dccdfdd7 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Mon, 10 Jul 2023 16:55:40 +0200 Subject: [PATCH 60/70] refactor: renaming input_sensors to input_variables Signed-off-by: Victor Garcia Reolid --- .../data/models/reporting/pandas_reporter.py | 8 ++++---- .../reporting/tests/test_pandas_reporter.py | 12 +++++------ .../reporting/tests/test_tibber_reporter.py | 4 ++-- .../data/schemas/reporting/pandas_reporter.py | 6 +++--- .../data/schemas/tests/test_reporting.py | 20 +++++++++++++------ 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 9e4398322..640e95be2 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -35,7 +35,7 @@ def fetch_data( self, start: datetime, end: datetime, - input_sensors: dict, + input_variables: dict, resolution: timedelta | None = None, belief_time: datetime | None = None, ): @@ -44,7 +44,7 @@ def fetch_data( """ self.data = {} - for alias, tb_query in input_sensors.items(): + for alias, tb_query in input_variables.items(): _tb_query = tb_query.copy() # using start / end instead of event_starts_after/event_ends_before when not defined @@ -79,7 +79,7 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: # report
configuration start: datetime = kwargs.get("start") end: datetime = kwargs.get("end") - input_sensors: dict = kwargs.get("input_sensors") + input_variables: dict = kwargs.get("input_variables") resolution: timedelta | None = kwargs.get("resolution", None) belief_time: datetime | None = kwargs.get("belief_time", None) @@ -88,7 +88,7 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: resolution = self.sensor.event_resolution # fetch sensor data - self.fetch_data(start, end, input_sensors, resolution, belief_time) + self.fetch_data(start, end, input_variables, resolution, belief_time) if belief_time is None: belief_time = server_now() diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index f5be5f47e..a14a45463 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -43,10 +43,10 @@ def test_reporter(app, setup_dummy_data): start = datetime(2023, 4, 10, tzinfo=utc) end = datetime(2023, 4, 10, 10, tzinfo=utc) - input_sensors = dict(sensor_1=dict(sensor=s1), sensor_2=dict(sensor=s2)) + input_variables = dict(sensor_1=dict(sensor=s1), sensor_2=dict(sensor=s2)) report1 = reporter.compute( - sensor=report_sensor, start=start, end=end, input_sensors=input_sensors + sensor=report_sensor, start=start, end=end, input_variables=input_variables ) assert len(report1) == 5 @@ -68,7 +68,7 @@ def test_reporter(app, setup_dummy_data): sensor=report_sensor, start=datetime(2023, 4, 10, 3, tzinfo=utc), end=end, - input_sensors=input_sensors, + input_variables=input_variables, ) assert len(report2) == 4 assert str(report2.event_starts[0]) == "2023-04-10 02:00:00+00:00" @@ -114,7 +114,7 @@ def test_reporter_repeated(setup_dummy_data): sensor=report_sensor.id, start="2023-04-10T00:00:00 00:00", end="2023-04-10T10:00:00 00:00", - input_sensors=dict( + input_variables=dict( 
sensor_1=dict(sensor=s1.id), sensor_2=dict(sensor=s2.id), ), @@ -145,7 +145,7 @@ def test_reporter_empty(setup_dummy_data): sensor=report_sensor, start=datetime(2023, 4, 10, tzinfo=utc), end=datetime(2023, 4, 10, 10, tzinfo=utc), - input_sensors=dict(sensor_1=dict(sensor=s1)), + input_variables=dict(sensor_1=dict(sensor=s1)), ) assert not report.empty @@ -155,7 +155,7 @@ def test_reporter_empty(setup_dummy_data): sensor=report_sensor, start=datetime(2021, 4, 10, tzinfo=utc), end=datetime(2021, 4, 10, 10, tzinfo=utc), - input_sensors=dict(sensor_1=dict(sensor=s1)), + input_variables=dict(sensor_1=dict(sensor=s1)), ) assert report.empty diff --git a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py index bf17e2a47..bac4409ec 100644 --- a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py @@ -90,7 +90,7 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: da_prices = Sensor.query.filter(Sensor.name == "DA prices").one_or_none() - self.input_sensors = { + self.input_variables = { "energy_tax": {"sensor": EnergyTax}, "VAT": {"sensor": VAT}, "tariff": {"sensor": tibber_tariff}, @@ -139,7 +139,7 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: self._inner_reporter = PandasReporter(config=pandas_reporter_config) def _compute_report(self, **kwargs): - kwargs["input_sensors"] = self.input_sensors + kwargs["input_variables"] = self.input_variables return self._inner_reporter.compute(**kwargs) diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 26e492c1d..062069bd9 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -155,11 +155,11 @@ def validate_chaining(self, data, **kwargs): class 
PandasReporterInputSchema(ReporterInputSchema): # make start and end optional, conditional on providing the time parameters - # for the single sensors in `input_sensors` + # for the single sensors in `input_variables` start = AwareDateTimeField(required=False) end = AwareDateTimeField(required=False) - input_sensors = fields.Dict( + input_variables = fields.Dict( keys=fields.Str(), # alias values=fields.Nested(BeliefsSearchConfigSchema()), required=True, @@ -176,7 +176,7 @@ def validate_time_parameters(self, data, **kwargs): if ("start" in data) and ("end" in data): return - for alias, input_sensor in data.get("input_sensors").items(): + for alias, input_sensor in data.get("input_variables").items(): if ("event_starts_after" not in input_sensor) and ("start" not in data): raise ValidationError( f"Start parameter not provided for sensor `{alias}` ({input_sensor})." diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index fa4f211f0..36fc28f12 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -108,8 +108,12 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se [ ( { - "sensor": 2, - "input_sensors": {"sensor_1": {"sensor": 1}}, + "sensor": 2, # sensor to save the output to + "input_variables": { # we're describing how the named variables should be constructed, by defining search filters on the sensor data, rather than on the sensor + "sensor_1_df": { + "sensor": 1 + }, # alias, i.e. variable name of the DataFrame containing the input data + }, "start": "2023-06-06T00:00:00+02:00", "end": "2023-06-06T00:00:00+02:00", }, @@ -117,15 +121,19 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ), ( { - "input_sensors": {"sensor_1": {"sensor": 1}}, + "input_variables": { + "sensor_1_df": { + "sensor": 1 + } # alias, i.e. 
variable name of the DataFrame containing the input data + }, }, False, ), ( { - "sensor": 2, - "input_sensors": { - "sensor_1": { + "sensor": 2, # sensor to save the output to + "input_variables": { + "sensor_1_df": { # alias, i.e. variable name of the DataFrame containing the input data "sensor": 1, "event_starts_after": "2023-06-07T00:00:00+02:00", "event_ends_before": "2023-06-07T00:00:00+02:00", From 46734928fa92540b1973bb24bc6df0592bc2276c Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Wed, 2 Aug 2023 21:11:21 +0200 Subject: [PATCH 61/70] rename input to parameters Signed-off-by: Victor Garcia Reolid --- flexmeasures/cli/tests/test_data_add.py | 6 ++--- flexmeasures/data/models/data_sources.py | 24 +++++++++---------- .../data/models/reporting/__init__.py | 4 ++-- .../data/models/reporting/pandas_reporter.py | 4 ++-- .../reporting/tests/test_pandas_reporter.py | 6 ++--- .../data/schemas/reporting/__init__.py | 4 ++-- .../data/schemas/reporting/pandas_reporter.py | 4 ++-- .../data/schemas/tests/test_reporting.py | 16 +++++++------ 8 files changed, 35 insertions(+), 33 deletions(-) diff --git a/flexmeasures/cli/tests/test_data_add.py b/flexmeasures/cli/tests/test_data_add.py index 995cc706f..5c174bdb7 100644 --- a/flexmeasures/cli/tests/test_data_add.py +++ b/flexmeasures/cli/tests/test_data_add.py @@ -96,7 +96,7 @@ def test_cli_help(app): @pytest.mark.skip_github -def test_add_reporter(app, db, setup_dummy_data, reporter_config_raw): +def test_add_reporter(app, db, setup_dummy_data, reporter_config): """ The reporter aggregates input data from two sensors (both have 200 data points) to a two-hour resolution. 
@@ -136,7 +136,7 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config_raw): # save reporter_config to a json file with open("reporter_config.json", "w") as f: - json.dump(reporter_config_raw, f) + json.dump(reporter_config, f) # call command result = runner.invoke(add_report, cli_input) @@ -188,7 +188,7 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config_raw): # save reporter_config to a json file with open("reporter_config.json", "w") as f: - json.dump(reporter_config_raw, f) + json.dump(reporter_config, f) # call command result = runner.invoke(add_report, cli_input) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index ca03325ca..57a0a31fd 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -23,7 +23,7 @@ class DataGenerator: _config: dict = None - _input_schema: Schema | None = None + _parameters_schema: Schema | None = None _config_schema: Schema | None = None def __init__(self, config: dict | None = None, **kwargs) -> None: @@ -31,7 +31,7 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: The configuration `config` stores static parameters, parameters that, if changed, trigger the creation of a new DataSource. Dynamic parameters, such as - the start date, can go into the `input`. See docstring of the method `DataGenerator.compute` for + the start date, can go into the `parameters`. See docstring of the method `DataGenerator.compute` for more details. 
@@ -75,22 +75,22 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: def _compute(self, **kwargs): raise NotImplementedError() - def compute(self, input: dict | None = None, **kwargs): - """The configuration `input` stores dynamic parameters, parameters that, if + def compute(self, parameters: dict | None = None, **kwargs): + """The configuration `parameters` stores dynamic parameters, parameters that, if changed, DO NOT trigger the creation of a new DataSource. Static parameters, such as the topology of an energy system, can go into `config`. - :param input: serialized `input` parameters, defaults to None + :param parameters: serialized `parameters` parameters, defaults to None """ - if input is None: - _input = kwargs - DataGenerator.validate_deserialized(_input, self._input_schema) - elif self._input_schema: - _input = self._input_schema.load(input) + if parameters is None: + _parameters = kwargs + DataGenerator.validate_deserialized(_parameters, self._parameters_schema) + elif self._parameters_schema: + _parameters = self._parameters_schema.load(parameters) else: # skip validation - _input = input + _parameters = parameters - return self._compute(**_input) + return self._compute(**_parameters) @staticmethod def validate_deserialized(values: dict, schema: Schema) -> bool: diff --git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index a1ee429d2..7427e973d 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -4,7 +4,7 @@ from flexmeasures.data.models.data_sources import DataGenerator from flexmeasures.data.schemas.reporting import ( - ReporterInputSchema, + ReporterParametersSchema, ReporterConfigSchema, ) @@ -20,7 +20,7 @@ class Reporter(DataGenerator): sensor: Sensor = None - _input_schema = ReporterInputSchema() + _parameters_schema = ReporterParametersSchema() _config_schema = ReporterConfigSchema() def _compute(self, **kwargs) -> 
tb.BeliefsDataFrame: diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 640e95be2..635f2b01b 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -10,7 +10,7 @@ from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.schemas.reporting.pandas_reporter import ( PandasReporterConfigSchema, - PandasReporterInputSchema, + PandasReporterParametersSchema, ) from flexmeasures.data.models.time_series import Sensor from flexmeasures.utils.time_utils import server_now @@ -23,7 +23,7 @@ class PandasReporter(Reporter): __author__ = "Seita" _config_schema = PandasReporterConfigSchema() - _input_schema = PandasReporterInputSchema() + _parameters_schema = PandasReporterParametersSchema() input_variables: list[str] = None transformations: list[dict[str, Any]] = None diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index a14a45463..15b3d91a9 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -110,7 +110,7 @@ def test_reporter_repeated(setup_dummy_data): final_df_output="df_merge", ) - input = dict( + parameters = dict( sensor=report_sensor.id, start="2023-04-10T00:00:00 00:00", end="2023-04-10T10:00:00 00:00", @@ -122,8 +122,8 @@ def test_reporter_repeated(setup_dummy_data): reporter = PandasReporter(config=reporter_config) - report1 = reporter.compute(input=input) - report2 = reporter.compute(input=input) + report1 = reporter.compute(parameters=parameters) + report2 = reporter.compute(parameters=parameters) assert all(report2.values == report1.values) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 434f43c85..0d66c7225 100644 --- 
a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -15,9 +15,9 @@ class ReporterConfigSchema(Schema): pass -class ReporterInputSchema(Schema): +class ReporterParametersSchema(Schema): """ - This schema is used to validate the inputs to the method `compute` of + This schema is used to validate the parameters to the method `compute` of the Reporter class. Inherit from this class to extend this schema with your own parameters. """ diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 062069bd9..4c3a97e9e 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -7,7 +7,7 @@ from flexmeasures.data.schemas import AwareDateTimeField, DurationField from flexmeasures.data.schemas.reporting import ( ReporterConfigSchema, - ReporterInputSchema, + ReporterParametersSchema, ) from timely_beliefs import BeliefsDataFrame @@ -153,7 +153,7 @@ def validate_chaining(self, data, **kwargs): ) -class PandasReporterInputSchema(ReporterInputSchema): +class PandasReporterParametersSchema(ReporterParametersSchema): # make start and end optional, conditional on providing the time parameters # for the single sensors in `input_variables` start = AwareDateTimeField(required=False) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 11ec68ca3..6354bbd5e 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -1,6 +1,6 @@ from flexmeasures.data.schemas.reporting.pandas_reporter import ( PandasReporterConfigSchema, - PandasReporterInputSchema, + PandasReporterParametersSchema, ) from marshmallow.exceptions import ValidationError @@ -75,7 +75,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se @pytest.mark.parametrize( - "input, 
is_valid", + "parameters, is_valid", [ ( { @@ -104,7 +104,7 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se { "sensor": 2, # sensor to save the output to "input_variables": { - "sensor_1_df": { # alias, i.e. variable name of the DataFrame containing the input data + "sensor_1_df": { # alias, i.e. variable name of the DataFrame containing the parameters data "sensor": 1, "event_starts_after": "2023-06-07T00:00:00+02:00", "event_ends_before": "2023-06-07T00:00:00+02:00", @@ -115,12 +115,14 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se ), ], ) -def test_pandas_reporter_input_schema(input, is_valid, db, app, setup_dummy_sensors): +def test_pandas_reporter_parameters_schema( + parameters, is_valid, db, app, setup_dummy_sensors +): - schema = PandasReporterInputSchema() + schema = PandasReporterParametersSchema() if is_valid: - schema.load(input) + schema.load(parameters) else: with pytest.raises(ValidationError): - schema.load(input) + schema.load(parameters) From c2e37a9d546f8320f5330af3712c7871a02f9594 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 11:52:45 +0200 Subject: [PATCH 62/70] remove unnecessary import Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index fbd35676f..1650817de 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -10,7 +10,6 @@ from statsmodels.api import OLS import timely_beliefs as tb from flexmeasures.data.models.reporting import Reporter -from flexmeasures import User from flexmeasures.data.models.annotations import Annotation from flexmeasures.data.models.data_sources import DataSource From cfc554e972fbc7edc48cd3a56ed2535b40423e6d Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 14:29:27 +0200 Subject: [PATCH 63/70] add 
save_config attribute Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/models/data_sources.py | 24 +++++++++++++---- flexmeasures/data/tests/test_data_source.py | 30 +++++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 57a0a31fd..35f2c2865 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -25,8 +25,9 @@ class DataGenerator: _parameters_schema: Schema | None = None _config_schema: Schema | None = None + _save_config: bool = True - def __init__(self, config: dict | None = None, **kwargs) -> None: + def __init__(self, config: dict | None = None, save_config=True, **kwargs) -> None: """Base class for the Schedulers, Reporters and Forecasters. The configuration `config` stores static parameters, parameters that, if @@ -34,7 +35,6 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: the start date, can go into the `parameters`. See docstring of the method `DataGenerator.compute` for more details. - Create a new DataGenerator with a certain configuration. There are two alternatives to define the parameters: @@ -43,6 +43,9 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: The configuration is validated using the schema `_config_schema`, to be defined by the subclass. + `config` cannot contain the key `config` at its top level, otherwise it could conflict with the constructor keyword argument `config` + when passing the config as deserialized attributes. + Example: The configuration requires two parameters for the PV and consumption sensors. 
@@ -62,8 +65,11 @@ def __init__(self, config: dict | None = None, **kwargs) -> None: :param config: serialized `config` parameters, defaults to None + :param save_config: whether to save the config into the data source attributes """ + self._save_config = save_config + if config is None: self._config = kwargs DataGenerator.validate_deserialized(self._config, self._config_schema) @@ -80,6 +86,9 @@ def compute(self, parameters: dict | None = None, **kwargs): changed, DO NOT trigger the creation of a new DataSource. Static parameters, such as the topology of an energy system, can go into `config`. + `parameters` cannot contain the key `parameters` at its top level, otherwise it could conflict with keyword argument `parameters` + of the method compute when passing the `parameters` as deserialized attributes. + :param parameters: serialized `parameters` parameters, defaults to None """ if parameters is None: @@ -123,9 +132,14 @@ def data_source(self) -> "DataSource": if self._data_source is None: data_source_info = self.get_data_source_info() - data_source_info["attributes"] = { - "data_generator": {"config": self._config_schema.dump(self._config)} - } + + attributes = {} + if self._save_config: + attributes = { + "data_generator": {"config": self._config_schema.dump(self._config)} + } + + data_source_info["attributes"] = attributes self._data_source = get_or_create_source(**data_source_info) diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index f31c680be..160a12c1f 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -58,3 +58,33 @@ def test_data_source(db, app, aggregator_reporter_data_source): ds4 = ds3.data_source.data_generator assert ds4._config == ds3._config + + +def test_data_generator_save_config( + db, app, aggregator_reporter_data_source, add_nearby_weather_sensors +): + TestTeporter = app.data_generators["reporter"].get("TestReporter") + + 
reporter_sensor = add_nearby_weather_sensors.get("farther_temperature") + + reporter = TestTeporter(config={"a": "1"}) + + res = reporter.compute( + sensor=reporter_sensor, + start=datetime(2023, 1, 1, tzinfo=UTC), + end=datetime(2023, 1, 2, tzinfo=UTC), + ) + + assert res.lineage.sources[0].attributes.get("data_generator").get("config") == { + "a": "1" + } + + reporter = TestTeporter(config={"a": "1"}, save_config=False) + + res = reporter.compute( + sensor=reporter_sensor, + start=datetime(2023, 1, 1, tzinfo=UTC), + end=datetime(2023, 1, 2, tzinfo=UTC), + ) + + assert len(res.lineage.sources[0].attributes) == 0 From b3899517a04bb2b04a2ed614cb433c1bc32276a8 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 16:28:55 +0200 Subject: [PATCH 64/70] remove leftover comment Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/tests/conftest.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index 1650817de..4cd7b1bf5 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -183,9 +183,6 @@ def setup_annotations( @pytest.fixture(scope="module") def aggregator_reporter_data_source(app, db, add_nearby_weather_sensors): - - # sensor = add_nearby_weather_sensors.get("temperature") - class TestReporterConfigSchema(Schema): a = fields.Str() From 59de8ea67bf5c56787003414a889157704cef0c3 Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 16:34:42 +0200 Subject: [PATCH 65/70] add inline comments Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/tests/conftest.py | 2 +- flexmeasures/data/tests/test_data_source.py | 36 +++++++++++---------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index 4cd7b1bf5..ef334f52c 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -182,7 +182,7 @@ 
def setup_annotations( @pytest.fixture(scope="module") -def aggregator_reporter_data_source(app, db, add_nearby_weather_sensors): +def test_reporter(app, db, add_nearby_weather_sensors): class TestReporterConfigSchema(Schema): a = fields.Str() diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index 160a12c1f..b57f126bc 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -33,36 +33,38 @@ def test_get_reporter_from_source( ) -def test_data_source(db, app, aggregator_reporter_data_source): +def test_data_source(db, app, test_reporter): + # get TestReporter class from the data_generators registry TestTeporter = app.data_generators["reporter"].get("TestReporter") - ds1 = TestTeporter(config={"a": "1"}) + reporter1 = TestTeporter(config={"a": "1"}) - db.session.add(ds1.data_source) - db.session.commit() + db.session.add(reporter1.data_source) - ds2 = TestTeporter(config={"a": "1"}) + reporter2 = TestTeporter(config={"a": "1"}) - assert ds1.data_source == ds2.data_source - assert ds1.data_source.attributes.get("data_generator").get( + # reporter1 and reporter2 have the same data_source because they share the same config + assert reporter1.data_source == reporter2.data_source + assert reporter1.data_source.attributes.get("data_generator").get( "config" - ) == ds2.data_source.attributes.get("data_generator").get("config") + ) == reporter2.data_source.attributes.get("data_generator").get("config") - ds3 = TestTeporter(config={"a": "2"}) + reporter3 = TestTeporter(config={"a": "2"}) - assert ds3.data_source != ds2.data_source - assert ds3.data_source.attributes.get("data_generator").get( + # reporter3 and reporter2 have different data sources because they have different config values + assert reporter3.data_source != reporter2.data_source + assert reporter3.data_source.attributes.get("data_generator").get( "config" - ) != 
ds2.data_source.attributes.get("data_generator").get("config") + ) != reporter2.data_source.attributes.get("data_generator").get("config") - ds4 = ds3.data_source.data_generator + # recreate reporter3 from its data source + reporter4 = reporter3.data_source.data_generator - assert ds4._config == ds3._config + # check that reporter3 and reporter4 share the same config values + assert reporter4._config == reporter3._config -def test_data_generator_save_config( - db, app, aggregator_reporter_data_source, add_nearby_weather_sensors -): +def test_data_generator_save_config(db, app, test_reporter, add_nearby_weather_sensors): TestTeporter = app.data_generators["reporter"].get("TestReporter") reporter_sensor = add_nearby_weather_sensors.get("farther_temperature") From 8470fed5bfbab078d54b4759820c6bc59cb9e94f Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 16:49:51 +0200 Subject: [PATCH 66/70] deprecation message for app.reporters and app.schedulers Signed-off-by: Victor Garcia Reolid --- flexmeasures/app.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/flexmeasures/app.py b/flexmeasures/app.py index b1aefb475..64e745ff6 100644 --- a/flexmeasures/app.py +++ b/flexmeasures/app.py @@ -127,15 +127,24 @@ def create( # noqa C901 reporters = get_classes_module("flexmeasures.data.models", reporting.Reporter) schedulers = get_classes_module("flexmeasures.data.models", planning.Scheduler) - app.reporters = reporters - app.schedulers = schedulers - app.data_generators = dict() app.data_generators["reporter"] = copy( reporters ) # use copy to avoid mutating app.reporters app.data_generators["scheduler"] = schedulers + # deprecation of app.reporters + app.reporters = reporters + app.schedulers = schedulers + + def get_reporters(): + app.logger.warning( + '`app.reporters` is deprecated. Use `app.data_generators["reporter"]` instead.'
+ ) + return app.data_generators["reporter"] + + setattr(app, "reporters", property(get_reporters)) + # add auth policy from flexmeasures.auth import register_at as register_auth_at From a55e661bceec777a69d3ab68a637c496a91de0cd Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 16:50:49 +0200 Subject: [PATCH 67/70] fix typo Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/tests/test_data_source.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index b57f126bc..bcae33f8d 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -35,13 +35,13 @@ def test_get_reporter_from_source( def test_data_source(db, app, test_reporter): # get TestReporter class from the data_generators registry - TestTeporter = app.data_generators["reporter"].get("TestReporter") + TestReporter = app.data_generators["reporter"].get("TestReporter") - reporter1 = TestTeporter(config={"a": "1"}) + reporter1 = TestReporter(config={"a": "1"}) db.session.add(reporter1.data_source) - reporter2 = TestTeporter(config={"a": "1"}) + reporter2 = TestReporter(config={"a": "1"}) # reporter1 and reporter2 have the same data_source because they share the same config assert reporter1.data_source == reporter2.data_source @@ -49,7 +49,7 @@ def test_data_source(db, app, test_reporter): "config" ) == reporter2.data_source.attributes.get("data_generator").get("config") - reporter3 = TestTeporter(config={"a": "2"}) + reporter3 = TestReporter(config={"a": "2"}) # reporter3 and reporter2 have different data sources because they have different config values assert reporter3.data_source != reporter2.data_source @@ -65,11 +65,11 @@ def test_data_source(db, app, test_reporter): def test_data_generator_save_config(db, app, test_reporter, add_nearby_weather_sensors): - TestTeporter = 
app.data_generators["reporter"].get("TestReporter") + TestReporter = app.data_generators["reporter"].get("TestReporter") reporter_sensor = add_nearby_weather_sensors.get("farther_temperature") - reporter = TestTeporter(config={"a": "1"}) + reporter = TestReporter(config={"a": "1"}) res = reporter.compute( sensor=reporter_sensor, @@ -81,7 +81,7 @@ def test_data_generator_save_config(db, app, test_reporter, add_nearby_weather_s "a": "1" } - reporter = TestTeporter(config={"a": "1"}, save_config=False) + reporter = TestReporter(config={"a": "1"}, save_config=False) res = reporter.compute( sensor=reporter_sensor, From 1ed7e5e3f98d648358dda6e3abb7f0d94528f3fc Mon Sep 17 00:00:00 2001 From: Victor Garcia Reolid Date: Thu, 3 Aug 2023 16:59:27 +0200 Subject: [PATCH 68/70] use data source created by the data generator Signed-off-by: Victor Garcia Reolid --- flexmeasures/data/tests/conftest.py | 9 +++------ flexmeasures/data/tests/test_data_source.py | 6 ++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index ef334f52c..88b1688c8 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -209,12 +209,9 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: config = dict(a="b") - ds = DataSource( - name="Test", - model="TestReporter", - type="reporter", - attributes=dict(data_generator=dict(config=config)), - ) + ds = TestReporter(config=config).data_source + + assert ds.name == app.config.get("FLEXMEASURES_DEFAULT_DATASOURCE") db.session.add(ds) db.session.commit() diff --git a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index bcae33f8d..1233fe122 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -6,11 +6,9 @@ from pytz import UTC -def test_get_reporter_from_source( - db, app, aggregator_reporter_data_source, add_nearby_weather_sensors 
-): +def test_get_reporter_from_source(db, app, test_reporter, add_nearby_weather_sensors): - reporter = aggregator_reporter_data_source.data_generator + reporter = test_reporter.data_generator reporter_sensor = add_nearby_weather_sensors.get("farther_temperature") From 1e8704e84ba1fcc3fe12caa122855ab79c4d129a Mon Sep 17 00:00:00 2001 From: Victor Date: Thu, 3 Aug 2023 23:47:32 +0200 Subject: [PATCH 69/70] feat: support YAML in `flexmeasures add report` command (#752) * feat: add pyyaml to the requirements Signed-off-by: Victor Garcia Reolid * feat: support YAML and add report_config Signed-off-by: Victor Garcia Reolid * fix: move `types-PyYAML` dependency to the right place Signed-off-by: Victor Garcia Reolid * fix: use a DataGenerator with defined schemas Signed-off-by: Victor Garcia Reolid * fix: adapt tests of the schemas Signed-off-by: Victor Garcia Reolid * feat: add option to open default editor Signed-off-by: Victor Garcia Reolid * fix: move sensor to input Signed-off-by: Victor Garcia Reolid * fix: parse resolution properly Signed-off-by: Victor Garcia Reolid * fix: remove accidentally committed file Signed-off-by: Victor Garcia Reolid * fix: avoid potential bug Signed-off-by: Victor Garcia Reolid * rename input to parameters Signed-off-by: Victor Garcia Reolid * add changelog entry Signed-off-by: Victor Garcia Reolid * add pyyaml to app.txt Signed-off-by: Victor Garcia Reolid * add --save-config to the add_report command Signed-off-by: Victor Garcia Reolid * improve changelog files Signed-off-by: Victor Garcia Reolid --------- Signed-off-by: Victor Garcia Reolid Signed-off-by: Victor --- ci/run_mypy.sh | 2 +- documentation/changelog.rst | 2 + documentation/cli/change_log.rst | 1 + flexmeasures/cli/data_add.py | 110 ++++++++++++++++++++---- flexmeasures/cli/tests/test_data_add.py | 26 ++++-- requirements/app.in | 1 + requirements/app.txt | 2 + 7 files changed, 120 insertions(+), 24 deletions(-) diff --git a/ci/run_mypy.sh b/ci/run_mypy.sh index
3b29de77b..88d5bd181 100755 --- a/ci/run_mypy.sh +++ b/ci/run_mypy.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e pip install --upgrade 'mypy>=0.902' -pip install types-pytz types-requests types-Flask types-click types-redis types-tzlocal types-python-dateutil types-setuptools types-tabulate +pip install types-pytz types-requests types-Flask types-click types-redis types-tzlocal types-python-dateutil types-setuptools types-tabulate types-PyYAML # We are checking python files which have type hints, and leave out bigger issues we made issues for # * data/scripts: We'll remove legacy code: https://trello.com/c/1wEnHOkK/7-remove-custom-data-scripts # * data/models and data/services: https://trello.com/c/rGxZ9h2H/540-makequery-call-signature-is-incoherent diff --git a/documentation/changelog.rst b/documentation/changelog.rst index 2dd8a64cb..d9982e9d8 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -25,6 +25,8 @@ New features * Added API endpoints `/sensors/` for fetching a single sensor, `/sensors` (POST) for adding a sensor, `/sensors/` (PATCH) for updating a sensor and `/sensors/` (DELETE) for deleting a sensor. [see `PR #759 `_] and [see `PR #767 `_] and [see `PR #773 `_] and [see `PR #784 `_] * The CLI now allows to set lists and dicts as asset & sensor attributes (formerly only single values) [see `PR #762 `_] * Add `ProcessScheduler` class to optimize the starting time of processes one of the policies developed (INFLEXIBLE, SHIFTABLE and BREAKABLE), accessible via the CLI command `flexmeasures add schedule for-process` [see `PR #729 `_ and `PR #768 `_] +* Users will be able to see (e.g. 
in the UI) exactly which reporter created the report (saved as sensor data), and hosts will be able to identify exactly which configuration was used to create a given report [see `PR #751 `_] +* The CLI `flexmeasures add report` now allows passing `config` and `parameters` in YAML format as files or editable via the system's default editor [see `PR #752 `_] Bugfixes ----------- diff --git a/documentation/cli/change_log.rst b/documentation/cli/change_log.rst index 2d5112445..7ea243638 100644 --- a/documentation/cli/change_log.rst +++ b/documentation/cli/change_log.rst @@ -9,6 +9,7 @@ since v0.15.0 | July XX, 2023 * Allow deleting multiple sensors with a single call to ``flexmeasures delete sensor`` by passing the ``--id`` option multiple times. * Add ``flexmeasures add schedule for-process`` to create a new process schedule for a given power sensor. +* Add support for describing ``config`` and ``parameters`` in YAML for the command ``flexmeasures add report``, editable in user's code editor using the flags ``--edit-config`` or ``--edit-parameters``. * Add ``--kind process`` option to create the asset and sensors for the ``ProcessScheduler`` tutorial. since v0.14.1 | June XX, 2023 diff --git a/flexmeasures/cli/data_add.py b/flexmeasures/cli/data_add.py index cc60491cf..4fe509f45 100755 --- a/flexmeasures/cli/data_add.py +++ b/flexmeasures/cli/data_add.py @@ -8,6 +8,7 @@ from typing import Type, List import isodate import json +import yaml from pathlib import Path from io import TextIOBase @@ -1308,16 +1309,23 @@ def add_schedule_process( "--sensor-id", "sensor", type=SensorIdField(), - required=True, - help="Sensor used to save the report. Follow up with the sensor's ID. " + required=False, + help="Sensor used to save the report. Follow up with the sensor's ID. 
Can be defined in the parameters file, as well" " If needed, use `flexmeasures add sensor` to create a new sensor first.", ) @click.option( - "--reporter-config", - "reporter_config", - required=True, + "--config", + "config_file", + required=False, type=click.File("r"), - help="Path to the JSON file with the reporter configuration.", + help="Path to the JSON or YAML file with the configuration of the reporter.", +) +@click.option( + "--parameters", + "parameters_file", + required=False, + type=click.File("r"), + help="Path to the JSON or YAML file with the report parameters (passed to the compute step).", ) @click.option( "--reporter", @@ -1382,10 +1390,29 @@ def add_schedule_process( is_flag=True, help="Add this flag to avoid saving the results to the database.", ) +@click.option( + "--edit-config", + "edit_config", + is_flag=True, + help="Add this flag to edit the configuration of the Reporter in your default text editor (e.g. nano).", +) +@click.option( + "--edit-parameters", + "edit_parameters", + is_flag=True, + help="Add this flag to edit the parameters passed to the Reporter in your default text editor (e.g. 
nano).", +) +@click.option( + "--save-config", + "save_config", + is_flag=True, + help="Add this flag to save the `config` in the attributes of the DataSource for future reference.", +) def add_report( # noqa: C901 reporter_class: str, - sensor: Sensor, - reporter_config: TextIOBase, + sensor: Sensor | None = None, + config_file: TextIOBase | None = None, + parameters_file: TextIOBase | None = None, start: datetime | None = None, end: datetime | None = None, start_offset: str | None = None, @@ -1393,6 +1420,9 @@ def add_report( # noqa: C901 resolution: timedelta | None = None, output_file: Path | None = None, dry_run: bool = False, + edit_config: bool = False, + edit_parameters: bool = False, + save_config: bool = False, timezone: str | None = None, ): """ @@ -1400,9 +1430,40 @@ def add_report( # noqa: C901 to the database or export them as CSV or Excel file. """ + config = dict() + + if config_file: + config = yaml.safe_load(config_file) + + if edit_config: + config = launch_editor("/tmp/config.yml") + + parameters = dict() + + if parameters_file: + parameters = yaml.safe_load(parameters_file) + + if edit_parameters: + parameters = launch_editor("/tmp/parameters.yml") + + if sensor is not None: + parameters["sensor"] = sensor.id + + # check if sensor is not provided either in the parameters or the CLI + # click parameter + if parameters.get("sensor") is None: + click.secho( + "Report sensor needs to be defined, either on the `parameters` file or trough the --sensor CLI parameter...", + **MsgStyle.ERROR, + ) + raise click.Abort() + + sensor = Sensor.query.get(parameters.get("sensor")) + # compute now in the timezone local to the output sensor if timezone is not None: check_timezone(timezone) + now = pytz.timezone( zone=timezone if timezone is not None else sensor.timezone ).localize(datetime.now()) @@ -1457,7 +1518,9 @@ def add_report( # noqa: C901 ) # get reporter class - ReporterClass: Type[Reporter] = app.reporters.get(reporter_class) + ReporterClass: 
Type[Reporter] = app.data_generators.get("reporter").get( + reporter_class + ) # check if it exists if ReporterClass is None: @@ -1469,19 +1532,20 @@ def add_report( # noqa: C901 click.secho(f"Reporter {reporter_class} found.", **MsgStyle.SUCCESS) - reporter_config_raw = json.load(reporter_config) - # initialize reporter class with the reporter sensor and reporter config - reporter: Reporter = ReporterClass( - sensor=sensor, reporter_config_raw=reporter_config_raw - ) + reporter: Reporter = ReporterClass(config=config, save_config=save_config) click.echo("Report computation is running...") + if ("start" not in parameters) and (start is not None): + parameters["start"] = start.isoformat() + if ("end" not in parameters) and (end is not None): + parameters["end"] = end.isoformat() + if ("resolution" not in parameters) and (resolution is not None): + parameters["resolution"] = pd.Timedelta(resolution).isoformat() + # compute the report - result: BeliefsDataFrame = reporter.compute( - start=start, end=end, input_resolution=resolution - ) + result: BeliefsDataFrame = reporter.compute(parameters=parameters) if not result.empty: click.secho("Report computation done.", **MsgStyle.SUCCESS) @@ -1535,6 +1599,18 @@ def add_report( # noqa: C901 ) +def launch_editor(filename: str) -> dict: + """Launch editor to create/edit a json object""" + click.edit("{\n}", filename=filename) + + with open(filename, "r") as f: + content = yaml.safe_load(f) + if content is None: + return dict() + + return content + + @fm_add_data.command("toy-account") @with_appcontext @click.option( diff --git a/flexmeasures/cli/tests/test_data_add.py b/flexmeasures/cli/tests/test_data_add.py index 5c174bdb7..5c07f0224 100644 --- a/flexmeasures/cli/tests/test_data_add.py +++ b/flexmeasures/cli/tests/test_data_add.py @@ -1,5 +1,6 @@ import pytest import json +import yaml import os @@ -121,22 +122,32 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): runner = app.test_cli_runner() 
cli_input_params = { - "sensor-id": report_sensor_id, - "reporter-config": "reporter_config.json", + "config": "reporter_config.yaml", + "parameters": "parameters.json", "reporter": "PandasReporter", "start": "2023-04-10T00:00:00 00:00", "end": "2023-04-10T10:00:00 00:00", "output-file": "test.csv", } + parameters = dict( + input_variables=dict( + sensor_1=dict(sensor=sensor1.id), sensor_2=dict(sensor=sensor2.id) + ), + sensor=report_sensor_id, + ) + cli_input = to_flags(cli_input_params) # run test in an isolated file system with runner.isolated_filesystem(): # save reporter_config to a json file - with open("reporter_config.json", "w") as f: - json.dump(reporter_config, f) + with open("reporter_config.yaml", "w") as f: + yaml.dump(reporter_config, f) + + with open("parameters.json", "w") as f: + json.dump(parameters, f) # call command result = runner.invoke(add_report, cli_input) @@ -175,8 +186,8 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): previous_command_end = cli_input_params.get("end").replace(" ", "+") cli_input_params = { - "sensor-id": report_sensor_id, - "reporter-config": "reporter_config.json", + "config": "reporter_config.json", + "parameters": "parameters.json", "reporter": "PandasReporter", "output-file": "test.csv", "timezone": "UTC", @@ -190,6 +201,9 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): with open("reporter_config.json", "w") as f: json.dump(reporter_config, f) + with open("parameters.json", "w") as f: + json.dump(parameters, f) + # call command result = runner.invoke(add_report, cli_input) diff --git a/requirements/app.in b/requirements/app.in index b26e2a80a..0babfb74b 100644 --- a/requirements/app.in +++ b/requirements/app.in @@ -1,4 +1,5 @@ # see ui/utils/plotting_utils: separate_legend() and create_hover_tool() +pyyaml altair colour pscript diff --git a/requirements/app.txt b/requirements/app.txt index 340a23f4d..53917b19d 100644 --- a/requirements/app.txt +++ b/requirements/app.txt @@ 
-253,6 +253,8 @@ pytz==2023.3 # pandas # timely-beliefs # timetomodel +pyyaml==6.0.1 + # via -r requirements/app.in redis==4.6.0 # via # -r requirements/app.in From 0a38d76f1caa6f2201df3c29ec56cfa4003dc51a Mon Sep 17 00:00:00 2001 From: Victor Date: Mon, 7 Aug 2023 09:47:12 +0200 Subject: [PATCH 70/70] Polish `PandasReporter` schemas (#788) * add required_input and required_output to PandasReporterConfigSchema and input & output to parameters Signed-off-by: Victor Garcia Reolid * adapt tests Signed-off-by: Victor Garcia Reolid * implement multiple output and simplify tibber reporter Signed-off-by: Victor Garcia Reolid * fix example in the docstring Signed-off-by: Victor Garcia Reolid * remove max=1 constraint Signed-off-by: Victor Garcia Reolid * add example for _clean_parameters Signed-off-by: Victor Garcia Reolid * remove time parameters in input (_clean_parameters method) Signed-off-by: Victor Garcia Reolid * remove file added accidentally Signed-off-by: Victor Garcia Reolid * improve assert Signed-off-by: Victor Garcia Reolid * update changelog entry Signed-off-by: Victor Garcia Reolid * fix changelog Signed-off-by: Victor Garcia Reolid * Adapt `test_add_report` to use the new field of the `PandasReporter` schema (#789) * fix: typo Signed-off-by: F.N. Claessen * fix: fetch output sensor only from parameters dict Signed-off-by: F.N. Claessen * adapt the CLI to deal with multiple output Signed-off-by: Victor Garcia Reolid * fix typos Signed-off-by: Victor Garcia Reolid --------- Signed-off-by: F.N. Claessen Signed-off-by: Victor Garcia Reolid Co-authored-by: F.N. Claessen --------- Signed-off-by: Victor Garcia Reolid Signed-off-by: F.N. Claessen Co-authored-by: F.N.
Claessen --- documentation/changelog.rst | 4 +- flexmeasures/cli/data_add.py | 148 ++++++++++-------- flexmeasures/cli/tests/conftest.py | 37 +---- flexmeasures/cli/tests/test_data_add.py | 140 +++++++++++++++-- flexmeasures/data/models/data_sources.py | 91 ++++++++--- .../data/models/reporting/__init__.py | 66 +++++--- .../data/models/reporting/aggregator.py | 34 ++-- .../data/models/reporting/pandas_reporter.py | 122 +++++++++------ .../models/reporting/tests/test_aggregator.py | 84 ++++++---- .../reporting/tests/test_pandas_reporter.py | 62 ++++---- .../reporting/tests/test_tibber_reporter.py | 122 ++++++--------- flexmeasures/data/schemas/io.py | 52 ++++++ .../data/schemas/reporting/__init__.py | 11 +- .../data/schemas/reporting/aggregation.py | 74 ++++----- .../data/schemas/reporting/pandas_reporter.py | 135 +++++++--------- .../data/schemas/tests/test_reporting.py | 51 +++--- flexmeasures/data/tests/conftest.py | 14 +- flexmeasures/data/tests/test_data_source.py | 69 +++++++- 18 files changed, 807 insertions(+), 509 deletions(-) create mode 100644 flexmeasures/data/schemas/io.py diff --git a/documentation/changelog.rst b/documentation/changelog.rst index d9982e9d8..581a84610 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -25,8 +25,8 @@ New features * Added API endpoints `/sensors/` for fetching a single sensor, `/sensors` (POST) for adding a sensor, `/sensors/` (PATCH) for updating a sensor and `/sensors/` (DELETE) for deleting a sensor. [see `PR #759 `_] and [see `PR #767 `_] and [see `PR #773 `_] and [see `PR #784 `_] * The CLI now allows to set lists and dicts as asset & sensor attributes (formerly only single values) [see `PR #762 `_] * Add `ProcessScheduler` class to optimize the starting time of processes one of the policies developed (INFLEXIBLE, SHIFTABLE and BREAKABLE), accessible via the CLI command `flexmeasures add schedule for-process` [see `PR #729 `_ and `PR #768 `_] -* Users will be able to see (e.g. 
in the UI) exactly which reporter created the report (saved as sensor data), and hosts will be able to identify exactly which configuration was used to create a given report [see `PR #751 `_] -* The CLI `flexmeasures add report` now allows passing `config` and `parameters` in YAML format as files or editable via the system's default editor [see `PR #752 `_] +* Users will be able to see (e.g. in the UI) exactly which reporter created the report (saved as sensor data), and hosts will be able to identify exactly which configuration was used to create a given report [see `PR #751 `_, `PR #752 `_ and `PR #788 `_] +* The CLI `flexmeasures add report` now allows passing `config` and `parameters` in YAML format as files or editable via the system's default editor [see `PR #788 `_] Bugfixes ----------- diff --git a/flexmeasures/cli/data_add.py b/flexmeasures/cli/data_add.py index 4fe509f45..9e48e5c88 100755 --- a/flexmeasures/cli/data_add.py +++ b/flexmeasures/cli/data_add.py @@ -11,6 +11,7 @@ import yaml from pathlib import Path from io import TextIOBase +from string import Template from marshmallow import validate import pandas as pd @@ -58,6 +59,7 @@ from flexmeasures.data.schemas.times import TimeIntervalSchema from flexmeasures.data.schemas.scheduling.storage import EfficiencyField from flexmeasures.data.schemas.sensors import SensorSchema +from flexmeasures.data.schemas.io import Output from flexmeasures.data.schemas.units import QuantityField from flexmeasures.data.schemas.generic_assets import ( GenericAssetSchema, @@ -1305,14 +1307,6 @@ def add_schedule_process( @fm_add_data.command("report") @with_appcontext -@click.option( - "--sensor-id", - "sensor", - type=SensorIdField(), - required=False, - help="Sensor used to save the report. Follow up with the sensor's ID. 
Can be defined in the parameters file, as well" - " If needed, use `flexmeasures add sensor` to create a new sensor first.", -) @click.option( "--config", "config_file", @@ -1372,17 +1366,20 @@ def add_schedule_process( ) @click.option( "--output-file", - "output_file", + "output_file_pattern", required=False, type=click.Path(), - help="Path to save the report to file. Will override any previous file contents." - " Use the `.csv` suffix to save the results as Comma Separated Values and `.xlsx` to export them as Excel sheets.", + help="Format of the output file. Use dollar sign ($) to interpolate values among the following ones:" + " now (current time), name (name of the output), sensor_id (id of the sensor), column (column of the output)." + " Example: 'result_file_$name_$now.csv'. " + "Use the `.csv` suffix to save the results as Comma Separated Values and `.xlsx` to export them as Excel sheets.", ) @click.option( "--timezone", "timezone", required=False, - help="Timezone as string, e.g. 'UTC' or 'Europe/Amsterdam' (defaults to the timezone of the sensor used to save the report).", + help="Timezone as string, e.g. 'UTC' or 'Europe/Amsterdam' (defaults to the timezone of the sensor used to save the report)." 
+ "The timezone of the first output sensor (specified in the parameters) is taken as a default.", ) @click.option( "--dry-run", @@ -1410,7 +1407,6 @@ def add_schedule_process( ) def add_report( # noqa: C901 reporter_class: str, - sensor: Sensor | None = None, config_file: TextIOBase | None = None, parameters_file: TextIOBase | None = None, start: datetime | None = None, @@ -1418,7 +1414,7 @@ def add_report( # noqa: C901 start_offset: str | None = None, end_offset: str | None = None, resolution: timedelta | None = None, - output_file: Path | None = None, + output_file_pattern: Path | None = None, dry_run: bool = False, edit_config: bool = False, edit_parameters: bool = False, @@ -1446,26 +1442,22 @@ def add_report( # noqa: C901 if edit_parameters: parameters = launch_editor("/tmp/parameters.yml") - if sensor is not None: - parameters["sensor"] = sensor.id - - # check if sensor is not provided either in the parameters or the CLI - # click parameter - if parameters.get("sensor") is None: + # check if sensor is not provided in the `parameters` description + if "output" not in parameters or len(parameters["output"]) == 0: click.secho( - "Report sensor needs to be defined, either on the `parameters` file or trough the --sensor CLI parameter...", + "At least one output sensor needs to be specified in the parameters description.", **MsgStyle.ERROR, ) raise click.Abort() - sensor = Sensor.query.get(parameters.get("sensor")) + output = [Output().load(o) for o in parameters["output"]] # compute now in the timezone local to the output sensor if timezone is not None: check_timezone(timezone) now = pytz.timezone( - zone=timezone if timezone is not None else sensor.timezone + zone=timezone if timezone is not None else output[0]["sensor"].timezone ).localize(datetime.now()) # apply offsets, if provided @@ -1486,9 +1478,11 @@ def add_report( # noqa: C901 " Trying to use the latest datapoint of the report sensor as the start time...", **MsgStyle.WARN, ) + + # todo: get the oldest 
last_value among all the sensors last_value_datetime = ( db.session.query(func.max(TimedBelief.event_start)) - .filter(TimedBelief.sensor_id == sensor.id) + .filter(TimedBelief.sensor_id == output[0]["sensor"].id) .one_or_none() ) @@ -1497,7 +1491,8 @@ def add_report( # noqa: C901 start = last_value_datetime[0] else: click.secho( - f"Could not find any data for the report sensor {sensor}.", + "Could not find any data for the output sensors provided. Such data is needed to compute" + " a sensible default start for the report, so setting a start explicitly would resolve this issue.", **MsgStyle.ERROR, ) raise click.Abort() @@ -1545,58 +1540,77 @@ def add_report( # noqa: C901 parameters["resolution"] = pd.Timedelta(resolution).isoformat() # compute the report - result: BeliefsDataFrame = reporter.compute(parameters=parameters) + results: BeliefsDataFrame = reporter.compute(parameters=parameters) - if not result.empty: - click.secho("Report computation done.", **MsgStyle.SUCCESS) - else: - click.secho( - "Report computation done, but the report is empty.", **MsgStyle.WARN - ) - - # save the report if it's not running in dry mode - if not dry_run: - click.echo("Saving report to the database...") - save_to_db(result.dropna()) - db.session.commit() - click.secho( - "Success. The report has been saved to the database.", - **MsgStyle.SUCCESS, - ) - else: - click.echo( - f"Not saving report to the database (because of --dry-run), but this is what I computed:\n{result}" - ) - - # if an output file path is provided, save the results - if output_file: - suffix = str(output_file).split(".")[-1] if "." in str(output_file) else "" - - if suffix == "xlsx": # save to EXCEL - result.to_excel(output_file) + for result in results: + data = result["data"] + sensor = result["sensor"] + if not data.empty: click.secho( - f"Success. 
The report has been exported as EXCEL to the file `{output_file}`", - **MsgStyle.SUCCESS, + f"Report computation done for sensor `{sensor}`.", **MsgStyle.SUCCESS + ) + else: + click.secho( + f"Report computation done for sensor `{sensor}`, but the report is empty.", + **MsgStyle.WARN, ) - elif suffix == "csv": # save to CSV - result.to_csv(output_file) + # save the report if it's not running in dry mode + if not dry_run: + click.echo(f"Saving report for sensor `{sensor}` to the database...") + save_to_db(data.dropna()) + db.session.commit() click.secho( - f"Success. The report has been exported as CSV to the file `{output_file}`", + f"Success. The report for sensor `{sensor}` has been saved to the database.", **MsgStyle.SUCCESS, ) + else: + click.echo( + f"Not saving report for sensor `{sensor}` to the database (because of --dry-run), but this is what I computed:\n{data}" + ) + + # if an output file path is provided, save the data + if output_file_pattern: + suffix = ( + str(output_file_pattern).split(".")[-1] + if "." in str(output_file_pattern) + else "" + ) + template = Template(str(output_file_pattern)) + + filename = template.safe_substitute( + sensor_id=result["sensor"].id, + name=result.get("name", ""), + column=result.get("column", ""), + reporter_class=reporter_class, + now=now.strftime("%Y_%m_%dT%H%M%S"), + ) + + if suffix == "xlsx": # save to EXCEL + data.to_excel(filename) + click.secho( + f"Success. The report for sensor `{sensor}` has been exported as EXCEL to the file `{filename}`", + **MsgStyle.SUCCESS, + ) - else: # default output format: CSV. + elif suffix == "csv": # save to CSV + data.to_csv(filename) + click.secho( + f"Success. The report for sensor `{sensor}` has been exported as CSV to the file `{filename}`", + **MsgStyle.SUCCESS, + ) + + else: # default output format: CSV. + click.secho( + f"File suffix not provided. 
Exporting results for sensor `{sensor}` as CSV to file {filename}", + **MsgStyle.WARN, + ) + data.to_csv(filename) + else: click.secho( - f"File suffix not provided. Exporting results as CSV to file {output_file}", - **MsgStyle.WARN, + "Success.", + **MsgStyle.SUCCESS, ) - result.to_csv(output_file) - else: - click.secho( - "Success.", - **MsgStyle.SUCCESS, - ) def launch_editor(filename: str) -> dict: diff --git a/flexmeasures/cli/tests/conftest.py b/flexmeasures/cli/tests/conftest.py index 41fffe116..097efbda5 100644 --- a/flexmeasures/cli/tests/conftest.py +++ b/flexmeasures/cli/tests/conftest.py @@ -63,6 +63,13 @@ def setup_dummy_data(db, app, setup_dummy_asset): ) db.session.add(report_sensor) + report_sensor_2 = Sensor( + "report sensor 2", + generic_asset=pandas_report, + event_resolution=timedelta(hours=2), + ) + db.session.add(report_sensor_2) + # Create 1 DataSources source = DataSource("source1") @@ -83,35 +90,7 @@ def setup_dummy_data(db, app, setup_dummy_asset): db.session.add_all(beliefs) db.session.commit() - yield sensor1, sensor2, report_sensor - - -@pytest.fixture(scope="module") -@pytest.mark.skip_github -def reporter_config(app, db, setup_dummy_data): - """ - This reporter_config defines the operations to add up the - values of the sensors 1 and 2 and resamples the result to a - two hour resolution. 
- """ - - sensor1, sensor2, report_sensor = setup_dummy_data - - reporter_config = dict( - input_variables=["sensor_1", "sensor_2"], - transformations=[ - dict( - df_input="sensor_1", - method="add", - args=["@sensor_2"], - df_output="df_agg", - ), - dict(method="resample_events", args=["2h"]), - ], - final_df_output="df_agg", - ) - - return reporter_config + yield sensor1.id, sensor2.id, report_sensor.id, report_sensor_2.id @pytest.mark.skip_github diff --git a/flexmeasures/cli/tests/test_data_add.py b/flexmeasures/cli/tests/test_data_add.py index 5c07f0224..72647d494 100644 --- a/flexmeasures/cli/tests/test_data_add.py +++ b/flexmeasures/cli/tests/test_data_add.py @@ -2,7 +2,8 @@ import json import yaml import os - +from datetime import datetime +import pytz from flexmeasures.cli.tests.utils import to_flags from flexmeasures.data.models.annotations import ( @@ -97,7 +98,7 @@ def test_cli_help(app): @pytest.mark.skip_github -def test_add_reporter(app, db, setup_dummy_data, reporter_config): +def test_add_reporter(app, db, setup_dummy_data): """ The reporter aggregates input data from two sensors (both have 200 data points) to a two-hour resolution. @@ -114,8 +115,21 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): from flexmeasures.cli.data_add import add_report - sensor1, sensor2, report_sensor = setup_dummy_data - report_sensor_id = report_sensor.id + sensor1_id, sensor2_id, report_sensor_id, _ = setup_dummy_data + + reporter_config = dict( + required_input=[{"name": "sensor_1"}, {"name": "sensor_2"}], + required_output=[{"name": "df_agg"}], + transformations=[ + dict( + df_input="sensor_1", + method="add", + args=["@sensor_2"], + df_output="df_agg", + ), + dict(method="resample_events", args=["2h"]), + ], + ) # Running the command with start and end values. 
@@ -131,10 +145,11 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): } parameters = dict( - input_variables=dict( - sensor_1=dict(sensor=sensor1.id), sensor_2=dict(sensor=sensor2.id) - ), - sensor=report_sensor_id, + input=[ + dict(name="sensor_1", sensor=sensor1_id), + dict(name="sensor_2", sensor=sensor2_id), + ], + output=[dict(name="df_agg", sensor=report_sensor_id)], ) cli_input = to_flags(cli_input_params) @@ -156,15 +171,14 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): assert result.exit_code == 0 # run command without errors - assert "Reporter PandasReporter found" in result.output - assert "Report computation done." in result.output - - # Check report is saved to the database - report_sensor = Sensor.query.get( report_sensor_id ) # get fresh report sensor instance + assert "Reporter PandasReporter found" in result.output + assert f"Report computation done for sensor `{report_sensor}`." in result.output + + # Check report is saved to the database stored_report = report_sensor.search_beliefs( event_starts_after=cli_input_params.get("start").replace(" ", "+"), event_ends_before=cli_input_params.get("end").replace(" ", "+"), @@ -211,14 +225,14 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): assert result.exit_code == 0 # run command without errors - assert "Reporter PandasReporter found" in result.output - assert "Report computation done." in result.output - # Check if the report is saved to the database report_sensor = Sensor.query.get( report_sensor_id ) # get fresh report sensor instance + assert "Reporter PandasReporter found" in result.output + assert f"Report computation done for sensor `{report_sensor}`." 
in result.output + stored_report = report_sensor.search_beliefs( event_starts_after=previous_command_end, event_ends_before=server_now(), @@ -227,6 +241,100 @@ def test_add_reporter(app, db, setup_dummy_data, reporter_config): assert len(stored_report) == 95 +@pytest.mark.skip_github +def test_add_multiple_output(app, db, setup_dummy_data): + """ """ + + from flexmeasures.cli.data_add import add_report + + sensor_1_id, sensor_2_id, report_sensor_id, report_sensor_2_id = setup_dummy_data + + reporter_config = dict( + required_input=[{"name": "sensor_1"}, {"name": "sensor_2"}], + required_output=[{"name": "df_agg"}, {"name": "df_sub"}], + transformations=[ + dict( + df_input="sensor_1", + method="add", + args=["@sensor_2"], + df_output="df_agg", + ), + dict(method="resample_events", args=["2h"]), + dict( + df_input="sensor_1", + method="subtract", + args=["@sensor_2"], + df_output="df_sub", + ), + dict(method="resample_events", args=["2h"]), + ], + ) + + # Running the command with start and end values. 
+ + runner = app.test_cli_runner() + + cli_input_params = { + "config": "reporter_config.yaml", + "parameters": "parameters.json", + "reporter": "PandasReporter", + "start": "2023-04-10T00:00:00+00:00", + "end": "2023-04-10T10:00:00+00:00", + "output-file": "test-$name.csv", + } + + parameters = dict( + input=[ + dict(name="sensor_1", sensor=sensor_1_id), + dict(name="sensor_2", sensor=sensor_2_id), + ], + output=[ + dict(name="df_agg", sensor=report_sensor_id), + dict(name="df_sub", sensor=report_sensor_2_id), + ], + ) + + cli_input = to_flags(cli_input_params) + + # run test in an isolated file system + with runner.isolated_filesystem(): + + # save reporter_config to a json file + with open("reporter_config.yaml", "w") as f: + yaml.dump(reporter_config, f) + + with open("parameters.json", "w") as f: + json.dump(parameters, f) + + # call command + result = runner.invoke(add_report, cli_input) + + assert os.path.exists("test-df_agg.csv") + assert os.path.exists("test-df_sub.csv") + + print(result) + + assert result.exit_code == 0 # run command without errors + + report_sensor = Sensor.query.get(report_sensor_id) + report_sensor_2 = Sensor.query.get(report_sensor_2_id) + + assert "Reporter PandasReporter found" in result.output + assert f"Report computation done for sensor `{report_sensor}`." in result.output + assert ( + f"Report computation done for sensor `{report_sensor_2}`." 
in result.output + ) + + # check that the reports are saved + assert all( + report_sensor.search_beliefs( + event_ends_before=datetime(2023, 4, 10, 10, tzinfo=pytz.UTC) + ).values.flatten() + == [1, 5, 9, 13, 17] + ) + assert all(report_sensor_2.search_beliefs() == 0) + + @pytest.mark.skip_github @pytest.mark.parametrize("process_type", [("INFLEXIBLE"), ("SHIFTABLE"), ("BREAKABLE")]) def test_add_process(app, process_power_sensor, process_type): diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 35f2c2865..334ca6721 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, List, Dict from sqlalchemy.ext.mutable import MutableDict import timely_beliefs as tb @@ -22,18 +22,28 @@ class DataGenerator: _data_source: DataSource | None = None _config: dict = None + _parameters: dict = None _parameters_schema: Schema | None = None _config_schema: Schema | None = None _save_config: bool = True + _save_parameters: bool = False - def __init__(self, config: dict | None = None, save_config=True, **kwargs) -> None: + def __init__( + self, + config: dict | None = None, + save_config=True, + save_parameters=False, + **kwargs, + ) -> None: """Base class for the Schedulers, Reporters and Forecasters. The configuration `config` stores static parameters, parameters that, if changed, trigger the creation of a new DataSource. Dynamic parameters, such as the start date, can go into the `parameters`. See docstring of the method `DataGenerator.compute` for - more details. + more details. Nevertheless, the parameter `save_parameters` can be set to True if some `parameters` need + to be saved to the DB. In that case, the method `_clean_parameters` is called to remove any field that is not + to be persisted, e.g. 
time parameters which are already contained in the TimedBelief. Create a new DataGenerator with a certain configuration. There are two alternatives to define the parameters: @@ -66,22 +76,24 @@ def __init__(self, config: dict | None = None, save_config=True, **kwargs) -> No :param config: serialized `config` parameters, defaults to None :param save_config: whether to save the config into the data source attributes + :param save_parameters: whether to save the parameters into the data source attributes """ self._save_config = save_config + self._save_parameters = save_parameters - if config is None: + if config is None and len(kwargs) > 0: self._config = kwargs DataGenerator.validate_deserialized(self._config, self._config_schema) - elif self._config_schema: + elif config is not None: self._config = self._config_schema.load(config) - else: - self._config = config + elif len(kwargs) == 0: + self._config = self._config_schema.load({}) - def _compute(self, **kwargs): + def _compute(self, **kwargs) -> List[Dict[str, Any]]: raise NotImplementedError() - def compute(self, parameters: dict | None = None, **kwargs): + def compute(self, parameters: dict | None = None, **kwargs) -> List[Dict[str, Any]]: """The configuration `parameters` stores dynamic parameters, parameters that, if changed, DO NOT trigger the creation of a new DataSource. Static parameters, such as the topology of an energy system, can go into `config`. 
@@ -91,15 +103,18 @@ def compute(self, parameters: dict | None = None, **kwargs): :param parameters: serialized `parameters` parameters, defaults to None """ + + if self._parameters is None: + self._parameters = {} + if parameters is None: - _parameters = kwargs - DataGenerator.validate_deserialized(_parameters, self._parameters_schema) - elif self._parameters_schema: - _parameters = self._parameters_schema.load(parameters) - else: # skip validation - _parameters = parameters + self._parameters.update(self._parameters_schema.dump(kwargs)) + else: + self._parameters.update(parameters) + + self._parameters = self._parameters_schema.load(self._parameters) - return self._compute(**_parameters) + return self._compute(**self._parameters) @staticmethod def validate_deserialized(values: dict, schema: Schema) -> bool: @@ -133,11 +148,17 @@ def data_source(self) -> "DataSource": if self._data_source is None: data_source_info = self.get_data_source_info() - attributes = {} + attributes = {"data_generator": {}} + if self._save_config: - attributes = { - "data_generator": {"config": self._config_schema.dump(self._config)} - } + attributes["data_generator"]["config"] = self._config_schema.dump( + self._config + ) + + if self._save_parameters: + attributes["data_generator"]["parameters"] = self._clean_parameters( + self._parameters_schema.dump(self._parameters) + ) data_source_info["attributes"] = attributes @@ -145,6 +166,30 @@ def data_source(self) -> "DataSource": return self._data_source + def _clean_parameters(self, parameters: dict) -> dict: + """Use this function to clean up the parameters dictionary from the + fields that are not to be persisted to the DB as data source attributes (when save_parameters=True), + e.g. because they are already stored as TimedBelief properties, or otherwise. + + Example: + + A DataGenerator has the following parameters: ["start", "end", "field1", "field2"] and we want just "field1" and "field2" + to be persisted.
+ + Parameters provided to the `compute` method (input of the method `_clean_parameters`): + parameters = { + "start" : "2023-01-01T00:00:00+02:00", + "end" : "2023-01-02T00:00:00+02:00", + "field1" : 1, + "field2" : 2 + } + + Parameters persisted to the DB (output of the method `_clean_parameters`): + parameters = {"field1" : 1,"field2" : 2} + """ + + raise NotImplementedError() + class DataSource(db.Model, tb.BeliefSourceDBMixin): """Each data source is a data-providing entity.""" @@ -237,11 +282,15 @@ def data_generator(self): # fetch DataGenerator details data_generator_details = self.attributes.get("data_generator", {}) config = data_generator_details.get("config", {}) + parameters = data_generator_details.get("parameters", {}) - # create DataGenerator class and assign the current DataSource (self) as its source + # create DataGenerator class and add the parameters data_generator = current_app.data_generators[self.type][self.model]( config=config ) + data_generator._parameters = parameters + + # assign the current DataSource (self) as its source data_generator._data_source = self self._data_generator = data_generator diff --git a/flexmeasures/data/models/reporting/__init__.py b/flexmeasures/data/models/reporting/__init__.py index 7427e973d..12d996f9f 100644 --- a/flexmeasures/data/models/reporting/__init__.py +++ b/flexmeasures/data/models/reporting/__init__.py @@ -1,6 +1,8 @@ from __future__ import annotations -from flexmeasures.data.models.time_series import Sensor +from copy import deepcopy + +from typing import List, Dict, Any from flexmeasures.data.models.data_sources import DataGenerator from flexmeasures.data.schemas.reporting import ( @@ -8,8 +10,6 @@ ReporterConfigSchema, ) -import timely_beliefs as tb - class Reporter(DataGenerator): """Superclass for all FlexMeasures Reporters.""" @@ -18,45 +18,61 @@ class Reporter(DataGenerator): __author__ = None __data_generator_base__ = "reporter" - sensor: Sensor = None - _parameters_schema = 
ReporterParametersSchema() _config_schema = ReporterConfigSchema() - def _compute(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute(self, **kwargs) -> List[Dict[str, Any]]: """This method triggers the creation of a new report. The same object can generate multiple reports with different start, end, resolution and belief_time values. """ - self.sensor = kwargs["sensor"] - - # Result - result: tb.BeliefsDataFrame = self._compute_report(**kwargs) - - # checking that the event_resolution of the output BeliefDataFrame is equal to the one of the output sensor - assert ( - self.sensor.event_resolution == result.event_resolution - ), f"The resolution of the results ({result.event_resolution}) should match that of the output sensor ({self.sensor.event_resolution}, ID {self.sensor.id})." + results: List[Dict[str, Any]] = self._compute_report(**kwargs) - # Assign sensor to BeliefDataFrame - result.sensor = self.sensor + for result in results: + # checking that the event_resolution of the output BeliefDataFrame is equal to the one of the output sensor + assert ( + result["sensor"].event_resolution == result["data"].event_resolution + ), f"The resolution of the results ({result['data'].event_resolution}) should match that of the output sensor ({result['sensor'].event_resolution}, ID {result['sensor'].id})." - if result.empty: - return result + # Assign sensor to BeliefDataFrame + result["data"].sensor = result["sensor"] - # update data source - result.index = result.index.set_levels( - [self.data_source] * len(result), level="source", verify_integrity=False - ) + if not result["data"].empty: + # update data source + result["data"].index = result["data"].index.set_levels( + [self.data_source] * len(result["data"]), + level="source", + verify_integrity=False, + ) - return result + return results - def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute_report(self, **kwargs) -> List[Dict[str, Any]]: """ Overwrite with the actual computation of your report. 
:returns BeliefsDataFrame: report as a BeliefsDataFrame. """ raise NotImplementedError() + + def _clean_parameters(self, parameters: dict) -> dict: + _parameters = deepcopy(parameters) + fields_to_remove = ["start", "end", "resolution", "belief_time"] + + for field in fields_to_remove: + _parameters.pop(field, None) + + fields_to_remove_input = [ + "event_starts_after", + "event_ends_before", + "belief_time", + "resolution", + ] + + for _input in _parameters["input"]: + for field in fields_to_remove_input: + _input.pop(field, None) + + return _parameters diff --git a/flexmeasures/data/models/reporting/aggregator.py b/flexmeasures/data/models/reporting/aggregator.py index 9fd41601d..8fb362129 100644 --- a/flexmeasures/data/models/reporting/aggregator.py +++ b/flexmeasures/data/models/reporting/aggregator.py @@ -1,13 +1,15 @@ from __future__ import annotations from datetime import datetime, timedelta +from typing import Any, List, Dict -import timely_beliefs as tb import pandas as pd from flexmeasures.data.models.reporting import Reporter -from flexmeasures.data.schemas.reporting.aggregation import AggregatorConfigSchema -from flexmeasures.data.models.time_series import Sensor +from flexmeasures.data.schemas.reporting.aggregation import ( + AggregatorConfigSchema, + AggregatorParametersSchema, +) from flexmeasures.utils.time_utils import server_now @@ -19,18 +21,20 @@ class AggregatorReporter(Reporter): __author__ = "Seita" _config_schema = AggregatorConfigSchema() + _parameters_schema = AggregatorParametersSchema() weights: dict method: str def _compute_report( self, - sensor: Sensor, start: datetime, end: datetime, + input: List[Dict[str, Any]], + output: List[Dict[str, Any]], resolution: timedelta | None = None, belief_time: datetime | None = None, - ) -> tb.BeliefsDataFrame: + ) -> List[Dict[str, Any]]: """ This method merges all the BeliefDataFrames into a single one, dropping all indexes but event_start, and applies an aggregation function over the @@ -39,19 
+43,20 @@ def _compute_report( method: str = self._config.get("method") weights: list = self._config.get("weights", {}) - data: list = self._config.get("data") dataframes = [] if belief_time is None: belief_time = server_now() - for d in data: - # if alias is not in belief_search_config, using the Sensor id instead - column_name = d.get("alias", f"sensor_{d['sensor'].id}") + for input_description in input: + # if name is not in belief_search_config, using the Sensor id instead + column_name = input_description.get( + "name", f"sensor_{input_description['sensor'].id}" + ) df = ( - d["sensor"] + input_description["sensor"] .search_beliefs( event_starts_after=start, event_ends_before=end, @@ -82,4 +87,11 @@ def _compute_report( ["belief_time", "source", "cumulative_probability"], append=True ) - return output_df + return [ + { + "name": "aggregate", + "column": "event_value", + "sensor": output[0]["sensor"], + "data": output_df, + } + ] diff --git a/flexmeasures/data/models/reporting/pandas_reporter.py b/flexmeasures/data/models/reporting/pandas_reporter.py index 635f2b01b..7c8fdaba8 100644 --- a/flexmeasures/data/models/reporting/pandas_reporter.py +++ b/flexmeasures/data/models/reporting/pandas_reporter.py @@ -1,8 +1,8 @@ from __future__ import annotations -from typing import Any, Union, Dict +from typing import Any, Union, Dict, List from datetime import datetime, timedelta -from copy import deepcopy +from copy import deepcopy, copy from flask import current_app import timely_beliefs as tb @@ -25,7 +25,7 @@ class PandasReporter(Reporter): _config_schema = PandasReporterConfigSchema() _parameters_schema = PandasReporterParametersSchema() - input_variables: list[str] = None + input: list[str] = None transformations: list[dict[str, Any]] = None final_df_output: str = None @@ -35,7 +35,7 @@ def fetch_data( self, start: datetime, end: datetime, - input_variables: dict, + input: dict, resolution: timedelta | None = None, belief_time: datetime | None = None, ): @@ -44,23 
+44,27 @@ def fetch_data( """ self.data = {} - for alias, tb_query in input_variables.items(): - _tb_query = tb_query.copy() + for input_search_parameters in input: + _input_search_parameters = input_search_parameters.copy() - # using start / end instead of event_starts_after/event_ends_before when not defined - event_starts_after = _tb_query.pop("event_starts_after", start) - event_ends_before = _tb_query.pop("event_ends_before", end) - resolution = _tb_query.pop("resolution", resolution) - belief_time = _tb_query.pop("belief_time", belief_time) + sensor: Sensor = _input_search_parameters.pop("sensor", None) + + name = _input_search_parameters.pop("name", f"sensor_{sensor.id}") - sensor: Sensor = _tb_query.pop("sensor", None) + # using start / end instead of event_starts_after/event_ends_before when not defined + event_starts_after = _input_search_parameters.pop( + "event_starts_after", start + ) + event_ends_before = _input_search_parameters.pop("event_ends_before", end) + resolution = _input_search_parameters.pop("resolution", resolution) + belief_time = _input_search_parameters.pop("belief_time", belief_time) bdf = sensor.search_beliefs( event_starts_after=event_starts_after, event_ends_before=event_ends_before, resolution=resolution, beliefs_before=belief_time, - **_tb_query, + **_input_search_parameters, ) # store data source as local variable @@ -68,9 +72,9 @@ def fetch_data( self.data[f"source_{source.id}"] = source # store BeliefsDataFrame as local variable - self.data[alias] = bdf + self.data[name] = bdf - def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute_report(self, **kwargs) -> List[Dict[str, Any]]: """ This method applies the transformations and outputs the dataframe defined in `final_df_output` field of the report_config. 
@@ -79,16 +83,18 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: # report configuration start: datetime = kwargs.get("start") end: datetime = kwargs.get("end") - input_variables: dict = kwargs.get("input_variables") + input: dict = kwargs.get("input") resolution: timedelta | None = kwargs.get("resolution", None) belief_time: datetime | None = kwargs.get("belief_time", None) + output: List[Dict[str, Any]] = kwargs.get("output") + # by default, use the minimum resolution among the output sensors if resolution is None: - resolution = self.sensor.event_resolution + resolution = min([o["sensor"].event_resolution for o in output]) # fetch sensor data - self.fetch_data(start, end, input_variables, resolution, belief_time) + self.fetch_data(start, end, input, resolution, belief_time) if belief_time is None: belief_time = server_now() @@ -96,40 +102,66 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: # apply pandas transformations to the dataframes in `self.data` self._apply_transformations() - final_output = self.data[self._config.get("final_df_output")] + results = [] - if isinstance(final_output, tb.BeliefsDataFrame): + for output_description in output: + result = copy(output_description) - # filing the missing indexes with default values: - # belief_time=belief_time, cummulative_probability=0.5, source=data_source - if "belief_time" not in final_output.index.names: - final_output["belief_time"] = [belief_time] * len(final_output) - final_output = final_output.set_index("belief_time", append=True) + name = output_description["name"] - if "cumulative_probability" not in final_output.index.names: - final_output["cumulative_probability"] = [0.5] * len(final_output) - final_output = final_output.set_index( - "cumulative_probability", append=True - ) + output_data = self.data[name] - if "source" not in final_output.index.names: - final_output["source"] = [self.data_source] * len(final_output) - final_output = final_output.set_index("source", 
append=True) + if isinstance(output_data, tb.BeliefsDataFrame): + # if column is missing, use the first column + column = output_description.get("column", output_data.columns[0]) + output_data = output_data.rename(columns={column: "event_value"})[ + ["event_value"] + ] + output_data = self._clean_belief_dataframe(output_data, belief_time) - final_output = final_output.reorder_levels( - tb.BeliefsDataFrame().index.names - ) + elif isinstance(output_data, tb.BeliefsSeries): + output_data = self._clean_belief_series(output_data, belief_time) - elif isinstance(final_output, tb.BeliefsSeries): - final_output = final_output.to_frame("event_value") - final_output["belief_time"] = belief_time - final_output["cumulative_probability"] = 0.5 - final_output["source"] = self.data_source - final_output = final_output.set_index( - ["belief_time", "source", "cumulative_probability"], append=True - ) + result["data"] = output_data + + results.append(result) + + return results + + def _clean_belief_series( + self, belief_series: tb.BeliefsSeries, belief_time: datetime + ) -> tb.BeliefsDataFrame: + """Create a BeliefDataFrame from a BeliefsSeries creating the necessary indexes.""" + + belief_series = belief_series.to_frame("event_value") + belief_series["belief_time"] = belief_time + belief_series["cumulative_probability"] = 0.5 + belief_series["source"] = self.data_source + belief_series = belief_series.set_index( + ["belief_time", "source", "cumulative_probability"], append=True + ) + + return belief_series + + def _clean_belief_dataframe( + self, bdf: tb.BeliefsDataFrame, belief_time: datetime + ) -> tb.BeliefsDataFrame: + """Add missing indexes to build a proper BeliefDataFrame.""" + + # filling the missing indexes with default values: + if "belief_time" not in bdf.index.names: + bdf["belief_time"] = [belief_time] * len(bdf) + bdf = bdf.set_index("belief_time", append=True) + + if "cumulative_probability" not in bdf.index.names: + bdf["cumulative_probability"] = [0.5] * len(bdf) +
bdf = bdf.set_index("cumulative_probability", append=True) + + if "source" not in bdf.index.names: + bdf["source"] = [self.data_source] * len(bdf) + bdf = bdf.set_index("source", append=True) - return final_output + return bdf def get_object_or_literal(self, value: Any, method: str) -> Any: """This method allows using the dataframes as inputs of the Pandas methods that diff --git a/flexmeasures/data/models/reporting/tests/test_aggregator.py b/flexmeasures/data/models/reporting/tests/test_aggregator.py index 02a2a88aa..6c7b06237 100644 --- a/flexmeasures/data/models/reporting/tests/test_aggregator.py +++ b/flexmeasures/data/models/reporting/tests/test_aggregator.py @@ -1,7 +1,7 @@ import pytest from flexmeasures.data.models.reporting.aggregator import AggregatorReporter - +from flexmeasures.data.models.data_sources import DataSource from datetime import datetime from pytz import utc, timezone @@ -38,21 +38,16 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): """ s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data - reporter_config = dict( - data=[ - dict(sensor=s1.id, source=1), - dict(sensor=s2.id, source=2), - ], - method=aggregation_method, - ) + agg_reporter = AggregatorReporter(method=aggregation_method) - agg_reporter = AggregatorReporter(config=reporter_config) + source_1 = DataSource.query.get(1) result = agg_reporter.compute( - sensor=report_sensor, + input=[dict(sensor=s1, source=source_1), dict(sensor=s2, source=source_1)], + output=[dict(sensor=report_sensor)], start=datetime(2023, 5, 10, tzinfo=utc), end=datetime(2023, 5, 11, tzinfo=utc), - ) + )[0]["data"] # check that we got a result for 24 hours assert len(result) == 24 @@ -61,36 +56,65 @@ def test_aggregator(setup_dummy_data, aggregation_method, expected_value): assert (result == expected_value).all().event_value -def test_dst_transition(setup_dummy_data): +@pytest.mark.parametrize( + "weight_1, weight_2, expected_result", + [(1, 1, 0), (1, -1, 2), (2, 0, 
2), (0, 2, -2)], +) +def test_aggregator_reporter_weights( + setup_dummy_data, weight_1, weight_2, expected_result +): s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data - reporter_config = dict( - data=[ - dict(sensor=s3.id, source=1), - ], - ) + reporter_config = dict(method="sum", weights={"s1": weight_1, "sensor_2": weight_2}) + + source_1 = DataSource.query.get(1) + source_2 = DataSource.query.get(1) agg_reporter = AggregatorReporter(config=reporter_config) + result = agg_reporter.compute( + input=[ + dict(name="s1", sensor=s1, source=source_1), + dict(sensor=s2, source=source_2), + ], + output=[dict(sensor=report_sensor)], + start=datetime(2023, 5, 10, tzinfo=utc), + end=datetime(2023, 5, 11, tzinfo=utc), + )[0]["data"] + + # check that we got a result for 24 hours + assert len(result) == 24 + + # check that the value is equal to expected_value + assert (result == expected_result).all().event_value + + +def test_dst_transition(setup_dummy_data): + s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data + + agg_reporter = AggregatorReporter() + tz = timezone("Europe/Amsterdam") # transition from winter (CET) to summer (CEST) result = agg_reporter.compute( - sensor=report_sensor, + input=[dict(sensor=s3, source=DataSource.query.get(1))], + output=[dict(sensor=report_sensor)], start=tz.localize(datetime(2023, 3, 26)), end=tz.localize(datetime(2023, 3, 27)), belief_time=tz.localize(datetime(2023, 12, 1)), - ) + )[0]["data"] assert len(result) == 23 # transition from summer (CEST) to winter (CET) result = agg_reporter.compute( - sensor=report_sensor, + input=[dict(sensor=s3, source=DataSource.query.get(1))], + output=[dict(sensor=report_sensor)], start=tz.localize(datetime(2023, 10, 29)), end=tz.localize(datetime(2023, 10, 30)), belief_time=tz.localize(datetime(2023, 12, 1)), - ) + )[0]["data"] assert len(result) == 25 @@ -98,24 +122,19 @@ def test_dst_transition(setup_dummy_data): def test_resampling(setup_dummy_data): s1, s2, s3, 
report_sensor, daily_report_sensor = setup_dummy_data - reporter_config = dict( - data=[ - dict(sensor=s3.id, source=1), - ], - ) - - agg_reporter = AggregatorReporter(config=reporter_config) + agg_reporter = AggregatorReporter() tz = timezone("Europe/Amsterdam") # transition from winter (CET) to summer (CEST) result = agg_reporter.compute( - sensor=daily_report_sensor, start=tz.localize(datetime(2023, 3, 27)), end=tz.localize(datetime(2023, 3, 28)), + input=[dict(sensor=s3, source=DataSource.query.get(1))], + output=[dict(sensor=daily_report_sensor, source=DataSource.query.get(1))], belief_time=tz.localize(datetime(2023, 12, 1)), resolution=pd.Timedelta("1D"), - ) + )[0]["data"] assert result.event_starts[0] == pd.Timestamp( year=2023, month=3, day=27, tz="Europe/Amsterdam" @@ -123,12 +142,13 @@ def test_resampling(setup_dummy_data): # transition from summer (CEST) to winter (CET) result = agg_reporter.compute( - sensor=daily_report_sensor, start=tz.localize(datetime(2023, 10, 29)), end=tz.localize(datetime(2023, 10, 30)), + input=[dict(sensor=s3, source=DataSource.query.get(1))], + output=[dict(sensor=daily_report_sensor, source=DataSource.query.get(1))], belief_time=tz.localize(datetime(2023, 12, 1)), resolution=pd.Timedelta("1D"), - ) + )[0]["data"] assert result.event_starts[0] == pd.Timestamp( year=2023, month=10, day=29, tz="Europe/Amsterdam" diff --git a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py index 15b3d91a9..64c8ba7fd 100644 --- a/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_pandas_reporter.py @@ -9,7 +9,8 @@ def test_reporter(app, setup_dummy_data): s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data reporter_config = dict( - input_variables=["sensor_1", "sensor_2"], + required_input=[{"name": "sensor_1"}, {"name": "sensor_2"}], + required_output=[{"name": "df_merge"}], 
transformations=[ dict( df_input="sensor_1", @@ -36,23 +37,22 @@ def test_reporter(app, setup_dummy_data): dict(method="mean"), dict(method="sum", kwargs=dict(axis=1)), ], - final_df_output="df_merge", ) reporter = PandasReporter(config=reporter_config) start = datetime(2023, 4, 10, tzinfo=utc) end = datetime(2023, 4, 10, 10, tzinfo=utc) - input_variables = dict(sensor_1=dict(sensor=s1), sensor_2=dict(sensor=s2)) + input = [dict(name="sensor_1", sensor=s1), dict(name="sensor_2", sensor=s2)] + output = [dict(name="df_merge", sensor=report_sensor)] - report1 = reporter.compute( - sensor=report_sensor, start=start, end=end, input_variables=input_variables - ) + report1 = reporter.compute(start=start, end=end, input=input, output=output) + result = report1[0]["data"] - assert len(report1) == 5 - assert str(report1.event_starts[0]) == "2023-04-10 00:00:00+00:00" + assert len(result) == 5 + assert str(result.event_starts[0]) == "2023-04-10 00:00:00+00:00" assert ( - report1.sensor == report_sensor + result.sensor == report_sensor ) # check that the output sensor is effectively assigned. 
data_source_name = app.config.get("FLEXMEASURES_DEFAULT_DATASOURCE") @@ -60,18 +60,17 @@ def test_reporter(app, setup_dummy_data): assert all( (source.name == data_source_name) and (source.type == data_source_type) - for source in report1.sources + for source in result.sources ) # check data source is assigned # check that calling compute with different parameters changes the result report2 = reporter.compute( - sensor=report_sensor, - start=datetime(2023, 4, 10, 3, tzinfo=utc), - end=end, - input_variables=input_variables, + start=datetime(2023, 4, 10, 3, tzinfo=utc), end=end, input=input, output=output ) - assert len(report2) == 4 - assert str(report2.event_starts[0]) == "2023-04-10 02:00:00+00:00" + result2 = report2[0]["data"] + + assert len(result2) == 4 + assert str(result2.event_starts[0]) == "2023-04-10 02:00:00+00:00" def test_reporter_repeated(setup_dummy_data): @@ -80,7 +79,8 @@ def test_reporter_repeated(setup_dummy_data): s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data reporter_config = dict( - input_variables=["sensor_1", "sensor_2"], + required_input=[{"name": "sensor_1"}, {"name": "sensor_2"}], + required_output=[{"name": "df_merge"}], transformations=[ dict( df_input="sensor_1", @@ -107,17 +107,16 @@ def test_reporter_repeated(setup_dummy_data): dict(method="mean"), dict(method="sum", kwargs=dict(axis=1)), ], - final_df_output="df_merge", ) parameters = dict( - sensor=report_sensor.id, start="2023-04-10T00:00:00 00:00", end="2023-04-10T10:00:00 00:00", - input_variables=dict( - sensor_1=dict(sensor=s1.id), - sensor_2=dict(sensor=s2.id), - ), + input=[ + dict(name="sensor_1", sensor=s1.id), + dict(name="sensor_2", sensor=s2.id), + ], + output=[dict(name="df_merge", sensor=report_sensor.id)], ) reporter = PandasReporter(config=reporter_config) @@ -125,7 +124,7 @@ def test_reporter_repeated(setup_dummy_data): report1 = reporter.compute(parameters=parameters) report2 = reporter.compute(parameters=parameters) - assert 
all(report2.values == report1.values) + assert all(report2[0]["data"].values == report1[0]["data"].values) def test_reporter_empty(setup_dummy_data): @@ -133,29 +132,30 @@ def test_reporter_empty(setup_dummy_data): s1, s2, s3, report_sensor, daily_report_sensor = setup_dummy_data config = dict( - input_variables=["sensor_1"], + required_input=[{"name": "sensor_1"}], + required_output=[{"name": "sensor_1"}], transformations=[], - final_df_output="sensor_1", ) reporter = PandasReporter(config=config) # compute report on available data report = reporter.compute( - sensor=report_sensor, start=datetime(2023, 4, 10, tzinfo=utc), end=datetime(2023, 4, 10, 10, tzinfo=utc), - input_variables=dict(sensor_1=dict(sensor=s1)), + input=[dict(name="sensor_1", sensor=s1)], + output=[dict(name="sensor_1", sensor=report_sensor)], ) - assert not report.empty + assert not report[0]["data"].empty # compute report on dates with no data available report = reporter.compute( sensor=report_sensor, start=datetime(2021, 4, 10, tzinfo=utc), end=datetime(2021, 4, 10, 10, tzinfo=utc), - input_variables=dict(sensor_1=dict(sensor=s1)), + input=[dict(name="sensor_1", sensor=s1)], + output=[dict(name="sensor_1", sensor=report_sensor)], ) - assert report.empty + assert report[0]["data"].empty diff --git a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py index bac4409ec..23b5f57b6 100644 --- a/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py +++ b/flexmeasures/data/models/reporting/tests/test_tibber_reporter.py @@ -1,7 +1,6 @@ from __future__ import annotations import pytest -from flexmeasures.data.models.reporting import Reporter from flexmeasures.data.models.reporting.pandas_reporter import PandasReporter from flexmeasures.data.models.time_series import Sensor, DataSource, TimedBelief from flexmeasures.data.models.generic_assets import GenericAssetType, GenericAsset @@ -69,78 +68,37 @@ ] # cents/kWh 
-class TibberReporter(Reporter): - - _inner_reporter: PandasReporter | None = None - - def __init__(self, config: dict | None = None, **kwargs) -> None: - - """This class calculates the price of energy of a tariff indexed to the Day Ahead prices. - Energy Price = (1 + VAT) x ( EnergyTax + Tiber + DA Prices) - """ - - super().__init__(config=config, **kwargs) - - # search the sensors - EnergyTax = Sensor.query.filter(Sensor.name == "EnergyTax").one_or_none() - VAT = Sensor.query.filter(Sensor.name == "VAT").one_or_none() - tibber_tariff = Sensor.query.filter( - Sensor.name == "Tibber Tariff" - ).one_or_none() - - da_prices = Sensor.query.filter(Sensor.name == "DA prices").one_or_none() - - self.input_variables = { - "energy_tax": {"sensor": EnergyTax}, - "VAT": {"sensor": VAT}, - "tariff": {"sensor": tibber_tariff}, - "da_prices": {"sensor": da_prices}, - } - - # create the PandasReporter reporter config - pandas_reporter_config = dict( - input_variables=["energy_tax", "VAT", "tariff", "da_prices"], - transformations=[ - dict( - df_input="VAT", - method="droplevel", - args=[[1, 2, 3]], - ), - dict(method="add", args=[1]), # this is to get 1 + VAT - dict( - df_input="energy_tax", - method="droplevel", - args=[[1, 2, 3]], - ), - dict( - df_input="tariff", - method="droplevel", - args=[[1, 2, 3]], - ), - dict( - df_input="da_prices", - method="droplevel", - args=[[1, 2, 3]], - ), - dict( - method="add", args=["@tariff"] - ), # da_prices = da_prices + tibber_tariff - dict( - method="add", args=["@energy_tax"] - ), # da_prices = da_prices + energy_tax - dict( - method="multiply", args=["@VAT"] - ), # da_prices = da_price * VAT, VAT - dict(method="round"), - ], - final_df_output="da_prices", - ) - - self._inner_reporter = PandasReporter(config=pandas_reporter_config) - - def _compute_report(self, **kwargs): - kwargs["input_variables"] = self.input_variables - return self._inner_reporter.compute(**kwargs) +pandas_reporter_config = dict( + required_input=[{"name": v} for v 
in ["energy_tax", "VAT", "tariff", "da_prices"]], + required_output=[{"name": "da_prices"}], + transformations=[ + dict( + df_input="VAT", + method="droplevel", + args=[[1, 2, 3]], + ), + dict(method="add", args=[1]), # this is to get 1 + VAT + dict( + df_input="energy_tax", + method="droplevel", + args=[[1, 2, 3]], + ), + dict( + df_input="tariff", + method="droplevel", + args=[[1, 2, 3]], + ), + dict( + df_input="da_prices", + method="droplevel", + args=[[1, 2, 3]], + ), + dict(method="add", args=["@tariff"]), # da_prices = da_prices + tibber_tariff + dict(method="add", args=["@energy_tax"]), # da_prices = da_prices + energy_tax + dict(method="multiply", args=["@VAT"]), # da_prices = da_price * VAT, VAT + dict(method="round"), + ], +) def beliefs_from_timeseries(index, values, sensor, source): @@ -244,7 +202,9 @@ def tibber_test_data(fresh_db, app): ) db.session.add(tibber_report_sensor) - return tibber_report_sensor + db.session.commit() + + return tibber_report_sensor, EnergyTax, VAT, tibber_tariff, da_prices def test_tibber_reporter(tibber_test_data): @@ -253,15 +213,21 @@ def test_tibber_reporter(tibber_test_data): displayed in Tibber's App. 
""" - tibber_report_sensor = tibber_test_data + tibber_report_sensor, EnergyTax, VAT, tibber_tariff, da_prices = tibber_test_data - tibber_reporter = TibberReporter() + tibber_reporter = PandasReporter(config=pandas_reporter_config) result = tibber_reporter.compute( - sensor=tibber_report_sensor, + input=[ + {"name": "energy_tax", "sensor": EnergyTax}, + {"name": "VAT", "sensor": VAT}, + {"name": "tariff", "sensor": tibber_tariff}, + {"name": "da_prices", "sensor": da_prices}, + ], + output=[dict(sensor=tibber_report_sensor, name="da_prices")], start=datetime(2023, 4, 13, tzinfo=utc), end=datetime(2023, 4, 14, tzinfo=utc), - ) + )[0]["data"] # check that we got a result for 24 hours assert len(result) == 24 diff --git a/flexmeasures/data/schemas/io.py b/flexmeasures/data/schemas/io.py new file mode 100644 index 000000000..34a8317e1 --- /dev/null +++ b/flexmeasures/data/schemas/io.py @@ -0,0 +1,52 @@ +from marshmallow import fields, Schema + +from flexmeasures.data.schemas.sensors import SensorIdField +from flexmeasures.data.schemas import AwareDateTimeField, DurationField +from flexmeasures.data.schemas.sources import DataSourceIdField + + +class RequiredInput(Schema): + name = fields.Str(required=True) + + +class Input(Schema): + """ + This schema implements the required fields to perform a TimedBeliefs search + using the method flexmeasures.data.models.time_series:TimedBelief.search_beliefs. + + It includes the field `name`, which is not part of the search query, for later reference of the belief. 
+ """ + + name = fields.Str(required=False) + + sensor = SensorIdField(required=True) + source = DataSourceIdField() + + event_starts_after = AwareDateTimeField() + event_ends_before = AwareDateTimeField() + + belief_time = AwareDateTimeField() + + horizons_at_least = DurationField() + horizons_at_most = DurationField() + + source_types = fields.List(fields.Str()) + exclude_source_types = fields.List(fields.Str()) + most_recent_beliefs_only = fields.Boolean() + most_recent_events_only = fields.Boolean() + + one_deterministic_belief_per_event = fields.Boolean() + one_deterministic_belief_per_event_per_source = fields.Boolean() + resolution = DurationField() + sum_multiple = fields.Boolean() + + +class Output(Schema): + name = fields.Str(required=False) + column = fields.Str(required=False) + sensor = SensorIdField(required=True) + + +class RequiredOutput(Schema): + name = fields.Str(required=True) + column = fields.Str(required=False) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 0d66c7225..ed5f08b4f 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -1,9 +1,10 @@ -from marshmallow import Schema, fields +from marshmallow import Schema, fields, validate from flexmeasures.data.schemas.sensors import SensorIdField from flexmeasures.data.schemas.sources import DataSourceIdField from flexmeasures.data.schemas import AwareDateTimeField, DurationField +from flexmeasures.data.schemas.io import Input, Output class ReporterConfigSchema(Schema): @@ -22,7 +23,13 @@ class ReporterParametersSchema(Schema): Inherit from this class to extend this schema with your own parameters. 
""" - sensor = SensorIdField(required=True) + input = fields.List( + fields.Nested(Input()), + required=True, + validate=validate.Length(min=1), + ) + + output = fields.List(fields.Nested(Output()), validate=validate.Length(min=1)) start = AwareDateTimeField(required=True) end = AwareDateTimeField(required=True) diff --git a/flexmeasures/data/schemas/reporting/aggregation.py b/flexmeasures/data/schemas/reporting/aggregation.py index 442f56488..c83cee1dd 100644 --- a/flexmeasures/data/schemas/reporting/aggregation.py +++ b/flexmeasures/data/schemas/reporting/aggregation.py @@ -2,65 +2,65 @@ from flexmeasures.data.schemas.reporting import ( ReporterConfigSchema, - BeliefsSearchConfigSchema, + ReporterParametersSchema, ) +from flexmeasures.data.schemas.io import Output + class AggregatorConfigSchema(ReporterConfigSchema): - """Schema for the reporter_config of the AggregatorReporter + """Schema for the AggregatorReporter configuration + + Example: + .. code-block:: json + { + "method" : "sum", + "weights" : { + "pv" : 1.0, + "consumption" : -1.0 + } + } + """ + + method = fields.Str(required=False, dump_default="sum", load_default="sum") + weights = fields.Dict(fields.Str(), fields.Float(), required=False) + + +class AggregatorParametersSchema(ReporterParametersSchema): + """Schema for the AggregatorReporter parameters Example: ..
code-block:: json { - "data": [ + "input": [ { + "name" : "pv", "sensor": 1, "source" : 1, - "alias" : "pv" }, { + "name" : "consumption", "sensor": 1, "source" : 2, - "alias" : "consumption" } ], - "method" : "sum", - "weights" : { - "pv" : 1.0, - "consumption" : -1.0 - } + "output": [ + { + "sensor": 3, + } + ], + "start" : "2023-01-01T00:00:00+00:00", + "end" : "2023-01-03T00:00:00+00:00", } """ - method = fields.Str(required=False, dump_default="sum", load_default="sum") - weights = fields.Dict(fields.Str(), fields.Float(), required=False) - data = fields.List( - fields.Nested(BeliefsSearchConfigSchema()), - required=True, - validator=validate.Length(min=1), + # redefining output to restrict the output length to 1 + output = fields.List( + fields.Nested(Output()), validate=validate.Length(min=1, max=1) ) @validates_schema def validate_source(self, data, **kwargs): - - for d in data["data"]: - if "source" not in d: + for input_description in data["input"]: + if "source" not in input_description: raise ValidationError("`source` is a required field.") - - @validates_schema - def validate_weights(self, data, **kwargs): - if "weights" not in data: - return - - # get aliases - aliases = [] - for d in data["data"]: - if "alias" in d: - aliases.append(d.get("alias")) - - # check that the aliases in weights are defined - for alias in data.get("weights", {}).keys(): - if alias not in aliases: - raise ValidationError( - f"alias `{alias}` in `weights` is not defined in `data`" - ) diff --git a/flexmeasures/data/schemas/reporting/pandas_reporter.py b/flexmeasures/data/schemas/reporting/pandas_reporter.py index 4c3a97e9e..6dcac010f 100644 --- a/flexmeasures/data/schemas/reporting/pandas_reporter.py +++ b/flexmeasures/data/schemas/reporting/pandas_reporter.py @@ -1,15 +1,13 @@ from marshmallow import Schema, fields, ValidationError, validates_schema, validate from inspect import signature -from flexmeasures.data.schemas.sensors import SensorIdField -from 
flexmeasures.data.schemas.sources import DataSourceIdField - -from flexmeasures.data.schemas import AwareDateTimeField, DurationField +from flexmeasures.data.schemas import AwareDateTimeField from flexmeasures.data.schemas.reporting import ( ReporterConfigSchema, ReporterParametersSchema, ) +from flexmeasures.data.schemas.io import RequiredInput, RequiredOutput from timely_beliefs import BeliefsDataFrame @@ -50,65 +48,45 @@ def validate_method_call(self, data, **kwargs): ) -class BeliefsSearchConfigSchema(Schema): - """ - This schema implements the required fields to perform a TimedBeliefs search - using the method flexmeasures.data.models.time_series:Sensor.search_beliefs - """ - - sensor = SensorIdField(required=True) - - event_starts_after = AwareDateTimeField() - event_ends_before = AwareDateTimeField() - - belief_time = AwareDateTimeField() - - horizons_at_least = DurationField() - horizons_at_most = DurationField() - - source = DataSourceIdField() - - source_types = fields.List(fields.Str()) - exclude_source_types = fields.List(fields.Str()) - most_recent_beliefs_only = fields.Boolean() - most_recent_events_only = fields.Boolean() - - one_deterministic_belief_per_event = fields.Boolean() - one_deterministic_belief_per_event_per_source = fields.Boolean() - resolution = DurationField() - sum_multiple = fields.Boolean() - - class PandasReporterConfigSchema(ReporterConfigSchema): """ This schema lists fields that can be used to describe sensors in the optimised portfolio Example: - { - "input_variables" : ["df1"], - "transformations" : [ - { - "df_input" : "df1", - "df_output" : "df2", - "method" : "copy" - }, - { - "df_input" : "df2", - "df_output" : "df2", - "method" : "sum" - }, - { - "method" : "sum", - "kwargs" : {"axis" : 0} - } - ], - "final_df_output" : "df2" + { + "required_input" : [ + {"name" : "df1"} + ], + "required_output" : [ + {"name" : "df2"} + ], + "transformations" : [ + { + "df_input" : "df1", + "df_output" : "df2", + "method" : "copy" + }, + {
+ "df_input" : "df2", + "df_output" : "df2", + "method" : "sum" + }, + { + "method" : "sum", + "kwargs" : {"axis" : 0} + } + ], + } """ - input_variables = fields.List(fields.Str(), required=True) # expected input aliases + required_input = fields.List( + fields.Nested(RequiredInput()), validate=validate.Length(min=1) + ) + required_output = fields.List( + fields.Nested(RequiredOutput()), validate=validate.Length(min=1) + ) transformations = fields.List(fields.Nested(PandasMethodCall()), required=True) - final_df_output = fields.Str(required=True) @validates_schema def validate_chaining(self, data, **kwargs): @@ -120,20 +98,20 @@ def validate_chaining(self, data, **kwargs): # fake_data mocks the PandasReporter class attribute data. It contains empty BeliefsDataFrame # to simulate the process of applying the transformations. fake_data = dict( - (variable, BeliefsDataFrame) for variable in data.get("input_variables") + (_input["name"], BeliefsDataFrame) for _input in data.get("required_input") ) - final_df_output = data.get("final_df_output") + output_names = [_output["name"] for _output in data.get("required_output")] previous_df = None - final_df_output_method = None + output_method = dict() for transformation in data.get("transformations"): df_input = transformation.get("df_input", previous_df) df_output = transformation.get("df_output", df_input) - if df_output == final_df_output: - final_df_output_method = transformation.get("method") + if df_output in output_names: + output_method[df_output] = transformation.get("method") if df_input not in fake_data: raise ValidationError("Cannot find the input DataFrame.") @@ -142,15 +120,18 @@ def validate_chaining(self, data, **kwargs): fake_data[df_output] = BeliefsDataFrame - if final_df_output not in fake_data: - raise ValidationError( - "Cannot find final output DataFrame among the resulting DataFrames." 
- ) + for _output in output_names: + if _output not in fake_data: + raise ValidationError( + f"Cannot find final output `{_output}` DataFrame among the resulting DataFrames." + ) - if final_df_output_method in ["resample", "groupby"]: - raise ValidationError( - "Final output type cannot by of type `Resampler` or `DataFrameGroupBy`" - ) + if (_output in output_method) and ( + output_method[_output] in ["resample", "groupby"] + ): + raise ValidationError( + f"Final output (`{_output}`) type cannot by of type `Resampler` or `DataFrameGroupBy`" + ) class PandasReporterParametersSchema(ReporterParametersSchema): @@ -159,13 +140,6 @@ class PandasReporterParametersSchema(ReporterParametersSchema): start = AwareDateTimeField(required=False) end = AwareDateTimeField(required=False) - input_variables = fields.Dict( - keys=fields.Str(), # alias - values=fields.Nested(BeliefsSearchConfigSchema()), - required=True, - validator=validate.Length(min=1), - ) - @validates_schema def validate_time_parameters(self, data, **kwargs): """This method validates that all input sensors have start @@ -176,13 +150,16 @@ def validate_time_parameters(self, data, **kwargs): if ("start" in data) and ("end" in data): return - for alias, input_sensor in data.get("input_variables").items(): - if ("event_starts_after" not in input_sensor) and ("start" not in data): + for input_description in data.get("input", []): + input_sensor = input_description["sensor"] + if ("event_starts_after" not in input_description) and ( + "start" not in data + ): raise ValidationError( - f"Start parameter not provided for sensor `{alias}` ({input_sensor})." + f"Start parameter not provided for sensor {input_sensor}" ) - if ("event_ends_before" not in input_sensor) and ("end" not in data): + if ("event_ends_before" not in input_description) and ("end" not in data): raise ValidationError( - f"End parameter not provided for sensor `{alias}` ({input_sensor})."
+ f"End parameter not provided for sensor {input_sensor}" ) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 6354bbd5e..ceaab864b 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -12,7 +12,8 @@ [ ( { # this checks that the final_df_output dataframe is actually generated at some point of the processing pipeline - "input_variables": ["sensor_1"], + "required_input": [{"name": "sensor_1"}], + "required_output": [{"name": "final_output"}], "transformations": [ { "df_output": "final_output", @@ -20,44 +21,43 @@ "method": "copy", } ], - "final_df_output": "final_output", }, True, ), ( { # this checks that chaining works, applying the method copy on the previous dataframe - "input_variables": ["sensor_1"], + "required_input": [{"name": "sensor_1"}], + "required_output": [{"name": "final_output"}], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, {"method": "copy"}, {"df_output": "final_output", "method": "copy"}, ], - "final_df_output": "final_output", }, True, ), ( { # this checks that resample cannot be the last method being applied - "input_variables": ["sensor_1", "sensor_2"], + "required_input": [{"name": "sensor_1"}, {"name": "sensor_2"}], + "required_output": [{"name": "final_output"}], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, {"method": "copy"}, {"df_output": "final_output", "method": "resample", "args": ["1h"]}, ], - "final_df_output": "final_output", }, False, ), ( { # this checks that resample cannot be the last method being applied - "input_variables": ["sensor_1", "sensor_2"], + "required_input": [{"name": "sensor_1"}, {"name": "sensor_2"}], + "required_output": [{"name": "final_output"}], "transformations": [ {"df_output": "output1", "df_input": "sensor_1", "method": "copy"}, {"method": "copy"}, {"df_output": "final_output", 
"method": "resample", "args": ["1h"]}, {"method": "sum"}, ], - "final_df_output": "final_output", }, True, ), @@ -79,37 +79,40 @@ def test_pandas_reporter_config_schema(config, is_valid, db, app, setup_dummy_se [ ( { - "sensor": 2, # sensor to save the output to - "input_variables": { # we're describing how the named variables should be constructed, by defining search filters on the sensor data, rather than on the sensor - "sensor_1_df": { - "sensor": 1 - }, # alias, i.e. variable name of the DataFrame containing the input data - }, + "input": [ + { + "name": "sensor_1_df", + "sensor": 1, + } # we're describing how the named variables should be constructed, by defining search filters on the sensor data, rather than on the sensor + ], + "output": [ + {"name": "df2", "sensor": 2} + ], # sensor to save the output to "start": "2023-06-06T00:00:00+02:00", "end": "2023-06-06T00:00:00+02:00", }, True, ), - ( + ( # missing start and end { - "input_variables": { - "sensor_1_df": { - "sensor": 1 - } # alias, i.e. variable name of the DataFrame containing the input data - }, + "input": [{"name": "sensor_1_df", "sensor": 1}], + "output": [{"name": "df2", "sensor": 2}], }, False, ), ( { - "sensor": 2, # sensor to save the output to - "input_variables": { - "sensor_1_df": { # alias, i.e. 
variable name of the DataFrame containing the parameters data + "input": [ + { + "name": "sensor_1_df", "sensor": 1, "event_starts_after": "2023-06-07T00:00:00+02:00", "event_ends_before": "2023-06-07T00:00:00+02:00", } - }, + ], + "output": [ + {"name": "df2", "sensor": 2} + ], # sensor to save the output to }, True, ), diff --git a/flexmeasures/data/tests/conftest.py b/flexmeasures/data/tests/conftest.py index 88b1688c8..8f2e37ad5 100644 --- a/flexmeasures/data/tests/conftest.py +++ b/flexmeasures/data/tests/conftest.py @@ -11,6 +11,7 @@ import timely_beliefs as tb from flexmeasures.data.models.reporting import Reporter +from flexmeasures.data.schemas.reporting import ReporterParametersSchema from flexmeasures.data.models.annotations import Annotation from flexmeasures.data.models.data_sources import DataSource from flexmeasures.data.models.time_series import TimedBelief, Sensor @@ -186,13 +187,18 @@ def test_reporter(app, db, add_nearby_weather_sensors): class TestReporterConfigSchema(Schema): a = fields.Str() + class TestReporterParametersSchema(ReporterParametersSchema): + b = fields.Str(required=False) + class TestReporter(Reporter): _config_schema = TestReporterConfigSchema() + _parameters_schema = TestReporterParametersSchema() - def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: + def _compute_report(self, **kwargs) -> list: start = kwargs.get("start") end = kwargs.get("end") - resolution = self.sensor.event_resolution + sensor = kwargs["output"][0]["sensor"] + resolution = sensor.event_resolution index = pd.date_range(start=start, end=end, freq=resolution) @@ -203,7 +209,9 @@ def _compute_report(self, **kwargs) -> tb.BeliefsDataFrame: r["cumulative_probability"] = 0.5 r["event_value"] = 0 - return tb.BeliefsDataFrame(r, sensor=self.sensor) + bdf = tb.BeliefsDataFrame(r, sensor=sensor) + + return [{"data": bdf, "sensor": sensor}] app.data_generators["reporter"].update({"TestReporter": TestReporter}) diff --git 
a/flexmeasures/data/tests/test_data_source.py b/flexmeasures/data/tests/test_data_source.py index 1233fe122..712174847 100644 --- a/flexmeasures/data/tests/test_data_source.py +++ b/flexmeasures/data/tests/test_data_source.py @@ -16,16 +16,18 @@ def test_get_reporter_from_source(db, app, test_reporter, add_nearby_weather_sen assert reporter.__class__.__name__ == "TestReporter" res = reporter.compute( - sensor=reporter_sensor, + input=[{"sensor": reporter_sensor}], + output=[{"sensor": reporter_sensor}], start=datetime(2023, 1, 1, tzinfo=UTC), end=datetime(2023, 1, 2, tzinfo=UTC), - ) + )[0]["data"] assert res.lineage.sources[0] == reporter.data_source with pytest.raises(AttributeError): reporter.compute( - sensor=reporter_sensor, + input=[{"sensor": reporter_sensor}], + output=[{"sensor": reporter_sensor}], start=datetime(2023, 1, 1, tzinfo=UTC), end="not a date", ) @@ -70,10 +72,11 @@ def test_data_generator_save_config(db, app, test_reporter, add_nearby_weather_s reporter = TestReporter(config={"a": "1"}) res = reporter.compute( - sensor=reporter_sensor, + input=[{"sensor": reporter_sensor}], + output=[{"sensor": reporter_sensor}], start=datetime(2023, 1, 1, tzinfo=UTC), end=datetime(2023, 1, 2, tzinfo=UTC), - ) + )[0]["data"] assert res.lineage.sources[0].attributes.get("data_generator").get("config") == { "a": "1" @@ -82,9 +85,61 @@ def test_data_generator_save_config(db, app, test_reporter, add_nearby_weather_s reporter = TestReporter(config={"a": "1"}, save_config=False) res = reporter.compute( - sensor=reporter_sensor, + input=[{"sensor": reporter_sensor}], + output=[{"sensor": reporter_sensor}], start=datetime(2023, 1, 1, tzinfo=UTC), end=datetime(2023, 1, 2, tzinfo=UTC), + )[0]["data"] + + # check that the data_generator is not saving the config in the data_source attributes + assert res.lineage.sources[0].attributes.get("data_generator") == dict() + + +def test_data_generator_save_parameters( + db, app, test_reporter, add_nearby_weather_sensors +): + 
TestReporter = app.data_generators["reporter"].get("TestReporter") + + reporter_sensor = add_nearby_weather_sensors.get("farther_temperature") + + reporter = TestReporter(config={"a": "1"}, save_parameters=True) + + parameters = { + "input": [{"sensor": reporter_sensor.id}], + "output": [{"sensor": reporter_sensor.id}], + "start": "2023-01-01T00:00:00+00:00", + "end": "2023-01-02T00:00:00+00:00", + "b": "test", + } + + parameters_without_start_end = { + "input": [{"sensor": reporter_sensor.id}], + "output": [{"sensor": reporter_sensor.id}], + "b": "test", + } + + res = reporter.compute(parameters=parameters)[0]["data"] + + assert res.lineage.sources[0].attributes.get("data_generator").get("config") == { + "a": "1" + } + + assert ( + res.lineage.sources[0].attributes.get("data_generator").get("parameters") + == parameters_without_start_end ) - assert len(res.lineage.sources[0].attributes) == 0 + dg2 = reporter.data_source.data_generator + + parameters_2 = { + "start": "2023-01-01T10:00:00+00:00", + "end": "2023-01-02T00:00:00+00:00", + "b": "test2", + } + + res = dg2.compute(parameters=parameters_2)[0]["data"] + + # check that compute gets data stored in the DB (i.e. `input`/`output`) and updated data + # from the method call (e.g. field `b`) + assert dg2._parameters["b"] == parameters_2["b"] + assert dg2._parameters["start"].isoformat() == parameters_2["start"]