From a683eb54327c45c6291641234564c0860720af88 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 17 Oct 2022 11:47:20 +0200 Subject: [PATCH 01/11] Refactor: move groupby statement into util function, and use private function Signed-off-by: F.N. Claessen --- flexmeasures/api/common/utils/api_utils.py | 6 +----- flexmeasures/data/services/time_series.py | 9 +++++++++ flexmeasures/data/utils.py | 6 +----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/flexmeasures/api/common/utils/api_utils.py b/flexmeasures/api/common/utils/api_utils.py index d92c6e017..20b053a74 100644 --- a/flexmeasures/api/common/utils/api_utils.py +++ b/flexmeasures/api/common/utils/api_utils.py @@ -409,11 +409,7 @@ def save_to_db( if save_changed_beliefs_only: # Drop beliefs that haven't changed - timed_values = ( - timed_values.convert_index_from_belief_horizon_to_time() - .groupby(level=["belief_time", "source"], as_index=False) - .apply(drop_unchanged_beliefs) - ) + timed_values = drop_unchanged_beliefs(timed_values) # Work around bug in which groupby still introduces an index level, even though we asked it not to if None in timed_values.index.names: diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 515fb8424..2d0d56118 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -320,6 +320,15 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: """ if bdf.empty: return bdf + return ( + bdf.convert_index_from_belief_horizon_to_time() + .groupby(level=["belief_time", "source"], as_index=False) + .apply(_drop_unchanged_beliefs) + ) + + +def _drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: + """Only works on BeliefsDataFrames with a unique belief time and unique source.""" if len(bdf.lineage.belief_times) > 1: raise NotImplementedError("Beliefs should share a unique belief time.") if len(bdf.lineage.sources) > 1: diff 
--git a/flexmeasures/data/utils.py b/flexmeasures/data/utils.py index b56141e4e..b5700d416 100644 --- a/flexmeasures/data/utils.py +++ b/flexmeasures/data/utils.py @@ -105,11 +105,7 @@ def save_to_db( if save_changed_beliefs_only: # Drop beliefs that haven't changed - timed_values = ( - timed_values.convert_index_from_belief_horizon_to_time() - .groupby(level=["belief_time", "source"], as_index=False) - .apply(drop_unchanged_beliefs) - ) + timed_values = drop_unchanged_beliefs(timed_values) len_after = len(timed_values) if len_after < len_before: status = "success_with_unchanged_beliefs_skipped" From e6741d66afa4264c7789bf1db5e58518e5a375dc Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 17 Oct 2022 11:49:46 +0200 Subject: [PATCH 02/11] Remove unchanged beliefs from within new data itself Signed-off-by: F.N. Claessen --- flexmeasures/data/services/time_series.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 2d0d56118..c5b1dd2e0 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -315,11 +315,26 @@ def set_bdf_source(bdf: tb.BeliefsDataFrame, source_name: str) -> tb.BeliefsData def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: """Drop beliefs that are already stored in the database with an earlier belief time. + Also drop beliefs that are already in the data with an earlier belief time. + Quite useful function to prevent cluttering up your database with beliefs that remain unchanged over time. Only works on BeliefsDataFrames with a unique belief time and unique source. 
""" if bdf.empty: return bdf + + # Remove unchanged beliefs from within the new data itself + index_names = bdf.index.names + bdf = ( + bdf.sort_index() + .reset_index() + .drop_duplicates( + ["event_start", "source", "cumulative_probability", "event_value"], + keep="first", + ) + .set_index(index_names) + ) + return ( bdf.convert_index_from_belief_horizon_to_time() .groupby(level=["belief_time", "source"], as_index=False) From f811948a585548817ba47f948cbe5cff58179a2f Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 21 Oct 2022 11:13:09 +0200 Subject: [PATCH 03/11] Drop unchanged beliefs separately for ex-ante and ex-post beliefs Signed-off-by: F.N. Claessen --- flexmeasures/data/services/time_series.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index c5b1dd2e0..596a3c6d7 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -323,6 +323,16 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: if bdf.empty: return bdf + # Save the oldest ex-post beliefs explicitly, even if they do not deviate from the most recent ex-ante beliefs + ex_ante_bdf = bdf[bdf.belief_horizons > timedelta(0)] + ex_post_bdf = bdf[bdf.belief_horizons <= timedelta(0)] + if not ex_ante_bdf.empty and not ex_post_bdf.empty: + # Recursive function call + ex_ante_bdf = drop_unchanged_beliefs(ex_ante_bdf) + ex_post_bdf = drop_unchanged_beliefs(ex_post_bdf) + bdf = pd.concat([ex_ante_bdf, ex_post_bdf]) + return bdf + # Remove unchanged beliefs from within the new data itself index_names = bdf.index.names bdf = ( From ac1ab48fdd9fe0a0588a854a8e7cba7db4f2b10e Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 31 Oct 2022 14:08:48 +0100 Subject: [PATCH 04/11] Remove now false statement Signed-off-by: F.N. 
Claessen --- flexmeasures/data/services/time_series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 596a3c6d7..0022014dc 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -318,7 +318,6 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: Also drop beliefs that are already in the data with an earlier belief time. Quite useful function to prevent cluttering up your database with beliefs that remain unchanged over time. - Only works on BeliefsDataFrames with a unique belief time and unique source. """ if bdf.empty: return bdf From 949742d45bc5869b3b00920856efaea887de20cc Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 31 Oct 2022 14:24:30 +0100 Subject: [PATCH 05/11] Remove explicit checks for private function Signed-off-by: F.N. Claessen --- flexmeasures/data/services/time_series.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 0022014dc..94e8cd32b 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -353,10 +353,6 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: def _drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: """Only works on BeliefsDataFrames with a unique belief time and unique source.""" - if len(bdf.lineage.belief_times) > 1: - raise NotImplementedError("Beliefs should share a unique belief time.") - if len(bdf.lineage.sources) > 1: - raise NotImplementedError("Beliefs should share a unique source.") previous_beliefs_in_db = bdf.sensor.search_beliefs( event_starts_after=bdf.event_starts[0], event_ends_before=bdf.event_ends[-1], From 9f4b6a20103e6fdef00075752c7db504ef3e21fc Mon Sep 17 00:00:00 2001 From: "F.N. 
Claessen" Date: Mon, 31 Oct 2022 14:25:28 +0100 Subject: [PATCH 06/11] Update todo Signed-off-by: F.N. Claessen --- flexmeasures/data/services/time_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 94e8cd32b..df869920b 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -360,7 +360,7 @@ def _drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: source=bdf.lineage.sources[0], # unique source most_recent_beliefs_only=False, ) - # todo: delete next line and set most_recent_beliefs_only=True when this is resolved: https://github.com/SeitaBV/timely-beliefs/issues/97 + # todo: delete next line and set most_recent_beliefs_only=True when this is resolved: https://github.com/SeitaBV/timely-beliefs/pull/117 previous_most_recent_beliefs_in_db = belief_utils.select_most_recent_belief( previous_beliefs_in_db ) From 9b10cf3bcde49b92ce88b4f0e34c64e1193458e7 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 31 Oct 2022 14:50:06 +0100 Subject: [PATCH 07/11] Also apply separate ex-ante and ex-post look-ups to previously stored beliefs Signed-off-by: F.N. 
Claessen --- flexmeasures/data/services/time_series.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index df869920b..80f969505 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -353,12 +353,19 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: def _drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: """Only works on BeliefsDataFrames with a unique belief time and unique source.""" + if bdf.belief_horizons[0] > timedelta(0): + # Look up only ex-ante beliefs (horizon > 0) + kwargs = dict(horizons_at_least=timedelta(0)) + else: + # Look up only ex-post beliefs (horizon <= 0) + kwargs = dict(horizons_at_most=timedelta(0)) previous_beliefs_in_db = bdf.sensor.search_beliefs( event_starts_after=bdf.event_starts[0], event_ends_before=bdf.event_ends[-1], beliefs_before=bdf.lineage.belief_times[0], # unique belief time source=bdf.lineage.sources[0], # unique source most_recent_beliefs_only=False, + **kwargs, ) # todo: delete next line and set most_recent_beliefs_only=True when this is resolved: https://github.com/SeitaBV/timely-beliefs/pull/117 previous_most_recent_beliefs_in_db = belief_utils.select_most_recent_belief( From 8fad00c9847db59d4205741527f173dd86200723 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 31 Oct 2022 14:52:27 +0100 Subject: [PATCH 08/11] Update docstring Signed-off-by: F.N. 
Claessen --- flexmeasures/data/services/time_series.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 80f969505..5d75ef1c7 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -352,7 +352,11 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: def _drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: - """Only works on BeliefsDataFrames with a unique belief time and unique source.""" + """Drop beliefs that are already stored in the database with an earlier belief time. + + Assumes a BeliefsDataFrame with a unique belief time and unique source, + and either all ex-ante beliefs or all ex-post beliefs. + """ if bdf.belief_horizons[0] > timedelta(0): # Look up only ex-ante beliefs (horizon > 0) kwargs = dict(horizons_at_least=timedelta(0)) From 716489a31db36b7d9f8aa077b2593e5909d81f18 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 31 Oct 2022 14:58:33 +0100 Subject: [PATCH 09/11] changelog entry Signed-off-by: F.N. Claessen --- documentation/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index dd56a80ae..2742f8b0b 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -23,6 +23,7 @@ Infrastructure / Support * Reduce size of Docker image (from 2GB to 1.4GB) [see `PR #512 `_] * Remove bokeh dependency and obsolete UI views [see `PR #476 `_] +* Revised strategy for removing unchanged beliefs when saving data: retain the oldest measurement (ex-post belief), too [see `PR #518 `_] v0.11.2 | September 6, 2022 From 3e5e61b43a020f560a8daac88313a2928ef93112 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Thu, 10 Nov 2022 14:40:02 +0100 Subject: [PATCH 10/11] Improve inline comments Signed-off-by: F.N. 
Claessen --- flexmeasures/data/services/time_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index 5d75ef1c7..b43c09676 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -326,7 +326,7 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: ex_ante_bdf = bdf[bdf.belief_horizons > timedelta(0)] ex_post_bdf = bdf[bdf.belief_horizons <= timedelta(0)] if not ex_ante_bdf.empty and not ex_post_bdf.empty: - # Recursive function call + # We treat each part separately so that the ex-post knowledge is not lost ex_ante_bdf = drop_unchanged_beliefs(ex_ante_bdf) ex_post_bdf = drop_unchanged_beliefs(ex_post_bdf) bdf = pd.concat([ex_ante_bdf, ex_post_bdf]) From 605f096a4dc67403d4138acce01ac96ee2937ab1 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Thu, 10 Nov 2022 14:44:59 +0100 Subject: [PATCH 11/11] Clarify use of private util function Signed-off-by: F.N. 
Claessen --- flexmeasures/data/services/time_series.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/flexmeasures/data/services/time_series.py b/flexmeasures/data/services/time_series.py index b43c09676..755fa6172 100644 --- a/flexmeasures/data/services/time_series.py +++ b/flexmeasures/data/services/time_series.py @@ -344,18 +344,23 @@ def drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: .set_index(index_names) ) + # Remove unchanged beliefs with respect to what is already stored in the database return ( bdf.convert_index_from_belief_horizon_to_time() .groupby(level=["belief_time", "source"], as_index=False) - .apply(_drop_unchanged_beliefs) + .apply(_drop_unchanged_beliefs_compared_to_db) ) -def _drop_unchanged_beliefs(bdf: tb.BeliefsDataFrame) -> tb.BeliefsDataFrame: +def _drop_unchanged_beliefs_compared_to_db( + bdf: tb.BeliefsDataFrame, +) -> tb.BeliefsDataFrame: """Drop beliefs that are already stored in the database with an earlier belief time. Assumes a BeliefsDataFrame with a unique belief time and unique source, and either all ex-ante beliefs or all ex-post beliefs. + + It is preferable to call the public function drop_unchanged_beliefs instead. """ if bdf.belief_horizons[0] > timedelta(0): # Look up only ex-ante beliefs (horizon > 0)