From 742c762f19b0f9313310f275b1a7d8c88c273e72 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 21 Aug 2023 14:15:04 +0200 Subject: [PATCH 1/4] improve performance of normalizedTypicalPeriods creation --- tsam/timeseriesaggregation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tsam/timeseriesaggregation.py b/tsam/timeseriesaggregation.py index 703c3a9..f8adf0f 100644 --- a/tsam/timeseriesaggregation.py +++ b/tsam/timeseriesaggregation.py @@ -1066,9 +1066,10 @@ def createTypicalPeriods(self): ) # put the clustered data in pandas format and scale back - self.normalizedTypicalPeriods = pd.DataFrame( - self.clusterPeriods, columns=self.normalizedPeriodlyProfiles.columns - ).stack(level="TimeStep") + self.normalizedTypicalPeriods = pd.concat([ + pd.Series(s, index=self.normalizedPeriodlyProfiles.columns) + for s in self.clusterPeriods + ], axis=1).unstack("TimeStep").T if self.segmentation: from tsam.utils.segmentation import segmentation From cba0523509784c917befa6e54e21c08d0d307928 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 21 Aug 2023 14:15:52 +0200 Subject: [PATCH 2/4] use .clip() to simplify _rescaleClusterPeriods --- tsam/timeseriesaggregation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tsam/timeseriesaggregation.py b/tsam/timeseriesaggregation.py index f8adf0f..f3349c3 100644 --- a/tsam/timeseriesaggregation.py +++ b/tsam/timeseriesaggregation.py @@ -869,10 +869,9 @@ def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx ) # reset values higher than the upper sacle or less than zero - typicalPeriods[column][typicalPeriods[column] > scale_ub] = scale_ub - typicalPeriods[column][typicalPeriods[column] < 0.0] = 0.0 + typicalPeriods[column].clip(lower=0, upper=scale_ub, inplace=True) - typicalPeriods[column] = typicalPeriods[column].fillna(0.0) + typicalPeriods[column].fillna(0.0, inplace=True) # calc new sum and new diff to orig data sum_clu_wo_peak = sum( From 42fef503753c84e932ba2d66d6f31fccc76a1499 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 21 Aug 2023 14:16:10 +0200 Subject: [PATCH 3/4] use vectorized np.sum() in _rescaleClusterPeriods --- tsam/timeseriesaggregation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tsam/timeseriesaggregation.py b/tsam/timeseriesaggregation.py index f3349c3..5c75a22 100644 --- a/tsam/timeseriesaggregation.py +++ b/tsam/timeseriesaggregation.py @@ -835,11 +835,11 @@ def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx for column in self.timeSeries.columns: diff = 1 sum_raw = self.normalizedPeriodlyProfiles[column].sum().sum() - sum_peak = sum( + sum_peak = np.sum( weightingVec[extremeClusterIdx] * typicalPeriods[column].loc[extremeClusterIdx, :].sum(axis=1) ) - sum_clu_wo_peak = sum( + sum_clu_wo_peak = np.sum( weightingVec[idx_wo_peak] * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1) ) @@ -874,7 +874,7 @@ def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx typicalPeriods[column].fillna(0.0, inplace=True) # calc new sum and new diff to orig data - sum_clu_wo_peak = sum( + sum_clu_wo_peak = np.sum( weightingVec[idx_wo_peak] * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1) ) From 8cf3e65efa1ae0252e410da1f29a9dc7cedba2b0 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 21 Aug 2023 14:16:45 +0200 Subject: [PATCH 4/4] improve performance of typicalPeriods creation in _rescaleClusterPeriods --- tsam/timeseriesaggregation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tsam/timeseriesaggregation.py b/tsam/timeseriesaggregation.py index 5c75a22..9687fc2 100644 --- a/tsam/timeseriesaggregation.py +++ b/tsam/timeseriesaggregation.py @@ -828,9 +828,10 @@ def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx series, without changing the values of the extremePeriods. """ weightingVec = pd.Series(self._clusterPeriodNoOccur).values - typicalPeriods = pd.DataFrame( - clusterPeriods, columns=self.normalizedPeriodlyProfiles.columns - ) + typicalPeriods = pd.concat([ + pd.Series(s, index=self.normalizedPeriodlyProfiles.columns) + for s in self.clusterPeriods + ], axis=1).T idx_wo_peak = np.delete(typicalPeriods.index, extremeClusterIdx) for column in self.timeSeries.columns: diff = 1