From 6198e5bfc6e64cbce688d1c1e733d3ea3a8b3480 Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 3 May 2021 12:08:02 -0600
Subject: [PATCH 1/6] removing deprecated aggregate_timeseries

---
 buildstockbatch/base.py                     |  3 ---
 buildstockbatch/schemas/v0.3.yaml           |  3 +--
 buildstockbatch/test/test_base.py           | 14 ++------------
 buildstockbatch/test/test_postprocessing.py |  7 +------
 project_resstock_national.yml               |  1 -
 5 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py
index 125e4e15..288ceeed 100644
--- a/buildstockbatch/base.py
+++ b/buildstockbatch/base.py
@@ -270,9 +270,6 @@ def validate_misc_constraints(project_file):
         # validate other miscellaneous constraints
         cfg = get_project_configuration(project_file)

-        if cfg.get('postprocessing', {}).get('aggregate_timeseries', False):
-            logger.warning('aggregate_timeseries has been deprecated and will be removed in a future version.')
-
         return True

     @staticmethod
diff --git a/buildstockbatch/schemas/v0.3.yaml b/buildstockbatch/schemas/v0.3.yaml
index 1286d948..d0a2900e 100644
--- a/buildstockbatch/schemas/v0.3.yaml
+++ b/buildstockbatch/schemas/v0.3.yaml
@@ -48,7 +48,6 @@ hpc-postprocessing-spec:
   n_workers: int(required=False)
   node_memory_mb: enum(85248, 180224, 751616, required=False)
   parquet_memory_mb: int(required=False)
-  keep_intermediate_files: bool(required=False)

 sampler-spec:
   type: str(required=True)
@@ -106,7 +105,7 @@ cost-spec:

 postprocessing-spec:
   aws: include('aws-postprocessing-spec', required=False)
-  aggregate_timeseries: bool(required=False)
+  keep_intermediate_files: bool(required=False)

 aws-postprocessing-spec:
   region_name: str(required=False)
diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py
index 9b805d61..e8e38095 100644
--- a/buildstockbatch/test/test_base.py
+++ b/buildstockbatch/test/test_base.py
@@ -62,12 +62,7 @@ def test_combine_files_flexible(basic_residential_project_file, mocker):
     # test_results/results_csvs need to be updated with new data *if* columns were indeed supposed to be added/
     # removed/renamed.

-    post_process_config = {
-        'postprocessing': {
-            'aggregate_timeseries': True
-        }
-    }
-    project_filename, results_dir = basic_residential_project_file(post_process_config)
+    project_filename, results_dir = basic_residential_project_file()

     mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
     get_dask_client_mock = mocker.patch.object(BuildStockBatchBase, 'get_dask_client')
@@ -178,12 +173,7 @@ def test_downselect_integer_options(basic_residential_project_file, mocker):


 def test_combine_files(basic_residential_project_file):
-    post_process_config = {
-        'postprocessing': {
-            'aggregate_timeseries': True
-        }
-    }
-    project_filename, results_dir = basic_residential_project_file(post_process_config)
+    project_filename, results_dir = basic_residential_project_file()

     with patch.object(BuildStockBatchBase, 'weather_dir', None), \
             patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
diff --git a/buildstockbatch/test/test_postprocessing.py b/buildstockbatch/test/test_postprocessing.py
index 766163c3..dfc960cb 100644
--- a/buildstockbatch/test/test_postprocessing.py
+++ b/buildstockbatch/test/test_postprocessing.py
@@ -80,12 +80,7 @@ def test_large_parquet_combine(basic_residential_project_file):
     # Test a simulated scenario where the individual timeseries parquet are larger than the max memory per partition
     # allocated for the parquet file combining.

-    post_process_config = {
-        'postprocessing': {
-            'aggregate_timeseries': True
-        }
-    }
-    project_filename, results_dir = basic_residential_project_file(post_process_config)
+    project_filename, results_dir = basic_residential_project_file()

     with patch.object(BuildStockBatchBase, 'weather_dir', None), \
             patch.object(BuildStockBatchBase, 'get_dask_client'), \
diff --git a/project_resstock_national.yml b/project_resstock_national.yml
index 67b69115..5afd7150 100644
--- a/project_resstock_national.yml
+++ b/project_resstock_national.yml
@@ -59,7 +59,6 @@ aws:
   notifications_email: user@nrel.gov

 postprocessing:
-  aggregate_timeseries: true
   aws:
     region_name: 'us-west-2'
     s3:

From 543339dc893952bacba9316847ea021dc1e7884f Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 3 May 2021 13:42:14 -0600
Subject: [PATCH 2/6] changed delete functionality and added test

---
 buildstockbatch/aws/s3_assets/bsb_post.py   |  3 ++-
 buildstockbatch/base.py                     |  9 +++------
 buildstockbatch/postprocessing.py           |  9 +++++----
 buildstockbatch/test/test_postprocessing.py | 22 +++++++++++++++++++++
 4 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/buildstockbatch/aws/s3_assets/bsb_post.py b/buildstockbatch/aws/s3_assets/bsb_post.py
index f2c4fbe7..cd7e3ef9 100644
--- a/buildstockbatch/aws/s3_assets/bsb_post.py
+++ b/buildstockbatch/aws/s3_assets/bsb_post.py
@@ -57,7 +57,8 @@ def do_postprocessing(s3_bucket, s3_bucket_prefix):
         f'{s3_bucket_prefix}/results/parquet'
     )

-    remove_intermediate_files(fs, results_s3_loc)
+    keep_individual_timeseries = cfg.get('postprocessing', {}).get('keep_intermediate_files', False)
+    remove_intermediate_files(fs, results_s3_loc, keep_individual_timeseries)


 if __name__ == '__main__':
diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py
index 288ceeed..14078216 100644
--- a/buildstockbatch/base.py
+++ b/buildstockbatch/base.py
@@ -268,7 +268,7 @@ def validate_project_schema(project_file):
     @staticmethod
     def validate_misc_constraints(project_file):
         # validate other miscellaneous constraints
-        cfg = get_project_configuration(project_file)
+        cfg = get_project_configuration(project_file)  # noqa F841

         return True

@@ -554,8 +554,5 @@ def process_results(self, skip_combine=False, force_upload=False):
             if 'athena' in aws_conf:
                 postprocessing.create_athena_tables(aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix)

-        if not self.cfg.get('eagle', {}).get('postprocessing', {}).get('keep_intermediate_files', False):
-            logger.info("Removing intermediate files.")
-            postprocessing.remove_intermediate_files(fs, self.results_dir)
-        else:
-            logger.info("Skipped removing intermediate files.")
+        keep_individual_timeseries = self.cfg.get('postprocessing', {}).get('keep_intermediate_files', False)
+        postprocessing.remove_intermediate_files(fs, self.results_dir, keep_individual_timeseries)
diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py
index 0353cfdf..a02be786 100644
--- a/buildstockbatch/postprocessing.py
+++ b/buildstockbatch/postprocessing.py
@@ -365,15 +365,16 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True):
     logger.info(f"Finished combining and saving timeseries for upgrade{upgrade_id}.")


-def remove_intermediate_files(fs, results_dir):
+def remove_intermediate_files(fs, results_dir, keep_individual_timeseries=False):
     # Remove aggregated files to save space
     sim_output_dir = f'{results_dir}/simulation_output'
-    ts_in_dir = f'{sim_output_dir}/timeseries'
     results_job_json_glob = f'{sim_output_dir}/results_job*.json.gz'
-    logger.info('Removing temporary files')
-    fs.rm(ts_in_dir, recursive=True)
+    logger.info('Removing results_job*.json.gz')
     for filename in fs.glob(results_job_json_glob):
         fs.rm(filename)
+    if not keep_individual_timeseries:
+        ts_in_dir = f'{sim_output_dir}/timeseries'
+        fs.rm(ts_in_dir, recursive=True)


 def upload_results(aws_conf, output_dir, results_dir):
diff --git a/buildstockbatch/test/test_postprocessing.py b/buildstockbatch/test/test_postprocessing.py
index dfc960cb..6bdff845 100644
--- a/buildstockbatch/test/test_postprocessing.py
+++ b/buildstockbatch/test/test_postprocessing.py
@@ -88,3 +88,25 @@ def test_large_parquet_combine(basic_residential_project_file):
             patch.object(postprocessing, 'MAX_PARQUET_MEMORY', 1e6):  # set the max memory to just 1MB
         bsb = BuildStockBatchBase(project_filename)
         bsb.process_results()  # this would raise exception if the postprocessing could not handle the situation
+
+
+@pytest.mark.parametrize('keep_individual_timeseries', [True, False])
+def test_keep_intermediate_files(keep_individual_timeseries, basic_residential_project_file, mocker):
+    project_filename, results_dir = basic_residential_project_file({
+        'postprocessing': {
+            'keep_intermediate_files': keep_individual_timeseries
+        }
+    })
+
+    mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
+    mocker.patch.object(BuildStockBatchBase, 'get_dask_client')
+    mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir)
+    bsb = BuildStockBatchBase(project_filename)
+    bsb.process_results()
+
+    results_path = pathlib.Path(results_dir)
+    simout_path = results_path / 'simulation_output'
+    assert len(list(simout_path.glob('results_job*.json.gz'))) == 0
+
+    ts_path = simout_path / 'timeseries'
+    assert ts_path.exists() == keep_individual_timeseries

From 94fd99b9f4cf9dacd97428d3c972de85df2fbedc Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 3 May 2021 13:47:12 -0600
Subject: [PATCH 3/6] changing key name to keep_individual_timeseries

---
 buildstockbatch/aws/s3_assets/bsb_post.py   | 2 +-
 buildstockbatch/base.py                     | 2 +-
 buildstockbatch/schemas/v0.3.yaml           | 2 +-
 buildstockbatch/test/test_postprocessing.py | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/buildstockbatch/aws/s3_assets/bsb_post.py b/buildstockbatch/aws/s3_assets/bsb_post.py
index cd7e3ef9..9d2c8b3d 100644
--- a/buildstockbatch/aws/s3_assets/bsb_post.py
+++ b/buildstockbatch/aws/s3_assets/bsb_post.py
@@ -57,7 +57,7 @@ def do_postprocessing(s3_bucket, s3_bucket_prefix):
         f'{s3_bucket_prefix}/results/parquet'
     )

-    keep_individual_timeseries = cfg.get('postprocessing', {}).get('keep_intermediate_files', False)
+    keep_individual_timeseries = cfg.get('postprocessing', {}).get('keep_individual_timeseries', False)
     remove_intermediate_files(fs, results_s3_loc, keep_individual_timeseries)


 if __name__ == '__main__':
diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py
index 14078216..92075bae 100644
--- a/buildstockbatch/base.py
+++ b/buildstockbatch/base.py
@@ -554,5 +554,5 @@ def process_results(self, skip_combine=False, force_upload=False):
             if 'athena' in aws_conf:
                 postprocessing.create_athena_tables(aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix)

-        keep_individual_timeseries = self.cfg.get('postprocessing', {}).get('keep_intermediate_files', False)
+        keep_individual_timeseries = self.cfg.get('postprocessing', {}).get('keep_individual_timeseries', False)
         postprocessing.remove_intermediate_files(fs, self.results_dir, keep_individual_timeseries)
diff --git a/buildstockbatch/schemas/v0.3.yaml b/buildstockbatch/schemas/v0.3.yaml
index d0a2900e..57f825f3 100644
--- a/buildstockbatch/schemas/v0.3.yaml
+++ b/buildstockbatch/schemas/v0.3.yaml
@@ -105,7 +105,7 @@ cost-spec:

 postprocessing-spec:
   aws: include('aws-postprocessing-spec', required=False)
-  keep_intermediate_files: bool(required=False)
+  keep_individual_timeseries: bool(required=False)

 aws-postprocessing-spec:
   region_name: str(required=False)
diff --git a/buildstockbatch/test/test_postprocessing.py b/buildstockbatch/test/test_postprocessing.py
index 6bdff845..183de211 100644
--- a/buildstockbatch/test/test_postprocessing.py
+++ b/buildstockbatch/test/test_postprocessing.py
@@ -91,10 +91,10 @@
 @pytest.mark.parametrize('keep_individual_timeseries', [True, False])
-def test_keep_intermediate_files(keep_individual_timeseries, basic_residential_project_file, mocker):
+def test_keep_individual_timeseries(keep_individual_timeseries, basic_residential_project_file, mocker):
     project_filename, results_dir = basic_residential_project_file({
         'postprocessing': {
-            'keep_intermediate_files': keep_individual_timeseries
+            'keep_individual_timeseries': keep_individual_timeseries
         }
     })

From 42a8e17c0f69bef7aa4d17d6033c9085d7314543 Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 3 May 2021 13:53:17 -0600
Subject: [PATCH 4/6] adding item to changelog

---
 docs/changelog/changelog_dev.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst
index 88d7d105..3119f212 100644
--- a/docs/changelog/changelog_dev.rst
+++ b/docs/changelog/changelog_dev.rst
@@ -82,3 +82,13 @@ Development Changelog
         :tickets:

         Fix for create_eagle_env.sh not creating environment.
+
+    .. change::
+        :tags: postprocessing
+        :pullreq: 227
+        :tickets: 182
+
+        Moves the ``eagle.postprocessing.keep_intermediate_files`` to
+        ``postprocessing.keep_individual_timeseries`` and changes behavior to
+        keep only the timeseries parquet files. Also, removes the deprecated
+        ``aggregate_timeseries`` key as that aggregation always happens.

From 2264a0276e88a3c5419d9fd7ba3b6ab22641325b Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 3 May 2021 14:07:56 -0600
Subject: [PATCH 5/6] updating docs

---
 docs/changelog/migration_0_20.rst | 88 ++++++++++++++++++++-----------
 docs/project_defn.rst             | 16 +++---
 2 files changed, 66 insertions(+), 38 deletions(-)

diff --git a/docs/changelog/migration_0_20.rst b/docs/changelog/migration_0_20.rst
index fb98fc5d..b49dd092 100644
--- a/docs/changelog/migration_0_20.rst
+++ b/docs/changelog/migration_0_20.rst
@@ -169,37 +169,6 @@ New Spec:
       reporting_frequency: Hourly
       include_enduse_subcategories: true

-Commercial Workflw Generator Hard-Coded Measures
-------------------------------------------------
-
-The commercial workflow generator has changed to remove most of the hard-coded
-reporting measures, allowing them to be added to the config file as-needed.
-This should avoid the need to create custom BuildStockBatch environments
-for each project that needs to add/remove/modify reporting measures.
-
-Old hard-coded reporting measures:
-
-- SimulationOutputReport
-- OpenStudio Results (measure_dir_name: f8e23017-894d-4bdf-977f-37e3961e6f42)
-- TimeseriesCSVExport
-- comstock_sensitivity_reports
-- qoi_report
-- la_100_qaqc (if include_qaqc = true in config)
-- simulation_settings_check (if include_qaqc = true in config)
-
-New hard-coded reporting measures:
-
-- SimulationOutputReport (reports annual totals in results.csv)
-- TimeseriesCSVExport (generates timeseries results at Timestep frequency)
-
-Two other hard-coded model measures were removed from the workflow. These will
-be added to the workflow via the options-lookup.tsv in ComStock instead.
-
-Removed hard-coded model measures:
-
-- add_blinds_to_selected_windows
-- set_space_type_load_subcategories
-
 Reporting Measures in Workflows
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -231,6 +200,37 @@ New Spec:

       - measure_dir_name: ReportingMeasure2

+Commercial Workflow Generator Hard-Coded Measures
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The commercial workflow generator has changed to remove most of the hard-coded
+reporting measures, allowing them to be added to the config file as-needed.
+This should avoid the need to create custom BuildStockBatch environments
+for each project that needs to add/remove/modify reporting measures.
+
+Old hard-coded reporting measures:
+
+- ``SimulationOutputReport``
+- OpenStudio Results (measure_dir_name: ``f8e23017-894d-4bdf-977f-37e3961e6f42``)
+- ``TimeseriesCSVExport``
+- ``comstock_sensitivity_reports``
+- ``qoi_report``
+- ``la_100_qaqc`` (if include_qaqc = true in config)
+- ``simulation_settings_check`` (if include_qaqc = true in config)
+
+New hard-coded reporting measures:
+
+- ``SimulationOutputReport`` (reports annual totals in results.csv)
+- ``TimeseriesCSVExport`` (generates timeseries results at Timestep frequency)
+
+Two other hard-coded model measures were removed from the workflow. These will
+be added to the workflow via the options-lookup.tsv in ComStock instead.
+
+Removed hard-coded model measures:
+
+- ``add_blinds_to_selected_windows``
+- ``set_space_type_load_subcategories``
+
 AWS EMR Configuration Name Changes
 ----------------------------------
@@ -246,3 +246,29 @@ renamed the following keys under ``aws.emr``:
 +----------------------+-----------------------+
 | slave_instance_count | worker_instance_count |
 +----------------------+-----------------------+
+
+
+Keep Individual Timeseries
+--------------------------
+
+For some applications it is helpful to keep the timeseries parquet files for
+each simulation. Normally, they are aggregated into fewer, larger files. There
+was a key introduced in v0.19.1 that enabled this. We moved it to a new home
+in the config file.
+
+Old Spec:
+
+.. code-block:: yaml
+
+    schema_version: 0.2
+    eagle:
+      postprocessing:
+        keep_intermediate_files: true  # default false if omitted
+
+New Spec:
+
+.. code-block:: yaml
+
+    schema_version: '0.3'
+    postprocessing:
+      keep_individual_timeseries: true  # default false if omitted
diff --git a/docs/project_defn.rst b/docs/project_defn.rst
index 2e0f49f7..7522509b 100644
--- a/docs/project_defn.rst
+++ b/docs/project_defn.rst
@@ -148,9 +148,6 @@ the Eagle supercomputer.
   * ``node_memory_mb``: The memory (in MB) to request for eagle node for postprocessing. The valid values are
     85248, 180224 and 751616. Default is 85248.
   * ``parquet_memory_mb``: The size (in MB) of the combined parquet file in memory. Default is 40000.
-  * ``keep_intermediate_files``: Set this to true if you want to keep postprocessing intermediate files (for debugging
-    or other explorative purpose). The intermediate files contain results_job*.json.gz
-    files and individual building's timeseries parquet files. Default is false.


 .. _aws-config:
@@ -222,10 +219,11 @@ follows:

 fewer larger parquet files that are better suited for querying using big data analysis tools.

-  For ResStock runs with the ResidentialScheduleGenerator, the generated schedules
-  are horizontally concatenated with the time series files before aggregation,
-  making sure the schedule values are properly lined up with the timestamps in the
-  `same way that Energeyplus handles ScheduleFiles `_.
+For ResStock runs with the ResidentialScheduleGenerator, the generated schedules
+are horizontally concatenated with the time series files before aggregation,
+making sure the schedule values are properly lined up with the timestamps in the
+`same way that EnergyPlus handles ScheduleFiles
+`_.


 Uploading to AWS Athena
@@ -255,6 +253,10 @@ The configuration options for postprocessing and AWS upload are:
 * ``postprocessing``: postprocessing configuration

+  * ``keep_individual_timeseries``: For some use cases it is useful to keep
+    the timeseries output for each simulation as a separate parquet file.
+    Setting this option to ``true`` allows that. Default is ``false``.
+
   * ``aws``: configuration related to uploading to and managing data in amazon web services.
     For this to work, please `configure aws. `_ Including this key will cause your datasets
     to be uploaded to AWS, omitting it will cause them not to be uploaded.

From eeed61c40a2c18358f79a3ac3cba954fe5beded3 Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 3 May 2021 14:10:41 -0600
Subject: [PATCH 6/6] changing to correct PR number [skip ci]

---
 docs/changelog/changelog_dev.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst
index 3119f212..59abd403 100644
--- a/docs/changelog/changelog_dev.rst
+++ b/docs/changelog/changelog_dev.rst
@@ -85,7 +85,7 @@ Development Changelog

     .. change::
         :tags: postprocessing
-        :pullreq: 227
+        :pullreq: 228
         :tickets: 182

         Moves the ``eagle.postprocessing.keep_intermediate_files`` to