Merge pull request #84 from NREL/extra-measures

Ability to add reporting measures to projects
NREL · Aug 12, 2019 · d4afdaa · d4afdaa
2 parents e7546a1 + 9759369
commit d4afdaa
Show file tree

Hide file tree

Showing 7 changed files with 99 additions and 9 deletions.
diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py
@@ -468,8 +468,11 @@ def process_results(self, skip_combine=False, force_upload=False):
 
         aggregate_ts = self.cfg.get('postprocessing', {}).get('aggregate_timeseries', False)
 
+        reporting_measures = self.cfg.get('reporting_measures', [])
+
         if not skip_combine:
-            combine_results(self.results_dir, skip_timeseries=skip_timeseries, aggregate_timeseries=aggregate_ts)
+            combine_results(self.results_dir, skip_timeseries=skip_timeseries, aggregate_timeseries=aggregate_ts,
+                            reporting_measures=reporting_measures)
 
         aws_conf = self.cfg.get('postprocessing', {}).get('aws', {})
         if 's3' in aws_conf or force_upload:

diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py
@@ -34,7 +34,7 @@
 logger = logging.getLogger(__name__)
 
 
-def read_data_point_out_json(fs_uri, filename):
+def read_data_point_out_json(fs_uri, reporting_measures, filename):
     fs = open_fs(fs_uri)
     try:
         with fs.open(filename, 'r') as f:
@@ -44,6 +44,9 @@ def read_data_point_out_json(fs_uri, filename):
     else:
         if 'SimulationOutputReport' not in d:
             d['SimulationOutputReport'] = {'applicable': False}
+        for reporting_measure in reporting_measures:
+            if reporting_measure not in d:
+                d[reporting_measure] = {'applicable': False}
         return d
 
 
@@ -52,7 +55,7 @@ def to_camelcase(x):
     return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
 
 
-def flatten_datapoint_json(d):
+def flatten_datapoint_json(reporting_measures, d):
     new_d = {}
     cols_to_keep = {
         'ApplyUpgrade': [
@@ -83,6 +86,11 @@ def flatten_datapoint_json(d):
     for k, v in d.get(col3, {}).items():
         new_d[f'{col3}.{k}'] = v
 
+    # additional reporting measures
+    for col in reporting_measures:
+        for k, v in d.get(col, {}).items():
+            new_d[f'{col}.{k}'] = v
+
     new_d['building_id'] = new_d['BuildExistingModel.building_id']
     del new_d['BuildExistingModel.building_id']
 
@@ -208,7 +216,7 @@ def write_output(results_dir, group_pq):
     write_dataframe_as_parquet(pq, results_dir, file_path)
 
 
-def combine_results(results_dir, skip_timeseries=False, aggregate_timeseries=False):
+def combine_results(results_dir, skip_timeseries=False, aggregate_timeseries=False, reporting_measures=[]):
     fs = open_fs(results_dir)
 
     sim_out_dir = 'simulation_output'
@@ -252,16 +260,17 @@ def combine_results(results_dir, skip_timeseries=False, aggregate_timeseries=Fal
 
         datapoint_output_jsons = db.from_sequence(sim_dir_list, partition_size=500).\
             map(lambda x: f"{sim_out_dir}/{x}/run/data_point_out.json").\
-            map(partial(read_data_point_out_json, results_dir)).\
+            map(partial(read_data_point_out_json, results_dir, reporting_measures)).\
             filter(lambda x: x is not None)
         meta = pd.DataFrame(list(
             datapoint_output_jsons.filter(lambda x: 'SimulationOutputReport' in x.keys()).
-            map(flatten_datapoint_json).take(10)
+            map(partial(flatten_datapoint_json, reporting_measures)).take(10)
         ))
+
         if meta.shape == (0, 0):
             meta = None
 
-        data_point_out_df_d = datapoint_output_jsons.map(flatten_datapoint_json).\
+        data_point_out_df_d = datapoint_output_jsons.map(partial(flatten_datapoint_json, reporting_measures)).\
             to_dataframe(meta=meta).rename(columns=to_camelcase)
 
         out_osws = db.from_sequence(sim_dir_list, partition_size=500).\
@@ -300,6 +309,11 @@ def combine_results(results_dir, skip_timeseries=False, aggregate_timeseries=Fal
                                          col.startswith('simulation_output_report')])
         sorted_cols = first_few_cols + build_existing_model_cols + simulation_output_cols
 
+        for reporting_measure in reporting_measures:
+            reporting_measure_cols = sorted([col for col in results_df.columns if
+                                            col.startswith(to_camelcase(reporting_measure))])
+            sorted_cols += reporting_measure_cols
+
         results_df = results_df.reindex(columns=sorted_cols, copy=False)
 
         # Save to CSV

diff --git a/buildstockbatch/schemas/v0.1.yaml b/buildstockbatch/schemas/v0.1.yaml
@@ -12,6 +12,7 @@ upgrades: list(include('upgrade-spec'), required=False)
 downselect: include('downselect-spec',required=False)
 postprocessing: include('postprocessing-spec', required=False)
 residential_simulation_controls: include('residential-simulation-spec', required=False)
+reporting_measures: list(required=False)
 schema_version: num(required=False)
 ---
 hpc-spec:

diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py
@@ -9,6 +9,7 @@
 import tempfile
 import yaml
 import shutil
+import glob
 
 from buildstockbatch.base import BuildStockBatchBase
 from buildstockbatch.postprocessing import write_dataframe_as_parquet
@@ -429,3 +430,39 @@ def test_skipping_baseline(basic_residential_project_file):
 
     up01_csv_gz = os.path.join(results_dir, 'results_csvs', 'results_up01.csv.gz')
     assert(os.path.exists(up01_csv_gz))
+
+
+def test_report_additional_results_csv_columns(basic_residential_project_file):
+    project_filename, results_dir = basic_residential_project_file({
+        'reporting_measures': [
+            'ReportingMeasure1',
+            'ReportingMeasure2'
+        ]
+    })
+
+    for filename in glob.glob(os.path.join(results_dir, 'simulation_output', 'up*', 'bldg*', 'run',
+                                           'data_point_out.json')):
+        with open(filename, 'r') as f:
+            dpout = json.load(f)
+        dpout['ReportingMeasure1'] = {'column_1': 1, 'column_2': 2}
+        dpout['ReportingMeasure2'] = {'column_3': 3, 'column_4': 4}
+        with open(filename, 'w') as f:
+            json.dump(dpout, f)
+
+    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
+            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
+            patch.object(BuildStockBatchBase, 'results_dir', results_dir):
+
+        bsb = BuildStockBatchBase(project_filename)
+        bsb.process_results()
+        get_dask_client_mock.assert_called_once()
+
+    up00_results_csv_path = os.path.join(results_dir, 'results_csvs', 'results_up00.csv.gz')
+    up00 = pd.read_csv(up00_results_csv_path)
+    assert 'reporting_measure1' in [col.split('.')[0] for col in up00.columns]
+    assert 'reporting_measure2' in [col.split('.')[0] for col in up00.columns]
+
+    up01_results_csv_path = os.path.join(results_dir, 'results_csvs', 'results_up01.csv.gz')
+    up01 = pd.read_csv(up01_results_csv_path)
+    assert 'reporting_measure1' in [col.split('.')[0] for col in up01.columns]
+    assert 'reporting_measure2' in [col.split('.')[0] for col in up01.columns]
diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py
@@ -121,6 +121,14 @@ def create_osw(self, sim_id, building_id, upgrade_idx):
                 'measure_dir_name': 'TimeseriesCSVExport',
                 'arguments': timeseries_csv_export_args
             }
-            osw['steps'].insert(-1, timeseries_measure)
+            osw['steps'].insert(-1, timeseries_measure)  # right before ServerDirectoryCleanup
+
+        if 'reporting_measures' in self.cfg:
+            for measure_dir_name in self.cfg['reporting_measures']:
+                reporting_measure = {
+                    'measure_dir_name': measure_dir_name,
+                    'arguments': {}
+                }
+                osw['steps'].insert(-1, reporting_measure)  # right before ServerDirectoryCleanup
 
         return osw
diff --git a/buildstockbatch/workflow_generator/test_workflow_generator.py b/buildstockbatch/workflow_generator/test_workflow_generator.py
@@ -141,3 +141,25 @@ def test_timeseries_csv_export():
     assert(args['output_variables'] == 'Zone Mean Air Temperature')
     for argname in ('include_enduse_subcategories',):
         assert(args[argname] == default_args[argname])
+
+
+def test_additional_reporting_measures():
+    sim_id = 'bldb1up1'
+    building_id = 1
+    upgrade_idx = None
+    cfg = {
+        'baseline': {
+            'n_datapoints': 10,
+            'n_buildings_represented': 100
+        },
+        'reporting_measures': [
+            'ReportingMeasure1',
+            'ReportingMeasure2'
+        ]
+    }
+    osw_gen = ResidentialDefaultWorkflowGenerator(cfg)
+    osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx)
+    reporting_measure_1_step = osw['steps'][-3]
+    assert(reporting_measure_1_step['measure_dir_name'] == 'ReportingMeasure1')
+    reporting_measure_2_step = osw['steps'][-2]
+    assert(reporting_measure_2_step['measure_dir_name'] == 'ReportingMeasure2')
diff --git a/docs/project_defn.rst b/docs/project_defn.rst
@@ -66,7 +66,7 @@ Upgrade Scenarios
 ~~~~~~~~~~~~~~~~~
 
 Under the ``upgrades`` key is a list of upgrades to apply with the
-following properties;
+following properties:
 
 -  ``upgrade_name``: The name that will be in the outputs for this
    upgrade scenario.
@@ -102,6 +102,11 @@ in OpenStudio-BuildStock. Please refer to the measure arguments there to determi
 Note that this measure and arguments may be different depending on which version of OpenStudio-BuildStock you're using.
 The best thing you can do is to verify that it works with what is in your branch.
 
+Additional Reporting Measures
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Include the ``reporting_measures`` key along with a list of reporting measure names to apply additional reporting measures (that require no arguments) to the workflow.
+Any columns reported by these additional measures will be appended to the results csv.
+
 Output Directory
 ~~~~~~~~~~~~~~~~