diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 9d185875..284ce3f1 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -201,7 +201,7 @@ def clean_up_results_df(df, cfg, keep_upgrade_id=False): def get_cols(fs, filename): with fs.open(filename, 'rb') as f: schema = parquet.read_schema(f) - return schema.names + return set(schema.names) def read_results_json(fs, filename): @@ -271,7 +271,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): logger.info("Collecting all the columns in timeseries parquet files.") ts_filenames = fs.glob(f'{ts_in_dir}/up*/bldg*.parquet') all_ts_cols = db.from_sequence(ts_filenames, partition_size=100).map(partial(get_cols, fs)).\ - fold(lambda x, y: set(x).union(y)).compute() + fold(lambda x, y: x.union(y)).compute() # Sort the columns all_ts_cols_sorted = ['building_id'] + sorted(x for x in all_ts_cols if x.startswith('time')) diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 17304cd0..7d0ddd74 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -65,3 +65,17 @@ Development Changelog :tickets: For ResStock the OpenStudio version has changed to v3.3.0. + + .. change:: + :tags: bugfix + :pullreq: 258, 262 + :tickets: 253 + + Fixes an issue that caused an out-of-memory error when postprocessing a large run with many upgrades. + + .. change:: + :tags: bugfix + :pullreq: 263 + :tickets: 261 + + Fixes a bug that caused postprocessing to crash when there is only one datapoint. \ No newline at end of file