From 40aa0ab4909bb3b212e221811fde5d9f6523a087 Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Mon, 4 Dec 2023 15:11:13 +0000 Subject: [PATCH 1/3] Move shared code into DockerBatchBase --- buildstockbatch/aws/aws.py | 123 +------------- buildstockbatch/cloud/docker_base.py | 151 ++++++++++++++++++ buildstockbatch/test/test_docker_base.py | 61 +++++++ .../resources/buildstock_good.csv | 12 +- .../resources/options_lookup.tsv | 66 ++++---- 5 files changed, 253 insertions(+), 160 deletions(-) diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index 55f95a2c..83f6d8fe 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -14,8 +14,6 @@ import base64 import boto3 from botocore.exceptions import ClientError -import csv -from fsspec.implementations.local import LocalFileSystem import gzip from joblib import Parallel, delayed import json @@ -24,8 +22,6 @@ import pathlib import random from s3fs import S3FileSystem -import shutil -import subprocess import tarfile import tempfile import re @@ -33,7 +29,6 @@ import io import zipfile -from buildstockbatch import postprocessing from buildstockbatch.aws.awsbase import AwsJobBase from buildstockbatch.base import ValidationError from buildstockbatch.cloud.docker_base import DockerBatchBase @@ -1717,37 +1712,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): weather_dir = sim_dir / "weather" os.makedirs(weather_dir, exist_ok=True) - # Make a lookup of which parameter points to the weather file from options_lookup.tsv - with open(sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8") as f: - tsv_reader = csv.reader(f, delimiter="\t") - next(tsv_reader) # skip headers - param_name = None - epws_by_option = {} - for row in tsv_reader: - row_has_epw = [x.endswith(".epw") for x in row[2:]] - if sum(row_has_epw): - if row[0] != param_name and param_name is not None: - raise RuntimeError( - "The epw files are specified in options_lookup.tsv under more than one parameter type: " - f"{param_name}, {row[0]}" - ) - epw_filename = row[row_has_epw.index(True) + 2].split("=")[1] - param_name = row[0] - option_name = row[1] - epws_by_option[option_name] = epw_filename - - # Look through the buildstock.csv to find the appropriate location and epw - epws_to_download = set() - building_ids = [x[0] for x in jobs_d["batch"]] - with open( - sim_dir / "lib" / "housing_characteristics" / "buildstock.csv", - "r", - encoding="utf-8", - ) as f: - csv_reader = csv.DictReader(f) - for row in csv_reader: - if int(row["Building"]) in building_ids: - epws_to_download.add(epws_by_option[row[param_name]]) + epws_to_download = cls.get_epws_to_download(sim_dir, jobs_d) # Download the epws needed for these simulations for epw_filename in epws_to_download: @@ -1757,92 +1722,8 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): with open(weather_dir / epw_filename, "wb") as f_out: logger.debug("Extracting {}".format(epw_filename)) f_out.write(gzip.decompress(f_gz.getvalue())) - asset_dirs = os.listdir(sim_dir) - - fs = S3FileSystem() - local_fs = LocalFileSystem() - reporting_measures = cls.get_reporting_measures(cfg) - dpouts = [] - simulation_output_tar_filename = sim_dir.parent / "simulation_outputs.tar.gz" - with tarfile.open(str(simulation_output_tar_filename), "w:gz") as simout_tar: - for building_id, upgrade_idx in jobs_d["batch"]: - upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 - sim_id = f"bldg{building_id:07d}up{upgrade_id:02d}" - - # Create OSW - osw = cls.create_osw(cfg, 
jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx) - with open(os.path.join(sim_dir, "in.osw"), "w") as f: - json.dump(osw, f, indent=4) - - # Run Simulation - with open(sim_dir / "os_stdout.log", "w") as f_out: - try: - logger.debug("Running {}".format(sim_id)) - subprocess.run( - ["openstudio", "run", "-w", "in.osw"], - check=True, - stdout=f_out, - stderr=subprocess.STDOUT, - cwd=str(sim_dir), - ) - except subprocess.CalledProcessError: - logger.debug(f"Simulation failed: see {sim_id}/os_stdout.log") - - # Clean Up simulation directory - cls.cleanup_sim_dir( - sim_dir, - fs, - f"{bucket}/{prefix}/results/simulation_output/timeseries", - upgrade_id, - building_id, - ) - - # Read data_point_out.json - dpout = postprocessing.read_simulation_outputs( - local_fs, reporting_measures, str(sim_dir), upgrade_id, building_id - ) - dpouts.append(dpout) - - # Add the rest of the simulation outputs to the tar archive - logger.info("Archiving simulation outputs") - for dirpath, dirnames, filenames in os.walk(sim_dir): - if dirpath == str(sim_dir): - for dirname in set(dirnames).intersection(asset_dirs): - dirnames.remove(dirname) - for filename in filenames: - abspath = os.path.join(dirpath, filename) - relpath = os.path.relpath(abspath, sim_dir) - simout_tar.add(abspath, os.path.join(sim_id, relpath)) - - # Clear directory for next simulation - logger.debug("Clearing out simulation directory") - for item in set(os.listdir(sim_dir)).difference(asset_dirs): - if os.path.isdir(item): - shutil.rmtree(item) - elif os.path.isfile(item): - os.remove(item) - - # Upload simulation outputs tarfile to s3 - fs.put( - str(simulation_output_tar_filename), - f"{bucket}/{prefix}/results/simulation_output/simulations_job{job_id}.tar.gz", - ) - # Upload aggregated dpouts as a json file - with fs.open( - f"{bucket}/{prefix}/results/simulation_output/results_job{job_id}.json.gz", - "wb", - ) as f1: - with gzip.open(f1, "wt", encoding="utf-8") as f2: - json.dump(dpouts, f2) - - # Remove files (it helps docker if we don't leave a bunch of files laying around) - os.remove(simulation_output_tar_filename) - for item in os.listdir(sim_dir): - if os.path.isdir(item): - shutil.rmtree(item) - elif os.path.isfile(item): - os.remove(item) + cls.run_simulations(cfg, jobs_d, job_id, sim_dir, S3FileSystem(), bucket, prefix) @log_error_details() diff --git a/buildstockbatch/cloud/docker_base.py b/buildstockbatch/cloud/docker_base.py index 2551c45f..cf1117c4 100644 --- a/buildstockbatch/cloud/docker_base.py +++ b/buildstockbatch/cloud/docker_base.py @@ -9,7 +9,10 @@ :license: BSD-3 """ import collections +import csv import docker +from fsspec.implementations.local import LocalFileSystem +import gzip import itertools from joblib import Parallel, delayed import json @@ -19,9 +22,11 @@ import pathlib import random import shutil +import subprocess import tarfile import time +from buildstockbatch import postprocessing from buildstockbatch.base import BuildStockBatchBase from buildstockbatch.utils import ContainerRuntime, calc_hash_for_file, compress_file, read_csv @@ -183,3 +188,149 @@ def prep_batches(self, tmppath): os.makedirs(tmppath / "results" / "simulation_output") return (job_count, unique_epws) + + @classmethod + def get_epws_to_download(cls, sim_dir, jobs_d): + """ + Gets the list of filenames for the weather data required for a single batch of simulations. 
+
+        :param sim_dir: Path to the directory where job files are stored.
+        :param jobs_d: Contents of a single job JSON file; contains the list of buildings to simulate in this job.
+
+        :returns: Set of epw filenames needed for this batch of simulations.
+        """
+        # Make a lookup of which parameter points to the weather file from options_lookup.tsv
+        with open(sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8") as f:
+            tsv_reader = csv.reader(f, delimiter="\t")
+            next(tsv_reader)  # skip headers
+            param_name = None
+            epws_by_option = {}
+            for row in tsv_reader:
+                row_has_epw = [x.endswith(".epw") for x in row[2:]]
+                if sum(row_has_epw):
+                    if row[0] != param_name and param_name is not None:
+                        raise RuntimeError(
+                            "The epw files are specified in options_lookup.tsv under more than one parameter type: "
+                            f"{param_name}, {row[0]}"
+                        )
+                    epw_filename = row[row_has_epw.index(True) + 2].split("=")[1]
+                    param_name = row[0]
+                    option_name = row[1]
+                    epws_by_option[option_name] = epw_filename
+
+        # Look through the buildstock.csv to find the appropriate location and epw
+        epws_to_download = set()
+        building_ids = [x[0] for x in jobs_d["batch"]]
+        with open(
+            sim_dir / "lib" / "housing_characteristics" / "buildstock.csv",
+            "r",
+            encoding="utf-8",
+        ) as f:
+            csv_reader = csv.DictReader(f)
+            for row in csv_reader:
+                if int(row["Building"]) in building_ids:
+                    epws_to_download.add(epws_by_option[row[param_name]])
+
+        return epws_to_download
+
+    @classmethod
+    def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, bucket, prefix):
+        """
+        Run one batch of simulations.
+
+        Runs the simulations, writes outputs to the provided storage bucket, and cleans up intermediate files.
+
+        :param cfg: Project config contents.
+        :param job_id: Index of this job.
+        :param jobs_d: Contents of a single job JSON file; contains the list of buildings to simulate in this job.
+        :param sim_dir: Path to the (local) directory where job files are stored.
+        :param fs: Filesystem to use when writing outputs to storage bucket.
+        :param bucket: Name of the storage bucket to upload results to.
+        :param prefix: File prefix to use when writing to storage bucket.
+ """ + local_fs = LocalFileSystem() + reporting_measures = cls.get_reporting_measures(cfg) + dpouts = [] + simulation_output_tar_filename = sim_dir.parent / "simulation_outputs.tar.gz" + asset_dirs = os.listdir(sim_dir) + ts_output_dir = (f"{bucket}/{prefix}/results/simulation_output/timeseries",) + + with tarfile.open(str(simulation_output_tar_filename), "w:gz") as simout_tar: + for building_id, upgrade_idx in jobs_d["batch"]: + upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 + sim_id = f"bldg{building_id:07d}up{upgrade_id:02d}" + + # Create OSW + osw = cls.create_osw(cfg, jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx) + with open(os.path.join(sim_dir, "in.osw"), "w") as f: + json.dump(osw, f, indent=4) + + # Run Simulation + with open(sim_dir / "os_stdout.log", "w") as f_out: + try: + logger.debug("Running {}".format(sim_id)) + subprocess.run( + ["openstudio", "run", "-w", "in.osw"], + check=True, + stdout=f_out, + stderr=subprocess.STDOUT, + cwd=str(sim_dir), + ) + except subprocess.CalledProcessError: + logger.debug(f"Simulation failed: see {sim_id}/os_stdout.log") + + # Clean Up simulation directory + cls.cleanup_sim_dir( + sim_dir, + fs, + ts_output_dir, + upgrade_id, + building_id, + ) + + # Read data_point_out.json + dpout = postprocessing.read_simulation_outputs( + local_fs, reporting_measures, str(sim_dir), upgrade_id, building_id + ) + dpouts.append(dpout) + + # Add the rest of the simulation outputs to the tar archive + logger.info("Archiving simulation outputs") + for dirpath, dirnames, filenames in os.walk(sim_dir): + if dirpath == str(sim_dir): + for dirname in set(dirnames).intersection(asset_dirs): + dirnames.remove(dirname) + for filename in filenames: + abspath = os.path.join(dirpath, filename) + relpath = os.path.relpath(abspath, sim_dir) + simout_tar.add(abspath, os.path.join(sim_id, relpath)) + + # Clear directory for next simulation + logger.debug("Clearing out simulation directory") + for item in set(os.listdir(sim_dir)).difference(asset_dirs): + if os.path.isdir(item): + shutil.rmtree(item) + elif os.path.isfile(item): + os.remove(item) + + # Upload simulation outputs tarfile to s3 + fs.put( + str(simulation_output_tar_filename), + f"{bucket}/{prefix}/results/simulation_output/simulations_job{job_id}.tar.gz", + ) + + # Upload aggregated dpouts as a json file + with fs.open( + f"{bucket}/{prefix}/results/simulation_output/results_job{job_id}.json.gz", + "wb", + ) as f1: + with gzip.open(f1, "wt", encoding="utf-8") as f2: + json.dump(dpouts, f2) + + # Remove files (it helps docker if we don't leave a bunch of files laying around) + os.remove(simulation_output_tar_filename) + for item in os.listdir(sim_dir): + if os.path.isdir(item): + shutil.rmtree(item) + elif os.path.isfile(item): + os.remove(item) diff --git a/buildstockbatch/test/test_docker_base.py b/buildstockbatch/test/test_docker_base.py index be1a394c..549f5892 100644 --- a/buildstockbatch/test/test_docker_base.py +++ b/buildstockbatch/test/test_docker_base.py @@ -1,13 +1,16 @@ """Tests for the DockerBatchBase class.""" +from fsspec.implementations.local import LocalFileSystem import json import os import pathlib +import shutil import tarfile import tempfile from unittest.mock import MagicMock, PropertyMock from buildstockbatch.cloud.docker_base import DockerBatchBase from buildstockbatch.test.shared_testing_stuff import docker_available +from buildstockbatch.utils import get_project_configuration here = os.path.dirname(os.path.abspath(__file__)) resources_dir = os.path.join(here, 
"test_inputs", "test_openstudio_buildstock", "resources") @@ -66,3 +69,61 @@ def test_prep_batches(basic_residential_project_file, mocker): assert [building, None] in simulations # Building with upgrade 0 assert [building, 0] in simulations + + +def test_get_epws_to_download(): + resources_dir_path = pathlib.Path(resources_dir) + options_file = resources_dir_path / "options_lookup.tsv" + buildstock_file = resources_dir_path / "buildstock_good.csv" + + with tempfile.TemporaryDirectory(prefix="bsb_") as sim_dir_str: + sim_dir = pathlib.Path(sim_dir_str) + os.makedirs(sim_dir / "lib" / "resources") + os.makedirs(sim_dir / "lib" / "housing_characteristics") + shutil.copy(options_file, sim_dir / "lib" / "resources") + shutil.copy(buildstock_file, sim_dir / "lib" / "housing_characteristics" / "buildstock.csv") + + jobs_d = { + "job_num": 0, + "n_datapoints": 10, + "batch": [ + [1, None], + [5, None], + ], + } + + epws = DockerBatchBase.get_epws_to_download(sim_dir, jobs_d) + assert epws == set(["weather/G0100970.epw", "weather/G0100830.epw"]) + + +def test_run_simulations(basic_residential_project_file): + jobs_d = { + "job_num": 0, + "n_datapoints": 10, + "batch": [ + [1, None], + [5, None], + ], + } + fs = LocalFileSystem() + project_filename, results_dir = basic_residential_project_file() + cfg = get_project_configuration(project_filename) + + with tempfile.TemporaryDirectory(prefix="bsb_") as temp_dir_str: + temp_path = pathlib.Path(temp_dir_str) + sim_dir = temp_path / "simdata" / "openstudio" + os.makedirs(sim_dir) + # sim_dir is also the working directory (defined in the nrel/openstudio + # Dockerfile), which some file operations depend on. + old_cwd = os.getcwd() + os.chdir(sim_dir) + bucket = temp_path / "bucket" + os.makedirs(bucket / "test_prefix" / "results" / "simulation_output") + + DockerBatchBase.run_simulations(cfg, 0, jobs_d, sim_dir, fs, bucket, "test_prefix") + + output_dir = bucket / "test_prefix" / "results" / "simulation_output" + assert sorted(os.listdir(output_dir)) == ["results_job0.json.gz", "simulations_job0.tar.gz"] + # Check that files were cleaned up correctly + assert not os.listdir(sim_dir) + os.chdir(old_cwd) diff --git a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv index 1991e198..d69de02c 100644 --- a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv +++ b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv @@ -1,6 +1,6 @@ -Building,Bedroom,Location,Vintage,State,Insulation Wall,Insulation Slab,Zipcode -1,1,AL_Mobile-Rgnl.AP.722230,<1950,CO,Good Option,None,36608 -2,3,AL_Mobile-Rgnl.AP.722230,1940s,CO,Good Option,None,36601 -3,2,AL_Mobile-Rgnl.AP.722230,2010s,VA,Good Option,None,36602 -4,1,AL_Mobile-Rgnl.AP.722230,2000s,VA,Good Option,None,36603 -5,2,AL_Mobile-Rgnl.AP.722230,1970s,VA,Good Option,None,36604 \ No newline at end of file +Building,Bedroom,Location,Vintage,State,Insulation Wall,Insulation Slab,Zipcode,County +1,1,AL_Mobile-Rgnl.AP.722230,<1950,CO,Good Option,None,36608,County1 +2,3,AL_Mobile-Rgnl.AP.722230,1940s,CO,Good Option,None,36601,County1 +3,2,AL_Mobile-Rgnl.AP.722230,2010s,VA,Good Option,None,36602,County1 +4,1,AL_Mobile-Rgnl.AP.722230,2000s,VA,Good Option,None,36603,County2 +5,2,AL_Mobile-Rgnl.AP.722230,1970s,VA,Good Option,None,36604,County2 diff --git 
a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv index a8bddc09..73032a93 100644 --- a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv +++ b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv @@ -1,34 +1,34 @@ -Parameter Name Option Name Measure Dir Measure Arg 1 Measure Arg 2 ... -Location AL_Birmingham.Muni.AP.722280 -Location AL_Huntsville.Intl.AP-Jones.Field.723230 -Location AL_Mobile-Rgnl.AP.722230 -Vintage <1940 -Vintage 1940s -Vintage <1950 -Vintage 1950s -Vintage 1960s -Vintage 1970s -Vintage 1980s -Vintage 1990s -Vintage 2000s -Vintage 2010s -State VA -State CO -County County1 -County County2 -Bedroom 1 -Bedroom 2 -Bedroom 3 -Zipcode * -Insulation Slab None -Insulation Slab Good Option ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=0 gap_r=0 exterior_r=0 exterior_depth=0 -Insulation Slab Missing Argument ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=10 gap_r=5 exterior_r=0 -Insulation Unfinished Basement None -Insulation Unfinished Basement Good Option ResidentialConstructionsUnfinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 ceiling_cavity_r=0 ceiling_install_grade=1 ceiling_framing_factor=0.13 ceiling_joist_height_in=9.25 slab_whole_r=0 +Parameter Name Option Name Measure Dir Measure Arg 1 Measure Arg 2 ... +Location AL_Birmingham.Muni.AP.722280 +Location AL_Huntsville.Intl.AP-Jones.Field.723230 +Location AL_Mobile-Rgnl.AP.722230 +Vintage <1940 +Vintage 1940s +Vintage <1950 +Vintage 1950s +Vintage 1960s +Vintage 1970s +Vintage 1980s +Vintage 1990s +Vintage 2000s +Vintage 2010s +State VA +State CO +County County1 weather_station_epw_filepath=weather/G0100970.epw +County County2 weather_station_epw_filepath=weather/G0100830.epw +Bedroom 1 +Bedroom 2 +Bedroom 3 +Zipcode * +Insulation Slab None +Insulation Slab Good Option ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=0 gap_r=0 exterior_r=0 exterior_depth=0 +Insulation Slab Missing Argument ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=10 gap_r=5 exterior_r=0 +Insulation Unfinished Basement None +Insulation Unfinished Basement Good Option ResidentialConstructionsUnfinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 ceiling_cavity_r=0 ceiling_install_grade=1 ceiling_framing_factor=0.13 ceiling_joist_height_in=9.25 slab_whole_r=0 Insulation Unfinished Basement Extra Argument ResidentialConstructionsUnfinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 ceiling_cavity_r=13 ceiling_install_grade=1 ceiling_framing_factor=0.13 ceiling_joist_height_in=9.25 slab_whole_r=0 extra_arg=1 -Insulation Finished Basement None -Insulation Finished Basement Good Option ResidentialConstructionsFinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 slab_whole_r=0 -Insulation Finished Basement Bad Value ResidentialConstructionsFinishedBasement 
wall_ins_height=4 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=1.5 wall_framing_factor=0 wall_rigid_r=5 wall_drywall_thick_in=0.5 slab_whole_r=0 -Insulation Wall Good Option ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" -Insulation Wall Missing Measure ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" - ResidentialMissingMeasure +Insulation Finished Basement None +Insulation Finished Basement Good Option ResidentialConstructionsFinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 slab_whole_r=0 +Insulation Finished Basement Bad Value ResidentialConstructionsFinishedBasement wall_ins_height=4 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=1.5 wall_framing_factor=0 wall_rigid_r=5 wall_drywall_thick_in=0.5 slab_whole_r=0 +Insulation Wall Good Option ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" +Insulation Wall Missing Measure ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" + ResidentialMissingMeasure From bc66dd56de50dcb1d5698cace67ef6942eecb92c Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Mon, 4 Dec 2023 17:53:22 +0000 Subject: [PATCH 2/3] Add changelog entry --- docs/changelog/changelog_dev.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 75eeab3d..988c51e2 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -14,3 +14,9 @@ Development Changelog This is an example change. Please copy and paste it - for valid tags please refer to ``conf.py`` in the docs directory. ``pullreq`` should be set to the appropriate pull request number and ``tickets`` to any related github issues. These will be automatically linked in the documentation. + + .. change:: + :tags: general + :pullreq: 422 + + Refactor AWS code so it can be shared by the upcoming GCP implementation. From d826e518d466722b170abded6a00e3e08bfcfe44 Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Tue, 5 Dec 2023 16:16:40 +0000 Subject: [PATCH 3/3] Replace bucket and prefix params with a single path --- buildstockbatch/cloud/docker_base.py | 11 +++++------ buildstockbatch/test/test_docker_base.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/buildstockbatch/cloud/docker_base.py b/buildstockbatch/cloud/docker_base.py index cf1117c4..5cdd0117 100644 --- a/buildstockbatch/cloud/docker_base.py +++ b/buildstockbatch/cloud/docker_base.py @@ -234,7 +234,7 @@ def get_epws_to_download(cls, sim_dir, jobs_d): return epws_to_download @classmethod - def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, bucket, prefix): + def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, output_path): """ Run one batch of simulations. 
@@ -245,15 +245,14 @@ def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, bucket, prefix):
         :param jobs_d: Contents of a single job JSON file; contains the list of buildings to simulate in this job.
         :param sim_dir: Path to the (local) directory where job files are stored.
         :param fs: Filesystem to use when writing outputs to storage bucket.
-        :param bucket: Name of the storage bucket to upload results to.
-        :param prefix: File prefix to use when writing to storage bucket.
+        :param output_path: File path (typically ``bucket/prefix``) to write outputs to.
         """
         local_fs = LocalFileSystem()
         reporting_measures = cls.get_reporting_measures(cfg)
         dpouts = []
         simulation_output_tar_filename = sim_dir.parent / "simulation_outputs.tar.gz"
         asset_dirs = os.listdir(sim_dir)
-        ts_output_dir = f"{bucket}/{prefix}/results/simulation_output/timeseries"
+        ts_output_dir = f"{output_path}/results/simulation_output/timeseries"
 
         with tarfile.open(str(simulation_output_tar_filename), "w:gz") as simout_tar:
             for building_id, upgrade_idx in jobs_d["batch"]:
@@ -316,12 +315,12 @@ def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, bucket, prefix):
         # Upload simulation outputs tarfile to s3
         fs.put(
             str(simulation_output_tar_filename),
-            f"{bucket}/{prefix}/results/simulation_output/simulations_job{job_id}.tar.gz",
+            f"{output_path}/results/simulation_output/simulations_job{job_id}.tar.gz",
         )
 
         # Upload aggregated dpouts as a json file
         with fs.open(
-            f"{bucket}/{prefix}/results/simulation_output/results_job{job_id}.json.gz",
+            f"{output_path}/results/simulation_output/results_job{job_id}.json.gz",
             "wb",
         ) as f1:
             with gzip.open(f1, "wt", encoding="utf-8") as f2:
                 json.dump(dpouts, f2)
diff --git a/buildstockbatch/test/test_docker_base.py b/buildstockbatch/test/test_docker_base.py
index 549f5892..6164c8c9 100644
--- a/buildstockbatch/test/test_docker_base.py
+++ b/buildstockbatch/test/test_docker_base.py
@@ -120,7 +120,7 @@ def test_run_simulations(basic_residential_project_file):
         bucket = temp_path / "bucket"
         os.makedirs(bucket / "test_prefix" / "results" / "simulation_output")
 
-        DockerBatchBase.run_simulations(cfg, 0, jobs_d, sim_dir, fs, bucket, "test_prefix")
+        DockerBatchBase.run_simulations(cfg, 0, jobs_d, sim_dir, fs, f"{bucket}/test_prefix")
 
         output_dir = bucket / "test_prefix" / "results" / "simulation_output"
         assert sorted(os.listdir(output_dir)) == ["results_job0.json.gz", "simulations_job0.tar.gz"]
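
A minimal usage sketch for reviewers, not part of the patches above: it shows how a cloud-specific implementation (such as the upcoming GCP one mentioned in the changelog entry) is expected to call the two helpers that now live on DockerBatchBase. The ExampleCloudBatch class, its run_job() signature, and the fsspec-style weather download are hypothetical stand-ins; only get_epws_to_download() and run_simulations() come from these patches.

    import gzip
    import os
    import pathlib

    from buildstockbatch.cloud.docker_base import DockerBatchBase


    class ExampleCloudBatch(DockerBatchBase):  # hypothetical subclass for illustration
        @classmethod
        def run_job(cls, job_id, cfg, jobs_d, sim_dir, fs, output_path):
            """Run one batch of simulations, mirroring the pattern in aws.py's run_job()."""
            weather_dir = sim_dir / "weather"
            os.makedirs(weather_dir, exist_ok=True)

            # Shared helper (patch 1): map this batch's building IDs to the EPW
            # weather files they need, via options_lookup.tsv and buildstock.csv.
            for epw_filename in cls.get_epws_to_download(sim_dir, jobs_d):
                # Fetching the gzipped EPWs is storage-specific; this generic
                # fsspec-style read is an assumption, not code from the patches.
                epw_name = pathlib.Path(epw_filename).name
                with fs.open(f"{output_path}/weather/{epw_name}.gz", "rb") as f_gz:
                    with open(weather_dir / epw_name, "wb") as f_out:
                        f_out.write(gzip.decompress(f_gz.read()))

            # Shared helper (patches 1 and 3): run OpenStudio for each building in
            # jobs_d["batch"], then upload the tarred simulation outputs and the
            # aggregated results under {output_path}/results/simulation_output/.
            cls.run_simulations(cfg, job_id, jobs_d, sim_dir, fs, output_path)

Keeping the storage-specific weather download in the subclass while the simulation loop and uploads live in DockerBatchBase is what lets aws.py's run_job() shrink to the thin wrapper in patch 1.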