Merge pull request #65 from NREL/rHorsey/enable-com
ComStock!
nmerket committed May 20, 2020
2 parents 2124a82 + 37b56fd, commit 9588244
Showing 49 changed files with 1,174 additions and 266 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -4,6 +4,7 @@ jobs:
docker:
- image: continuumio/miniconda3
steps:
- setup_remote_docker
- checkout
- run:
name: Install buildstock
3 changes: 2 additions & 1 deletion .github/pull_request_template.md
@@ -12,5 +12,6 @@ Not all may apply
- [ ] Tests exercising your feature/bug fix (check coverage report on CircleCI build -> Artifacts)
- [ ] All other unit tests passing
- [ ] Update validation for project config yaml file changes
- [ ] Update documentation
- [ ] Update existing documentation
- [ ] Run a small batch run to make sure it all works (local is fine, unless an Eagle specific feature)
- [ ] Add to the changelog_dev.rst file and propose migration text in the pull request
4 changes: 2 additions & 2 deletions buildstockbatch/__version__.py
@@ -1,8 +1,8 @@
__title__ = 'buildstock-batch'
__description__ = 'Executing BuildStock projects on batch infrastructure.'
__url__ = 'http://github.com/NREL/buildstockbatch'
__version__ = '0.17.1'
__schema_version__ = '0.1'
__version__ = '0.18'
__schema_version__ = '0.2'
__author__ = 'Noel Merket'
__author_email__ = 'noel.merket@nrel.gov'
__license__ = 'BSD-3'
10 changes: 5 additions & 5 deletions buildstockbatch/aws/aws.py
@@ -1745,8 +1745,8 @@ def validate_project(project_file):
super(AwsBatch, AwsBatch).validate_project(project_file)
AwsBatch.validate_instance_types(project_file)

@classmethod
def docker_image(cls):
@property
def docker_image(self):
return 'nrel/buildstockbatch'

@property
@@ -1755,7 +1755,7 @@ def weather_dir(self):

@property
def container_repo(self):
repo_name = self.docker_image()
repo_name = self.docker_image
repos = self.ecr.describe_repositories()
repo = None
for repo in repos['repositories']:
@@ -1776,7 +1776,7 @@ def build_image(self):
logger.debug('Building docker image')
self.docker_client.images.build(
path=str(root_path),
tag=self.docker_image(),
tag=self.docker_image,
rm=True
)

@@ -1795,7 +1795,7 @@ def push_image(self):
registry=registry_url
)
logger.debug(resp)
image = self.docker_client.images.get(self.docker_image())
image = self.docker_client.images.get(self.docker_image)
image.tag(repo_url, tag=self.job_identifier)
last_status = None
for x in self.docker_client.images.push(repo_url, tag=self.job_identifier, stream=True):
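
The aws.py changes are all one refactor: docker_image goes from a classmethod to a read-only property, so every call site drops the parentheses (self.docker_image() becomes self.docker_image). A minimal sketch of that pattern; the class and method names below are made up for illustration and are not the real AwsBatch implementation:

class DockerBatchSketch:
    # Hypothetical stand-in class for illustration; not the real AwsBatch.

    @property
    def docker_image(self):
        # Read-only attribute: accessed without parentheses at call sites.
        return 'nrel/buildstockbatch'

    def image_tag_for_job(self, job_identifier):
        # Mirrors the updated call sites above, e.g. tag=self.docker_image
        # instead of tag=self.docker_image().
        return f'{self.docker_image}:{job_identifier}'


print(DockerBatchSketch().image_tag_for_job('my_run'))  # nrel/buildstockbatch:my_run
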
140 changes: 69 additions & 71 deletions buildstockbatch/base.py
@@ -50,8 +50,8 @@ class ValidationError(Exception):

class BuildStockBatchBase(object):

OS_VERSION = '2.9.1'
OS_SHA = '3472e8b799'
DEFAULT_OS_VERSION = '2.9.1'
DEFAULT_OS_SHA = '3472e8b799'
LOGO = '''
_ __ _ __, _ __
( / ) o // /( _/_ / ( / ) _/_ /
@@ -63,39 +63,40 @@ class BuildStockBatchBase(object):

def __init__(self, project_filename):
self.project_filename = os.path.abspath(project_filename)
with open(self.project_filename, 'r') as f:
self.cfg = yaml.load(f, Loader=yaml.SafeLoader)
if 'stock_type' not in self.cfg.keys():
raise KeyError('Key `stock_type` not specified in project file `{}`'.format(project_filename))
elif (self.stock_type != 'residential') & (self.stock_type != 'commercial'):
raise KeyError('Key `{}` for value `stock_type` not recognized in `{}`'.format(self.cfg['stock_type'],
project_filename))
if 'buildstock_csv' in self.cfg['baseline']:
buildstock_csv = self.path_rel_to_projectfile(self.cfg['baseline']['buildstock_csv'])
if not os.path.exists(buildstock_csv):
raise FileNotFoundError('The buildstock.csv file does not exist at {}'.format(buildstock_csv))
df = pd.read_csv(buildstock_csv)
n_datapoints = self.cfg['baseline'].get('n_datapoints', df.shape[0])
self.cfg['baseline']['n_datapoints'] = n_datapoints
if n_datapoints != df.shape[0]:
raise RuntimeError(
'A buildstock_csv was provided, so n_datapoints for sampling should not be provided or should be '
'equal to the number of rows in the buildstock.csv file. Remove or comment out '
'baseline->n_datapoints from your project file.'
)

# Load project file to self.cfg
self.cfg = self.get_project_configuration(project_filename)

self.buildstock_dir = self.cfg['buildstock_directory']
if not os.path.isdir(self.buildstock_dir):
raise FileNotFoundError(f'buildstock_directory = {self.buildstock_dir} is not a directory.')
self.project_dir = os.path.join(self.buildstock_dir, self.cfg['project_directory'])
if not os.path.isdir(self.project_dir):
raise FileNotFoundError(f'project_directory = {self.project_dir} is not a directory.')

# To be set in subclasses
self.sampler = None

def path_rel_to_projectfile(self, x):
# Load in OS_VERSION and OS_SHA arguments if they exist in the YAML,
# otherwise use defaults specified here.
self.os_version = self.cfg.get('os_version', self.DEFAULT_OS_VERSION)
self.os_sha = self.cfg.get('os_sha', self.DEFAULT_OS_SHA)
logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}")

@staticmethod
def path_rel_to_file(startfile, x):
if os.path.isabs(x):
return os.path.abspath(x)
else:
return os.path.abspath(os.path.join(os.path.dirname(self.project_filename), x))
return os.path.abspath(os.path.join(os.path.dirname(startfile), x))

def path_rel_to_projectfile(self, x):
return self.path_rel_to_file(self.project_filename, x)

def _get_weather_files(self):
if 'weather_files_path' in self.cfg:
logger.debug('Copying weather files')
weather_file_path = self.path_rel_to_projectfile(self.cfg['weather_files_path'])
weather_file_path = self.cfg['weather_files_path']
with zipfile.ZipFile(weather_file_path, 'r') as zf:
logger.debug('Extracting weather files to: {}'.format(self.weather_dir))
zf.extractall(self.weather_dir)
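
The constructor now reads optional os_version and os_sha keys from the project YAML and falls back to DEFAULT_OS_VERSION and DEFAULT_OS_SHA (renamed from OS_VERSION and OS_SHA above). A small sketch of that fallback, using a hypothetical stand-in class and illustrative config dicts:

class OsVersionSketch:
    # Hypothetical stand-in for illustration; the DEFAULT_* values are
    # copied from the diff above.
    DEFAULT_OS_VERSION = '2.9.1'
    DEFAULT_OS_SHA = '3472e8b799'

    def __init__(self, cfg):
        # Same dict.get fallback pattern as the new __init__ above.
        self.os_version = cfg.get('os_version', self.DEFAULT_OS_VERSION)
        self.os_sha = cfg.get('os_sha', self.DEFAULT_OS_SHA)


print(OsVersionSketch({}).os_version)                       # 2.9.1 (default)
print(OsVersionSketch({'os_version': '3.0.0'}).os_version)  # 3.0.0 (illustrative override)
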
@@ -119,22 +120,6 @@ def stock_type(self):
def weather_dir(self):
raise NotImplementedError

@property
def buildstock_dir(self):
d = self.path_rel_to_projectfile(self.cfg['buildstock_directory'])
# logger.debug('buildstock_dir = {}'.format(d))
assert(os.path.isdir(d))
return d

@property
def project_dir(self):
d = os.path.abspath(
os.path.join(self.buildstock_dir, self.cfg['project_directory'])
)
# logger.debug('project_dir = {}'.format(d))
assert(os.path.isdir(d))
return d

@property
def results_dir(self):
raise NotImplementedError
@@ -151,25 +136,7 @@ def skip_baseline_sims(self):
def run_sampling(self, n_datapoints=None):
if n_datapoints is None:
n_datapoints = self.cfg['baseline']['n_datapoints']
if 'buildstock_csv' in self.cfg['baseline']:
logger.debug("Reusing the buildstock_csv")
buildstock_csv = self.path_rel_to_projectfile(self.cfg['baseline']['buildstock_csv'])
destination_filename = self.sampler.csv_path
if destination_filename != buildstock_csv:
if os.path.exists(destination_filename):
logger.info("Removing {!r} before copying {!r} to that location."
.format(destination_filename, buildstock_csv))
os.remove(destination_filename)
shutil.copy(
buildstock_csv,
destination_filename
)
return destination_filename
else:
logger.debug("Running fresh sampling")
buildstock_csv_filename = self.sampler.run_sampling(n_datapoints)
logger.debug("Sampling completed")
return buildstock_csv_filename
return self.sampler.run_sampling(n_datapoints)

def run_batch(self):
raise NotImplementedError
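
With the buildstock_csv special case removed, run_sampling is a straight delegation: the sampler object decides whether to reuse a precomputed sample or draw a fresh one. A rough sketch of the resulting interaction, with hypothetical classes standing in for the real base class and samplers:

class PrecomputedSamplerSketch:
    # Hypothetical sampler: hands back an existing buildstock.csv path
    # instead of generating a new sample.

    def __init__(self, csv_path):
        self.csv_path = csv_path

    def run_sampling(self, n_datapoints=None):
        return self.csv_path


class BatchSketch:
    # Hypothetical stand-in for the simplified run_sampling above.

    def __init__(self, sampler, n_datapoints):
        self.sampler = sampler
        self.n_datapoints = n_datapoints

    def run_sampling(self, n_datapoints=None):
        if n_datapoints is None:
            n_datapoints = self.n_datapoints
        return self.sampler.run_sampling(n_datapoints)


batch = BatchSketch(PrecomputedSamplerSketch('buildstock.csv'), n_datapoints=350)
print(batch.run_sampling())  # buildstock.csv
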
@@ -279,7 +246,7 @@ def cleanup_sim_dir(sim_dir, dest_fs, simout_ts_dir, upgrade_id, building_id):
# and copy it to the results directory
timeseries_filepath = os.path.join(sim_dir, 'run', 'enduse_timeseries.csv')
if os.path.isfile(timeseries_filepath):
tsdf = pd.read_csv(timeseries_filepath, parse_dates=['Time'])
tsdf = pd.read_csv(timeseries_filepath, parse_dates=[0])
postprocessing.write_dataframe_as_parquet(
tsdf,
dest_fs,
@@ -304,22 +271,33 @@ def cleanup_sim_dir(sim_dir, dest_fs, simout_ts_dir, upgrade_id, building_id):
def validate_project(project_file):
assert(BuildStockBatchBase.validate_project_schema(project_file))
assert(BuildStockBatchBase.validate_misc_constraints(project_file))
assert(BuildStockBatchBase.validate_xor_schema_keys(project_file))
assert(BuildStockBatchBase.validate_xor_nor_schema_keys(project_file))
assert(BuildStockBatchBase.validate_precomputed_sample(project_file))
assert(BuildStockBatchBase.validate_reference_scenario(project_file))
assert(BuildStockBatchBase.validate_measures_and_arguments(project_file))
assert(BuildStockBatchBase.validate_options_lookup(project_file))
assert(BuildStockBatchBase.validate_measure_references(project_file))
assert(BuildStockBatchBase.validate_options_lookup(project_file))
logger.info('Base Validation Successful')
return True

@staticmethod
def get_project_configuration(project_file):
@classmethod
def get_project_configuration(cls, project_file):
try:
with open(project_file) as f:
cfg = yaml.load(f, Loader=yaml.SafeLoader)
except FileNotFoundError as err:
logger.error('Failed to load input yaml for validation')
raise err

# Set absolute paths
cfg['buildstock_directory'] = cls.path_rel_to_file(project_file, cfg['buildstock_directory'])
if 'precomputed_sample' in cfg.get('baseline', {}):
cfg['baseline']['precomputed_sample'] = \
cls.path_rel_to_file(project_file, cfg['baseline']['precomputed_sample'])
if 'weather_files_path' in cfg:
cfg['weather_files_path'] = cls.path_rel_to_file(project_file, cfg['weather_files_path'])

return cfg
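
Besides loading the YAML, get_project_configuration now converts the path-like keys (buildstock_directory, baseline.precomputed_sample, weather_files_path) to absolute paths relative to the project file, which is why _get_weather_files above can use cfg['weather_files_path'] directly. A trimmed-down sketch of that behavior, assuming a hypothetical project file path and an illustrative in-memory YAML string instead of a file on disk:

import os

import yaml

PROJECT_FILE = '/home/user/projects/national/project.yml'  # hypothetical path

PROJECT_YAML = """\
buildstock_directory: ../OpenStudio-BuildStock
weather_files_path: weather_files.zip
baseline:
  sampling_algorithm: precomputed
  precomputed_sample: buildstock.csv
  n_datapoints: 350
"""


def path_rel_to_file(startfile, x):
    # Same resolution rule as the staticmethod in the diff above.
    if os.path.isabs(x):
        return os.path.abspath(x)
    return os.path.abspath(os.path.join(os.path.dirname(startfile), x))


cfg = yaml.load(PROJECT_YAML, Loader=yaml.SafeLoader)
cfg['buildstock_directory'] = path_rel_to_file(PROJECT_FILE, cfg['buildstock_directory'])
if 'precomputed_sample' in cfg.get('baseline', {}):
    cfg['baseline']['precomputed_sample'] = path_rel_to_file(PROJECT_FILE, cfg['baseline']['precomputed_sample'])
if 'weather_files_path' in cfg:
    cfg['weather_files_path'] = path_rel_to_file(PROJECT_FILE, cfg['weather_files_path'])

print(cfg['buildstock_directory'])  # /home/user/projects/OpenStudio-BuildStock
print(cfg['weather_files_path'])    # /home/user/projects/national/weather_files.zip
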

@staticmethod
@@ -346,28 +324,48 @@ def validate_project_schema(project_file):
def validate_misc_constraints(project_file):
# validate other miscellaneous constraints
cfg = BuildStockBatchBase.get_project_configuration(project_file)
if 'buildstock_csv' in cfg['baseline']:
if 'precomputed_sample' in cfg['baseline']:
if cfg.get('downselect', {'resample': False}).get('resample', True):
raise ValidationError("Downselect with resampling cannot be used when using buildstock_csv. \n"
"Please set resample: False in downselect, or do not use buildstock_csv.")
raise ValidationError("Downselect with resampling cannot be used when using precomputed buildstock_csv."
"\nPlease set resample: False in downselect or use a different sampler.")

if cfg.get('postprocessing', {}).get('aggregate_timeseries', False):
logger.warning('aggregate_timeseries has been deprecated and will be removed in a future version.')

return True

@staticmethod
def validate_xor_schema_keys(project_file):
def validate_xor_nor_schema_keys(project_file):
cfg = BuildStockBatchBase.get_project_configuration(project_file)
major, minor = cfg.get('version', __schema_version__).split('.')
if int(major) >= 0:
if int(minor) >= 0:
# xor
if ('weather_files_url' in cfg.keys()) is \
('weather_files_path' in cfg.keys()):
raise ValidationError('Both/neither weather_files_url and weather_files_path found in yaml root')
if ('n_datapoints' in cfg['baseline'].keys()) is \
('buildstock_csv' in cfg['baseline'].keys()):
raise ValidationError('Both/neither n_datapoints and buildstock_csv found in yaml baseline key')

# No precomputed sample key unless using precomputed sampling
if cfg['baseline']['sampling_algorithm'] != 'precomputed' and 'precomputed_sample' in cfg['baseline']:
raise ValidationError(
'baseline.precomputed_sample is not allowed unless '
'baseline.sampling_algorithm = "precomputed".'
)
return True

@staticmethod
def validate_precomputed_sample(project_file):
cfg = BuildStockBatchBase.get_project_configuration(project_file)
if 'precomputed_sample' in cfg['baseline']:
buildstock_csv = cfg['baseline']['precomputed_sample']
if not os.path.exists(buildstock_csv):
raise FileNotFoundError(buildstock_csv)
buildstock_df = pd.read_csv(buildstock_csv)
if buildstock_df.shape[0] != cfg['baseline']['n_datapoints']:
raise RuntimeError(
f'`n_datapoints` does not match the number of rows in {buildstock_csv}. '
f'Please set `n_datapoints` to {buildstock_df.shape[0]}'
)
return True

@staticmethod
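
validate_xor_schema_keys becomes validate_xor_nor_schema_keys: the weather-keys check still uses Python's is operator between two membership tests as an exclusive-or (both present or both absent fails), and a new check forbids baseline.precomputed_sample unless sampling_algorithm is "precomputed". A small sketch of the weather-keys half of that check, run against two illustrative configurations:

class ValidationError(Exception):
    pass


def check_weather_keys(cfg):
    # True is True and False is False both trigger the error, so exactly
    # one of the two keys must be present, mirroring the check above.
    if ('weather_files_url' in cfg) is ('weather_files_path' in cfg):
        raise ValidationError('Both/neither weather_files_url and weather_files_path found in yaml root')
    return True


# Illustrative configs, not taken from a real project file.
print(check_weather_keys({'weather_files_path': 'weather_files.zip'}))  # True

try:
    check_weather_keys({'weather_files_url': 'https://example.com/weather.zip',
                        'weather_files_path': 'weather_files.zip'})
except ValidationError as err:
    print(err)
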
