ComStock! #65

Merged · 80 commits · May 20, 2020
Changes from 17 commits

Commits
fc4c041
Add todo noting use of unit specifications in commercial results.
rHorsey Jun 24, 2019
65e6329
Enable setting of OS_VERSION and OS_SHA through the yml.
rHorsey Jun 24, 2019
eda0f5a
Enable specification of the singularity image and path from the yml f…
rHorsey Jun 24, 2019
fbdbd76
Check singularity container download status code in case of 404, 501,…
rHorsey Jun 24, 2019
2990efe
Adding catch in run_batch to ensure that results are only ever overwr…
rHorsey Jun 24, 2019
5bcd369
Add support for custom (non-linked) gems in the singularity container…
rHorsey Jun 24, 2019
37cb2f9
Adding default commercial workflow generator.
rHorsey Jun 24, 2019
48b1873
Updates to the commercial sobol sampler.
rHorsey Jun 24, 2019
44f59cf
Additional updates to the commercial sobol sampler.
rHorsey Jun 24, 2019
0bc880c
Add support for precomputed buildstock.csv files as a sampling algori…
rHorsey Jun 24, 2019
c0a9bfe
merging master.
rHorsey Aug 26, 2019
b708915
Ensuring standard n_datapoint default interface of None.
rHorsey Aug 26, 2019
d4e48c4
Fix sampling inheritance preemption.
rHorsey Aug 27, 2019
39f7a29
Updates enabling through simulation.
Aug 27, 2019
f1fafb1
Updates to enable commercial apply-upgrade measure usage.
rHorsey Sep 27, 2019
850303d
Merging enable-com
rHorsey Sep 27, 2019
fa7f068
Merge remote-tracking branch 'origin/master' into enable-com
nmerket Oct 10, 2019
810a638
Adding in option of additional QAQC measure to Commercial workflow.
rHorsey Oct 21, 2019
95a0e06
Removing debugging sleep statement.
rHorsey Oct 21, 2019
fa16cef
Fix a botched merge in postprocessing.py.
rHorsey Oct 21, 2019
23a6efc
Update to qaqc enablement.
rHorsey Oct 22, 2019
c76b578
Adding simulation settings check into QAQC block.
rHorsey Oct 23, 2019
878fa75
making a return true explicit.
rHorsey Oct 23, 2019
721f4bb
Add preliminary ComStock postprocessing functions
Oct 30, 2019
0189471
Merge branch 'fix_ci' into enable-com
nmerket Nov 1, 2019
54cd807
Change os_version and os_sha from class constants to instance variables
asparke2 Jan 10, 2020
0b0c144
Provide clearer error message if docker daemon not running on Windows
asparke2 Jan 10, 2020
1dd308e
Avoid attempts to copy buildstock.csv files to current location
asparke2 Jan 10, 2020
9d88003
Modifies tests to use LocalDocker and HPCBatch instances to test inst…
asparke2 Jan 10, 2020
8c7c7f2
Modifies test again to avoid unrelated docker hub connection error
asparke2 Jan 10, 2020
4ddcf13
Fix style errors
asparke2 Jan 10, 2020
a79b2ea
Fix style and missing import references in postprocessing code; still…
asparke2 Jan 10, 2020
5142566
Small change to make default behavior more obvious.
rHorsey Jan 10, 2020
f3e88d0
Merge pull request #124 from NREL/fix_com_docker_4
rHorsey Jan 10, 2020
6d77611
Merge branch 'master' into enable-com
nmerket Feb 11, 2020
b034f82
fixing merge conflict goof
nmerket Feb 11, 2020
bd14ecd
add new sensitivity reporting measure
Mar 19, 2020
aafc455
forgot to change measure name
Mar 19, 2020
f27017f
Adding in support for configuration settings required for com.
rHorsey Mar 20, 2020
76bef02
Merge branch 'rHorsey/enable-com' into comstock-sensitivity
rHorsey Mar 20, 2020
120d159
Making sure weather_dir method is hit in sampling - removes a rare ra…
rHorsey Mar 30, 2020
d1db85e
add qoi measure to workflow_generator
Mar 30, 2020
a0c5628
Merge pull request #142 from NREL/comstock-sensitivity
rHorsey Mar 30, 2020
d2cd915
Fixing datetime specification error.
rHorsey Mar 31, 2020
517c59b
Initial merge of output refactor complete.
rHorsey Apr 14, 2020
5fb5dc7
Fixed several tests - sampler mock still needs fixing, will inquire w…
rHorsey Apr 14, 2020
1788a4e
Fixed test and made additional changes to support close-to-previous p…
rHorsey Apr 15, 2020
c061c26
Forgot to flake8
rHorsey Apr 15, 2020
b7b404c
Merge branch 'master' into enable-com
nmerket Apr 22, 2020
72bbd66
using docker_image property instead of function
nmerket Apr 22, 2020
c96a397
Merge branch 'master' into rHorsey/enable-com
rHorsey Apr 24, 2020
a0780fa
Fixing postprocessing bug.
rHorsey Apr 24, 2020
adf302e
Merge branch 'rHorsey/enable-com' of http://github.com/nrel/buildstoc…
rHorsey Apr 24, 2020
ed602bd
Adding seeds folder mount.
rHorsey Apr 24, 2020
ae62171
Try two updating to include seeds directory in mounts.
rHorsey Apr 24, 2020
34cb059
Fixing tests for local docker and upgrading schema to v0.2
rHorsey Apr 28, 2020
c5510d6
Forgot to add the v0.2 schema...
rHorsey Apr 28, 2020
8a6b4bc
Adding docker support to circle - maybe a bad idea...
rHorsey Apr 28, 2020
9abb0b7
Remove empty.osm seed dependency -- we do not need it.
May 6, 2020
da480c7
Forcing sampling_algorithm key to exist.
rHorsey May 6, 2020
d9e03b8
Removing the seeds mount from eagle.
rHorsey May 6, 2020
5f653eb
Updated documentation for 0.18 release - added changelog and migratio…
rHorsey May 8, 2020
4d6d5d7
Final doc updates.
rHorsey May 8, 2020
1fb08dd
Merge branch 'master' into rHorsey/enable-com
nmerket May 12, 2020
50e9f82
Merge branch 'master' into enable-com
nmerket May 12, 2020
36eb73c
Adding logic and tests to validate downselect schema requirements.
rHorsey May 14, 2020
3aba168
updating version
nmerket May 15, 2020
6ea0b75
precomputed sampling overhaul
nmerket May 18, 2020
ccbb7d3
fixing testing
nmerket May 18, 2020
49d4b88
style fixes
nmerket May 18, 2020
afbb416
more validation
nmerket May 18, 2020
1679fb4
moving precomputed sample validation
nmerket May 19, 2020
f8d3d67
fixing testing
nmerket May 19, 2020
69f7b51
Fixing style.
rHorsey May 19, 2020
588e590
fixing docker image for aws
nmerket May 19, 2020
bbc0cc6
Fixing weather files doc conflict.
rHorsey May 20, 2020
6928d6a
Final documentation updates for release 0.18
rHorsey May 20, 2020
d7702b5
Fix for comm update docs.
rHorsey May 20, 2020
744bae5
One last rst fix.
rHorsey May 20, 2020
37b56fd
Merge remote-tracking branch 'origin/master' into enable-com
nmerket May 20, 2020
46 changes: 8 additions & 38 deletions buildstockbatch/base.py
@@ -61,30 +61,15 @@ def __init__(self, project_filename):
elif (self.stock_type != 'residential') & (self.stock_type != 'commercial'):
raise KeyError('Key `{}` for value `stock_type` not recognized in `{}`'.format(self.cfg['stock_type'],
project_filename))
self.sampler = None
self._weather_dir = None
# Call property to create directory and copy weather files there
_ = self.weather_dir # noqa: F841

if 'buildstock_csv' in self.cfg['baseline']:
buildstock_csv = self.path_rel_to_projectfile(self.cfg['baseline']['buildstock_csv'])
if not os.path.exists(buildstock_csv):
raise FileNotFoundError('The buildstock.csv file does not exist at {}'.format(buildstock_csv))
df = pd.read_csv(buildstock_csv)
n_datapoints = self.cfg['baseline'].get('n_datapoints', df.shape[0])
self.cfg['baseline']['n_datapoints'] = n_datapoints
if n_datapoints != df.shape[0]:
raise RuntimeError(
'A buildstock_csv was provided, so n_datapoints for sampling should not be provided or should be '
'equal to the number of rows in the buildstock.csv file. Remove or comment out '
'baseline->n_datapoints from your project file.'
)
if 'downselect' in self.cfg:
raise RuntimeError(
'A buildstock_csv was provided, which isn\'t compatible with downselecting.'
'Remove or comment out the downselect key from your project file.'
)

self.sampler = None
# Load in overriding OS_VERSION and OS_SHA arguments if they exist in the YAML
if 'os_version' in self.cfg.keys():
self.OS_VERSION = self.cfg['os_version']
if 'os_sha' in self.cfg.keys():
self.OS_SHA = self.cfg['os_sha']

def path_rel_to_projectfile(self, x):
if os.path.isabs(x):
@@ -155,23 +140,7 @@ def skip_baseline_sims(self):
return baseline_skip

def run_sampling(self, n_datapoints=None):
if n_datapoints is None:
n_datapoints = self.cfg['baseline']['n_datapoints']
if 'buildstock_csv' in self.cfg['baseline']:
buildstock_csv = self.path_rel_to_projectfile(self.cfg['baseline']['buildstock_csv'])
destination_filename = self.sampler.csv_path
if destination_filename != buildstock_csv:
if os.path.exists(destination_filename):
logger.info("Removing {!r} before copying {!r} to that location."
.format(destination_filename, buildstock_csv))
os.remove(destination_filename)
shutil.copy(
buildstock_csv,
destination_filename
)
return destination_filename
else:
return self.sampler.run_sampling(n_datapoints)
return self.sampler.run_sampling(n_datapoints)

def run_batch(self):
raise NotImplementedError
@@ -292,6 +261,7 @@ def validate_project(project_file):
assert(BuildStockBatchBase.validate_options_lookup(project_file))
assert(BuildStockBatchBase.validate_measure_references(project_file))
assert(BuildStockBatchBase.validate_reference_scenario(project_file))
#assert(BuildStockBatchBase.validate_options_lookup(project_file))
logger.info('Base Validation Successful')
return True

56 changes: 49 additions & 7 deletions buildstockbatch/hpc.py
@@ -26,7 +26,7 @@
import time

from .base import BuildStockBatchBase, SimulationExists
from .sampler import ResidentialSingularitySampler, CommercialSobolSampler
from .sampler import ResidentialSingularitySampler, CommercialSobolSingularitySampler, PrecomputedSingularitySampler

logger = logging_.getLogger(__name__)

@@ -59,7 +59,15 @@ def __init__(self, project_filename):
elif self.stock_type == 'commercial':
sampling_algorithm = self.cfg['baseline'].get('sampling_algorithm', 'sobol')
if sampling_algorithm == 'sobol':
self.sampler = CommercialSobolSampler(
self.sampler = CommercialSobolSingularitySampler(
self.output_dir,
self.cfg,
self.buildstock_dir,
self.project_dir
)
elif sampling_algorithm == 'precomputed':
print('calling precomputed sampler')
nmerket marked this conversation as resolved.
self.sampler = PrecomputedSingularitySampler(
self.output_dir,
self.cfg,
self.buildstock_dir,
@@ -83,17 +91,33 @@ def singularity_image_url(cls):

@property
def singularity_image(self):
# Check the project yaml specification - if the file does not exist do not silently allow for non-specified simg
if 'sys_image_dir' in self.cfg.keys():
sys_image_dir = self.cfg['sys_image_dir']
sys_image = os.path.join(sys_image_dir, 'OpenStudio-{ver}.{sha}-Singularity.simg'.format(
ver=self.OS_VERSION,
sha=self.OS_SHA
))
if os.path.isfile(sys_image):
return sys_image
else:
raise RuntimeError('Unable to find singularity image specified in project file: `{}`'.format(sys_image))
# Use the expected HPC environment default if not explicitly defined in the YAML
Review comment: A lot of this is redundant with the next several lines. The only thing that is different is sys_image_dir. What if instead of doing this you created an @property getter for sys_image_dir on this class that looked in the config file and did this check. You could then change the current class attribute sys_image_dir to default_sys_image_dir and use it if sys_image_dir is not specified in the config file.
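
Sketched out, that suggestion might look like the following (a hypothetical refactor, not code from this PR; default_sys_image_dir stands in for the renamed class attribute):

# Hypothetical sketch of the reviewer's suggestion; not the merged implementation.
@property
def sys_image_dir(self):
    # Prefer the directory given in the project YAML, failing loudly if it is missing.
    if 'sys_image_dir' in self.cfg:
        sys_image_dir = self.cfg['sys_image_dir']
        if not os.path.isdir(sys_image_dir):
            raise RuntimeError('Unable to find sys_image_dir specified in project file: `{}`'.format(sys_image_dir))
        return sys_image_dir
    # Otherwise fall back to the class default (renamed from sys_image_dir).
    return self.default_sys_image_dir

singularity_image could then build and check the OpenStudio-{ver}.{sha} path from self.sys_image_dir once, instead of repeating the join-and-check logic in both branches.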

sys_image = os.path.join(self.sys_image_dir, 'OpenStudio-{ver}.{sha}-Singularity.simg'.format(
ver=self.OS_VERSION,
sha=self.OS_SHA
))
if os.path.isfile(sys_image):
return sys_image
# Download the appropriate singularity image for the defined OS_VERSION and OS_SHA
else:
singularity_image_path = os.path.join(self.output_dir, 'openstudio.simg')
if not os.path.isfile(singularity_image_path):
logger.debug('Downloading singularity image')
r = requests.get(self.singularity_image_url(), stream=True)
if r.status_code != requests.codes.ok:
logger.error('Unable to download simg file from OpenStudio releases S3 bucket.')
r.raise_for_status()
Review comment: Good check.

with open(singularity_image_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
@@ -139,8 +163,19 @@ def run_batch(self):
else:
# otherwise just the plain sampling process needs to be run
buildstock_csv_filename = self.run_sampling()

# read the results
# If the results directory already exists, implying the existence of results, require a user defined override
# in the YAML file to allow for those results to be overwritten. Note that this will not impact the
# postprocessonly or uploadonly flags as they do not ever invoke the run_batch function, instead skipping to the
# queue_post_processing and then process_results functions
if 'output_directory' in self.cfg:
    if os.path.isdir(os.path.join(self.cfg['output_directory'], 'results')):
        if not self.cfg.get('override_existing', False):
            raise RuntimeError('results directory exists in {} - please address'.format(
                self.cfg['output_directory']))
        else:
            logger.warning('Overriding results in results directory in {}'.format(self.cfg['output_directory']))
Review comment: I'm not sure I like this. We have a validation that checks for the results directory and errors out if it exists. Then the user has to delete it themselves or choose another location. Things get really weird when you start overwriting results.

Review comment: Feel free to get rid of "override existing".
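
A minimal sketch of the fail-fast validation described here (hypothetical helper name; assumes the same self.cfg layout used above):

# Hypothetical sketch of erroring out instead of overwriting; not the code in this PR.
def validate_results_dir_absent(self):
    results_dir = os.path.join(self.cfg['output_directory'], 'results')
    if os.path.isdir(results_dir):
        raise RuntimeError(
            'Results directory {} already exists. Delete it or choose another '
            'output_directory before rerunning.'.format(results_dir)
        )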


# Determine the number of simulations expected to be executed
df = pd.read_csv(buildstock_csv_filename, index_col=0)

# find out how many buildings there are to simulate
@@ -229,6 +264,13 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, output_dir, sing
weather_dir,
]

# If custom gems are to be used in the singularity container add extra bundle arguments to the cli command
cli_cmd = 'openstudio run -w in.osw'
if cfg.get('baseline', dict()).get('custom_gems', False):
cli_cmd = 'openstudio --bundle /var/oscli/Gemfile --bundle_path /var/oscli/gems run -w in.osw --debug'
Review comment: I'm not 100% sure how these gems are getting into the container. Is it through your custom singularity image?

Anyway, I don't see something comparable in localdocker.py. There probably should be something there too.
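
For comparison, a matching hook in localdocker.py might look like this (hypothetical; assumes the Docker image ships a bundle at /var/oscli the same way the custom singularity image does):

# Hypothetical sketch of a custom_gems hook for the Docker runner; not part of this PR.
osw_cmd = 'openstudio run -w in.osw'
if cfg.get('baseline', {}).get('custom_gems', False):
    # Point the CLI at the pre-installed bundle baked into the image.
    osw_cmd = 'openstudio --bundle /var/oscli/Gemfile --bundle_path /var/oscli/gems run -w in.osw --debug'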

if get_bool_env_var('MEASURESONLY'):
cli_cmd += ' --measures_only'

# Call singularity to run the simulation
args = [
'singularity', 'exec',
@@ -247,14 +289,13 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, output_dir, sing
args.extend(['-B', '{}:{}:ro'.format(src, container_mount)])
container_symlink = os.path.join('/var/simdata/openstudio', os.path.basename(src))
runscript.append('ln -s {} {}'.format(*map(shlex.quote, (container_mount, container_symlink))))
runscript.append('openstudio run -w in.osw')
if get_bool_env_var('MEASURESONLY'):
runscript[-1] += ' --measures_only'
runscript.append(cli_cmd)
args.extend([
singularity_image,
'bash', '-x'
])
logger.debug(' '.join(args))
logger.debug('\n'.join(runscript))
with open(os.path.join(sim_dir, 'singularity_output.log'), 'w') as f_out:
try:
subprocess.run(
Expand All @@ -268,6 +309,7 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, output_dir, sing
except subprocess.CalledProcessError:
pass
finally:
time.sleep(600)
# Clean up the symbolic links we created in the container
for mount_dir in dirs_to_mount + [os.path.join(sim_dir, 'lib')]:
try:
12 changes: 10 additions & 2 deletions buildstockbatch/localdocker.py
@@ -22,7 +22,7 @@
import shutil

from buildstockbatch.base import BuildStockBatchBase, SimulationExists
from buildstockbatch.sampler import ResidentialDockerSampler, CommercialSobolSampler
from buildstockbatch.sampler import ResidentialDockerSampler, CommercialSobolDockerSampler, PrecomputedDockerSampler

logger = logging.getLogger(__name__)

@@ -45,12 +45,20 @@ def __init__(self, project_filename):
elif self.stock_type == 'commercial':
sampling_algorithm = self.cfg['baseline'].get('sampling_algorithm', 'sobol')
if sampling_algorithm == 'sobol':
self.sampler = CommercialSobolSampler(
self.sampler = CommercialSobolDockerSampler(
nmerket marked this conversation as resolved.
self.project_dir,
self.cfg,
self.buildstock_dir,
self.project_dir
)
elif sampling_algorithm == 'precomputed':
print('calling precomputed sampler')
self.sampler = PrecomputedDockerSampler(
self.output_dir,
self.cfg,
self.buildstock_dir,
self.project_dir
)
else:
raise NotImplementedError('Sampling algorithm "{}" is not implemented.'.format(sampling_algorithm))
else:
9 changes: 9 additions & 0 deletions buildstockbatch/postprocessing.py
@@ -73,11 +73,20 @@ def flatten_datapoint_json(reporting_measures, d):
new_d[f'{col1}.{k}'] = v

# if there is no units_represented key, default to 1
# TODO @nmerket @rajeee is there a way to not apply this to Commercial jobs? It doesn't hurt, but it is weird for us
nmerket marked this conversation as resolved.
units = int(new_d.get(f'{col1}.units_represented', 1))
new_d[f'{col1}.units_represented'] = units
<<<<<<< HEAD

# copy over all the keys and values in SimulationOutputReport
col3 = 'SimulationOutputReport'
for k, v in d.get(col3, {}).items():
new_d[f'{col3}.{k}'] = v
=======
col2 = 'SimulationOutputReport'
for k, v in d.get(col2, {}).items():
new_d[f'{col2}.{k}'] = v
>>>>>>> origin/master
nmerket marked this conversation as resolved.

# additional reporting measures
for col in reporting_measures:
3 changes: 2 additions & 1 deletion buildstockbatch/sampler/__init__.py
@@ -2,4 +2,5 @@

from .residential_docker import ResidentialDockerSampler # noqa F041
from .residential_singularity import ResidentialSingularitySampler # noqa F041
from .commercial_sobol import CommercialSobolSampler # noqa F041
from .commercial_sobol import CommercialSobolSingularitySampler, CommercialSobolDockerSampler # noqa F041
from .precomputed import PrecomputedDockerSampler, PrecomputedSingularitySampler # noqa F041
62 changes: 57 additions & 5 deletions buildstockbatch/sampler/commercial_sobol.py
@@ -25,7 +25,7 @@
logger = logging.getLogger(__name__)


class CommercialSobolSampler(BuildStockSampler):
class CommercialBaseSobolSampler(BuildStockSampler):

def __init__(self, output_dir, *args, **kwargs):
"""
@@ -43,7 +43,17 @@ def __init__(self, output_dir, *args, **kwargs):
def csv_path(self):
return os.path.join(self.project_dir, 'buildstock.csv')

def run_sampling(self, n_datapoints):
def run_sampling(self, n_datapoints=None):
"""
Execute the sampling generating the specified number of datapoints.

This is a stub. It needs to be implemented in the child classes for each deployment environment.

:param n_datapoints: Number of datapoints to sample from the distributions.
"""
raise NotImplementedError

def run_sobol_sampling(self, n_datapoints=None, csv_path=None):
"""
Run the commercial sampling.

@@ -54,7 +64,10 @@ def run_sampling(self, n_datapoints):
:param n_datapoints: Number of datapoints to sample from the distributions.
:param csv_path: Optional destination path for the output buildstock.csv file; defaults to self.csv_path.
:return: Absolute path to the output buildstock.csv file
"""
logging.debug('Sampling, n_datapoints={}'.format(n_datapoints))
sample_number = self.cfg['baseline'].get('n_datapoints', 350000)
if isinstance(n_datapoints, int):
sample_number = n_datapoints
logging.debug(f'Sampling, number of data points is {sample_number}')
tsv_hash = {}
for tsv_file in os.listdir(self.buildstock_dir):
if '.tsv' in tsv_file:
@@ -63,7 +76,7 @@
tsv_df[dependency_columns] = tsv_df[dependency_columns].astype('str')
tsv_hash[tsv_file.replace('.tsv', '')] = tsv_df
dependency_hash, attr_order = self._com_order_tsvs(tsv_hash)
sample_matrix = self._com_execute_sobol_sampling(attr_order.__len__(), n_datapoints)
sample_matrix = self._com_execute_sobol_sampling(attr_order.__len__(), sample_number)
csv_path = csv_path or self.csv_path
header = 'Building,'
for item in attr_order:
@@ -78,7 +91,7 @@
Parallel(n_jobs=n_jobs, verbose=5)(
delayed(self._com_execute_sample)(tsv_hash, dependency_hash, attr_order, sample_matrix, index, csv_path,
lock)
for index in range(n_datapoints)
for index in range(sample_number)
)
return csv_path

@@ -175,3 +188,42 @@ def _com_execute_sample(tsv_hash, dependency_hash, attr_order, sample_matrix, sa
fd.write(csv_row)
finally:
lock.release()


class CommercialSobolSingularitySampler(CommercialBaseSobolSampler):
Review comment: Do we need to have separate singularity and docker sampler classes? These look exactly the same. I know you're probably doing this because we have separate classes for residential because our sampling runs in a container. Someday I'd like that not to be the case.

Review comment: I'm going to deal with this in #147, so let's not worry about it for now.
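
For illustration, folding the two subclasses into one might look like this (a hypothetical sketch of that idea, not what #147 ultimately implemented):

# Hypothetical sketch of a single deployment-agnostic sampler; not this PR's code.
class CommercialSobolSampler(CommercialBaseSobolSampler):

    def __init__(self, output_dir, csv_dir, *args, **kwargs):
        # csv_dir replaces the per-deployment subclasses: the singularity runner
        # would pass output_dir, the docker runner project_dir/housing_characteristics.
        super().__init__(output_dir, *args, **kwargs)
        self.csv_dir = csv_dir

    @property
    def csv_path(self):
        return os.path.join(self.csv_dir, 'buildstock.csv')

    def run_sampling(self, n_datapoints=None):
        return self.run_sobol_sampling(n_datapoints)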


def __init__(self, output_dir, *args, **kwargs):
"""
This class uses the Commercial Sobol Sampler to execute samples for Peregrine Singularity deployments
"""
self.output_dir = output_dir
super().__init__(*args, **kwargs)

def run_sampling(self, n_datapoints=None):
"""
Execute the sampling for use in Peregrine Singularity deployments

:param n_datapoints: Number of datapoints to sample from the distributions.
:return: Path to the sample CSV file
"""
csv_path = os.path.join(self.output_dir, 'buildstock.csv')
return self.run_sobol_sampling(n_datapoints, csv_path)


class CommercialSobolDockerSampler(CommercialBaseSobolSampler):

def __init__(self, *args, **kwargs):
"""
This class uses the Commercial Sobol Sampler to execute samples for local Docker deployments
"""
super().__init__(*args, **kwargs)

def run_sampling(self, n_datapoints=None):
"""
Execute the sampling for use in local Docker deployments

:param n_datapoints: Number of datapoints to sample from the distributions.
:return: Path to the sample CSV file
"""
csv_path = os.path.join(self.project_dir, 'housing_characteristics', 'buildstock.csv')
return self.run_sobol_sampling(n_datapoints, csv_path)