From 9e7847961c807f68bdd4b6c71c048d979a18a1de Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Thu, 8 Oct 2020 16:52:40 -0600 Subject: [PATCH 01/21] refactoring sampler classes --- buildstockbatch/__init__.py | 6 ++ buildstockbatch/base.py | 21 ++++++ buildstockbatch/eagle.py | 46 ++----------- buildstockbatch/localdocker.py | 41 ++---------- buildstockbatch/sampler/__init__.py | 7 +- buildstockbatch/sampler/base.py | 66 ++++++++++++++++--- buildstockbatch/sampler/commercial_sobol.py | 66 +++---------------- buildstockbatch/sampler/precomputed.py | 43 +----------- ...dential_docker.py => residential_quota.py} | 51 +++++++------- .../sampler/residential_singularity.py | 64 ------------------ buildstockbatch/test/test_base.py | 3 +- 11 files changed, 136 insertions(+), 278 deletions(-) rename buildstockbatch/sampler/{residential_docker.py => residential_quota.py} (58%) delete mode 100644 buildstockbatch/sampler/residential_singularity.py diff --git a/buildstockbatch/__init__.py b/buildstockbatch/__init__.py index 80124e43..2d934278 100644 --- a/buildstockbatch/__init__.py +++ b/buildstockbatch/__init__.py @@ -26,3 +26,9 @@ :copyright: (c) 2018 by The Alliance for Sustainable Energy. :license: BSD-3, see LICENSE for more details. 
""" +import enum + + +class ContainerRuntime(enum.Enum): + DOCKER = 1 + SINGULARITY = 2 diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index e0433dc8..9d796f09 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -30,6 +30,7 @@ import xml.etree.ElementTree as ET from buildstockbatch.__version__ import __schema_version__ +from .sampler import ResidentialQuotaSampler, CommercialSobolSampler, PrecomputedSampler from .workflow_generator import ResidentialDefaultWorkflowGenerator, CommercialDefaultWorkflowGenerator from buildstockbatch import postprocessing @@ -52,6 +53,7 @@ class BuildStockBatchBase(object): DEFAULT_OS_VERSION = '2.9.1' DEFAULT_OS_SHA = '3472e8b799' + CONTAINER_RUNTIME = None LOGO = ''' _ __ _ __, _ __ ( / ) o // /( _/_ / ( / ) _/_ / @@ -83,6 +85,25 @@ def __init__(self, project_filename): self.os_sha = self.cfg.get('os_sha', self.DEFAULT_OS_SHA) logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}") + # Select a sampler + sampling_algorithm = self.cfg['baseline']['sampling_algorithm'] + if sampling_algorithm == 'precomputed': + self.sampler = PrecomputedSampler(self) + elif self.stock_type == 'residential': + if sampling_algorithm == 'quota': + self.sampler = ResidentialQuotaSampler(self) + else: + raise NotImplementedError('Sampling algorithm "{}" is not implemented for residential projects.'. + format(sampling_algorithm)) + elif self.stock_type == 'commercial': + if sampling_algorithm == 'sobol': + self.sampler = CommercialSobolSampler(self) + else: + raise NotImplementedError('Sampling algorithm "{}" is not implemented for commercial projects.'. 
+ format(sampling_algorithm)) + else: + raise KeyError('stock_type = "{}" is not valid'.format(self.stock_type)) + @staticmethod def path_rel_to_file(startfile, x): if os.path.isabs(x): diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py index 320cee14..ae027df7 100644 --- a/buildstockbatch/eagle.py +++ b/buildstockbatch/eagle.py @@ -33,10 +33,9 @@ import time import yaml -from .base import BuildStockBatchBase, SimulationExists -from .sampler import ResidentialSingularitySampler, CommercialSobolSingularitySampler, PrecomputedSingularitySampler -from .utils import log_error_details, get_error_details -from . import postprocessing +from buildstockbatch.base import BuildStockBatchBase, SimulationExists +from buildstockbatch.utils import log_error_details, get_error_details +from buildstockbatch import postprocessing, ContainerRuntime logger = logging.getLogger(__name__) @@ -47,6 +46,8 @@ def get_bool_env_var(varname): class EagleBatch(BuildStockBatchBase): + CONTAINER_RUNTIME = ContainerRuntime.SINGULARITY + sys_image_dir = '/shared-projects/buildstock/singularity_images' hpc_name = 'eagle' min_sims_per_job = 36 * 2 @@ -66,43 +67,6 @@ def __init__(self, project_filename): os.makedirs(output_dir) logger.debug('Output directory = {}'.format(output_dir)) weather_dir = self.weather_dir # noqa E841 - sampling_algorithm = self.cfg['baseline'].get('sampling_algorithm', None) - if sampling_algorithm is None: - raise KeyError('The key `sampling_algorithm` is not specified in the `baseline` section of the project ' - 'configuration yaml. 
This key is required.') - if sampling_algorithm == 'precomputed': - logger.info('calling precomputed sampler') - self.sampler = PrecomputedSingularitySampler( - self.output_dir, - self.cfg, - self.buildstock_dir, - self.project_dir - ) - elif self.stock_type == 'residential': - if sampling_algorithm == 'quota': - self.sampler = ResidentialSingularitySampler( - self.singularity_image, - self.output_dir, - self.cfg, - self.buildstock_dir, - self.project_dir - ) - else: - raise NotImplementedError('Sampling algorithem "{}" is not implemented for residential projects.'. - format(sampling_algorithm)) - elif self.stock_type == 'commercial': - if sampling_algorithm == 'sobol': - self.sampler = CommercialSobolSingularitySampler( - self.output_dir, - self.cfg, - self.buildstock_dir, - self.project_dir - ) - else: - raise NotImplementedError('Sampling algorithem "{}" is not implemented for commercial projects.'. - format(sampling_algorithm)) - else: - raise KeyError('stock_type = "{}" is not valid'.format(self.stock_type)) @staticmethod def validate_project(project_file): diff --git a/buildstockbatch/localdocker.py b/buildstockbatch/localdocker.py index 75171555..14c052da 100644 --- a/buildstockbatch/localdocker.py +++ b/buildstockbatch/localdocker.py @@ -28,8 +28,7 @@ import tempfile from buildstockbatch.base import BuildStockBatchBase, SimulationExists -from buildstockbatch.sampler import ResidentialDockerSampler, CommercialSobolDockerSampler, PrecomputedDockerSampler -from buildstockbatch import postprocessing +from buildstockbatch import postprocessing, ContainerRuntime from .utils import log_error_details logger = logging.getLogger(__name__) @@ -37,49 +36,17 @@ class DockerBatchBase(BuildStockBatchBase): + CONTAINER_RUNTIME = ContainerRuntime.DOCKER + def __init__(self, project_filename): super().__init__(project_filename) + self.docker_client = docker.DockerClient.from_env() try: self.docker_client.ping() except: # noqa: E722 (allow bare except in this case because 
error can be a weird non-class Windows API error) logger.error('The docker server did not respond, make sure Docker Desktop is started then retry.') raise RuntimeError('The docker server did not respond, make sure Docker Desktop is started then retry.') - sampling_algorithm = self.cfg['baseline'].get('sampling_algorithm', None) - if sampling_algorithm is None: - raise KeyError('The key `sampling_algorithm` is not specified in the `baseline` section of the project ' - 'configuration yaml. This key is required.') - if sampling_algorithm == 'precomputed': - logger.info('calling precomputed sampler') - self.sampler = PrecomputedDockerSampler( - self.cfg, - self.buildstock_dir, - self.project_dir - ) - elif self.stock_type == 'residential': - if sampling_algorithm == 'quota': - self.sampler = ResidentialDockerSampler( - self.docker_image, - self.cfg, - self.buildstock_dir, - self.project_dir - ) - else: - raise NotImplementedError('Sampling algorithm "{}" is not implemented for residential projects.'. - format(sampling_algorithm)) - elif self.stock_type == 'commercial': - if sampling_algorithm == 'sobol': - self.sampler = CommercialSobolDockerSampler( - self.project_dir, - self.cfg, - self.buildstock_dir, - self.project_dir - ) - else: - raise NotImplementedError('Sampling algorithm "{}" is not implemented for commercial projects.'. 
- format(sampling_algorithm)) - else: - raise KeyError('stock_type = "{}" is not valid'.format(self.stock_type)) self._weather_dir = None diff --git a/buildstockbatch/sampler/__init__.py b/buildstockbatch/sampler/__init__.py index 30a0335c..9486fae5 100644 --- a/buildstockbatch/sampler/__init__.py +++ b/buildstockbatch/sampler/__init__.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -from .residential_docker import ResidentialDockerSampler # noqa F041 -from .residential_singularity import ResidentialSingularitySampler # noqa F041 -from .commercial_sobol import CommercialSobolSingularitySampler, CommercialSobolDockerSampler # noqa F041 -from .precomputed import PrecomputedDockerSampler, PrecomputedSingularitySampler # noqa F041 +from .residential_quota import ResidentialQuotaSampler # noqa F041 +from .commercial_sobol import CommercialSobolSampler # noqa F041 +from .precomputed import PrecomputedSampler # noqa F041 diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index 0acd019a..e6d2306e 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -10,6 +10,10 @@ """ import logging +import os +import weakref + +from buildstockbatch import ContainerRuntime logger = logging.getLogger(__name__) @@ -18,7 +22,7 @@ class BuildStockSampler(object): csv_path = None - def __init__(self, cfg, buildstock_dir, project_dir): + def __init__(self, parent): """ Create the buildstock.csv file required for batch simulations using this class. @@ -26,20 +30,66 @@ def __init__(self, cfg, buildstock_dir, project_dir): sampling strategies. Currently there are separate implementations for commercial & residential stock types\ due to unique requirements created by the commercial tsv set. 
- :param cfg: YAML configuration specified by the user for the analysis - :param buildstock_dir: The location of the OpenStudio-BuildStock repo - :param project_dir: The project directory within the OpenStudio-BuildStock repo + :param parent: The BuildStockBatchBase object that owns this sampler. """ - self.cfg = cfg - self.buildstock_dir = buildstock_dir - self.project_dir = project_dir + self.parent = weakref.ref(parent) # This removes circular references and allows garbage collection to work. + if self.container_runtime == ContainerRuntime.DOCKER: + self.csv_path = os.path.join(self.project_dir, 'housing_characteristics', 'buildstock.csv') + elif self.container_runtime == ContainerRuntime.SINGULARITY: + self.csv_path = os.path.join(self.parent().output_dir, 'housing_characteristics', 'buildstock.csv') + else: + self.csv_path = None + + @property + def cfg(self): + return self.parent().cfg + + @property + def buildstock_dir(self): + return self.parent().buildstock_dir + + @property + def project_dir(self): + return self.parent().project_dir + + @property + def container_runtime(self): + return self.parent().CONTAINER_RUNTIME def run_sampling(self, n_datapoints): """ Execute the sampling generating the specified number of datapoints. - This is a stub. It needs to be implemented in the child classes. + :param n_datapoints: Number of datapoints to sample from the distributions. + :type n_datapoints: int + + Replace this in a subclass if your sampling doesn't depend on containerization. + """ + logger.debug('Sampling, n_datapoints={}'.format(n_datapoints)) + if self.container_runtime == ContainerRuntime.DOCKER: + return self._run_sampling_docker(n_datapoints) + else: + assert self.container_runtime == ContainerRuntime.SINGULARITY + return self._run_sampling_singularity(n_datapoints) + + def _run_sampling_docker(self, n_datapoints): + """ + Execute the sampling in a docker container :param n_datapoints: 
+ :type n_datapoints: int + + Replace this in a subclass if your sampling needs docker. + """ + raise NotImplementedError + + def _run_sampling_singularity(self, n_datapoints): + """ + Execute the sampling in a singularity container + + :param n_datapoints: Number of datapoints to sample from the distributions. + :type n_datapoints: int + + Replace this in a subclass if your sampling needs singularity. """ raise NotImplementedError diff --git a/buildstockbatch/sampler/commercial_sobol.py b/buildstockbatch/sampler/commercial_sobol.py index fd93b6c3..4daae992 100644 --- a/buildstockbatch/sampler/commercial_sobol.py +++ b/buildstockbatch/sampler/commercial_sobol.py @@ -21,13 +21,14 @@ from .sobol_lib import i4_sobol_generate from .base import BuildStockSampler +from buildstockbatch import ContainerRuntime logger = logging.getLogger(__name__) -class CommercialBaseSobolSampler(BuildStockSampler): +class CommercialSobolSampler(BuildStockSampler): - def __init__(self, output_dir, *args, **kwargs): + def __init__(self, parent): """ Initialize the sampler. @@ -36,24 +37,14 @@ def __init__(self, output_dir, *args, **kwargs): :param cfg: YAML configuration specified by the user for the analysis :param buildstock_dir: The location of the OpenStudio-BuildStock repo :param project_dir: The project directory within the OpenStudio-BuildStock repo """ - super().__init__(*args, **kwargs) - self.output_dir = output_dir - - @property - def csv_path(self): - return os.path.join(self.project_dir, 'buildstock.csv') + super().__init__(parent) + if self.container_runtime == ContainerRuntime.SINGULARITY: + self.csv_path = os.path.join(self.parent().output_dir, 'buildstock.csv') + else: + assert self.container_runtime == ContainerRuntime.DOCKER + self.csv_path = os.path.join(self.project_dir, 'buildstock.csv') def run_sampling(self, n_datapoints): - """ - Execute the sampling generating the specified number of datapoints. - - This is a stub. It needs to be implemented in the child classes for each deployment environment. 
- - :param n_datapoints: Number of datapoints to sample from the distributions. - """ - raise NotImplementedError - - def run_sobol_sampling(self, n_datapoints=None): """ Run the commercial sampling. @@ -188,42 +179,3 @@ def _com_execute_sample(tsv_hash, dependency_hash, attr_order, sample_matrix, sa fd.write(csv_row) finally: lock.release() - - -class CommercialSobolSingularitySampler(CommercialBaseSobolSampler): - - def __init__(self, output_dir, *args, **kwargs): - """ - This class uses the Commercial Sobol Sampler to execute samples for Peregrine Singularity deployments - """ - self.output_dir = output_dir - super().__init__(*args, **kwargs) - - def run_sampling(self, n_datapoints): - """ - Execute the sampling for use in Peregrine Singularity deployments - - :param n_datapoints: Number of datapoints to sample from the distributions. - :return: Path to the sample CSV file - """ - csv_path = os.path.join(self.output_dir, 'buildstock.csv') - return self.run_sobol_sampling(n_datapoints, csv_path) - - -class CommercialSobolDockerSampler(CommercialBaseSobolSampler): - - def __init__(self, *args, **kwargs): - """ - This class uses the Commercial Sobol Sampler to execute samples for local Docker deployments - """ - super().__init__(*args, **kwargs) - - def run_sampling(self, n_datapoints): - """ - Execute the sampling for use in local Docker deployments - - :param n_datapoints: Number of datapoints to sample from the distributions. 
- :return: Path to the sample CSV file - """ - csv_path = os.path.join(self.project_dir, 'housing_characteristics', 'buildstock.csv') - return self.run_sobol_sampling(n_datapoints, csv_path) diff --git a/buildstockbatch/sampler/precomputed.py b/buildstockbatch/sampler/precomputed.py index afd06caf..f91899e6 100644 --- a/buildstockbatch/sampler/precomputed.py +++ b/buildstockbatch/sampler/precomputed.py @@ -11,7 +11,6 @@ """ import logging -import os import shutil from .base import BuildStockSampler @@ -19,17 +18,10 @@ logger = logging.getLogger(__name__) -class PrecomputedBaseSampler(BuildStockSampler): +class PrecomputedSampler(BuildStockSampler): - def __init__(self, *args, **kwargs): - """ - Initialize the sampler. - - :param cfg: YAML configuration specified by the user for the analysis - :param buildstock_dir: The location of the OpenStudio-BuildStock repo - :param project_dir: The project directory within the OpenStudio-BuildStock repo - """ - super().__init__(*args, **kwargs) + def __init__(self, parent): + super().__init__(parent) self.buildstock_csv = self.cfg['baseline']['precomputed_sample'] def run_sampling(self, n_datapoints): @@ -41,32 +33,3 @@ def run_sampling(self, n_datapoints): if self.csv_path != self.buildstock_csv: shutil.copy(self.buildstock_csv, self.csv_path) return self.csv_path - - -class PrecomputedSingularitySampler(PrecomputedBaseSampler): - - def __init__(self, output_dir, *args, **kwargs): - """ - Initialize the sampler. 
- - :param output_dir: Simulation working directory - :param cfg: YAML configuration specified by the user for the analysis - :param buildstock_dir: The location of the OpenStudio-BuildStock repo - :param project_dir: The project directory within the OpenStudio-BuildStock repo - """ - super().__init__(*args, **kwargs) - self.csv_path = os.path.join(output_dir, 'housing_characteristics', 'buildstock.csv') - - -class PrecomputedDockerSampler(PrecomputedBaseSampler): - - def __init__(self, *args, **kwargs): - """ - Initialize the sampler. - - :param cfg: YAML configuration specified by the user for the analysis - :param buildstock_dir: The location of the OpenStudio-BuildStock repo - :param project_dir: The project directory within the OpenStudio-BuildStock repo - """ - super().__init__(*args, **kwargs) - self.csv_path = os.path.join(self.project_dir, 'housing_characteristics', 'buildstock.csv') diff --git a/buildstockbatch/sampler/residential_docker.py b/buildstockbatch/sampler/residential_quota.py similarity index 58% rename from buildstockbatch/sampler/residential_docker.py rename to buildstockbatch/sampler/residential_quota.py index c6f980e3..f67731b7 100644 --- a/buildstockbatch/sampler/residential_docker.py +++ b/buildstockbatch/sampler/residential_quota.py @@ -1,19 +1,17 @@ -# -*- coding: utf-8 -*- - """ -buildstockbatch.sampler.residential_docker +buildstockbatch.sampler.residential_quota ~~~~~~~~~~~~~~~ This object contains the code required for generating the set of simulations to execute :author: Noel Merket, Ry Horsey -:copyright: (c) 2018 by The Alliance for Sustainable Energy +:copyright: (c) 2020 by The Alliance for Sustainable Energy :license: BSD-3 """ - import docker import logging import os import shutil +import subprocess import sys import time @@ -22,34 +20,16 @@ logger = logging.getLogger(__name__) -class ResidentialDockerSampler(BuildStockSampler): - - def __init__(self, docker_image, *args, **kwargs): - """ - Initialize the sampler. 
- - :param docker_image: the docker image to use (i.e. nrel/openstudio:2.7.0) - :return: Absolute path to the output buildstock.csv file - """ - super().__init__(*args, **kwargs) - self.docker_image = docker_image - self.csv_path = os.path.join(self.project_dir, 'housing_characteristics', 'buildstock.csv') - - def run_sampling(self, n_datapoints): - """ - Run the residential sampling in a docker container. - - :param n_datapoints: Number of datapoints to sample from the distributions. - """ - logger.debug('Sampling, n_datapoints={}'.format(n_datapoints)) +class ResidentialQuotaSampler(BuildStockSampler): + def _run_sampling_docker(self, n_datapoints): docker_client = docker.DockerClient.from_env() tick = time.time() extra_kws = {} if sys.platform.startswith('linux'): extra_kws['user'] = f'{os.getuid()}:{os.getgid()}' container_output = docker_client.containers.run( - self.docker_image, + self.parent().docker_image, [ 'ruby', 'resources/run_sampling.rb', @@ -76,3 +56,22 @@ def run_sampling(self, n_datapoints): destination_filename ) return destination_filename + + def _run_sampling_singularity(self, n_datapoints): + args = [ + 'singularity', + 'exec', + '--contain', + '--home', '{}:/buildstock'.format(self.buildstock_dir), + '--bind', '{}:/outbind'.format(os.path.dirname(self.csv_path)), + self.parent().singularity_image, + 'ruby', + 'resources/run_sampling.rb', + '-p', self.cfg['project_directory'], + '-n', str(n_datapoints), + '-o', '../../outbind/{}'.format(os.path.basename(self.csv_path)) + ] + logger.debug(f"Starting singularity sampling with command: {' '.join(args)}") + subprocess.run(args, check=True, env=os.environ, cwd=self.parent().output_dir) + logger.debug("Singularity sampling completed.") + return self.csv_path diff --git a/buildstockbatch/sampler/residential_singularity.py b/buildstockbatch/sampler/residential_singularity.py deleted file mode 100644 index 4e55d826..00000000 --- a/buildstockbatch/sampler/residential_singularity.py +++ /dev/null @@ 
-1,64 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -buildstockbatch.sampler.residential_singularity -~~~~~~~~~~~~~~~ -This object contains the code required for generating the set of simulations to execute - -:author: Noel Merket, Ry Horsey -:copyright: (c) 2018 by The Alliance for Sustainable Energy -:license: BSD-3 -""" - -import logging -import os -import subprocess - - -from .base import BuildStockSampler - -logger = logging.getLogger(__name__) - - -class ResidentialSingularitySampler(BuildStockSampler): - - def __init__(self, singularity_image, output_dir, *args, **kwargs): - """ - Initialize the sampler. - - :param singularity_image: path to the singularity image to use - :param output_dir: Simulation working directory - :param cfg: YAML configuration specified by the user for the analysis - :param buildstock_dir: The location of the OpenStudio-BuildStock repo - :param project_dir: The project directory within the OpenStudio-BuildStock repo - """ - super().__init__(*args, **kwargs) - self.singularity_image = singularity_image - self.output_dir = output_dir - self.csv_path = os.path.join(self.output_dir, 'housing_characteristics', 'buildstock.csv') - - def run_sampling(self, n_datapoints): - """ - Run the residential sampling in a singularity container. - - :param n_datapoints: Number of datapoints to sample from the distributions. 
- :return: Absolute path to the output buildstock.csv file - """ - logger.debug('Sampling, n_datapoints={}'.format(n_datapoints)) - args = [ - 'singularity', - 'exec', - '--contain', - '--home', '{}:/buildstock'.format(self.buildstock_dir), - '--bind', '{}:/outbind'.format(os.path.dirname(self.csv_path)), - self.singularity_image, - 'ruby', - 'resources/run_sampling.rb', - '-p', self.cfg['project_directory'], - '-n', str(n_datapoints), - '-o', '../../outbind/{}'.format(os.path.basename(self.csv_path)) - ] - logger.debug(f"Starting singularity sampling with command: {' '.join(args)}") - subprocess.run(args, check=True, env=os.environ, cwd=self.output_dir) - logger.debug("Singularity sampling completed.") - return self.csv_path diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index bba55b70..6fd39faa 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -332,7 +332,8 @@ def test_write_parquet_no_index(): def test_skipping_baseline(basic_residential_project_file): project_filename, results_dir = basic_residential_project_file({ 'baseline': { - 'skip_sims': True + 'skip_sims': True, + 'sampling_algorithm': 'quota' } }) From d53d81dfd8f01849724a6b977bbfdead9d497170 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 12 Oct 2020 12:09:13 -0600 Subject: [PATCH 02/21] passing args to samplers, making downselect a sampler --- buildstockbatch/__version__.py | 2 +- buildstockbatch/aws/aws.py | 5 +- buildstockbatch/base.py | 160 +++--------------- buildstockbatch/eagle.py | 14 +- buildstockbatch/exc.py | 10 ++ buildstockbatch/localdocker.py | 5 +- buildstockbatch/sampler/base.py | 37 ++-- buildstockbatch/sampler/commercial_sobol.py | 27 ++- buildstockbatch/sampler/downselect.py | 130 ++++++++++++++ buildstockbatch/sampler/precomputed.py | 35 +++- buildstockbatch/sampler/residential_quota.py | 42 ++++- buildstockbatch/schemas/v0.3.yaml | 131 ++++++++++++++ buildstockbatch/utils.py | 10 +- 
.../workflow_generator/residential.py | 1 + 14 files changed, 426 insertions(+), 183 deletions(-) create mode 100644 buildstockbatch/exc.py create mode 100644 buildstockbatch/sampler/downselect.py create mode 100644 buildstockbatch/schemas/v0.3.yaml diff --git a/buildstockbatch/__version__.py b/buildstockbatch/__version__.py index bfe38c38..55bfc205 100644 --- a/buildstockbatch/__version__.py +++ b/buildstockbatch/__version__.py @@ -2,7 +2,7 @@ __description__ = 'Executing BuildStock projects on batch infrastructure.' __url__ = 'http://github.com/NREL/buildstockbatch' __version__ = '0.18' -__schema_version__ = '0.2' +__schema_version__ = '0.3' __author__ = 'Noel Merket' __author_email__ = 'noel.merket@nrel.gov' __license__ = 'BSD-3' diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index c99fde4f..0ab53e72 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -1772,10 +1772,7 @@ def run_batch(self): """ # Generate buildstock.csv - if 'downselect' in self.cfg: - buildstock_csv_filename = self.downselect() - else: - buildstock_csv_filename = self.run_sampling() + buildstock_csv_filename = self.run_sampling() # Compress and upload assets to S3 with tempfile.TemporaryDirectory() as tmpdir, tempfile.TemporaryDirectory() as tmp_weather_dir: diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index 9d796f09..b92d097c 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -13,10 +13,7 @@ from dask.distributed import Client import difflib from fsspec.implementations.local import LocalFileSystem -import gzip import logging -import math -import numpy as np import os import pandas as pd import requests @@ -30,25 +27,15 @@ import xml.etree.ElementTree as ET from buildstockbatch.__version__ import __schema_version__ -from .sampler import ResidentialQuotaSampler, CommercialSobolSampler, PrecomputedSampler +from buildstockbatch import sampler from .workflow_generator import ResidentialDefaultWorkflowGenerator, 
CommercialDefaultWorkflowGenerator from buildstockbatch import postprocessing +from buildstockbatch.exc import SimulationExists, ValidationError +from buildstockbatch.utils import path_rel_to_file logger = logging.getLogger(__name__) -class SimulationExists(Exception): - - def __init__(self, msg, sim_id, sim_dir): - super().__init__(msg) - self.sim_id = sim_id - self.sim_dir = sim_dir - - -class ValidationError(Exception): - pass - - class BuildStockBatchBase(object): DEFAULT_OS_VERSION = '2.9.1' @@ -86,33 +73,16 @@ def __init__(self, project_filename): logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}") # Select a sampler - sampling_algorithm = self.cfg['baseline']['sampling_algorithm'] - if sampling_algorithm == 'precomputed': - self.sampler = PrecomputedSampler(self) - elif self.stock_type == 'residential': - if sampling_algorithm == 'quota': - self.sampler = ResidentialQuotaSampler(self) - else: - raise NotImplementedError('Sampling algorithm "{}" is not implemented for residential projects.'. - format(sampling_algorithm)) - elif self.stock_type == 'commercial': - if sampling_algorithm == 'sobol': - self.sampler = CommercialSobolSampler(self) - else: - raise NotImplementedError('Sampling algorithm "{}" is not implemented for commercial projects.'. 
- format(sampling_algorithm)) - else: - raise KeyError('stock_type = "{}" is not valid'.format(self.stock_type)) + Sampler = self.get_sampler_class(self.cfg['sampler']['type']) + self.sampler = Sampler(self, **self.cfg['sampler'].get('args', {})) @staticmethod - def path_rel_to_file(startfile, x): - if os.path.isabs(x): - return os.path.abspath(x) - else: - return os.path.abspath(os.path.join(os.path.dirname(startfile), x)) + def get_sampler_class(sampler_name): + sampler_class_name = ''.join(x.capitalize() for x in sampler_name.strip().split('_')) + 'Sampler' + return getattr(sampler, sampler_class_name) def path_rel_to_projectfile(self, x): - return self.path_rel_to_file(self.project_filename, x) + return path_rel_to_file(self.project_filename, x) def _get_weather_files(self): if 'weather_files_path' in self.cfg: @@ -154,70 +124,9 @@ def skip_baseline_sims(self): baseline_skip = self.cfg['baseline'].get('skip_sims', False) return baseline_skip - def run_sampling(self, n_datapoints=None): - if n_datapoints is None: - n_datapoints = self.cfg['baseline']['n_datapoints'] - return self.sampler.run_sampling(n_datapoints) - def run_batch(self): raise NotImplementedError - @classmethod - def downselect_logic(cls, df, logic): - if isinstance(logic, dict): - assert (len(logic) == 1) - key = list(logic.keys())[0] - values = logic[key] - if key == 'and': - retval = cls.downselect_logic(df, values[0]) - for value in values[1:]: - retval &= cls.downselect_logic(df, value) - return retval - elif key == 'or': - retval = cls.downselect_logic(df, values[0]) - for value in values[1:]: - retval |= cls.downselect_logic(df, value) - return retval - elif key == 'not': - return ~cls.downselect_logic(df, values) - elif isinstance(logic, list): - retval = cls.downselect_logic(df, logic[0]) - for value in logic[1:]: - retval &= cls.downselect_logic(df, value) - return retval - elif isinstance(logic, str): - key, value = logic.split('|') - return df[key] == value - - def downselect(self): - 
downselect_resample = self.cfg['downselect'].get('resample', True) - logger.debug("Starting downselect sampling") - if downselect_resample: - logger.debug('Performing initial sampling to figure out number of samples for downselect') - n_samples_init = 350000 - buildstock_csv_filename = self.run_sampling(n_samples_init) - df = pd.read_csv(buildstock_csv_filename, index_col=0) - df_new = df[self.downselect_logic(df, self.cfg['downselect']['logic'])] - downselected_n_samples_init = df_new.shape[0] - n_samples = math.ceil(self.cfg['baseline']['n_datapoints'] * n_samples_init / downselected_n_samples_init) - os.remove(buildstock_csv_filename) - else: - n_samples = self.cfg['baseline']['n_datapoints'] - buildstock_csv_filename = self.run_sampling(n_samples) - with gzip.open(os.path.splitext(buildstock_csv_filename)[0] + '_orig.csv.gz', 'wb') as f_out: - with open(buildstock_csv_filename, 'rb') as f_in: - shutil.copyfileobj(f_in, f_out) - df = pd.read_csv(buildstock_csv_filename, index_col=0, dtype='str') - df_new = df[self.downselect_logic(df, self.cfg['downselect']['logic'])] - if len(df_new.index) == 0: - raise RuntimeError('There are no buildings left after the down select!') - if downselect_resample: - old_index_name = df_new.index.name - df_new.index = np.arange(len(df_new)) + 1 - df_new.index.name = old_index_name - df_new.to_csv(buildstock_csv_filename) - return buildstock_csv_filename - @staticmethod def create_osw(cfg, *args, **kwargs): if cfg['stock_type'] == 'residential': @@ -291,9 +200,9 @@ def cleanup_sim_dir(sim_dir, dest_fs, simout_ts_dir, upgrade_id, building_id): @staticmethod def validate_project(project_file): assert(BuildStockBatchBase.validate_project_schema(project_file)) + assert(BuildStockBatchBase.validate_sampler(project_file)) assert(BuildStockBatchBase.validate_misc_constraints(project_file)) assert(BuildStockBatchBase.validate_xor_nor_schema_keys(project_file)) - assert(BuildStockBatchBase.validate_precomputed_sample(project_file)) 
assert(BuildStockBatchBase.validate_reference_scenario(project_file)) assert(BuildStockBatchBase.validate_measures_and_arguments(project_file)) assert(BuildStockBatchBase.validate_options_lookup(project_file)) @@ -312,12 +221,12 @@ def get_project_configuration(cls, project_file): raise err # Set absolute paths - cfg['buildstock_directory'] = cls.path_rel_to_file(project_file, cfg['buildstock_directory']) + cfg['buildstock_directory'] = path_rel_to_file(project_file, cfg['buildstock_directory']) if 'precomputed_sample' in cfg.get('baseline', {}): cfg['baseline']['precomputed_sample'] = \ - cls.path_rel_to_file(project_file, cfg['baseline']['precomputed_sample']) + path_rel_to_file(project_file, cfg['baseline']['precomputed_sample']) if 'weather_files_path' in cfg: - cfg['weather_files_path'] = cls.path_rel_to_file(project_file, cfg['weather_files_path']) + cfg['weather_files_path'] = path_rel_to_file(project_file, cfg['weather_files_path']) return cfg @@ -329,6 +238,17 @@ def get_buildstock_dir(project_file, cfg): else: return os.path.abspath(os.path.join(os.path.dirname(project_file), buildstock_dir)) + @staticmethod + def validate_sampler(project_file): + cfg = BuildStockBatchBase.get_project_configuration(project_file) + sampler_name = cfg['sampler']['type'] + try: + Sampler = BuildStockBatchBase.get_sampler_class(sampler_name) + except AttributeError: + raise ValidationError(f'Sampler class `{sampler_name}` is not available.') + args = cfg['sampler']['args'] + return Sampler.validate_args(project_file, **args) + @staticmethod def validate_project_schema(project_file): cfg = BuildStockBatchBase.get_project_configuration(project_file) @@ -345,10 +265,6 @@ def validate_project_schema(project_file): def validate_misc_constraints(project_file): # validate other miscellaneous constraints cfg = BuildStockBatchBase.get_project_configuration(project_file) - if 'precomputed_sample' in cfg['baseline']: - if cfg.get('downselect', {'resample': False}).get('resample', True): 
- raise ValidationError("Downselect with resampling cannot be used when using precomputed buildstock_csv." - "\nPlease set resample: False in downselect or use a different sampler.") if cfg.get('postprocessing', {}).get('aggregate_timeseries', False): logger.warning('aggregate_timeseries has been deprecated and will be removed in a future version.') @@ -366,27 +282,6 @@ def validate_xor_nor_schema_keys(project_file): ('weather_files_path' in cfg.keys()): raise ValidationError('Both/neither weather_files_url and weather_files_path found in yaml root') - # No precomputed sample key unless using precomputed sampling - if cfg['baseline']['sampling_algorithm'] != 'precomputed' and 'precomputed_sample' in cfg['baseline']: - raise ValidationError( - 'baseline.precomputed_sample is not allowed unless ' - 'baseline.sampling_algorithm = "precomputed".' - ) - return True - - @staticmethod - def validate_precomputed_sample(project_file): - cfg = BuildStockBatchBase.get_project_configuration(project_file) - if 'precomputed_sample' in cfg['baseline']: - buildstock_csv = cfg['baseline']['precomputed_sample'] - if not os.path.exists(buildstock_csv): - raise FileNotFoundError(buildstock_csv) - buildstock_df = pd.read_csv(buildstock_csv) - if buildstock_df.shape[0] != cfg['baseline']['n_datapoints']: - raise RuntimeError( - f'`n_datapoints` does not match the number of rows in {buildstock_csv}. ' - f'Please set `n_datapoints` to {buildstock_df.shape[0]}' - ) return True @staticmethod @@ -651,9 +546,10 @@ def get_all_option_str(source_str, inp): source_str_package = source_str_upgrade + ", in package_apply_logic" source_option_str_list += get_all_option_str(source_str_package, upgrade['package_apply_logic']) - if 'downselect' in cfg: - source_str = "In downselect" - source_option_str_list += get_all_option_str(source_str, cfg['downselect']['logic']) + # FIXME: Get this working in new downselect sampler validation. 
+ # if 'downselect' in cfg: + # source_str = "In downselect" + # source_option_str_list += get_all_option_str(source_str, cfg['downselect']['logic']) # Gather all the errors in the option_str, if any error_message = '' diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py index ae027df7..e310814b 100644 --- a/buildstockbatch/eagle.py +++ b/buildstockbatch/eagle.py @@ -162,19 +162,9 @@ def run_batch(self, sampling_only=False): destination_dir ) logger.debug("Housing characteristics copied.") + # run sampling - # NOTE: If a buildstock_csv is provided, the BuildStockBatch - # constructor ensures that 'downselect' not in self.cfg and - # run_sampling simply copies that .csv to the correct location if - # necessary and returns the path - if 'downselect' in self.cfg: - # if there is a downselect section in the yml, - # BuildStockBatchBase.downselect calls run_sampling and does - # additional processing before and after - buildstock_csv_filename = self.downselect() - else: - # otherwise just the plain sampling process needs to be run - buildstock_csv_filename = self.run_sampling() + buildstock_csv_filename = self.run_sampling() # Hit the weather_dir API to make sure that creating the weather directory isn't a race condition in the array # jobs - this is rare but happens quasi-repeatably when lustre is really lagging diff --git a/buildstockbatch/exc.py b/buildstockbatch/exc.py new file mode 100644 index 00000000..b8233e73 --- /dev/null +++ b/buildstockbatch/exc.py @@ -0,0 +1,10 @@ +class SimulationExists(Exception): + + def __init__(self, msg, sim_id, sim_dir): + super().__init__(msg) + self.sim_id = sim_id + self.sim_dir = sim_dir + + +class ValidationError(Exception): + pass diff --git a/buildstockbatch/localdocker.py b/buildstockbatch/localdocker.py index 14c052da..c0dbbabd 100644 --- a/buildstockbatch/localdocker.py +++ b/buildstockbatch/localdocker.py @@ -155,10 +155,7 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, docker_image, re 
return dpout def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): - if 'downselect' in self.cfg: - buildstock_csv_filename = self.downselect() - else: - buildstock_csv_filename = self.run_sampling() + buildstock_csv_filename = self.run_sampling() if sampling_only: return diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index e6d2306e..ade32ce2 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -14,6 +14,7 @@ import weakref from buildstockbatch import ContainerRuntime +from buildstockbatch.utils import path_rel_to_file logger = logging.getLogger(__name__) @@ -21,6 +22,22 @@ class BuildStockSampler(object): csv_path = None + ARGS_DEFAULTS = {} + + @staticmethod + def validate_args(project_filename, **kw): + """Validation of args from config, passed as **kw + + :param project_filename: The path to the project configuration file + :type project_filename: str + :param args: arguments to pass to the sampler from the config file + :type args: dict + :return: True if valid + :rtype: bool + + This is a stub. Replace it in your subclass. No need to super() anything. + """ + return True def __init__(self, parent): """ @@ -56,40 +73,30 @@ def project_dir(self): def container_runtime(self): return self.parent().CONTAINER_RUNTIME - def run_sampling(self, n_datapoints): + def run_sampling(self): """ Execute the sampling generating the specified number of datapoints. - :param n_datapoints: Number of datapoints to sample from the distributions. - :type n_datapoints: int - Replace this in a subclass if your sampling doesn't depend on containerization. 
         """
-        logger.debug('Sampling, n_datapoints={}'.format(n_datapoints))
         if self.container_runtime == ContainerRuntime.DOCKER:
-            return self._run_sampling_docker(n_datapoints)
+            return self._run_sampling_docker()
         else:
             assert self.container_runtime == ContainerRuntime.SINGULARITY
-            return self._run_sampling_singluarity(n_datapoints)
+            return self._run_sampling_singularity()
 
-    def _run_sampling_docker(self, n_datapoints):
+    def _run_sampling_docker(self):
         """
         Execute the sampling in a docker container
 
-        :param n_datapoints: Number of datapoints to sample from the distributions.
-        :type n_datapoints: int
-
         Replace this in a subclass if your sampling needs docker.
         """
         raise NotImplementedError
 
-    def _run_sampling_singluarity(self, n_datapoints):
+    def _run_sampling_singularity(self):
         """
         Execute the sampling in a singularity container
 
-        :param n_datapoints: Number of datapoints to sample from the distributions.
-        :type n_datapoints: int
-
         Replace this in a subclass if your sampling needs docker.
         """
         raise NotImplementedError
diff --git a/buildstockbatch/sampler/commercial_sobol.py b/buildstockbatch/sampler/commercial_sobol.py
index 4daae992..95d9280b 100644
--- a/buildstockbatch/sampler/commercial_sobol.py
+++ b/buildstockbatch/sampler/commercial_sobol.py
@@ -22,13 +22,14 @@
 from .sobol_lib import i4_sobol_generate
 from .base import BuildStockSampler
 from buildstockbatch import ContainerRuntime
+from buildstockbatch.exc import ValidationError
 
 logger = logging.getLogger(__name__)
 
 
 class CommercialSobolSampler(BuildStockSampler):
 
-    def __init__(self, parent):
+    def __init__(self, parent, n_datapoints):
         """
         Initialize the sampler.
@@ -38,13 +39,31 @@ def __init__(self, parent): :param project_dir: The project directory within the OpenStudio-BuildStock repo """ super().__init__(parent) + self.validate_args(self.parent().project_filename, n_datapoints=n_datapoints) if self.container_runtime == ContainerRuntime.SINGULARITY: self.csv_path = os.path.join(self.output_dir, 'buildstock.csv') else: assert self.container_runtime == ContainerRuntime.DOCKER self.csv_path = os.path.join(self.project_dir, 'buildstock.csv') + self.n_datapoints = n_datapoints - def run_sampling(self, n_datapoints): + @classmethod + def validate_args(cls, project_filename, **kw): + expected_args = set(['n_datapoints']) + for k, v in kw.items(): + expected_args.discard(k) + if k == 'n_datapoints': + if not isinstance(v, int): + raise ValidationError('n_datapoints needs to be an integer') + if v <= 0: + raise ValidationError('n_datapoints need to be >= 1') + else: + raise ValidationError(f'Unknown argument for sampler: {k}') + if len(expected_args) > 0: + raise ValidationError('The following sampler arguments are required: ' + ', '.join(expected_args)) + return True + + def run_sampling(self): """ Run the commercial sampling. 
@@ -56,8 +75,8 @@ def run_sampling(self, n_datapoints): :return: Absolute path to the output buildstock.csv file """ sample_number = self.cfg['baseline'].get('n_datapoints', 350000) - if isinstance(n_datapoints, int): - sample_number = n_datapoints + if isinstance(self.n_datapoints, int): + sample_number = self.n_datapoints logging.debug(f'Sampling, number of data points is {sample_number}') tsv_hash = {} for tsv_file in os.listdir(self.buildstock_dir): diff --git a/buildstockbatch/sampler/downselect.py b/buildstockbatch/sampler/downselect.py new file mode 100644 index 00000000..05e304a3 --- /dev/null +++ b/buildstockbatch/sampler/downselect.py @@ -0,0 +1,130 @@ +""" +buildstockbatch.sampler.residential_quota_downselect +~~~~~~~~~~~~~~~ +This object contains the code required for generating the set of simulations to execute + +:author: Noel Merket +:copyright: (c) 2020 by The Alliance for Sustainable Energy +:license: BSD-3 +""" +import gzip +import logging +import math +import numpy as np +import os +import pandas as pd +import shutil + +from .base import BuildStockSampler +from buildstockbatch.exc import ValidationError + +logger = logging.getLogger(__name__) + + +class DownselectSamplerBase(BuildStockSampler): + + SUB_SAMPLER_CLASS = None + + def __init__(self, parent, n_datapoints, logic, resample=True, **kw): + """Downselect Sampler + + This sampler performs a downselect of another sampler based on + building characteristics. 
+ + :param parent: BuildStockBatchBase object + :type parent: BuildStockBatchBase (or subclass) + :param n_datapoints: number of datapoints to sample + :type n_datapoints: int + :param logic: downselection logic + :type logic: dict, list, or str + :param resample: do we resample , defaults to True + :type resample: bool, optional + :param **kw: args to pass through to sub sampler + """ + super().__init__(parent) + self.validate_args(self.parent().project_filename, logic=logic, resample=resample) + self.logic = logic + self.resample = resample + self.n_datapoints + self.sub_kw = kw + sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_datapoints, **kw) + self.csv_path = sampler.csv_path + + @classmethod + def validate_args(cls, project_filename, **kw): + expected_args = set(['logic', 'n_datapoints']) + extra_kw = {} + for k, v in kw.items(): + expected_args.discard(k) + if k == 'logic': + # TODO: do some validation of the logic here. + pass + elif k == 'n_datapoints': + extra_kw[k] = v + if not isinstance(v, int): + raise ValidationError('n_datapoints needs to be an integer') + if v <= 0: + raise ValidationError('n_datapoints need to be >= 1') + else: + extra_kw[k] = v + if len(expected_args) > 0: + raise ValidationError('The following sampler arguments are required: ' + ', '.join(expected_args)) + cls.SUB_SAMPLER_CLASS.validate_args(project_filename, **extra_kw) + return True + + @classmethod + def downselect_logic(cls, df, logic): + if isinstance(logic, dict): + assert (len(logic) == 1) + key = list(logic.keys())[0] + values = logic[key] + if key == 'and': + retval = cls.downselect_logic(df, values[0]) + for value in values[1:]: + retval &= cls.downselect_logic(df, value) + return retval + elif key == 'or': + retval = cls.downselect_logic(df, values[0]) + for value in values[1:]: + retval |= cls.downselect_logic(df, value) + return retval + elif key == 'not': + return ~cls.downselect_logic(df, values) + elif isinstance(logic, list): + retval = 
cls.downselect_logic(df, logic[0]) + for value in logic[1:]: + retval &= cls.downselect_logic(df, value) + return retval + elif isinstance(logic, str): + key, value = logic.split('|') + return df[key] == value + + def run_sampling(self): + if self.resample: + logger.debug('Performing initial sampling to figure out number of samples for downselect') + n_samples_init = 350000 + init_sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_samples_init, **self.sub_kw) + buildstock_csv_filename = init_sampler.run_sampling() + df = pd.read_csv(buildstock_csv_filename, index_col=0) + df_new = df[self.downselect_logic(df, self.logic)] + downselected_n_samples_init = df_new.shape[0] + n_samples = math.ceil(self.n_datapoints * n_samples_init / downselected_n_samples_init) + os.remove(buildstock_csv_filename) + del init_sampler + else: + n_samples = self.n_datapoints + sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_samples, **self.sub_kw) + buildstock_csv_filename = sampler.run_sampling() + with gzip.open(os.path.splitext(buildstock_csv_filename)[0] + '_orig.csv.gz', 'wb') as f_out: + with open(buildstock_csv_filename, 'rb') as f_in: + shutil.copyfileobj(f_in, f_out) + df = pd.read_csv(buildstock_csv_filename, index_col=0, dtype='str') + df_new = df[self.downselect_logic(df, self.logic)] + if len(df_new.index) == 0: + raise RuntimeError('There are no buildings left after the down select!') + if self.resample: + old_index_name = df_new.index.name + df_new.index = np.arange(len(df_new)) + 1 + df_new.index.name = old_index_name + df_new.to_csv(buildstock_csv_filename) + return buildstock_csv_filename diff --git a/buildstockbatch/sampler/precomputed.py b/buildstockbatch/sampler/precomputed.py index f91899e6..22e6affa 100644 --- a/buildstockbatch/sampler/precomputed.py +++ b/buildstockbatch/sampler/precomputed.py @@ -11,24 +11,47 @@ """ import logging +import os import shutil from .base import BuildStockSampler +from buildstockbatch.exc import 
ValidationError +from buildstockbatch.utils import path_rel_to_file logger = logging.getLogger(__name__) class PrecomputedSampler(BuildStockSampler): - def __init__(self, parent): - super().__init__(parent) - self.buildstock_csv = self.cfg['baseline']['precomputed_sample'] + def __init__(self, parent, sample_file): + """Precomputed Sampler - def run_sampling(self, n_datapoints): + :param parent: BuildStockBatchBase object + :type parent: BuildStockBatchBase (or subclass) + :param sample_file: relative or absolute path to buildstock.csv to use + :type sample_file: str + """ + super().__init__(parent) + project_filename = self.parent().project_filename + self.validate_args(project_filename, sample_file=sample_file) + self.buildstock_csv = path_rel_to_file(project_filename, sample_file) + + @classmethod + def validate_args(cls, project_filename, **kw): + expected_args = set(['sample_file']) + for k, v in kw.items(): + expected_args.discard(k) + if k == 'sample_file': + if not isinstance(v, str): + raise ValidationError('sample_file should be a path string') + if not os.path.exists(path_rel_to_file(project_filename, v)): + raise ValidationError(f'sample_file doesn\'t exist: {v}') + else: + raise ValidationError(f'Unknown argument for sampler: {k}') + + def run_sampling(self): """ Check that the sampling has been precomputed and if necessary move to the required path. - - :param n_datapoints: Number of datapoints to sample from the distributions. 
""" if self.csv_path != self.buildstock_csv: shutil.copy(self.buildstock_csv, self.csv_path) diff --git a/buildstockbatch/sampler/residential_quota.py b/buildstockbatch/sampler/residential_quota.py index f67731b7..f1820f1f 100644 --- a/buildstockbatch/sampler/residential_quota.py +++ b/buildstockbatch/sampler/residential_quota.py @@ -16,13 +16,43 @@ import time from .base import BuildStockSampler +from .downselect import DownselectSamplerBase +from buildstockbatch.exc import ValidationError logger = logging.getLogger(__name__) class ResidentialQuotaSampler(BuildStockSampler): - def _run_sampling_docker(self, n_datapoints): + def __init__(self, parent, n_datapoints): + """Residential Quota Sampler + + :param parent: BuildStockBatchBase object + :type parent: BuildStockBatchBase (or subclass) + :param n_datapoints: number of datapoints to sample + :type n_datapoints: int + """ + super().__init__(parent) + self.validate_args(self.parent().project_filename, n_datapoints=n_datapoints) + self.n_datapoints = n_datapoints + + @classmethod + def validate_args(cls, project_filename, **kw): + expected_args = set(['n_datapoints']) + for k, v in kw.items(): + expected_args.discard(k) + if k == 'n_datapoints': + if not isinstance(v, int): + raise ValidationError('n_datapoints needs to be an integer') + if v <= 0: + raise ValidationError('n_datapoints need to be >= 1') + else: + raise ValidationError(f'Unknown argument for sampler: {k}') + if len(expected_args) > 0: + raise ValidationError('The following sampler arguments are required: ' + ', '.join(expected_args)) + return True + + def _run_sampling_docker(self): docker_client = docker.DockerClient.from_env() tick = time.time() extra_kws = {} @@ -34,7 +64,7 @@ def _run_sampling_docker(self, n_datapoints): 'ruby', 'resources/run_sampling.rb', '-p', self.cfg['project_directory'], - '-n', str(n_datapoints), + '-n', str(self.n_datapoints), '-o', 'buildstock.csv' ], remove=True, @@ -57,7 +87,7 @@ def _run_sampling_docker(self, 
n_datapoints): ) return destination_filename - def _run_sampling_singularity(self, n_datapoints): + def _run_sampling_singularity(self): args = [ 'singularity', 'exec', @@ -68,10 +98,14 @@ def _run_sampling_singularity(self, n_datapoints): 'ruby', 'resources/run_sampling.rb', '-p', self.cfg['project_directory'], - '-n', str(n_datapoints), + '-n', str(self.n_datapoints), '-o', '../../outbind/{}'.format(os.path.basename(self.csv_path)) ] logger.debug(f"Starting singularity sampling with command: {' '.join(args)}") subprocess.run(args, check=True, env=os.environ, cwd=self.parent().output_dir) logger.debug("Singularity sampling completed.") return self.csv_path + + +class ResidentialQuotaDownselectSampler(DownselectSamplerBase): + SUB_SAMPLER_CLASS = ResidentialQuotaSampler diff --git a/buildstockbatch/schemas/v0.3.yaml b/buildstockbatch/schemas/v0.3.yaml new file mode 100644 index 00000000..73de49d3 --- /dev/null +++ b/buildstockbatch/schemas/v0.3.yaml @@ -0,0 +1,131 @@ +schema_version: enum('0.3') +buildstock_directory: str() +project_directory: str(required=True) +weather_files_path: str(required=False) +weather_files_url: str(required=False) +sampler: include('sampler-spec', required=True) + +stock_type: enum('residential', 'commercial', required=True) +eagle: include('hpc-spec', required=False) +aws: include('aws-spec', required=False) +output_directory: str(required=False) +sys_image_dir: str(required=False) +baseline: include('sim-spec', required=True) +os_version: str(required=False) +os_sha: str(required=False) +simulation_output: map(required=False) +timeseries_csv_export: map(required=False) +upgrades: list(include('upgrade-spec'), required=False) +postprocessing: include('postprocessing-spec', required=False) +residential_simulation_controls: map(required=False) +reporting_measures: list(str(), required=False) +--- +aws-spec: + job_identifier: regex('^[a-zA-Z]\w{,9}$', required=True) + s3: include('s3-aws-postprocessing-spec', required=True) + region: 
str(required=True) + use_spot: bool(required=False) + spot_bid_percent: num(min=1, max=100, required=False) + batch_array_size: num(min=1, max=10000, required=True) + notifications_email: regex('^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$', name='email', required=True) + emr: include('aws-emr-spec', required=False) + job_environment: include('aws-job-environment', required=False) + +aws-job-environment: + vcpus: int(min=1, max=36, required=False) + memory: int(min=1024, required=False) + +aws-emr-spec: + master_instance_type: str(required=False) + slave_instance_type: str(required=False) + slave_instance_count: int(min=1, required=False) + dask_worker_vcores: int(min=1, required=False) + +hpc-spec: + account: str(required=True) + minutes_per_sim: int(max=120, required=False) + n_jobs: int(required=False) + postprocessing: include('hpc-postprocessing-spec', required=False) + sampling: include('sampling-spec', required=False) + +hpc-postprocessing-spec: + time: int(required=True) + n_workers: int(required=False) + +sampler-spec: + type: str(required=True) + args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False) + +sampling-spec: + time: int(required=True) + +sim-spec: + n_buildings_represented: int(required=True) + measures: list(include('measure-spec'), required=False) + precomputed_sample: str(required=False) + skip_sims: bool(required=False) + measures_to_ignore: list(str(), required=False) + sampling_algorithm: enum('precomputed', 'quota', 'sobol', required=True) + osw_template: str(required=False) + include_qaqc: bool(required=False) + custom_gems: bool(required=False) + +measure-spec: + measure_dir_name: str(required=True) + arguments: map(required=False) + +upgrade-spec: + upgrade_name: str(required=True) + options: list(include('option-spec'), required=True) + package_apply_logic: include('apply-logic-spec', required=False) + reference_scenario: str(required=False) + +option-spec: + option: include('param_option-spec', 
required=True) + apply_logic: include('apply-logic-spec', required=False) + costs: list(include('cost-spec'), required=False) + lifetime: num(required=False) + +param_option-spec: str(exclude=':(){}[]') + +apply-logic-spec: > + any( + list( + include('and-spec'), + include('or-spec'), + include('not-spec'), + include('param_option-spec'), + ), + include('and-spec'), + include('or-spec'), + include('not-spec'), + include('param_option-spec') + ) +or-spec: + or: list(include('apply-logic-spec')) +and-spec: + and: list(include('apply-logic-spec')) +not-spec: + not: any(include('apply-logic-spec'), list(include('apply-logic-spec'))) + +cost-spec: + value: num(required=True) + multiplier: str(required=True) + +postprocessing-spec: + aws: include('aws-postprocessing-spec', required=False) + aggregate_timeseries: bool(required=False) + +aws-postprocessing-spec: + region_name: str(required=False) + s3: include('s3-aws-postprocessing-spec', required=True) + athena: include('athena-aws-postprocessing-spec', required=False) + +s3-aws-postprocessing-spec: + bucket: str(required=True) + prefix: str(required=True) + +athena-aws-postprocessing-spec: + glue_service_role: str(required=False) + database_name: str(required=True) + max_crawling_time: num(requried=False) diff --git a/buildstockbatch/utils.py b/buildstockbatch/utils.py index bf98bfac..9a55333d 100644 --- a/buildstockbatch/utils.py +++ b/buildstockbatch/utils.py @@ -1,5 +1,13 @@ -import traceback import inspect +import os +import traceback + + +def path_rel_to_file(startfile, x): + if os.path.isabs(x): + return os.path.abspath(x) + else: + return os.path.abspath(os.path.join(os.path.dirname(startfile), x)) def _str_repr(obj, list_max=20, dict_max=20, string_max=100): diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py index b9694b44..adf5deb9 100644 --- a/buildstockbatch/workflow_generator/residential.py +++ b/buildstockbatch/workflow_generator/residential.py 
@@ -40,6 +40,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): } res_sim_ctl_args.update(self.cfg.get('residential_simulation_controls', {})) + # FIXME: Figure this out another way. sample_weight = self.cfg['baseline']['n_buildings_represented'] /\ self.cfg['baseline']['n_datapoints'] bld_exist_model_args = { From f54e575736d7949d5a2f2956538ecfa9fe0aa14e Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 26 Oct 2020 15:30:17 -0600 Subject: [PATCH 03/21] mostly fixing validation --- buildstockbatch/__init__.py | 6 -- buildstockbatch/aws/aws.py | 2 +- buildstockbatch/base.py | 5 +- buildstockbatch/eagle.py | 6 +- buildstockbatch/localdocker.py | 6 +- buildstockbatch/sampler/__init__.py | 2 +- buildstockbatch/sampler/base.py | 4 +- buildstockbatch/sampler/commercial_sobol.py | 2 +- buildstockbatch/sampler/downselect.py | 20 +++--- buildstockbatch/schemas/v0.3.yaml | 2 - buildstockbatch/test/conftest.py | 10 ++- buildstockbatch/test/test_base.py | 68 +++++++++++++++---- buildstockbatch/test/test_docker.py | 44 +----------- buildstockbatch/test/test_eagle.py | 46 ------------- .../test/test_inputs/complete-schema.yml | 13 ++-- .../test/test_inputs/enforce-schema-xor.yml | 8 ++- ...force-validate-downselect-resample-bad.yml | 13 ---- ...orce-validate-downselect-resample-good.yml | 14 ---- .../enforce-validate-no-precomputed-quota.yml | 10 --- .../test/test_inputs/minimal-schema.yml | 8 ++- buildstockbatch/test/test_validation.py | 4 -- buildstockbatch/utils.py | 6 ++ 22 files changed, 107 insertions(+), 192 deletions(-) delete mode 100644 buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-bad.yml delete mode 100644 buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-good.yml delete mode 100644 buildstockbatch/test/test_inputs/enforce-validate-no-precomputed-quota.yml diff --git a/buildstockbatch/__init__.py b/buildstockbatch/__init__.py index 2d934278..80124e43 100644 --- a/buildstockbatch/__init__.py +++ 
b/buildstockbatch/__init__.py @@ -26,9 +26,3 @@ :copyright: (c) 2018 by The Alliance for Sustainable Energy. :license: BSD-3, see LICENSE for more details. """ -import enum - - -class ContainerRuntime(enum.Enum): - DOCKER = 1 - SINGULARITY = 2 diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index 0ab53e72..c0c71468 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -1772,7 +1772,7 @@ def run_batch(self): """ # Generate buildstock.csv - buildstock_csv_filename = self.run_sampling() + buildstock_csv_filename = self.sampler.run_sampling() # Compress and upload assets to S3 with tempfile.TemporaryDirectory() as tmpdir, tempfile.TemporaryDirectory() as tmp_weather_dir: diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index b92d097c..e2db4bcc 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -63,9 +63,6 @@ def __init__(self, project_filename): if not os.path.isdir(self.project_dir): raise FileNotFoundError(f'project_directory = {self.project_dir} is not a directory.') - # To be set in subclasses - self.sampler = None - # Load in OS_VERSION and OS_SHA arguments if they exist in the YAML, # otherwise use defaults specified here. 
self.os_version = self.cfg.get('os_version', self.DEFAULT_OS_VERSION) @@ -252,7 +249,7 @@ def validate_sampler(project_file): @staticmethod def validate_project_schema(project_file): cfg = BuildStockBatchBase.get_project_configuration(project_file) - schema_version = cfg.get('schema_version', __schema_version__) + schema_version = cfg.get('schema_version') version_schema = os.path.join(os.path.dirname(__file__), 'schemas', f'v{schema_version}.yaml') if not os.path.isfile(version_schema): logger.error(f'Could not find validation schema for YAML version {schema_version}') diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py index e310814b..a6d2bccb 100644 --- a/buildstockbatch/eagle.py +++ b/buildstockbatch/eagle.py @@ -34,8 +34,8 @@ import yaml from buildstockbatch.base import BuildStockBatchBase, SimulationExists -from buildstockbatch.utils import log_error_details, get_error_details -from buildstockbatch import postprocessing, ContainerRuntime +from buildstockbatch.utils import log_error_details, get_error_details, ContainerRuntime +from buildstockbatch import postprocessing logger = logging.getLogger(__name__) @@ -164,7 +164,7 @@ def run_batch(self, sampling_only=False): logger.debug("Housing characteristics copied.") # run sampling - buildstock_csv_filename = self.run_sampling() + buildstock_csv_filename = self.sampler.run_sampling() # Hit the weather_dir API to make sure that creating the weather directory isn't a race condition in the array # jobs - this is rare but happens quasi-repeatably when lustre is really lagging diff --git a/buildstockbatch/localdocker.py b/buildstockbatch/localdocker.py index c0dbbabd..b12b11f0 100644 --- a/buildstockbatch/localdocker.py +++ b/buildstockbatch/localdocker.py @@ -28,8 +28,8 @@ import tempfile from buildstockbatch.base import BuildStockBatchBase, SimulationExists -from buildstockbatch import postprocessing, ContainerRuntime -from .utils import log_error_details +from buildstockbatch import postprocessing 
+from .utils import log_error_details, ContainerRuntime logger = logging.getLogger(__name__) @@ -155,7 +155,7 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, docker_image, re return dpout def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): - buildstock_csv_filename = self.run_sampling() + buildstock_csv_filename = self.sampler.run_sampling() if sampling_only: return diff --git a/buildstockbatch/sampler/__init__.py b/buildstockbatch/sampler/__init__.py index 9486fae5..1cb55992 100644 --- a/buildstockbatch/sampler/__init__.py +++ b/buildstockbatch/sampler/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from .residential_quota import ResidentialQuotaSampler # noqa F041 +from .residential_quota import ResidentialQuotaSampler, ResidentialQuotaDownselectSampler # noqa F041 from .commercial_sobol import CommercialSobolSampler # noqa F041 from .precomputed import PrecomputedSampler # noqa F041 diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index ade32ce2..b6a25e67 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -13,8 +13,7 @@ import os import weakref -from buildstockbatch import ContainerRuntime -from buildstockbatch.utils import path_rel_to_file +from buildstockbatch.utils import ContainerRuntime logger = logging.getLogger(__name__) @@ -22,7 +21,6 @@ class BuildStockSampler(object): csv_path = None - ARGS_DEFAULTS = {} @staticmethod def validate_args(project_filename, **kw): diff --git a/buildstockbatch/sampler/commercial_sobol.py b/buildstockbatch/sampler/commercial_sobol.py index 95d9280b..574fa5f0 100644 --- a/buildstockbatch/sampler/commercial_sobol.py +++ b/buildstockbatch/sampler/commercial_sobol.py @@ -21,7 +21,7 @@ from .sobol_lib import i4_sobol_generate from .base import BuildStockSampler -from buildstockbatch import ContainerRuntime +from buildstockbatch.utils import ContainerRuntime from buildstockbatch.exc import ValidationError logger = 
logging.getLogger(__name__) diff --git a/buildstockbatch/sampler/downselect.py b/buildstockbatch/sampler/downselect.py index 05e304a3..575fb3e8 100644 --- a/buildstockbatch/sampler/downselect.py +++ b/buildstockbatch/sampler/downselect.py @@ -42,29 +42,31 @@ def __init__(self, parent, n_datapoints, logic, resample=True, **kw): :param **kw: args to pass through to sub sampler """ super().__init__(parent) - self.validate_args(self.parent().project_filename, logic=logic, resample=resample) + self.validate_args( + self.parent().project_filename, + n_datapoints=n_datapoints, + logic=logic, + resample=resample, + **kw + ) self.logic = logic self.resample = resample - self.n_datapoints + self.n_datapoints = n_datapoints self.sub_kw = kw sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_datapoints, **kw) self.csv_path = sampler.csv_path @classmethod def validate_args(cls, project_filename, **kw): - expected_args = set(['logic', 'n_datapoints']) + expected_args = set(['logic']) extra_kw = {} for k, v in kw.items(): expected_args.discard(k) if k == 'logic': # TODO: do some validation of the logic here. 
pass - elif k == 'n_datapoints': - extra_kw[k] = v - if not isinstance(v, int): - raise ValidationError('n_datapoints needs to be an integer') - if v <= 0: - raise ValidationError('n_datapoints need to be >= 1') + elif k == 'resample': + pass else: extra_kw[k] = v if len(expected_args) > 0: diff --git a/buildstockbatch/schemas/v0.3.yaml b/buildstockbatch/schemas/v0.3.yaml index 73de49d3..b2b52058 100644 --- a/buildstockbatch/schemas/v0.3.yaml +++ b/buildstockbatch/schemas/v0.3.yaml @@ -62,10 +62,8 @@ sampling-spec: sim-spec: n_buildings_represented: int(required=True) measures: list(include('measure-spec'), required=False) - precomputed_sample: str(required=False) skip_sims: bool(required=False) measures_to_ignore: list(str(), required=False) - sampling_algorithm: enum('precomputed', 'quota', 'sobol', required=True) osw_template: str(required=False) include_qaqc: bool(required=False) custom_gems: bool(required=False) diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index 9c5279c7..da49b438 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -32,10 +32,14 @@ def _basic_residential_project_file(update_args={}): 'project_directory': project_directory, 'output_directory': output_directory, 'weather_files_url': 'https://s3.amazonaws.com/epwweatherfiles/project_resstock_national.zip', + 'sampler': { + 'type': 'residential_quota', + 'args': { + 'n_datapoints': 8 + } + }, 'baseline': { - 'n_datapoints': 8, 'n_buildings_represented': 80000000, - 'sampling_algorithm': 'quota' }, 'simulation_output': { 'include_enduse_subcategories': True @@ -50,7 +54,7 @@ def _basic_residential_project_file(update_args={}): }, 'account': 'testaccount', }, - 'schema_version': 0.2 + 'schema_version': '0.3' } cfg.update(update_args) project_filename = os.path.join(test_directory, 'project.yml') diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 6fd39faa..058fd029 100644 --- 
a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -8,13 +8,17 @@ import os import pandas as pd from pyarrow import parquet +import pytest import re import shutil import tempfile from unittest.mock import patch, MagicMock +import yaml from buildstockbatch.base import BuildStockBatchBase +from buildstockbatch.exc import ValidationError from buildstockbatch.postprocessing import write_dataframe_as_parquet +from buildstockbatch.utils import ContainerRuntime dask.config.set(scheduler='synchronous') here = os.path.dirname(os.path.abspath(__file__)) @@ -125,7 +129,7 @@ def simplify_columns(colname): pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) -def test_downselect_integer_options(basic_residential_project_file): +def test_downselect_integer_options(basic_residential_project_file, mocker): with tempfile.TemporaryDirectory() as buildstock_csv_dir: buildstock_csv = os.path.join(buildstock_csv_dir, 'buildstock.csv') valid_option_values = set() @@ -143,22 +147,27 @@ def test_downselect_integer_options(basic_residential_project_file): cf_out.writerow(row) project_filename, results_dir = basic_residential_project_file({ - 'downselect': { - 'resample': False, - 'logic': 'Geometry House Size|1500-2499' + 'sampler': { + 'type': 'residential_quota_downselect', + 'args': { + 'n_datapoints': 8, + 'resample': False, + 'logic': 'Geometry House Size|1500-2499' + } } }) + mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) + mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) + + bsb = BuildStockBatchBase(project_filename) run_sampling_mock = MagicMock(return_value=buildstock_csv) - with patch.object(BuildStockBatchBase, 'weather_dir', None), \ - patch.object(BuildStockBatchBase, 'results_dir', results_dir), \ - patch.object(BuildStockBatchBase, 'run_sampling', run_sampling_mock): - bsb = BuildStockBatchBase(project_filename) - bsb.downselect() - run_sampling_mock.assert_called_once() - with 
open(buildstock_csv, 'r', newline='') as f: - cf = csv.DictReader(f) - for row in cf: - assert(row['Days Shifted'] in valid_option_values) + mocker.patch.object(bsb.sampler, 'run_sampling', run_sampling_mock) + bsb.sampler.run_sampling() + run_sampling_mock.assert_called_once() + with open(buildstock_csv, 'r', newline='') as f: + cf = csv.DictReader(f) + for row in cf: + assert(row['Days Shifted'] in valid_option_values) def test_combine_files(basic_residential_project_file): @@ -365,3 +374,34 @@ def test_skipping_baseline(basic_residential_project_file): up01_csv_gz = os.path.join(results_dir, 'results_csvs', 'results_up01.csv.gz') assert(os.path.exists(up01_csv_gz)) + + +def test_provide_buildstock_csv(basic_residential_project_file, mocker): + buildstock_csv = os.path.join(here, 'buildstock.csv') + df = pd.read_csv(buildstock_csv) + project_filename, results_dir = basic_residential_project_file({ + 'sampler': { + 'type': 'precomputed', + 'args': { + 'sample_file': buildstock_csv + } + } + }) + mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) + mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) + mocker.patch.object(BuildStockBatchBase, 'CONTAINER_RUNTIME', ContainerRuntime.DOCKER) + + bsb = BuildStockBatchBase(project_filename) + sampling_output_csv = bsb.sampler.run_sampling() + df2 = pd.read_csv(sampling_output_csv) + pd.testing.assert_frame_equal(df, df2) + + # Test file missing + with open(project_filename, 'r') as f: + cfg = yaml.safe_load(f) + cfg['sampler']['args']['sample_file'] = os.path.join(here, 'non_existant_file.csv') + with open(project_filename, 'w') as f: + yaml.dump(cfg, f) + + with pytest.raises(ValidationError, match=r"sample_file doesn't exist"): + BuildStockBatchBase(project_filename).sampler.run_sampling() diff --git a/buildstockbatch/test/test_docker.py b/buildstockbatch/test/test_docker.py index 7cd26ce3..d5f59c14 100644 --- a/buildstockbatch/test/test_docker.py +++ b/buildstockbatch/test/test_docker.py @@ 
-8,6 +8,7 @@ from buildstockbatch.base import BuildStockBatchBase from buildstockbatch.localdocker import LocalDockerBatch +from buildstockbatch.exc import ValidationError here = os.path.dirname(os.path.abspath(__file__)) @@ -39,46 +40,3 @@ def test_docker_image_exists_on_docker_hub(basic_residential_project_file): headers={'Authorization': f'Bearer {token}'} ) assert(r3.ok) - - -def test_provide_buildstock_csv(basic_residential_project_file): - buildstock_csv = os.path.join(here, 'buildstock.csv') - df = pd.read_csv(buildstock_csv) - project_filename, results_dir = basic_residential_project_file({ - 'baseline': { - 'n_datapoints': 10, - 'n_buildings_represented': 80000000, - 'sampling_algorithm': 'precomputed', - 'precomputed_sample': buildstock_csv - } - }) - with patch.object(LocalDockerBatch, 'weather_dir', None), \ - patch.object(LocalDockerBatch, 'results_dir', results_dir): - bsb = LocalDockerBatch(project_filename) - sampling_output_csv = bsb.run_sampling() - df2 = pd.read_csv(sampling_output_csv) - pd.testing.assert_frame_equal(df, df2) - - # Test n_datapoints do not match - with open(project_filename, 'r') as f: - cfg = yaml.safe_load(f) - cfg['baseline']['n_datapoints'] = 100 - with open(project_filename, 'w') as f: - yaml.dump(cfg, f) - - with patch.object(LocalDockerBatch, 'weather_dir', None), \ - patch.object(LocalDockerBatch, 'results_dir', results_dir): - with pytest.raises(RuntimeError, match=r'does not match the number of rows in'): - LocalDockerBatch.validate_precomputed_sample(project_filename) - - # Test file missing - with open(project_filename, 'r') as f: - cfg = yaml.safe_load(f) - cfg['baseline']['precomputed_sample'] = os.path.join(here, 'non_existant_file.csv') - with open(project_filename, 'w') as f: - yaml.dump(cfg, f) - - with patch.object(LocalDockerBatch, 'weather_dir', None), \ - patch.object(LocalDockerBatch, 'results_dir', results_dir): - with pytest.raises(FileNotFoundError): - LocalDockerBatch(project_filename).run_sampling() 
diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index 9aeb18ee..20c6802f 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ -1,14 +1,11 @@ import joblib import json import os -import pandas as pd import pathlib -import pytest import requests import shutil import tarfile from unittest.mock import patch -import yaml from buildstockbatch.eagle import user_cli, EagleBatch @@ -92,49 +89,6 @@ def test_singularity_image_download_url(basic_residential_project_file): assert r.status_code == requests.codes.ok -def test_provide_buildstock_csv(basic_residential_project_file): - buildstock_csv = os.path.join(here, 'buildstock.csv') - df = pd.read_csv(buildstock_csv) - project_filename, results_dir = basic_residential_project_file({ - 'baseline': { - 'n_datapoints': 10, - 'n_buildings_represented': 80000000, - 'sampling_algorithm': 'precomputed', - 'precomputed_sample': buildstock_csv - } - }) - with patch.object(EagleBatch, 'weather_dir', None), \ - patch.object(EagleBatch, 'results_dir', results_dir): - bsb = EagleBatch(project_filename) - sampling_output_csv = bsb.run_sampling() - df2 = pd.read_csv(sampling_output_csv) - pd.testing.assert_frame_equal(df, df2) - - # Test n_datapoints do not match - with open(project_filename, 'r') as f: - cfg = yaml.safe_load(f) - cfg['baseline']['n_datapoints'] = 100 - with open(project_filename, 'w') as f: - yaml.dump(cfg, f) - - with patch.object(EagleBatch, 'weather_dir', None), \ - patch.object(EagleBatch, 'results_dir', results_dir): - with pytest.raises(RuntimeError, match=r'does not match the number of rows in'): - EagleBatch.validate_precomputed_sample(project_filename) - - # Test file missing - with open(project_filename, 'r') as f: - cfg = yaml.safe_load(f) - cfg['baseline']['precomputed_sample'] = os.path.join(here, 'non_existant_file.csv') - with open(project_filename, 'w') as f: - yaml.dump(cfg, f) - - with patch.object(EagleBatch, 'weather_dir', None), 
\ - patch.object(EagleBatch, 'results_dir', results_dir): - with pytest.raises(FileNotFoundError): - EagleBatch(project_filename).run_sampling() - - @patch('buildstockbatch.base.BuildStockBatchBase.validate_measures_and_arguments') @patch('buildstockbatch.base.BuildStockBatchBase.validate_options_lookup') @patch('buildstockbatch.eagle.subprocess') diff --git a/buildstockbatch/test/test_inputs/complete-schema.yml b/buildstockbatch/test/test_inputs/complete-schema.yml index 18895a5a..29d77c1d 100644 --- a/buildstockbatch/test/test_inputs/complete-schema.yml +++ b/buildstockbatch/test/test_inputs/complete-schema.yml @@ -3,15 +3,19 @@ buildstock_directory: /home/epresent/OpenStudio-BuildStock-master project_directory: project_singlefamilydetached output_directory: /scratch/epresent/YAML1s weather_files_path: /shared-projects/buildstock/weather/project_resstock_national_weather.zip -schema_version: 0.2 +schema_version: '0.3' +sampler: + type: residential_quota_downselect + args: + n_datapoints: 30 + resample: False + logic: HVAC System Heating Natural Gas|Gas Furnace, 60% AFUE baseline: - n_datapoints: 30 n_buildings_represented: 81221016 measures_to_ignore: - ResidentialConstructionsSlab osw_template: default include_qaqc: False - sampling_algorithm: quota upgrades: - upgrade_name: ENERGY STAR 96% AFUE furnace (gas, propane, oil) options: @@ -76,9 +80,6 @@ simulation_output: timeseries_csv_export: reporting_frequency: Hourly include_enduse_subcategories: true -downselect: - resample: False - logic: HVAC System Heating Natural Gas|Gas Furnace, 60% AFUE eagle: account: enduse n_jobs: 100 diff --git a/buildstockbatch/test/test_inputs/enforce-schema-xor.yml b/buildstockbatch/test/test_inputs/enforce-schema-xor.yml index 26e6e116..c4b293a6 100644 --- a/buildstockbatch/test/test_inputs/enforce-schema-xor.yml +++ b/buildstockbatch/test/test_inputs/enforce-schema-xor.yml @@ -4,8 +4,10 @@ project_directory: project_singlefamilydetached output_directory: /scratch/epresent/YAML1s 
weather_files_path: /shared-projects/buildstock/weather/project_resstock_national_weather.zip weather_files_url: s3://my-bucket/weather.zip +sampler: + type: residential_quota + args: + n_datapoints: 30 baseline: - n_datapoints: 30 n_buildings_represented: 30 - sampling_algorithm: quota -schema_version: 0.2 +schema_version: '0.3' diff --git a/buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-bad.yml b/buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-bad.yml deleted file mode 100644 index 84a7bebe..00000000 --- a/buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-bad.yml +++ /dev/null @@ -1,13 +0,0 @@ -stock_type: residential -buildstock_directory: /home/epresent/OpenStudio-BuildStock-master -project_directory: project_singlefamilydetached -output_directory: /scratch/epresent/YAML1s -weather_files_path: /shared-projects/buildstock/weather/project_resstock_national_weather.zip -schema_version: 0.2 -baseline: - n_datapoints: 100 - n_buildings_represented: 30000 - precomputed_sample: mybuildstock.csv - sampling_algorithm: precomputed -downselect: - logic: HVAC System Heating Natural Gas|Gas Furnace, 60% AFUE diff --git a/buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-good.yml b/buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-good.yml deleted file mode 100644 index 871a2eac..00000000 --- a/buildstockbatch/test/test_inputs/enforce-validate-downselect-resample-good.yml +++ /dev/null @@ -1,14 +0,0 @@ -stock_type: residential -buildstock_directory: /home/epresent/OpenStudio-BuildStock-master -project_directory: project_singlefamilydetached -output_directory: /scratch/epresent/YAML1s -weather_files_path: /shared-projects/buildstock/weather/project_resstock_national_weather.zip -schema_version: 0.2 -baseline: - n_datapoints: 10 - n_buildings_represented: 30000 - precomputed_sample: ../buildstock.csv - sampling_algorithm: precomputed -downselect: - resample: False - logic: 
HVAC System Heating Natural Gas|Gas Furnace, 60% AFUE diff --git a/buildstockbatch/test/test_inputs/enforce-validate-no-precomputed-quota.yml b/buildstockbatch/test/test_inputs/enforce-validate-no-precomputed-quota.yml deleted file mode 100644 index 4af40df7..00000000 --- a/buildstockbatch/test/test_inputs/enforce-validate-no-precomputed-quota.yml +++ /dev/null @@ -1,10 +0,0 @@ -stock_type: residential -buildstock_directory: /home/epresent/OpenStudio-BuildStock-master -project_directory: project_singlefamilydetached -output_directory: /scratch/epresent/YAML1s -weather_files_path: /shared-projects/buildstock/weather/project_resstock_national_weather.zip -baseline: - n_datapoints: 30 - sampling_algorithm: quota - precomputed_sample: mybuildstock.csv -schema_version: 0.2 diff --git a/buildstockbatch/test/test_inputs/minimal-schema.yml b/buildstockbatch/test/test_inputs/minimal-schema.yml index f1fcf281..541915ff 100644 --- a/buildstockbatch/test/test_inputs/minimal-schema.yml +++ b/buildstockbatch/test/test_inputs/minimal-schema.yml @@ -2,8 +2,10 @@ stock_type: commercial buildstock_directory: here project_directory: there weather_files_path: over-yonder +sampler: + type: residential_quota + args: + n_datapoints: 12345 baseline: - n_datapoints: 12345 n_buildings_represented: 12345 - sampling_algorithm: quota -schema_version: 0.2 +schema_version: '0.3' diff --git a/buildstockbatch/test/test_validation.py b/buildstockbatch/test/test_validation.py index 5539fca9..bdb8b164 100644 --- a/buildstockbatch/test/test_validation.py +++ b/buildstockbatch/test/test_validation.py @@ -71,7 +71,6 @@ def test_missing_required_key_fails(project_file): @pytest.mark.parametrize("project_file,expected", [ (os.path.join(example_yml_dir, 'enforce-schema-xor.yml'), ValidationError), (os.path.join(example_yml_dir, 'enforce-schema-xor-and-passes.yml'), True), - (os.path.join(example_yml_dir, 'enforce-validate-no-precomputed-quota.yml'), ValidationError) ]) def 
test_xor_violations_fail(project_file, expected): # patch the validate_options_lookup function to always return true for this case @@ -87,8 +86,6 @@ def test_xor_violations_fail(project_file, expected): (os.path.join(example_yml_dir, 'missing-required-schema.yml'), ValueError), (os.path.join(example_yml_dir, 'missing-nested-required-schema.yml'), ValueError), (os.path.join(example_yml_dir, 'enforce-schema-xor.yml'), ValidationError), - (os.path.join(example_yml_dir, 'enforce-validate-downselect-resample-bad.yml'), ValidationError), - (os.path.join(example_yml_dir, 'enforce-validate-downselect-resample-good.yml'), True), (os.path.join(example_yml_dir, 'complete-schema.yml'), True), (os.path.join(example_yml_dir, 'minimal-schema.yml'), True) ]) @@ -199,7 +196,6 @@ def test_bad_options_validation(project_file): assert "Insulation Slat" in er assert "Vintage|1960s|Vintage|1960s" in er assert "Vintage|1960s||Vintage|1940s&&Vintage|1980s" in er - assert "Invalid Parameter" in er else: raise Exception("validate_options was supposed to raise ValueError for enforce-validate-options-bad.yml") diff --git a/buildstockbatch/utils.py b/buildstockbatch/utils.py index 9a55333d..32e4e9c1 100644 --- a/buildstockbatch/utils.py +++ b/buildstockbatch/utils.py @@ -1,8 +1,14 @@ +import enum import inspect import os import traceback +class ContainerRuntime(enum.Enum): + DOCKER = 1 + SINGULARITY = 2 + + def path_rel_to_file(startfile, x): if os.path.isabs(x): return os.path.abspath(x) From 674bd94b458b2cfa290ca16eef31a3433041aeb2 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Thu, 12 Nov 2020 11:40:44 -0700 Subject: [PATCH 04/21] minor formatting change in tests --- buildstockbatch/test/test_eagle.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index 4371aa40..cd02be8a 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ -213,19 +213,17 @@ def 
sequential_parallel(**kwargs): mocker.patch.object(EagleBatch, 'clear_and_copy_dir') mocker.patch.object(EagleBatch, 'local_output_dir', results_dir) mocker.patch.object(EagleBatch, 'results_dir', results_dir) - mocker.patch.object(EagleBatch, 'run_sampling', lambda _: "buildstock.csv") def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=False): real_upgrade_idx = 0 if upgrade_idx is None else upgrade_idx + 1 - sim_id = 'bldg{:07d}up{:02d}'.format(building_id, real_upgrade_idx) - - # Check to see if the simulation is done already and skip it if so. - sim_dir = os.path.join(base_dir, 'up{:02d}'.format(real_upgrade_idx), 'bldg{:07d}'.format(building_id)) + sim_id = f'bldg{building_id:07d}up{real_upgrade_idx:02d}' + sim_dir = os.path.join(base_dir, f'up{real_upgrade_idx:02d}', f'bldg{building_id:07d}') return sim_id, sim_dir mocker.patch.object(EagleBatch, 'make_sim_dir', make_sim_dir_mock) b = EagleBatch(project_filename) + mocker.patch.object(b.sampler, 'run_sampling', lambda: "buildstock.csv") b.run_batch(sampling_only=True) # so the directories can be created b.run_job_batch(1) From 91183bfd741c66a8cce6507798e92e28fa9a6bf5 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 13 Nov 2020 17:09:28 -0700 Subject: [PATCH 05/21] rearranging workflow generator --- buildstockbatch/base.py | 31 ++++----- buildstockbatch/schemas/v0.3.yaml | 18 ++--- buildstockbatch/test/conftest.py | 20 +++--- buildstockbatch/test/test_base.py | 15 +++-- .../test/test_inputs/complete-schema.yml | 20 +++--- .../test/test_inputs/enforce-schema-xor.yml | 3 +- .../test/test_inputs/minimal-schema.yml | 3 +- buildstockbatch/workflow_generator/base.py | 12 ++++ .../workflow_generator/commercial.py | 34 +++++++++- .../workflow_generator/residential.py | 64 +++++++++++++----- .../test_workflow_generator.py | 65 ++++++++++++++----- project_resstock_national.yml | 36 +++++----- 12 files changed, 215 insertions(+), 106 deletions(-) diff --git a/buildstockbatch/base.py 
b/buildstockbatch/base.py index 5f79e53c..c99807e0 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -27,9 +27,11 @@ import xml.etree.ElementTree as ET from buildstockbatch.__version__ import __schema_version__ -from buildstockbatch import sampler -from .workflow_generator import ResidentialDefaultWorkflowGenerator, CommercialDefaultWorkflowGenerator -from buildstockbatch import postprocessing +from buildstockbatch import ( + sampler, + workflow_generator, + postprocessing +) from buildstockbatch.exc import SimulationExists, ValidationError from buildstockbatch.utils import path_rel_to_file @@ -78,6 +80,12 @@ def get_sampler_class(sampler_name): sampler_class_name = ''.join(x.capitalize() for x in sampler_name.strip().split('_')) + 'Sampler' return getattr(sampler, sampler_class_name) + @staticmethod + def get_workflow_generator_class(workflow_generator_name): + workflow_generator_class_name = \ + ''.join(x.capitalize() for x in workflow_generator_name.strip().split('_')) + 'WorkflowGenerator' + return getattr(workflow_generator, workflow_generator_class_name) + def path_rel_to_projectfile(self, x): return path_rel_to_file(self.project_filename, x) @@ -100,10 +108,6 @@ def _get_weather_files(self): logger.debug('Extracting weather files to: {}'.format(self.weather_dir)) zf.extractall(self.weather_dir) - @property - def stock_type(self): - return self.cfg['stock_type'] - @property def weather_dir(self): raise NotImplementedError @@ -124,13 +128,10 @@ def skip_baseline_sims(self): def run_batch(self): raise NotImplementedError - @staticmethod - def create_osw(cfg, *args, **kwargs): - if cfg['stock_type'] == 'residential': - osw_generator = ResidentialDefaultWorkflowGenerator(cfg) - else: - assert(cfg['stock_type'] == 'commercial') - osw_generator = CommercialDefaultWorkflowGenerator(cfg) + @classmethod + def create_osw(cls, cfg, *args, **kwargs): + WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type']) + 
osw_generator = WorkflowGenerator(cfg) return osw_generator.create_osw(*args, **kwargs) @staticmethod @@ -686,7 +687,7 @@ def get_dask_client(self): def process_results(self, skip_combine=False, force_upload=False): self.get_dask_client() # noqa: F841 - do_timeseries = 'timeseries_csv_export' in self.cfg.keys() + do_timeseries = 'timeseries_csv_export' in self.cfg['workflow_generator']['args'].keys() fs = LocalFileSystem() if not skip_combine: diff --git a/buildstockbatch/schemas/v0.3.yaml b/buildstockbatch/schemas/v0.3.yaml index b2b52058..104b35ea 100644 --- a/buildstockbatch/schemas/v0.3.yaml +++ b/buildstockbatch/schemas/v0.3.yaml @@ -4,8 +4,8 @@ project_directory: str(required=True) weather_files_path: str(required=False) weather_files_url: str(required=False) sampler: include('sampler-spec', required=True) +workflow_generator: include('workflow-generator-spec', required=True) -stock_type: enum('residential', 'commercial', required=True) eagle: include('hpc-spec', required=False) aws: include('aws-spec', required=False) output_directory: str(required=False) @@ -13,12 +13,8 @@ sys_image_dir: str(required=False) baseline: include('sim-spec', required=True) os_version: str(required=False) os_sha: str(required=False) -simulation_output: map(required=False) -timeseries_csv_export: map(required=False) upgrades: list(include('upgrade-spec'), required=False) postprocessing: include('postprocessing-spec', required=False) -residential_simulation_controls: map(required=False) -reporting_measures: list(str(), required=False) --- aws-spec: job_identifier: regex('^[a-zA-Z]\w{,9}$', required=True) @@ -56,22 +52,18 @@ sampler-spec: type: str(required=True) args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False) +workflow-generator-spec: + type: str(required=True) + args: map(key=regex(r'^[a-zA-Z_]\w*$', name='valid variable name'), required=False) + sampling-spec: time: int(required=True) sim-spec: n_buildings_represented: int(required=True) - 
measures: list(include('measure-spec'), required=False) skip_sims: bool(required=False) - measures_to_ignore: list(str(), required=False) - osw_template: str(required=False) - include_qaqc: bool(required=False) custom_gems: bool(required=False) -measure-spec: - measure_dir_name: str(required=True) - arguments: map(required=False) - upgrade-spec: upgrade_name: str(required=True) options: list(include('option-spec'), required=True) diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index bd99aede..db805fcc 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -27,7 +27,6 @@ def _basic_residential_project_file(update_args={}, raw=False): os.mkdir(os.path.join(output_directory, 'housing_characteristics')) os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) cfg = { - 'stock_type': 'residential', 'buildstock_directory': buildstock_directory, 'project_directory': project_directory, 'output_directory': output_directory, @@ -38,22 +37,27 @@ def _basic_residential_project_file(update_args={}, raw=False): 'n_datapoints': 8 } }, + 'workflow_generator': { + 'type': 'residential_default', + 'args': { + 'timeseries_csv_export': { + 'reporting_frequency': 'Hourly', + 'include_enduse_subcategories': True + }, + 'simulation_output': { + 'include_enduse_subcategories': True + }, + } + }, 'baseline': { 'n_buildings_represented': 80000000, }, - 'simulation_output': { - 'include_enduse_subcategories': True - }, 'upgrades': [{ 'upgrade_name': 'Upgrade1', 'options': [ {'option': 'Infiltration|11.25 ACH50'} ] }], - 'timeseries_csv_export': { - 'reporting_frequency': 'Hourly', - 'include_enduse_subcategories': True - }, 'eagle': { 'sampling': { 'time': 20 diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 058fd029..a0266c4e 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -53,7 +53,7 @@ def 
test_reference_scenario(basic_residential_project_file): assert test_csv['apply_upgrade.reference_scenario'].iloc[0] == 'example_reference_scenario' -def test_combine_files_flexible(basic_residential_project_file): +def test_combine_files_flexible(basic_residential_project_file, mocker): # Allows addition/removable/rename of columns. For columns that remain unchanged, verifies that the data matches # with stored test_results. If this test passes but test_combine_files fails, then test_results/parquet and # test_results/results_csvs need to be updated with new data *if* columns were indeed supposed to be added/ @@ -66,12 +66,13 @@ def test_combine_files_flexible(basic_residential_project_file): } project_filename, results_dir = basic_residential_project_file(post_process_config) - with patch.object(BuildStockBatchBase, 'weather_dir', None), \ - patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \ - patch.object(BuildStockBatchBase, 'results_dir', results_dir): - bsb = BuildStockBatchBase(project_filename) - bsb.process_results() - get_dask_client_mock.assert_called_once() + mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) + get_dask_client_mock = mocker.patch.object(BuildStockBatchBase, 'get_dask_client') + mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) + + bsb = BuildStockBatchBase(project_filename) + bsb.process_results() + get_dask_client_mock.assert_called_once() def simplify_columns(colname): return colname.lower().replace('_', '') diff --git a/buildstockbatch/test/test_inputs/complete-schema.yml b/buildstockbatch/test/test_inputs/complete-schema.yml index 29d77c1d..6ef8f29a 100644 --- a/buildstockbatch/test/test_inputs/complete-schema.yml +++ b/buildstockbatch/test/test_inputs/complete-schema.yml @@ -1,4 +1,3 @@ -stock_type: residential buildstock_directory: /home/epresent/OpenStudio-BuildStock-master project_directory: project_singlefamilydetached output_directory: /scratch/epresent/YAML1s @@ 
-10,12 +9,18 @@ sampler: n_datapoints: 30 resample: False logic: HVAC System Heating Natural Gas|Gas Furnace, 60% AFUE +workflow_generator: + type: residential_default + args: + measures_to_ignore: + - ResidentialConstructionsSlab + simulation_output: + include_enduse_subcategories: true + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true baseline: n_buildings_represented: 81221016 - measures_to_ignore: - - ResidentialConstructionsSlab - osw_template: default - include_qaqc: False upgrades: - upgrade_name: ENERGY STAR 96% AFUE furnace (gas, propane, oil) options: @@ -75,11 +80,6 @@ upgrades: - value: 0.40 multiplier: Floor Area, Lighting (ft^2) lifetime: 78.28 -simulation_output: - include_enduse_subcategories: true -timeseries_csv_export: - reporting_frequency: Hourly - include_enduse_subcategories: true eagle: account: enduse n_jobs: 100 diff --git a/buildstockbatch/test/test_inputs/enforce-schema-xor.yml b/buildstockbatch/test/test_inputs/enforce-schema-xor.yml index c4b293a6..e4355f84 100644 --- a/buildstockbatch/test/test_inputs/enforce-schema-xor.yml +++ b/buildstockbatch/test/test_inputs/enforce-schema-xor.yml @@ -1,4 +1,3 @@ -stock_type: residential buildstock_directory: /home/epresent/OpenStudio-BuildStock-master project_directory: project_singlefamilydetached output_directory: /scratch/epresent/YAML1s @@ -8,6 +7,8 @@ sampler: type: residential_quota args: n_datapoints: 30 +workflow_generator: + type: residential_default baseline: n_buildings_represented: 30 schema_version: '0.3' diff --git a/buildstockbatch/test/test_inputs/minimal-schema.yml b/buildstockbatch/test/test_inputs/minimal-schema.yml index 541915ff..e87ed9be 100644 --- a/buildstockbatch/test/test_inputs/minimal-schema.yml +++ b/buildstockbatch/test/test_inputs/minimal-schema.yml @@ -1,4 +1,3 @@ -stock_type: commercial buildstock_directory: here project_directory: there weather_files_path: over-yonder @@ -6,6 +5,8 @@ sampler: type: residential_quota args: 
n_datapoints: 12345 +workflow_generator: + type: commercial_default baseline: n_buildings_represented: 12345 schema_version: '0.3' diff --git a/buildstockbatch/workflow_generator/base.py b/buildstockbatch/workflow_generator/base.py index 6b10df44..ddf5b105 100644 --- a/buildstockbatch/workflow_generator/base.py +++ b/buildstockbatch/workflow_generator/base.py @@ -19,6 +19,7 @@ class WorkflowGeneratorBase(object): def __init__(self, cfg): self.cfg = cfg + self.validate(self.cfg['workflow_generator']['args']) def create_osw(self, sim_id, building_id, upgrade_idx): """ @@ -54,3 +55,14 @@ def make_apply_logic_arg(cls, logic): return '(' + '&&'.join(map(cls.make_apply_logic_arg, logic)) + ')' elif isinstance(logic, str): return logic + + @classmethod + def validate(cls, workflow_generator_args): + """Validate the workflor generator arguments + + Replace this in your subclass. + + :param workflow_generator_args: argmuents passed to the workflow generator in the config file. + :type workflow_generator_args: dict + """ + return True \ No newline at end of file diff --git a/buildstockbatch/workflow_generator/commercial.py b/buildstockbatch/workflow_generator/commercial.py index ee97907d..43d089c8 100644 --- a/buildstockbatch/workflow_generator/commercial.py +++ b/buildstockbatch/workflow_generator/commercial.py @@ -11,7 +11,10 @@ """ import datetime as dt +import json import logging +import re +import yamale from .base import WorkflowGeneratorBase @@ -20,6 +23,26 @@ class CommercialDefaultWorkflowGenerator(WorkflowGeneratorBase): + @classmethod + def validate(cls, workflow_generator_args): + """Validate arguments + + :param workflow_generator_args: Arguments passed to the workflow generator in the yaml + :type workflow_generator_args: dict + """ + schema_yml = """ + measures: list(include('measure-spec'), required=False) + include_qaqc: bool(required=False) + --- + measure-spec: + measure_dir_name: str(required=True) + arguments: map(required=False) + """ + schema_yml = 
'\n'.join([re.sub(r'^ {8}', '', x) for x in schema_yml.strip().split('\n')]) + schema = yamale.make_schema(content=schema_yml, parser='ruamel') + data = yamale.make_data(content=json.dumps(workflow_generator_args), parser='ruamel') + return yamale.validate(schema, data, strict=True) + def create_osw(self, sim_id, building_id, upgrade_idx): """ Generate and return the osw as a python dict @@ -29,6 +52,13 @@ def create_osw(self, sim_id, building_id, upgrade_idx): :param upgrade_idx: integer index of the upgrade scenario to apply, None if baseline """ logger.debug('Generating OSW, sim_id={}'.format(sim_id)) + + workflow_args = { + 'measures': [], + 'include_qaqc': False + } + workflow_args.update(self.cfg['workflow_generator'].get('args', {})) + osw = { 'id': sim_id, 'steps': [ @@ -60,7 +90,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): 'weather_file': 'weather/empty.epw' } - osw['steps'].extend(self.cfg['baseline'].get('measures', [])) + osw['steps'].extend(workflow_args['measures']) osw['steps'].extend([ { @@ -115,7 +145,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): } ]) - if self.cfg.get('baseline', {}).get('include_qaqc', False): + if workflow_args['include_qaqc']: osw['steps'].extend([ { 'measure_dir_name': 'la_100_qaqc', diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py index adf5deb9..c3771aa9 100644 --- a/buildstockbatch/workflow_generator/residential.py +++ b/buildstockbatch/workflow_generator/residential.py @@ -11,7 +11,10 @@ """ import datetime as dt +import json import logging +import re +import yamale from .base import WorkflowGeneratorBase @@ -20,6 +23,30 @@ class ResidentialDefaultWorkflowGenerator(WorkflowGeneratorBase): + @classmethod + def validate(cls, workflow_generator_args): + """Validate arguments + + :param workflow_generator_args: Arguments passed to the workflow generator in the yaml + :type workflow_generator_args: dict + """ + schema_yml = """ + 
measures_to_ignore: list(str(), required=False) + residential_simulation_controls: map(required=False) + measures: list(include('measure-spec'), required=False) + simulation_output: map(required=False) + timeseries_csv_export: map(required=False) + reporting_measures: list(str(), required=False) + --- + measure-spec: + measure_dir_name: str(required=True) + arguments: map(required=False) + """ + schema_yml = '\n'.join([re.sub(r'^ {8}', '', x) for x in schema_yml.strip().split('\n')]) + schema = yamale.make_schema(content=schema_yml, parser='ruamel') + data = yamale.make_data(content=json.dumps(workflow_generator_args), parser='ruamel') + return yamale.validate(schema, data, strict=True) + def create_osw(self, sim_id, building_id, upgrade_idx): """ Generate and return the osw as a python dict @@ -28,6 +55,14 @@ def create_osw(self, sim_id, building_id, upgrade_idx): :param building_id: integer building id to use from the sampled buildstock.csv :param upgrade_idx: integer index of the upgrade scenario to apply, None if baseline """ + # Default argument values + workflow_args = { + 'residential_simulation_controls': {}, + 'measures': [], + 'simulation_output': {} + } + workflow_args.update(self.cfg['workflow_generator'].get('args', {})) + logger.debug('Generating OSW, sim_id={}'.format(sim_id)) res_sim_ctl_args = { @@ -38,18 +73,16 @@ def create_osw(self, sim_id, building_id, upgrade_idx): 'end_day_of_month': 31, 'calendar_year': 2007 } - res_sim_ctl_args.update(self.cfg.get('residential_simulation_controls', {})) + res_sim_ctl_args.update(workflow_args['residential_simulation_controls']) - # FIXME: Figure this out another way. - sample_weight = self.cfg['baseline']['n_buildings_represented'] /\ - self.cfg['baseline']['n_datapoints'] + # FIXME: The sample weight will likely be wrong for a downselect. 
bld_exist_model_args = { 'building_id': building_id, 'workflow_json': 'measure-info.json', - 'sample_weight': sample_weight, + 'number_of_buildings_represented': self.cfg['baseline']['n_buildings_represented'] } - if 'measures_to_ignore' in self.cfg['baseline']: - bld_exist_model_args['measures_to_ignore'] = '|'.join(self.cfg['baseline']['measures_to_ignore']) + if 'measures_to_ignore' in workflow_args: + bld_exist_model_args['measures_to_ignore'] = '|'.join(workflow_args['measures_to_ignore']) osw = { 'id': sim_id, @@ -69,15 +102,12 @@ def create_osw(self, sim_id, building_id, upgrade_idx): ], } - osw['steps'].extend(self.cfg['baseline'].get('measures', [])) - - sim_output_args = {} - sim_output_args.update(self.cfg.get('simulation_output', {})) + osw['steps'].extend(workflow_args['measures']) osw['steps'].extend([ { 'measure_dir_name': 'SimulationOutputReport', - 'arguments': sim_output_args + 'arguments': workflow_args['simulation_output'] }, { 'measure_dir_name': 'ServerDirectoryCleanup', @@ -113,24 +143,24 @@ def create_osw(self, sim_id, building_id, upgrade_idx): self.make_apply_logic_arg(measure_d['package_apply_logic']) build_existing_model_idx = \ - list(map(lambda x: x['measure_dir_name'] == 'BuildExistingModel', osw['steps'])).index(True) + [x['measure_dir_name'] == 'BuildExistingModel' for x in osw['steps']].index(True) osw['steps'].insert(build_existing_model_idx + 1, apply_upgrade_measure) - if 'timeseries_csv_export' in self.cfg: + if 'timeseries_csv_export' in workflow_args: timeseries_csv_export_args = { 'reporting_frequency': 'Hourly', 'include_enduse_subcategories': False, 'output_variables': '' } - timeseries_csv_export_args.update(self.cfg.get('timeseries_csv_export', {})) + timeseries_csv_export_args.update(workflow_args['timeseries_csv_export']) timeseries_measure = { 'measure_dir_name': 'TimeseriesCSVExport', 'arguments': timeseries_csv_export_args } osw['steps'].insert(-1, timeseries_measure) # right before ServerDirectoryCleanup - if 
'reporting_measures' in self.cfg: - for measure_dir_name in self.cfg['reporting_measures']: + if 'reporting_measures' in workflow_args: + for measure_dir_name in workflow_args['reporting_measures']: reporting_measure = { 'measure_dir_name': measure_dir_name, 'arguments': {} diff --git a/buildstockbatch/workflow_generator/test_workflow_generator.py b/buildstockbatch/workflow_generator/test_workflow_generator.py index 25659caa..21a5b095 100644 --- a/buildstockbatch/workflow_generator/test_workflow_generator.py +++ b/buildstockbatch/workflow_generator/test_workflow_generator.py @@ -45,6 +45,10 @@ def test_residential_package_apply(): 'n_datapoints': 10, 'n_buildings_represented': 100 }, + 'workflow_generator': { + 'type': 'residential_default', + 'args': {} + }, 'upgrades': [ { 'upgrade_name': 'upgrade 1', @@ -83,6 +87,10 @@ def test_residential_simulation_controls_config(): 'n_datapoints': 10, 'n_buildings_represented': 100 }, + 'workflow_generator': { + 'type': 'residential_default', + 'args': {} + }, } osw_gen = ResidentialDefaultWorkflowGenerator(cfg) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) @@ -98,7 +106,7 @@ def test_residential_simulation_controls_config(): } for k, v in default_args.items(): assert(step0['arguments'][k] == v) - cfg['residential_simulation_controls'] = { + cfg['workflow_generator']['args']['residential_simulation_controls'] = { 'timesteps_per_hr': 2, 'begin_month': 7 } @@ -120,9 +128,14 @@ def test_timeseries_csv_export(): 'n_datapoints': 10, 'n_buildings_represented': 100 }, - 'timeseries_csv_export': { - 'reporting_frequency': 'Timestep', - 'output_variables': 'Zone Mean Air Temperature' + 'workflow_generator': { + 'type': 'residential_default', + 'args': { + 'timeseries_csv_export': { + 'reporting_frequency': 'Timestep', + 'output_variables': 'Zone Mean Air Temperature' + } + } } } osw_gen = ResidentialDefaultWorkflowGenerator(cfg) @@ -152,10 +165,15 @@ def test_additional_reporting_measures(): 'n_datapoints': 10, 
'n_buildings_represented': 100 }, - 'reporting_measures': [ - 'ReportingMeasure1', - 'ReportingMeasure2' - ] + 'workflow_generator': { + 'type': 'residential_default', + 'args': { + 'reporting_measures': [ + 'ReportingMeasure1', + 'ReportingMeasure2' + ] + } + } } osw_gen = ResidentialDefaultWorkflowGenerator(cfg) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) @@ -173,12 +191,18 @@ def test_ignore_measures_argument(): 'baseline': { 'n_datapoints': 10, 'n_buildings_represented': 100, - 'measures_to_ignore': [ - 'ResidentialApplianceCookingRange', - 'ResidentialApplianceDishwasher', - 'ResidentialApplianceClothesWasher', - 'ResidentialApplianceClothesDryer', - 'ResidentialApplianceRefrigerator'] + }, + 'workflow_generator': { + 'type': 'residential_default', + 'args': { + 'measures_to_ignore': [ + 'ResidentialApplianceCookingRange', + 'ResidentialApplianceDishwasher', + 'ResidentialApplianceClothesWasher', + 'ResidentialApplianceClothesDryer', + 'ResidentialApplianceRefrigerator' + ] + } } } osw_gen = ResidentialDefaultWorkflowGenerator(cfg) @@ -204,6 +228,10 @@ def test_default_apply_upgrade(): 'n_datapoints': 10, 'n_buildings_represented': 100 }, + 'workflow_generator': { + 'type': 'residential_default', + 'args': {} + }, 'upgrades': [ { 'options': [ @@ -234,8 +262,13 @@ def test_simulation_output(): 'n_datapoints': 10, 'n_buildings_represented': 100 }, - 'simulation_output': { - 'include_enduse_subcategories': True + 'workflow_generator': { + 'type': 'residential_default', + 'args': { + 'simulation_output': { + 'include_enduse_subcategories': True + } + } } } osw_gen = ResidentialDefaultWorkflowGenerator(cfg) diff --git a/project_resstock_national.yml b/project_resstock_national.yml index fb22c93a..521e668b 100644 --- a/project_resstock_national.yml +++ b/project_resstock_national.yml @@ -1,15 +1,29 @@ -schema_version: 0.2 -stock_type: residential +schema_version: '0.3' buildstock_directory: ../OpenStudio-BuildStock # Relative to this file or absolute 
project_directory: project_national # Relative to buildstock_directory output_directory: ../national_test_outputs -weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip -# weather_files_path: ../ResStock_TMY3.zip # Relative to this file or absolute path to zipped weather files +# weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip +weather_files_path: ../ResStock_TMY3.zip # Relative to this file or absolute path to zipped weather files + +sampler: + type: residential_quota_downselect + args: + n_datapoints: 10 + # Uncomment and set specify logic you you want to downselect to a subset of the building stock + resample: false + logic: + - Geometry Building Type RECS|Single-Family Detached + - Vacancy Status|Occupied + +workflow_generator: + type: residential_default + args: + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true baseline: - n_datapoints: 4 n_buildings_represented: 133172057 # Total number of residential dwelling units in contiguous United States, including unoccupied units, resulting from acensus tract level query of ACS 5-yr 2016 (i.e. 2012-2016), using this script: https://github.com/NREL/resstock-estimation/blob/master/sources/spatial/tsv_maker.py. 
- sampling_algorithm: quota # The default resstock sampling algorithm - use precomputed if using the precomputed_sample option upgrades: - upgrade_name: Triple-Pane Windows @@ -21,16 +35,6 @@ upgrades: multiplier: Window Area (ft^2) lifetime: 30 -timeseries_csv_export: - reporting_frequency: Hourly - include_enduse_subcategories: true - -# downselect: # Uncomment and set specify logic you you want to downselect to a subset of the building stock -# resample: true -# logic: -# - Geometry Building Type RECS|Single-Family Detached -# - Vacancy Status|Occupied - eagle: n_jobs: 4 minutes_per_sim: 30 From 89ff7456418800cf488a96ee00e4c3ebe696d9d7 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 16 Nov 2020 08:26:32 -0700 Subject: [PATCH 06/21] fixing style issues --- buildstockbatch/test/test_docker.py | 5 ----- buildstockbatch/workflow_generator/base.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/buildstockbatch/test/test_docker.py b/buildstockbatch/test/test_docker.py index d5f59c14..eb8701e7 100644 --- a/buildstockbatch/test/test_docker.py +++ b/buildstockbatch/test/test_docker.py @@ -1,14 +1,9 @@ import os -import pandas as pd -import pytest import requests import re from unittest.mock import patch -import yaml from buildstockbatch.base import BuildStockBatchBase -from buildstockbatch.localdocker import LocalDockerBatch -from buildstockbatch.exc import ValidationError here = os.path.dirname(os.path.abspath(__file__)) diff --git a/buildstockbatch/workflow_generator/base.py b/buildstockbatch/workflow_generator/base.py index ddf5b105..e01ffd63 100644 --- a/buildstockbatch/workflow_generator/base.py +++ b/buildstockbatch/workflow_generator/base.py @@ -65,4 +65,4 @@ def validate(cls, workflow_generator_args): :param workflow_generator_args: argmuents passed to the workflow generator in the config file. 
:type workflow_generator_args: dict """ - return True \ No newline at end of file + return True From 0ab2afcc9c3e7dca4e7bfbab3ed3fa6725c5d095 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 14 Dec 2020 09:05:06 -0700 Subject: [PATCH 07/21] saving my place after a long absence --- buildstockbatch/aws/aws.py | 4 +- buildstockbatch/base.py | 182 +----------------- buildstockbatch/test/test_eagle.py | 3 +- buildstockbatch/test/test_postprocessing.py | 5 +- buildstockbatch/utils.py | 23 +++ buildstockbatch/workflow_generator/base.py | 8 +- .../workflow_generator/commercial.py | 9 +- .../workflow_generator/residential.py | 167 +++++++++++++++- 8 files changed, 210 insertions(+), 191 deletions(-) diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index c0c71468..2f782608 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -40,7 +40,7 @@ from buildstockbatch.base import ValidationError from buildstockbatch.aws.awsbase import AwsJobBase from buildstockbatch import postprocessing -from ..utils import log_error_details +from ..utils import log_error_details, get_project_configuration logger = logging.getLogger(__name__) @@ -1661,7 +1661,7 @@ def __init__(self, project_filename): @staticmethod def validate_instance_types(project_file): - cfg = AwsBatch.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) aws_config = cfg['aws'] boto3_session = boto3.Session(region_name=aws_config['region']) ec2 = boto3_session.client('ec2') diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index c99807e0..fd5e338e 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -19,12 +19,10 @@ import requests import shutil import tempfile -import yaml import yamale import zipfile import csv from collections import defaultdict, Counter -import xml.etree.ElementTree as ET from buildstockbatch.__version__ import __schema_version__ from buildstockbatch import ( @@ -33,7 +31,7 @@ postprocessing ) 
from buildstockbatch.exc import SimulationExists, ValidationError -from buildstockbatch.utils import path_rel_to_file +from buildstockbatch.utils import path_rel_to_file, get_project_configuration logger = logging.getLogger(__name__) @@ -56,7 +54,7 @@ def __init__(self, project_filename): self.project_filename = os.path.abspath(project_filename) # Load project file to self.cfg - self.cfg = self.get_project_configuration(project_filename) + self.cfg = get_project_configuration(project_filename) self.buildstock_dir = self.cfg['buildstock_directory'] if not os.path.isdir(self.buildstock_dir): @@ -208,32 +206,12 @@ def validate_project(project_file): assert(BuildStockBatchBase.validate_misc_constraints(project_file)) assert(BuildStockBatchBase.validate_xor_nor_schema_keys(project_file)) assert(BuildStockBatchBase.validate_reference_scenario(project_file)) - assert(BuildStockBatchBase.validate_measures_and_arguments(project_file)) assert(BuildStockBatchBase.validate_options_lookup(project_file)) assert(BuildStockBatchBase.validate_measure_references(project_file)) assert(BuildStockBatchBase.validate_options_lookup(project_file)) logger.info('Base Validation Successful') return True - @classmethod - def get_project_configuration(cls, project_file): - try: - with open(project_file) as f: - cfg = yaml.load(f, Loader=yaml.SafeLoader) - except FileNotFoundError as err: - logger.error('Failed to load input yaml for validation') - raise err - - # Set absolute paths - cfg['buildstock_directory'] = path_rel_to_file(project_file, cfg['buildstock_directory']) - if 'precomputed_sample' in cfg.get('baseline', {}): - cfg['baseline']['precomputed_sample'] = \ - path_rel_to_file(project_file, cfg['baseline']['precomputed_sample']) - if 'weather_files_path' in cfg: - cfg['weather_files_path'] = path_rel_to_file(project_file, cfg['weather_files_path']) - - return cfg - @staticmethod def get_buildstock_dir(project_file, cfg): buildstock_dir = cfg["buildstock_directory"] @@ -244,7 +222,7 
@@ def get_buildstock_dir(project_file, cfg): @staticmethod def validate_sampler(project_file): - cfg = BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) sampler_name = cfg['sampler']['type'] try: Sampler = BuildStockBatchBase.get_sampler_class(sampler_name) @@ -255,7 +233,7 @@ def validate_sampler(project_file): @staticmethod def validate_project_schema(project_file): - cfg = BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) schema_version = cfg.get('schema_version') version_schema = os.path.join(os.path.dirname(__file__), 'schemas', f'v{schema_version}.yaml') if not os.path.isfile(version_schema): @@ -268,7 +246,7 @@ def validate_project_schema(project_file): @staticmethod def validate_misc_constraints(project_file): # validate other miscellaneous constraints - cfg = BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) if cfg.get('postprocessing', {}).get('aggregate_timeseries', False): logger.warning('aggregate_timeseries has been deprecated and will be removed in a future version.') @@ -277,7 +255,7 @@ def validate_misc_constraints(project_file): @staticmethod def validate_xor_nor_schema_keys(project_file): - cfg = BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) major, minor = cfg.get('version', __schema_version__).split('.') if int(major) >= 0: if int(minor) >= 0: @@ -288,154 +266,12 @@ def validate_xor_nor_schema_keys(project_file): return True - @staticmethod - def get_measure_xml(xml_path): - tree = ET.parse(xml_path) - root = tree.getroot() - return root - - @staticmethod - def validate_measures_and_arguments(project_file): - cfg = BuildStockBatchBase.get_project_configuration(project_file) - if cfg['stock_type'] != 'residential': # FIXME: add comstock logic - return True - - buildstock_dir = 
os.path.join(os.path.dirname(project_file), cfg["buildstock_directory"]) - measures_dir = f'{buildstock_dir}/measures' - type_map = {'Integer': int, 'Boolean': bool, 'String': str, 'Double': float} - - measure_names = { - 'ResidentialSimulationControls': 'residential_simulation_controls', - 'BuildExistingModel': 'baseline', - 'SimulationOutputReport': 'simulation_output', - 'ServerDirectoryCleanup': None, - 'ApplyUpgrade': 'upgrades', - 'TimeseriesCSVExport': 'timeseries_csv_export' - } - if 'reporting_measures' in cfg.keys(): - for reporting_measure in cfg['reporting_measures']: - measure_names[reporting_measure] = 'reporting_measures' - - error_msgs = '' - warning_msgs = '' - for measure_name in measure_names.keys(): - measure_path = os.path.join(measures_dir, measure_name) - - if measure_names[measure_name] in cfg.keys() or \ - measure_names[measure_name] == 'residential_simulation_controls': - # if they exist in the cfg, make sure they exist in the buildstock checkout - if not os.path.exists(measure_path): - error_msgs += f"* {measure_name} does not exist in {buildstock_dir}. 
\n" - - # check the rest only if that measure exists in cfg - if measure_names[measure_name] not in cfg.keys(): - continue - - # check argument value types for residential simulation controls and timeseries csv export measures - if measure_name in ['ResidentialSimulationControls', 'SimulationOutputReport', 'TimeseriesCSVExport']: - root = BuildStockBatchBase.get_measure_xml(os.path.join(measure_path, 'measure.xml')) - expected_arguments = {} - required_args_with_default = {} - required_args_no_default = {} - for argument in root.findall('./arguments/argument'): - name = argument.find('./name').text - expected_arguments[name] = [] - required = argument.find('./required').text - default = argument.find('./default_value') - default = default.text if default is not None else None - - if required == 'true' and not default: - required_args_no_default[name] = None - elif required == 'true': - required_args_with_default[name] = default - - if argument.find('./type').text == 'Choice': - for choice in argument.findall('./choices/choice'): - for value in choice.findall('./value'): - expected_arguments[name].append(value.text) - else: - expected_arguments[name] = argument.find('./type').text - - for actual_argument_key in cfg[measure_names[measure_name]].keys(): - if actual_argument_key not in expected_arguments.keys(): - error_msgs += f"* Found unexpected argument key {actual_argument_key} for "\ - f"{measure_names[measure_name]} in yaml file. 
The available keys are: " \ - f"{list(expected_arguments.keys())}\n" - continue - - required_args_no_default.pop(actual_argument_key, None) - required_args_with_default.pop(actual_argument_key, None) - - actual_argument_value = cfg[measure_names[measure_name]][actual_argument_key] - expected_argument_type = expected_arguments[actual_argument_key] - - if type(expected_argument_type) is not list: - try: - if type(actual_argument_value) is not list: - actual_argument_value = [actual_argument_value] - - for val in actual_argument_value: - if not isinstance(val, type_map[expected_argument_type]): - error_msgs += f"* Wrong argument value type for {actual_argument_key} for measure "\ - f"{measure_names[measure_name]} in yaml file. Expected type:" \ - f" {type_map[expected_argument_type]}, got: {val}" \ - f" of type: {type(val)} \n" - except KeyError: - print(f"Found an unexpected argument value type: {expected_argument_type} for argument " - f" {actual_argument_key} in measure {measure_name}.\n") - else: # Choice - if actual_argument_value not in expected_argument_type: - error_msgs += f"* Found unexpected argument value {actual_argument_value} for "\ - f"{measure_names[measure_name]} in yaml file. Valid values are " \ - f"{expected_argument_type}.\n" - - for arg, default in required_args_no_default.items(): - error_msgs += f"* Required argument {arg} for measure {measure_name} wasn't supplied. " \ - f"There is no default for this argument.\n" - - for arg, default in required_args_with_default.items(): - warning_msgs += f"* Required argument {arg} for measure {measure_name} wasn't supplied. " \ - f"Using default value: {default}. 
\n" - - elif measure_name in ['ApplyUpgrade']: - # For ApplyUpgrade measure, verify that all the cost_multipliers used are correct - root = BuildStockBatchBase.get_measure_xml(os.path.join(measure_path, 'measure.xml')) - valid_multipliers = set() - for argument in root.findall('./arguments/argument'): - name = argument.find('./name') - if name.text.endswith('_multiplier'): - for choice in argument.findall('./choices/choice'): - value = choice.find('./value') - value = value.text if value is not None else '' - valid_multipliers.add(value) - invalid_multipliers = Counter() - for upgrade_count, upgrade in enumerate(cfg['upgrades']): - for option_count, option in enumerate(upgrade['options']): - for cost_indx, cost_entry in enumerate(option.get('costs', [])): - if cost_entry['multiplier'] not in valid_multipliers: - invalid_multipliers[cost_entry['multiplier']] += 1 - - if invalid_multipliers: - error_msgs += "* The following multipliers values are invalid: \n" - for multiplier, count in invalid_multipliers.items(): - error_msgs += f" '{multiplier}' - Used {count} times \n" - error_msgs += f" The list of valid multipliers are {valid_multipliers}.\n" - - if warning_msgs: - logger.warning(warning_msgs) - - if not error_msgs: - return True - else: - logger.error(error_msgs) - raise ValidationError(error_msgs) - @staticmethod def validate_options_lookup(project_file): """ Validates that the parameter|options specified in the project yaml file is available in the options_lookup.tsv """ - cfg = BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) param_option_dict = defaultdict(set) buildstock_dir = BuildStockBatchBase.get_buildstock_dir(project_file, cfg) options_lookup_path = f'{buildstock_dir}/resources/options_lookup.tsv' @@ -603,7 +439,7 @@ def validate_measure_references(project_file): Validates that the measures specified in the project yaml file are referenced in the options_lookup.tsv """ - cfg = 
BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) measure_dirs = set() buildstock_dir = BuildStockBatchBase.get_buildstock_dir(project_file, cfg) options_lookup_path = f'{buildstock_dir}/resources/options_lookup.tsv' @@ -658,7 +494,7 @@ def validate_reference_scenario(project_file): """ Checks if the reference_scenario mentioned in an upgrade points to a valid upgrade """ - cfg = BuildStockBatchBase.get_project_configuration(project_file) + cfg = get_project_configuration(project_file) # collect all upgrade_names upgrade_names = set() diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index cd02be8a..5a74218e 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ -10,6 +10,7 @@ import gzip from buildstockbatch.eagle import user_cli, EagleBatch +from buildstockbatch.utils import get_project_configuration here = os.path.dirname(os.path.abspath(__file__)) @@ -26,7 +27,7 @@ def test_hpc_run_building(mock_subprocess, monkeypatch, basic_residential_projec sim_path = tmp_path / 'output' / 'simulation_output' / 'up00' / 'bldg0000001' os.makedirs(sim_path) - cfg = EagleBatch.get_project_configuration(project_filename) + cfg = get_project_configuration(project_filename) with patch.object(EagleBatch, 'weather_dir', None), \ patch.object(EagleBatch, 'singularity_image', '/path/to/singularity.simg'), \ diff --git a/buildstockbatch/test/test_postprocessing.py b/buildstockbatch/test/test_postprocessing.py index 5a2c131b..bdfc9cde 100644 --- a/buildstockbatch/test/test_postprocessing.py +++ b/buildstockbatch/test/test_postprocessing.py @@ -10,6 +10,7 @@ from buildstockbatch import postprocessing from buildstockbatch.base import BuildStockBatchBase +from buildstockbatch.utils import get_project_configuration from unittest.mock import patch @@ -48,7 +49,7 @@ def test_report_additional_results_csv_columns(basic_residential_project_file): with 
gzip.open(sim_out_dir / 'results_job0.json.gz', 'wt', encoding='utf-8') as f: json.dump(dpouts2, f) - cfg = BuildStockBatchBase.get_project_configuration(project_filename) + cfg = get_project_configuration(project_filename) postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False) @@ -67,7 +68,7 @@ def test_empty_results_assertion(basic_residential_project_file, capsys): results_dir = pathlib.Path(results_dir) sim_out_dir = results_dir / 'simulation_output' shutil.rmtree(sim_out_dir) # no results - cfg = BuildStockBatchBase.get_project_configuration(project_filename) + cfg = get_project_configuration(project_filename) with pytest.raises(ValueError, match=r'No simulation results found to post-process'): assert postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False) diff --git a/buildstockbatch/utils.py b/buildstockbatch/utils.py index 32e4e9c1..af6592a7 100644 --- a/buildstockbatch/utils.py +++ b/buildstockbatch/utils.py @@ -1,7 +1,11 @@ import enum import inspect import os +import logging import traceback +import yaml + +logger = logging.getLogger(__name__) class ContainerRuntime(enum.Enum): @@ -16,6 +20,25 @@ def path_rel_to_file(startfile, x): return os.path.abspath(os.path.join(os.path.dirname(startfile), x)) +def get_project_configuration(project_file): + try: + with open(project_file) as f: + cfg = yaml.load(f, Loader=yaml.SafeLoader) + except FileNotFoundError as err: + logger.error('Failed to load input yaml for validation') + raise err + + # Set absolute paths + cfg['buildstock_directory'] = path_rel_to_file(project_file, cfg['buildstock_directory']) + # if 'precomputed_sample' in cfg.get('baseline', {}): + # cfg['baseline']['precomputed_sample'] = \ + # path_rel_to_file(project_file, cfg['baseline']['precomputed_sample']) + if 'weather_files_path' in cfg: + cfg['weather_files_path'] = path_rel_to_file(project_file, cfg['weather_files_path']) + + return cfg + + def _str_repr(obj, list_max=20, dict_max=20, string_max=100): 
if type(obj) is str: if len(obj) <= string_max: diff --git a/buildstockbatch/workflow_generator/base.py b/buildstockbatch/workflow_generator/base.py index e01ffd63..95eca86e 100644 --- a/buildstockbatch/workflow_generator/base.py +++ b/buildstockbatch/workflow_generator/base.py @@ -19,7 +19,7 @@ class WorkflowGeneratorBase(object): def __init__(self, cfg): self.cfg = cfg - self.validate(self.cfg['workflow_generator']['args']) + self.validate(self.cfg) def create_osw(self, sim_id, building_id, upgrade_idx): """ @@ -57,12 +57,12 @@ def make_apply_logic_arg(cls, logic): return logic @classmethod - def validate(cls, workflow_generator_args): + def validate(cls, cfg): """Validate the workflor generator arguments Replace this in your subclass. - :param workflow_generator_args: argmuents passed to the workflow generator in the config file. - :type workflow_generator_args: dict + :param cfg: project configuration + :type cfg: dict """ return True diff --git a/buildstockbatch/workflow_generator/commercial.py b/buildstockbatch/workflow_generator/commercial.py index 43d089c8..41c33362 100644 --- a/buildstockbatch/workflow_generator/commercial.py +++ b/buildstockbatch/workflow_generator/commercial.py @@ -24,11 +24,11 @@ class CommercialDefaultWorkflowGenerator(WorkflowGeneratorBase): @classmethod - def validate(cls, workflow_generator_args): + def validate(cls, cfg): """Validate arguments - :param workflow_generator_args: Arguments passed to the workflow generator in the yaml - :type workflow_generator_args: dict + :param cfg: project configuration + :type cfg: dict """ schema_yml = """ measures: list(include('measure-spec'), required=False) @@ -38,7 +38,8 @@ def validate(cls, workflow_generator_args): measure_dir_name: str(required=True) arguments: map(required=False) """ - schema_yml = '\n'.join([re.sub(r'^ {8}', '', x) for x in schema_yml.strip().split('\n')]) + workflow_generator_args = cfg['workflow_generator']['args'] + schema_yml = re.sub(r'^ {8}', '', schema_yml, 
flags=re.MULTILINE) schema = yamale.make_schema(content=schema_yml, parser='ruamel') data = yamale.make_data(content=json.dumps(workflow_generator_args), parser='ruamel') return yamale.validate(schema, data, strict=True) diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py index c3771aa9..b2fb44e3 100644 --- a/buildstockbatch/workflow_generator/residential.py +++ b/buildstockbatch/workflow_generator/residential.py @@ -10,25 +10,35 @@ :license: BSD-3 """ +from collections import Counter import datetime as dt import json import logging +import os import re +from xml.etree import ElementTree import yamale from .base import WorkflowGeneratorBase +from buildstockbatch.exc import ValidationError logger = logging.getLogger(__name__) +def get_measure_xml(xml_path): + tree = ElementTree.parse(xml_path) + root = tree.getroot() + return root + + class ResidentialDefaultWorkflowGenerator(WorkflowGeneratorBase): @classmethod - def validate(cls, workflow_generator_args): + def validate(cls, cfg): """Validate arguments - :param workflow_generator_args: Arguments passed to the workflow generator in the yaml - :type workflow_generator_args: dict + :param cfg: project configuration + :type cfg: dict """ schema_yml = """ measures_to_ignore: list(str(), required=False) @@ -42,10 +52,157 @@ def validate(cls, workflow_generator_args): measure_dir_name: str(required=True) arguments: map(required=False) """ - schema_yml = '\n'.join([re.sub(r'^ {8}', '', x) for x in schema_yml.strip().split('\n')]) + workflow_generator_args = cfg['workflow_generator']['args'] + schema_yml = re.sub(r'^ {8}', '', schema_yml, flags=re.MULTILINE) schema = yamale.make_schema(content=schema_yml, parser='ruamel') data = yamale.make_data(content=json.dumps(workflow_generator_args), parser='ruamel') - return yamale.validate(schema, data, strict=True) + yamale.validate(schema, data, strict=True) + return cls.validate_measures_and_arguments(cfg) + + 
@staticmethod + def validate_measures_and_arguments(cfg): + + buildstock_dir = cfg["buildstock_directory"] + measures_dir = os.path.join(buildstock_dir, 'measures') + type_map = {'Integer': int, 'Boolean': bool, 'String': str, 'Double': float} + + measure_names = { + 'ResidentialSimulationControls': 'workflow_generator.args.residential_simulation_controls', + 'BuildExistingModel': 'baseline', + 'SimulationOutputReport': 'workflow_generator.args.simulation_output', + 'ServerDirectoryCleanup': None, + 'ApplyUpgrade': 'upgrades', + 'TimeseriesCSVExport': 'workflow_generator.args.timeseries_csv_export' + } + + def cfg_path_exists(cfg_path): + if cfg_path is None: + return False + path_items = cfg_path.split('.') + a = cfg + for path_item in path_items: + try: + a = cfg[path_item] # noqa F841 + except KeyError: + return False + return True + + if 'reporting_measures' in cfg.keys(): + for reporting_measure in cfg['reporting_measures']: + measure_names[reporting_measure] = 'reporting_measures' + + error_msgs = '' + warning_msgs = '' + for measure_name, cfg_key in measure_names.items(): + measure_path = os.path.join(measures_dir, measure_name) + + if cfg_path_exists(cfg_key) or cfg_key == 'residential_simulation_controls': + # if they exist in the cfg, make sure they exist in the buildstock checkout + if not os.path.exists(measure_path): + error_msgs += f"* {measure_name} does not exist in {buildstock_dir}. 
\n" + + # check the rest only if that measure exists in cfg + if not cfg_path_exists(cfg_key): + continue + + # check argument value types for residential simulation controls and timeseries csv export measures + if measure_name in ['ResidentialSimulationControls', 'SimulationOutputReport', 'TimeseriesCSVExport']: + root = get_measure_xml(os.path.join(measure_path, 'measure.xml')) + expected_arguments = {} + required_args_with_default = {} + required_args_no_default = {} + for argument in root.findall('./arguments/argument'): + name = argument.find('./name').text + expected_arguments[name] = [] + required = argument.find('./required').text + default = argument.find('./default_value') + default = default.text if default is not None else None + + if required == 'true' and not default: + required_args_no_default[name] = None + elif required == 'true': + required_args_with_default[name] = default + + if argument.find('./type').text == 'Choice': + for choice in argument.findall('./choices/choice'): + for value in choice.findall('./value'): + expected_arguments[name].append(value.text) + else: + expected_arguments[name] = argument.find('./type').text + + for actual_argument_key in cfg[measure_names[measure_name]].keys(): + if actual_argument_key not in expected_arguments.keys(): + error_msgs += f"* Found unexpected argument key {actual_argument_key} for "\ + f"{measure_names[measure_name]} in yaml file. 
The available keys are: " \ + f"{list(expected_arguments.keys())}\n" + continue + + required_args_no_default.pop(actual_argument_key, None) + required_args_with_default.pop(actual_argument_key, None) + + actual_argument_value = cfg[measure_names[measure_name]][actual_argument_key] + expected_argument_type = expected_arguments[actual_argument_key] + + if type(expected_argument_type) is not list: + try: + if type(actual_argument_value) is not list: + actual_argument_value = [actual_argument_value] + + for val in actual_argument_value: + if not isinstance(val, type_map[expected_argument_type]): + error_msgs += f"* Wrong argument value type for {actual_argument_key} for measure "\ + f"{measure_names[measure_name]} in yaml file. Expected type:" \ + f" {type_map[expected_argument_type]}, got: {val}" \ + f" of type: {type(val)} \n" + except KeyError: + print(f"Found an unexpected argument value type: {expected_argument_type} for argument " + f" {actual_argument_key} in measure {measure_name}.\n") + else: # Choice + if actual_argument_value not in expected_argument_type: + error_msgs += f"* Found unexpected argument value {actual_argument_value} for "\ + f"{measure_names[measure_name]} in yaml file. Valid values are " \ + f"{expected_argument_type}.\n" + + for arg, default in required_args_no_default.items(): + error_msgs += f"* Required argument {arg} for measure {measure_name} wasn't supplied. " \ + f"There is no default for this argument.\n" + + for arg, default in required_args_with_default.items(): + warning_msgs += f"* Required argument {arg} for measure {measure_name} wasn't supplied. " \ + f"Using default value: {default}. 
\n" + + elif measure_name in ['ApplyUpgrade']: + # For ApplyUpgrade measure, verify that all the cost_multipliers used are correct + root = get_measure_xml(os.path.join(measure_path, 'measure.xml')) + valid_multipliers = set() + for argument in root.findall('./arguments/argument'): + name = argument.find('./name') + if name.text.endswith('_multiplier'): + for choice in argument.findall('./choices/choice'): + value = choice.find('./value') + value = value.text if value is not None else '' + valid_multipliers.add(value) + invalid_multipliers = Counter() + for upgrade_count, upgrade in enumerate(cfg['upgrades']): + for option_count, option in enumerate(upgrade['options']): + for cost_indx, cost_entry in enumerate(option.get('costs', [])): + if cost_entry['multiplier'] not in valid_multipliers: + invalid_multipliers[cost_entry['multiplier']] += 1 + + if invalid_multipliers: + error_msgs += "* The following multipliers values are invalid: \n" + for multiplier, count in invalid_multipliers.items(): + error_msgs += f" '{multiplier}' - Used {count} times \n" + error_msgs += f" The list of valid multipliers are {valid_multipliers}.\n" + + if warning_msgs: + logger.warning(warning_msgs) + + if not error_msgs: + return True + else: + logger.error(error_msgs) + raise ValidationError(error_msgs) def create_osw(self, sim_id, building_id, upgrade_idx): """ From 19c909e6b45db3b230ad85556978f2027841c3d9 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 4 Jan 2021 14:38:59 -0700 Subject: [PATCH 08/21] passing n_datapoints into create_osw --- buildstockbatch/aws/aws.py | 3 ++- buildstockbatch/base.py | 4 ++-- buildstockbatch/eagle.py | 7 ++++--- buildstockbatch/localdocker.py | 6 ++++-- buildstockbatch/workflow_generator/base.py | 3 ++- buildstockbatch/workflow_generator/residential.py | 2 +- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index 2f782608..4433f302 100644 --- a/buildstockbatch/aws/aws.py +++ 
b/buildstockbatch/aws/aws.py @@ -1834,6 +1834,7 @@ def run_batch(self): with open(job_json_filename, 'w') as f: json.dump({ 'job_num': i, + 'n_datapoints': n_datapoints, 'batch': batch, }, f, indent=4) array_size = i @@ -1978,7 +1979,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): sim_id = f'bldg{building_id:07d}up{upgrade_id:02d}' # Create OSW - osw = cls.create_osw(cfg, sim_id, building_id, upgrade_idx) + osw = cls.create_osw(cfg, jobs_d['n_datapoints'], sim_id, building_id, upgrade_idx) with open(os.path.join(sim_dir, 'in.osw'), 'w') as f: json.dump(osw, f, indent=4) diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index fd5e338e..6d99ee39 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -127,9 +127,9 @@ def run_batch(self): raise NotImplementedError @classmethod - def create_osw(cls, cfg, *args, **kwargs): + def create_osw(cls, cfg, n_datapoints, *args, **kwargs): WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type']) - osw_generator = WorkflowGenerator(cfg) + osw_generator = WorkflowGenerator(cfg, n_datapoints) return osw_generator.create_osw(*args, **kwargs) @staticmethod diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py index a6d2bccb..edc7436c 100644 --- a/buildstockbatch/eagle.py +++ b/buildstockbatch/eagle.py @@ -208,6 +208,7 @@ def run_batch(self, sampling_only=False): with open(job_json_filename, 'w') as f: json.dump({ 'job_num': i, + 'n_datapoints': n_datapoints, 'batch': batch, }, f, indent=4) @@ -250,7 +251,7 @@ def run_job_batch(self, job_array_number): @delayed def run_building_d(i, upgrade_idx): try: - return self.run_building(self.output_dir, self.cfg, i, upgrade_idx) + return self.run_building(self.output_dir, self.cfg, args['n_datapoints'], i, upgrade_idx) except Exception: with open(traceback_file_path, 'a') as f: txt = get_error_details() @@ -292,7 +293,7 @@ def run_building_d(i, upgrade_idx): shutil.copy2(traceback_file_path, 
lustre_sim_out_dir) @classmethod - def run_building(cls, output_dir, cfg, i, upgrade_idx=None): + def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): fs = LocalFileSystem() upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 @@ -303,7 +304,7 @@ def run_building(cls, output_dir, cfg, i, upgrade_idx=None): sim_dir = ex.sim_dir else: # Generate the osw for this simulation - osw = cls.create_osw(cfg, sim_id, building_id=i, upgrade_idx=upgrade_idx) + osw = cls.create_osw(cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx) with open(os.path.join(sim_dir, 'in.osw'), 'w') as f: json.dump(osw, f, indent=4) diff --git a/buildstockbatch/localdocker.py b/buildstockbatch/localdocker.py index b12b11f0..e6d89800 100644 --- a/buildstockbatch/localdocker.py +++ b/buildstockbatch/localdocker.py @@ -87,7 +87,7 @@ def weather_dir(self): @classmethod def run_building(cls, project_dir, buildstock_dir, weather_dir, docker_image, results_dir, measures_only, - cfg, i, upgrade_idx=None): + n_datapoints, cfg, i, upgrade_idx=None): upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 @@ -109,7 +109,7 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, docker_image, re if not os.path.exists(dir_to_make): os.makedirs(dir_to_make) - osw = cls.create_osw(cfg, sim_id, building_id=i, upgrade_idx=upgrade_idx) + osw = cls.create_osw(cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx) with open(os.path.join(sim_dir, 'in.osw'), 'w') as f: json.dump(osw, f, indent=4) @@ -162,6 +162,7 @@ def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): df = pd.read_csv(buildstock_csv_filename, index_col=0) building_ids = df.index.tolist() + n_datapoints = len(building_ids) run_building_d = functools.partial( delayed(self.run_building), self.project_dir, @@ -170,6 +171,7 @@ def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): self.docker_image, self.results_dir, measures_only, + n_datapoints, 
self.cfg ) upgrade_sims = [] diff --git a/buildstockbatch/workflow_generator/base.py b/buildstockbatch/workflow_generator/base.py index 95eca86e..066e1dca 100644 --- a/buildstockbatch/workflow_generator/base.py +++ b/buildstockbatch/workflow_generator/base.py @@ -17,8 +17,9 @@ class WorkflowGeneratorBase(object): - def __init__(self, cfg): + def __init__(self, cfg, n_datapoints): self.cfg = cfg + self.n_datapoints = n_datapoints self.validate(self.cfg) def create_osw(self, sim_id, building_id, upgrade_idx): diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py index b2fb44e3..54c4ada3 100644 --- a/buildstockbatch/workflow_generator/residential.py +++ b/buildstockbatch/workflow_generator/residential.py @@ -236,7 +236,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): bld_exist_model_args = { 'building_id': building_id, 'workflow_json': 'measure-info.json', - 'number_of_buildings_represented': self.cfg['baseline']['n_buildings_represented'] + 'sample_weight': self.n_datapoints / self.cfg['baseline']['n_buildings_represented'], } if 'measures_to_ignore' in workflow_args: bld_exist_model_args['measures_to_ignore'] = '|'.join(workflow_args['measures_to_ignore']) From 5ed38ca1016227b157955c9cd2ae75d3c3b0bce6 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 4 Jan 2021 14:54:06 -0700 Subject: [PATCH 09/21] fixing workflow generator tests --- .../test_workflow_generator.py | 51 ++++++++++--------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/buildstockbatch/workflow_generator/test_workflow_generator.py b/buildstockbatch/workflow_generator/test_workflow_generator.py index 21a5b095..baca1469 100644 --- a/buildstockbatch/workflow_generator/test_workflow_generator.py +++ b/buildstockbatch/workflow_generator/test_workflow_generator.py @@ -36,13 +36,13 @@ def test_apply_logic_recursion(): assert(apply_logic == '(!abc&&(def||ghi)&&jkl&&mno)') -def test_residential_package_apply(): +def 
test_residential_package_apply(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldg1up1' building_id = 1 upgrade_idx = 0 cfg = { 'baseline': { - 'n_datapoints': 10, 'n_buildings_represented': 100 }, 'workflow_generator': { @@ -70,7 +70,7 @@ def test_residential_package_apply(): } ] } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, 10) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) upg_step = osw['steps'][2] assert(upg_step['measure_dir_name'] == 'ApplyUpgrade') @@ -78,13 +78,13 @@ def test_residential_package_apply(): assert(upg_step['arguments']['package_apply_logic'] == WorkflowGeneratorBase.make_apply_logic_arg(cfg['upgrades'][0]['package_apply_logic'])) # noqa E501 -def test_residential_simulation_controls_config(): +def test_residential_simulation_controls_config(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldb1up1' building_id = 1 upgrade_idx = None cfg = { 'baseline': { - 'n_datapoints': 10, 'n_buildings_represented': 100 }, 'workflow_generator': { @@ -92,7 +92,8 @@ def test_residential_simulation_controls_config(): 'args': {} }, } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + n_datapoints = 10 + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) step0 = osw['steps'][0] assert(step0['measure_dir_name'] == 'ResidentialSimulationControls') @@ -110,7 +111,7 @@ def test_residential_simulation_controls_config(): 'timesteps_per_hr': 2, 'begin_month': 7 } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) args = osw['steps'][0]['arguments'] assert(args['timesteps_per_hr'] == 2) @@ -119,13 +120,13 @@ def 
test_residential_simulation_controls_config(): assert(args[argname] == default_args[argname]) -def test_timeseries_csv_export(): +def test_timeseries_csv_export(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldb1up1' building_id = 1 upgrade_idx = None cfg = { 'baseline': { - 'n_datapoints': 10, 'n_buildings_represented': 100 }, 'workflow_generator': { @@ -138,7 +139,8 @@ def test_timeseries_csv_export(): } } } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + n_datapoints = 10 + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) timeseries_step = osw['steps'][-2] assert(timeseries_step['measure_dir_name'] == 'TimeseriesCSVExport') @@ -147,7 +149,7 @@ def test_timeseries_csv_export(): 'include_enduse_subcategories': False, 'output_variables': '' } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) args = osw['steps'][-2]['arguments'] assert(args['reporting_frequency'] == 'Timestep') @@ -156,13 +158,13 @@ def test_timeseries_csv_export(): assert(args[argname] == default_args[argname]) -def test_additional_reporting_measures(): +def test_additional_reporting_measures(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldb1up1' building_id = 1 upgrade_idx = None cfg = { 'baseline': { - 'n_datapoints': 10, 'n_buildings_represented': 100 }, 'workflow_generator': { @@ -175,7 +177,7 @@ def test_additional_reporting_measures(): } } } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, 10) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) reporting_measure_1_step = osw['steps'][-3] assert(reporting_measure_1_step['measure_dir_name'] == 
'ReportingMeasure1') @@ -183,13 +185,13 @@ def test_additional_reporting_measures(): assert(reporting_measure_2_step['measure_dir_name'] == 'ReportingMeasure2') -def test_ignore_measures_argument(): +def test_ignore_measures_argument(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldb1up1' building_id = 1 upgrade_idx = None cfg = { 'baseline': { - 'n_datapoints': 10, 'n_buildings_represented': 100, }, 'workflow_generator': { @@ -205,7 +207,7 @@ def test_ignore_measures_argument(): } } } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, 10) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) measure_step = None for step in osw['steps']: @@ -219,13 +221,13 @@ def test_ignore_measures_argument(): 'ResidentialApplianceClothesDryer|ResidentialApplianceRefrigerator', measure_step -def test_default_apply_upgrade(): +def test_default_apply_upgrade(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldg1up1' building_id = 1 upgrade_idx = 0 cfg = { 'baseline': { - 'n_datapoints': 10, 'n_buildings_represented': 100 }, 'workflow_generator': { @@ -242,7 +244,7 @@ def test_default_apply_upgrade(): } ] } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, 10) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) for step in osw['steps']: if step['measure_dir_name'] == 'ApplyUpgrade': @@ -253,13 +255,13 @@ def test_default_apply_upgrade(): assert(step['arguments']['option_1'] == 'Parameter|Option') -def test_simulation_output(): +def test_simulation_output(mocker): + mocker.patch.object(ResidentialDefaultWorkflowGenerator, 'validate_measures_and_arguments', return_value=True) sim_id = 'bldb1up1' building_id = 1 upgrade_idx = None cfg = { 'baseline': { - 'n_datapoints': 10, 
'n_buildings_represented': 100 }, 'workflow_generator': { @@ -271,14 +273,15 @@ def test_simulation_output(): } } } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + n_datapoints = 10 + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) simulation_output_step = osw['steps'][-2] assert(simulation_output_step['measure_dir_name'] == 'SimulationOutputReport') default_args = { 'include_enduse_subcategories': False } - osw_gen = ResidentialDefaultWorkflowGenerator(cfg) + osw_gen = ResidentialDefaultWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) args = osw['steps'][-2]['arguments'] for argname in ('include_enduse_subcategories',): From beabfd0fe900c6c6fcdaee0316e906c9b2053bb6 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Tue, 5 Jan 2021 12:00:52 -0700 Subject: [PATCH 10/21] fixing tests --- buildstockbatch/base.py | 7 +++ buildstockbatch/test/test_eagle.py | 19 +++++--- .../enforce-validate-measures-bad-2.yml | 48 ++++++++++--------- .../enforce-validate-measures-good-2.yml | 33 ++++++++----- buildstockbatch/test/test_validation.py | 6 +-- .../workflow_generator/residential.py | 27 ++++++++--- 6 files changed, 87 insertions(+), 53 deletions(-) diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index 91b5bc6d..fab2154a 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -210,6 +210,7 @@ def cleanup_sim_dir(sim_dir, dest_fs, simout_ts_dir, upgrade_id, building_id): def validate_project(project_file): assert(BuildStockBatchBase.validate_project_schema(project_file)) assert(BuildStockBatchBase.validate_sampler(project_file)) + assert(BuildStockBatchBase.validate_workflow_generator(project_file)) assert(BuildStockBatchBase.validate_misc_constraints(project_file)) assert(BuildStockBatchBase.validate_xor_nor_schema_keys(project_file)) assert(BuildStockBatchBase.validate_reference_scenario(project_file)) @@ -238,6 +239,12 
@@ def validate_sampler(project_file): args = cfg['sampler']['args'] return Sampler.validate_args(project_file, **args) + @classmethod + def validate_workflow_generator(cls, project_file): + cfg = get_project_configuration(project_file) + WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type']) + return WorkflowGenerator.validate(cfg) + @staticmethod def validate_project_schema(project_file): cfg = get_project_configuration(project_file) diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index 5a74218e..708550a3 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ -92,12 +92,10 @@ def test_singularity_image_download_url(basic_residential_project_file): assert r.status_code == requests.codes.ok -@patch('buildstockbatch.base.BuildStockBatchBase.validate_measures_and_arguments') @patch('buildstockbatch.base.BuildStockBatchBase.validate_options_lookup') @patch('buildstockbatch.eagle.subprocess') -def test_user_cli(mock_subprocess, mock_validate_options, mock_validate_measures, basic_residential_project_file, +def test_user_cli(mock_subprocess, mock_validate_options, basic_residential_project_file, monkeypatch): - mock_validate_measures.return_value = True mock_validate_options.return_value = True project_filename, results_dir = basic_residential_project_file() @@ -195,8 +193,11 @@ def test_run_building_process(mocker, basic_residential_project_file): project_filename, results_dir = basic_residential_project_file(raw=True) results_dir = pathlib.Path(results_dir) - job_json = {'job_num': 1, 'batch': [(1, 0), (2, 0), (3, 0), (4, 0), - (1, None), (2, None), (3, None), (4, None)]} + job_json = { + 'job_num': 1, + 'batch': [(1, 0), (2, 0), (3, 0), (4, 0), (1, None), (2, None), (3, None), (4, None)], + 'n_datapoints': 8 + } with open(results_dir / 'job001.json', 'w') as f: json.dump(job_json, f) @@ -252,12 +253,16 @@ def compare_ts_parquets(source, dst): 
compare_ts_parquets(file, results_file) -def test_run_building_error_caught(mocker, basic_residential_project_file): +def test_run_building_error_caught(mocker, basic_residential_project_file): project_filename, results_dir = basic_residential_project_file() results_dir = pathlib.Path(results_dir) - job_json = {'job_num': 1, 'batch': [(1, 0)]} + job_json = { + 'job_num': 1, + 'batch': [(1, 0)], + 'n_datapoints': 1 + } with open(results_dir / 'job001.json', 'w') as f: json.dump(job_json, f) diff --git a/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml b/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml index 7df59bd7..294a860a 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml @@ -1,17 +1,33 @@ -stock_type: residential +schema_version: '0.3' buildstock_directory: test_openstudio_buildstock project_directory: project_singlefamilydetached weather_files_url: https://fake-url baseline: - n_datapoints: 30 n_buildings_represented: 81221016 - sampling_algorithm: quota -residential_simulation_controls: - timesteps_per_hr: 4 - begin_month: 1 - begin_day_of_month: 1.5 - end_month: 12 - end_day_of_month: 31 + +sampler: + type: residential_quota + args: + n_datapoints: 30 + +workflow_generator: + type: residential_default + args: + residential_simulation_controls: + timesteps_per_hr: 4 + begin_month: 1 + begin_day_of_month: 1.5 + end_month: 12 + end_day_of_month: 31 + timeseries_csv_export: + reporting_frequency: Huorly + # include_enduse_subcategories: true + output_variable: + - Zone Mean Air Temperature + simulation_output: + include_enduse_subcategory: true + reporting_measures: + - ReportingMeasure2 upgrades: - upgrade_name: good upgrade @@ -33,17 +49,3 @@ upgrades: apply_logic: - Insulation Unfinished Basement|Extra Argument reference_scenario: non-existing upgrade - -simulation_output: - include_enduse_subcategory: true - 
-timeseries_csv_export: - reporting_frequency: Huorly -# include_enduse_subcategories: true - output_variable: - - Zone Mean Air Temperature - -reporting_measures: - - ReportingMeasure2 - -schema_version: 0.2 diff --git a/buildstockbatch/test/test_inputs/enforce-validate-measures-good-2.yml b/buildstockbatch/test/test_inputs/enforce-validate-measures-good-2.yml index a0e2efeb..4a2da304 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-measures-good-2.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-measures-good-2.yml @@ -1,11 +1,28 @@ -stock_type: residential +schema_version: '0.3' buildstock_directory: test_openstudio_buildstock project_directory: project_singlefamilydetached weather_files_url: https://fake-url baseline: - n_datapoints: 30 n_buildings_represented: 81221016 - sampling_algorithm: quota + +sampler: + type: residential_quota + args: + n_datapoints: 30 + +workflow_generator: + type: residential_default + args: + simulation_output: + include_enduse_subcategories: true + timeseries_csv_export: + reporting_frequency: Timestep + include_enduse_subcategories: true + output_variables: + - Zone Mean Air Temperature + reporting_measures: + - ReportingMeasure1 + upgrades: - upgrade_name: cool upgrade options: @@ -29,13 +46,3 @@ upgrades: - Insulation Unfinished Basement|Extra Argument package_apply_logic: Vintage|1960s||Vintage|1940s reference_scenario: cool upgrade -simulation_output: - include_enduse_subcategories: true -timeseries_csv_export: - reporting_frequency: Timestep - include_enduse_subcategories: true - output_variables: - - Zone Mean Air Temperature -reporting_measures: - - ReportingMeasure1 -schema_version: 0.2 diff --git a/buildstockbatch/test/test_validation.py b/buildstockbatch/test/test_validation.py index bdb8b164..8e7a0cd0 100644 --- a/buildstockbatch/test/test_validation.py +++ b/buildstockbatch/test/test_validation.py @@ -93,7 +93,7 @@ def test_validation_integration(project_file, expected): # patch the 
validate_options_lookup function to always return true for this case with patch.object(BuildStockBatchBase, 'validate_options_lookup', lambda _: True), \ patch.object(BuildStockBatchBase, 'validate_measure_references', lambda _: True), \ - patch.object(BuildStockBatchBase, 'validate_measures_and_arguments', lambda _: True): + patch.object(BuildStockBatchBase, 'validate_workflow_generator', lambda _: True): if expected is not True: with pytest.raises(expected): BuildStockBatchBase.validate_project(project_file) @@ -131,7 +131,7 @@ def test_bad_measures(project_file): with LogCapture(level=logging.INFO) as logs: try: - BuildStockBatchBase.validate_measures_and_arguments(project_file) + BuildStockBatchBase.validate_workflow_generator(project_file) except ValidationError as er: er = str(er) warning_logs = filter_logs(logs, 'WARNING') @@ -154,7 +154,7 @@ def test_bad_measures(project_file): ]) def test_good_measures(project_file): with LogCapture(level=logging.INFO) as logs: - assert BuildStockBatchBase.validate_measures_and_arguments(project_file) + assert BuildStockBatchBase.validate_workflow_generator(project_file) warning_logs = filter_logs(logs, 'WARNING') error_logs = filter_logs(logs, 'ERROR') assert warning_logs == '' diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py index 54c4ada3..76fa37dd 100644 --- a/buildstockbatch/workflow_generator/residential.py +++ b/buildstockbatch/workflow_generator/residential.py @@ -82,21 +82,34 @@ def cfg_path_exists(cfg_path): a = cfg for path_item in path_items: try: - a = cfg[path_item] # noqa F841 + a = a[path_item] # noqa F841 except KeyError: return False return True - if 'reporting_measures' in cfg.keys(): - for reporting_measure in cfg['reporting_measures']: - measure_names[reporting_measure] = 'reporting_measures' + def get_cfg_path(cfg_path): + if cfg_path is None: + return None + path_items = cfg_path.split('.') + a = cfg + for path_item in path_items: + 
try: + a = a[path_item] + except KeyError: + return None + return a + + workflow_args = cfg['workflow_generator'].get('args', {}) + if 'reporting_measures' in workflow_args.keys(): + for reporting_measure in workflow_args['reporting_measures']: + measure_names[reporting_measure] = 'workflow_generator.args.reporting_measures' error_msgs = '' warning_msgs = '' for measure_name, cfg_key in measure_names.items(): measure_path = os.path.join(measures_dir, measure_name) - if cfg_path_exists(cfg_key) or cfg_key == 'residential_simulation_controls': + if cfg_path_exists(cfg_key) or cfg_key == 'workflow_generator.args.residential_simulation_controls': # if they exist in the cfg, make sure they exist in the buildstock checkout if not os.path.exists(measure_path): error_msgs += f"* {measure_name} does not exist in {buildstock_dir}. \n" @@ -130,7 +143,7 @@ def cfg_path_exists(cfg_path): else: expected_arguments[name] = argument.find('./type').text - for actual_argument_key in cfg[measure_names[measure_name]].keys(): + for actual_argument_key in get_cfg_path(measure_names[measure_name]).keys(): if actual_argument_key not in expected_arguments.keys(): error_msgs += f"* Found unexpected argument key {actual_argument_key} for "\ f"{measure_names[measure_name]} in yaml file. 
The available keys are: " \ @@ -140,7 +153,7 @@ def cfg_path_exists(cfg_path): required_args_no_default.pop(actual_argument_key, None) required_args_with_default.pop(actual_argument_key, None) - actual_argument_value = cfg[measure_names[measure_name]][actual_argument_key] + actual_argument_value = get_cfg_path(measure_names[measure_name])[actual_argument_key] expected_argument_type = expected_arguments[actual_argument_key] if type(expected_argument_type) is not list: From 03f3b6f73fb6854250e7e54524b7acd57cb07984 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 11 Jan 2021 13:20:40 -0700 Subject: [PATCH 11/21] changes to project_resstock_national.yml for testing --- project_resstock_national.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/project_resstock_national.yml b/project_resstock_national.yml index 521e668b..73fd3988 100644 --- a/project_resstock_national.yml +++ b/project_resstock_national.yml @@ -1,9 +1,9 @@ schema_version: '0.3' -buildstock_directory: ../OpenStudio-BuildStock # Relative to this file or absolute +buildstock_directory: ../resstock # Relative to this file or absolute project_directory: project_national # Relative to buildstock_directory output_directory: ../national_test_outputs # weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip -weather_files_path: ../ResStock_TMY3.zip # Relative to this file or absolute path to zipped weather files +weather_files_path: ../buildstock_2018.zip # Relative to this file or absolute path to zipped weather files sampler: type: residential_quota_downselect @@ -23,7 +23,7 @@ workflow_generator: include_enduse_subcategories: true baseline: - n_buildings_represented: 133172057 # Total number of residential dwelling units in contiguous United States, including unoccupied units, resulting from acensus tract level query of ACS 5-yr 2016 (i.e. 2012-2016), using this script: https://github.com/NREL/resstock-estimation/blob/master/sources/spatial/tsv_maker.py. 
+ n_buildings_represented: 133172057 # Total number of residential dwelling units in contiguous United States, including unoccupied units, resulting from a census tract level query of ACS 5-yr 2016 (i.e. 2012-2016), using this script: https://github.com/NREL/resstock-estimation/blob/master/sources/spatial/tsv_maker.py. upgrades: - upgrade_name: Triple-Pane Windows From 29ee6a2f66e59f2e9a891082517c765e29c1fc82 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Tue, 12 Jan 2021 08:55:26 -0700 Subject: [PATCH 12/21] fixing eagle errors --- buildstockbatch/base.py | 10 ++++++---- buildstockbatch/sampler/base.py | 4 ++-- project_resstock_national.yml | 16 ++++++++-------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index fab2154a..dbfa662b 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -69,10 +69,6 @@ def __init__(self, project_filename): self.os_sha = self.cfg.get('os_sha', self.DEFAULT_OS_SHA) logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}") - # Select a sampler - Sampler = self.get_sampler_class(self.cfg['sampler']['type']) - self.sampler = Sampler(self, **self.cfg['sampler'].get('args', {})) - @staticmethod def get_sampler_class(sampler_name): sampler_class_name = ''.join(x.capitalize() for x in sampler_name.strip().split('_')) + 'Sampler' @@ -84,6 +80,12 @@ def get_workflow_generator_class(workflow_generator_name): ''.join(x.capitalize() for x in workflow_generator_name.strip().split('_')) + 'WorkflowGenerator' return getattr(workflow_generator, workflow_generator_class_name) + @property + def sampler(self): + # Select a sampler + Sampler = self.get_sampler_class(self.cfg['sampler']['type']) + return Sampler(self, **self.cfg['sampler'].get('args', {})) + def path_rel_to_projectfile(self, x): return path_rel_to_file(self.project_filename, x) diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index 
b6a25e67..df51af4b 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -81,7 +81,7 @@ def run_sampling(self): return self._run_sampling_docker() else: assert self.container_runtime == ContainerRuntime.SINGULARITY - return self._run_sampling_singluarity() + return self._run_sampling_singularity() def _run_sampling_docker(self): """ @@ -91,7 +91,7 @@ def _run_sampling_docker(self): """ raise NotImplementedError - def _run_sampling_singluarity(self): + def _run_sampling_singularity(self): """ Execute the sampling in a singularity container diff --git a/project_resstock_national.yml b/project_resstock_national.yml index 73fd3988..38794f85 100644 --- a/project_resstock_national.yml +++ b/project_resstock_national.yml @@ -1,14 +1,14 @@ schema_version: '0.3' -buildstock_directory: ../resstock # Relative to this file or absolute +buildstock_directory: ../OpenStudio-BuildStock # Relative to this file or absolute project_directory: project_national # Relative to buildstock_directory -output_directory: ../national_test_outputs +output_directory: /scratch/nmerket/national_test_outputs2 # weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip -weather_files_path: ../buildstock_2018.zip # Relative to this file or absolute path to zipped weather files +weather_files_path: /shared-projects/buildstock/weather/buildstock_2018.zip # Relative to this file or absolute path to zipped weather files sampler: type: residential_quota_downselect args: - n_datapoints: 10 + n_datapoints: 100 # Uncomment and set specify logic you you want to downselect to a subset of the building stock resample: false logic: @@ -37,13 +37,13 @@ upgrades: eagle: n_jobs: 4 - minutes_per_sim: 30 + minutes_per_sim: 4 account: enduse sampling: - time: 20 + time: 5 postprocessing: - time: 60 - n_workers: 3 + time: 10 + n_workers: 1 # aws: # # The job_identifier should be unique, start with alpha, not include dashes, and limited to 10 chars or data loss can occur 
From b68a1b1f26cffe08540c49aac7f9348393e6049b Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Tue, 12 Jan 2021 11:09:56 -0700 Subject: [PATCH 13/21] fixing testing mock errors --- buildstockbatch/test/test_base.py | 10 ++++++---- buildstockbatch/test/test_eagle.py | 6 +++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index a0266c4e..b34fca92 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -12,7 +12,7 @@ import re import shutil import tempfile -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, PropertyMock import yaml from buildstockbatch.base import BuildStockBatchBase @@ -159,12 +159,14 @@ def test_downselect_integer_options(basic_residential_project_file, mocker): }) mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) + sampler_property_mock = mocker.patch.object(BuildStockBatchBase, 'sampler', new_callable=PropertyMock) + sampler_mock = mocker.MagicMock() + sampler_property_mock.return_value = sampler_mock + sampler_mock.run_sampling = MagicMock(return_value=buildstock_csv) bsb = BuildStockBatchBase(project_filename) - run_sampling_mock = MagicMock(return_value=buildstock_csv) - mocker.patch.object(bsb.sampler, 'run_sampling', run_sampling_mock) bsb.sampler.run_sampling() - run_sampling_mock.assert_called_once() + sampler_mock.run_sampling.assert_called_once() with open(buildstock_csv, 'r', newline='') as f: cf = csv.DictReader(f) for row in cf: diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index 708550a3..34b6c4f2 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ -223,10 +223,14 @@ def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=Fal return sim_id, sim_dir mocker.patch.object(EagleBatch, 'make_sim_dir', 
make_sim_dir_mock) + sampler_prop_mock = mocker.patch.object(EagleBatch, 'sampler', new_callable=mocker.PropertyMock) + sampler_mock = mocker.MagicMock() + sampler_prop_mock.return_value = sampler_mock + sampler_mock.run_sampling = mocker.MagicMock(return_value='buildstock.csv') b = EagleBatch(project_filename) - mocker.patch.object(b.sampler, 'run_sampling', lambda: "buildstock.csv") b.run_batch(sampling_only=True) # so the directories can be created + sampler_mock.run_sampling.assert_called_once() b.run_job_batch(1) # check results job-json From 331a251ded480803471e107794f93ea7fcfaf049 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 22 Jan 2021 09:57:44 -0700 Subject: [PATCH 14/21] documentation, migration guide, and changelog --- buildstockbatch/__version__.py | 7 +- buildstockbatch/schemas/v0.3.yaml | 1 - .../workflow_generator/residential.py | 1 - docs/changelog/changelog_dev.rst | 8 + docs/changelog/index.rst | 3 +- docs/changelog/migration_0_20.rst | 170 ++++++++++++++ docs/filtering_logic.rst | 100 +++++++++ docs/index.rst | 3 + docs/project_defn.rst | 212 ++---------------- docs/samplers/commercial_sobol.rst | 21 ++ docs/samplers/index.rst | 12 + docs/samplers/precomputed.rst | 21 ++ docs/samplers/residential_quota.rst | 19 ++ .../samplers/residential_quota_downselect.rst | 42 ++++ .../commercial_default.rst | 33 +++ docs/workflow_generators/index.rst | 9 + .../residential_default.rst | 71 ++++++ project_resstock_national.yml | 50 ++--- 18 files changed, 562 insertions(+), 221 deletions(-) create mode 100644 docs/changelog/migration_0_20.rst create mode 100644 docs/filtering_logic.rst create mode 100644 docs/samplers/commercial_sobol.rst create mode 100644 docs/samplers/index.rst create mode 100644 docs/samplers/precomputed.rst create mode 100644 docs/samplers/residential_quota.rst create mode 100644 docs/samplers/residential_quota_downselect.rst create mode 100644 docs/workflow_generators/commercial_default.rst create mode 100644 
docs/workflow_generators/index.rst create mode 100644 docs/workflow_generators/residential_default.rst diff --git a/buildstockbatch/__version__.py b/buildstockbatch/__version__.py index 8fff09a1..b51df288 100644 --- a/buildstockbatch/__version__.py +++ b/buildstockbatch/__version__.py @@ -1,9 +1,12 @@ +import datetime as dt + + __title__ = 'buildstockbatch' __description__ = 'Executing BuildStock projects on batch infrastructure.' __url__ = 'http://github.com/NREL/buildstockbatch' -__version__ = '0.19' +__version__ = '0.20' __schema_version__ = '0.3' __author__ = 'Noel Merket' __author_email__ = 'noel.merket@nrel.gov' __license__ = 'BSD-3' -__copyright__ = 'Copyright 2020 The Alliance for Sustainable Energy' +__copyright__ = 'Copyright {} The Alliance for Sustainable Energy'.format(dt.date.today().year) diff --git a/buildstockbatch/schemas/v0.3.yaml b/buildstockbatch/schemas/v0.3.yaml index 104b35ea..c848eb98 100644 --- a/buildstockbatch/schemas/v0.3.yaml +++ b/buildstockbatch/schemas/v0.3.yaml @@ -5,7 +5,6 @@ weather_files_path: str(required=False) weather_files_url: str(required=False) sampler: include('sampler-spec', required=True) workflow_generator: include('workflow-generator-spec', required=True) - eagle: include('hpc-spec', required=False) aws: include('aws-spec', required=False) output_directory: str(required=False) diff --git a/buildstockbatch/workflow_generator/residential.py b/buildstockbatch/workflow_generator/residential.py index 76fa37dd..fb18551c 100644 --- a/buildstockbatch/workflow_generator/residential.py +++ b/buildstockbatch/workflow_generator/residential.py @@ -245,7 +245,6 @@ def create_osw(self, sim_id, building_id, upgrade_idx): } res_sim_ctl_args.update(workflow_args['residential_simulation_controls']) - # FIXME: The sample weight will likely be wrong for a downselect. 
bld_exist_model_args = { 'building_id': building_id, 'workflow_json': 'measure-info.json', diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 75eeab3d..1316f47a 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -14,3 +14,11 @@ Development Changelog This is an example change. Please copy and paste it - for valid tags please refer to ``conf.py`` in the docs directory. ``pullreq`` should be set to the appropriate pull request number and ``tickets`` to any related github issues. These will be automatically linked in the documentation. + + .. change:: + :tags: general, feature + :pullreq: 187 + :tickets: 147 + + Changed the project configuration for samplers and workflow generators. + See migration guide for details. diff --git a/docs/changelog/index.rst b/docs/changelog/index.rst index 2de2c822..abf5bdba 100644 --- a/docs/changelog/index.rst +++ b/docs/changelog/index.rst @@ -11,7 +11,7 @@ Current Migration Guide .. toctree:: :titlesonly: - migration_0_19 + migration_0_20 Change logs ----------- @@ -36,4 +36,5 @@ Older Migration Guides .. toctree:: :titlesonly: + migration_0_19 migration_0_18 \ No newline at end of file diff --git a/docs/changelog/migration_0_20.rst b/docs/changelog/migration_0_20.rst new file mode 100644 index 00000000..5aa8fcb7 --- /dev/null +++ b/docs/changelog/migration_0_20.rst @@ -0,0 +1,170 @@ +=================================== +What's new in buildstockbatch 0.20? +=================================== + +.. admonition:: About this Document + + This document describes changes between buildstockbatch version 0.19 and buildstockbatch version 0.20 + +Introduction +============ + +This guide introduces what's new in buildstockbatch version 0.20 and also +documents changes which affect users migrating their analyses from 0.19. 
+
+Please carefully review the section on schema changes as the minimum schema
+version has been updated to the new version, 0.3, and there are
+non-backwards-compatible changes in the schema update.
+
+General
+=======
+
+Schema Version Update
+---------------------
+
+The input schema has been updated to version 0.3. Project configuration files
+will need to be upgraded to use the new format. The ``schema_version`` key now
+needs to be a string, so put some quotes around the version number.
+
+.. code-block:: yaml
+
+    schema_version: '0.3'
+
+There are no more ``stock_type``, ``sampling_algorithm``, or ``downselect``
+keys. This information is defined in the new ``sampler`` and ``workflow_generator`` keys.
+
+Sampler Specification
+---------------------
+
+The sampler now has its own section in the configuration file. In it, you
+specify which kind of sampler to use and the arguments to pass to it. These
+arguments used to be scattered all around the project configuration file and it
+caused some confusion when arguments would only apply to some kinds of samplers.
+They are now consolidated under the ``sampler`` key and each sampler type
+specifies its own options. Complete documentation of the available samplers and
+their arguments is at :doc:`../samplers/index`.
+
+
+Here are a few examples for common scenarios:
+
+Residential Quota Sampling
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+See :doc:`../samplers/residential_quota`.
+
+Old Spec:
+
+.. code-block:: yaml
+
+    schema_version: 0.2
+    stock_type: residential
+    baseline:
+        sampling_algorithm: quota
+        n_datapoints: 350000
+
+New Spec:
+
+.. code-block:: yaml
+
+    schema_version: '0.3'
+    sampler:
+        type: residential_quota
+        args:
+            n_datapoints: 350000
+
+Residential Quota with Downselecting
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Downselecting is no longer a separate config item, it is implemented in a
+different sampler type. See :doc:`../samplers/residential_quota_downselect`.
+
+Old Spec:
+
+..
code-block:: yaml
+
+    schema_version: 0.2
+    stock_type: residential
+    baseline:
+        sampling_algorithm: quota
+        n_datapoints: 350000
+    downselect:
+        resample: false
+        logic:
+            - Geometry Building Type RECS|Single-Family Detached
+            - Vacancy Status|Occupied
+
+New Spec:
+
+.. code-block:: yaml
+
+    schema_version: '0.3'
+    sampler:
+        type: residential_quota_downselect
+        args:
+            n_datapoints: 350000
+            resample: false
+            logic:
+                - Geometry Building Type RECS|Single-Family Detached
+                - Vacancy Status|Occupied
+
+Precomputed Sampling
+~~~~~~~~~~~~~~~~~~~~
+
+The ``baseline.precomputed_sample`` key has been removed and the precomputed
+sampling is its own sampler type. Downselecting is no longer permitted with a
+precomputed sample. If you want to downselect, please modify your buildstock.csv
+file to only include the buildings you want in your sample. See
+:doc:`../samplers/precomputed`.
+
+Old Spec:
+
+.. code-block:: yaml
+
+    schema_version: 0.2
+    stock_type: residential
+    baseline:
+        precomputed_sample: path/to/buildstock.csv
+        n_datapoints: 350000
+
+New Spec:
+
+.. code-block:: yaml
+
+    schema_version: '0.3'
+    sampler:
+        type: precomputed
+        args:
+            sample_file: path/to/buildstock.csv  # n_datapoints determined from csv file
+
+Workflow Generator Specification
+--------------------------------
+
+The workflow generator has changed as well. Many inputs to the workflow were
+previously at the top level in the project configuration file. This was again
+confusing because different config entries only applied to certain workflow
+generators. In previous versions, the appropriate workflow generator was
+inferred based on the ``stock_type`` key. Now it is explicitly defined and the
+arguments to be passed to it are defined under the ``workflow_generator`` key.
+See :doc:`../workflow_generators/index`.
+
+Old Spec:
+
+..
code-block:: yaml + + schema_version: 0.2 + stock_type: residential + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true + +New Spec: + +.. code-block:: yaml + + schema_version: '0.3' + workflow_generator: + type: residential_default + args: + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true diff --git a/docs/filtering_logic.rst b/docs/filtering_logic.rst new file mode 100644 index 00000000..b4bf9518 --- /dev/null +++ b/docs/filtering_logic.rst @@ -0,0 +1,100 @@ +.. _filtering-logic: + +Filtering Logic +~~~~~~~~~~~~~~~ + +There are several places where logic is applied to filter simulations by the option values. +This is done by specifying the parameter|option criteria you want to include or exclude along +with the appropriate logical operator. This is done in the YAML syntax as follows: + +And +... + +To include certain parameter option combinations, specify them in a list or by using the ``and`` key. + +.. code-block:: yaml + + - Vintage|1950s + - Location Region|CR02 + +.. code-block:: yaml + + and: + - Vintage|1950s + - Location Region|CR02 + +The above example would include buildings in climate region 2 built in the 1950s. A list, except for that inside an +``or`` block is always interpreted as ``and`` block. + +Or +.. + +.. code-block:: yaml + + or: + - Vintage|<1950 + - Vintage|1950s + - Vintage|1960s + +This example would include buildings built before 1970. + +Not +... + +.. code-block:: yaml + + not: Heating Fuel|Propane + +This will select buildings that does not have Propane Fuel type. + +.. code-block:: yaml + + not: + - Vintage|1950s + - Location Region|CR02 + +This will select buildings that are not both Vintage 1950s **and** in location region CR02. It should be noted that this +**will** select buildings of 1950s vintage provided they aren't in region CR02. It will also select buildings in +location CR02 provided they aren't of vintage 1950s. 
If only those buildings that are neither of Vintage 1950s nor in +region CR02 needs to be selected, the following logic should be used: + +.. code-block:: yaml + + - not: Vintage|1950s + - not: Location Region|CR02 + +or, + +.. code-block:: yaml + + and: + - not: Vintage|1950s + - not: Location Region|CR02 + +or, + +.. code-block:: yaml + + not: + or: + - Vintage|1950s + - Location Region|CR02 + + +Combining Logic +............... + +These constructs can be combined to declare arbitrarily complex logic. Here is an example: + +.. code-block:: yaml + + - or: + - Vintage|<1950 + - Vintage|1950s + - Vintage|1960s + - not: Geometry Garage|3 Car + - not: Geometry House Size|3500+ + - Geometry Stories|1 + +This will select homes that were built before 1970, don't have three car garages, are less +than 3500 sq.ft., and have only one storey. diff --git a/docs/index.rst b/docs/index.rst index e033511f..68a27c9f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,9 @@ supercomputer, Eagle. installation project_defn + samplers/index + workflow_generators/index + filtering_logic run_sims diff --git a/docs/project_defn.rst b/docs/project_defn.rst index e47405f9..9cff6462 100644 --- a/docs/project_defn.rst +++ b/docs/project_defn.rst @@ -20,8 +20,7 @@ First we tell it what project we're running with the following keys: - ``buildstock_directory``: The absolute (or relative to this YAML file) path of the `OpenStudio-BuildStock`_ repository. - ``project_directory``: The relative (to the ``buildstock_directory``) path of the project. -- ``schema_version``: The version of the project yaml file to use and validate - currently the minimum version is - ``0.2``. +- ``schema_version``: The version of the project yaml file to use and validate - currently the minimum version is ``0.3``. .. 
_OpenStudio-BuildStock: https://github.com/NREL/OpenStudio-BuildStock @@ -49,37 +48,40 @@ To use your own custom weather files for a specific location, this can be done i - Rename your custom .epw weather file to match the references in your local `options_lookup.tsv `_ in the ``resources`` folder. +Sampler +~~~~~~~ + +The ``sampler`` key defines the type of building sampler to be used for the batch of simulations. The purpose of the sampler is to enumerate the buildings and building characteristics to be run as part of the building stock simulation. It has two arguments, ``type`` and ``args``. + +- ``type`` tells buildstockbatch which sampler class to use. +- ``args`` are passed to the sampler to define how it is run. + +Different samplers are available for ResStock and ComStock and variations thereof. Details of what samplers are available and their arguments are available at :doc:`samplers/index`. + +Workflow Generator +~~~~~~~~~~~~~~~~~~ + +The ``workflow_generator`` key defines which workflow generator will be used to transform a building description from a set of high level characteristics into an OpenStudio workflow that in turn generates the detailed EnergyPlus model. It has two arguments, ``type`` and ``args``. + +- ``type`` tells buildstockbatch which workflow generator class to use. +- ``args`` are passed to the sampler to define how it is run. + +Different workflow generators are available for ResStock and ComStock and variations thereof. Details of what workflow generators and their arguments are available at :doc:`workflow_generators/index`. + Baseline simulations incl. sampling algorithm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Information about baseline simulations are listed under the ``baseline`` key. 
-- ``sampling_algorithm``: The sampling algorithm to use for this project - the default residential option is ``quota``, - the default commercial option is ``sobol`` (this is not supported for residential projects), or if using a previously - computed buildstock.csv file use the ``precomputed`` sampler. -- ``n_datapoints``: The number of buildings to sample and run for the baseline case if using the ``sobol`` or ``quota`` - sampling algorithms. - ``n_buildings_represented``: The number of buildings that this sample is meant to represent. -- ``precomputed_sample``: Filepath of csv containing pre-defined building options to use in the precomputed sampling - routine. This can be absolute or relative (to this file). - ``skip_sims``: Include this key to control whether the set of baseline simulations are run. The default (i.e., when this key is not included) is to run all the baseline simulations. No results csv table with baseline characteristics will be provided when the baseline simulations are skipped. -- ``measures_to_ignore``: **ADVANCED FEATURE (USE WITH CAUTION--ADVANCED USERS/WORKFLOW DEVELOPERS ONLY)** to optionally - not run one or more measures (specified as a list) that are referenced in the options_lookup.tsv but should be skipped - during model creation. The measures are referenced by their directory name. This feature is currently only implemented - for residential models constructed with the BuildExistingModel measure. - ``custom_gems``: **VERY ADVANCED FEATURE - NOT SUPPORTED - ONLY ATTEMPT USING SINGULARITY CONTAINERS ON EAGLE** This activates the ``bundle`` and ``bundle_path`` interfaces in the OpenStudio CLI to allow for custom gem packages (needed to support rapid development of the standards gem.) This actually works extraordinarily well if the singularity image is properly configured but that's easier said than done. The la100 branch on the nrel/docker-openstudio repo is a starting place if this is required. 
-- ``osw_template``: An optional key allowing for switching of which workflow generator to use within the commercial or - residential classes. -- ``include_qaqc``: An optional flag - only configured for commercial at the moment - which when set to ``True`` runs - some additional measures that check a number of key (and often incorrectly configured) part of the simulation inputs - as well as providing additional model QAQC data streams on the output side. Recommended for test runs but not - production analyses. OpenStudio Version Overrides ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -88,19 +90,6 @@ This is a feature only used by ComStock at the moment. Please refer to the ComSt details on the correct configuration. This is noted here to explain the presence of two keys in the version ``0.2`` schema: ``os_version`` and ``os_sha``. -Residential Simulation Controls -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If the key ``residential_simulation_controls`` is in the project yaml file, the parameters to the -`ResidentialSimulationControls `_ -measure will be modified from their defaults to what is specified there. The defaults are: - -.. include:: ../buildstockbatch/workflow_generator/residential.py - :code: python - :start-after: res_sim_ctl_args = { - :end-before: } - - Upgrade Scenarios ~~~~~~~~~~~~~~~~~ @@ -133,171 +122,12 @@ following properties: - ``reference_scenario``: (optional) The `upgrade_name` which should act as a reference to this upgrade to calculate savings. All this does is that reference_scenario show up as a column in results csvs alongside the upgrade name; Buildstockbatch will not do the savings calculation. -Note: ``ApplyUpgrade`` is applied automatically when the ``upgrades`` key is supplied. Do not add to the list of reporting measures. 
- -Simulation Annual Outputs Options -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Include the ``simulation_output`` key to optionally include annual totals for end use subcategories (i.e., interior equipment broken out by end use) along with -the usual annual simulation results. This argument is passed directly into the -`SimulationOutputReport measure `_ -in OpenStudio-BuildStock. Please refer to the measure argument there to determine what to set it to in your config file. -Note that this measure and presence of any arguments may be different depending on which version of OpenStudio-BuildStock you're using. -The best thing you can do is to verify that it works with what is in your branch. - -Time Series Export Options -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Include the ``timeseries_csv_export`` key to include hourly or subhourly results along with the usual -annual simulation results. These arguments are passed directly to the -`TimeseriesCSVExport measure `_ -in OpenStudio-BuildStock. Please refer to the measure arguments there to determine what to set them to in your config file. -Note that this measure and arguments may be different depending on which version of OpenStudio-BuildStock you're using. -The best thing you can do is to verify that it works with what is in your branch. - -Additional Reporting Measures -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Include the ``reporting_measures`` key along with a list of reporting measure names to apply additional reporting measures (that require no arguments) to the workflow. -Any columns reported by these additional measures will be appended to the results csv. -Note: For upgrade runs, do not add ``ApplyUpgrade`` to the list of reporting measures, doing so will cause run to fail prematurely. ``ApplyUpgrade`` is applied automatically when the ``upgrades`` key is supplied. Output Directory ~~~~~~~~~~~~~~~~ ``output_directory`` specifies where the outputs of the simulation should be stored. 
-Down Selecting the Sampling Space -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Sometimes it is desirable to run a stock simulation of a subset of what is included in a project. -For instance one might want to run the simulation only in one climate region or for certain vintages. -However, it can be a considerable effort to create a new project. Adding the ``downselect`` key to -the project file permits a user to specify filters of what buildings should be simulated. - -Downselecting can be performed in one of two ways: with and without resampling. -Downselecting with resampling samples twice, once to determine how much smaller the -set of sampled buildings becomes when it is filtered down and again with a larger sample so -the final set of sampled buildings is at or near the number specified in ``n_datapoints``. - -Downselecting without resampling skips that step. In this case the total sampled buildings returned -will be the number left over after sampling the entire stock and then filtering down to the -buildings that meet the criteria. Unlike downselect with resampling, downselect without resampling can be used with -buildstock.csv too. So, instead of starting with a fresh set of samples and filtering them based on the dowselect logic, -the sampler starts with the buildstock.csv provided, and filters out the buildings using the downselect logic. - -The downselect block works as follows: - -.. code-block:: yaml - - downselect: - resample: true - logic: - - Heating Fuel|Natural Gas - - Location Region|CR02 - -For details on how to specify the filters, see :ref:`filtering-logic`. - -.. _filtering-logic: - -Filtering Logic -~~~~~~~~~~~~~~~ - -There are several places where logic is applied to filter simulations by the option values. -This is done by specifying the parameter|option criteria you want to include or exclude along -with the appropriate logical operator. This is done in the YAML syntax as follows: - -And -... 
- -To include certain parameter option combinations, specify them in a list or by using the ``and`` key. - -.. code-block:: yaml - - - Vintage|1950s - - Location Region|CR02 - -.. code-block:: yaml - - and: - - Vintage|1950s - - Location Region|CR02 - -The above example would include buildings in climate region 2 built in the 1950s. A list, except for that inside an -``or`` block is always interpreted as ``and`` block. - -Or -.. - -.. code-block:: yaml - - or: - - Vintage|<1950 - - Vintage|1950s - - Vintage|1960s - -This example would include buildings built before 1970. - -Not -... - -.. code-block:: yaml - - not: Heating Fuel|Propane - -This will select buildings that does not have Propane Fuel type. - -.. code-block:: yaml - - not: - - Vintage|1950s - - Location Region|CR02 - -This will select buildings that are not both Vintage 1950s **and** in location region CR02. It should be noted that this -**will** select buildings of 1950s vintage provided they aren't in region CR02. It will also select buildings in -location CR02 provided they aren't of vintage 1950s. If only those buildings that are neither of Vintage 1950s nor in -region CR02 needs to be selected, the following logic should be used: - -.. code-block:: yaml - - - not: Vintage|1950s - - not: Location Region|CR02 - -or, - -.. code-block:: yaml - - and: - - not: Vintage|1950s - - not: Location Region|CR02 - -or, - -.. code-block:: yaml - - not: - or: - - Vintage|1950s - - Location Region|CR02 - - -Combining Logic -............... - -These constructs can be combined to declare arbitrarily complex logic. Here is an example: - -.. code-block:: yaml - - - or: - - Vintage|<1950 - - Vintage|1950s - - Vintage|1960s - - not: Geometry Garage|3 Car - - not: Geometry House Size|3500+ - - Geometry Stories|1 - -This will select homes that were built before 1970, don't have three car garages, are less -than 3500 sq.ft., and have only one storey. 
-
 Eagle Configuration
 ~~~~~~~~~~~~~~~~~~~
diff --git a/docs/samplers/commercial_sobol.rst b/docs/samplers/commercial_sobol.rst
new file mode 100644
index 00000000..0cb5a114
--- /dev/null
+++ b/docs/samplers/commercial_sobol.rst
@@ -0,0 +1,21 @@
+Commercial Sobol Sampler
+------------------------
+
+.. todo::
+
+    Write the description of this sampler.
+
+Configuration Example
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: yaml
+
+    sampler:
+        type: commercial_sobol
+        args:
+            n_datapoints: 350000
+
+Arguments
+~~~~~~~~~
+
+- ``n_datapoints``: The number of datapoints to sample.
diff --git a/docs/samplers/index.rst b/docs/samplers/index.rst
new file mode 100644
index 00000000..10e117de
--- /dev/null
+++ b/docs/samplers/index.rst
@@ -0,0 +1,12 @@
+Samplers
+--------
+
+A sampler is a class whose main function is to enumerate the buildings and building characteristics of a ResStock or ComStock batch simulation. Several samplers are available:
+
+.. toctree::
+    :maxdepth: 1
+
+    residential_quota
+    residential_quota_downselect
+    precomputed
+    commercial_sobol
diff --git a/docs/samplers/precomputed.rst b/docs/samplers/precomputed.rst
new file mode 100644
index 00000000..3d589288
--- /dev/null
+++ b/docs/samplers/precomputed.rst
@@ -0,0 +1,21 @@
+Precomputed Sampler
+-------------------
+
+The Precomputed Sampler provides a way to directly provide buildstockbatch a sample of buildings to simulate. This can be useful for a variety of cases, including where you previously ran sampling for ResStock or ComStock and want to rerun the same set of buildings with a different set of upgrades.
+
+This sampler cannot be used with a downselect (i.e. there is no precomputed downselect sampler). To downselect the buildings in a precomputed sample, simply remove the buildings you don't want to run from the sample file (buildstock.csv).
+
+Configuration Example
+~~~~~~~~~~~~~~~~~~~~~
+
+..
code-block:: yaml + + sampler: + type: precomputed + args: + sample_file: path/to/buildstock.csv + +Arguments +~~~~~~~~~ + +- ``sample_file``: A csv file containing the building sample--one row per building. The format is that the first column is the building_id, usually starting at one and incrementing from there, and following columns each represent a building characteristic. The characteristic columns expected depend on the workflow generator to be used (ResStock or ComStock). \ No newline at end of file diff --git a/docs/samplers/residential_quota.rst b/docs/samplers/residential_quota.rst new file mode 100644 index 00000000..e91f2aed --- /dev/null +++ b/docs/samplers/residential_quota.rst @@ -0,0 +1,19 @@ +Residential Quota Sampler +------------------------- + +The Residential Quota sampler utilizes a `quota-based sampling method `_ to determine the buildings to simulate. It is the primary sampling algorithm used in ResStock. + +Configuration Example +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: yaml + + sampler: + type: residential_quota + args: + n_datapoints: 350000 + +Arguments +~~~~~~~~~ + +- ``n_datapoints``: The number of datapoints to sample. diff --git a/docs/samplers/residential_quota_downselect.rst b/docs/samplers/residential_quota_downselect.rst new file mode 100644 index 00000000..69b4bf87 --- /dev/null +++ b/docs/samplers/residential_quota_downselect.rst @@ -0,0 +1,42 @@ +Residential Quota Downselect Sampler +------------------------------------ + +Sometimes it is desirable to run a stock simulation of a subset of what is +included in a project. For instance one might want to run the simulation only in +one climate region or for certain vintages. However, it can be a considerable +effort to create a new project and modify the housing characteristic +distributions. The Residential Quota Downselect sampler adds a downselection +capability to the :doc:`residential_quota`. + +Downselecting can be performed in one of two ways: with and without resampling. 
+Downselecting with resampling samples twice, once to determine how much smaller +the set of sampled buildings becomes when it is filtered down and again with a +larger sample so the final set of sampled buildings is at or near the number +specified in ``n_datapoints``. + +Downselecting without resampling skips that step. In this case the total sampled +buildings returned will be the number left over after sampling the entire stock +and then filtering down to the buildings that meet the criteria. + + + +Configuration Example +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: yaml + + sampler: + type: residential_quota_downselect + args: + n_datapoints: 350000 + resample: false + logic: + - Geometry Building Type RECS|Single-Family Detached + - Vacancy Status|Occupied + +Arguments +~~~~~~~~~ + +- ``n_datapoints``: The number of datapoints to sample. +- ``logic``: The criteria to apply to remove buildings from the simulation. For details on how to specify the filters, see :ref:`filtering-logic`. +- ``resample``: boolean, whether to run the sampling twice with a goal to have the downselected number of datapoints be equal to ``n_datapoints``. diff --git a/docs/workflow_generators/commercial_default.rst b/docs/workflow_generators/commercial_default.rst new file mode 100644 index 00000000..a4c60fa2 --- /dev/null +++ b/docs/workflow_generators/commercial_default.rst @@ -0,0 +1,33 @@ +Commercial Workflow Generator +----------------------------- + +Configuration Example +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: yaml + + workflow_generator: + type: commercial_default + args: + include_qaqc: true + measures: + - measure_dir_name: MyMeasure + arguments: + arg1: val1 + arg2: val2 + - measure_dir_name: MyOtherMeasure + arguments: + arg3: val3 + + +Arguments +~~~~~~~~~ +- ``measures`` (optional): Add these optional measures to the end of your workflow. + + - ``measure_dir_name``: Name of measure directory. + - ``arguments``: map of key, value arguments to pass to the measure. 
+ +- ``include_qaqc``: (optional), when set to ``True`` runs some additional + measures that check a number of key (and often incorrectly configured) parts of + the simulation inputs as well as providing additional model QAQC data streams + on the output side. Recommended for test runs but not production analyses. diff --git a/docs/workflow_generators/index.rst b/docs/workflow_generators/index.rst new file mode 100644 index 00000000..a72bce29 --- /dev/null +++ b/docs/workflow_generators/index.rst @@ -0,0 +1,9 @@ +Workflow Generators +------------------- + +.. toctree:: + :maxdepth: 1 + + residential_default + commercial_default + \ No newline at end of file diff --git a/docs/workflow_generators/residential_default.rst b/docs/workflow_generators/residential_default.rst new file mode 100644 index 00000000..5fb04d77 --- /dev/null +++ b/docs/workflow_generators/residential_default.rst @@ -0,0 +1,71 @@ +Residential Workflow Generator +------------------------------ + +Configuration Example +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: yaml + + workflow_generator: + type: residential_default + args: + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true + +Arguments +~~~~~~~~~ + +- ``measures_to_ignore`` (optional): **ADVANCED FEATURE (USE WITH + CAUTION--ADVANCED USERS/WORKFLOW DEVELOPERS ONLY)** to optionally + not run one or more measures (specified as a list) that are referenced in the + options_lookup.tsv but should be skipped during model creation. The measures + are referenced by their directory name. + +- ``residential_simulation_controls`` update the arguments to the + `ResidentialSimulationControls`_ measure. See + :ref:`residential-sim-ctrl-defaults` for current defaults. + +- ``measures`` (optional): Add these optional measures to the end of your workflow. + + - ``measure_dir_name``: Name of measure directory. + - ``arguments``: map of key, value arguments to pass to the measure. 
+ +- ``simulation_output`` (optional): optionally include annual totals for end use + subcategories (i.e., interior equipment broken out by end use) along with the + usual annual simulation results. This argument is passed directly into the + `SimulationOutputReport`_ measure in OpenStudio-BuildStock. Please refer to + the measure argument there to determine what to set it to in your config file. + Note that this measure and presence of any arguments may be different + depending on which version of OpenStudio-BuildStock you're using. The best + thing you can do is to verify that it works with what is in your branch. + + +- ``timeseries_csv_export`` (optional): include hourly or subhourly results + along with the usual annual simulation results. These arguments are passed + directly to the `TimeseriesCSVExport`_ measure in resstock. Please refer to + the measure arguments there to determine what to set them to in your config + file. Note that this measure and arguments may be different depending on which + version of OpenStudio-BuildStock you're using. The best thing you can do is to + verify that it works with what is in your branch. + +- ``reporting_measures`` (optional): a list of reporting measure names to apply + additional reporting measures (that require no arguments) to the workflow. Any + columns reported by these additional measures will be appended to the results + csv. Note: For upgrade runs, do not add ``ApplyUpgrade`` to the list of + reporting measures, doing so will cause run to fail prematurely. + ``ApplyUpgrade`` is applied automatically when the ``upgrades`` key is supplied. + +.. _ResidentialSimulationControls: https://github.com/NREL/OpenStudio-BuildStock/blob/master/measures/ResidentialSimulationControls/measure.xml +.. _SimulationOutputReport: https://github.com/NREL/OpenStudio-BuildStock/blob/master/measures/SimulationOutputReport/measure.xml +.. 
_TimeseriesCSVExport: https://github.com/NREL/resstock/blob/master/measures/TimeseriesCSVExport/measure.xml + +.. _residential-sim-ctrl-defaults: + +Residential Simulation Control Defaults +....................................... + +.. include:: ../../buildstockbatch/workflow_generator/residential.py + :code: python + :start-after: res_sim_ctl_args = { + :end-before: } diff --git a/project_resstock_national.yml b/project_resstock_national.yml index 38794f85..122f4b7d 100644 --- a/project_resstock_national.yml +++ b/project_resstock_national.yml @@ -8,7 +8,7 @@ weather_files_path: /shared-projects/buildstock/weather/buildstock_2018.zip # R sampler: type: residential_quota_downselect args: - n_datapoints: 100 + n_datapoints: 350000 # Uncomment and set specify logic you you want to downselect to a subset of the building stock resample: false logic: @@ -45,28 +45,28 @@ eagle: time: 10 n_workers: 1 -# aws: -# # The job_identifier should be unique, start with alpha, not include dashes, and limited to 10 chars or data loss can occur -# job_identifier: test_proj -# s3: -# bucket: resbldg-datasets -# prefix: testing/user_test -# emr: -# slave_instance_count: 1 -# region: us-west-2 -# use_spot: true -# batch_array_size: 10 -# # To receive email updates on job progress accept the request to receive emails that will be sent from Amazon -# notifications_email: user@nrel.gov +aws: + # The job_identifier should be unique, start with alpha, not include dashes, and limited to 10 chars or data loss can occur + job_identifier: test_proj + s3: + bucket: resbldg-datasets + prefix: testing/user_test + emr: + slave_instance_count: 1 + region: us-west-2 + use_spot: true + batch_array_size: 10 + # To receive email updates on job progress accept the request to receive emails that will be sent from Amazon + notifications_email: user@nrel.gov -# postprocessing: -# aggregate_timeseries: true -# aws: -# region_name: 'us-west-2' -# s3: -# bucket: resbldg-datasets -# prefix: 
resstock-athena/calibration_runs_new -# athena: -# glue_service_role: service-role/AWSGlueServiceRole-default -# database_name: testing -# max_crawling_time: 300 #time to wait for the crawler to complete before aborting it +postprocessing: + aggregate_timeseries: true + aws: + region_name: 'us-west-2' + s3: + bucket: resbldg-datasets + prefix: resstock-athena/calibration_runs_new + athena: + glue_service_role: service-role/AWSGlueServiceRole-default + database_name: testing + max_crawling_time: 300 #time to wait for the crawler to complete before aborting it From 2f12c9ec27592ccb93a737da395f87a16401a506 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Thu, 28 Jan 2021 10:27:07 -0700 Subject: [PATCH 15/21] cleaning up a few more OpenStudio-BuildStock to resstock references --- docs/workflow_generators/residential_default.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/workflow_generators/residential_default.rst b/docs/workflow_generators/residential_default.rst index 5fb04d77..4085d357 100644 --- a/docs/workflow_generators/residential_default.rst +++ b/docs/workflow_generators/residential_default.rst @@ -34,19 +34,18 @@ Arguments - ``simulation_output`` (optional): optionally include annual totals for end use subcategories (i.e., interior equipment broken out by end use) along with the usual annual simulation results. This argument is passed directly into the - `SimulationOutputReport`_ measure in OpenStudio-BuildStock. Please refer to + `SimulationOutputReport`_ measure in resstock. Please refer to the measure argument there to determine what to set it to in your config file. Note that this measure and presence of any arguments may be different - depending on which version of OpenStudio-BuildStock you're using. The best + depending on which version of resstock you're using. The best thing you can do is to verify that it works with what is in your branch. 
- - ``timeseries_csv_export`` (optional): include hourly or subhourly results along with the usual annual simulation results. These arguments are passed directly to the `TimeseriesCSVExport`_ measure in resstock. Please refer to the measure arguments there to determine what to set them to in your config file. Note that this measure and arguments may be different depending on which - version of OpenStudio-BuildStock you're using. The best thing you can do is to + version of resstock you're using. The best thing you can do is to verify that it works with what is in your branch. - ``reporting_measures`` (optional): a list of reporting measure names to apply @@ -56,8 +55,8 @@ Arguments reporting measures, doing so will cause run to fail prematurely. ``ApplyUpgrade`` is applied automatically when the ``upgrades`` key is supplied. -.. _ResidentialSimulationControls: https://github.com/NREL/OpenStudio-BuildStock/blob/master/measures/ResidentialSimulationControls/measure.xml -.. _SimulationOutputReport: https://github.com/NREL/OpenStudio-BuildStock/blob/master/measures/SimulationOutputReport/measure.xml +.. _ResidentialSimulationControls: https://github.com/NREL/resstock/blob/master/measures/ResidentialSimulationControls/measure.xml +.. _SimulationOutputReport: https://github.com/NREL/resstock/blob/master/measures/SimulationOutputReport/measure.xml .. _TimeseriesCSVExport: https://github.com/NREL/resstock/blob/master/measures/TimeseriesCSVExport/measure.xml .. 
_residential-sim-ctrl-defaults: From 7a1232b0199ccf13aaea7fb140b02cde92108bb9 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Thu, 28 Jan 2021 11:53:36 -0700 Subject: [PATCH 16/21] updating example project files --- aws_demo_project.yml | 31 +++++++++-------- project_resstock_multifamily.yml | 17 ---------- project_resstock_national_upgrades.yml | 47 +++++++++++++------------- 3 files changed, 41 insertions(+), 54 deletions(-) delete mode 100644 project_resstock_multifamily.yml diff --git a/aws_demo_project.yml b/aws_demo_project.yml index 7a0aee66..acaa8a1d 100644 --- a/aws_demo_project.yml +++ b/aws_demo_project.yml @@ -1,33 +1,36 @@ -schema_version: 0.2 -stock_type: residential +schema_version: '0.3' buildstock_directory: ../resstock # Relative to this file or absolute project_directory: project_national # Relative to buildstock_directory output_directory: ../demo_test_outputs weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip baseline: - n_datapoints: 4 # Comment this line out if using a custom buildstock csv file n_buildings_represented: 133172057 # Total number of residential dwelling units in contiguous United States, including unoccupied units, resulting from acensus tract level query of ACS 5-yr 2016 (i.e. 2012-2016), using this script: https://github.com/NREL/resstock-estimation/blob/master/sources/spatial/tsv_maker.py. 
- sampling_algorithm: quota # The default resstock sampling algorithm - use precomputed if using the precomputed_sample option + +sampler: + type: residential_quota # change to residential_quota_downselect to do downselect + args: + n_datapoints: 4 + # resample: false # Uncomment and set specify logic you you want to downselect to a subset of the building stock + # logic: + # - Geometry Building Type RECS|Single-Family Detached + # - Vacancy Status|Occupied + +workflow_generator: + type: residential_default + args: + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true upgrades: - upgrade_name: Triple-Pane Windows options: - option: Windows|Low-E, Triple, Non-metal, Air, L-Gain -# apply_logic: costs: - value: 45.77 multiplier: Window Area (ft^2) lifetime: 30 -timeseries_csv_export: - reporting_frequency: Hourly - include_enduse_subcategories: true - -# downselect: # Uncomment and set specify logic you you want to downselect to a subset of the building stock -# resample: true -# logic: -# - Geometry Building Type RECS|Single-Family Detached -# - Vacancy Status|Occupied aws: # The job_identifier must be unique, start with alpha, not include dashes, and limited to 10 chars diff --git a/project_resstock_multifamily.yml b/project_resstock_multifamily.yml deleted file mode 100644 index 788b74a9..00000000 --- a/project_resstock_multifamily.yml +++ /dev/null @@ -1,17 +0,0 @@ -schema_version: 0.2 -buildstock_directory: ../resstock # Relative to this file or absolute -project_directory: project_resstock_multifamily # Relative to buildstock_directory -weather_files_path: /projects/res_stock/project_resstock_national_weather.zip # Relative to this file or absolute path to zipped weather files -baseline: - n_datapoints: 10000 - n_buildings_represented: 80000000 - sampling_algorithm: quota - measures: - - measure_dir_name: ZoneMultipliers - arguments: {} -timeseries_csv_export: - reporting_frequency: Hourly - inc_end_use_subcategories: true - 
inc_output_variables: true -output_directory: /scratch/nmerket/multifamily10k -stock_type: residential diff --git a/project_resstock_national_upgrades.yml b/project_resstock_national_upgrades.yml index e77b7725..6f54ec9e 100644 --- a/project_resstock_national_upgrades.yml +++ b/project_resstock_national_upgrades.yml @@ -1,13 +1,32 @@ -schema_version: 0.2 +schema_version: '0.3' buildstock_directory: ../resstock # Relative to this file or absolute project_directory: project_resstock_heterogeneity # Relative to buildstock_directory output_directory: /scratch/nmerket/heterogeneity4 -#weather_files_url: https://s3.amazonaws.com/epwweatherfiles/project_resstock_national.zip -weather_files_path: /home/nmerket/buildstock/assets/project_resstock_national_weather.zip # Relative to this file or absolute path to zipped weather files +# weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip +weather_files_path: /shared-projects/buildstock/weather/buildstock_2018.zip # Relative to this file or absolute path to zipped weather files baseline: - n_datapoints: 350000 n_buildings_represented: 81221016 - sampling_algorithm: quota + +sampler: + type: residential_quota + args: + n_datapoints: 350000 + +workflow_generator: + type: residential_default + args: + timeseries_csv_export: + reporting_frequency: Hourly + include_enduse_subcategories: true + +eagle: + account: enduse + n_jobs: 500 + minutes_per_sim: 2 + sampling: + time: 60 + postprocessing: + time: 300 upgrades: @@ -481,21 +500,3 @@ upgrades: # multiplier: Fixed (1) # lifetime: 24 # # package_apply_logic: - -timeseries_csv_export: - reporting_frequency: Hourly - inc_end_use_subcategories: true - inc_output_variables: true -downselect: - resample: false - logic: - - not: Heating Fuel|Natural Gas -stock_type: residential -eagle: - account: enduse - n_jobs: 500 - minutes_per_sim: 2 - sampling: - time: 60 - postprocessing: - time: 300 From 639cfbfa1f7dd0ea46ac0f909c88a5682347b98a Mon Sep 17 00:00:00 2001 From: 
Noel Merket Date: Mon, 8 Feb 2021 11:29:26 -0700 Subject: [PATCH 17/21] adding sample_file key --- docs/changelog/migration_0_20.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/migration_0_20.rst b/docs/changelog/migration_0_20.rst index 5aa8fcb7..385303f5 100644 --- a/docs/changelog/migration_0_20.rst +++ b/docs/changelog/migration_0_20.rst @@ -134,7 +134,7 @@ New Spec: sampler: type: precomputed args: - path/to/buildstock.csv # n_datapoints determined from csv file + sample_file: path/to/buildstock.csv # n_datapoints determined from csv file Workflow Generator Specification -------------------------------- From 3b44c8f1801ae48a5f59f2e38fb1c80585aead77 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 8 Feb 2021 11:52:39 -0700 Subject: [PATCH 18/21] pinning to pyarrow<3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d181e055..be5cff9e 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ def run_tests(self): 'numpy>=1.11', 'pandas>=1.0.0,!=1.0.4', 'joblib', - 'pyarrow>=0.14.1', + 'pyarrow>=0.14.1,<3.0.0', 'dask[complete]>=2.1.0', 'docker', 'boto3>=1.10.44', From a9939646eb2ed8a7dab50508b78f6a3964450f33 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Mon, 8 Feb 2021 15:00:25 -0700 Subject: [PATCH 19/21] yaml file updates for Eric --- aws_demo_project.yml | 4 ++-- project_resstock_national.yml | 5 ++--- project_resstock_national_upgrades.yml | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/aws_demo_project.yml b/aws_demo_project.yml index acaa8a1d..caff78cb 100644 --- a/aws_demo_project.yml +++ b/aws_demo_project.yml @@ -2,7 +2,7 @@ schema_version: '0.3' buildstock_directory: ../resstock # Relative to this file or absolute project_directory: project_national # Relative to buildstock_directory output_directory: ../demo_test_outputs -weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip +weather_files_url: 
https://data.nrel.gov/system/files/156/BuildStock_TMY3_FIPS.zip baseline: n_buildings_represented: 133172057 # Total number of residential dwelling units in contiguous United States, including unoccupied units, resulting from acensus tract level query of ACS 5-yr 2016 (i.e. 2012-2016), using this script: https://github.com/NREL/resstock-estimation/blob/master/sources/spatial/tsv_maker.py. @@ -11,10 +11,10 @@ sampler: type: residential_quota # change to residential_quota_downselect to do downselect args: n_datapoints: 4 - # resample: false # Uncomment and set specify logic you you want to downselect to a subset of the building stock # logic: # - Geometry Building Type RECS|Single-Family Detached # - Vacancy Status|Occupied + # resample: false # Uncomment and specify logic if you want to downselect to a subset of the building stock workflow_generator: type: residential_default diff --git a/project_resstock_national.yml b/project_resstock_national.yml index 8ce84a25..522aa414 100644 --- a/project_resstock_national.yml +++ b/project_resstock_national.yml @@ -2,14 +2,13 @@ schema_version: '0.3' buildstock_directory: ../resstock # Relative to this file or absolute project_directory: project_national # Relative to buildstock_directory output_directory: /scratch/nmerket/national_test_outputs2 -# weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip -weather_files_path: /shared-projects/buildstock/weather/buildstock_2018.zip # Relative to this file or absolute path to zipped weather files +# weather_files_url: https://data.nrel.gov/system/files/156/BuildStock_TMY3_FIPS.zip +weather_files_path: /shared-projects/buildstock/weather/BuildStock_TMY3_FIPS.zip # Relative to this file or absolute path to zipped weather files sampler: type: residential_quota_downselect args: n_datapoints: 350000 - # Uncomment and set specify logic you you want to downselect to a subset of the building stock resample: false logic: - Geometry Building Type RECS|Single-Family 
Detached diff --git a/project_resstock_national_upgrades.yml b/project_resstock_national_upgrades.yml index 6f54ec9e..8de0ebb7 100644 --- a/project_resstock_national_upgrades.yml +++ b/project_resstock_national_upgrades.yml @@ -2,8 +2,8 @@ schema_version: '0.3' buildstock_directory: ../resstock # Relative to this file or absolute project_directory: project_resstock_heterogeneity # Relative to buildstock_directory output_directory: /scratch/nmerket/heterogeneity4 -# weather_files_url: https://data.nrel.gov/system/files/128/ResStock_TMY3.zip -weather_files_path: /shared-projects/buildstock/weather/buildstock_2018.zip # Relative to this file or absolute path to zipped weather files +# weather_files_url: https://data.nrel.gov/system/files/156/BuildStock_TMY3_FIPS.zip +weather_files_path: /shared-projects/buildstock/weather/BuildStock_TMY3_FIPS.zip # Relative to this file or absolute path to zipped weather files baseline: n_buildings_represented: 81221016 From 9fa36bef2f1ead6fb6030431bdd9b3f66d315c63 Mon Sep 17 00:00:00 2001 From: Eric Wilson Date: Mon, 8 Feb 2021 17:10:52 -0700 Subject: [PATCH 20/21] Comments on intentional bad inputs --- .../test/test_inputs/enforce-validate-measures-bad-2.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml b/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml index 294a860a..f23b8530 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-measures-bad-2.yml @@ -16,11 +16,11 @@ workflow_generator: residential_simulation_controls: timesteps_per_hr: 4 begin_month: 1 - begin_day_of_month: 1.5 + begin_day_of_month: 1.5 # intentional bad input data type for unit testing input validation end_month: 12 end_day_of_month: 31 timeseries_csv_export: - reporting_frequency: Huorly + reporting_frequency: Huorly # intentionally misspelled for unit testing input 
validation # include_enduse_subcategories: true output_variable: - Zone Mean Air Temperature From c671702ee146c58d8f28ee327b1c317b0d4607d0 Mon Sep 17 00:00:00 2001 From: Eric Wilson Date: Mon, 8 Feb 2021 17:17:09 -0700 Subject: [PATCH 21/21] Reorder downselect args --- buildstockbatch/test/test_inputs/complete-schema.yml | 2 +- .../test/test_inputs/enforce-validate-measures-bad.yml | 2 +- .../test/test_inputs/enforce-validate-measures-good.yml | 2 +- .../test/test_inputs/enforce-validate-options-bad.yml | 2 +- .../test/test_inputs/enforce-validate-options-good.yml | 2 +- .../test/test_inputs/enforce-validate-options-wrong-path.yml | 2 +- docs/changelog/migration_0_20.rst | 4 ++-- docs/samplers/residential_quota_downselect.rst | 2 +- project_resstock_national.yml | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/buildstockbatch/test/test_inputs/complete-schema.yml b/buildstockbatch/test/test_inputs/complete-schema.yml index 6ef8f29a..0f3ae376 100644 --- a/buildstockbatch/test/test_inputs/complete-schema.yml +++ b/buildstockbatch/test/test_inputs/complete-schema.yml @@ -7,8 +7,8 @@ sampler: type: residential_quota_downselect args: n_datapoints: 30 - resample: False logic: HVAC System Heating Natural Gas|Gas Furnace, 60% AFUE + resample: False workflow_generator: type: residential_default args: diff --git a/buildstockbatch/test/test_inputs/enforce-validate-measures-bad.yml b/buildstockbatch/test/test_inputs/enforce-validate-measures-bad.yml index b863cd4d..6d207dd7 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-measures-bad.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-measures-bad.yml @@ -8,6 +8,6 @@ baseline: - residential_constructions_basement sampling_algorithm: quota downselect: - resample: False logic: Vintage|2000s + resample: False schema_version: 0.2 diff --git a/buildstockbatch/test/test_inputs/enforce-validate-measures-good.yml b/buildstockbatch/test/test_inputs/enforce-validate-measures-good.yml index 
3768dd3e..3eea31d3 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-measures-good.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-measures-good.yml @@ -9,6 +9,6 @@ baseline: - ResidentialConstructionsUnfinishedBasement - ResidentialConstructionsFinishedBasement downselect: - resample: False logic: Vintage|2000s + resample: False schema_version: 0.2 diff --git a/buildstockbatch/test/test_inputs/enforce-validate-options-bad.yml b/buildstockbatch/test/test_inputs/enforce-validate-options-bad.yml index aff30526..b58eb497 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-options-bad.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-options-bad.yml @@ -35,6 +35,6 @@ upgrades: - Insulation Unfinished Basement|Extra Argument package_apply_logic: Vintage|1960s||Vintage|1940s&&Vintage|1980s downselect: - resample: False logic: Invalid Parameter|2000s + resample: False schema_version: 0.2 diff --git a/buildstockbatch/test/test_inputs/enforce-validate-options-good.yml b/buildstockbatch/test/test_inputs/enforce-validate-options-good.yml index 7308350b..eddd8c29 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-options-good.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-options-good.yml @@ -21,6 +21,6 @@ upgrades: - Insulation Unfinished Basement|Extra Argument package_apply_logic: Vintage|1960s||Vintage|1940s downselect: - resample: False logic: Vintage|2000s + resample: False schema_version: 0.2 diff --git a/buildstockbatch/test/test_inputs/enforce-validate-options-wrong-path.yml b/buildstockbatch/test/test_inputs/enforce-validate-options-wrong-path.yml index c4a51109..09e273aa 100644 --- a/buildstockbatch/test/test_inputs/enforce-validate-options-wrong-path.yml +++ b/buildstockbatch/test/test_inputs/enforce-validate-options-wrong-path.yml @@ -21,6 +21,6 @@ upgrades: - Insulation Unfinished Basement|Extra Argument package_apply_logic: Vintage|1960s||Vintage|1940s downselect: - resample: False logic: 
Vintage|2000s + resample: False schema_version: 0.2 diff --git a/docs/changelog/migration_0_20.rst b/docs/changelog/migration_0_20.rst index 385303f5..8ee96f39 100644 --- a/docs/changelog/migration_0_20.rst +++ b/docs/changelog/migration_0_20.rst @@ -88,10 +88,10 @@ Old Spec: sampling_algorithm: quota n_datapoints: 350000 downselect: - resample: false logic: - Geometry Building Type RECS|Single-Family Detached - Vacancy Status|Occupied + resample: false New Spec: @@ -102,10 +102,10 @@ New Spec: type: residential_quota_downselect args: n_datapoints: 350000 - resample: false logic: - Geometry Building Type RECS|Single-Family Detached - Vacancy Status|Occupied + resample: false Precomputed Sampling ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/samplers/residential_quota_downselect.rst b/docs/samplers/residential_quota_downselect.rst index 69b4bf87..d0f9b267 100644 --- a/docs/samplers/residential_quota_downselect.rst +++ b/docs/samplers/residential_quota_downselect.rst @@ -29,10 +29,10 @@ Configuration Example type: residential_quota_downselect args: n_datapoints: 350000 - resample: false logic: - Geometry Building Type RECS|Single-Family Detached - Vacancy Status|Occupied + resample: false Arguments ~~~~~~~~~ diff --git a/project_resstock_national.yml b/project_resstock_national.yml index 522aa414..30ea8d5d 100644 --- a/project_resstock_national.yml +++ b/project_resstock_national.yml @@ -9,10 +9,10 @@ sampler: type: residential_quota_downselect args: n_datapoints: 350000 - resample: false logic: - Geometry Building Type RECS|Single-Family Detached - Vacancy Status|Occupied + resample: false workflow_generator: type: residential_default