Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: racially insensitive language #205

Merged
merged 12 commits into from Mar 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 1 addition & 3 deletions .circleci/config.yml
Expand Up @@ -2,14 +2,13 @@ version: 2
jobs:
build:
docker:
- image: continuumio/miniconda3
- image: cimg/python:3.8
steps:
- setup_remote_docker
- checkout
- run:
name: Install buildstock
command: |
conda install -c conda-forge -y -q scipy numpy "pandas>=1.0.0,!=1.0.4" "pyarrow>=0.14.1" dask joblib pyyaml "ruamel.yaml>=0.15.0"
pip install .[dev] --progress-bar off
- run:
name: Run PyTest
Expand All @@ -35,7 +34,6 @@ jobs:
name: Build documentation
when: always
command: |
apt-get install make
cd docs
make html
mkdir /tmp/docs
Expand Down
43 changes: 43 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,43 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.158.0/containers/docker-existing-dockerfile
{
"name": "Existing Dockerfile",

// Sets the run context to one level up instead of the .devcontainer folder.
"context": "..",

// Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
"dockerFile": "../Dockerfile",

// Set *default* container specific settings.json values on container create.
"settings": {
"terminal.integrated.shell.linux": null
},

// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python"
],

"workspaceMount": "source=${localWorkspaceFolder},target=/buildstock-batch,type=bind",
"workspaceFolder": "/buildstock-batch",

"mounts": [
"source=${localEnv:HOME}/.aws,target=/root/.aws,type=bind"
]

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Uncomment the next line to run commands after the container is created - for example installing curl.
// "postCreateCommand": "apt-get update && apt-get install -y curl",

// Uncomment when using a ptrace-based debugger like C++, Go, and Rust
// "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ],

// Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker.
// "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ],

// Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root.
// "remoteUser": "vscode"
}
34 changes: 16 additions & 18 deletions Dockerfile
@@ -1,26 +1,24 @@
FROM nrel/openstudio:2.9.1

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:$PATH

RUN apt-get update --fix-missing && \
apt-get install -y wget bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 git mercurial subversion && \
apt-get clean
RUN sudo apt update && \
sudo apt install -y wget build-essential checkinstall libreadline-gplv2-dev libncursesw5-dev libssl-dev \
libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev libffi-dev zlib1g-dev

RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.3-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda clean -tipsy && \
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc && \
find /opt/conda/ -follow -type f -name '*.a' -delete && \
find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
/opt/conda/bin/conda clean -afy
RUN wget https://www.python.org/ftp/python/3.8.8/Python-3.8.8.tgz && \
tar xzf Python-3.8.8.tgz && \
cd Python-3.8.8 && \
./configure --enable-optimizations && \
make altinstall && \
rm -rf Python-3.8.8 && \
rm -rf Python-3.8.8.tgz


RUN conda install -c conda-forge -y pandas dask pyarrow python-dateutil joblib && \
python -m pip install --upgrade pip
RUN sudo apt install -y -V ca-certificates lsb-release && \
wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb && \
sudo apt install -y -V ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb && \
sudo apt update && \
sudo apt install -y -V libarrow-dev libarrow-glib-dev libarrow-dataset-dev libparquet-dev libparquet-glib-dev

COPY . /buildstock-batch/
RUN python -m pip install /buildstock-batch
RUN python3.8 -m pip install /buildstock-batch
9 changes: 6 additions & 3 deletions aws_demo_project.yml
Expand Up @@ -3,14 +3,15 @@ buildstock_directory: ../resstock # Relative to this file or absolute
project_directory: project_national # Relative to buildstock_directory
output_directory: ../demo_test_outputs
weather_files_url: https://data.nrel.gov/system/files/156/BuildStock_TMY3_FIPS.zip
# weather_files_path: ../weather/BuildStock_TMY3_FIPS.zip

baseline:
  n_buildings_represented: 133172057 # Total number of residential dwelling units in contiguous United States, including unoccupied units, resulting from a census tract level query of ACS 5-yr 2016 (i.e. 2012-2016), using this script: https://github.com/NREL/resstock-estimation/blob/master/sources/spatial/tsv_maker.py.

sampler:
type: residential_quota # change to residential_quota_downselect to do downselect
args:
n_datapoints: 4
n_datapoints: 100
# logic:
# - Geometry Building Type RECS|Single-Family Detached
# - Vacancy Status|Occupied
Expand Down Expand Up @@ -39,10 +40,12 @@ aws:
bucket: resbldg-datasets
prefix: testing/external_demo_project
emr:
slave_instance_count: 1
manager_instance_type: m5.xlarge
worker_instance_type: r5.4xlarge
worker_instance_count: 1
region: us-west-2
use_spot: true
batch_array_size: 4
batch_array_size: 100
# To receive email updates on job progress accept the request to receive emails that will be sent from Amazon
notifications_email: user@example.com

Expand Down
102 changes: 82 additions & 20 deletions buildstockbatch/aws/aws.py
Expand Up @@ -14,9 +14,11 @@
import base64
import boto3
from botocore.exceptions import ClientError
import collections
import csv
from fsspec.implementations.local import LocalFileSystem
import gzip
import hashlib
import itertools
from joblib import Parallel, delayed
import json
Expand Down Expand Up @@ -80,6 +82,20 @@ def compress_file(in_filename, out_filename):
shutil.copyfileobj(f_in, f_out)


def calc_hash_for_file(filename):
    """Compute and return the SHA-256 hex digest of a file's full contents.

    :param filename: path of the file to hash
    :returns: hexadecimal digest string
    """
    with open(filename, 'rb') as file_obj:
        contents = file_obj.read()
    return hashlib.sha256(contents).hexdigest()


def copy_s3_file(src_bucket, src_key, dest_bucket, dest_key):
    """Copy a single object from one S3 location to another.

    :param src_bucket: bucket name of the source object
    :param src_key: key of the source object
    :param dest_bucket: bucket name to copy the object into
    :param dest_key: key to copy the object to
    """
    copy_source = {'Bucket': src_bucket, 'Key': src_key}
    client = boto3.client('s3')
    client.copy(copy_source, Bucket=dest_bucket, Key=dest_key)


class AwsBatchEnv(AwsJobBase):
"""
Class to manage the AWS Batch environment and Step Function controller.
Expand Down Expand Up @@ -1486,14 +1502,14 @@ def create_emr_cluster_function(self):
{
'Market': 'SPOT' if self.batch_use_spot else 'ON_DEMAND',
'InstanceRole': 'MASTER',
'InstanceType': self.emr_master_instance_type,
'InstanceType': self.emr_manager_instance_type,
'InstanceCount': 1
},
{
'Market': 'SPOT' if self.batch_use_spot else 'ON_DEMAND',
'InstanceRole': 'CORE',
'InstanceType': self.emr_slave_instance_type,
'InstanceCount': self.emr_slave_instance_count
'InstanceType': self.emr_worker_instance_type,
'InstanceCount': self.emr_worker_instance_count
},
],
'Ec2SubnetId': self.priv_vpc_subnet_id_1,
Expand Down Expand Up @@ -1667,8 +1683,8 @@ def validate_instance_types(project_file):
ec2 = boto3_session.client('ec2')
job_base = AwsJobBase('genericjobid', aws_config, boto3_session)
instance_types_requested = set()
instance_types_requested.add(job_base.emr_master_instance_type)
instance_types_requested.add(job_base.emr_slave_instance_type)
instance_types_requested.add(job_base.emr_manager_instance_type)
instance_types_requested.add(job_base.emr_worker_instance_type)
inst_type_resp = ec2.describe_instance_type_offerings(Filters=[{
'Name': 'instance-type',
'Values': list(instance_types_requested)
Expand Down Expand Up @@ -1775,7 +1791,7 @@ def run_batch(self):
buildstock_csv_filename = self.sampler.run_sampling()

# Compress and upload assets to S3
with tempfile.TemporaryDirectory() as tmpdir, tempfile.TemporaryDirectory() as tmp_weather_dir:
with tempfile.TemporaryDirectory(prefix='bsb_') as tmpdir, tempfile.TemporaryDirectory(prefix='bsb_') as tmp_weather_dir: # noqa: E501
self._weather_dir = tmp_weather_dir
self._get_weather_files()
tmppath = pathlib.Path(tmpdir)
Expand All @@ -1786,17 +1802,32 @@ def run_batch(self):
tar_f.add(buildstock_path / 'measures', 'measures')
tar_f.add(buildstock_path / 'resources', 'lib/resources')
tar_f.add(project_path / 'housing_characteristics', 'lib/housing_characteristics')
logger.debug('Compressing weather files')

# Weather files
weather_path = tmppath / 'weather'
os.makedirs(weather_path)

# Determine the unique weather files
epw_filenames = list(filter(lambda x: x.endswith('.epw'), os.listdir(self.weather_dir)))
logger.debug('Calculating hashes for weather files')
epw_hashes = Parallel(n_jobs=-1, verbose=9)(
delayed(calc_hash_for_file)(pathlib.Path(self.weather_dir) / epw_filename)
for epw_filename in epw_filenames
)
unique_epws = collections.defaultdict(list)
for epw_filename, epw_hash in zip(epw_filenames, epw_hashes):
unique_epws[epw_hash].append(epw_filename)

# Compress unique weather files
logger.debug('Compressing weather files')
Parallel(n_jobs=-1, verbose=9)(
delayed(compress_file)(
pathlib.Path(self.weather_dir) / epw_filename,
str(weather_path / epw_filename) + '.gz'
pathlib.Path(self.weather_dir) / x[0],
str(weather_path / x[0]) + '.gz'
)
for epw_filename
in filter(lambda x: x.endswith('.epw'), os.listdir(self.weather_dir))
for x in unique_epws.values()
)

logger.debug('Writing project configuration for upload')
with open(tmppath / 'config.json', 'wt', encoding='utf-8') as f:
json.dump(self.cfg, f)
Expand Down Expand Up @@ -1855,6 +1886,22 @@ def run_batch(self):
logger.debug('Uploading files to S3')
upload_directory_to_s3(tmppath, self.cfg['aws']['s3']['bucket'], self.cfg['aws']['s3']['prefix'])

# Copy the non-unique weather files on S3
epws_to_copy = []
for epws in unique_epws.values():
# The first in the list is already up there, copy the rest
for filename in epws[1:]:
epws_to_copy.append((
f"{self.cfg['aws']['s3']['prefix']}/weather/{epws[0]}.gz",
f"{self.cfg['aws']['s3']['prefix']}/weather/{filename}.gz"
))

logger.debug('Copying weather files on S3')
bucket = self.cfg['aws']['s3']['bucket']
Parallel(n_jobs=-1, verbose=9)(
delayed(copy_s3_file)(bucket, src, bucket, dest) for src, dest in epws_to_copy
)

# Create the output directories
fs = S3FileSystem()
for upgrade_id in range(len(self.cfg.get('upgrades', [])) + 1):
Expand Down Expand Up @@ -1883,7 +1930,7 @@ def run_batch(self):
job_env_cfg = self.cfg['aws'].get('job_environment', {})
batch_env.create_job_definition(
image_url,
command=['python3', '-m', 'buildstockbatch.aws.aws'],
command=['python3.8', '-m', 'buildstockbatch.aws.aws'],
vcpus=job_env_cfg.get('vcpus', 1),
memory=job_env_cfg.get('memory', 1024),
env_vars=env_vars
Expand Down Expand Up @@ -1946,19 +1993,34 @@ def run_job(cls, job_id, bucket, prefix, job_name, region):

logger.debug('Getting weather files')
weather_dir = sim_dir / 'weather'
os.makedirs(weather_dir)
building_ids = [x[0] for x in jobs_d['batch']]
os.makedirs(weather_dir, exist_ok=True)

# Make a lookup of which parameter points to the weather file from options_lookup.tsv
with open(sim_dir / 'lib' / 'resources' / 'options_lookup.tsv', 'r', encoding='utf-8') as f:
tsv_reader = csv.reader(f, delimiter='\t')
next(tsv_reader) # skip headers
param_name = None
epws_by_option = {}
for row in tsv_reader:
row_has_epw = [x.endswith('.epw') for x in row[2:]]
if sum(row_has_epw):
if row[0] != param_name and param_name is not None:
raise RuntimeError(f'The epw files are specified in options_lookup.tsv under more than one parameter type: {param_name}, {row[0]}') # noqa: E501
epw_filename = row[row_has_epw.index(True) + 2].split('=')[1]
param_name = row[0]
option_name = row[1]
epws_by_option[option_name] = epw_filename

# Look through the buildstock.csv to find the appropriate location and epw
epws_to_download = set()
building_ids = [x[0] for x in jobs_d['batch']]
with open(sim_dir / 'lib' / 'housing_characteristics' / 'buildstock.csv', 'r', encoding='utf-8') as f:
csv_reader = csv.DictReader(f)
loc_col = 'Location Weather Filename'
if loc_col not in csv_reader.fieldnames:
loc_col = 'Location'
for row in csv_reader:
if int(row['Building']) in building_ids:
epws_to_download.add(row[loc_col])
if loc_col == 'Location':
epws_to_download = map('USA_{}.epw'.format, epws_to_download)
epws_to_download.add(epws_by_option[row[param_name]])

# Download the epws needed for these simulations
for epw_filename in epws_to_download:
with io.BytesIO() as f_gz:
logger.debug('Downloading {}.gz'.format(epw_filename))
Expand Down
6 changes: 3 additions & 3 deletions buildstockbatch/aws/awsbase.py
Expand Up @@ -177,9 +177,9 @@ def __init__(self, job_identifier, aws_config, boto3_session):

# EMR
emr_config = aws_config.get('emr', {})
self.emr_master_instance_type = emr_config.get('master_instance_type', 'm5.4xlarge')
self.emr_slave_instance_type = emr_config.get('slave_instance_type', 'r5.4xlarge')
self.emr_slave_instance_count = emr_config.get('slave_instance_count', 4)
self.emr_manager_instance_type = emr_config.get('manager_instance_type', 'm5.4xlarge')
self.emr_worker_instance_type = emr_config.get('worker_instance_type', 'r5.4xlarge')
self.emr_worker_instance_count = emr_config.get('worker_instance_count', 4)
self.emr_cluster_security_group_name = f'{self.job_identifier}_emr_security_group'
self.emr_cluster_name = f'{self.job_identifier}_emr_dask_cluster'
self.emr_job_flow_role_name = f'{self.job_identifier}_emr_job_flow_role'
Expand Down
3 changes: 2 additions & 1 deletion buildstockbatch/aws/s3_assets/bootstrap-dask-custom
Expand Up @@ -41,8 +41,9 @@ conda install \
python=3.7 \
"dask-yarn>=0.7.0" \
"pandas>=1.0.0,!=1.0.4" \
"pyarrow>=0.14.1" \
"pyarrow>=3.0.0" \
"s3fs>=0.4.2,<0.5.0" \
"numpy>=1.20.0" \
conda-pack \
tornado=5

Expand Down
3 changes: 2 additions & 1 deletion buildstockbatch/aws/s3_assets/setup_postprocessing.py
Expand Up @@ -10,6 +10,7 @@
's3fs>=0.4.2,<0.5.0',
'boto3',
'pandas>=1.0.0,!=1.0.4',
'pyarrow>=0.14.1',
'pyarrow>=3.0.0',
'numpy>=1.20.0'
]
)
6 changes: 3 additions & 3 deletions buildstockbatch/schemas/v0.3.yaml
Expand Up @@ -31,9 +31,9 @@ aws-job-environment:
memory: int(min=1024, required=False)

aws-emr-spec:
master_instance_type: str(required=False)
slave_instance_type: str(required=False)
slave_instance_count: int(min=1, required=False)
manager_instance_type: str(required=False)
worker_instance_type: str(required=False)
worker_instance_count: int(min=1, required=False)
dask_worker_vcores: int(min=1, required=False)

hpc-spec:
Expand Down
1 change: 0 additions & 1 deletion buildstockbatch/workflow_generator/base.py
Expand Up @@ -20,7 +20,6 @@ class WorkflowGeneratorBase(object):
def __init__(self, cfg, n_datapoints):
self.cfg = cfg
self.n_datapoints = n_datapoints
self.validate(self.cfg)

def create_osw(self, sim_id, building_id, upgrade_idx):
"""
Expand Down
2 changes: 1 addition & 1 deletion create_eagle_env.sh
Expand Up @@ -26,7 +26,7 @@ MY_CONDA_PREFIX="$CONDA_ENVS_DIR/$MY_CONDA_ENV_NAME"
echo "Creating $MY_CONDA_PREFIX"
module load conda
conda remove -y --prefix "$MY_CONDA_PREFIX" --all
conda create -y --prefix "$MY_CONDA_PREFIX" -c conda-forge "pyarrow>=0.14" python=3.7.8 "pandas>=1.0.0,!=1.0.4" dask distributed ruby
conda create -y --prefix "$MY_CONDA_PREFIX" -c conda-forge "pyarrow>=3.0.0" python=3.7.8 "numpy>=1.20.0" "pandas>=1.0.0,!=1.0.4" dask distributed ruby
source activate "$MY_CONDA_PREFIX"
pip install --upgrade pip
if [ $DEV -eq 1 ]
Expand Down