Skip to content

Commit

Permalink
Check for presence of files on s3 before initializing glue crawler
Browse files (browse the repository at this point in the history)
  • Loading branch information
asparke2 committed May 27, 2020
1 parent 1e01df2 commit 15701e2
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion buildstockbatch/postprocessing.py
Expand Up @@ -417,10 +417,17 @@ def create_athena_tables(aws_conf, tbl_prefix, s3_bucket, s3_prefix):
max_crawling_time = aws_conf.get('athena', {}).get('max_crawling_time', 600)
assert db_name, "athena:database_name not supplied"

+ # Check that there are files in the s3 bucket before creating and running glue crawler
+ s3 = boto3.resource('s3')
+ bucket = s3.Bucket(s3_bucket)
+ s3_path = f's3://{s3_bucket}/{s3_prefix}'
+ n_existing_files = len(list(bucket.objects.filter(Prefix=s3_prefix)))
+ assert n_existing_files > 0, f"There are no files in {s3_path}, cannot create Athena tables using glue crawler"
+
glueClient = boto3.client('glue', region_name=region_name)
crawlTarget = {
'S3Targets': [{
- 'Path': f's3://{s3_bucket}/{s3_prefix}',
+ 'Path': s3_path,
'Exclusions': []
}]
}
Expand Down

0 comments on commit 15701e2

Please sign in to comment.