# Snakefile
import json
import shutil
from pathlib import Path
from glob import glob
import geopandas
import irv_datapkg
import pandas
import requests
import shapely
# Data package version identifier.
DATAPKG_VERSION = "0.1.0"

# Zenodo host name; the sandbox host is kept below as a commented-out
# alternative.
# ZENODO_URL = "sandbox.zenodo.org"
ZENODO_URL = "zenodo.org"

# Boundaries table read relative to the working directory, and the same
# table indexed by its CODE_A3 column for per-code lookups.
BOUNDARIES = irv_datapkg.read_boundaries(Path("."))
BOUNDARY_LU = BOUNDARIES.set_index("CODE_A3")


# Environment variables the workflow declares as required.
envvars:
    "ZENODO_TOKEN",
def boundary_geom(iso3):
    """Return the "geometry" entry of the BOUNDARY_LU row labelled `iso3`.

    BOUNDARY_LU is indexed by the CODE_A3 column of the boundaries table,
    so `iso3` is matched against CODE_A3 values.
    """
    row_label, column = iso3, "geometry"
    return BOUNDARY_LU.loc[row_label, column]
#
# Top-level rules
#
rule clean:
    # Recursively delete the "data" output directory.
    shell:
        "rm -rf data"
rule all:
    # Request datapackage.json for every CODE_A3 value in BOUNDARIES.
    input:
        expand(
            "data/{ISO3}/datapackage.json",
            ISO3=BOUNDARIES["CODE_A3"],
        ),
rule all_uploaded:
    # Request the ".deposited" marker file for every CODE_A3 value in
    # BOUNDARIES.
    input:
        expand(
            "zenodo/{ISO3}.deposited",
            ISO3=BOUNDARIES["CODE_A3"],
        ),
rule all_published:
    # Request the ".published" marker file for every CODE_A3 value in
    # BOUNDARIES.
    input:
        expand(
            "zenodo/{ISO3}.published",
            ISO3=BOUNDARIES["CODE_A3"],
        ),
#
# Data package
#
rule datapackage:
    # Produce data/{ISO3}/datapackage.json by running the generator script;
    # the md5sum.txt listing for the same ISO3 must exist first.
    input:
        checksums="data/{ISO3}/md5sum.txt",
    output:
        json="data/{ISO3}/datapackage.json",
    script:
        "scripts/generate_datapackage_json.py"
rule checksums:
    # Write data/{ISO3}/md5sum.txt: MD5 sums of the files under the ISO3
    # directory whose md5sum line contains "tif" or "gpkg", sorted by path.
    #
    # input must require all the data package files
    # - summary CSVs require multiple TIFFs in turn
    input:
        "data/{ISO3}/aqueduct_flood.csv",
        "data/{ISO3}/gridfinder/grid__{ISO3}.gpkg",
        "data/{ISO3}/gridfinder/targets__{ISO3}.tif",
        "data/{ISO3}/isimip_heat_drought.csv",
        "data/{ISO3}/jrc_ghsl.csv",
        "data/{ISO3}/openstreetmap/openstreetmap_rail__{ISO3}.gpkg",
        "data/{ISO3}/openstreetmap/openstreetmap_roads-tertiary__{ISO3}.gpkg",
        "data/{ISO3}/storm.csv",
        "data/{ISO3}/wri_powerplants/wri-powerplants__{ISO3}.gpkg",
    output:
        checksums="data/{ISO3}/md5sum.txt",
    # `grep -E "tif|gpkg"` selects the same lines as the basic-regex
    # alternation it replaces, but needs no backslash: a backslash-pipe in a
    # non-raw Python string is an invalid escape sequence, and that form of
    # alternation is a GNU grep extension.
    # NOTE(review): `**` only recurses when bash globstar is enabled;
    # otherwise it behaves like `*` and matches files exactly one directory
    # below data/{ISO3} - confirm that is the intended depth.
    shell:
        """
        cd data/{wildcards.ISO3}
        md5sum **/*.* | grep -E "tif|gpkg" | sort -k 2 > md5sum.txt
        """
rule clip_tiff:
    # Generic raster rule: pass an incoming TIFF, the output path and the
    # boundary geometry for ISO3 to irv_datapkg.crop_raster.
    input:
        tiff="incoming_data/{DATASET}/{SLUG}.tif",
    output:
        tiff="data/{ISO3}/{DATASET}/{SLUG}__{ISO3}.tif",
    run:
        boundary = boundary_geom(wildcards.ISO3)
        irv_datapkg.crop_raster(input.tiff, output.tiff, boundary)
rule clip_geopackage:
    # Generic vector rule: read an incoming GeoPackage, select features with
    # the `.cx` coordinate indexer over the bounding box of the ISO3 boundary
    # (a rectangular selection, not a cut along the boundary outline), and
    # write them to a layer named after SLUG.
    input:
        gpkg="incoming_data/{DATASET}/{SLUG}.gpkg",
    output:
        gpkg="data/{ISO3}/{DATASET}/{SLUG}__{ISO3}.gpkg",
    run:
        features = geopandas.read_file(input.gpkg, engine="pyogrio")
        x_lo, y_lo, x_hi, y_hi = boundary_geom(wildcards.ISO3).bounds
        subset = features.cx[x_lo:x_hi, y_lo:y_hi]
        subset.to_file(
            output.gpkg,
            driver="GPKG",
            layer=wildcards.SLUG,
            engine="pyogrio",
        )
#
# Rule definitions kept in separate files under rules/
#
include: "rules/aqueduct_flood.smk"
include: "rules/gridfinder.smk"
include: "rules/isimip_heat_drought.smk"
include: "rules/jrc_floods.smk"
include: "rules/jrc_ghsl.smk"
include: "rules/openstreetmap.smk"
include: "rules/storm.smk"
include: "rules/wri_powerplants.smk"
include: "rules/zenodo.smk"