-
Notifications
You must be signed in to change notification settings - Fork 0
/
02-get-huc12shed.Rmd
310 lines (262 loc) · 11.8 KB
/
02-get-huc12shed.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
---
title: "Watershed delineation"
author: "Kelly Hondula"
date: "3/22/2019"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# install.packages("nhdR")
# library(nhdR)
# library(FedData)
# if (!require(devtools)) install.packages('devtools')
# devtools::install_github("giswqs/whiteboxR")
# library(whitebox)
# library(raster)
library(igraph)
library(sf)
library(fs)
library(dplyr)
```
# data
Locations of watershed boundary dataset and data for this project
```{r}
data_dir_wbd <- "/nfs/khondula-data/wbd"
data_dir_coastalsheds <- "/nfs/khondula-data/coastalsheds"
```
Download and unzip watershed boundary dataset if not done already
```{r}
# zipfiles <- sprintf("https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/HU2/Shape/WBD_%s_HU2_Shape.zip", stringr::str_pad(1:22, width = 2, pad = "0"))
# destfiles <- sprintf(file.path(data_dir_wbd, "huc_%s.zip"), stringr::str_pad(1:22, width = 2, pad = "0"))
# purrr::walk2(.x = zipfiles, .y = destfiles, .f = ~download.file(url = .x, destfile = .y))
# unzipfiles <- sprintf(file.path(data_dir_wbd, "Shape_%s"), stringr::str_pad(1:22, width = 2, pad = "0"))
# purrr::walk2(.x = destfiles, .y = unzipfiles, .f = ~unzip(.x, exdir = .y))
```
HUC 2 geodatabase file downloaded from https://nrcs.app.box.com/v/gateway/folder/39640323180
```{r}
# download.file(url = "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip", destfile = file.path(data_dir_wbd, "nationalwbd.zip"))
# unzip(file.path(data_dir_wbd, "nationalwbd.zip"), exdir = data_dir_wbd)
```
Read in ncaa points file created with consistent coordinate reference system and project to watershed boundary CRS. Note that SITE_ID column gets converted to SITE_ when writing as Esri shapefile
```{r}
ncaa_coords_sf <- st_read(file.path(data_dir_coastalsheds, "ncaa_coords", "ncaa_coords_wgs84.shp")) %>%
ncaa_coords_sf_prj <- st_transform(ncaa_coords_sf, 4269)
```
# Workflow step by step
Select one site ID to test out workflow
```{r}
ncaa_siteID <- ncaa_coords_sf_prj$SITE_[4]
# ncaa_siteID <- "NCCAGL10-1026"
# ncaa_siteID <- "NCCAGL10-NPS09-096" # lake michigan example
my_ncaa_pt <- filter(ncaa_coords_sf_prj, SITE_ == ncaa_siteID)[1]
mypt_id <- ncaa_siteID
```
find which huc 2 the point is in, or the nearest HUC 2
```{r}
huc2 <- st_read(file.path(data_dir_wbd, "wbdhu2_a_us_september2018.gdb")) %>% st_transform(4269)
my_huc2_mat <- st_contains(huc2, my_ncaa_pt, sparse = FALSE)
my_huc2_id <- which(apply(my_huc2_mat, 1, any))
my_huc2 <- huc2[my_huc2_id,]
my_huc2_id <- my_huc2$HUC2 %>% as.character()
my_huc2_id
if(length(my_huc2_id)==0){
my_huc2 <- huc2[which.min(st_distance(my_ncaa_pt, huc2)),]
my_huc2_id <- my_huc2$HUC2 %>% as.character()
}
my_huc2_id
```
then read in HUC 12s for that watershed
```{r}
huc12_filepath <- file.path(data_dir_wbd, sprintf("Shape_%s/Shape/WBDHU12.shp", my_huc2_id))
huc12 <- st_read(huc12_filepath) %>% st_transform(4269)
my_huc12_mat <- st_contains(huc12, my_ncaa_pt, sparse = FALSE)
my_huc12_id <- which(apply(my_huc12_mat, 1, any))
my_huc12 <- huc12[my_huc12_id,]
my_huc12_id <- my_huc12$HUC12 %>% as.character()
my_huc12_id
if(length(my_huc12_id)==0){
my_huc12 <- huc12[which.min(st_distance(my_ncaa_pt, huc12)),]
my_huc12_id <- my_huc12$HUC12 %>% as.character()
}
```
Convert to an edgelist network to find all upstream HUC12s
```{r}
huc12_edgelist <- huc12 %>% st_drop_geometry() %>% dplyr::select(HUC12, ToHUC)
huc12_network <- huc12_edgelist %>% igraph::graph_from_data_frame()
paths_in <- igraph::all_simple_paths(huc12_network, from = my_huc12_id, mode = "in")
upstream_huc12s <- sapply(paths_in, names) %>% unlist() %>% unique()
upstream_huc12s
```
Filter those from huc 12 dataset, combine with huc 12 the point is in in case there are no upstream hucs, and union into one polygon
```{r}
upstream_huc12s_sf <- dplyr::filter(huc12, HUC12 %in% upstream_huc12s) %>% rbind(my_huc12)
upstream_huc12s_sf_union <- upstream_huc12s_sf %>% st_union()
```
Map
```{r}
leaflet() %>%
addTiles() %>%
# addPolygons(data = my_huc12) %>%
addPolygons(data = upstream_huc12s_sf_union) %>%
addMarkers(data = my_ncaa_pt)
```
save
```{r}
st_write(upstream_huc12s_sf, file.path(data_dir_coastalsheds, "ncaa_huc12sheds",
sprintf("%s_huc12shed.shp", mypt_id)))
st_write(upstream_huc12s_sf_union, file.path(data_dir_coastalsheds, "ncaa_huc12sheds_union",
sprintf("%s_huc12shed_union.shp", mypt_id)))
```
# Workflow function
```{r}
ncaa_siteID <- ncaa_siteIDs_GL[1]
ncaa_siteID <- "NCCAGL10-GLBA10-150"
```
Self-contained function based on a site ID
```{r}
save_huc12shed_for_pt <- function(ncaa_siteID){
# locations for data
data_dir_wbd <- "/nfs/khondula-data/wbd"
data_dir_coastalsheds <- "/nfs/khondula-data/coastalsheds"
# where to save output shapefiles
out_path <- file.path(data_dir_coastalsheds, "ncaa_huc12sheds")
out_path2 <- file.path(data_dir_coastalsheds, "ncaa_huc12sheds_union")
if(!fs::dir_exists(out_path)){fs::dir_create(out_path)}
if(!fs::dir_exists(out_path2)){fs::dir_create(out_path2)}
# read in ncaa coordinates
ncaa_coords_sf_prj <- st_read(file.path(data_dir_coastalsheds, "ncaa_coords", "ncaa_coords_wgs84.shp")) %>%
st_transform(4269) # project coords to WBD prj
# select one point
my_ncaa_pt <- dplyr::filter(ncaa_coords_sf_prj, SITE_ == ncaa_siteID) %>% slice(1)
# mypt_id <- my_ncaa_pt$SITE_
mypt_id <- ncaa_siteID
# identify which huc 2 the point is in
huc2 <- st_read(file.path(data_dir_wbd, "wbdhu2_a_us_september2018.gdb")) %>% st_transform(4269)
my_huc2_mat <- st_contains(huc2, my_ncaa_pt, sparse = FALSE)
my_huc2_id <- which(apply(my_huc2_mat, 1, any))
my_huc2 <- huc2[my_huc2_id,]
my_huc2_id <- my_huc2$HUC2 %>% as.character()
# if point is not within any HUC2s then need to find closest
if(length(my_huc2_id)==0){
my_huc2 <- huc2[which.min(st_distance(my_ncaa_pt, huc2)),]
my_huc2_id <- my_huc2$HUC2 %>% as.character()
}
# read in the corresponding huc 12s
huc12_filepath <- file.path(data_dir_wbd, sprintf("Shape_%s/Shape/WBDHU12.shp", my_huc2_id))
huc12 <- st_read(huc12_filepath) %>% st_transform(4269)
# find which huc 12 point is in
my_huc12_mat <- st_contains(huc12, my_ncaa_pt, sparse = FALSE)
my_huc12_id <- which(apply(my_huc12_mat, 1, any))
my_huc12 <- huc12[my_huc12_id,]
my_huc12_id <- my_huc12$HUC12 %>% as.character()
# if not within HUC12s then find closest
if(length(my_huc12_id)==0){
my_huc12 <- huc12[which.min(st_distance(my_ncaa_pt, huc12)),]
my_huc12_id <- my_huc12$HUC12 %>% as.character()
}
# FOR GREAT LAKES
# variable for whether point is in a great lake huc 12
my_huc12_GL <- my_huc12_id %in% c("041800000300", # Lake superior
"041800000100", # frontal lake superior
"042400000300", # Lake Huron
"040700030401", # frontal lake huron
"041900000002", # Lake Michigan
# "", # frontal lake michigan
"042600000300", # Lake Erie
"041502000200", # Lake Ontario
"041502000100", # Frontal Lake Ontario
"042600000200" # Frontal lake eries
)
if(my_huc12_id %in% c("042600000300", "042600000200")){
# combine lake erie and frontal lake erie
my_huc12 <- st_union(filter(huc12, HUC12 == "042600000300"),
filter(huc12, HUC12 == "042600000200"))
}
if(my_huc12_id %in% c("041502000200", "041502000100")){
# combine lake ontario and frontal lake ontario
my_huc12 <- st_union(filter(huc12, HUC12 == "041502000200"),
filter(huc12, HUC12 == "041502000100"))
}
if(my_huc12_id %in% c("041800000300", "041800000100")){
# combine lake superior and frontal lake superior
my_huc12 <- st_union(filter(huc12, HUC12 == "041800000300"),
filter(huc12, HUC12 == "041800000100"))
}
# if(my_huc12_id %in% c("041900000002", "")){
# # combine lake michigan and frontal lake michigan
# my_huc12 <- st_union(filter(huc12, HUC12 == "041900000002"),
# filter(huc12, HUC12 == ""))
# }
if(my_huc12_id %in% c("042400000300", "040700030401")){
# combine lake huron and frontal lake huron
my_huc12 <- st_union(filter(huc12, HUC12 == "042400000300"),
filter(huc12, HUC12 == "040700030401"))
}
if(my_huc12_GL){
# find the point on the outside of the 10m buffered lake polygon that is closest to the point
my_huc12_ls <- my_huc12 %>%
st_transform(5070) %>%
st_buffer(500) %>% # try to get beyond the frontal watershed
st_transform(4269) %>%
st_cast("POLYGON") %>% st_cast("MULTILINESTRING") # convert to line
shoreline_pt <- st_nearest_points(my_huc12_ls, my_ncaa_pt) %>% st_cast("POINT")
shoreline_pt_sf <- st_sf(id = 1:2, shoreline_pt)[1,]
my_huc12_mat <- st_contains(huc12, shoreline_pt_sf, sparse = FALSE)
my_huc12_id <- which(apply(my_huc12_mat, 1, any))
my_huc12 <- huc12[my_huc12_id,]
my_huc12_id <- my_huc12$HUC12 %>% as.character()
# if shoreline point is not in a huc 12, find closest one
if(length(my_huc12_id)==0){
my_huc12 <- huc12[which.min(st_distance(shoreline_pt_sf, huc12)),]
my_huc12_id <- my_huc12$HUC12 %>% as.character()
}
}
# convert to graph to find all upstream huc 12s
huc12_edgelist <- huc12 %>% st_drop_geometry() %>% dplyr::select(HUC12, ToHUC)
huc12_network <- huc12_edgelist %>% igraph::graph_from_data_frame()
paths_in <- igraph::all_simple_paths(huc12_network, from = my_huc12_id, mode = "in")
upstream_huc12s <- sapply(paths_in, names) %>% unlist() %>% unique()
# filter from huc 12 object and save
upstream_huc12s_sf <- dplyr::filter(huc12, HUC12 %in% upstream_huc12s) %>%
rbind(my_huc12) # combine with original huc12 in case no upstream ones found
upstream_huc12s_sf_union <- upstream_huc12s_sf %>% st_union()
st_write(upstream_huc12s_sf,
file.path(data_dir_coastalsheds, "ncaa_huc12sheds", sprintf("%s_huc12shed.shp", mypt_id)),
delete_dsn = TRUE) # overwrite if needed
st_write(upstream_huc12s_sf_union,
file.path(data_dir_coastalsheds, "ncaa_huc12sheds_union", sprintf("%s_huc12shed_union.shp", mypt_id)),
delete_dsn = TRUE) # overwrite if needed
}
```
Iterate over all points
```{r}
data_dir_coastalsheds <- "/nfs/khondula-data/coastalsheds"
ncaa_coords_sf_prj <- st_read(file.path(data_dir_coastalsheds, "ncaa_coords", "ncaa_coords_wgs84.shp")) %>%
st_transform(4269)
ncaa_siteIDs <- ncaa_coords_sf_prj$SITE_ %>% unique()
save_huc12shed_for_pt(ncaa_siteID = ncaa_siteIDs_GL[10])
# system.time(save_huc12shed_for_pt(ncaa_siteID = ncaa_siteIDs[3]))
# purrr::walk(ncaa_siteIDs, ~save_huc12shed_for_pt(.x))
```
```{r}
# find which points havent beendone yet
huc12shed_shpfiles <- fs::dir_ls(file.path(data_dir_coastalsheds, "ncaa_huc12sheds_union"), glob = "*.shp")
huc12shed_ids <- basename(huc12shed_shpfiles) %>% stringr::str_replace("_huc12shed_union.shp", "")
huc12shed_ids %>% length()
ncaa_siteIDs_toget <- ncaa_siteIDs[!ncaa_siteIDs %in% huc12shed_ids]
ncaa_siteIDs_GL <- grep("GL", ncaa_siteIDs, value = TRUE)
ncaa_siteIDs_toget %>% as.character()
```
Run on cluster
```{r}
library(rslurm)
pars <- data.frame(ncaa_siteID = ncaa_siteIDs_GL, stringsAsFactors = FALSE)
sjob2 <- rslurm::slurm_apply(save_huc12shed_for_pt, pars,
jobname = "GL",
slurm_options = list(partition = "sesync",
time = "12:00:00"),
nodes = 8, cpus_per_node = 2,
submit = TRUE)
print_job_status(sjob2)
# rslurm::cancel_slurm(sjob2)
```