PecanProject · mdietze · Jun 4, 2024 · Mar 7, 2024 · May 16, 2024 · May 16, 2024
diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
@@ -3,36 +3,32 @@
 ##' @author Joshua Ploshay
 ##'
 ##' @param data  In quotes, file path for .rds containing ensemble data.
-##' @param focus_year In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
+##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @param date In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
 ##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
-##' @param covariates In quotes, file path of SpatRaster stack, used as predictors in randomForest. Layers within stack should be named.
-##' @param cords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @param covariates Should be loaded in using 'covariates' instructions in inst folder
 ##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
 ##'
 ##' @description This function uses the randomForest model.
 ##'
 ##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 
 
-NA_downscale <- function(data, cords, covariates, focus_year, C_pool){
-
-  # Read in the covariates and set CRS to EPSG:4326
-  covariates <- terra::rast(covariates) # ADD package to every function
-  terra::crs(covariates) <- "EPSG:4326"
+NA_downscale <- function(data, coords, date, C_pool){
 
   # Read the input data and site coordinates
   input_data <- readRDS(data)
-  site_coordinates <- terra::vect(readr::read_csv(cords), geom=c("lon", "lat"), crs="EPSG:4326")
+  site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")
 
   # Extract the carbon data for the specified focus year
-  index <- which(names(input_data) == focus_year)
+  index <- which(names(input_data) == date)
   data <- input_data[[index]]
   carbon_data <- as.data.frame(t(data[which(names(data) == C_pool)]))
   names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
 
   # Extract predictors from covariates raster using site coordinates
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates)) 
-  predictors <- dplyr::select(predictors, -1)
+  predictors <- dplyr::select(predictors, -ID)
 
   # Combine each ensemble member with all predictors
   ensembles <- list()
@@ -61,25 +57,25 @@ NA_downscale <- function(data, cords, covariates, focus_year, C_pool){
   }
 
   # Train a random forest model for each ensemble member using the training data
-  output <- list()
+  rf_output <- list()
   for (i in 1:length(ensembles)) {
-    output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
-                                data = ensembles[[i]][[1]],
-                                ntree = 1000,
-                                na.action = stats::na.omit,
-                                keep.forest = T,
-                                importance = T)
+    rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
+                                                 data = ensembles[[i]][[1]],
+                                                 ntree = 1000,
+                                                 na.action = na.omit,
+                                                 keep.forest = T,
+                                                 importance = T)
   }
 
   # Generate predictions (maps) for each ensemble member using the trained models
-  maps <- list(ncol(output))
-  for (i in 1:length(output)) {
+  maps <- list(ncol(rf_output))
+  for (i in 1:length(rf_output)) {
     maps[[i]] <- terra::predict(object = covariates,
-                         model = output[[i]],na.rm = T)
+                                model = rf_output[[i]],na.rm = T)
   }
 
   # Organize the results into a single output list
-  downscale_output <- list(ensembles, output, maps)
+  downscale_output <- list(ensembles, rf_output, maps)
 
   # Rename each element of the output list with appropriate ensemble numbers
   for (i in 1:length(downscale_output)) {

diff --git a/modules/assim.sequential/inst/covariates.R b/modules/assim.sequential/inst/covariates.R
@@ -0,0 +1,109 @@
+##' @title Covariate Data Prep
+##' @author Joshua Ploshay
+##'
+##' @description This script lists the code needed to download, resample, and 
+##' stack the covariate data used in the downscale_function.R sript.
+##' 
+##' @return A spatraster object consisting of a stack of maps with labeled layers
+
+#### WorldClim ####
+# WorldClim v2.1 (1970-2000) historical climate data
+# Data can be found at https://www.worldclim.org/data/worldclim21.html
+# Product is a zip file containing 12 .tif files, one for each month
+# Use code below to average the 12 files to obtain one map
+# 2023 REU project used 10 minute spatial resolution
+
+## Solar Radiation (kJ m-2 day-1)
+srad <- terra::rast(list.files(path = "/projectnb/dietzelab/jploshay/pecan_copy/jploshay/10m_srad",
+                               pattern='.tif$',
+                               all.files= T,
+                               full.names= T))
+srad <- terra::app(srad, mean)
+
+
+## Vapor Pressure (kPa)
+vapr <- terra::rast(list.files(path = "/projectnb/dietzelab/jploshay/pecan_copy/jploshay/10m_vapr",
+                               pattern='.tif$',
+                               all.files= T,
+                               full.names= T)) 
+vapr <- terra::app(vapr, mean)
+
+
+## Average Temperature (*C)
+tavg <- terra::rast(list.files(path = "/projectnb/dietzelab/jploshay/pecan_copy/jploshay/avg_temp_prep/WorldClim",
+                               pattern='.tif$',
+                               all.files= T,
+                               full.names= T))
+tavg <- terra::app(tavg, mean)
+
+
+## Total Precipitation (mm)
+prec <- terra::rast(list.files(path = "/projectnb/dietzelab/jploshay/pecan_copy/jploshay/total_prec",
+                               pattern='.tif$',
+                               all.files= T,
+                               full.names= T))
+prec <- terra::app(prec, mean)
+
+
+#### SoilGrids ####
+# geodata::soil_world() pulls data from the SoilGRIDS database
+# More information on pulling different soil data can be found in the geodata 
+# manual https://cran.r-project.org/web/packages/geodata/geodata.pdf
+
+## Soil pH
+phh2o <- geodata::soil_world(var = "phh2o", depth = 5, stat = "mean", path = tempdir()) 
+
+## Soil Nitrogen (g kg-1)
+nitrogen <- geodata::soil_world(var = "nitrogen", depth = 5, stat = "mean", path = tempdir())
+
+## Soil Organic Carbon (g kg-1)
+soc <- geodata::soil_world(var = "soc", depth = 5, stat = "mean", path = tempdir())
+
+## Soil Sand (%)
+sand <- geodata::soil_world(var = "sand", depth = 5, stat = "mean", path = tempdir())
+
+
+#### Land Cover ####
+GLanCE_extract <- function(pattern, path) {
+  files <- list.files(path = "/projectnb/dietzelab/dietze/glance2012/e4ftl01.cr.usgs.gov/MEASURES/GLanCE30.001/2012.07.01", #make this path default
+                      all.files = T,
+                      full.names = T,
+                      pattern)
+  # empty .vrt file path
+  vrtfile <- paste0(tempfile(), ".vrt") 
+  # "connects" tiles together that returns SpatRaster
+  GLanCE_product <- terra::vrt(files, vrtfile, overwrite = T) 
+  return(GLanCE_product)
+}
+
+# Integer identifier for class in the current year
+land_cover <- GLanCE_extract(pattern = "NA_LC.tif$")
+
+
+#### Resample and Stack Maps ####
+# Define the extent to crop the covariates to North America
+NA_extent <- terra::ext(-178.19453125, -10, 7.22006835937502, 83.5996093750001)
+
+# Stack WorldClim maps 
+WorldClim <- c(tavg, srad, prec, vapr)
+
+# Crop WolrdClim stack to North America using North America extent
+NA_WorldClim <- terra::crop(WorldClim, NA_extent)
+names(NA_WorldClim) <- c("tavg", "srad", "prec", "vapr")
+
+# Stack SoilGrids maps 
+SoilGrids <- c(phh2o, nitrogen, soc, sand)
+
+# Crop SoilGrids stack to North America using North America extent
+NA_SoilGrids <- terra::crop(SoilGrids, NA_extent)
+names(NA_SoilGrids) <- c("phh2o", "nitrogen", "soc", "sand")
+
+# Resample SoilGrids to match WorldClim maps
+NA_SoilGrids <- terra::resample(NA_WorldClim, NA_SoilGrids, method = 'bilinear') # made change
+
+# Resample land cover to match WorldClim maps (~25 min)
+land_cover <- terra::resample(land_cover, NA_WorldClim, method = 'near')
+names(land_cover) <- "land_cover"
+
+# Stack all maps 
+covariates <- c(NA_WorldClim, NA_SoilGrids, land_cover)