Skip to content

Latest commit

 

History

History
719 lines (547 loc) · 27.5 KB

S04_preparation_of_data_for_modelling.md

File metadata and controls

719 lines (547 loc) · 27.5 KB
title author date output
Preparation of data for modelling
Florencia Grattarola
2022-09-08
html_document
keep_md toc highlight theme number_sections
true
true
pygments
flatly
true

Analysis for the Herpailurus yagouaroundi, with the covariates bio7, bio15, npp, and elev.

Species data (PA & PO)

  • R Libraries
library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
  • Basemaps
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
Latam_projected <- st_read('data/Latam_vector.shp', quiet = T)
Latam_countries <- sf::st_read('data/Latam_vector_countries.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)
Latam <- st_union(st_make_valid(Latam_projected))
Latam.raster <- terra::rast('data/Latam_raster.tif')
Latam.raster.countries <- terra::rast('data/Latam_raster_countries.tif')

# IUCN range distribution
yaguarundi_IUCN <- sf::st_read('big_data/yaguarundi_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(Latam_countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(yaguarundi_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

  • PA and PO datasets
data_carnivores_PO <- read_csv('data/data_PO.csv')
data_carnivores_PA <- read_csv('data/data_PA.csv')

Check data for each years on example species

# Presence-only
data_carnivores_PO %>% filter(species == 'Herpailurus yagouaroundi') %>% filter(year>=2000) %>% 
  mutate(period=ifelse(year<=2013, 'first period: 2000-2013', 'second period: 2014-2021')) %>%
  group_by(species, period) %>% count() %>% pivot_wider(names_from = 'period', values_from = n) %>% kable()
species first period: 2000-2013 second period: 2014-2021
Herpailurus yagouaroundi 141 286
# Presence-absence
data_carnivores_PA %>% filter(species == 'Herpailurus yagouaroundi' & presence ==1) %>% 
  mutate(period=ifelse(year(date_start)<=2013, 'first period: 2000-2013', 'second period: 2014-2021')) %>%
  group_by(species, period) %>% count() %>% pivot_wider(names_from = 'period', values_from = n) %>% kable()
species first period: 2000-2013 second period: 2014-2021
Herpailurus yagouaroundi 324 290

Presence-absence data

Create blobs.

Blobs contain data on total effort, total temporal span, a list of the records IDs that are included for the blob and an area size of the combined areas.

data_PA_GIS <- data_carnivores_PA %>% 
  filter(species == 'Herpailurus yagouaroundi') %>% 
  mutate(year=lubridate::year(date_end)) %>% 
  filter(year>=2000, year<=2021) %>% 
  as.data.frame() %>% 
  sf::st_as_sf(coords=c('decimalLongitude', 'decimalLatitude')) %>% 
  st_set_crs(4326) 

PA <- st_transform(data_PA_GIS, crs=equalareaCRS)

# create a blob for all sites
# blobs are definded as areas where some sampling was done over the entire period
periodStart <- ymd('2000-01-01')
periodEnd <- ymd('2014-01-01')
periodMax <- ymd('2020-12-31')

PA_allsites <- PA %>%
  mutate(start_date=as_date(ifelse(date_end-date_start<0, date_end, date_start)),
         end_date=as_date(ifelse(date_end-date_start<0, date_start, date_end))) %>%
  mutate(span=interval(date_start, date_end),
         target= interval(periodEnd, today()),
         period=ifelse(span %within% target, 'time2', 'time1')) %>% 
  distinct(independentLocation, period, .keep_all = T) 

PA_allsites_buff <- st_buffer(PA_allsites, sqrt(PA_allsites$area_m2/pi))

blobs_time1 <- PA_allsites_buff %>% filter(period=='time1') %>% 
  st_union() %>% st_cast('POLYGON') %>% st_sf('ID'= 1:length(.)) 
blobs_time2 <- PA_allsites_buff %>% filter(period=='time2') %>% 
  st_union() %>% st_cast('POLYGON') %>% st_sf('ID'= 1:length(.)) 


# to calculate the effort in days for each blob, we need to differenciate the two periods
PA_allsites_effort_and_time  <- PA %>%
  mutate(start_date=as_date(ifelse(date_end-date_start<0, date_end, date_start)),
         end_date=as_date(ifelse(date_end-date_start<0, date_start, date_end))) %>%
  mutate(span=interval(date_start, date_end),
         target= interval(periodEnd, today()),
         period=ifelse(span %within% target, 'time2', 'time1')) %>% 
  group_by(independentLocation, independentYearSpan, area_m2) %>%
  summarise(effort=max(effort),
            period=paste(unique(period)),
            recordIDs=paste(unique(recordID), collapse = ';'),
            maxEndDate=max(end_date),
            minStartDate=min(start_date)) %>% 
  mutate(maxTemporalSpan= maxEndDate-minStartDate)

# to calculate the effort in days for each blob, we need to differenciate the two periods
blobs_efforttime_time1 <- st_join(blobs_time1, 
                            PA_allsites_effort_and_time %>% filter(period=='time1'),
                            left=TRUE)  %>% 
  group_by(ID) %>% 
  summarise(effort=sum(effort),
            temporalSpan=sum(maxTemporalSpan),
            recordIDs=paste(unique(recordIDs), collapse = ';')) %>% 
  mutate(blobArea=st_area(.)) 


blobs_efforttime_time2 <- st_join(blobs_time2, 
                                PA_allsites_effort_and_time %>% filter(period=='time2'),
                                left=TRUE)  %>% 
  group_by(ID) %>% 
  summarise(effort=sum(effort),
            temporalSpan=sum(maxTemporalSpan),
            recordIDs=paste(unique(recordIDs), collapse = ';')) %>% 
  mutate(blobArea=st_area(.))

Calculate PA blobs

# function  to create blobs for time1 and time2 for each species
calculate_PA_sp_blobs <- function(PA, blobs, sp, time){
  periodStart <- ymd('2000-01-01')
  periodEnd <- ymd('2014-01-01')
  df_period <- PA %>% filter(species==sp & presence==1) %>% 
    filter(date_start>=periodStart) %>% 
    mutate(span=interval(date_start, date_end),
           target=interval(periodEnd, today()),
           period=ifelse(span %within% target, 'time2', 'time1')) %>% 
    filter(period==time) %>% select(species, presence)
  
  df_period_blobs <- st_join(blobs, df_period,
                             left=TRUE, join = st_contains) %>%
    group_by(ID) %>% 
    summarise(presence=max(presence),
              temporalSpan=max(temporalSpan),
              effort=max(effort),
              blobArea=max(blobArea)) %>% 
    mutate(presence=ifelse(is.na(presence), 0, presence))
  return(df_period_blobs)
}

# Herpailurus yagouaroundi
PA_hyagouaroundi_time1_blobs <- calculate_PA_sp_blobs(PA, blobs_efforttime_time1, 'Herpailurus yagouaroundi', 'time1')
PA_hyagouaroundi_time2_blobs <- calculate_PA_sp_blobs(PA, blobs_efforttime_time2, 'Herpailurus yagouaroundi', 'time2')

Plots

data_PA_time1_time2 <- rbind(PA_hyagouaroundi_time1_blobs %>% mutate(period='time1'), 
                             PA_hyagouaroundi_time2_blobs %>% mutate(period='time2'))

gg_PA_time1 <- ggplot() + 
    geom_sf(data=Latam_countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=yaguarundi_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PA_time1_time2 %>% filter(period=='time1') %>% st_buffer(20000), 
            aes(fill=factor(presence)), col=NA, show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(yaguarundi_IUCN)
    scale_fill_manual(values = c("#474545","#E31A1C"))+
    scale_color_manual(values = c("#474545","#E31A1C")) +
    labs(title='PA', subtitle='time1: 2000-2013', col='') +
    theme_bw() +    
    theme(text=element_text(size = 14)) 

gg_PA_time2 <- ggplot() + 
    geom_sf(data=Latam_countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=yaguarundi_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PA_time1_time2 %>% filter(period=='time2') %>% st_buffer(20000), 
            aes(fill=factor(presence), col=factor(presence)), show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(yaguarundi_IUCN)
    scale_fill_manual(values = c("#434445","#1F78B4"))+
    scale_color_manual(values = c("#434445","#1F78B4")) +
    labs(title='', subtitle='time2: 2014 to 2021', col='') +
    theme_bw() +    
    theme(text=element_text(size = 14)) 

gg_PA_time1 + gg_PA_time2

# ggsave(plot = gg_PA_time1 + gg_PA_time2, 
#        filename='docs/figs/PA_data.svg', device = 'svg', width=10, height=10, dpi=300)

Presence-only data

Calculate PO rasters

# rasterisation of the presence-only data
data_PO_GIS <- data_carnivores_PO %>% 
  filter(species == 'Herpailurus yagouaroundi') %>% 
  mutate(year=lubridate::year(eventDate)) %>% 
  filter(year>=2000, year<=2021) %>% 
  as.data.frame() %>% 
  sf::st_as_sf(coords=c('decimalLongitude', 'decimalLatitude')) %>% 
  st_set_crs(4326) 

PO <- st_transform(data_PO_GIS, crs=equalareaCRS)

# functions to create presence-only rasters for time1 and time2 for each species
calculate_PO_sp_period_raster <- function(PO, sp, raster, time){
  periodStart <- 2000
  periodEnd <- 2014
  df_period <- PO %>% filter(species==sp & !is.na(year)) %>% 
    filter(year>=periodStart) %>% 
    mutate(period=ifelse(year>=periodEnd , 'time2', 'time1'),
           occurrence=1) %>% filter(period==time)
  df_period_raster <- terra::rasterize(x = vect(df_period),
                                       y = raster,
                                       field = 'occurrence',
                                       fun = 'length',
                                       sum = T, background=0) %>% mask(., raster)
  
  df_country_raster <- terra::rasterize(x = vect(df_period),
                                       y = raster,
                                       field = 'countryCode',
                                       fun = 'first') %>% mask(., raster)
  
  names(df_period_raster) <- 'count'
  names(df_country_raster) <- 'country'
  df_raster <- c(df_period_raster, df_country_raster)
  return(df_raster)
}

# Herpailurus yagouaroundi
PO_hyagouaroundi_time1_raster <- calculate_PO_sp_period_raster(PO, 'Herpailurus yagouaroundi', Latam.raster, 'time1')
PO_hyagouaroundi_time2_raster <- calculate_PO_sp_period_raster(PO, 'Herpailurus yagouaroundi', Latam.raster, 'time2')

Plots

data_PO_time1_time2 <- data_PO_GIS %>% 
  mutate(period=ifelse(year>=2014 , 'time2', 'time1'), occurrence=1) 

gg_PO_time1 <- ggplot() + 
    geom_sf(data=Latam_countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=yaguarundi_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PO_time1_time2 %>% filter(period=='time1'), col="#E31A1C", size=0.5, show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(yaguarundi_IUCN)
    labs(title='PO time1: 2000-2013', col='') +
    theme_bw() +
    theme(text=element_text(size = 14))

gg_PO_time2 <- ggplot() + 
    geom_sf(data=Latam_countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=yaguarundi_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PO_time1_time2 %>% filter(period=='time2'), col="#1F78B4", size=0.5, show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(yaguarundi_IUCN)
    labs(title='PO time2: 2014 to 2021', col='') +
    theme_bw() +
    theme(text=element_text(size = 14))
    
gg_PO_time1 + gg_PO_time2

# ggsave(plot = gg_PO_time1 + gg_PO_time2, 
#        filename='docs/figs/PO_data.svg', device = 'svg', width=10, height=10, dpi=300)

Covariates data

  • bio7: Temperature Annual Range
  • bio15: Precipitation Seasonality
  • npp: Net Primary Production (NPP)
  • elev: Elevation
bio <- rast('big_data/bio_high.tif')
elev <- rast('big_data/elev_high.tif')
npp <- rast('big_data/npp_high.tif')
npp <- resample(npp, bio, method='bilinear') 
## 
|---------|---------|---------|---------|
=========================================
                                          
names(npp) <- 'npp'

# environmental layers for PA
env <- c(bio[[c('bio_7','bio_15')]], elev, npp) %>% 
  classify(cbind(NaN, NA))
## 
|---------|---------|---------|---------|
=========================================
                                          
# resample and scaled values for PO
env_100k_resampled <- terra::resample(env, Latam.raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% scale() # scaled values to 0 mean and sd of 1

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Data for the model

Presence-only data

# calculate area in each raster cell
PO_hyagouaroundi.area <- terra::cellSize(PO_hyagouaroundi_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_hyagouaroundi.coords <- terra::xyFromCell(PO_hyagouaroundi_time1_raster, cell=1:ncell(PO_hyagouaroundi_time1_raster)) %>% as.data.frame()
names(PO_hyagouaroundi.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_hyagouaroundi.cell <- 1:ncell(PO_hyagouaroundi_time1_raster)

# with the environmental variables values for each cell
PO_hyagouaroundi.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_hyagouaroundi.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_hyagouaroundi_time1.count <- PO_hyagouaroundi_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_hyagouaroundi_time2.count <- PO_hyagouaroundi_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_hyagouaroundi.countries <- Latam.raster.countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_hyagouaroundi_time1.model.data <- data.frame(PO_hyagouaroundi.coords,
                                            pixel = PO_hyagouaroundi.cell[],
                                            area = PO_hyagouaroundi.area,
                                            pt.count = PO_hyagouaroundi_time1.count,
                                            env = PO_hyagouaroundi.env,
                                            acce = PO_hyagouaroundi.acce[], 
                                            country = PO_hyagouaroundi.countries[])

PO_hyagouaroundi_time2.model.data <- data.frame(PO_hyagouaroundi.coords,
                                            pixel = PO_hyagouaroundi.cell[],
                                            area = PO_hyagouaroundi.area,
                                            pt.count = PO_hyagouaroundi_time2.count,
                                            env = PO_hyagouaroundi.env,
                                            acce = PO_hyagouaroundi.acce[], 
                                            country = PO_hyagouaroundi.countries[])

Check data

head(PO_hyagouaroundi_time1.model.data) %>% kable()
X Y pixel area count env.bio_7 env.bio_15 env.elev env.npp acce country
-4357686 3867905 1 NA NA NA NA NA NA NA NA
-4257686 3867905 2 NA NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 0.9201098 0.2185891 -0.0964722 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 0.2354014 1.7084783 -0.2009575 -1.597718 0.0209173 47
-3957686 3867905 5 NA NA -0.2380332 2.4191863 -0.4685130 -1.786366 0.0270081 NA
-3857686 3867905 6 NA NA -0.2966836 2.3485775 -0.2928835 -1.858230 0.0456958 NA
head(PO_hyagouaroundi_time2.model.data) %>% kable()
X Y pixel area count env.bio_7 env.bio_15 env.elev env.npp acce country
-4357686 3867905 1 NA NA NA NA NA NA NA NA
-4257686 3867905 2 NA NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 0.9201098 0.2185891 -0.0964722 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 0.2354014 1.7084783 -0.2009575 -1.597718 0.0209173 47
-3957686 3867905 5 NA NA -0.2380332 2.4191863 -0.4685130 -1.786366 0.0270081 NA
-3857686 3867905 6 NA NA -0.2966836 2.3485775 -0.2928835 -1.858230 0.0456958 NA

Presence-absence data

# calculate area, coordinates, and point count in each raster cell
PA_hyagouaroundi.area.time1 <- as.numeric(PA_hyagouaroundi_time1_blobs$blobArea) 
PA_hyagouaroundi.area.time2 <- as.numeric(PA_hyagouaroundi_time2_blobs$blobArea) 

PA_hyagouaroundi.coords.time1 <- st_coordinates(st_centroid(PA_hyagouaroundi_time1_blobs)) %>% as_tibble()
PA_hyagouaroundi.coords.time2 <- st_coordinates(st_centroid(PA_hyagouaroundi_time2_blobs)) %>% as_tibble()

PA_hyagouaroundi.pixel.time1 <- terra::cells(PO_hyagouaroundi_time1_raster, vect(st_centroid(PA_hyagouaroundi_time1_blobs))) 
PA_hyagouaroundi.pixel.time2 <- terra::cells(PO_hyagouaroundi_time2_raster, vect(st_centroid(PA_hyagouaroundi_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_hyagouaroundi.env.time1 <- terra::extract(x = env, y = vect(PA_hyagouaroundi_time1_blobs), fun = mean, rm.na=T) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_hyagouaroundi.env.time2 <- terra::extract(x = env, y = vect(PA_hyagouaroundi_time2_blobs), fun = mean, rm.na=T) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_hyagouaroundi_time1.model.data <- data.frame(pixel = PA_hyagouaroundi.pixel.time1, 
                                            PA_hyagouaroundi.coords.time1,
                                            area = PA_hyagouaroundi.area.time1,
                                            presabs = PA_hyagouaroundi_time1_blobs$presence,
                                            effort = PA_hyagouaroundi_time1_blobs$effort,
                                            env = PA_hyagouaroundi.env.time1[,2:5])

PA_hyagouaroundi_time2.model.data <- data.frame(pixel = PA_hyagouaroundi.pixel.time2, 
                                            PA_hyagouaroundi.coords.time2,
                                            area = PA_hyagouaroundi.area.time2,
                                            presabs = PA_hyagouaroundi_time2_blobs$presence,
                                            effort = PA_hyagouaroundi_time2_blobs$effort,
                                            env = PA_hyagouaroundi.env.time2[,2:5])

Check data

head(PA_hyagouaroundi_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_7 env.bio_15 env.elev env.npp
1 1661 -1768526 1918184 241873943 0 1043 -0.2487598 0.7283218 -0.4777685 0.7591617
2 1660 -1844290 1927648 537081878 1 2404 0.0712062 1.4144742 -0.9077747 0.3141044
3 1661 -1731415 1940145 2241827859 1 11078 -0.2541284 0.7591892 -0.6424854 0.8065265
4 1660 -1820735 1948513 162791738 1 1291 -0.0196676 1.2459454 -0.6135484 0.5779474
5 1661 -1782702 1959695 756798182 0 1779 -0.1176465 0.9874095 -0.3072624 0.6631907
6 1567 -2552096 2041811 19990863 1 3300 1.3360405 2.4070547 1.0770084 -0.0455240
head(PA_hyagouaroundi_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_7 env.bio_15 env.elev env.npp
1 3734 -817257.8 -439796.5 2.229446e+09 0 11095 3.561857 0.4501938 -0.4767570 -0.7574908
2 3735 -779680.2 -398014.3 2.991334e+08 1 9265 2.011734 0.9884363 -1.0276090 -0.2899658
3 2442 -1059235.8 1039140.4 2.351660e+08 1 390 -1.668160 0.0208602 -0.2288443 0.5253182
4 2442 -1007836.0 1039301.4 7.849412e+07 0 30 NA NA NA NA
5 1829 -2160709.8 1781923.2 7.496574e+03 1 5344 1.910035 1.1192408 0.2237356 -0.0220329
6 1829 -2186089.8 1791273.0 3.298492e+03 1 3282 1.972451 0.4821331 0.3683776 0.0162277

Save data

saveRDS(PO_hyagouaroundi_time1.model.data, 'data/data_hyagouaroundi_PO_time1.rds')
saveRDS(PO_hyagouaroundi_time2.model.data, 'data/data_hyagouaroundi_PO_time2.rds')
saveRDS(PA_hyagouaroundi_time1.model.data, 'data/data_hyagouaroundi_PA_time1.rds')
saveRDS(PA_hyagouaroundi_time2.model.data, 'data/data_hyagouaroundi_PA_time2.rds')

# blobs
saveRDS(PA_hyagouaroundi_time1_blobs, 'data/PA_hyagouaroundi_time1_blobs.rds')
saveRDS(PA_hyagouaroundi_time2_blobs, 'data/PA_hyagouaroundi_time2_blobs.rds')

# presence-only rasters
terra::writeRaster(PO_hyagouaroundi_time1_raster, 'data/PO_hyagouaroundi_time1_raster.tif', overwrite=TRUE)
terra::writeRaster(PO_hyagouaroundi_time2_raster, 'data/PO_hyagouaroundi_time2_raster.tif', overwrite=TRUE)

Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_hyagouaroundi_time1_blobs), 
                                   vect(PA_hyagouaroundi_time2_blobs)), 
                                     fun = mean, rm.na=T) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
##      bio_7             bio_15           elev               npp       
##  Min.   :  6.456   Min.   :12.07   Min.   :   3.979   Min.   :    0  
##  1st Qu.: 49.491   1st Qu.:28.12   1st Qu.: 214.063   1st Qu.: 7038  
##  Median : 67.208   Median :29.85   Median : 499.201   Median : 9516  
##  Mean   : 62.920   Mean   :29.82   Mean   : 594.958   Mean   : 9885  
##  3rd Qu.: 78.410   3rd Qu.:32.21   3rd Qu.: 838.588   3rd Qu.:13024  
##  Max.   :170.527   Max.   :37.80   Max.   :4867.706   Max.   :21085  
##  NA's   :12        NA's   :12      NA's   :15         NA's   :12
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
## PA bio_7 -> range: 170.527460734049 - IQR: 28.9187885995615
## PA bio_15 -> range: 37.8010549545288 - IQR: 4.09203597516754
## PA elev -> range: 4867.70556640625 - IQR: 624.524710117106
## PA npp -> range: 21085.2897600446 - IQR: 5985.57916738513
# covariates layers for PO
env_PO <- terra::resample(env, Latam.raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
##      bio_7             bio_15           elev               npp           
##  Min.   :  6.735   Min.   :10.44   Min.   :   2.195   Min.   :    1.935  
##  1st Qu.: 41.522   1st Qu.:29.95   1st Qu.: 140.181   1st Qu.: 4567.937  
##  Median : 60.394   Median :31.86   Median : 294.976   Median : 8232.417  
##  Mean   : 60.435   Mean   :30.66   Mean   : 604.102   Mean   : 7782.353  
##  3rd Qu.: 78.506   3rd Qu.:33.26   3rd Qu.: 692.125   3rd Qu.:10876.451  
##  Max.   :154.105   Max.   :41.97   Max.   :4462.538   Max.   :17393.445
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
## PO bio_7 -> range: 154.105056762695 - IQR: 36.9837703704834
## PO bio_15 -> range: 41.9655418395996 - IQR: 3.31285381317139
## PO elev -> range: 4462.5380859375 - IQR: 551.944019317627
## PO npp -> range: 17393.4453125 - IQR: 6308.51477050781

Covariates

bio7, bio15, npp and elevation

library(patchwork)

# use unscaled values
env_100k_resampled <- terra::resample(env, Latam.raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

elev.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_resampled[["elev"]]) +
    tm_raster(palette = 'BrBG', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(Latam_projected) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_resampled[["npp"]]) +
    tm_raster(palette = 'BuGn', midpoint = NA, style= "cont")  + 
    tm_shape(Latam_projected) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_7.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_resampled[["bio_7"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont")  + 
    tm_shape(Latam_projected) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_15.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_resampled[["bio_15"]]) +
    tm_raster(palette = 'PuOr', midpoint = NA, style= "cont")  + 
    tm_shape(Latam_projected) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

elev.plot 

![](S04_preparation_of_data_for_modelling_files/figure-html/covariates plots-1.png)

npp.plot

![](S04_preparation_of_data_for_modelling_files/figure-html/covariates plots-2.png)

bio_7.plot

![](S04_preparation_of_data_for_modelling_files/figure-html/covariates plots-3.png)

bio_15.plot

![](S04_preparation_of_data_for_modelling_files/figure-html/covariates plots-4.png)