-
Notifications
You must be signed in to change notification settings - Fork 3
/
processing_code.R
277 lines (237 loc) · 15.7 KB
/
processing_code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# what is the latest year of data available?
# (daily processing for year_latest only covers January-July; see the daily loop)
year_latest <- 2023

# declare root directory: log the detected project root, then append a trailing
# slash so later paste0() calls can build paths by simple concatenation
project.root <- here::here()
print(project.root)
project.folder <- paste0(project.root, '/')

# load required packages
# NOTE(review): maptools, rgeos and rgdal were retired from CRAN in October 2023;
# they are kept here because the script depends on readOGR()/proj4string(), but
# a migration to sf/terra should be planned.
library(data.table)
library(maptools)
library(mapproj)
library(rgeos)
library(rgdal)
library(RColorBrewer)
library(ggplot2)
library(raster)
library(sp)
library(plyr)
library(graticule)
library(zoo)
library(purrr)
# print message detailing what is being processed
# (zip / ct / ct_2010 runs are per-state, so the state code is appended;
# an unrecognised space.res prints nothing, matching the original behavior)
if (space.res %in% c('fips', 'zip', 'ct', 'ct_2010', 'prison', 'native')) {
  msg <- paste0('processing ', year, ' ', dname, ' ', time.res, ' ', space.res)
  if (space.res %in% c('zip', 'ct', 'ct_2010')) {
    msg <- paste0(msg, ' ', state)
  }
  print(msg)
}
# create directory to place output files into:
#   output/<space.res>/[<state>/]<dname>/
dir.output <- paste0(project.folder, "output/")
space.subdir <- switch(space.res,
                       zip     = paste0('zip/', state, '/'),
                       fips    = 'fips/',
                       ct      = paste0('ct/', state, '/'),
                       ct_2010 = paste0('ct_2010/', state, '/'),
                       prison  = 'prison/',
                       native  = 'native/')
# an unrecognised space.res adds no sub-directory (matches original behavior)
if (!is.null(space.subdir)) {
  dir.output <- paste0(dir.output, space.subdir)
}
dir.output <- paste0(dir.output, dname, '/')
# create recursively only when missing (plain if: the condition is scalar,
# so the previous ifelse() construction was unnecessary)
if (!dir.exists(dir.output)) {
  dir.create(dir.output, recursive = TRUE)
}
# load shapefiles of either FIPS or ZIP Codes (ZCTAs), Census Tracts or Prisons
# every branch below must leave `us.main` defined: it is the polygon layer the
# PRISM rasters are summarised over later in the script
if(space.res=='fips'){
# load shapefile of entire United States from https://www.census.gov/geographies/mapping-files/2015/geo/carto-boundary-file.html
us.national = readOGR(dsn=paste0(project.folder,"data/shapefiles/fips/cb_2015_us_county_500k"),layer="cb_2015_us_county_500k")
# remove non-mainland territories (assuming it's for entire mainland US)
# dropped STATEFP codes: 02 (AK), 15 (HI), 60 (AS), 66 (GU), 69 (MP), 71, 72 (PR), 78 (VI)
us.main = us.national[!us.national$STATEFP %in% c("02","15","60","66","69","71","72","78"),]
}
if(space.res=='zip'){
# load zcta by state from https://www2.census.gov/geo/tiger/TIGER2010/ZCTA5/2010/?C=D;O=A
us.national = readOGR(dsn=paste0(project.folder,'data/shapefiles/zips/tl_2010_',state,'_zcta510'),layer=paste0('tl_2010_',state,'_zcta510'))
# rename the 2010-vintage state FIPS column so it matches the other branches
us.national$STATEFP = us.national$STATEFP10 ; us.national$STATEFP10 = NULL
us.national = us.national[us.national$STATEFP %in% c(state),]
# OLD CODE previously in development but stalled
# (kept for reference; skip=1 guarantees the guarded branch never runs)
skip=1; if(skip==0){
# load shapefile (just one state at a time) from https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2021&layergroup=ZIP+Code+Tabulation+Areas
us.national = readOGR(dsn=paste0('~/data/climate/prism/shapefiles/tl_2021_us_zcta520'),layer=paste0('tl_2021_us_zcta520'))
# load zip fips lookup to obtain state fips https://www.huduser.gov/portal/datasets/usps_crosswalk.html
zip.fips.lookup = read.csv('~/data/climate/prism/shapefiles/ZIP_COUNTY_122021.csv',colClasses='character')[,c(1:2)]
# manual addendum for unmatching values based on https://www.zipdatamaps.com/
zip.fips.lookup.addendum = read.csv('~/data/climate/prism/shapefiles/ZIP_COUNTY_122021_addendum.csv',colClasses='character')[,c(1:2)]
zip.fips.lookup = rbind(zip.fips.lookup,zip.fips.lookup.addendum)
# join zip codes to county codes and extract state fips
us.national.data = dplyr::left_join(us.national@data,zip.fips.lookup, by=c('ZCTA5CE20'='zip'))
us.national.data$STATEFP = substr(us.national.data$county,1,2)
us.national.data.zips = subset(us.national.data, STATEFP %in% c(state))$ZCTA5CE20
# attach data with state FIPS back into shapefile
# us.national@data = us.national.data
# what are the zip codes which do not match to a FIPs (and hence a state FIPS code?) and optional saving below
# us.national.data.na <- as.data.frame(us.national.data[is.na(us.national.data$STATEFP),][,c('ZCTA5CE20')])
# names(us.national.data.na) = 'zip'
# write.csv(us.national.data.na, '~/data/climate/prism/shapefiles/ZIP_COUNTY_122021_addendum.csv',row.names = F)
}
# OLD CODE previously in development but stalled
skip=1; if(skip==0){
# us.national = subset(us.national, ZCTA5CE20 %in% us.national.data.zips)
# load shapefile for all ZIP Codes in US from https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2021&layergroup=ZIP+Code+Tabulation+Areas
# us.national = readOGR(dsn=paste0('~/data/climate/prism/shapefiles/tl_2021_us_zcta520'),layer=paste0('tl_2021_us_zcta520'))
}
us.main = us.national
}
if(space.res=='ct'){
# load shapefile (just one state at a time) from https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2021&layergroup=Census+Tracts
us.national = readOGR(dsn=paste0(project.folder,'data/shapefiles/ct/tl_2021_',state,'_tract'),layer=paste0('tl_2021_',state,'_tract'))
us.national = us.national[us.national$STATEFP %in% c(state),]
us.main = us.national
}
if(space.res=='ct_2010'){
# load shapefile (just one state at a time) from https://www2.census.gov/geo/pvs/tiger2010st/
# note the 2010 tracts use the suffixed STATEFP10 column, unlike the 2021 tracts
us.national = readOGR(dsn=paste0(project.folder,'data/shapefiles/ct/tl_2010_',state,'_tract10'),layer=paste0('tl_2010_',state,'_tract10'))
us.national = us.national[us.national$STATEFP10 %in% c(state),]
us.main = us.national
}
if(space.res=='prison'){
# load shapefile that was fixed in covert_prison_shapefile.R
# (the edited layer is assumed to already be in the FIPS projection; the
# commented-out reprojection code below documents how that was established)
us.main = readOGR(dsn=paste0(project.folder,"data/shapefiles/Prison_Boundaries/"),
                  layer="Prison_Boundaries_Edited")
# load shapefile of all prisons in United States from https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::prison-boundaries/about
# projection of this shape file seems to be https://epsg.io/3857
# us.national = readOGR(dsn=paste0(project.folder,"data/shapefiles/Prison_Boundaries/"),layer="Prison_Boundaries")
# us.national$STATEFP = substr(us.national$COUNTYFIPS,1,2)
#
# # remove non-mainland territories (assuming it's for entire mainland US)
# us.main = us.national[!us.national$STATEFP %in% c("02","15","60","66","69","71","72","78","NO"),]
# make projection match the FIPS one on local computer (obtained by code just below)
# us.fips.proj = proj4string(readOGR(dsn=paste0(project.folder,"data/shapefiles/fips/cb_2015_us_county_500k"),layer="cb_2015_us_county_500k"))
# us.fips.proj = proj4string(readOGR(dsn=paste0(project.folder,"data/shapefiles/fips/cb_2015_us_county_500k"),layer="cb_2015_us_county_500k"))
# us.main = spTransform(us.main, CRS("+proj=longlat +datum=NAD83 +no_defs"))
# us.main = spTransform(us.main, CRS(us.fips.proj))
}
if(space.res=='native'){
# load shapefile of entire United States from https://www.census.gov/geographies/mapping-files/2015/geo/carto-boundary-file.html
us.main = readOGR(dsn=paste0(project.folder,"data/shapefiles/native/tl_2020_us_aitsn"),layer="tl_2020_us_aitsn")
}
# record the CRS of the loaded shapefile so that rasters loaded below can be
# reprojected to match it before extraction
original.proj <- proj4string(us.main)
# original.proj = comment(slot(us.main, "proj4string"))
# annual resolution: a single stable 4km PRISM raster covers the whole year
if (time.res == 'annual') {
  annual.path <- paste0('~/data/climate/prism/asc/PRISM_', dname,
                        '_stable_4kmM3_', year, '_all_asc/PRISM_', dname,
                        '_stable_4kmM3_', year, '_asc.asc')
  raster.full <- raster(annual.path)
}
# perform analysis across every day of selected year
# for each day: load the PRISM raster, reproject it to the shapefile CRS where
# needed, compute an area-weighted mean per polygon, and append the day's rows
# to a year-long table (weighted.area.national.total)
if(time.res=='daily'){
# loop through each raster file for each day and summarise
dates = seq(as.Date(paste0('0101',year),format="%d%m%Y"), as.Date(paste0('3112',year),format="%d%m%Y"), by=1)
# do just for half the year for brand new year data
# (only January through July is available for the most recent year)
if(year==year_latest){
dates = seq(as.Date(paste0('0101',year),format="%d%m%Y"), as.Date(paste0('3107',year),format="%d%m%Y"), by=1)
}
# empty dataframe to load summarised national daily values into
weighted.area.national.total = data.frame()
# loop through each day of the year and perform analysis
print(paste0('Processing dates in ',year))
# NOTE(review): iterating a Date vector with for() yields bare numerics, hence
# the repeated as.Date(date) calls; as.Date() on a numeric without an explicit
# origin relies on the "1970-01-01" default (R >= 4.3) -- confirm the R version
# in use or pass origin explicitly
for(date in dates){
print(format(as.Date(date), "%d/%m/%Y"))
day = format(as.Date(date), "%d")
month = format(as.Date(date), "%m")
day.month = paste0(month,day) # MMDD, used in PRISM bil/tif file names
day.month.rh = paste0(month,'-',day) # MM-DD, used in the rhmean file names
# load raster for relevant date
# standard PRISM variables are distributed as .bil files; the latest year's
# archive folder is named for the Jan-Jul range only
if(!(dname%in%c('wbgtmax','rhmean'))){
if(year!=year_latest){
raster.full = raster(paste0('~/data/climate/prism/bil/PRISM_',dname,'_stable_4kmD2_',year,'0101_',year,'1231_bil/PRISM_',dname,'_stable_4kmD2_',year,day.month,'_bil.bil'))
}
if(year==year_latest){
raster.full = raster(paste0('~/data/climate/prism/bil/PRISM_',dname,'_stable_4kmD2_',year,'0101_',year,'0731_bil/PRISM_',dname,'_stable_4kmD2_',year,day.month,'_bil.bil'))
}
raster.full = projectRaster(raster.full, crs=original.proj)
# save an example plot for a specific date
if(year==2000 & day.month=='0101'){
pdf(paste0('~/data/climate/prism/bil/PRISM_',dname,'_stable_4kmD2_',year,'0101_',year,'1231_bil/PRISM_',dname,'_stable_4kmD2_',year,day.month,'_bil_',space.res,'.pdf'))
plot(raster.full); plot(us.main,add=T)
dev.off()
}
}
# wbgtmax rasters are .tif files produced separately
if(dname == 'wbgtmax'){
raster.full = raster(paste0('~/data/climate/prism/tif/PRISM_',dname,'_stable_4kmD2_',year,'0101_',year,'1231_tif/PRISM_',dname,'_stable_4kmD2_',year,day.month,'.tif'))
# if it's for prisons the raster and shape file are have the same projection, otherwise reproject
if(space.res!='prison'){
raster.full = projectRaster(raster.full, crs=original.proj)
}
raster.full = reclassify(raster.full, cbind(-Inf, -1000, NA), right=FALSE) # get rid of huge negative values if it's wbgtmax
# save an example plot for a specific date
if(year==2000 & day.month=='0101'){
pdf(paste0('~/data/climate/prism/tif/PRISM_',dname,'_stable_4kmD2_',year,'0101_',year,'1231_tif/PRISM_',dname,'_stable_4kmD2_',year,day.month,'_',space.res,'.pdf'))
plot(raster.full); plot(us.main,add=T)
dev.off()
}
}
# rhmean rasters live in a separate archive with YYYY-MM-DD file names
if(dname =='rhmean'){
raster.full = raster(paste0('~/data/climate/prism/PRISM_RHmean_1999_2012/PRISM_rhmean_stable_4kmD2_',year,'-',day.month.rh,'.tif'))
# if it's for prisons the raster and shape file are have the same projection, otherwise reproject
# if(space.res!='prison'){
#   raster.full = projectRaster(raster.full, crs=original.proj)
# }
}
# perform over entire of mainland USA (FIPS or ZIP) or chosen state (CENSUS TRACT)
# raster::extract returns one area-weighted mean per polygon in us.main
weighted.area.national = extract(x=raster.full, # raster (x) to extract from
                                 y=us.main, # shapefile (y) to overlay and take values forward
                                 weights = TRUE, # calculate weights used for averaging (area-weighted mean)
                                 normalizeWeights=TRUE, # normalize weights to always add up to 1 if, say, some of the shapefile is not covered by a raster
                                 fun=mean, # take the mean of the values
                                 df=TRUE, # return as a dataframe object
                                 na.rm=TRUE # remove NAs before function (mean) applied so that NAs aren't returned
)
# create some unique ids for each area
# (this relies on extract() returning rows in the same order as us.main polygons)
if(space.res=='fips'){weighted.area.national = data.frame(code=paste0(us.main$STATEFP,us.main$COUNTYFP), weighted.area.national[,2])}
if(space.res=='zip'){weighted.area.national = data.frame(code=us.main$ZCTA5CE10, weighted.area.national[,2])}
if(space.res=='ct'){weighted.area.national = data.frame(code=us.main$GEOID, weighted.area.national[,2])}
if(space.res=='ct_2010'){weighted.area.national = data.frame(code=us.main$GEOID10, weighted.area.national[,2])}
if(space.res=='prison'){weighted.area.national = data.frame(code=us.main$FID, weighted.area.national[,2])}
if(space.res=='native'){weighted.area.national = data.frame(code=us.main$GEOID, weighted.area.national[,2])}
# order by the unique id area code
weighted.area.national = weighted.area.national[order(weighted.area.national$code),]
# fix row names
rownames(weighted.area.national) = NULL
# fix column names
if(space.res=='fips'){names(weighted.area.national) = c('fips',dname)}
if(space.res=='zip'){names(weighted.area.national) = c('zcta',dname)}
if(space.res=='ct'){names(weighted.area.national) = c('ct_id',dname)}
if(space.res=='ct_2010'){names(weighted.area.national) = c('ct_id',dname)}
if(space.res=='prison'){names(weighted.area.national) = c('prison_id',dname)}
if(space.res=='native'){names(weighted.area.national) = c('native_id',dname)}
# add date details
weighted.area.national$date = format(as.Date(date), "%d/%m/%Y")
weighted.area.national$day = day
weighted.area.national$month = month
weighted.area.national$year = year
# add iteratively to total table of year
# rbindlist is used instead of rbind for speed when accumulating many days
weighted.area.national.total = data.table::rbindlist(list(weighted.area.national.total,weighted.area.national))
}
}
# save output as both .rds and .csv, named by spatial resolution (plus state
# for per-state runs), dataset name, time resolution and year
output.label <- switch(space.res,
                       zip     = paste0('zip_', state),
                       fips    = 'fips',
                       ct      = 'census_tract',
                       ct_2010 = 'census_tract_2010',
                       prison  = 'prison',
                       native  = 'native')
# an unrecognised space.res saves nothing (matches original behavior)
if (!is.null(output.label)) {
  file.stem <- paste0(dir.output, 'weighted_area_raster_', output.label, '_',
                      dname, '_', time.res, '_', as.character(year))
  saveRDS(weighted.area.national.total, paste0(file.stem, '.rds'))
  write.csv(weighted.area.national.total, paste0(file.stem, '.csv'),
            row.names = FALSE)
}