Skip to content

Commit

Permalink
Merge pull request #654 from cmu-delphi/ndefries/fread-types
Browse files Browse the repository at this point in the history
Handle cases where `value` and `quantile` columns can't be read as numeric
  • Loading branch information
dshemetov committed Jul 28, 2023
2 parents fa4bc21 + 811608e commit 3f6548f
Showing 1 changed file with 42 additions and 14 deletions.
56 changes: 42 additions & 14 deletions R-packages/evalcast/R/get_covidhub_predictions.R
Expand Up @@ -237,12 +237,21 @@ get_forecaster_predictions <- function(covidhub_forecaster_name,
forecast_date,
covidhub_forecaster_name)
pred <- fread(filename,
na.strings = c("\"NA\"", "NA"),
colClasses = c(location = "character",
quantile = "double",
value = "double",
target = "character",
type = "character"),
# There are several different missing value encodings. Read them all as `NA`.
na.strings = c("\"NA\"", "NA", "NULL", "\"NULL\"", "\" NA\""),
# Some values in PSI-DRAFT and PSI-DICE value columns have leading
# whitespace for some dates. `fread` can't read these in as
# numerics, so we need to read them as character and later cast to
# double.
colClasses=list(character=c(
"forecast_date",
"target",
"target_end_date",
"type",
"location",
"quantile",
"value"
)),
data.table = FALSE,
showProgress = FALSE)
# Specifying the date conversion after reading significantly speeds up loading
Expand All @@ -252,7 +261,11 @@ get_forecaster_predictions <- function(covidhub_forecaster_name,

pcards[[forecast_date]] <- pred %>%
process_target(remove = TRUE) %>%
mutate(forecaster = covidhub_forecaster_name) %>%
mutate(
forecaster = covidhub_forecaster_name,
quantile = as.double(.data$quantile),
value = as.double(.data$value)
) %>%
filter_predictions(forecast_type, incidence_period, signal) %>%
select_pcard_cols()
}
Expand Down Expand Up @@ -337,12 +350,24 @@ get_forecaster_predictions_alt <- function(covidhub_forecaster_name,
wait <- base_wait * 2 ^ (attempt - 1)
# If the read attempt succeeds, returns a data frame; otherwise a `try-error` object
read_status <- try({
fread(target_url, showProgress = FALSE, data.table = FALSE,
colClasses=list(
character=c("forecast_date", "target", "target_end_date", "type", "location"),
numeric = c("quantile", "value")
)
)
fread(target_url,
# There are several different missing value encodings. Read them all as `NA`.
na.strings = c("\"NA\"", "NA", "NULL", "\"NULL\"", "\" NA\""),
# Some values in PSI-DRAFT and PSI-DICE value columns have leading
# whitespace for some dates. `fread` can't read these in as
# numerics, so we need to read them as character and later cast to
# double.
colClasses=list(character=c(
"forecast_date",
"target",
"target_end_date",
"type",
"location",
"quantile",
"value"
)),
data.table = FALSE,
showProgress = FALSE)
})

if (inherits(read_status, "try-error")) {
Expand Down Expand Up @@ -374,7 +399,10 @@ get_forecaster_predictions_alt <- function(covidhub_forecaster_name,
select(-.data$target) %>%
mutate(forecaster = covidhub_forecaster_name,
forecast_date = lubridate::ymd(.data$forecast_date),
target_end_date = lubridate::ymd(.data$target_end_date)) %>%
target_end_date = lubridate::ymd(.data$target_end_date),
quantile = as.double(.data$quantile),
value = as.double(.data$value)
) %>%
filter_predictions(forecast_type, incidence_period, signal) %>%
select_pcard_cols()

Expand Down

0 comments on commit 3f6548f

Please sign in to comment.