Skip to content

Commit

Permalink
Merge pull request #563 from walkerke/pep2023
Browse files Browse the repository at this point in the history
Add support for 2023 PEP and fix #544
  • Loading branch information
walkerke committed Mar 18, 2024
2 parents ba0d73f + 9363bde commit 1624936
Show file tree
Hide file tree
Showing 49 changed files with 212 additions and 286 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,12 +1,12 @@
Package: tidycensus
Type: Package
Title: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames
Version: 1.6.2
Version: 1.6.3
Authors@R: c(
person(given = "Kyle", family = "Walker", email="kyle@walker-data.com", role=c("aut", "cre")),
person(given = "Matt", family = "Herman", email = "mfherman@gmail.com", role = "aut"),
person(given = "Kris", family = "Eberwein", email = "eberwein@knights.ucf.edu", role = "ctb"))
Date: 2024-03-05
Date: 2024-03-18
URL: https://walker-data.com/tidycensus/
BugReports: https://github.com/walkerke/tidycensus/issues
Description: An integrated R interface to several United States Census Bureau
Expand Down
133 changes: 85 additions & 48 deletions R/estimates.R
@@ -1,5 +1,7 @@
#' Get data from the US Census Bureau Population Estimates Program
#'
#' The \code{get_estimates()} function requests data from the US Census Bureau's Population Estimates Program (PEP) datasets. The PEP datasets are defined by the US Census Bureau as follows: "The Census Bureau's Population Estimates Program (PEP) produces estimates of the population for the United States, its states, counties, cities, and towns, as well as for the Commonwealth of Puerto Rico and its municipios. Demographic components of population change (births, deaths, and migration) are produced at the national, state, and county levels of geography. Additionally, housing unit estimates are produced for the nation, states, and counties. PEP annually utilizes current data on births, deaths, and migration to calculate population change since the most recent decennial census and produce a time series of estimates of population, demographic components of change, and housing units. The annual time series of estimates begins with the most recent decennial census data and extends to the vintage year. As each vintage of estimates includes all years since the most recent decennial census, the latest vintage of data available supersedes all previously-produced estimates for those dates."
#'
#' \code{get_estimates()} requests data from the Population Estimates API for years 2019 and earlier; however the Population Estimates are no longer supported on the API as of 2020. For recent years, \code{get_estimates()} reads a flat file from the Census website and parses it. This means that arguments and output for 2020 and later datasets may differ slightly from datasets acquired for 2019 and earlier.
#'
#' As of April 2022, variables available for 2020 and later datasets are as follows: ESTIMATESBASE, POPESTIMATE, NPOPCHG, BIRTHS, DEATHS, NATURALCHG, INTERNATIONALMIG, DOMESTICMIG, NETMIG, RESIDUAL, GQESTIMATESBASE, GQESTIMATES, RBIRTH, RDEATH, RNATURALCHG, RINTERNATIONALMIG, RDOMESTICMIG, and RNETMIG.
Expand All @@ -20,11 +22,9 @@
#'
#' @param breakdown_labels Whether or not to label breakdown elements returned when
#' \code{product = "characteristics"}. Defaults to FALSE.
#'
#' Not yet supported for 2020 and later.
#' @param year The data year (defaults to 2022). It is recommended to use the most recent vintage
#' available for a given decennial series (so, year = 2019 for the 2010s, and year = 2022 for the 2020s)
#' and use \code{time_series = TRUE} to access time-series estimates.
#' @param vintage It is recommended to use the most recent vintage
#' available for a given decennial series (so, year = 2019 for the 2010s, and year = 2023 for the 2020s). Will default to 2022 until the full PEP for 2023 is released.
#' @param year The data year (defaults to the vintage requested). Use \code{time_series = TRUE} to access time-series estimates.
#' @param state The state for which you are requesting data. State
#' names, postal codes, and FIPS codes are accepted.
#' Defaults to NULL.
Expand Down Expand Up @@ -58,14 +58,15 @@
#' @param ... other keyword arguments
#'
#' @return A tibble, or sf tibble, of population estimates data
#' @seealso \url{https://www.census.gov/programs-surveys/popest/about.html}
#' @export
get_estimates <- function(geography = c("us", "region", "division", "state", "county", "county subdivision",
"place/balance (or part)", "place", "consolidated city", "place (or part)",
"metropolitan statistical area/micropolitan statistical area", "cbsa",
"metropolitan division", "combined statistical area"),
product = NULL, variables = NULL,
breakdown = NULL, breakdown_labels = FALSE,
year = 2022, state = NULL, county = NULL,
breakdown = NULL, breakdown_labels = FALSE, vintage = 2022,
year = vintage, state = NULL, county = NULL,
time_series = FALSE,
output = "tidy", geometry = FALSE, keep_geo_vars = FALSE,
shift_geo = FALSE, key = NULL, show_call = FALSE, ...) {
Expand All @@ -91,6 +92,10 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co
rlang::abort("The only supported geographies at this time for population characteristics 2020 and later are 'state' and 'county'.")
}

if (vintage > 2022) {
rlang::abort("The Characteristics dataset has not yet been released for vintages beyond 2022.")
}

if (geography == "state") {

state_raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/state/asrh/sc-est2022-alldata6.csv"))
Expand Down Expand Up @@ -311,15 +316,26 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co
if (product == "population") {
variables <- population_estimates_variables22
} else if (product == "components") {
variables <- components_estimates_variables22
if (year == 2021) {
variables <- components_estimates_variables21

} else {
variables <- components_estimates_variables22

}
}
}

# Get the data into a reasonable first format that is consistent for downstream use
if (geography == "us") {

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/state/totals/NST-EST2022-ALLDATA.csv")) %>%
dplyr::filter(SUMLEV == "010")
if (vintage == 2021) {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-alldata.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "010")
} else {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-ALLDATA.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "010")
}

raw <- raw[,!(names(raw) %in% c("SUMLEV", "REGION", "DIVISION", "STATE"))]

Expand All @@ -335,9 +351,15 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co

} else if (geography == "region") {

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/state/totals/NST-EST2022-ALLDATA.csv")) %>%
dplyr::filter(SUMLEV == "020") %>%
dplyr::mutate(GEOID = REGION)
if (vintage == 2021) {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-alldata.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "020") %>%
dplyr::mutate(GEOID = REGION)
} else {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-ALLDATA.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "020") %>%
dplyr::mutate(GEOID = REGION)
}

raw <- raw[,!(names(raw) %in% c("SUMLEV", "REGION", "DIVISION", "STATE"))]

Expand All @@ -350,9 +372,13 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co

} else if (geography == "division") {

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/state/totals/NST-EST2022-ALLDATA.csv")) %>%
dplyr::filter(SUMLEV == "030") %>%
dplyr::mutate(GEOID = DIVISION)
if (vintage == 2021) {
rlang::abort("Divisions are not available in the 2021 vintage dataset.")
} else {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-ALLDATA.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "030") %>%
dplyr::mutate(GEOID = DIVISION)
}

raw <- raw[,!(names(raw) %in% c("SUMLEV", "REGION", "DIVISION", "STATE"))]

Expand All @@ -365,9 +391,15 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co

} else if (geography == "state") {

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/state/totals/NST-EST2022-ALLDATA.csv")) %>%
dplyr::filter(SUMLEV == "040") %>%
dplyr::mutate(GEOID = STATE)
if (vintage == 2021) {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-alldata.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "040") %>%
dplyr::mutate(GEOID = STATE)
} else {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/state/totals/NST-EST%s-ALLDATA.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "040") %>%
dplyr::mutate(GEOID = STATE)
}

raw <- raw[,!(names(raw) %in% c("SUMLEV", "REGION", "DIVISION", "STATE"))]

Expand All @@ -380,7 +412,7 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co

} else if (geography == "county") {

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/counties/totals/co-est2022-alldata.csv")) %>%
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/counties/totals/co-est%s-alldata.csv", vintage, vintage))) %>%
dplyr::filter(SUMLEV == "050") %>%
dplyr::mutate(GEOID = paste0(STATE, COUNTY),
NAME = paste0(CTYNAME, ", ", STNAME))
Expand All @@ -397,9 +429,15 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co

} else if (geography == "cbsa" || geography == "metropolitan statistical area/micropolitan statistical area") {

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/metro/totals/cbsa-est2022.csv")) %>%
dplyr::filter(LSAD %in% c("Micropolitan Statistical Area", "Metropolitan Statistical Area")) %>%
dplyr::mutate(GEOID = CBSA)
if (vintage != 2022) {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/metro/totals/cbsa-est%s-alldata.csv", vintage, vintage))) %>%
dplyr::filter(LSAD %in% c("Micropolitan Statistical Area", "Metropolitan Statistical Area")) %>%
dplyr::mutate(GEOID = CBSA)
} else {
raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/metro/totals/cbsa-est2022.csv")) %>%
dplyr::filter(LSAD %in% c("Micropolitan Statistical Area", "Metropolitan Statistical Area")) %>%
dplyr::mutate(GEOID = CBSA)
}

raw <- raw[,!(names(raw) %in% c("MDIV", "STCOU", "LSAD", "CBSA"))]

Expand All @@ -411,9 +449,16 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co
dplyr::mutate(variable = stringr::str_remove(variable, "_"))

} else if (geography == "combined statistical area") {
raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/metro/totals/csa-est2022.csv")) %>%
dplyr::filter(LSAD == "Combined Statistical Area") %>%
dplyr::mutate(GEOID = CSA)

if (vintage != 2022) {
raw <- suppressMessages(readr::read_csv(sprintf("https://www2.census.gov/programs-surveys/popest/datasets/2020-%s/metro/totals/csa-est%s-alldata.csv", vintage, vintage))) %>%
dplyr::filter(LSAD == "Combined Statistical Area") %>%
dplyr::mutate(GEOID = CSA)
} else {
raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/metro/totals/csa-est2022.csv")) %>%
dplyr::filter(LSAD == "Combined Statistical Area") %>%
dplyr::mutate(GEOID = CSA)
}

raw <- raw[,!(names(raw) %in% c("MDIV", "STCOU", "LSAD", "CBSA", "CSA"))]

Expand All @@ -425,6 +470,11 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co
dplyr::mutate(variable = stringr::str_remove(variable, "_"))

} else if (geography == "place") {

if (vintage > 2022) {
rlang::abort("The most recent PEP release for this geography is 2022.")
}

raw <- suppressMessages(readr::read_csv("https://www2.census.gov/programs-surveys/popest/datasets/2020-2022/cities/totals/sub-est2022.csv")) %>%
dplyr::filter(SUMLEV == "162") %>%
dplyr::mutate(GEOID = paste0(STATE, PLACE),
Expand Down Expand Up @@ -554,7 +604,7 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co
variables = variables,
breakdown = breakdown,
breakdown_labels = breakdown_labels,
year = year,
year = vintage,
state = .x,
county = county,
time_series = time_series,
Expand Down Expand Up @@ -829,27 +879,14 @@ get_estimates <- function(geography = c("us", "region", "division", "state", "co

} else {

geom <- try(suppressMessages(use_tigris(geography = geography, year = year,
state = state, county = county, ...)))

# if (year > 2021) {
# geom <- try(suppressMessages(use_tigris(geography = geography, year = 2021,
# state = state, county = county, ...)))
#
# if (geography == "county" && "09" %in% stringr::str_sub(geom$GEOID, 1, 2)) {
# ct_2022 <- clean_connecticut()
#
# geom <- geom %>%
# dplyr::filter(!stringr::str_sub(GEOID, 1, 2) == "09") %>%
# dplyr::bind_rows(ct_2022)
# }
#
# } else {
# geom <- try(suppressMessages(use_tigris(geography = geography, year = year,
# state = state, county = county, ...)))
# }


# Handle here until 2023 CB files are released
if (year == 2023) {
geom <- try(suppressMessages(use_tigris(geography = geography, year = 2022,
state = state, county = county, ...)))
} else {
geom <- try(suppressMessages(use_tigris(geography = geography, year = year,
state = state, county = county, ...)))
}

if ("try-error" %in% class(geom)) {
stop("Your geometry data download failed. Please try again later or check the status of the Census Bureau website at https://www2.census.gov/geo/tiger/", call. = FALSE)
Expand Down
1 change: 1 addition & 0 deletions R/zzz.r
Expand Up @@ -33,6 +33,7 @@ population_estimates_variables <- c("POP", "DENSITY")
components_estimates_variables <- c("BIRTHS", "DEATHS","DOMESTICMIG","INTERNATIONALMIG","NATURALINC","NETMIG","RBIRTH","RDEATH","RDOMESTICMIG","RINTERNATIONALMIG","RNATURALINC","RNETMIG")
housing_estimates_variables <- "HUEST"
population_estimates_variables22 <- c("POPESTIMATE", "NPOPCHG")
components_estimates_variables21 <- c("BIRTHS", "DEATHS", "NATURALINC", "DOMESTICMIG","INTERNATIONALMIG","NETMIG","RBIRTH","RDEATH","RDOMESTICMIG","RINTERNATIONALMIG","RNATURALINC","RNETMIG","RESIDUAL")
components_estimates_variables22 <- c("BIRTHS", "DEATHS", "NATURALCHG", "DOMESTICMIG","INTERNATIONALMIG","NETMIG","RBIRTH","RDEATH","RDOMESTICMIG","RINTERNATIONALMIG","RNATURALCHG","RNETMIG","RESIDUAL")


Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/basic-usage.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/margins-of-error.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1624936

Please sign in to comment.