# example_code.R

# Some example code on how to use the {tidycovid19} package.
# Not part of the package itself.
# Some more code that uses the package can be found in the 'add_code' directory.

# --- Some visuals -------------------------------------------------------------

library(tidycovid19)

# Download the merged data once; every plot below reuses this object.
merged <- download_merged_data(cached = TRUE, silent = TRUE)

# Default plot, followed by variants that highlight one or several countries
# and request the "lockdown" intervention.
plot_covid19_spread(merged)
plot_covid19_spread(
  merged,
  highlight = "DEU",
  intervention = "lockdown"
)
plot_covid19_spread(
  merged,
  highlight = c("ITA", "ESP", "FRA", "DEU", "USA"),
  intervention = "lockdown"
)
plot_covid19_spread(
  merged,
  highlight = c("ITA", "ESP", "FRA", "DEU", "USA"),
  exclude_others = TRUE,
  intervention = "lockdown"
)

# Per-capita variant. `merged` is passed explicitly so the data downloaded
# above is reused (NOTE(review): the function's default data argument is
# presumably the same cached merged data -- confirm). The duplicated "FRA"
# entry was removed from the highlight vector.
plot_covid19_spread(
  merged,
  per_capita = TRUE, per_capita_x_axis = TRUE,
  population_cutoff = 10,
  min_cases = 0.1,
  highlight = c("ITA", "ESP", "FRA", "DEU", "USA", "BEL", "NLD", "GBR"),
  intervention = "lockdown"
)

# --- Customize shiny app ------------------------------------------------------

# Call shiny_covid19_spread() with a custom set of plot options, passed as a
# named list through `plot_options`.
shiny_covid19_spread(
  plot_options = list(
    type = "deaths",
    min_cases = 100,
    min_by_ctry_obs = 10,
    edate_cutoff = 40,
    per_capita = FALSE,
    cumulative = FALSE,
    change_ave = 7,
    highlight = c("FRA", "DEU", "ITA", "ESP", "GBR", "USA"),
    intervention = "lockdown"
  )
)


# --- Example clipping code produced by shiny_covid19_spread() ------------------

# Code generated by shiny_covid19_spread() of the {tidycovid19} package
# See: https://github.com/joachim-gassen/tidycovid19
# Run in R/Rstudio. See https://www.r-project.org and https://www.rstudio.com
# Uncomment the following to install the {tidycovid19} package

# remotes::install_github("joachim-gassen/tidycovid19")

library(tidycovid19)

# Deaths plot for twelve highlighted countries, without an intervention marker.
plot_covid19_spread(
  type = "deaths",
  min_cases = 100,
  min_by_ctry_obs = 7,
  edate_cutoff = 30,
  per_capita = FALSE,
  highlight = c(
    "BEL", "CHN", "FRA", "DEU", "IRN", "ITA",
    "KOR", "NLD", "ESP", "CHE", "GBR", "USA"
  ),
  intervention = NULL
)


# --- Find data inconsistencies in JHU CSSE data -------------------------------

library(tidycovid19)
library(dplyr)

df <- download_jhu_csse_covid19_data(cached = TRUE, silent = TRUE)

# For each country, keep the rows where a series is lower than in the previous
# row or higher than in the next row, i.e. where the series decreases.
# lag()/lead() follow the row order of `df`.
# NOTE(review): assumes rows are ordered by date within each country -- confirm.
odd_recovered <- df %>%
  group_by(iso3c) %>%
  filter(recovered < lag(recovered) | recovered > lead(recovered)) %>%
  ungroup()

odd_deaths <- df %>%
  group_by(iso3c) %>%
  filter(deaths < lag(deaths) | deaths > lead(deaths)) %>%
  ungroup()

# Stored like its two siblings (the original only printed this result) and
# then printed so the interactive output is still shown.
odd_confirmed <- df %>%
  group_by(iso3c) %>%
  filter(confirmed < lag(confirmed) | confirmed > lead(confirmed)) %>%
  ungroup()

odd_confirmed


# --- Use old PDF scraping code ------------------------------------------------

# Install old package version that still contains the PDF scraping code
# remotes::install_github("joachim-gassen/tidycovid19", ref = "0990bc6")

library(tidycovid19)
library(tidyverse)
library(pdftools)
library(png)

# Render the first page of a Google mobility report to a PNG for inspection.
# NOTE(review): the output file name says "de" while the URL points to the BR
# report -- confirm which one is intended.
pdf_url <- "https://www.gstatic.com/covid19/mobility/2020-04-05_BR_Mobility_Report_en.pdf"
pdf_convert(pdf_url, pages = 1, filenames = "google_cmr_de_p1.png", verbose = FALSE)

# Extract the line-graph bitmaps of page 1 using the package's internal helper.
bitmaps <- tidycovid19:::extract_line_graph_bitmaps(pdf_url, 1)

# Write the first bitmap to the temporary file. The original created
# `png_file` but never used it and wrote to "bitmap.png" in the working
# directory instead.
png_file <- tempfile("bitmap_", fileext = ".png")
writePNG(bitmaps[[1]][[1]], png_file)

# Parse the same bitmap with the package's internal helper.
df <- tidycovid19:::parse_line_graph_bitmap(bitmaps[[1]][[1]])

# Make sure that you reinstall the current version of the package after you
# are done exploring the PDF scraping code

# remotes::install_github("joachim-gassen/tidycovid19")


# --- Use regional data --------------------------------------------------------

library(tidyverse)
library(tidycovid19)
df <- download_google_cmr_data(type = "country_region", cached = TRUE)

# One line per region for the rows with iso3c == "DEU": retail_recreation
# plotted over date.
ggplot(
  filter(df, iso3c == "DEU"),
  aes(x = date, y = retail_recreation, color = region)
) +
  geom_line()


# --- Plot Oxford Data ---------------------------------------------

library(tidyverse)
library(tidycovid19)
df <- download_oxford_npi_data(type = "index", cached = TRUE)

# Daily cross-sectional mean of the stringency index with a +/- 1.96 standard
# error band. The standard error is sd / sqrt(number of non-missing values);
# the original divided by sqrt(n() - 1), where n() also counts rows whose
# stringency_index is missing.
df %>%
  group_by(date) %>%
  summarise(
    mn_si = mean(stringency_index, na.rm = TRUE),
    ci_si = 1.96 * sd(stringency_index, na.rm = TRUE) /
      sqrt(sum(!is.na(stringency_index)))
  ) %>%
  ggplot(aes(x = date, y = mn_si)) +
  geom_line() +
  geom_errorbar(
    aes(ymin = mn_si - ci_si, ymax = mn_si + ci_si),
    width = 0.2
  )

df <- download_oxford_npi_data(type = "measures", cached = TRUE)

# Stacked histogram over date in 7-unit bins, filled by npi_type and weighted
# by npi_measure. Two npi_type categories and rows with a zero npi_measure are
# excluded first.
ggplot(
  filter(
    df,
    npi_type != "Emergency investment in healthcare",
    npi_type != "Investment in vaccines",
    npi_measure != 0
  ),
  aes(x = date, fill = npi_type, weight = npi_measure)
) +
  geom_histogram(position = "stack", binwidth = 7)

# --- Plot daily new cases as bar graph ----------------------------------------

# Suggestion by AndreaPi (issue #19)

library(tidyverse)
library(tidycovid19)
library(zoo)

df <- download_merged_data(cached = TRUE)

# Daily change in confirmed cases for the USA as bars, with a right-aligned
# 7-observation rolling mean as a red line.
# The first difference is always NA because lag() has no predecessor, and
# rollmean()'s default method is documented as not handling NA input, so the
# rolling mean is computed with rollapply() instead. `fill = NA` replaces the
# deprecated `na.pad = TRUE`.
df %>%
  filter(iso3c == "USA") %>%
  mutate(
    new_cases = confirmed - lag(confirmed),
    ave_new_cases = rollapply(new_cases, 7, mean, fill = NA, align = "right")
  ) %>%
  filter(!is.na(new_cases), !is.na(ave_new_cases)) %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = new_cases), fill = "lightblue") +
  geom_line(aes(y = ave_new_cases), color = "red") +
  theme_minimal()

# Daily change in deaths for the USA as bars, with a right-aligned
# 7-observation rolling mean as a red line.
# The first difference is always NA because lag() has no predecessor, and
# rollmean()'s default method is documented as not handling NA input, so the
# rolling mean is computed with rollapply() instead. `fill = NA` replaces the
# deprecated `na.pad = TRUE`.
df %>%
  filter(iso3c == "USA") %>%
  mutate(
    new_deaths = deaths - lag(deaths),
    ave_new_deaths = rollapply(new_deaths, 7, mean, fill = NA, align = "right")
  ) %>%
  filter(!is.na(new_deaths), !is.na(ave_new_deaths)) %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = new_deaths), fill = "lightblue") +
  geom_line(aes(y = ave_new_deaths), color = "red") +
  theme_minimal()

# Daily change in confirmed cases for DEU, scaled by 1e5 / population, as
# bars, with a right-aligned 7-observation rolling mean as a red line.
# The first difference is always NA because lag() has no predecessor, and
# rollmean()'s default method is documented as not handling NA input, so the
# rolling mean is computed with rollapply() instead. `fill = NA` replaces the
# deprecated `na.pad = TRUE`.
df %>%
  filter(iso3c == "DEU") %>%
  mutate(
    new_cases_by_100k = 1e5 * ((confirmed - lag(confirmed)) / population),
    ave_new_cases_by_100k = rollapply(
      new_cases_by_100k, 7, mean, fill = NA, align = "right"
    )
  ) %>%
  filter(!is.na(new_cases_by_100k), !is.na(ave_new_cases_by_100k)) %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = new_cases_by_100k), fill = "lightblue") +
  geom_line(aes(y = ave_new_cases_by_100k), color = "red") +
  theme_minimal()

# Daily change in deaths for DEU per 100,000 inhabitants as bars, with a
# right-aligned 7-observation rolling mean as a red line.
# The rate is scaled with 1e5 / population, matching the per-100k scaling used
# for cases in this script; the original divided by 0.1 * population, which
# does not yield a per-100k figure.
# The first difference is always NA because lag() has no predecessor, and
# rollmean()'s default method is documented as not handling NA input, so the
# rolling mean is computed with rollapply() instead. `fill = NA` replaces the
# deprecated `na.pad = TRUE`.
df %>%
  filter(iso3c == "DEU") %>%
  mutate(
    new_deaths_by_100k = 1e5 * ((deaths - lag(deaths)) / population),
    ave_new_deaths_by_100k = rollapply(
      new_deaths_by_100k, 7, mean, fill = NA, align = "right"
    )
  ) %>%
  filter(!is.na(new_deaths_by_100k), !is.na(ave_new_deaths_by_100k)) %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = new_deaths_by_100k), fill = "lightblue") +
  geom_line(aes(y = ave_new_deaths_by_100k), color = "red") +
  theme_minimal()


# --- New Our World in Data data -----------------------------------------------

library(tidyverse)
library(tidycovid19)
library(ggridges)

df <- download_merged_data(cached = TRUE, silent = TRUE)

# Per country: number of non-missing observations of hosp_patients,
# icu_patients and total_vaccinations. Countries with no observations in any
# of the three are dropped; the result is sorted by iso3c.
nobs <- df %>%
  group_by(iso3c) %>%
  summarise(
    nobs_hosp = sum(!is.na(hosp_patients)),
    nobs_icu = sum(!is.na(icu_patients)),
    nobs_vacc = sum(!is.na(total_vaccinations)),
    .groups = "drop"
  ) %>%
  filter(!(nobs_hosp == 0 & nobs_icu == 0 & nobs_vacc == 0)) %>%
  arrange(iso3c)

# One row per country that has at least one non-missing value for each of
# confirmed, deaths, population and gdp_capita:
#   has_vacc_data  TRUE if any total_vaccinations value is non-missing
#   gdp_capita     mean of the non-missing gdp_capita values
#   cases, deaths  maximum of 1e5 * (confirmed or deaths) / population
# mean() now uses na.rm = TRUE like the other aggregates, so a single missing
# gdp_capita row no longer turns the whole country's value into NA.
has_vacc_data <- df %>%
  select(iso3c, total_vaccinations, gdp_capita, deaths, confirmed, population) %>%
  group_by(iso3c) %>%
  filter(
    !all(is.na(confirmed)),
    !all(is.na(deaths)),
    !all(is.na(population)),
    !all(is.na(gdp_capita))
  ) %>%
  summarise(
    has_vacc_data = any(!is.na(total_vaccinations)),
    gdp_capita = mean(gdp_capita, na.rm = TRUE),
    cases = max(1e5 * (confirmed / population), na.rm = TRUE),
    deaths = max(1e5 * (deaths / population), na.rm = TRUE),
    .groups = "drop"
  )

# Binned ridgeline plot of `xvar`, one ridge per value of `has_vacc_data`.
#
# Arguments:
#   df    data frame with a `has_vacc_data` column and the column given
#         as `xvar`
#   xvar  unquoted name of the column shown on the log10-scaled x axis
#   xlab  x-axis label
#
# Returns a ggplot object.
plot_sel_bias <- function(df, xvar, xlab) {
  ggplot(
    data = df,
    aes(
      x = {{ xvar }}, y = has_vacc_data,
      # after_stat() replaces the deprecated stat() helper
      fill = has_vacc_data, height = after_stat(density)
    )
  ) +
    geom_density_ridges(stat = "binline", bins = 20, scale = 0.95) +
    scale_x_log10(labels = scales::comma_format(accuracy = 0.1)) +
    labs(
      x = xlab,
      y = "",
      title = "OWID provides vaccination data"
    ) +
    theme_ridges() +
    theme(
      legend.position = "none",
      plot.title.position = "plot"
    )
}

# Draw the plot once each for GDP per capita, cases and deaths.
plot_sel_bias(
  has_vacc_data,
  gdp_capita,
  "GDP per capita (in 2010 US-$, log-scaled)"
)
plot_sel_bias(
  has_vacc_data,
  cases,
  "Covid-19 cases per 100,000 inhabitants (log-scaled)"
)
plot_sel_bias(
  has_vacc_data,
  deaths,
  "Covid-19 deaths per 100,000 inhabitants (log-scaled)"
)

# Binomial GLM of the has_vacc_data indicator on log GDP per capita and the
# log of the `deaths` column. Rows with deaths <= 0 are dropped first, so
# log(deaths) is finite.
# NOTE(review): log(gdp_capita) assumes strictly positive values -- confirm.
mod <- glm(
  has_vacc_data ~ log(gdp_capita) + log(deaths), 
  data = has_vacc_data %>% filter(deaths > 0),
  family = "binomial"
) 

# Print the model summary.
summary(mod)

# Country-level data for countries with at least one non-missing
# total_vaccinations value: per-country maxima of vaccinations, cases and
# deaths (each scaled by 1e5 / population) and of gdp_capita.
clevel <- df %>%
  group_by(iso3c) %>%
  filter(any(!is.na(total_vaccinations))) %>%
  mutate(
    vacc_1e5pop = 1e5 * (total_vaccinations / population),
    cases_1e5pop = 1e5 * (confirmed / population),
    deaths_1e5pop = 1e5 * (deaths / population)
  ) %>%
  summarise(
    vacc_1e5pop = max(vacc_1e5pop, na.rm = TRUE),
    cases_1e5pop = max(cases_1e5pop, na.rm = TRUE),
    deaths_1e5pop = max(deaths_1e5pop, na.rm = TRUE),
    gdp_capita = max(gdp_capita, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # max(..., na.rm = TRUE) returns -Inf (not NA) when every value of a country
  # is missing, so na.omit() alone would keep such rows. Drop non-finite
  # values explicitly.
  filter(
    is.finite(vacc_1e5pop),
    is.finite(cases_1e5pop),
    is.finite(deaths_1e5pop),
    is.finite(gdp_capita)
  ) %>%
  na.omit()


# Log-log scatter plot of vacc_1e5pop against `xvar`, with country labels and
# a linear fit.
#
# Arguments:
#   df    data frame with `vacc_1e5pop`, `iso3c` and the column given as `xvar`
#   xvar  unquoted name of the column shown on the x axis
#   xlab  x-axis label
#
# Returns a ggplot object.
plot_clevel_vacc_by_x <- function(df, xvar, xlab) {
  ggplot(df, aes(x = {{ xvar }}, y = vacc_1e5pop)) +
    geom_point() +
    scale_x_log10() +
    scale_y_log10() +
    theme_minimal() +
    labs(
      x = xlab,
      y = "Vaccinations per 100,000 inhabitants"
    ) +
    ggrepel::geom_text_repel(aes(label = iso3c)) +
    # Pass the formula as a formula object rather than a character string.
    geom_smooth(method = "lm", formula = y ~ x)
}

# Vaccinations against income, cases and deaths for all countries in clevel.
plot_clevel_vacc_by_x(
  clevel,
  gdp_capita,
  "National income per capita (2010 US-$)"
)
plot_clevel_vacc_by_x(
  clevel,
  cases_1e5pop,
  "Cases per 100,000 inhabitants"
)
plot_clevel_vacc_by_x(
  clevel,
  deaths_1e5pop,
  "Deaths per 100,000 inhabitants"
)

# Income plot again with the "GIN" row left out.
plot_clevel_vacc_by_x(
  filter(clevel, iso3c != "GIN"),
  gdp_capita,
  "National income per capita (2010 US-$)"
)

# OLS of log vaccinations per 100k on log GDP per capita and log deaths per
# 100k, fitted on all rows of clevel. `mod` is overwritten by each model below.
# NOTE(review): log() of a zero deaths_1e5pop would be -Inf -- confirm that
# clevel holds no such rows.
mod <- lm(
  log(vacc_1e5pop) ~ log(gdp_capita) + log(deaths_1e5pop), 
  data = clevel
) 
summary(mod)

# Same specification without the "GIN" row.
mod <- lm(
  log(vacc_1e5pop) ~ log(gdp_capita) + log(deaths_1e5pop), 
  data = clevel %>% filter(iso3c != "GIN")
) 
summary(mod)


# Same specification without the "CHN" and "GIN" rows.
mod <- lm(
  log(vacc_1e5pop) ~ log(gdp_capita) + log(deaths_1e5pop), 
  data = clevel %>% filter(!iso3c %in% c("CHN", "GIN"))
) 
summary(mod)