Skip to content

MatreyekLab/ACE2_dependence

Repository files navigation

ACE2_dependence

Kenneth Matreyek first started 1/12/2021 finished 6/9/2022

## Clear existing environment
rm(list = ls())

## Load basic useful packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## âś“ ggplot2 3.3.5     âś“ purrr   0.3.4
## âś“ tibble  3.1.6     âś“ dplyr   1.0.7
## âś“ tidyr   1.2.0     âś“ stringr 1.4.0
## âś“ readr   2.1.2     âś“ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggrepel)
library(ggbeeswarm)
library(reshape)
## 
## Attaching package: 'reshape'

## The following object is masked from 'package:dplyr':
## 
##     rename

## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(here)
## here() starts at /Users/kmatreyek/Box Sync/Github/ACE2_dependence
library(ggfortify)

## Set the seed for immediate reproducibility
set.seed(1234567)

## Set the base theme to what I like
theme_set(theme_bw())
theme_update(legend.title = element_blank())

## Set up consistent colors
virus_colors <- c("VSV" = "red", "SARS-CoV" = "darkgreen", "SARS-CoV-2" = "blue")
combined_infection <- read.csv(file = "Supp_tables/S1_Data.csv")

recombined_construct_key <- read.csv(file = "Data/Keys/Construct_label_key.csv", header = T, stringsAsFactors = F)
pseudovirus_label_key <- read.csv(file = "Data/Keys/Pseudovirus_label_key.csv", header = T, stringsAsFactors = F) #%>% filter(sequence_confirmed != "no")
clade_labels_key <- read.csv(file = "Data/Keys/Clade_labels.csv", header = T, stringsAsFactors = F)
clade_labels_key$clade = factor(clade_labels_key$clade, levels = c(1,2,3,4))
# Two-color data
two_color_precombined <- combined_infection %>% filter(expt %in% c("Two_color", "Receptors", "ACE2_TMPRSS2", "Two_color_ACE2_TMPRSS2", "Mixed_cell_infection", "Btky72_ACE2muts", "Bat_ACE2s","LSign_RBD_panel","Clade3_hum_muts","ACE2_muts","Q493_muts","New_constructs","Full_length") & !(is.na(pseudovirus_env) | pseudovirus_env == "None") & live_singlets != "Too few infected") %>% filter(!is.na(pct_grn_gvn_red))

## Make sure I ignore data where I didn't run enough cells to make a conclusion
two_color_precombined$live_singlets <- as.numeric(two_color_precombined$live_singlets)
two_color_precombined$pct_red  <- as.numeric(two_color_precombined$pct_red)
two_color_precombined$pct_nir <- as.numeric(two_color_precombined$pct_nir)

two_color_precombined$lower_bound_red <- 10^(log10(two_color_precombined$live_singlets * two_color_precombined$pct_red / 100)*-0.99 + 2.64)
two_color_precombined$lower_bound_nir <- 10^(log10(two_color_precombined$live_singlets * two_color_precombined$pct_nir / 100)*-0.99 + 2.64)
two_color_precombined$passes_lower_bound <- "no"
two_color_precombined$passes_upper_bound <- "no"
for(x in 1:nrow(two_color_precombined)){
  if(two_color_precombined$lower_bound_red[x] <= two_color_precombined$pct_grn_gvn_red[x] & two_color_precombined$lower_bound_nir[x] <= two_color_precombined$pct_grn_gvn_nir[x]){two_color_precombined$passes_lower_bound[x] <- "yes"}
  if(two_color_precombined$pct_grn_gvn_red[x] <= 90 & two_color_precombined$pct_grn_gvn_nir[x] <= 90){two_color_precombined$passes_upper_bound[x] <- "yes"}
}
two_color_combined <- merge(two_color_precombined %>% filter(passes_lower_bound == "yes" & passes_upper_bound == "yes"), pseudovirus_label_key, by = "pseudovirus_env", all.x = T) %>% mutate(ratio_nir_mcherry = pct_nir / pct_red)

Seeing how the two color assay performs

mixed_cell_infection <- two_color_precombined %>% filter(expt == "Mixed_cell_infection" & pseudovirus_env != "BtKY72" & !(pseudovirus_env == "SARS1" & date == "8/10/2021"))
mixed_cell_infection$log10_nir_red_diff <- log10(mixed_cell_infection$moi_gvn_nir) - log10(mixed_cell_infection$moi_gvn_red)
mixed_cell_infection$fold_difference <- 10^mixed_cell_infection$log10_nir_red_diff
mixed_cell_infection$ratio_nir_mcherry <- mixed_cell_infection$pct_nir / mixed_cell_infection$pct_red
mixed_cell_infection <- merge(mixed_cell_infection, pseudovirus_label_key, by = "pseudovirus_env", all.x = T)

unmixed_cell_infection <- two_color_precombined %>% filter(expt == "Mixed_cell_infection" & ((pct_red > 94 & pct_nir < 3) | (pct_red < 3 & pct_nir >88)) & pseudovirus_env != "BtKY72")

unmixed_cell_dataframe <- data.frame(date = rep(unique(unmixed_cell_infection$date), each =length(unique(unmixed_cell_infection$pseudovirus_env))), pseudovirus_env = rep(unique(unmixed_cell_infection$pseudovirus_env), length(unique(unmixed_cell_infection$date))),"fold_difference" = 0)
unmixed_cell_dataframe <- unmixed_cell_dataframe[!(unmixed_cell_dataframe$pseudovirus_env == "SARS2" & unmixed_cell_dataframe$date == "8/10/2021") & !(unmixed_cell_dataframe$pseudovirus_env == "SARS1" & unmixed_cell_dataframe$date == "8/10/2021") & !(unmixed_cell_dataframe$pseudovirus_env == "VSVG" & unmixed_cell_dataframe$date == "8/12/2021"),]

for(x in 1:nrow(unmixed_cell_dataframe)){
  temp_date <- unmixed_cell_dataframe$date[x]
  temp_virus <- unmixed_cell_dataframe$pseudovirus_env[x]
  temp_data <- unmixed_cell_infection %>% filter(date == temp_date, pseudovirus_env == temp_virus)
  
  unmixed_cell_dataframe$fold_difference[x] <- temp_data[temp_data$pct_nir > 80 & temp_data$pseudovirus_env == temp_virus,"moi_gvn_nir"] / temp_data[temp_data$pct_red > 80 & temp_data$pseudovirus_env == temp_virus,"moi_gvn_red"]
}

unmixed_cell_dataframe <- merge(unmixed_cell_dataframe, pseudovirus_label_key, by = "pseudovirus_env", all.x = T)
unmixed_cell_dataframe_summary <- unmixed_cell_dataframe %>% group_by(pseudovirus_env) %>% summarize(fold_diff = mean(fold_difference))

mixed_cell_infection$ratio_nir_mcherry_plotting <- mixed_cell_infection$ratio_nir_mcherry
for(x in 1:nrow(mixed_cell_infection)){
  if(mixed_cell_infection$ratio_nir_mcherry[x] <= 0.1){mixed_cell_infection$ratio_nir_mcherry_plotting[x] <- NA}
  if(mixed_cell_infection$ratio_nir_mcherry[x] >= 90){mixed_cell_infection$ratio_nir_mcherry_plotting[x] <- NA}
}
mixed_cell_infection_summary <- mixed_cell_infection %>% filter(!(pct_nir %in% c(0,100)) & !(fold_difference == "Inf") & !(fold_difference == 0)) %>% mutate(count = 1, log10_fold_diff = log10(fold_difference)) %>% group_by(pseudovirus_env, pct_nir) %>% 
  summarize(log10_mean = mean(log10_fold_diff), log10_sd = sd(log10_fold_diff), n = sum(count), .groups = "drop") %>% 
  mutate(log10_upper_conf = log10_mean + log10_sd/sqrt(n-1) * qt(p=0.05/2, df=n-1,lower.tail=F),
         log10_lower_conf = log10_mean - log10_sd/sqrt(n-1) * qt(p=0.05/2, df=n-1,lower.tail=F),
         mean = 10^log10_mean,
         upper_conf = 10^log10_upper_conf,
         lower_conf = 10^log10_lower_conf)

mixed_cell_infection_summary2 <- merge(mixed_cell_infection_summary, pseudovirus_label_key, by = "pseudovirus_env")

unmixed_cell_dataframe_summary <- unmixed_cell_dataframe %>% mutate(count = 1, log10_fold_diff = log10(fold_difference)) %>% group_by(pseudovirus_env) %>% 
  summarize(log10_mean = mean(log10_fold_diff), log10_sd = sd(log10_fold_diff), n = sum(count), .groups = "drop") %>% 
  mutate(log10_upper_conf = log10_mean + log10_sd/sqrt(n-1) * qt(p=0.05/2, df=n-1,lower.tail=F),
         log10_lower_conf = log10_mean - log10_sd/sqrt(n-1) * qt(p=0.05/2, df=n-1,lower.tail=F),
         mean = 10^log10_mean,
         upper_conf = 10^log10_upper_conf,
         lower_conf = 10^log10_lower_conf)

unmixed_cell_dataframe_summary2 <- merge(unmixed_cell_dataframe_summary, pseudovirus_label_key, by = "pseudovirus_env")

## Setting the factor levels for the plot
unmixed_cell_dataframe$virus_label <- factor(unmixed_cell_dataframe$virus_label, levels = c("VSV","SARS-CoV","SARS-CoV-2"))
unmixed_cell_dataframe_summary2$virus_label <- factor(unmixed_cell_dataframe_summary2$virus_label, levels = c("VSV","SARS-CoV","SARS-CoV-2"))
mixed_cell_infection$virus_label <- factor(mixed_cell_infection$virus_label, levels = c("VSV","SARS-CoV","SARS-CoV-2"))
mixed_cell_infection_summary2$virus_label <- factor(mixed_cell_infection_summary2$virus_label, levels = c("VSV","SARS-CoV","SARS-CoV-2"))

Mixed_cell_fold_ACE2_dependence <- ggplot() + theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(), legend.position = "none") + 
  labs(x = "% ACE2 cells in well", y = "Fold ACE2-dependent\ninfection") +
  scale_x_continuous(limits = c(-10,100), expand = c(0,0), breaks = c(10,20,40,60,80,90)) + #scale_x_log10(breaks = c(0.3,1,3,10,30), limits = c(0.08,10)) + 
  scale_y_log10(limits = c(3e-1, 3e2), breaks = c(1e-1,1,1e1,1e2,1e3), expand = c(0,0)) + scale_color_manual(values = virus_colors) +  
  geom_vline(xintercept = 0) + geom_hline(yintercept = 1, alpha = 0.5) +
  geom_point(data = subset(mixed_cell_infection, !(pct_nir %in% c(0,100)) & fold_difference > 0 & fold_difference < 100), aes(x = pct_nir, y = fold_difference, color = virus_label),
             position = position_dodge(width = 7.5), alpha = 0.5) +
  geom_point(data = subset(mixed_cell_infection_summary2, !is.na(upper_conf)), aes(x = pct_nir, y = mean, color = virus_label),
             position = position_dodge(width = 7.5), shape = 95, size = 7) +
  geom_errorbar(data = subset(mixed_cell_infection_summary2, !is.na(upper_conf)), aes(x = pct_nir, ymin = lower_conf, ymax = upper_conf, color = virus_label), position = position_dodge(width = 7.5), width = 4) +
  geom_point(data = unmixed_cell_dataframe, aes(x = -5, y = fold_difference, color = virus_label), position = position_dodge(width = 7.5), alpha = 0.5) +
  geom_point(data = unmixed_cell_dataframe_summary2, aes(x = -5, y = mean, color = virus_label), position = position_dodge(width = 7.5), shape = 95, size = 7) +
  geom_errorbar(data = unmixed_cell_dataframe_summary2, aes(x = -5, ymin = lower_conf, ymax = upper_conf, color = virus_label), position = position_dodge(width = 7.5), width = 4)
Mixed_cell_fold_ACE2_dependence

ggsave(file = "Plots/Mixed_cell_fold_ACE2_dependence.pdf", Mixed_cell_fold_ACE2_dependence, height = 1.75, width = 4)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_1E_mixed.csv",
          mixed_cell_infection_summary2[,c("virus_label","pct_nir","n","mean","upper_conf","lower_conf")], quote = FALSE, row.names = FALSE)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_1E_unmixed.csv",
          unmixed_cell_dataframe_summary2[,c("virus_label","n","mean","upper_conf","lower_conf")], quote = FALSE, row.names = FALSE)

## Figure out the coefficient of variation
unmixed_cell_dataframe_summary2$cv <- 10^(unmixed_cell_dataframe_summary2$log10_sd)/10^(unmixed_cell_dataframe_summary2$log10_mean)
mixed_cell_infection_summary2$cv <- 10^(mixed_cell_infection_summary2$log10_sd)/10^(mixed_cell_infection_summary2$log10_mean)
x
## [1] 166
Mixed_cell_cv <- ggplot() + theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(), legend.position = "none") + 
  labs(x = "% ACE2 cells in well", y = "CV") +
  scale_x_continuous(limits = c(-10,100), expand = c(0,0), breaks = c(10,20,40,60,80,90)) + #scale_x_log10(breaks = c(0.3,1,3,10,30), limits = c(0.08,10)) + 
  scale_y_log10() + scale_color_manual(values = virus_colors) +  
  geom_vline(xintercept = 0) + geom_hline(yintercept = 1, alpha = 0.5) +
  geom_point(data = subset(mixed_cell_infection_summary2, !is.na(upper_conf)), aes(x = pct_nir, y = cv, color = virus_label), position = position_dodge(width = 7.5)) +
  geom_point(data = unmixed_cell_dataframe_summary2, aes(x = -5, y = cv, color = virus_label), position = position_dodge(width = 7.5))
Mixed_cell_cv

ggsave(file = "Plots/Mixed_cell_cv.pdf", Mixed_cell_cv, height = 1, width = 4)

Seeing which receptors or proposed receptors actually enhance SARS-CoV-2 pseudovirus entry when overexpressed in HEK cells

receptors <- two_color_combined %>% filter(expt == "Receptors" & recombined_construct != "CD209" & pseudovirus_env != "G778B")
receptors$log10_nir_red_diff <- log10(receptors$moi_gvn_nir) - log10(receptors$moi_gvn_red)

## Collapsing all proteins to a single group, regardless of the tag
receptors$receptor_grouped <- NA
for(x in 1:nrow(receptors)){receptors$receptor_grouped[x] <- toupper(gsub("\\[HA]","",receptors$recombined_construct[x]))}

## Group by receptor, regardless of tag
receptors_grouped <- receptors %>% group_by(date, receptor_grouped, virus_label) %>% 
  summarize(log10_nir_red_diff = mean(log10_nir_red_diff), pct_grn_gvn_red = mean(pct_grn_gvn_red), pct_grn_gvn_nir = mean(pct_grn_gvn_nir), ratio_nir_mcherry = mean(ratio_nir_mcherry), .groups = "drop") %>% 
  mutate(n = 1, ace2dep_infection = 10^log10_nir_red_diff)

receptors_grouped_summary <- receptors_grouped %>% group_by(receptor_grouped, virus_label) %>% 
  summarize(mean_log10_nir_red_diff = mean(log10_nir_red_diff), sd_log10_nir_red_diff = sd(log10_nir_red_diff), n = sum(n), .groups = "drop") %>% 
  mutate(se_log10_nir_red_diff = sd_log10_nir_red_diff / sqrt(n), geomean = 10^mean_log10_nir_red_diff, lower_ci = 10^(mean_log10_nir_red_diff - se_log10_nir_red_diff * qt(p=0.05/2, df=n-1,lower.tail=F)), upper_ci = 10^(mean_log10_nir_red_diff + se_log10_nir_red_diff * qt(p=0.05/2, df=n-1,lower.tail=F)))
receptors_virus_label_levels <- c("VSV", "SARS-CoV", "SARS-CoV-2")
receptors_grouped <- receptors_grouped %>% filter(virus_label %in% receptors_virus_label_levels)
receptors_grouped$virus_label <- factor(receptors_grouped$virus_label, receptors_virus_label_levels)
receptors_grouped_summary <- receptors_grouped_summary %>% filter(virus_label %in% receptors_virus_label_levels)
receptors_grouped_summary$virus_label <- factor(receptors_grouped_summary$virus_label, receptors_virus_label_levels)


## Keep the differentially tagged constructs separated
receptors_ungrouped <- receptors %>% filter(virus_label != "BtKY72 RBD") %>% group_by(date, recombined_construct, virus_label) %>% 
  summarize(log10_nir_red_diff = mean(log10_nir_red_diff), pct_grn_gvn_red = mean(pct_grn_gvn_red), pct_grn_gvn_nir = mean(pct_grn_gvn_nir), .groups = "drop") %>% 
  mutate(n = 1, ace2dep_infection = 10^log10_nir_red_diff)

receptors_ungrouped_summary <- receptors_ungrouped %>% group_by(recombined_construct, virus_label) %>% 
  summarize(mean_log10_nir_red_diff = mean(log10_nir_red_diff), sd_log10_nir_red_diff = sd(log10_nir_red_diff), n = sum(n), .groups = "drop") %>% 
  mutate(se_log10_nir_red_diff = sd_log10_nir_red_diff / sqrt(n), geomean = 10^mean_log10_nir_red_diff, lower_ci = 10^(mean_log10_nir_red_diff - se_log10_nir_red_diff * 1.96), upper_ci = 10^(mean_log10_nir_red_diff + se_log10_nir_red_diff * 1.96))

receptors_ungrouped_summary$envelope <- factor(receptors_ungrouped_summary$virus_label, receptors_virus_label_levels)

## Now make a dataframe so I can do a comparative scatterplot
tagged_vs_untagged_receptors_dataframe <- 
  data.frame("gene" = c(rep("BSG",3),rep("NRP1",3),rep("NRP2",3),rep("CLEC4M",3),rep("ACE2",3)),
             "virus" = c(receptors_ungrouped_summary[1:15,"virus_label"]),
             "tagged" = c(subset(receptors_ungrouped_summary, recombined_construct == "[HA]BSG")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "Nrp1[HA]")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "Nrp2[HA]")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "[HA]CLEC4M")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "ACE2[HA]")$geomean),
             "untagged" = c(subset(receptors_ungrouped_summary, recombined_construct == "BSG")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "Nrp1")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "NRP2")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "CLEC4M")$geomean,
                          subset(receptors_ungrouped_summary, recombined_construct == "ACE2")$geomean))

Receptors_scatterplot <- ggplot() + theme_bw() + theme(panel.grid.minor = element_blank()) + 
  scale_color_manual(values = virus_colors) +
  geom_abline(slope = 1, intercept = 0, alpha = 0.2, size = 4) +
  scale_x_log10() + scale_y_log10() + labs(x = "Untagged protein", y = "Tagged protein") +
  geom_point(data = tagged_vs_untagged_receptors_dataframe, aes(x = untagged, y = tagged, color = virus_label, shape = gene), size = 3, alpha = 0.5)
Receptors_scatterplot

ggsave(file = "Plots/Receptors_scatterplot.pdf", Receptors_scatterplot, height = 2, width = 4)
ggsave(file = "Plots/Receptors_scatterplot.png", Receptors_scatterplot, height = 2, width = 4)


paste("The Pearson's r^2 of the untagged and tagged protein infection data is:",round(cor(tagged_vs_untagged_receptors_dataframe$tagged, tagged_vs_untagged_receptors_dataframe$untagged, method = "pearson")^2,2))
## [1] "The Pearson's r^2 of the untagged and tagged protein infection data is: 0.98"
write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_2C.csv", tagged_vs_untagged_receptors_dataframe, quote = FALSE, row.names = FALSE)


### Make the statistical test to see which proteins reproducibly increase infectivity
full_variant_t_test <- data.frame("cell_label" = rep(unique(receptors_grouped_summary$receptor_grouped),each = length(unique(receptors_grouped_summary$virus_label))),
                                          "virus_label" =  rep(unique(receptors_grouped_summary$virus_label),length(unique(receptors_grouped_summary$receptor_grouped))),
                                          "p_value" = NA,"significant" = NA)

for(x in 1:nrow(full_variant_t_test)){
  temp_cell_label <- full_variant_t_test$cell_label[x]
  temp_pseudovirus_env <- full_variant_t_test$virus_label[x]
  temp_subset <- receptors_grouped %>% filter(receptor_grouped == temp_cell_label & virus_label == temp_pseudovirus_env)
  temp_p_value <- round(t.test(temp_subset$ace2dep_infection,rep(1,nrow(temp_subset)), alternative = "two.sided")$p.value,4)
  full_variant_t_test$p_value[x] <- temp_p_value
}
full_variant_t_test$corrected_p_value <- p.adjust(full_variant_t_test$p_value, method = 'BH')

full_variant_t_test[full_variant_t_test$corrected_p_value < 0.01,"significant"] <- "yes"


## Now plotting the data with indicators of the statistical test

receptors_panel <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.position = "right") +
  scale_y_log10(limits = c(0.3,4e2)) + 
  labs(x = element_blank(), y = "Fold increase\nto infection") +
  scale_color_manual(values = virus_colors) +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_point(data = receptors_grouped, aes(x = receptor_grouped, y = ace2dep_infection, color = virus_label), position = position_jitterdodge(jitter.height = 0, dodge.width = 0.6), alpha = 0.2, size = 1) +
  geom_errorbar(data = receptors_grouped_summary, aes(x = receptor_grouped, ymin = lower_ci, ymax = upper_ci, color = virus_label), position = position_dodge(width = 0.6), alpha = 0.6, width = 0.4) +
  geom_point(data = receptors_grouped_summary, aes(x = receptor_grouped, y = geomean, color = virus_label), position = position_dodge(width = 0.6), size = 8, shape = 95) +
  geom_point(data = full_variant_t_test %>% filter(significant == "yes"), aes(x = cell_label, y = 300, color = virus_label), position = position_dodge(width = 0.5), size = 1.5, shape = 8) +
  NULL
receptors_panel

ggsave(file = "Plots/Receptors_panel.pdf", receptors_panel, height = 1.5, width = 4.8)
ggsave(file = "Plots/Receptors_panel.png", receptors_panel, height = 1.5, width = 4.8)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_2D.csv", receptors_grouped_summary, quote = FALSE, row.names = FALSE)

paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "ACE2" & virus_label == "SARS-CoV")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 9.9"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "ACE2" & virus_label == "SARS-CoV-2")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 12"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "ACE2" & virus_label == "VSV")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 1"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "CLEC4M" & virus_label == "SARS-CoV-2")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 2.8"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "CLEC4M" & virus_label == "SARS-CoV")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 1.7"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "NRP1" & virus_label == "SARS-CoV-2")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 1.5"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "NRP2" & virus_label == "SARS-CoV-2")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 1.5"
paste("ACE2 increased SARS-CoV pseudovirus infection roughly:",round(subset(receptors_grouped_summary, receptor_grouped == "NRP2" & virus_label == "VSV")$geomean,1))
## [1] "ACE2 increased SARS-CoV pseudovirus infection roughly: 1.5"

Allright, so now we can focus on ACE2

two_color_ace2_dep <- two_color_combined %>% filter(expt == "Two_color" & pseudovirus_env != "none" & recombined_construct %in% c("G752A/G758A","G752A","G758A/G852C"))
two_color_ace2_dep$index <- seq(1,nrow(two_color_ace2_dep))
two_color_ace2_dep$log10_nir_red_diff <- log10(two_color_ace2_dep$moi_gvn_nir) - log10(two_color_ace2_dep$moi_gvn_red)

two_color_ace2_dep2 <- two_color_ace2_dep %>% group_by(date, virus_label) %>% 
  summarize(log10_nir_red_diff = mean(log10_nir_red_diff), pct_grn_gvn_red = mean(pct_grn_gvn_red), pct_grn_gvn_nir = mean(pct_grn_gvn_nir), ratio_nir_mcherry = mean(ratio_nir_mcherry), .groups = "drop") %>% 
  mutate(n = 1, ace2dep_infection = 10^log10_nir_red_diff)

two_color_ace2_dep_summary <- two_color_ace2_dep2 %>% group_by(virus_label) %>% 
  summarize(mean_log10_nir_red_diff = mean(log10_nir_red_diff), sd_log10_nir_red_diff = sd(log10_nir_red_diff), n = sum(n), .groups = "drop") %>%
  mutate(se_log10_nir_red_diff = sd_log10_nir_red_diff / sqrt(n), geomean = 10^mean_log10_nir_red_diff, lower_ci = 10^(mean_log10_nir_red_diff - se_log10_nir_red_diff * 1.96), upper_ci = 10^(mean_log10_nir_red_diff + se_log10_nir_red_diff * 1.96), cells = "ACE2")

two_color_ace2_dep_raw <- data.frame("virus_label" = c(two_color_ace2_dep2$virus_label,two_color_ace2_dep2$virus_label),
                                  "variable" = c(rep("nir",nrow(two_color_ace2_dep2)),rep("mCherry",nrow(two_color_ace2_dep2))),
                                  "value" = c(two_color_ace2_dep2$pct_grn_gvn_nir,two_color_ace2_dep2$pct_grn_gvn_red))
diverse_virus_labels <- c("VSV", "Ebolavirus Zaire","Marburgvirus","Lassa fever virus","LCMV","Junin virus","MERS-CoV", "SARS-CoV", "WIV1-CoV", "SARS-CoV-2")

## Show the raw infectivities first
diverse_virus_subset_raw <- two_color_ace2_dep_raw %>% filter(virus_label %in% diverse_virus_labels)
diverse_virus_subset_raw$virus_label <- factor(diverse_virus_subset_raw$virus_label, levels = diverse_virus_labels)

Diverse_virus_subset_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.position = "none") +
  scale_y_log10() + #limits = c(1e-1,2e3)) + 
  scale_color_manual(values = c("red","black")) +
  labs(x = element_blank(), y = "Percent GFP positive cells") +
  geom_jitter(data = diverse_virus_subset_raw, aes(x = virus_label, y = value, color = variable), position = position_dodge(width = 0.4), alpha = 0.4) +
  geom_hline(yintercept = 0.005, linetype = 2)
Diverse_virus_subset_plot

ggsave(file = "Plots/Diverse_virus_subset_plot.pdf", Diverse_virus_subset_plot, height = 3, width = 4.5)
ggsave(file = "Plots/Diverse_virus_subset_plot.png", Diverse_virus_subset_plot, height = 3, width = 4.5)

### Now look at the ACE2 dependent infection of each
diverse_virus_subset <- two_color_ace2_dep2 %>% filter(virus_label %in% diverse_virus_labels)
diverse_virus_subset$virus_label <- factor(diverse_virus_subset$virus_label, levels = diverse_virus_labels)
diverse_virus_subset_summary <- two_color_ace2_dep_summary %>% filter(virus_label %in% diverse_virus_labels)
diverse_virus_subset_summary$virus_label <- factor(diverse_virus_subset_summary$virus_label, levels = diverse_virus_labels)

## Incorporate a statistical test here as well
diverse_virus_panel_t_test <- data.frame("virus_label" =  rep(unique(diverse_virus_subset$virus_label)), "mean_value" = NA,"p_value" = NA, "significant" = NA)
for(x in 1:nrow(diverse_virus_panel_t_test)){
  temp_pseudovirus_env <- diverse_virus_panel_t_test$virus_label[x]
  temp_subset <- diverse_virus_subset %>% filter(virus_label == temp_pseudovirus_env)
  diverse_virus_panel_t_test$mean_value[x] <- mean(temp_subset$ace2dep_infection)
  temp_p_value <- round(t.test(temp_subset$ace2dep_infection,rep(1,nrow(temp_subset)), alternative = "two.sided")$p.value,4)
  diverse_virus_panel_t_test$p_value[x] <- temp_p_value
}
diverse_virus_panel_t_test$corrected_p_value <- p.adjust(diverse_virus_panel_t_test$p_value, method = 'BH')
diverse_virus_panel_t_test[diverse_virus_panel_t_test$corrected_p_value < 0.05 & diverse_virus_panel_t_test$mean_value > 1,"significant"] <- "yes"

## Now plot the data
Two_color_ACE2_dep_panel <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  scale_y_log10(limits = c(3e-1,2e3), expand = c(0,0), breaks = c(1,100)) + 
  labs(x = element_blank(), y = "Ratio of iRFP670 to\nmCherry infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_quasirandom(data = diverse_virus_subset, aes(x = virus_label, y = ace2dep_infection), position = position_jitter(width = 0.1), alpha = 0.2, size = 1) +
  geom_errorbar(data = diverse_virus_subset_summary, aes(x = virus_label, ymin = lower_ci, ymax = upper_ci), alpha = 0.6, width = 0.4) +
  geom_point(data = diverse_virus_subset_summary, aes(x = virus_label, y = geomean), size = 8, shape = 95) +
  #geom_point(data = diverse_virus_panel_t_test %>% filter(significant == "yes"), aes(x = virus_label, y = 1200), size = 1, shape = 8) +
  NULL
Two_color_ACE2_dep_panel

ggsave(file = "Plots/Two_color_ACE2_dep_panel.pdf", Two_color_ACE2_dep_panel, height = 1.3, width = 3)
ggsave(file = "Plots/Two_color_ACE2_dep_panel.png", Two_color_ACE2_dep_panel, height = 1.3, width = 3)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_3A.csv", diverse_virus_subset_summary, quote = FALSE, row.names = FALSE)
rbd_parsed_df <- read.delim(file = "Data/Alignment/211208_RBD_main_fig/RBDs_aligned_identity_matrix_items.tsv", header = F, stringsAsFactors = F, sep = "\t")
rbd_dist_matrix <- read.delim(file = "Data/Alignment/211208_RBD_main_fig/RBDs_aligned_identity_matrix.csv", header = F, stringsAsFactors = F, sep = ",")

colnames(rbd_dist_matrix) <- rbd_parsed_df$V1 #paste("c_",rbd_parsed_df$V1,sep="")
rownames(rbd_dist_matrix) <- rbd_parsed_df$V1 #paste("r_",rbd_parsed_df$V1,sep="")

rbd_dist_matrix_hclust <- hclust(as.dist(rbd_dist_matrix, diag = TRUE))

rbd_dist_matrix2 <- rbd_dist_matrix
rbd_dist_matrix2$species <- rownames(rbd_dist_matrix2)

rbd_dist_melted <- melt(rbd_dist_matrix2, id = "species")
rbd_dist_melted$variable <- as.character(rbd_dist_melted$variable)

rbd_dist_melted$species <- factor(rbd_dist_melted$species, levels = rbd_dist_matrix_hclust$labels)
rbd_dist_melted$variable <- factor(rbd_dist_melted$variable, levels = rbd_dist_matrix_hclust$labels)

rbd_identity_matrix_plot <- ggplot() + theme_bw() +
  theme(axis.text = element_text(size = 8), axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_continuous(low = "white", high = "black") +
  geom_tile(data = rbd_dist_melted, aes(x = species, y = variable, fill = value)) +
  geom_text(data = rbd_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value <= 40), aes(x = species, y = variable, label = value), size = 2) +
  geom_text(data = rbd_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value >= 40), aes(x = species, y = variable, label = value), size = 2, color = "grey70") +
  NULL
ggsave(file = "Plots/RBD_identity_matrix_plot.pdf", rbd_identity_matrix_plot, height = 5.1, width = 6)
rbd_identity_matrix_plot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_3B.csv", rbd_dist_melted, quote = FALSE, row.names = FALSE)
rbd_parsed_df <- read.delim(file = "Data/Alignment/211207_All_RBDs/All_RBDs_aligned_identity_matrix_items.tsv", header = F, stringsAsFactors = F, sep = "\t")
rbd_dist_matrix <- read.delim(file = "Data/Alignment/211207_All_RBDs/All_RBDs_aligned_identity_matrix.csv", header = F, stringsAsFactors = F, sep = ",")

colnames(rbd_dist_matrix) <- rbd_parsed_df$V1 #paste("c_",rbd_parsed_df$V1,sep="")
rownames(rbd_dist_matrix) <- rbd_parsed_df$V1 #paste("r_",rbd_parsed_df$V1,sep="")

rbd_dist_matrix_hclust <- hclust(as.dist(rbd_dist_matrix, diag = TRUE))

rbd_dist_matrix2 <- rbd_dist_matrix
rbd_dist_matrix2$species <- rownames(rbd_dist_matrix2)

rbd_dist_melted <- melt(rbd_dist_matrix2, id = "species")
rbd_dist_melted$variable <- as.character(rbd_dist_melted$variable)

rbd_dist_melted$species <- factor(rbd_dist_melted$species, levels = rbd_dist_matrix_hclust$labels)
rbd_dist_melted$variable <- factor(rbd_dist_melted$variable, levels = rbd_dist_matrix_hclust$labels)

rbd_identity_matrix_plot <- ggplot() + theme_bw() +
  theme(axis.text = element_text(size = 8), axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_continuous(low = "white", high = "black") +
  geom_tile(data = rbd_dist_melted, aes(x = species, y = variable, fill = value)) +
  geom_text(data = rbd_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value <= 40), aes(x = species, y = variable, label = value), size = 2) +
  geom_text(data = rbd_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value >= 40), aes(x = species, y = variable, label = value), size = 2, color = "grey70") +
  NULL
#rbd_identity_matrix_plot
ggsave(file = "Plots/All_RBD_identity_matrix_plot.pdf", rbd_identity_matrix_plot, height = 5.3*2, width = 6*2)


clade_frame <- data.frame("species" = c("Yunnan2011__C.plicata","Rf4092__Rhinolophus_ferrumequium","ZXC21__Rhinolophus_sinicus","279-2005_R.macrotis","Rs4237__Rhinolophus_sinicus","Rp3__Rhinolophus_pearsonii","JL2012__Rhinolophus_ferrumequinum","73-2005__Rhinolophus_ferrumequium","Rf1__Rhinolophus_ferrumequium","HeB2013__Rhinolophus_ferrumequium","HuB2013__Rhinolophus_sinicus","Rs806/2006__Rhinolophus_sinicus","tRI-SC2018__Rhinolophus","Shaanxi2011__Rhinolophus_pusillus","Rs4081__Rhinolophus_sinicus","Rs4237__Rhinolophus_sinicus",
"YN2013__Rhinolophus_sinicus","ZC45__R.sinicus","SARS_CoV-2__Homo_sapiens","RaTG13__Rhinolophus_affinis",
"Rs4084__Rhinolophus_sinicus","Rs4231__Rhinolophus_sinicus","RsSHC014__Rhinolophus_sinicus","SARS_CoV__Homo_sapiens",
"RaTG15__Rhinolophus_affinis","LYRa11__Rhinolophus_affinis","Rs7327__Rhinolophus_sinicus","WIV1__Rhinolophus_sinicus",
"RhGB01__Rhinolophus_hipposideros","Khosta2__Rhinolophus_hipposideros","BtKY72__Rhinolophus","Khosta1__Rhinolophus_ferrumequinum",
"BB9904__Rhinolophus_euryale","BM48-31__Rhinolophus_blasii","PRD-2386__Rhinolophus","PRD-0038__Rhinolophus"), "clade_species" = c(2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,3,3))
clade_frame$variable <- clade_frame$species
clade_frame$clade_variable <- clade_frame$clade_species

rbd_dist_melted2 <- merge(rbd_dist_melted, clade_frame[,c("species","clade_species")], by = "species")
rbd_dist_melted3 <- merge(rbd_dist_melted2, clade_frame[,c("variable","clade_variable")], by = "variable")

Clade_difference_plot <- ggplot() + theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + 
  labs(x = "Clade", y = "Amino acid\ndifferences") +
  scale_y_continuous(breaks = c(0,50,100), limits = c(0,100)) +
  geom_quasirandom(data = rbd_dist_melted3, aes(x = clade_species, y = value), alpha = 0.1, size = 0.25) + facet_grid(cols = vars(clade_variable))
ggsave(file = "Plots/Clade_difference_plot.pdf", Clade_difference_plot, height = 1.2, width = 2.5)
Clade_difference_plot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_3C.csv", rbd_dist_melted3, quote = FALSE, row.names = FALSE)
write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_2.csv", rbd_dist_melted3, quote = FALSE, row.names = FALSE)
supptable <- read.csv(file = "Supp_tables/S1_Table.csv") %>% filter(cys != 1 & clade != 5)

#supptable2 <- merge(supptable[,c("name","cys")], clade_labels_key, by = "name")

clade_cys_table <- data.frame(table(supptable[,c("cys","clade")]))

clade_cys_table$cys = factor(clade_cys_table$cys, levels = c(2,0))

Cysteine_plot <- ggplot() + labs(x = "Clade", y = "Cysteines") +
  scale_fill_gradient(low = "white", high = "red") + 
  geom_tile(data = clade_cys_table, aes(x = clade, y = cys, fill = Freq)) +
  geom_text(data = clade_cys_table, aes(x = clade, y = cys, label = Freq))
Cysteine_plot

ggsave(file = "Plots/Cysteine_plot.pdf", Cysteine_plot, height = 0.9, width = 2)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_3E.csv", clade_cys_table, quote = FALSE, row.names = FALSE)

Now looking at infection data with the panel of RBDs

rbd_label_levels <- c("WIV1 RBD", "SARS-CoV-2 RBD", "Rp3 RBD", "Rs4237 RBD", "YN2013 RBD","BtKY72 RBD", "BM48-31 RBD", "BB9904 RBD")

rbd_subset_raw <- two_color_ace2_dep_raw %>% filter(virus_label %in% rbd_label_levels)
rbd_subset_raw$virus_label <- factor(rbd_subset_raw$virus_label, levels = rbd_label_levels)

Rbd_subset_raw_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.position = "none") +
  scale_y_log10() + #limits = c(1e-1,2e3)) + 
  scale_color_manual(values = c("red","black")) +
  labs(x = element_blank(), y = "Percent GFP positive cells") +
  geom_jitter(data = rbd_subset_raw, aes(x = virus_label, y = value, color = variable), position = position_dodge(width = 0.4), alpha = 0.4) +
  geom_hline(yintercept = 0.005, linetype = 2)
Rbd_subset_raw_plot

ggsave(file = "Plots/Rbd_subset_raw_plot.pdf", Rbd_subset_raw_plot, height = 3, width = 4.5)

rbd_subset2 <- two_color_ace2_dep2 %>% filter(virus_label %in% rbd_label_levels)
rbd_subset2$virus_label <- factor(rbd_subset2$virus_label, levels = rbd_label_levels)

rbd_subset_summary <- two_color_ace2_dep_summary %>% filter(virus_label %in% rbd_label_levels)
rbd_subset_summary$virus_label <- factor(rbd_subset_summary$virus_label, levels = rbd_label_levels)


## Incorporate a statistical test here as well
rbd_panel_t_test <- data.frame("virus_label" =  rep(unique(rbd_subset2$virus_label)), "mean_value" = NA,"p_value" = NA, "significant" = NA)
for(x in 1:nrow(rbd_panel_t_test)){
  temp_pseudovirus_env <- rbd_panel_t_test$virus_label[x]
  temp_subset <- rbd_subset2 %>% filter(virus_label == temp_pseudovirus_env)
  rbd_panel_t_test$mean_value[x] <- mean(temp_subset$ace2dep_infection)
  temp_p_value <- round(t.test(temp_subset$ace2dep_infection,rep(1,nrow(temp_subset)), alternative = "two.sided")$p.value,4)
  rbd_panel_t_test$p_value[x] <- temp_p_value
}
rbd_panel_t_test$corrected_p_value <- p.adjust(rbd_panel_t_test$p_value, method = 'BH')
rbd_panel_t_test[rbd_panel_t_test$corrected_p_value < 0.05 & rbd_panel_t_test$mean_value > 2,"significant"] <- "yes"


## Plotting the RBD data
Rbd_subset_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  scale_y_log10(limits = c(3e-1,5e3), expand = c(0,0)) + 
  labs(x = element_blank(), y = "Fold human ACE2\n-dependent infection") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_quasirandom(data = rbd_subset2, aes(x = virus_label, y = ace2dep_infection), position = position_jitter(width = 0.1), alpha = 0.2, size = 1) +
  geom_errorbar(data = rbd_subset_summary, aes(x = virus_label, ymin = lower_ci, ymax = upper_ci), alpha = 0.6, width = 0.4) +
  geom_point(data = rbd_subset_summary, aes(x = virus_label, y = geomean), size = 8, shape = 95) +
  geom_point(data = rbd_panel_t_test %>% filter(significant == "yes"), aes(x = virus_label, y = 3000), size = 1.5, shape = 8) +
  NULL
Rbd_subset_plot

ggsave(file = "Plots/RBD_subset_plot.pdf", Rbd_subset_plot, height = 1.75, width = 2.5)
ggsave(file = "Plots/RBD_subset_plot.png", Rbd_subset_plot, height = 1.75, width = 2.5)

rbd_subset2$cells <- "ACE2"
rbd_subset2$dep_infection <- rbd_subset2$ace2dep_infection

rbd_subset_summary$cells <- "ACE2"

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_3H.csv", rbd_subset_summary, quote = FALSE, row.names = FALSE)

OK, so maybe we can increase our sensitivity by adding TMPRSS2 into the mix

two_color_ace2_tmprss2_dep <- two_color_combined %>% filter(expt == "Two_color_ACE2_TMPRSS2" & pseudovirus_env != "none")
two_color_ace2_tmprss2_dep$index <- seq(1,nrow(two_color_ace2_tmprss2_dep))
two_color_ace2_tmprss2_dep$log10_nir_red_diff <- log10(two_color_ace2_tmprss2_dep$moi_gvn_nir) - log10(two_color_ace2_tmprss2_dep$moi_gvn_red)

two_color_ace2_tmprss2_dep2 <- two_color_ace2_tmprss2_dep %>% group_by(date, virus_label) %>% 
  summarize(log10_nir_red_diff = mean(log10_nir_red_diff), pct_grn_gvn_red = mean(pct_grn_gvn_red), pct_grn_gvn_nir = mean(pct_grn_gvn_nir), .groups = "drop") %>% 
  mutate(ace2_tmprss2_dep_infection = 10^log10_nir_red_diff, n = 1)

two_color_ace2_tmprss2_dep_summary <- two_color_ace2_tmprss2_dep2 %>% group_by(virus_label) %>% 
  summarize(mean_log10_nir_red_diff = mean(log10_nir_red_diff), sd_log10_nir_red_diff = sd(log10_nir_red_diff), n = sum(n), .groups = "drop") %>%
  mutate(se_log10_nir_red_diff = sd_log10_nir_red_diff / sqrt(n), geomean = 10^mean_log10_nir_red_diff, lower_ci = 10^(mean_log10_nir_red_diff - se_log10_nir_red_diff * 1.96), upper_ci = 10^(mean_log10_nir_red_diff + se_log10_nir_red_diff * 1.96), cells = "ACE2")

## Compare the control viruses with ACE2 dependent enhancement or ACE2 + TMPRSS2 dependent enhancement
two_color_ace2_dep_summary_controls <- two_color_ace2_dep_summary %>% filter(virus_label %in% c("Junin virus", "SARS-CoV", "SARS[SARS-CoV-2 RBD]"))
two_color_ace2_tmprss2_dep_summary_controls <- two_color_ace2_tmprss2_dep_summary %>% filter(virus_label %in% c("Junin virus", "SARS-CoV", "SARS[SARS-CoV-2 RBD]"))
ace2_with_tmprss2_controls <- merge(two_color_ace2_dep_summary_controls[,c("virus_label","geomean")], two_color_ace2_tmprss2_dep_summary_controls[,c("virus_label","geomean")], by = "virus_label")
colnames(ace2_with_tmprss2_controls) <- c("virus_label","ace2","ace2tmprss2")

## Weird. Maybe the values were maxed out or something?
two_color_ace2_tmprss2_dep2_rbds <- two_color_ace2_tmprss2_dep2 %>% filter(virus_label %in% rbd_label_levels)
two_color_ace2_tmprss2_dep2_rbds$virus_label <- factor(two_color_ace2_tmprss2_dep2_rbds$virus_label, levels = rbd_label_levels)

two_color_ace2_tmprss2_dep_summary_rbds<- two_color_ace2_tmprss2_dep_summary %>% filter(virus_label %in% rbd_label_levels)
two_color_ace2_tmprss2_dep_summary_rbds$virus_label <- factor(two_color_ace2_tmprss2_dep_summary_rbds$virus_label, levels = rbd_label_levels)

ACE2_TMPRSS2_RBDs_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  scale_y_log10(limits = c(3e-1,1e3)) + 
  labs(x = element_blank(), y = "Ratio of mCherry to iRFP670 infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_quasirandom(data = two_color_ace2_tmprss2_dep2_rbds, aes(x = virus_label, y = ace2_tmprss2_dep_infection), position = position_jitter(width = 0.1), alpha = 0.4) +
  geom_errorbar(data = two_color_ace2_tmprss2_dep_summary_rbds, aes(x = virus_label, ymin = lower_ci, ymax = upper_ci), alpha = 0.6, width = 0.4) +
  geom_point(data = two_color_ace2_tmprss2_dep_summary_rbds, aes(x = virus_label, y = geomean), size = 8, shape = 95)
ACE2_TMPRSS2_RBDs_plot

ggsave(file = "Plots/ACE2_TMPRSS2_RBDs_plot.pdf", ACE2_TMPRSS2_RBDs_plot, height = 2, width = 4.5)
ggsave(file = "Plots/ACE2_TMPRSS2_RBDs_plot.png", ACE2_TMPRSS2_RBDs_plot, height = 2, width = 4.5)

two_color_ace2_tmprss2_dep2_rbds$cells <- "ACE2-2A-TMPRSS2"
two_color_ace2_tmprss2_dep2_rbds$dep_infection <- two_color_ace2_tmprss2_dep2_rbds$ace2_tmprss2_dep_infection

two_color_ace2_tmprss2_dep_summary_rbds$cells <- "ACE2-2A-TMPRSS2"
rbds_both_dataset <- rbind(rbd_subset2[,c("virus_label","cells","dep_infection")], two_color_ace2_tmprss2_dep2_rbds[,c("virus_label","cells","dep_infection")])

rbds_both_dataset_summary <- rbind(rbd_subset_summary[,c("virus_label","cells","geomean","lower_ci","upper_ci")], two_color_ace2_tmprss2_dep_summary_rbds[,c("virus_label","cells","geomean","lower_ci","upper_ci")])

ACE2_ACE2TMPRSS2_bothplot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.position = "none", panel.grid.minor = element_blank()) +
  scale_y_log10(limits = c(3e-1,1e3)) + 
  labs(x = element_blank(), y = "Ratio of mCherry to iRFP670 infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_jitter(data = rbds_both_dataset, aes(x = virus_label, y = dep_infection, color = cells), alpha = 0.4, position = position_dodge(width = 0.8)) +
  geom_errorbar(data = rbds_both_dataset_summary, aes(x = virus_label, ymin = lower_ci, ymax = upper_ci, color = cells), alpha = 0.4, width = 0.2, position = position_dodge(width = 0.8)) +
  geom_point(data = rbds_both_dataset_summary, aes(x = virus_label, y = geomean, color = cells), size = 8, shape = 95, position = position_dodge(width = 0.8))
ACE2_ACE2TMPRSS2_bothplot

ggsave(file = "Plots/ACE2_ACE2TMPRSS2_bothplot.pdf", ACE2_ACE2TMPRSS2_bothplot, height = 2, width = 4)
ggsave(file = "Plots/ACE2_ACE2TMPRSS2_bothplot.png", ACE2_ACE2TMPRSS2_bothplot, height = 2, width = 4)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_3C.csv", rbds_both_dataset_summary, quote = FALSE, row.names = FALSE)

two_color_ace2_dep_summary_rbds <- two_color_ace2_dep_summary %>% filter(virus_label %in% c("Junin virus", "SARS-CoV","WIV1 RBD", "SARS-CoV-2 RBD", "Rp3 RBD", "YN2013 RBD", "BM48-31 RBD", "BB9904 RBD", "Rs4237 RBD", "BtKY72 RBD"))
two_color_ace2_tmprss2_dep_summary_rbds <- two_color_ace2_tmprss2_dep_summary %>% filter(virus_label %in% c("Junin virus", "SARS-CoV","WIV1 RBD", "SARS-CoV-2 RBD", "Rp3 RBD", "YN2013 RBD","BM48-31 RBD", "BB9904 RBD", "Rs4237 RBD", "BtKY72 RBD"))
ace2_with_tmprss2_rbds <- merge(two_color_ace2_dep_summary_rbds[,c("virus_label","geomean")], two_color_ace2_tmprss2_dep_summary_rbds[,c("virus_label","geomean")], by = "virus_label")
colnames(ace2_with_tmprss2_rbds) <- c("virus_label","ace2","ace2tmprss2")

ACE2_ACE2TMPRSS2_scatterplot <- ggplot() + theme_bw() + 
  scale_x_log10(limits = c(0.3,300), breaks = c(1,10,100)) + 
  scale_y_log10(limits = c(0.3,100), breaks = c(1,10,100)) +
  geom_abline(slope = 1, intercept = 0, alpha = 0.2, size = 4) +
  geom_text_repel(data = ace2_with_tmprss2_rbds, aes(x = ace2, y = ace2tmprss2, label = virus_label), color = "red", segment.color = "orange", size = 2) +
  geom_point(data = ace2_with_tmprss2_rbds, aes(x = ace2, y = ace2tmprss2), alpha = 0.5)
ACE2_ACE2TMPRSS2_scatterplot

ggsave(file = "Plots/ACE2_ACE2TMPRSS2_scatterplot.pdf", ACE2_ACE2TMPRSS2_scatterplot, height = 1.5, width = 2)
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(file = "Plots/ACE2_ACE2TMPRSS2_scatterplot.png", ACE2_ACE2TMPRSS2_scatterplot, height = 1.5, width = 2)
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
btky72_fold_increase <- as.numeric(rbds_both_dataset_summary[rbds_both_dataset_summary$virus_label == "BtKY72 RBD" & rbds_both_dataset_summary$cells == "ACE2-2A-TMPRSS2","geomean"])
btky72_label_levels <- c("Junin virus", "BtKY72 RBD", "BtKY72 1-539", "BtKY72 1-888", "BtKY72 1-955")

btky72_subset_raw <- two_color_ace2_dep_raw %>% filter(virus_label %in% btky72_label_levels)
btky72_subset_raw$virus_label <- factor(btky72_subset_raw$virus_label, levels = btky72_label_levels)

btky72_subset_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.position = "none", panel.grid.minor = element_blank()) +
  scale_y_log10() + #limits = c(1e-1,2e3)) + 
  scale_color_manual(values = c("red","black")) +
  labs(x = element_blank(), y = "Percent GFP positive cells") +
  geom_jitter(data = btky72_subset_raw, aes(x = virus_label, y = value, color = variable), position = position_dodge(width = 0.4), alpha = 0.4) +
  geom_hline(yintercept = 0.005, linetype = 2)
btky72_subset_plot

ggsave(file = "Plots/btky72_subset_plot.pdf", btky72_subset_plot, height = 3, width = 4.5)

btky72_subset <- two_color_ace2_dep2 %>% filter(virus_label %in% btky72_label_levels)
btky72_subset$virus_label <- factor(btky72_subset$virus_label, levels = btky72_label_levels)

btky72_subset_summary <- two_color_ace2_dep_summary %>% filter(virus_label %in% btky72_label_levels)
btky72_subset_summary$virus_label <- factor(btky72_subset_summary$virus_label, levels = btky72_label_levels)

Ace2_btky72_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), panel.grid.minor = element_blank()) +
  scale_y_log10(limits = c(0.3,100), breaks = c(1,10,100)) + 
  labs(x = element_blank(), y = "Ratio of iRFP670 to mCherry infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  #geom_hline(yintercept = btky72_fold_increase, linetype = 2, alpha = 0.4, color = "red") +
  geom_quasirandom(data = btky72_subset, aes(x = virus_label, y = ace2dep_infection), position = position_jitter(width = 0.1), alpha = 0.4) +
  geom_errorbar(data = btky72_subset_summary, aes(x = virus_label, ymin = lower_ci, ymax = upper_ci), alpha = 0.4, width = 0.2) +
  geom_point(data = btky72_subset_summary, aes(x = virus_label, y = geomean), size = 8, shape = 95)
Ace2_btky72_plot

ggsave(file = "Plots/Ace2_btky72_plot.pdf", Ace2_btky72_plot, height = 1.75, width = 4)
ggsave(file = "Plots/Ace2_btky72_plot.png", Ace2_btky72_plot, height = 1.75, width = 4)
btky72_label_levels2 <- c("Junin virus", "BtKY72 RBD", "BtKY72 RBD [Y488H]", "BtKY72 RBD [T499E]", "BtKY72 1-539", "BtKY72 1-888", "BtKY72 1-955", "BtKY72 1-120;329-539", "BtKY72 112-206;329-539", "BtKY72 214-328;329-539")

two_color_ace2_tmprss2_dep2_btky72 <- two_color_ace2_tmprss2_dep2 %>% filter(virus_label %in% btky72_label_levels2) 
two_color_ace2_tmprss2_dep2_btky72$virus_label <- factor(two_color_ace2_tmprss2_dep2_btky72$virus_label, levels = btky72_label_levels2)

two_color_ace2_tmprss2_dep_summary_btky72 <- two_color_ace2_tmprss2_dep_summary %>% filter(virus_label %in% btky72_label_levels2) 
two_color_ace2_tmprss2_dep_summary_btky72$virus_label <- factor(two_color_ace2_tmprss2_dep_summary_btky72$virus_label, levels = btky72_label_levels2)

ACE2_TMPRSS2_btky72_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), panel.grid.minor.x = element_blank()) +
  scale_y_log10(limits = c(3e-1,1e2)) + 
  labs(x = element_blank(), y = "Ratio of mCherry to iRFP670 infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_quasirandom(data = two_color_ace2_tmprss2_dep2_btky72, aes(x = virus_label, y = ace2_tmprss2_dep_infection), position = position_jitter(width = 0.1), alpha = 0.4) +
  geom_errorbar(data = two_color_ace2_tmprss2_dep_summary_btky72, aes(x = virus_label, ymin = lower_ci, ymax = upper_ci), alpha = 0.4, width = 0.2) +
  geom_point(data = two_color_ace2_tmprss2_dep_summary_btky72, aes(x = virus_label, y = geomean), size = 12, shape = 95)
ACE2_TMPRSS2_btky72_plot

ggsave(file = "Plots/ACE2_TMPRSS2_btky72_plot.pdf", ACE2_TMPRSS2_btky72_plot, height = 2, width = 3.5)
ggsave(file = "Plots/ACE2_TMPRSS2_btky72_plot.png", ACE2_TMPRSS2_btky72_plot, height = 2, width = 3.5)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_9.csv", two_color_ace2_tmprss2_dep_summary_btky72, quote = FALSE, row.names = FALSE)

two_color_ace2_dep_summary_btky72_segments <- two_color_ace2_dep_summary %>% filter(virus_label %in% c("Junin virus", "BtKY72 RBD", "BtKY72 1-539", "BtKY72 1-888", "BtKY72 1-955","BtKY72 1-539", "BtKY72 1-888", "BtKY72 1-955", "BtKY72 1-120;329-539", "BtKY72 112-206;329-539", "BtKY72 214-328;329-539"))
two_color_ace2_tmprss2_dep_summary_btky72_segments <- two_color_ace2_tmprss2_dep_summary %>% filter(virus_label %in% c("Junin virus", "BtKY72 RBD", "BtKY72 1-539", "BtKY72 1-888", "BtKY72 1-955","BtKY72 1-539", "BtKY72 1-888", "BtKY72 1-955", "BtKY72 1-120;329-539", "BtKY72 112-206;329-539", "BtKY72 214-328;329-539"))
ace2_with_tmprss2_btky72_segments <- merge(two_color_ace2_dep_summary_btky72_segments[,c("virus_label","geomean")], two_color_ace2_tmprss2_dep_summary_btky72_segments[,c("virus_label","geomean")], by = "virus_label")
colnames(ace2_with_tmprss2_btky72_segments) <- c("virus_label","ace2","ace2tmprss2")

ACE2_ACE2TMPRSS2_BtKY72_subpanel_scatterplot <- ggplot() + theme_bw() + scale_x_log10() + scale_y_log10() +
  geom_abline(slope = 1, intercept = 0, alpha = 0.2, size = 4) +
  geom_text_repel(data = ace2_with_tmprss2_btky72_segments, aes(x = ace2, y = ace2tmprss2, label = virus_label), color = "red", segment.color = "orange") +
  geom_point(data = ace2_with_tmprss2_btky72_segments, aes(x = ace2, y = ace2tmprss2))
ggsave(file = "Plots/ACE2_ACE2TMPRSS2_BtKY72_subpanel_scatterplot.pdf", ACE2_ACE2TMPRSS2_BtKY72_subpanel_scatterplot, height = 4, width = 4)
ggsave(file = "Plots/ACE2_ACE2TMPRSS2_BtKY72_subpanel_scatterplot.png", ACE2_ACE2TMPRSS2_BtKY72_subpanel_scatterplot, height = 4, width = 4)
ACE2_ACE2TMPRSS2_BtKY72_subpanel_scatterplot

ACE2_TMPRSS2_btky72_mut_plot <- ggplot() + 
 theme_bw() + theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  scale_y_log10(limits = c(3e-1,2e3)) + 
  labs(x = element_blank(), y = "Ratio of mCherry to iRFP670 infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_quasirandom(data = two_color_ace2_tmprss2_dep2_btky72 %>% filter(virus_label %in% c("BtKY72 RBD", "BtKY72 RBD [Y488H]", "BtKY72 RBD [T499E]")), aes(x = virus_label, y = ace2_tmprss2_dep_infection), position = position_jitter(width = 0.1), alpha = 0.2, size = 1) +
  geom_errorbar(data = two_color_ace2_tmprss2_dep_summary_btky72 %>% filter(virus_label %in% c("BtKY72 RBD", "BtKY72 RBD [Y488H]", "BtKY72 RBD [T499E]")), aes(x = virus_label, ymin = lower_ci, ymax = upper_ci), alpha = 0.6, width = 0.4) +
  geom_point(data = two_color_ace2_tmprss2_dep_summary_btky72 %>% filter(virus_label %in% c("BtKY72 RBD", "BtKY72 RBD [Y488H]", "BtKY72 RBD [T499E]")), aes(x = virus_label, y = geomean), size = 12, shape = 95)
ACE2_TMPRSS2_btky72_mut_plot

ggsave(file = "Plots/ACE2_TMPRSS2_btky72_mut_plot.pdf", ACE2_TMPRSS2_btky72_mut_plot, height = 1.85, width = 1.3)
ggsave(file = "Plots/ACE2_TMPRSS2_btky72_mut_plot.png", ACE2_TMPRSS2_btky72_mut_plot, height = 1.85, width = 1.3)

temp_data_for_export <- two_color_ace2_tmprss2_dep_summary_btky72 %>% filter(virus_label %in% c("BtKY72 RBD", "BtKY72 RBD [Y488H]", "BtKY72 RBD [T499E]"))

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_3H_right.csv", temp_data_for_export, quote = FALSE, row.names = FALSE)
ace2_parsed_df <- read.delim(file = "Data/Alignment/Bat_ACE2_aligned_identity_matrix_items.tsv", header = F, stringsAsFactors = F, sep = "\t")
ace2_dist_matrix <- read.delim(file = "Data/Alignment/Bat_ACE2_aligned_identity_matrix.csv", header = F, stringsAsFactors = F, sep = ",")

colnames(ace2_dist_matrix) <- ace2_parsed_df$V1 #paste("c_",ace2_parsed_df$V1,sep="")
rownames(ace2_dist_matrix) <- ace2_parsed_df$V1 #paste("r_",ace2_parsed_df$V1,sep="")

ace2_dist_matrix_hclust <- hclust(as.dist(ace2_dist_matrix, diag = TRUE))

ace2_dist_matrix2 <- ace2_dist_matrix
ace2_dist_matrix2$species <- rownames(ace2_dist_matrix2)

ace2_dist_melted <- melt(ace2_dist_matrix2, id = "species")
ace2_dist_melted$variable <- as.character(ace2_dist_melted$variable)

ace2_dist_melted$species <- factor(ace2_dist_melted$species, levels = ace2_dist_matrix_hclust$labels)
ace2_dist_melted$variable <- factor(ace2_dist_melted$variable, levels = ace2_dist_matrix_hclust$labels)

ACE2_identity_matrix_plot <- ggplot() + theme_bw() +
  theme(axis.text = element_text(size = 8), axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_continuous(low = "white", high = "black") +
  geom_tile(data = ace2_dist_melted, aes(x = species, y = variable, fill = value)) +
  geom_text(data = ace2_dist_melted %>% filter(variable != "H.sapiens" & value <= 40), aes(x = species, y = variable, label = value), size = 2) +
  geom_text(data = ace2_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value >= 40), aes(x = species, y = variable, label = value), size = 2, color = "grey90") +
  geom_text(data = ace2_dist_melted %>% filter(species == "H.sapiens" & value >= 40), aes(x = species, y = variable, label = value), size = 1.5, color = "grey90") +   geom_text(data = ace2_dist_melted %>% filter(variable == "H.sapiens" & value >= 40), aes(x = species, y = variable, label = value), size = 1.5, color = "grey90") +
  NULL
ggsave(file = "Plots/ACE2_identity_matrix_plot.pdf", ACE2_identity_matrix_plot, height = 2.3, width = 3.2)
ACE2_identity_matrix_plot

ace2_length <- nchar(ace2_parsed_df$V2[1])
ace2_allele_num <- nrow(ace2_parsed_df)

amino_acid_matrix = matrix(nrow = ace2_length, ncol = ace2_allele_num)

for(x in 1:ace2_length){
  for(y in 1:ace2_allele_num){
    amino_acid_matrix[x,y] <- substr(ace2_parsed_df$V2[y],x,x)
  }
}

df_amino_acid_matrix <- data.frame(amino_acid_matrix)
colnames(df_amino_acid_matrix) <- ace2_parsed_df$V1

df_amino_acid_matrix$unique_residue <- "X"

for(x in 1:nrow(df_amino_acid_matrix)){df_amino_acid_matrix$unique_residue[x] <- gsub("[^A-Z]","",toString(unique(unlist(df_amino_acid_matrix[x,2:9]))))}
df_amino_acid_matrix$amino_acids <- nchar(df_amino_acid_matrix$unique_residue)
df_amino_acid_matrix$differences <- nchar(df_amino_acid_matrix$unique_residue) -1
df_amino_acid_matrix$position <- as.numeric(rownames(df_amino_acid_matrix))

Ace2_differences_linear_seq_plot <- ggplot() + theme_bw() + theme(panel.grid.minor.y = element_blank()) + 
  scale_x_continuous(expand = c(0,0)) + scale_y_continuous(breaks = c(1,2,3), limits = c(0,4), expand = c(0,0)) +
  labs(x = "Position along ACE2", y = "Number of\ndifferences") +
  geom_point(data = df_amino_acid_matrix %>% filter(differences >= 1), aes(x = position, y = differences), alpha = 0.4, size = 2) +
  geom_segment(aes(x = 1, xend = 595, y = 0, yend = 0), size = 3) +
  geom_segment(aes(x = 596, xend = 804, y = 0, yend = 0), size = 3, color = "red")
ggsave(file = "Plots/Ace2_differences_linear_seq_plot.pdf", Ace2_differences_linear_seq_plot, height = 1, width = 1.75)
Ace2_differences_linear_seq_plot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_4A.csv", df_amino_acid_matrix, quote = FALSE, row.names = FALSE)

subset(df_amino_acid_matrix, amino_acids >= 4)
##     H.sapiens R.ferrumequinum R.alcyone R.landeri R.sinicus_472 R.sinicus_215
## 24          Q               L         L         L             E             R
## 31          K               D         N         D             K             E
## 34          H               S         S         S             T             S
## 568         L               S         S         S             E             K
## 667         E               E         E         E             A             A
##     R.sinicus_200 R.affinis R.pearsonii unique_residue amino_acids differences
## 24              L         R           Q           LERQ           4           3
## 31              E         N           K           DNKE           4           3
## 34              F         R           R           STFR           4           3
## 568             K         V           V           SEKV           4           3
## 667             A         V           D           EAVD           4           3
##     position
## 24        24
## 31        31
## 34        34
## 568      568
## 667      667
subset(df_amino_acid_matrix, amino_acids >= 3 & position < 400)
##     H.sapiens R.ferrumequinum R.alcyone R.landeri R.sinicus_472 R.sinicus_215
## 24          Q               L         L         L             E             R
## 27          T               K         I         T             I             T
## 31          K               D         N         D             K             E
## 34          H               S         S         S             T             S
## 35          E               E         E         A             K             E
## 38          D               N         N         N             D             N
## 75          E               K         T         T             E             E
## 91          L               D         D         D             V             V
## 215         Y               L         L         L             P             P
## 223         I               I         T         T             M             M
## 301         A               N         T         T             G             G
## 305         Q               K         K         K             D             D
##     R.sinicus_200 R.affinis R.pearsonii unique_residue amino_acids differences
## 24              L         R           Q           LERQ           4           3
## 27              I         I           I            KIT           3           2
## 31              E         N           K           DNKE           4           3
## 34              F         R           R           STFR           4           3
## 35              E         E           E            EAK           3           2
## 38              N         E           D            NDE           3           2
## 75              E         E           E            KTE           3           2
## 91              V         V           S            DVS           3           2
## 215             P         S           P            LPS           3           2
## 223             M         M           M            ITM           3           2
## 301             G         G           G            NTG           3           2
## 305             D         N           N            KDN           3           2
##     position
## 24        24
## 27        27
## 31        31
## 34        34
## 35        35
## 38        38
## 75        75
## 91        91
## 215      215
## 223      223
## 301      301
## 305      305
## Figure out which positions differ between alcyone and landeri
alcyone_landeri_frame <- df_amino_acid_matrix[,c("R.alcyone","R.landeri")]
for(x in 1:nrow(alcyone_landeri_frame)){alcyone_landeri_frame$unique_residue[x] <- gsub("[^A-Z]","",toString(unique(unlist(alcyone_landeri_frame[x,1:2]))))}
alcyone_landeri_frame$amino_acids <- nchar(alcyone_landeri_frame$unique_residue)
alcyone_landeri_frame$differences <- nchar(alcyone_landeri_frame$unique_residue) -1
alcyone_landeri_frame$position <- as.numeric(rownames(alcyone_landeri_frame))

alcyone_landeri_frame_diff <- alcyone_landeri_frame %>% filter(differences > 0)
alcyone_landeri_frame_diff
##    R.alcyone R.landeri unique_residue amino_acids differences position
## 1          A         T             AT           2           1       15
## 2          I         T             IT           2           1       27
## 3          N         D             ND           2           1       31
## 4          E         A             EA           2           1       35
## 5          H         Y             HY           2           1       41
## 6          T         A             TA           2           1      246
## 7          A         G             AG           2           1      286
## 8          Y         H             YH           2           1      613
## 9          L         S             LS           2           1      656
## 10         N         D             ND           2           1      720
paste("select alcyone_diffs, modeller_alcyone and resi",gsub(", ","+",toString(unlist(alcyone_landeri_frame_diff$position))))
## [1] "select alcyone_diffs, modeller_alcyone and resi 15+27+31+35+41+246+286+613+656+720"
ace2_tested_parsed_df <- read.delim(file = "Data/Alignment/Bat_tested_ACE2_aligned_identity_matrix_items.tsv", header = F, stringsAsFactors = F, sep = "\t")
ace2_tested_dist_matrix <- read.delim(file = "Data/Alignment/Bat_tested_ACE2_aligned_identity_matrix.csv", header = F, stringsAsFactors = F, sep = ",")

colnames(ace2_tested_dist_matrix) <- ace2_tested_parsed_df$V1 #paste("c_",ace2_tested_parsed_df$V1,sep="")
rownames(ace2_tested_dist_matrix) <- ace2_tested_parsed_df$V1 #paste("r_",ace2_tested_parsed_df$V1,sep="")

ace2_tested_dist_matrix_hclust <- hclust(as.dist(ace2_tested_dist_matrix, diag = TRUE))

ace2_tested_dist_matrix2 <- ace2_tested_dist_matrix
ace2_tested_dist_matrix2$species <- rownames(ace2_tested_dist_matrix2)

ace2_tested_dist_melted <- melt(ace2_tested_dist_matrix2, id = "species")
ace2_tested_dist_melted$variable <- as.character(ace2_tested_dist_melted$variable)

ace2_tested_dist_melted$species <- factor(ace2_tested_dist_melted$species, levels = ace2_tested_dist_matrix_hclust$labels)
ace2_tested_dist_melted$variable <- factor(ace2_tested_dist_melted$variable, levels = ace2_tested_dist_matrix_hclust$labels)

ACE2_tested_identity_matrix_plot <- ggplot() + theme_bw() +
  theme(axis.text = element_text(size = 8), axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_continuous(low = "white", high = "black") +
  geom_tile(data = ace2_tested_dist_melted, aes(x = species, y = variable, fill = value)) +
  geom_text(data = ace2_tested_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens"), aes(x = species, y = variable, label = value)) +
  NULL
ggsave(file = "Plots/ACE2_tested_identity_matrix_plot.pdf", ACE2_tested_identity_matrix_plot, height = 2.3*2, width = 2.8*2)
ACE2_tested_identity_matrix_plot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_4C.csv", ace2_tested_dist_melted, quote = FALSE, row.names = FALSE)
all_ace2_parsed_df <- read.delim(file = "Data/Alignment/211020_All_ACE2/All_Bat_ACE2_aligned_identity_matrix_items.tsv", header = F, stringsAsFactors = F, sep = "\t")
all_ace2_dist_matrix <- read.delim(file = "Data/Alignment/211020_All_ACE2/All_Bat_ACE2_aligned_identity_matrix.csv", header = F, stringsAsFactors = F, sep = ",")

colnames(all_ace2_dist_matrix) <- all_ace2_parsed_df$V1 #paste("c_",all_ace2_parsed_df$V1,sep="")
rownames(all_ace2_dist_matrix) <- all_ace2_parsed_df$V1 #paste("r_",all_ace2_parsed_df$V1,sep="")

all_ace2_dist_matrix_hclust <- hclust(as.dist(all_ace2_dist_matrix, diag = TRUE))

all_ace2_dist_matrix2 <- all_ace2_dist_matrix
all_ace2_dist_matrix2$species <- rownames(all_ace2_dist_matrix2)

all_ace2_dist_melted <- melt(all_ace2_dist_matrix2, id = "species")
all_ace2_dist_melted$variable <- as.character(all_ace2_dist_melted$variable)

all_ace2_dist_melted$species <- factor(all_ace2_dist_melted$species, levels = all_ace2_dist_matrix_hclust$labels)
all_ace2_dist_melted$variable <- factor(all_ace2_dist_melted$variable, levels = all_ace2_dist_matrix_hclust$labels)

all_ace2_identity_matrix_plot <- ggplot() + theme_bw() +
  theme(axis.text = element_text(size = 8), axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_continuous(low = "white", high = "black") +
  geom_tile(data = all_ace2_dist_melted, aes(x = species, y = variable, fill = value)) +
  geom_text(data = all_ace2_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value <= 80), aes(x = species, y = variable, label = value), size = 2) +
  geom_text(data = all_ace2_dist_melted %>% filter(species != "H.sapiens" & variable != "H.sapiens" & value >= 80), aes(x = species, y = variable, label = value), size = 2, color = "grey70") +
  NULL
ggsave(file = "Plots/all_ace2_identity_matrix_plot.pdf", all_ace2_identity_matrix_plot, height = 6, width = 7)
all_ace2_identity_matrix_plot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_4.csv", all_ace2_dist_melted, quote = FALSE, row.names = FALSE)

See if I can plot where the most changes in sequence are

ace2_length <- nchar(all_ace2_parsed_df$V2[1])
ace2_allele_num <- nrow(all_ace2_parsed_df)

amino_acid_matrix = matrix(nrow = ace2_length, ncol = ace2_allele_num)

for(x in 1:ace2_length){
  for(y in 1:ace2_allele_num){
    amino_acid_matrix[x,y] <- substr(all_ace2_parsed_df$V2[y],x,x)
  }
}

df_amino_acid_matrix <- data.frame(amino_acid_matrix)
colnames(df_amino_acid_matrix) <- all_ace2_parsed_df$V1

df_amino_acid_matrix$unique_residue <- "X"

for(x in 1:nrow(df_amino_acid_matrix)){df_amino_acid_matrix$unique_residue[x] <- gsub("[^A-Z]","",toString(unique(unlist(df_amino_acid_matrix[x,2:38]))))}
df_amino_acid_matrix$amino_acids <- nchar(df_amino_acid_matrix$unique_residue)
df_amino_acid_matrix$differences <- nchar(df_amino_acid_matrix$unique_residue) -1
df_amino_acid_matrix$position <- as.numeric(rownames(df_amino_acid_matrix))

annot_line_1_height = -3
annot_line_2_height = -5

Ace2_differences_linear_seq_plot <- ggplot() + theme_bw() + theme(panel.grid.minor.y = element_blank()) + 
  scale_x_continuous(expand = c(0,0)) + scale_y_continuous(breaks = seq(0,6,2), limits = c(-6,6), expand = c(0,0)) +
  labs(x = "Position along ACE2", y = "Number of\ndifferences") +
  geom_vline(xintercept = c(31,37,41,353,355,357), alpha = 0.2, color = "red") + 
  geom_hline(yintercept = 0) +
  geom_point(data = df_amino_acid_matrix %>% filter(differences >= 1), aes(x = position, y = differences), alpha = 0.2, size = 1) +
  geom_segment(aes(x = 1, xend = 595, y = annot_line_1_height, yend = annot_line_1_height), size = 3, color = "tan4") +
  geom_segment(aes(x = 596, xend = 804, y = annot_line_1_height, yend = annot_line_1_height), size = 3, color = "#FB4F14") +
  geom_segment(aes(x = 1, xend = 740, y = annot_line_2_height, yend = annot_line_2_height), size = 3, color = "red") +
  geom_segment(aes(x = 741, xend = 761, y = annot_line_2_height, yend = annot_line_2_height), size = 3, color = "green") +
  geom_segment(aes(x = 761, xend = 804, y = annot_line_2_height, yend = annot_line_2_height), size = 3, color = "blue")
Ace2_differences_linear_seq_plot

ggsave(file = "Plots/Ace2_differences_linear_seq_plot.pdf", Ace2_differences_linear_seq_plot, height = 1.5, width = 2.25)
ggsave(file = "Plots/Ace2_differences_linear_seq_plot_for_presentation.pdf", Ace2_differences_linear_seq_plot, height = 2, width = 5)

paste("select ace2_most_variants, ace2 and resi",gsub(", ","+",toString(subset(df_amino_acid_matrix, differences >= 4)$position)))
## [1] "select ace2_most_variants, ace2 and resi 24+27+31+34"
five_changed_positions <- df_amino_acid_matrix %>% filter(differences == 5)
paste("select five_differences, ace2 and resi",gsub(", ","+",toString(unlist(five_changed_positions$position))))
## [1] "select five_differences, ace2 and resi 34"
paste("color forest, five_differences")
## [1] "color forest, five_differences"
four_changed_positions <- df_amino_acid_matrix %>% filter(differences == 4)
paste("select four_differences, ace2 and resi",gsub(", ","+",toString(unlist(four_changed_positions$position))))
## [1] "select four_differences, ace2 and resi 24+27+31"
paste("color orange, four_differences")
## [1] "color orange, four_differences"
three_changed_positions <- df_amino_acid_matrix %>% filter(differences == 3)
paste("select three_differences, ace2 and resi",gsub(", ","+",toString(unlist(three_changed_positions$position))))
## [1] "select three_differences, ace2 and resi 429+568+667+771"
paste("color yellow, three_differences")
## [1] "color yellow, three_differences"
## Figure out which positions differ between alcyone and landeri
alcyone_landeri_frame <- df_amino_acid_matrix[,c("R.alcyone","R.landeri")]
for(x in 1:nrow(alcyone_landeri_frame)){alcyone_landeri_frame$unique_residue[x] <- gsub("[^A-Z]","",toString(unique(unlist(alcyone_landeri_frame[x,1:2]))))}
alcyone_landeri_frame$amino_acids <- nchar(alcyone_landeri_frame$unique_residue)
alcyone_landeri_frame$differences <- nchar(alcyone_landeri_frame$unique_residue) -1
alcyone_landeri_frame$position <- as.numeric(rownames(alcyone_landeri_frame))

alcyone_landeri_frame_diff <- alcyone_landeri_frame %>% filter(differences > 0)
alcyone_landeri_frame_diff
##    R.alcyone R.landeri unique_residue amino_acids differences position
## 1          A         T             AT           2           1       15
## 2          I         T             IT           2           1       27
## 3          N         D             ND           2           1       31
## 4          E         A             EA           2           1       35
## 5          H         Y             HY           2           1       41
## 6          T         A             TA           2           1      246
## 7          A         G             AG           2           1      286
## 8          Y         H             YH           2           1      613
## 9          L         S             LS           2           1      656
## 10         N         D             ND           2           1      720
paste("select alcyone_diffs, obj_human and resi",gsub(", ","+",toString(unlist(alcyone_landeri_frame_diff$position))))
## [1] "select alcyone_diffs, obj_human and resi 15+27+31+35+41+246+286+613+656+720"

Bringing in some of the Amnis data

Note: The data is way to big to uplaod to GitHub, so I’m only including a snippet here showing how it generally works

af488_data <- read.table(file = "Data/Amnis/af488_distance_intensity.txt", sep = "\t", header = T) %>% mutate(n = 1)
mirfp670_data <- read.table(file = "Data/Amnis/miRFP670_distance_intensity.txt", sep = "\t", header = T) %>% mutate(n = 1)
bf_data <- read.table(file = "Data/Amnis/bf_distance_intensity.txt", sep = "\t", header = T) %>% mutate(n = 1)

af488_summary <- af488_data %>% group_by(distance) %>% summarize(fraction_intensity = sum(fraction_intensity), n = sum(n)) %>% mutate(norm_intensity = fraction_intensity / n)
mirfp670_summary <- mirfp670_data %>% group_by(distance) %>% summarize(fraction_intensity = sum(fraction_intensity), n = sum(n)) %>% mutate(norm_intensity = fraction_intensity / n)
bf_summary <- bf_data %>% group_by(distance) %>% summarize(fraction_intensity = sum(fraction_intensity), n = sum(n)) %>% mutate(norm_intensity = fraction_intensity / n)

af488_summary$fraction_mode <- af488_summary$norm_intensity/max(af488_summary$norm_intensity)
mirfp670_summary$fraction_mode <- mirfp670_summary$norm_intensity/max(mirfp670_summary$norm_intensity)
bf_summary$fraction_mode <- bf_summary$norm_intensity/max(bf_summary$norm_intensity)

bf_summary2 <- bf_summary %>% filter(distance <= 25)
bf_summary2$fraction_mode <- (bf_summary2$fraction_mode - min(bf_summary2$fraction_mode))/(max(bf_summary2$fraction_mode) - min(bf_summary2$fraction_mode))

Export_250_plot <- ggplot() + theme(panel.grid.minor = element_blank()) + 
  labs(x = "Distance from cell center", y = "Normalized pixel intensity") +
  scale_x_continuous(limits = c(0,25)) + 
  geom_line(data = bf_summary2, aes(x = distance, y = fraction_mode), alpha = 0.4, color = "black", size = 2) +
  geom_point(data = bf_summary2, aes(x = distance, y = fraction_mode), alpha = 1, color = "black") +
  geom_line(data = mirfp670_summary, aes(x = distance, y = fraction_mode), alpha = 0.4, color = "red", size = 2) +
  geom_point(data = mirfp670_summary, aes(x = distance, y = fraction_mode), alpha = 1, color = "red") +
  geom_line(data = af488_summary, aes(x = distance, y = fraction_mode), alpha = 0.4, color = "green3", size = 2) +
  geom_point(data = af488_summary, aes(x = distance, y = fraction_mode), alpha = 1, color = "green3")
Export_250_plot

ggsave(file = "Export_250_plot.pdf", Export_250_plot, height = 2.2, width = 2.5)

amnis_cell_numbers <- c(1528,1310,1050,565,980,1121,1052,987,1178,1555)
sum(amnis_cell_numbers)
## [1] 11326
fig4e_for_export <- rbind(bf_summary2 %>% mutate(type = "brightfield"), mirfp670_summary %>% mutate(type = "mirfp670"), af488_summary %>% mutate(type = "af488"))

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_4E.csv", fig4e_for_export, quote = FALSE, row.names = FALSE)
rbd_label_levels2 <- c("VSV","YN2013 RBD", "Rs4237 RBD", "Rp3 RBD", "SARS-CoV-2","SARS-CoV-2 RBD", "SARS-CoV", "WIV1 RBD", "RhGB01 RBD", "BB9904 RBD", "BM48-31 RBD", "Khosta2 RBD", "Khosta1 RBD", "BtKY72 RBD")
bat_ace2_levels <- c("H.sapiens","NULL (fs)","R.ferrumequinum","R.alcyone","R.landeri","R.sinicus472","R.sinicus215","R.sinicus200","R.affinis","R.pearsonii")

bat_ace2s <- two_color_precombined %>% filter(expt == "Bat_ACE2s" & !(recombined_construct %in% c("G1080J/G758A"))) %>% filter(moi_gvn_red != 0) %>% filter(!(pseudovirus_env %in%c("SARS1_OLD","SARS2"))) #%>% filter(date %in% c("8/26/2021", "9/22/2021")) #two_color_precombined

bat_ace2s_2 <- merge(bat_ace2s, pseudovirus_label_key, by = "pseudovirus_env")
bat_ace2s_2 <- merge(bat_ace2s_2, recombined_construct_key, by = "recombined_construct")
bat_ace2s_2$log10_nir_red_diff <- log10(bat_ace2s_2$moi_gvn_nir) - log10(bat_ace2s_2$moi_gvn_red)
bat_ace2s_2$fold_ace2_dep_infection <- 10^bat_ace2s_2$log10_nir_red_diff
bat_ace2s_2 <- bat_ace2s_2 %>% filter(log10_nir_red_diff != "Inf" & log10_nir_red_diff != "-Inf")

bat_ace2s_2$virus_label <- factor(bat_ace2s_2$virus_label, levels = rbd_label_levels2)

bat_ace2s_2_summary <- bat_ace2s_2 %>% group_by(virus_label, cell_label) %>% summarize(mean_log10 = mean(log10_nir_red_diff, na.rm = T), sd_log10 = sd(log10_nir_red_diff, na.rm = T), .groups = "drop") %>% mutate(geomean = 10^mean_log10, upper_conf = 10^(mean_log10 + 1.96*sd_log10), lower_conf = 10^(mean_log10 - 1.96*sd_log10))

bat_ace2s_2$virus_label <- factor(bat_ace2s_2$virus_label, levels = rev(rbd_label_levels2))
bat_ace2s_2_summary$virus_label <- factor(bat_ace2s_2_summary$virus_label, levels = rev(rbd_label_levels2))


### Make the statistical test to see which proteins reproducibly increase infectivity
full_variant_t_test <- data.frame("cell_label" = rep(unique(bat_ace2s_2$cell_label),each = length(unique(bat_ace2s_2$virus_label))),
                                          "virus_label" =  rep(unique(bat_ace2s_2$virus_label),length(unique(bat_ace2s_2$cell_label))),
                                          "mean_ace2dep_infection" = NA,"p_value" = NA,"significant" = NA)

for(x in 1:nrow(full_variant_t_test)){
  temp_cell_label <- full_variant_t_test$cell_label[x]
  temp_pseudovirus_env <- full_variant_t_test$virus_label[x]
  temp_subset <- bat_ace2s_2 %>% filter(cell_label == temp_cell_label & virus_label == temp_pseudovirus_env)
  if(nrow(temp_subset) > 1){
    full_variant_t_test$mean_ace2dep_infection[x] <- mean(temp_subset$fold_ace2_dep_infection)
    temp_p_value <- round(t.test(temp_subset$fold_ace2_dep_infection,rep(1,nrow(temp_subset)), alternative = "two.sided")$p.value,4)
    full_variant_t_test$p_value[x] <- temp_p_value
  }
}
full_variant_t_test$corrected_p_value <- p.adjust(full_variant_t_test$p_value, method = 'BH')
full_variant_t_test[is.na(full_variant_t_test$corrected_p_value),"corrected_p_value"] <- 1
full_variant_t_test[full_variant_t_test$corrected_p_value < 0.05 & full_variant_t_test$mean_ace2dep_infection > 2,"significant"] <- "yes"

Faceted_bar_chart_flow <- 
ggplot() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0),
                 panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + 
  labs(x = NULL, y = "Fold ACE2-dependent entry") +
  scale_y_log10(breaks = c(1,100)) + 
  geom_hline(yintercept = 1, alpha = 0.5) +
  geom_point(data = bat_ace2s_2_summary, aes(x = virus_label, y = geomean), shape = 95, size = 8) +
  geom_quasirandom(data = bat_ace2s_2, aes(x = virus_label, y = fold_ace2_dep_infection), alpha = 0.4) + 
  #geom_point(data = subset(bat_ace2s_2, fold_ace2_dep_infection > 10), aes(x = virus_label, y = fold_ace2_dep_infection), color = "red") +
  facet_grid(rows = vars(cell_label))
Faceted_bar_chart_flow

ggsave(file = "Plots/Faceted_bar_chart_flow.pdf", Faceted_bar_chart_flow, height = 6, width = 4)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_8_left.csv", bat_ace2s_2_summary %>% mutate(type = "flow_cytometry"), quote = FALSE, row.names = FALSE)


##### R.sinicus200 and SARS[Khosta1 RBD] combo making NaN so throw it out until I get more data
bat_ace2s_2_summary_for_plotting <- bat_ace2s_2_summary %>% filter(!(virus_label == "SARS[Khosta1 RBD]" & cell_label == "R.sinicus200"))
bat_ace2s_2_summary_for_plotting[bat_ace2s_2_summary_for_plotting$geomean >= 100,"geomean"] <- 100
bat_ace2s_2_summary_for_plotting$virus_label <- factor(bat_ace2s_2_summary_for_plotting$virus_label, levels = rev(rbd_label_levels2))
bat_ace2s_2_summary_for_plotting$cell_label <- factor(bat_ace2s_2_summary_for_plotting$cell_label, levels = bat_ace2_levels)


my_breaks <- c("1,10,100")
host_virus_paired <- read.csv(file = "Data/Keys/host_virus_pairs_for_sequenced_sample.csv", header = T, stringsAsFactors = F)

Bat_ACE2_heatmap <- ggplot() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) + 
  labs(x = NULL, y = NULL, fill = "Fold") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0.3,  na.value = "grey50", trans = "log10", limits = c(0.1,100)) + 
  geom_tile(data = bat_ace2s_2_summary_for_plotting, aes(x = cell_label, y = virus_label, fill = geomean)) +
  geom_text(data = bat_ace2s_2_summary_for_plotting, aes(x = cell_label, y = virus_label, label = round(geomean,1)), size = 1.8) +
  geom_tile(data = host_virus_paired, aes(x = cell_label, y = virus_label), color = "black", fill = NA, size = 0.25) +
  NULL
Bat_ACE2_heatmap

ggsave(file = "Plots/Bat_ACE2_heatmap.pdf", Bat_ACE2_heatmap, height = 3.5, width = 4.67)
ggsave(file = "Plots/Bat_ACE2_heatmap.png", Bat_ACE2_heatmap, height = 3.5, width = 4.67)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_5A_top.csv", bat_ace2s_2_summary_for_plotting %>% mutate(type = "flow_cytometry"), quote = FALSE, row.names = FALSE)

## What values am I getting for Rp3 and YN2013 and Pearsonii
clade2_pearsonii <- bat_ace2s_2 %>% filter(cell_label == "R.pearsonii" & virus_label %in% c("SARS[YN2013 RBD]","SARS[Rp3 RBD]","SARS[Rs4237 RBD]"))
clade2_affinis <- bat_ace2s_2 %>% filter(cell_label == "R.affinis" & virus_label %in% c("SARS[YN2013 RBD]","SARS[Rp3 RBD]","SARS[Rs4237 RBD]"))
clade2_landeri <- bat_ace2s_2 %>% filter(cell_label == "R.landeri" & virus_label %in% c("SARS[YN2013 RBD]","SARS[Rp3 RBD]","SARS[Rs4237 RBD]"))
microscopy <- read.csv(file = "Data/Infection_microscopy_scores.csv", header = T, stringsAsFactors = F) %>% filter(!(label_envelope %in%c("SARS1_OLD","SARS2")))
microscopy2 <- merge(microscopy, recombined_construct_key[,c("recombined_construct","cell_label")], by = "recombined_construct")
microscopy3 <- merge(microscopy2, pseudovirus_label_key[,c("pseudovirus_env","virus_label","sarbecovirus_clade")], by = "pseudovirus_env")

rbd_levels <- rev(c("VSV","YN2013 RBD", "Rs4237 RBD", "Rp3 RBD", "SARS-CoV-2","SARS-CoV-2 RBD", "SARS-CoV","WIV1 RBD", "RhGB01 RBD", "BB9904 RBD", "BM48-31 RBD", "Khosta2 RBD", "Khosta1 RBD", "BtKY72 RBD"))
bat_ace2_levels <- c("H.sapiens","NULL (fs)","R.ferrumequinum","R.alcyone","R.landeri","R.sinicus472","R.sinicus215","R.sinicus200","R.affinis","R.pearsonii")

microscopy_batace2_summary <- microscopy3 %>% filter(expt == "Bat_ACE2s") %>% group_by(cell_label, virus_label) %>% summarize(ratio = mean(nir_mcherry_overlap_ratio), .groups = "drop")

microscopy_batace2_summary$cell_label <- factor(microscopy_batace2_summary$cell_label, levels = bat_ace2_levels)
microscopy_batace2_summary$virus_label <- factor(microscopy_batace2_summary$virus_label, levels = (rbd_levels))

microscopy3_batace2 <- microscopy3 %>% filter(expt == "Bat_ACE2s")

Faceted_bar_chart_microscopy <- 
ggplot() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0),
                 panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + 
  labs(x = NULL, y = "Fold ACE2-dependent entry") +
  scale_y_log10(breaks = c(1,10)) + 
  geom_hline(yintercept = 1, alpha = 0.5) +
  geom_point(data = microscopy_batace2_summary, aes(x = virus_label, y = ratio), shape = 95, size = 8) +
  geom_quasirandom(data = microscopy3_batace2, aes(x = virus_label, y = nir_mcherry_overlap_ratio), alpha = 0.4) + 
  #geom_point(data = subset(microscopy3_batace2, fold_ace2_dep_infection > 10), aes(x = virus_label, y = fold_ace2_dep_infection), color = "red") +
  facet_grid(rows = vars(cell_label))
Faceted_bar_chart_microscopy

ggsave(file = "Plots/Faceted_bar_chart_microscopy.pdf", Faceted_bar_chart_microscopy, height = 6, width = 4)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_8_right.csv", microscopy_batace2_summary %>% mutate(type = "microscopy"), quote = FALSE, row.names = FALSE)


Microscopy_heatmap <- ggplot() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) + 
  labs(x = NULL, y = NULL, fill = "Fold") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0.1,  na.value = "grey50", trans = "log10", limits = c(0.5,10)) + 
  geom_tile(data = microscopy_batace2_summary, aes(x = cell_label, y = virus_label, fill = ratio)) +
  geom_text(data = microscopy_batace2_summary, aes(x = cell_label, y = virus_label, label = round(ratio,1)), size = 1.8) +
  geom_tile(data = host_virus_paired, aes(x = cell_label, y = virus_label), color = "black", fill = NA, size = 0.25) +
  NULL
Microscopy_heatmap

ggsave(file = "Plots/Microscopy_heatmap.pdf", Microscopy_heatmap, height = 3.5, width = 4.5)
ggsave(file = "Plots/Microscopy_heatmap.png", Microscopy_heatmap, height = 3.5, width = 4.5)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_5A_bottom.csv", microscopy_batace2_summary %>% mutate(type = "microscopy"), quote = FALSE, row.names = FALSE)
flow_summary <- bat_ace2s_2_summary_for_plotting %>% filter(cell_label != "NULL (fs)") %>% group_by(virus_label) %>% summarize(mean_flow = mean(geomean))
microscope_summary <- microscopy_batace2_summary %>% filter(cell_label != "NULL (fs)") %>% group_by(virus_label) %>% summarize(mean_micro = mean(ratio))

flow_micro_summary <- merge(flow_summary, microscope_summary, by = "virus_label")

grouped_by_virus_plot <- ggplot() + theme(panel.grid.minor = element_blank()) + 
  scale_x_log10(limits = c(0.4,30)) + scale_y_log10(limits = c(0.8,4.2)) + 
  labs(x = "Flow cytometry", y = "Microscopy") + 
  geom_text_repel(data = flow_micro_summary, aes(x = mean_flow, y = mean_micro, label = virus_label), color = "red", size = 2, segment.color = "orange", segment.alpha = 0.4) +
  geom_point(data = flow_micro_summary, aes(x = mean_flow, y = mean_micro), alpha = 0.5)
grouped_by_virus_plot

ggsave(file = "Plots/Grouped_by_virus_plot.pdf", grouped_by_virus_plot, height = 2, width = 2.75)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_5C_top.csv", flow_micro_summary, quote = FALSE, row.names = FALSE)


flow_summary2 <- bat_ace2s_2_summary_for_plotting %>% filter(cell_label != "NULL (fs)" & virus_label != "VSV") %>% group_by(cell_label) %>% summarize(mean_flow = mean(geomean))
microscope_summary2 <- microscopy_batace2_summary %>% filter(cell_label != "NULL (fs)" & virus_label != "VSV") %>% group_by(cell_label) %>% summarize(mean_micro = mean(ratio))

flow_micro_summary2 <- merge(flow_summary2, microscope_summary2, by = "cell_label")

grouped_by_cell_plot <- ggplot() + theme(panel.grid.minor = element_blank()) + 
  scale_x_log10(limits = c(0.4,30)) + scale_y_log10(limits = c(1,4.2)) + 
  labs(x = "Flow cytometry", y = "Microscopy") + 
  geom_text_repel(data = flow_micro_summary2, aes(x = mean_flow, y = mean_micro, label = cell_label), color = "red", size = 2, segment.color = "orange", segment.alpha = 0.4) +
  geom_point(data = flow_micro_summary2, aes(x = mean_flow, y = mean_micro), alpha = 0.5)
grouped_by_cell_plot

ggsave(file = "Plots/Grouped_by_cell_plot.pdf", grouped_by_cell_plot, height = 1.5, width = 2.75)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_5C_bottom.csv", flow_micro_summary2, quote = FALSE, row.names = FALSE)
flow_vs_microscopy <- merge(bat_ace2s_2_summary_for_plotting[,c("virus_label","cell_label","geomean")], microscopy_batace2_summary, by = c("virus_label","cell_label"))

ggplot() + 
  scale_x_log10(limits = c(0.3,100)) + scale_y_log10(limits = c(0.8,10)) + 
  scale_shape_manual(values = c(21, 22, 23, 24, 25, 3, 4, 7, 8, 13)) +
  geom_hline(yintercept = 1, alpha = 0.4) + geom_vline(xintercept = 1, alpha = 0.4) +
  geom_point(data = flow_vs_microscopy, aes(x = geomean, y = ratio, color = virus_label, shape = cell_label))

flow_micro_clade1 <- flow_vs_microscopy %>% filter(virus_label %in% c("VSV", "WIV1 RBD", "SARS-CoV-2 RBD"))
Clade1_scatterplot <- ggplot() + theme(panel.grid.minor = element_blank(), legend.position = "top") + 
  labs(x = "Fold ACE2-dependent infection (Flow)", y = "Fold ACE2-dependent infection (Image)") +
  scale_x_log10(limits = c(0.3,100), expand = c(0,0)) + scale_y_log10(limits = c(0.8,13), expand = c(0,0)) + 
  scale_shape_manual(values = c(16, 8, 0, 1, 2, 9, 10, 12, 5, 6)) +
  geom_hline(yintercept = 1, alpha = 0.4) + geom_vline(xintercept = 1, alpha = 0.4) +
  geom_point(data = flow_micro_clade1, aes(x = geomean, y = ratio, color = virus_label, shape = cell_label), alpha = 0.5)
ggsave("Plots/Clade1_scatterplot.pdf", Clade1_scatterplot, height = 2.1, width = 2.7)
Clade1_scatterplot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_7_left.csv", flow_micro_clade1 %>% mutate(panel = "Left"), quote = FALSE, row.names = FALSE)

flow_micro_clade2 <- flow_vs_microscopy %>% filter(virus_label %in% c("YN2013 RBD", "Rs4237 RBD", "Rp3 RBD"))
Clade2_scatterplot <- ggplot() + theme(panel.grid.minor = element_blank(), legend.position = "top") + 
  labs(x = "Fold ACE2-dependent infection (Flow)", y = "Fold ACE2-dependent infection (Image)") +
  scale_x_log10(limits = c(0.3,100), expand = c(0,0)) + scale_y_log10(limits = c(0.8,13), expand = c(0,0)) + 
  scale_shape_manual(values = c(16, 8, 0, 1, 2, 9, 10, 12, 5, 6)) +
  geom_hline(yintercept = 1, alpha = 0.4) + geom_vline(xintercept = 1, alpha = 0.4) +
  geom_point(data = flow_micro_clade2, aes(x = geomean, y = ratio, color = virus_label, shape = cell_label), alpha = 0.5)
ggsave("Plots/Clade2_scatterplot.pdf", Clade2_scatterplot, height = 2.1, width = 2.7)
Clade2_scatterplot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_7_middle.csv", flow_micro_clade2 %>% mutate(panel = "Middle"), quote = FALSE, row.names = FALSE)

flow_micro_clade3 <- flow_vs_microscopy %>% filter(virus_label %in% c("RhGB01 RBD", "BB9904 RBD", "BM48-31 RBD", "Khosta2 RBD", "Khosta1 RBD", "BtKY72 RBD"))
Clade3_scatterplot <- ggplot() + theme(panel.grid.minor = element_blank(), legend.position = "top") + 
  labs(x = "Fold ACE2-dependent infection (Flow)", y = "Fold ACE2-dependent infection (Image)") +
  scale_x_log10(limits = c(0.3,100), expand = c(0,0)) + scale_y_log10(limits = c(0.8,13), expand = c(0,0)) + 
  scale_shape_manual(values = c(16, 8, 0, 1, 2, 9, 10, 12, 5, 6)) +
  geom_hline(yintercept = 1, alpha = 0.4) + geom_vline(xintercept = 1, alpha = 0.4) +
  geom_point(data = flow_micro_clade3, aes(x = geomean, y = ratio, color = virus_label, shape = cell_label))
ggsave("Plots/Clade3_scatterplot.pdf", Clade3_scatterplot, height = 2.1, width = 2.7)
Clade3_scatterplot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_7_right.csv", flow_micro_clade3 %>% mutate(panel = "Right"), quote = FALSE, row.names = FALSE)

This is where we test the full-length BtKY72 and Khosta-2 spikes

full_length <- two_color_precombined %>% filter(expt %in% c("Full_length") & pseudovirus_env != "R12")

full_length$red_cells <- full_length$live_singlets * full_length$pct_red / 100
full_length$red_then_grn_cells <- round(full_length$red_cells * full_length$pct_grn_gvn_red / 100,0)
full_length$nir_cells <- full_length$live_singlets * full_length$pct_nir / 100
full_length$nir_then_grn_cells <- round(full_length$nir_cells * full_length$pct_grn_gvn_nir / 100,0)

# Foregoing this -> #passes_lower_bound == "yes" & 
full_length_1 <- full_length %>% filter(nir_then_grn_cells > 1) %>% group_by(recombined_construct, pseudovirus_env) %>% mutate(pct_grn_gvn_red = red_then_grn_cells / red_cells * 100, pct_grn_gvn_nir = nir_then_grn_cells / nir_cells * 100) %>% mutate(moi_gvn_red = -log(1-pct_grn_gvn_red/100), moi_gvn_nir = -log(1-pct_grn_gvn_nir/100))

full_length_1$log10_nir_red_diff <- log10(full_length_1$moi_gvn_nir) - log10(full_length_1$moi_gvn_red)
full_length_1$fold_ace2_dep_infection <- 10^full_length_1$log10_nir_red_diff
full_length_1 <- full_length_1 %>% filter(log10_nir_red_diff != "Inf" & log10_nir_red_diff != "-Inf" & pseudovirus_env != "G1271A")

full_length_2 <- merge(full_length_1, pseudovirus_label_key, by = "pseudovirus_env")
full_length_2 <- merge(full_length_2, recombined_construct_key, by = "recombined_construct")

full_length_2$log10_nir_red_diff <- log10(full_length_2$moi_gvn_nir) - log10(full_length_2$moi_gvn_red)
full_length_2$fold_ace2_dep_infection <- 10^full_length_2$log10_nir_red_diff

full_length_summary <- full_length_2 %>% filter(!(fold_ace2_dep_infection %in% c(NaN,0,Inf))) %>% group_by(cell_label, parent_rbd, interface_res) %>% summarize(ratio = 10^(mean(log10(fold_ace2_dep_infection))), .groups = "drop")

full_length_2$type <- "full_length"

chimerics <-two_color_precombined %>% filter(expt != "Full_length")

chimerics_1 <- chimerics %>% group_by(recombined_construct, pseudovirus_env) %>% mutate(moi_gvn_red = -log(1-pct_grn_gvn_red/100), moi_gvn_nir = -log(1-pct_grn_gvn_nir/100))

chimerics_1$log10_nir_red_diff <- log10(chimerics_1$moi_gvn_nir) - log10(chimerics_1$moi_gvn_red)
chimerics_1$fold_ace2_dep_infection <- 10^chimerics_1$log10_nir_red_diff
chimerics_1 <- chimerics_1 %>% filter(log10_nir_red_diff != "Inf" & log10_nir_red_diff != "-Inf" & pseudovirus_env != "G1271A")

chimerics_2 <- merge(chimerics_1, pseudovirus_label_key, by = "pseudovirus_env")
chimerics_2 <- merge(chimerics_2, recombined_construct_key, by = "recombined_construct")

chimerics_2$log10_nir_red_diff <- log10(chimerics_2$moi_gvn_nir) - log10(chimerics_2$moi_gvn_red)
chimerics_2$fold_ace2_dep_infection <- 10^chimerics_2$log10_nir_red_diff

chimerics_3 <- chimerics_2 %>% filter(!(fold_ace2_dep_infection %in% c(NaN,0,Inf))) %>% filter(((parent_rbd == "BtKY72" & interface_res == "K")) & cell_label %in% c("H.sapiens","R.alcyone")) %>% mutate(type = "chimeric")

vsvg <- full_length_2 %>% filter(pseudovirus_env == "VSVG")
vsvg$type <- "full_length"

columns_for_chimerics_full_length <- c("cell_label", "parent_rbd", "type","fold_ace2_dep_infection")

chimerics_and_full_length <- rbind(chimerics_3[,columns_for_chimerics_full_length],
                                   full_length_2[,columns_for_chimerics_full_length],
                                   vsvg[,columns_for_chimerics_full_length]) %>% filter(parent_rbd != "SARS-CoV")

chimerics_and_full_length$label <- paste(chimerics_and_full_length$parent_rbd,chimerics_and_full_length$type,sep="_")

chimerics_and_full_length_summary <- chimerics_and_full_length %>% group_by(cell_label, parent_rbd, type) %>% summarize(ratio = 10^(mean(log10(fold_ace2_dep_infection))), .groups = "drop")

chimerics_and_full_length_summary$label <- paste(chimerics_and_full_length_summary$parent_rbd,chimerics_and_full_length_summary$type,sep="_")


Full_length_plot2 <- ggplot() + theme(axis.text.x.bottom = element_text(angle = -90, hjust = 0, vjust = 0.5), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), legend.position = "right") + 
  labs(x = NULL, y = "Fold ACE2\n-dependent infection") + 
  scale_y_log10() + 
  geom_hline(yintercept = 1, size = 3, alpha = 0.2) +
  geom_quasirandom(data = chimerics_and_full_length, aes(x = label, y = fold_ace2_dep_infection, color = type), alpha = 0.2) +
  geom_point(data = chimerics_and_full_length_summary, aes(y = ratio, x = label, color = type), shape = 95, size = 8) +
  facet_grid(cols = vars(cell_label)) +
  NULL
Full_length_plot2

ggsave(file = "Plots/Full_length_plot2.pdf", Full_length_plot2, height = 2.2, width = 3.5)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_5D.csv", chimerics_and_full_length_summary, quote = FALSE, row.names = FALSE)
prdbtky <- two_color_precombined %>% filter(expt %in% c("Q493_muts"))

prdbtky$red_cells <- prdbtky$live_singlets * prdbtky$pct_red / 100
prdbtky$red_then_grn_cells <- round(prdbtky$red_cells * prdbtky$pct_grn_gvn_red / 100,0)
prdbtky$nir_cells <- prdbtky$live_singlets * prdbtky$pct_nir / 100
prdbtky$nir_then_grn_cells <- round(prdbtky$nir_cells * prdbtky$pct_grn_gvn_nir / 100,0)

prdbtky_1 <- prdbtky %>% filter(passes_lower_bound == "yes" & nir_then_grn_cells > 5) %>% group_by(recombined_construct, pseudovirus_env) %>% mutate(pct_grn_gvn_red = red_then_grn_cells / red_cells * 100, pct_grn_gvn_nir = nir_then_grn_cells / nir_cells * 100) %>% mutate(moi_gvn_red = -log(1-pct_grn_gvn_red/100), moi_gvn_nir = -log(1-pct_grn_gvn_nir/100))

prdbtky_1$log10_nir_red_diff <- log10(prdbtky_1$moi_gvn_nir) - log10(prdbtky_1$moi_gvn_red)
prdbtky_1$fold_ace2_dep_infection <- 10^prdbtky_1$log10_nir_red_diff
prdbtky_1 <- prdbtky_1 %>% filter(log10_nir_red_diff != "Inf" & log10_nir_red_diff != "-Inf")

prdbtky_2 <- merge(prdbtky_1, pseudovirus_label_key, by = "pseudovirus_env")
prdbtky_2 <- merge(prdbtky_2, recombined_construct_key, by = "recombined_construct")

prdbtky_2$log10_nir_red_diff <- log10(prdbtky_2$moi_gvn_nir) - log10(prdbtky_2$moi_gvn_red)
prdbtky_2$fold_ace2_dep_infection <- 10^prdbtky_2$log10_nir_red_diff

flow_ace2_muts_summary <- prdbtky_2 %>% filter(!(fold_ace2_dep_infection %in% c(NaN,0,Inf))) %>% group_by(cell_label, parent_rbd, interface_res) %>% summarize(ratio = 10^(mean(log10(fold_ace2_dep_infection))), .groups = "drop")

prd <- prdbtky_2 %>% filter(pseudovirus_env %in% c("G1279B","G1280A"))
btky <- prdbtky_2 %>% filter(pseudovirus_env == "G1194G" & date %in% unique(prd$date))
sars <- prdbtky_2 %>% filter(pseudovirus_env == "R12")
vsvg <- prdbtky_2 %>% filter(pseudovirus_env == "VSVG")

prd_btky <- rbind(prd,btky,sars,vsvg)

prd_btky_summary <- prd_btky %>% filter(!(fold_ace2_dep_infection %in% c(NaN,0,Inf))) %>% group_by(cell_label, parent_rbd, interface_res) %>% summarize(ratio = 10^(mean(log10(fold_ace2_dep_infection))), .groups = "drop")

Predict_BtKY72_plot <- ggplot() + 
  theme(axis.text.x.bottom = element_text(angle = -45, hjust = 0, vjust = 0.5), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), legend.position = "none") +
  labs(x = NULL, y = "Fold ACE2\n-dependent infection") + scale_y_log10() + 
  geom_hline(yintercept = 1, alpha = 0.2, size = 1.5) +
  geom_point(data = prd_btky, aes(x = parent_rbd, y = fold_ace2_dep_infection), position = position_dodge(width = 0.8), alpha = 0.2) +
  geom_point(data = prd_btky_summary, aes(x = parent_rbd, y = ratio), position = position_dodge(width = 0.8), shape = 95, size = 8) +
  facet_grid(cols = vars(cell_label))
Predict_BtKY72_plot

ggsave(file = "Plots/Predict_BtKY72_plot.pdf", Predict_BtKY72_plot, height = 1.4, width = 2.8)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_6B.csv", prd_btky_summary, quote = FALSE, row.names = FALSE)
ace2_muts <- two_color_combined %>% filter(expt == "Btky72_ACE2muts" & !(recombined_construct == "Q42R"))# | pseudovirus_env == "G778B" 

ace2_muts$index <- seq(1,nrow(ace2_muts))
ace2_muts$log10_nir_red_diff <- log10(ace2_muts$moi_gvn_red) - log10(ace2_muts$moi_gvn_nir)

ace2_muts2 <- ace2_muts %>% group_by(date, virus_label, recombined_construct) %>% 
  summarize(log10_nir_red_diff = mean(log10_nir_red_diff), pct_grn_gvn_nir = mean(pct_grn_gvn_nir), ratio_nir_mcherry = mean(ratio_nir_mcherry), .groups = "drop") %>% mutate(n = 1, geomean = 10^log10_nir_red_diff)

two_color_ace2_muts_summary <- ace2_muts2 %>% group_by(virus_label, recombined_construct) %>% 
  summarize(mean_log10_nir_red_diff = mean(log10_nir_red_diff), sd_log10_nir_red_diff = sd(log10_nir_red_diff), n = sum(n), .groups = "drop") %>%
  mutate(se_log10_nir_red_diff = sd_log10_nir_red_diff / sqrt(n), geomean = 10^mean_log10_nir_red_diff, lower_ci = 10^(mean_log10_nir_red_diff - se_log10_nir_red_diff * 1.96), upper_ci = 10^(mean_log10_nir_red_diff + se_log10_nir_red_diff * 1.96), cells = "ACE2")

ace2_variant_levels <- c("WT","I21N","T27A","K31D","E37K","Q38H","Y41A","K353D","D355N","R357A","R357T")
ace2_muts2$recombined_construct <- factor(ace2_muts2$recombined_construct, levels = ace2_variant_levels)
two_color_ace2_muts_summary$recombined_construct <- factor(two_color_ace2_muts_summary$recombined_construct, levels = ace2_variant_levels)

## Set up consistent colors
virus_colors2 <- c("VSV" = "red", "SARS-CoV" = "darkgreen", "SARS-CoV-2" = "blue", "SARS-CoV-2 RBD" = "blue", "BtKY72 RBD" = "purple", "Khosta2 RBD" = "cyan", "WIV1 RBD" = "green")
##

Two_color_ACE2_muts_panel_plot <- ggplot() + 
  theme_bw() + theme(panel.grid.major.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  scale_color_manual(values = virus_colors2) +
  scale_y_log10(limits = c(3e-1,1e1), expand = c(0,0)) + 
  labs(x = element_blank(), y = "Ratio of iRFP670 to\nmCherry infected cells") +
  geom_hline(yintercept = 1, linetype = 2, alpha = 0.4) +
  geom_point(data = ace2_muts2, aes(x = recombined_construct, y = geomean, color = virus_label), position = position_dodge(width = 0.8), alpha = 0.4) +
  geom_errorbar(data = two_color_ace2_muts_summary, aes(x = recombined_construct, ymin = lower_ci, ymax = upper_ci, color = virus_label), position = position_dodge(width = 0.8), alpha = 0.4, width = 0.2) +
  geom_point(data = two_color_ace2_muts_summary, aes(x = recombined_construct, y = geomean, color = virus_label), position = position_dodge(width = 0.8), size = 8, shape = 95) +
  facet_grid(rows = vars(virus_label))
Two_color_ACE2_muts_panel_plot

ggsave(file = "Plots/Two_color_ACE2_muts_panel_plot.pdf", Two_color_ACE2_muts_panel_plot, height = 2.5, width = 4.5)
ggsave(file = "Plots/Two_color_ACE2_muts_panel_plot.png", Two_color_ACE2_muts_panel_plot, height = 2.5, width = 4.5)

### Hmmm, BtKY72 didn't do much here. Probably since this is all suboptimal Kozak ACE2 cells, which isn't enough to allow BtKY72 infection.

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_10A.csv", two_color_ace2_muts_summary, quote = FALSE, row.names = FALSE)
original_mutant_data <- read.delim(file = "Data/Shukla_supplementary_table_2.tsv", sep = "\t", header = T) %>% filter(expt == "ACE2_mut_panel")
original_mutant_data2 <- merge(original_mutant_data, pseudovirus_label_key, by = "pseudovirus_env")

original_mutant_data_summary <<- original_mutant_data2 %>% group_by(cell_label, virus_label) %>% summarize(geomean = 10^mean(log_scaled_infection))
## `summarise()` has grouped output by 'cell_label'. You can override using the
## `.groups` argument.
original_mutant_data_summary$recombined_construct <- ""
original_mutant_data_summary$geomean.decto_scaled <- 0
cntrl_frame <- original_mutant_data_summary %>% filter(cell_label == "ACE2(dEcto)")

for(x in 1:nrow(original_mutant_data_summary)){
  temp_virus <- original_mutant_data_summary$virus_label[x]
  original_mutant_data_summary$recombined_construct[x] <- substr(original_mutant_data_summary$cell_label[x],11,20)
  original_mutant_data_summary$geomean.decto_scaled[x] <- original_mutant_data_summary$geomean[x] / cntrl_frame[cntrl_frame$virus_label == temp_virus,"geomean"]
}
original_mutant_data_summary[original_mutant_data_summary$recombined_construct == "","recombined_construct"] <- "WT"

singleplex_duplex_ace2_data <- merge(original_mutant_data_summary, two_color_ace2_muts_summary, by = c("recombined_construct","virus_label"))
singleplex_duplex_ace2_data$geomean.decto_scaled <- as.numeric(singleplex_duplex_ace2_data$geomean.decto_scaled)

singleplex_duplex_ace2_ace2_muts_plot <- ggplot() + theme(panel.grid.minor = element_blank()) + 
  labs(x = "Singleplex assay", y = "Duplex assay") +
  scale_color_manual(values = virus_colors2) +
  scale_x_log10() + scale_y_log10() + 
  geom_point(data = singleplex_duplex_ace2_data, aes(x = geomean.decto_scaled, y = geomean.y, color = virus_label), alpha = 0.5) +
  geom_text_repel(data = singleplex_duplex_ace2_data %>% filter(virus_label != "VSV"), aes(x = geomean.decto_scaled, y = geomean.y, label = recombined_construct), size = 2, segment.alpha = 0.4)
ggsave(file = "Plots/singleplex_duplex_ace2_ace2_muts_plot.pdf", singleplex_duplex_ace2_ace2_muts_plot, height = 2.5, width = 4.3)
ggsave(file = "Plots/singleplex_duplex_ace2_ace2_muts_plot.png", singleplex_duplex_ace2_ace2_muts_plot, height = 2.5, width = 4.3)

singleplex_duplex_ace2_ace2_muts_plot

cor(singleplex_duplex_ace2_data$geomean.y, singleplex_duplex_ace2_data$geomean.decto_scaled, method = "pearson")
## [1] 0.9506675
write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_10B.csv", singleplex_duplex_ace2_data, quote = FALSE, row.names = FALSE)
flow_ace2_muts <- two_color_precombined %>% filter(expt %in% c("Clade3_hum_muts","ACE2_muts") & moi_gvn_red != 0)
flow_ace2_muts2 <- flow_ace2_muts
flow_ace2_muts3 <- merge(flow_ace2_muts2, recombined_construct_key[,c("recombined_construct","cell_label")], by = "recombined_construct")
flow_ace2_muts4 <- merge(flow_ace2_muts3, pseudovirus_label_key[,c("pseudovirus_env","virus_label","sarbecovirus_clade")], by = "pseudovirus_env")

flow_ace2_muts4$log10_nir_red_diff <- log10(flow_ace2_muts4$moi_gvn_nir) - log10(flow_ace2_muts4$moi_gvn_red)
flow_ace2_muts4$fold_ace2_dep_infection <- 10^flow_ace2_muts4$log10_nir_red_diff
flow_ace2_muts4 <- flow_ace2_muts4 %>% filter(log10_nir_red_diff != "Inf" & log10_nir_red_diff != "-Inf")

flow_ace2_muts4$cell_label <- factor(flow_ace2_muts4$cell_label, levels = c("H.sapiens", "K31D","Y41A","K353D","D355N","R357T"))
flow_ace2_muts4$virus_label <- factor(flow_ace2_muts4$virus_label, levels = c("VSV","SARS-CoV-2 RBD", "SARS-CoV", "WIV1 RBD", "Khosta2 RBD","BtKY72 RBD"))

flow_ace2_muts_summary <- flow_ace2_muts4 %>% group_by(cell_label, virus_label) %>% summarize(ratio = 10^mean(log10(fold_ace2_dep_infection)), .groups = "drop")

flow_ace2_muts_summary$cell_label <- factor(flow_ace2_muts_summary$cell_label, levels = c("H.sapiens", "K31D","Y41A","K353D","D355N","R357T"))
flow_ace2_muts_summary$virus_label <- factor(flow_ace2_muts_summary$virus_label, levels = rev(c("VSV","SARS-CoV-2 RBD", "SARS-CoV", "WIV1 RBD", "Khosta2 RBD","BtKY72 RBD")))

Flow_ace2muts_faceted <- ggplot() + theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + 
  labs(x = NULL, y = "Fold ACE2-depednent infection") +
  scale_color_manual(values = virus_colors2) +
  scale_y_log10() + 
  geom_quasirandom(data = flow_ace2_muts4, aes(x = cell_label, y = fold_ace2_dep_infection, color = virus_label), alpha = 0.5) +
  geom_point(data= flow_ace2_muts_summary, aes(x = cell_label, y = ratio), shape = 95, size = 8) +
  facet_grid(rows = vars(virus_label))
Flow_ace2muts_faceted

ggsave(file = "Plots/Flow_ace2muts_faceted.pdf", Flow_ace2muts_faceted, height = 2.75, width = 3.8)
ggsave(file = "Plots/Flow_ace2muts_faceted.png", Flow_ace2muts_faceted, height = 2.75, width = 3.8)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_10C_left.csv", flow_ace2_muts_summary %>% mutate(panel = "left"), quote = FALSE, row.names = FALSE)

Flow_ace2muts_heatmap <- ggplot() + theme(axis.text.x = element_text(angle = -45, hjust = 0, vjust = 0.5)) + 
  labs(x = NULL, y = NULL, fill = "Fold") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0.1,  na.value = "grey50", trans = "log10") + 
  geom_tile(data = flow_ace2_muts_summary, aes(x = cell_label, y = virus_label, fill = ratio)) +
  geom_text(data = flow_ace2_muts_summary, aes(x = cell_label, y = virus_label, label = round(ratio,1)), size = 2) +
  NULL
ggsave(file = "Plots/Flow_ace2muts_heatmap.pdf", Flow_ace2muts_heatmap, height = 1.6, width = 3.5)
ggsave(file = "Plots/Flow_ace2muts_heatmap.png", Flow_ace2muts_heatmap, height = 1.6, width = 3.5)
Flow_ace2muts_heatmap

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_6D.csv", flow_ace2_muts_summary, quote = FALSE, row.names = FALSE)
microscopy_ace2muts <- microscopy3 %>% filter(expt == "Human_ACE2_muts") 
microscopy_ace2muts_summary <- microscopy_ace2muts %>% group_by(cell_label, virus_label) %>% summarize(ratio = 10^mean(log10(nir_mcherry_overlap_ratio)), .groups = "drop")
microscopy_ace2muts_summary$cell_label <- factor(microscopy_ace2muts_summary$cell_label, levels = c("H.sapiens", "K31D","Y41A","K353D","D355N","R357T"))
microscopy_ace2muts_summary$virus_label <- factor(microscopy_ace2muts_summary$virus_label, levels = rev(c("VSV","SARS-CoV-2 RBD", "SARS-CoV", "WIV1 RBD", "Khosta2 RBD","BtKY72 RBD")))

Microscopy_ace2muts_heatmap <- ggplot() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5)) + 
  labs(x = NULL, y = NULL, fill = "Fold") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0.1,  na.value = "grey50", trans = "log10", limits = c(0.5,10)) + 
  geom_tile(data = microscopy_ace2muts_summary, aes(x = cell_label, y = virus_label, fill = ratio)) +
  geom_text(data = microscopy_ace2muts_summary, aes(x = cell_label, y = virus_label, label = round(ratio,1)), size = 1.8) +
  NULL
Microscopy_ace2muts_heatmap

ggsave(file = "Plots/Microscopy_ace2muts_heatmap.pdf", Microscopy_ace2muts_heatmap, height = 1.75, width = 3.25)
ggsave(file = "Plots/Microscopy_ace2muts_heatmap.png", Microscopy_ace2muts_heatmap, height = 1.75, width = 3.25)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_10D.csv", microscopy_ace2muts_summary %>% mutate(panel = "right"), quote = FALSE, row.names = FALSE)


microscopy_ace2muts$cell_label <- factor(microscopy_ace2muts$cell_label, levels = c("H.sapiens", "K31D","Y41A","K353D","D355N","R357T"))
microscopy_ace2muts$virus_label <- factor(microscopy_ace2muts$virus_label, levels = c("VSV","SARS-CoV-2 RBD", "SARS-CoV", "WIV1 RBD", "Khosta2 RBD","BtKY72 RBD"))

Microscopy_ace2muts_faceted <- ggplot() + theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + 
  labs(x = NULL, y = "Fold ACE2-depednent infection") +
  scale_color_manual(values = virus_colors2) +
  scale_y_log10() + 
  geom_quasirandom(data = microscopy_ace2muts, aes(x = cell_label, y = nir_mcherry_overlap_ratio, color = virus_label), alpha = 0.5) +
  geom_point(data= microscopy_ace2muts_summary, aes(x = cell_label, y = ratio), shape = 95, size = 8) +
  facet_grid(rows = vars(virus_label))
Microscopy_ace2muts_faceted

ggsave(file = "Plots/Microscopy_ace2muts_faceted.pdf", Microscopy_ace2muts_faceted, height = 2.75, width = 3.8)
ggsave(file = "Plots/Microscopy_ace2muts_faceted.png", Microscopy_ace2muts_faceted, height = 2.75, width = 3.8)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_10C_right.csv", flow_ace2_muts_summary %>% mutate(panel = "right"), quote = FALSE, row.names = FALSE)
flow_vs_microscopy_ace2muts <- merge(flow_ace2_muts_summary[,c("virus_label","cell_label","ratio")], microscopy_ace2muts_summary, by = c("virus_label","cell_label"))

flow_vs_microscopy_ace2muts_scatterplot <- ggplot() + theme(panel.grid.minor = element_blank()) + 
  labs(x = "Fold ACE2 dependence by flow cytometry", y = "Fold ACE2 dependence by microscopy") +
  scale_x_log10(limits = c(0.5,30), breaks = c(1,3,10,30)) + scale_y_log10(limits = c(0.9,7), breaks = c(1,3,10), expand = c(0,0)) + 
  scale_shape_manual(values = c(3, 21, 22, 23, 24, 25)) +
  scale_color_manual(values = virus_colors2) +
  geom_hline(yintercept = 1, alpha = 0.4) + geom_vline(xintercept = 1, alpha = 0.4) +
  geom_point(data = flow_vs_microscopy_ace2muts, aes(x = ratio.x, y = ratio.y, color = virus_label, shape = cell_label))
flow_vs_microscopy_ace2muts_scatterplot

ggsave(file = "Plots/flow_vs_microscopy_ace2muts_scatterplot.pdf", flow_vs_microscopy_ace2muts_scatterplot, height = 1.5, width = 3.5)
ggsave(file = "Plots/flow_vs_microscopy_ace2muts_scatterplot.png", flow_vs_microscopy_ace2muts_scatterplot, height = 1.5, width = 3.5)

Trying to incorporate the bat species range data

## https://r-spatial.github.io/sf/reference/st_read.html
## https://www.r-graph-gallery.com/168-load-a-shape-file-into-r.html
## https://cfss.uchicago.edu/notes/simple-features/
## https://ggplot2.tidyverse.org/reference/ggsf.html
## https://datascience.blog.wzb.eu/2019/04/30/zooming-in-on-maps-with-sf-and-ggplot2/

#install.packages("Rcpp")
#library(Rcpp)
#install.packages("sf")
#library(sf)
#install.packages("here")
#library(here)
#install.packages("broom")
#library(broom)

world_shape <- here("Data/Maps/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp") %>% st_read()
## Reading layer `TM_WORLD_BORDERS-0.3' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 246 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.6236
## Geodetic CRS:  WGS 84
base_world_map_ggplot <- ggplot() + theme_bw() + 
  theme(axis.text = element_blank(), panel.grid.major = element_blank(), axis.ticks = element_blank()) +
  labs(x = NULL, y = NULL) +
  geom_sf(data = world_shape, fill="grey95", color="black",size = 0.1) +
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T) +
  NULL
ggsave(file = "Plots/Base_world_map_ggplot.pdf", base_world_map_ggplot, height = 1.5, width = 4)
#base_world_map_ggplot

ferrum_shape <- here("Data/Maps/ferrumequinum_redlist_species_data_8e16dc36-7b4b-44b9-9840-799c4a0f6a53/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/ferrumequinum_redlist_species_data_8e16dc36-7b4b-44b9-9840-799c4a0f6a53/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 18 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -9.495943 ymin: 22.61465 xmax: 144.0388 ymax: 52.63987
## Geodetic CRS:  WGS 84
ferrum_map_plot <- base_world_map_ggplot + geom_sf(data = ferrum_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Ferrum_map_plot.pdf", ferrum_map_plot, height = 2, width = 4)
#ferrum_map_plot

affinis_shape <- here("Data/Maps/affinis_redlist_species_data_e5cea578-fa93-46b4-bcd1-72db305e97de/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/affinis_redlist_species_data_e5cea578-fa93-46b4-bcd1-72db305e97de/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 22 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 77.6688 ymin: -10.32216 xmax: 123.1624 ymax: 33.41214
## Geodetic CRS:  WGS 84
affinis_map_plot <- base_world_map_ggplot + geom_sf(data = affinis_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Affinis_map_plot.pdf", affinis_map_plot, height = 2, width = 4)
#affinis_map_plot

hippo_shape <- here("Data/Maps/hipposideros_redlist_species_data_81732104-6de7-456f-9dec-2f1fcd484c6f/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/hipposideros_redlist_species_data_81732104-6de7-456f-9dec-2f1fcd484c6f/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 17 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -10.53219 ymin: 17.01716 xmax: 79.43162 ymax: 54.54312
## Geodetic CRS:  WGS 84
hippo_map_plot <- base_world_map_ggplot + geom_sf(data = hippo_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Hippo_map_plot.pdf", hippo_map_plot, height = 2, width = 4)
#hippo_map_plot

alcyone_shape <- here("Data/Maps/alcyone_redlist_species_data_ee5660d2-1774-4c77-8970-9a473e1c66a5/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/alcyone_redlist_species_data_ee5660d2-1774-4c77-8970-9a473e1c66a5/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -13.74239 ymin: -5.45619 xmax: 31.73008 ymax: 13.37119
## Geodetic CRS:  WGS 84
alcyone_map_plot <- base_world_map_ggplot + geom_sf(data = alcyone_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Alcyone_map_plot.pdf", alcyone_map_plot, height = 2, width = 4)
#alcyone_map_plot

landeri_shape <- here("Data/Maps/landeri_redlist_species_data_87e5a8c2-0fd6-4f3d-8f4b-fcf34c2e89f3/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/landeri_redlist_species_data_87e5a8c2-0fd6-4f3d-8f4b-fcf34c2e89f3/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3 features and 15 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -17.53524 ymin: -30.63702 xmax: 43.9498 ymax: 15.21187
## Geodetic CRS:  WGS 84
landeri_map_plot <- base_world_map_ggplot + geom_sf(data = landeri_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Landeri_map_plot.pdf", landeri_map_plot, height = 2, width = 4)
#landeri_map_plot

pearsonii_shape <- here("Data/Maps/pearsonii_redlist_species_data_a96e1d46-d9a0-40f7-b3a5-1ad1b91e2431/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/pearsonii_redlist_species_data_a96e1d46-d9a0-40f7-b3a5-1ad1b91e2431/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 77.5213 ymin: 6.630936 xmax: 122.3372 ymax: 33.56774
## Geodetic CRS:  WGS 84
pearsonii_map_plot <- base_world_map_ggplot + geom_sf(data = pearsonii_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Pearsonii_map_plot.pdf", affinis_map_plot, height = 2, width = 4)
#pearsonii_map_plot

sinicus_shape <- here("Data/Maps/sinicus_redlist_species_data_c0256091-31d3-4426-b8d6-791db2f0e7d6/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/sinicus_redlist_species_data_c0256091-31d3-4426-b8d6-791db2f0e7d6/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 76.7799 ymin: 14.14004 xmax: 122.937 ymax: 33.91734
## Geodetic CRS:  WGS 84
sinicus_map_plot <- base_world_map_ggplot + geom_sf(data = sinicus_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Sinicus_map_plot.pdf", sinicus_map_plot, height = 2, width = 4)
#sinicus_map_plot

blasii_shape <- here("Data/Maps/blasii_redlist_species_data_cea535e3-2ee3-4223-aad2-d2a071e66a6d/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/blasii_redlist_species_data_cea535e3-2ee3-4223-aad2-d2a071e66a6d/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -8.945001 ymin: -29.70939 xmax: 74.66737 ymax: 46.35565
## Geodetic CRS:  WGS 84
blasii_map_plot <- base_world_map_ggplot + geom_sf(data = blasii_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Blasii_map_plot.pdf", blasii_map_plot, height = 2, width = 4)
#blasii_map_plot

euryale_shape <- here("Data/Maps/euryale_redlist_species_data_4dbfc03e-ae50-4d4c-ac73-53a0757f19ec/data_0.shp") %>% st_read()
## Reading layer `data_0' from data source 
##   `/Users/kmatreyek/Box Sync/Github/ACE2_dependence/Data/Maps/euryale_redlist_species_data_4dbfc03e-ae50-4d4c-ac73-53a0757f19ec/data_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 7 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -9.894488 ymin: 28.25488 xmax: 61.00445 ymax: 48.94192
## Geodetic CRS:  WGS 84
euryale_map_plot <- base_world_map_ggplot + geom_sf(data = euryale_shape, fill="red", alpha = 0.4, size = 0.2) + 
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Euryale_map_plot.pdf", euryale_map_plot, height = 2, width = 4)
#euryale_map_plot

Clade3_map_plot <- base_world_map_ggplot + 
  geom_sf(data = euryale_shape, fill="yellow", alpha = 0.4, size = 0.1) + 
  geom_sf(data = blasii_shape, fill="cyan", alpha = 0.4, size = 0.1) + 
  geom_sf(data = hippo_shape, fill="orange", alpha = 0.4, size = 0.1) + 
  geom_sf(data = ferrum_shape, fill="green", alpha = 0.4, size = 0.1) + 
  geom_point(data = NULL, aes(x = 39.7439, y = 43.5691), size = 1.5, shape = 19) +  ## Sochi national park for Khosta-1 and Khosta-2
  geom_point(data = NULL, aes(x = 27.608611, y = 42.0125), size = 1.5, shape = 19) +  ## Strandzha nature park for BM48-31
  geom_point(data = NULL, aes(x = 36.8219, y = -1.2921), size = 1.5, shape = 19) +  ## Nairobi Kenya as proxy for BtKY72 location
  geom_point(data = NULL, aes(x = 29.63, y = -1.50), size = 1.5, shape = 19) +  ## PRD-0038 location https://www.ncbi.nlm.nih.gov/protein/1867217113
  geom_point(data = NULL, aes(x = 29.71, y = -1.11), size = 1.5, shape = 19) +  ## PDF-2370, PDF-2386 location https://www.ncbi.nlm.nih.gov/protein/QRW38681.1
  geom_point(data = NULL, aes(x = -3.0750174, y = 51.343452), size = 1.5, shape = 19) +  ## Weston-super-Mare UK as proxy for RhGB01 +
  geom_point(data = NULL, aes(x = 39.7439, y = 43.5691), size = 0.5, shape = 19, color = "yellow") +  ## Sochi national park for Khosta-1 and Khosta-2
  geom_point(data = NULL, aes(x = 27.608611, y = 42.0125), size = 0.5, shape = 19, color = "yellow") +  ## Strandzha nature park for BM48-31
  geom_point(data = NULL, aes(x = 36.8219, y = -1.2921), size = 0.5, shape = 19, color = "yellow") +  ## Nairobi Kenya as proxy for BtKY72 location
  geom_point(data = NULL, aes(x = 29.63, y = -1.50), size = 0.5, shape = 19, color = "yellow") +  ## PRD-0038 location https://www.ncbi.nlm.nih.gov/protein/1867217113
  geom_point(data = NULL, aes(x = 29.71, y = -1.11), size = 0.5, shape = 19, color = "yellow") +  ## PDF-2370, PDF-2386 location https://www.ncbi.nlm.nih.gov/protein/QRW38681.1
  geom_point(data = NULL, aes(x = -3.0750174, y = 51.343452), size = 0.5, shape = 19, color = "yellow") +  ## Weston-super-Mare UK as proxy for RhGB01
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Clade3_map_plot.pdf", Clade3_map_plot, height = 1.5, width = 4)
ggsave(file = "Plots/Clade3_map_plot.png", Clade3_map_plot, height = 1.5, width = 4)
Clade3_map_plot

ACE2_clade3_map_plot <- base_world_map_ggplot + 
  geom_sf(data = affinis_shape, fill="magenta", alpha = 0.4, size = 0.1) + 
  geom_sf(data = ferrum_shape, fill="green", alpha = 0.4, size = 0.1) + 
  geom_sf(data = alcyone_shape, fill="red", alpha = 0.4, size = 0.1) + 
  geom_sf(data = landeri_shape, fill="blue", alpha = 0.4, size = 0.1) + 
  geom_sf(data = sinicus_shape, fill="purple", alpha = 0.4, size = 0.1) + 
  geom_point(data = NULL, aes(x = 39.7439, y = 43.5691), size = 1.5, shape = 19) +  ## Sochi national park for Khosta-1 and Khosta-2
  geom_point(data = NULL, aes(x = 27.608611, y = 42.0125), size = 1.5, shape = 19) +  ## Strandzha nature park for BM48-31
  geom_point(data = NULL, aes(x = 36.8219, y = -1.2921), size = 1.5, shape = 19) +  ## Nairobi Kenya as proxy for BtKY72 location
  geom_point(data = NULL, aes(x = 29.63, y = -1.50), size = 1.5, shape = 19) +  ## PRD-0038 location https://www.ncbi.nlm.nih.gov/protein/1867217113
  geom_point(data = NULL, aes(x = 29.71, y = -1.11), size = 1.5, shape = 19) +  ## PDF-2370, PDF-2386 location https://www.ncbi.nlm.nih.gov/protein/QRW38681.1
  geom_point(data = NULL, aes(x = -3.0750174, y = 51.343452), size = 1.5, shape = 19) +  ## Weston-super-Mare UK as proxy for RhGB01 +
  geom_point(data = NULL, aes(x = 39.7439, y = 43.5691), size = 0.5, shape = 19, color = "yellow") +  ## Sochi national park for Khosta-1 and Khosta-2
  geom_point(data = NULL, aes(x = 27.608611, y = 42.0125), size = 0.5, shape = 19, color = "yellow") +  ## Strandzha nature park for BM48-31
  geom_point(data = NULL, aes(x = 36.8219, y = -1.2921), size = 0.5, shape = 19, color = "yellow") +  ## Nairobi Kenya as proxy for BtKY72 location
  geom_point(data = NULL, aes(x = 29.63, y = -1.50), size = 0.5, shape = 19, color = "yellow") +  ## PRD-0038 location https://www.ncbi.nlm.nih.gov/protein/1867217113
  geom_point(data = NULL, aes(x = 29.71, y = -1.11), size = 0.5, shape = 19, color = "yellow") +  ## PDF-2370, PDF-2386 location https://www.ncbi.nlm.nih.gov/protein/QRW38681.1
  geom_point(data = NULL, aes(x = -3.0750174, y = 51.343452), size = 0.5, shape = 19, color = "yellow") +  ## Weston-super-Mare UK as proxy for RhGB01
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/ACE2_clade3_map_plot.pdf", ACE2_clade3_map_plot, height = 1.5, width = 4)
ggsave(file = "Plots/ACE2_clade3_map_plot.png", ACE2_clade3_map_plot, height = 1.5, width = 4)
ACE2_clade3_map_plot

Clade3_combined_map_plot <- base_world_map_ggplot + 
  geom_sf(data = euryale_shape, fill="yellow", alpha = 0.4, size = 0.1) + 
  geom_sf(data = blasii_shape, fill="cyan", alpha = 0.4, size = 0.1) + 
  geom_sf(data = hippo_shape, fill="orange", alpha = 0.4, size = 0.1) + 
  geom_sf(data = ferrum_shape, fill="green", alpha = 0.4, size = 0.1) + 
  geom_sf(data = alcyone_shape, fill="red", alpha = 0.4, size = 0.1) + 
  geom_sf(data = landeri_shape, fill="blue", alpha = 0.4, size = 0.1) + 
  geom_point(data = NULL, aes(x = 39.7439, y = 43.5691), size = 0.5) +  ## Sochi national park for Khosta-1 and Khosta-2
  geom_point(data = NULL, aes(x = 27.608611, y = 42.0125), size = 0.5) +  ## Strandzha nature park for BM48-31
  geom_point(data = NULL, aes(x = 36.8219, y = -1.2921), size = 0.5) +  ## Nairobi Kenya as proxy for BtKY72 location
  geom_point(data = NULL, aes(x = 30.0619, y = -1.9441), size = 0.5) +  ## Kigali Rwanda as proxy for PRD-0038 location
  geom_point(data = NULL, aes(x = 32.5825, y = 0.3476), size = 0.5) +  ## Kampala Uganda as proxy for PRD-2370, PRD-2386 location +
  geom_point(data = NULL, aes(x = -3.0750174, y = 51.343452), size = 0.5) +  ## Weston-super-Mare UK as proxy for RhGB01
  coord_sf(xlim = c(-12, 145), ylim = c(-19, 60), expand = T)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
ggsave(file = "Plots/Clade3_combined_map_plot.pdf", Clade3_combined_map_plot, height = 1.5, width = 4)
ggsave(file = "Plots/Clade3_combined_map_plot.png", Clade3_combined_map_plot, height = 1.5, width = 4)
Clade3_combined_map_plot

## Flow data first

flow_btky72 <- bat_ace2s_2 %>% filter(virus_label == "BtKY72 RBD")
flow_btky72_summary <- bat_ace2s_2_summary %>% filter(virus_label == "BtKY72 RBD")

flow_btky72$cell_label <- factor(flow_btky72$cell_label, levels = bat_ace2_levels)
flow_btky72_summary$cell_label <- factor(flow_btky72_summary$cell_label, levels = bat_ace2_levels)

Bar_chart_flow_btky72 <- ggplot() + labs(x = NULL, y = NULL) + 
  theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + 
  scale_y_log10(breaks = c(1,10,100)) + geom_hline(yintercept = 1, alpha = 0.5) + 
  geom_quasirandom(data = flow_btky72, aes(x = cell_label, y = fold_ace2_dep_infection), alpha = 0.2) +
  geom_point(data = flow_btky72_summary, aes(x = cell_label, y = geomean), shape = 95, size = 10)
Bar_chart_flow_btky72

ggsave(file = "Plots/Bar_chart_flow_btky72.pdf", Bar_chart_flow_btky72, height = 1.5, width = 2.5)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_7C_bottom.csv", flow_btky72_summary, quote = FALSE, row.names = FALSE)

## Microscopy next

microscopy_btky72 <- microscopy3_batace2 %>% filter(virus_label == "BtKY72 RBD")
microscopy_btky72_summary <- microscopy_batace2_summary %>% filter(virus_label == "BtKY72 RBD")

microscopy_btky72$cell_label <- factor(microscopy_btky72$cell_label, levels = bat_ace2_levels)
microscopy_btky72_summary$cell_label <- factor(microscopy_btky72_summary$cell_label, levels = bat_ace2_levels)

Bar_chart_microscopy_btky72 <- ggplot() + labs(x = NULL, y = NULL) + 
  theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + 
  scale_y_log10(breaks = c(1,3,10)) + geom_hline(yintercept = 1, alpha = 0.5) + 
  geom_quasirandom(data = microscopy_btky72, aes(x = cell_label, y = nir_mcherry_overlap_ratio), alpha = 0.2) +
  geom_point(data = microscopy_btky72_summary, aes(x = cell_label, y = ratio), shape = 95, size = 10)
Bar_chart_microscopy_btky72

ggsave(file = "Plots/Bar_chart_microscopy_btky72.pdf", Bar_chart_microscopy_btky72, height = 1.5, width = 2.43)


## Flow again for the relevant variants now

flowmuts_btky72 <- flow_ace2_muts4 %>% filter(virus_label == "BtKY72 RBD" & cell_label %in% c("H.sapiens", "K31D"))
flowmuts_btky72_summary <- flow_ace2_muts_summary %>% filter(virus_label == "BtKY72 RBD" & cell_label %in% c("H.sapiens", "K31D"))

Bar_chart_flowmuts_btky72 <- ggplot() + labs(x = NULL, y = NULL) + 
  theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + 
  scale_y_log10(breaks = c(1,10,100)) + geom_hline(yintercept = 1, alpha = 0.5) + 
  geom_quasirandom(data = flowmuts_btky72, aes(x = cell_label, y = fold_ace2_dep_infection), alpha = 0.2) +
  geom_point(data = flowmuts_btky72_summary, aes(x = cell_label, y = ratio), shape = 95, size = 10)
Bar_chart_flowmuts_btky72

ggsave(file = "Plots/Bar_chart_flowmuts_btky72.pdf", Bar_chart_flowmuts_btky72, height = 1.1, width = 0.85)

## Microscopy again for the relevant variants

micromuts_btky72 <- microscopy_ace2muts %>% filter(virus_label == "BtKY72 RBD" & cell_label %in% c("H.sapiens", "K31D"))
micromuts_btky72_summary <- microscopy_ace2muts_summary %>% filter(virus_label == "BtKY72 RBD" & cell_label %in% c("H.sapiens", "K31D"))

Bar_chart_micromuts_btky72 <- ggplot() + labs(x = NULL, y = NULL) + 
  theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + 
  scale_y_log10(breaks = c(1,3,10)) + geom_hline(yintercept = 1, alpha = 0.5) + 
  geom_quasirandom(data = micromuts_btky72, aes(x = cell_label, y = nir_mcherry_overlap_ratio), alpha = 0.2) +
  geom_point(data = micromuts_btky72_summary, aes(x = cell_label, y = ratio), shape = 95, size = 10)
Bar_chart_micromuts_btky72

ggsave(file = "Plots/Bar_chart_micromuts_btky72.pdf", Bar_chart_micromuts_btky72, height = 1.1, width = 0.78)


## Flow for Khosta-1

flow_khosta1 <- bat_ace2s_2 %>% filter(virus_label == "Khosta1 RBD")
flow_khosta1_summary <- bat_ace2s_2_summary %>% filter(virus_label == "Khosta1 RBD")

flow_khosta1$cell_label <- factor(flow_khosta1$cell_label, levels = bat_ace2_levels)
flow_khosta1_summary$cell_label <- factor(flow_khosta1_summary$cell_label, levels = bat_ace2_levels)

Bar_chart_flow_khosta1 <- ggplot() + labs(x = NULL, y = NULL) + 
  theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + 
  scale_y_log10(breaks = c(1,10,100)) + geom_hline(yintercept = 1, alpha = 0.5) + 
  geom_quasirandom(data = flow_khosta1, aes(x = cell_label, y = fold_ace2_dep_infection), alpha = 0.2) +
  geom_point(data = flow_khosta1_summary, aes(x = cell_label, y = geomean), shape = 95, size = 10)
Bar_chart_flow_khosta1

ggsave(file = "Plots/Bar_chart_flow_khosta1.pdf", Bar_chart_flow_khosta1, height = 1.5, width = 2.5)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_7C_top.csv", flow_khosta1_summary, quote = FALSE, row.names = FALSE)

## Microscopy for Khosta-1

microscopy_khosta1 <- microscopy3_batace2 %>% filter(virus_label == "Khosta1 RBD")
microscopy_khosta1_summary <- microscopy_batace2_summary %>% filter(virus_label == "Khosta1 RBD")

microscopy_khosta1$cell_label <- factor(microscopy_khosta1$cell_label, levels = bat_ace2_levels)
microscopy_khosta1_summary$cell_label <- factor(microscopy_khosta1_summary$cell_label, levels = bat_ace2_levels)

Bar_chart_microscopy_khosta1 <- ggplot() + labs(x = NULL, y = NULL) + 
  theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0.5), strip.text.y = element_text(angle = 0), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank()) + 
  scale_y_log10(breaks = c(1,3,10)) + geom_hline(yintercept = 1, alpha = 0.5) + 
  geom_quasirandom(data = microscopy_khosta1, aes(x = cell_label, y = nir_mcherry_overlap_ratio), alpha = 0.2) +
  geom_point(data = microscopy_khosta1_summary, aes(x = cell_label, y = ratio), shape = 95, size = 10)
Bar_chart_microscopy_khosta1

ggsave(file = "Plots/Bar_chart_microscopy_khosta1.pdf", Bar_chart_microscopy_khosta1, height = 1.5, width = 2.43)
q493_muts <- two_color_precombined %>% filter(expt %in% c("Q493_muts"))

q493_muts$red_cells <- q493_muts$live_singlets * q493_muts$pct_red / 100
q493_muts$red_then_grn_cells <- round(q493_muts$red_cells * q493_muts$pct_grn_gvn_red / 100,0)
q493_muts$nir_cells <- q493_muts$live_singlets * q493_muts$pct_nir / 100
q493_muts$nir_then_grn_cells <- round(q493_muts$nir_cells * q493_muts$pct_grn_gvn_nir / 100,0)

q493_muts_1 <- q493_muts %>% filter(passes_lower_bound == "yes" & nir_then_grn_cells > 5) %>% group_by(recombined_construct, pseudovirus_env) %>% mutate(pct_grn_gvn_red = red_then_grn_cells / red_cells * 100, pct_grn_gvn_nir = nir_then_grn_cells / nir_cells * 100) %>% mutate(moi_gvn_red = -log(1-pct_grn_gvn_red/100), moi_gvn_nir = -log(1-pct_grn_gvn_nir/100))

q493_muts_1$log10_nir_red_diff <- log10(q493_muts_1$moi_gvn_nir) - log10(q493_muts_1$moi_gvn_red)
q493_muts_1$fold_ace2_dep_infection <- 10^q493_muts_1$log10_nir_red_diff
q493_muts_1 <- q493_muts_1 %>% filter(log10_nir_red_diff != "Inf" & log10_nir_red_diff != "-Inf")

q493_muts_2 <- merge(q493_muts_1, pseudovirus_label_key, by = "pseudovirus_env")
q493_muts_2 <- merge(q493_muts_2, recombined_construct_key, by = "recombined_construct")
q493_muts_2 <- q493_muts_2 %>% filter(parent_rbd != "PRD0038")

q493_muts_2$log10_nir_red_diff <- log10(q493_muts_2$moi_gvn_nir) - log10(q493_muts_2$moi_gvn_red)
q493_muts_2$fold_ace2_dep_infection <- 10^q493_muts_2$log10_nir_red_diff

flow_ace2_muts_summary <- q493_muts_2 %>% filter(!(fold_ace2_dep_infection %in% c(NaN,0,Inf))) %>% group_by(cell_label, parent_rbd, interface_res) %>% summarize(ratio = 10^(mean(log10(fold_ace2_dep_infection))), .groups = "drop")

#Interface_mutants_plot <- ggplot() + 
#  theme(axis.text.x.bottom = element_text(angle = -45, hjust = 0), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), legend.position = "none") +
#  labs(x = NULL, y = "Fold ACE2\n-dependent infection") + scale_y_log10() + 
#  geom_hline(yintercept = 1, alpha = 0.2, size = 1.5) +
#  geom_point(data = q493_muts_2, aes(x = parent_rbd, y = fold_ace2_dep_infection, color = interface_res), position = position_dodge(width = 0.8), alpha = 0.2) +
#  geom_text(data = flow_ace2_muts_summary, aes(x = parent_rbd, y = ratio, label = interface_res, color = interface_res), position = position_dodge(width = 0.8)) +
#  facet_grid(rows = vars(cell_label))
#Interface_mutants_plot

## Hone in on the most informative subset

q493_muts_2_plotting <- q493_muts_2 %>% filter(parent_rbd %in% c("VSVG","SARS-CoV" ,"SARS-CoV-2","BtKY72","Khosta2","Khosta1") & virus_label != "BtKY72 0-955")
flow_ace2_muts_summary_plotting <- flow_ace2_muts_summary %>% filter(parent_rbd %in% c("VSVG","SARS-CoV" , "SARS-CoV-2","Khosta2","BtKY72","Khosta1"))
q493_muts_2_plotting$parent_rbd <- factor(q493_muts_2_plotting$parent_rbd, levels = rev(c("VSVG","SARS-CoV" , "SARS-CoV-2","Khosta2","BtKY72","Khosta1")))

Interface_mutants_plot1 <- ggplot() + 
  theme(axis.text.x.bottom = element_text(angle = -45, hjust = 0), panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(), legend.position = "none") +
  labs(x = NULL, y = "Fold ACE2\n-dependent infection") + scale_y_log10() + 
  geom_hline(yintercept = 1, alpha = 0.2, size = 1.5) +
  geom_point(data = q493_muts_2_plotting, aes(x = parent_rbd, y = fold_ace2_dep_infection, color = interface_res), position = position_dodge(width = 0.8), alpha = 0.2) +
  geom_text(data = flow_ace2_muts_summary_plotting, aes(x = parent_rbd, y = ratio, label = interface_res, color = interface_res), position = position_dodge(width = 0.8)) +
  facet_grid(rows = vars(cell_label))
Interface_mutants_plot1

ggsave(file = "Plots/Interface_mutants_plot1.pdf", Interface_mutants_plot1, height = 2.2, width = 3)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_7D.csv", flow_ace2_muts_summary_plotting, quote = FALSE, row.names = FALSE)
# https://rpubs.com/sinhrks/plot_mds

rbd_dist_matrix3 <- rbd_dist_matrix
for(x in 1:nrow(rbd_dist_matrix3)){rownames(rbd_dist_matrix3)[x] <- strsplit(rownames(rbd_dist_matrix3)[x], "__",1)[[1]][1]}

mds_plotting_frame <- data.frame(cmdscale(rbd_dist_matrix3, eig = TRUE)$points)
mds_plotting_frame$name <- row.names(mds_plotting_frame)

mds_plotting_frame2 <- merge(mds_plotting_frame, clade_labels_key, by = "name")


ACE2_receptor_usage_by_sequence_matrix <- 
ggplot() + #scale_x_continuous(limits = c(-50, 50)) + scale_y_continuous(limits = c(-30, 50)) + 
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + 
  labs(x = "Scaled dimension 1", y = "Scaled dimension 2") +
  #geom_hline(yintercept = 0, alpha = 0.2) + geom_vline(xintercept = 0, alpha = 0.2) +
  geom_point(data = mds_plotting_frame2, aes(x = X1, y = X2, color = clade), alpha = 0.4) +
  geom_text_repel(data = mds_plotting_frame2 %>% filter(name %in% c("BtKY72","SARS_CoV-2","SARS_CoV","YN2013","RaTG15","Khosta2","Khosta1","BB9904")), aes(x = X1, y = X2, label = name),force_pull = -0.04, segment.color = "orange", segment.alpha = 0.5, size = 2)
ACE2_receptor_usage_by_sequence_matrix

ggsave(file = "Plots/ACE2_receptor_usage_by_sequence_matrix.pdf", ACE2_receptor_usage_by_sequence_matrix, height = 1.7, width = 3.7)

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/Fig_8.csv", mds_plotting_frame2, quote = FALSE, row.names = FALSE)
## Seeing why figure 2D has much lower ACE2-dependent infection than some of the other experiments

receptors_ace2 <- two_color_combined %>% filter(expt == "Receptors" & recombined_construct == "ACE2" & pseudovirus_env == "SARS1")

bat_ace2 <- two_color_combined %>% filter(expt == "Bat_ACE2s" & recombined_construct == "G852C" & pseudovirus_env == "SARS1")
for(x in 1:nrow(bat_ace2)){if(bat_ace2$pct_nir[x] > 100){bat_ace2$pct_nir[x] <- bat_ace2$pct_nir[x]/bat_ace2$live_singlets[x] * 100}}

q493_muts <- two_color_combined %>% filter(expt == "Q493_muts" & recombined_construct == "G852C" & pseudovirus_env == "R12")

cell_mixing_labels <- c("expt","moi_gvn_red","moi_gvn_nir","ratio_nir_mcherry")

cell_mixing <- rbind(receptors_ace2[,cell_mixing_labels], bat_ace2[,cell_mixing_labels], q493_muts[,cell_mixing_labels])

cell_mixing$fold_ace2_dep_infxn <- cell_mixing$moi_gvn_nir/cell_mixing$moi_gvn_red

for(x in 1:nrow(cell_mixing)){
  if(cell_mixing$expt[x] == "Receptors"){cell_mixing$expt[x] <- "Fig 2D"}
  if(cell_mixing$expt[x] == "Bat_ACE2s"){cell_mixing$expt[x] <- "Fig 5A"}
  if(cell_mixing$expt[x] == "Q493_muts"){cell_mixing$expt[x] <- "Fig 7D"}
}

Ratio_dependent_ACE2_infection_plot <- ggplot() + theme(panel.grid.minor = element_blank(), legend.position = "top") + 
  labs(x = "Ratio of NIR to mCherry cells in well", y = "Fold ACE2-dependent infection") +
  scale_x_log10() + scale_y_log10(limits = c(1,400)) + 
  geom_point(data = cell_mixing, aes(x = ratio_nir_mcherry, y = fold_ace2_dep_infxn, color = expt), alpha = 0.5)
ggsave(file = "plots/Ratio_dependent_ACE2_infection_plot.pdf", Ratio_dependent_ACE2_infection_plot, height = 2.5, width = 3)
Ratio_dependent_ACE2_infection_plot

write.csv(file = "Supp_tables/Export_csv_for_S2_Data/SFig_1.csv", cell_mixing, quote = FALSE, row.names = FALSE)

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published