Main Paper Plots:
Figure 1:
In Vitro Distribution Diagram (no code)
Figure 2:
Predicted and experimental intracellular concentrations from the Nominal = Cfree (left panel), Armitage (middle panel), and Kramer (right panel) models. Dashed line shows unity.
# ARMITAGE:
# Armitage RMSLE between the measured (CellConcentration_uM) and the
# Armitage-predicted (ccells) cell concentrations, rounded for the plot label.
arm_rmsle_long <- rmsle(Armitage.dt$CellConcentration_uM, Armitage.dt$ccells)
arm_rmsle <- round(arm_rmsle_long, 2)
# 1.12

# Armitage r^2:
ml_arm_overall <- lm(CellConcentration_uM ~ ccells, data = Armitage.dt)
summary(ml_arm_overall)$r.squared
# 0.33

# plot set up
# xrng/yrng anchor the text annotations (bottom-right corner of the data);
# n_chems/n_data are counted on Nominal.dt. All four are also referenced by
# the Kramer and Nominal panels of this figure.
yrng <- range(Armitage.dt$CellConcentration_uM)
xrng <- range(Armitage.dt$ccells)
n_chems <- n_distinct(Nominal.dt$ChemicalName)
n_data <- nrow(Nominal.dt)

# now plot
Armitage_ccell <- ggplot(Armitage.dt) +
  geom_point(aes(ccells, CellConcentration_uM, color = ChemicalName)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  xlab(expression("Armitage Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
  ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
  scale_x_log10(limits = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  scale_y_log10(limits = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  # three stacked labels at the same anchor, separated through vjust
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = parse(text = paste("N[data] ", "~'='~", n_data)),
           hjust = 1, vjust = 0, size = 4) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = parse(text = paste("N[chemicals] ", "~'='~", n_chems)),
           hjust = 1, vjust = 1.2, size = 4) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = paste("RMSLE =", arm_rmsle),
           hjust = 1, vjust = 3, size = 4) +
  personal_theme() +
  scale_color_manual(values = plotcolors)
# KRAMER:
# Kramer RMSLE between the measured (CellConcentration_uM) and the
# Kramer-predicted (concentration_cells) cell concentrations.
kram_rmsle_long <-
  rmsle(Kramer.dt$CellConcentration_uM, Kramer.dt$concentration_cells)
# format() with nsmall = 2 keeps the trailing zero in the label (e.g. "1.30")
kram_rmsle <- format(round(kram_rmsle_long, digits = 2), nsmall = 2)
# 1.30

# Kramer r^2:
ml_kram_overall <- lm(CellConcentration_uM ~ concentration_cells,
                      data = Kramer.dt)
summary(ml_kram_overall)$r.squared
# 0.03

# plot
# xrng, yrng, n_data and n_chems are not defined here; they come from the
# plot set-up in the Armitage section of this figure.
Kramer_ccell <- ggplot(Kramer.dt) +
  geom_point(aes(concentration_cells, CellConcentration_uM,
                 colour = ChemicalName)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_x_log10(limits = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  scale_y_log10(limits = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  xlab(expression("Kramer Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
  ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = parse(text = paste("N[data] ", "~'='~", n_data)),
           hjust = 1, vjust = 0, size = 4) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = parse(text = paste("N[chemicals] ", "~'='~", n_chems)),
           hjust = 1, vjust = 1.2, size = 4) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = paste("RMSLE =", kram_rmsle),
           hjust = 1, vjust = 3, size = 4) +
  personal_theme() +
  scale_color_manual(values = plotcolors)
# NOMINAL:
# Nominal RMSLE between the measured cell concentration and the nominal
# concentration (nomconc) used as the prediction.
nom_rmsle_long <- rmsle(Nominal.dt$CellConcentration_uM, Nominal.dt$nomconc)
nom_rmsle <- round(nom_rmsle_long, 2)
# 1.45

# xrng, yrng, n_data and n_chems are not defined here; they come from the
# plot set-up in the Armitage section of this figure.
Nominal_ccell <- ggplot(Nominal.dt) +
  geom_point(aes(nomconc, CellConcentration_uM, colour = ChemicalName)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_x_log10(limits = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  scale_y_log10(limits = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  xlab(expression(italic("C"["nominal"])~"(\u03BCM)")) +
  ylab(expression("Experimental "~italic("C"["cell"])~"(\u03BCM)")) +
  labs(colour = "Chemical") +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = parse(text = paste("N[data] ", "~'='~", n_data)),
           hjust = 1, vjust = 0, size = 4) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = parse(text = paste("N[chemicals] ", "~'='~", n_chems)),
           hjust = 1, vjust = 1.2, size = 4) +
  annotate(geom = "text", x = xrng[2], y = yrng[1],
           label = paste("RMSLE =", nom_rmsle),
           hjust = 1, vjust = 3, size = 4) +
  personal_theme() +
  # this panel carries the legend styling (bottom, compact keys, 7 columns)
  theme(legend.position = "bottom",
        legend.key.size = unit(2, "mm"),
        legend.spacing = unit(10, "pt"),
        legend.text = element_text(size = 6)) +
  guides(col = guide_legend(ncol = 7)) +
  scale_color_manual(values = plotcolors)
# Combine the three plots w RMSLE into one row with a shared bottom legend
Ccells_comboplot <- ggarrange(
  Nominal_ccell, Armitage_ccell, Kramer_ccell,
  ncol = 3, legend = "bottom", common.legend = TRUE
)

# save (only when the notebook-level save_output flag is set)
if (save_output) {
  ggsave(
    filename = paste0("Figure2_", data.date, "_", Sys.Date(), ".png"),
    plot = Ccells_comboplot,
    path = path_out,
    width = 10, height = 5, dpi = 300
  )
}
print(Ccells_comboplot)
Figure 3:
Predicted and experimental intracellular concentrations from the Armitage (left panel) and Kramer (right panel) models for the ten chemicals with multiple observations. Solid lines connect multiple measurements for a single chemical. The dashed line shows unity.
#Armitage.multobs.dt %>% count(ChemicalName, NominalDose_uM, Citation)
#some of these have super close nomconcs because they are experimentally
#measured - not really relevant for this comparison because they are replicates
#chemicals to remove:
#Hexachlorobenzene, Malathion, Pentachlorophenol, Propiconazole
#(all of the chemicals from Stadnicka, 2014)

# Chemicals with at least two distinct nominal doses (after dropping the
# Stadnicka, 2014 records): the first count() yields one row per
# chemical/dose pair, the second counts doses per chemical.
multiObsChemnameList <- Armitage.dt %>%
  dplyr::filter(Citation != "Stadnicka, 2014") %>%
  count(ChemicalName, NominalDose_uM) %>%
  count(ChemicalName) %>%
  dplyr::filter(n >= 2) %>%
  select(ChemicalName) %>%
  distinct()
#10 chemicals with multiple nominal doses measured
#filter measured data to just these chemicals
Armitage.multobs.dt_og <- Armitage.dt %>%
  dplyr::filter(ChemicalName %in% multiObsChemnameList$ChemicalName)

Kramer.multobs.dt_og <- Kramer.dt %>%
  dplyr::filter(ChemicalName %in% multiObsChemnameList$ChemicalName)

Nominal.multobs.dt_og <- Nominal.dt %>%
  dplyr::filter(ChemicalName %in% multiObsChemnameList$ChemicalName)
#each have 89 observations
#merge the lines with multiple observations for the same nomconc (ie tox21)
# For each chemical/nominal-dose pair, replace replicate rows by the mean
# measured and mean predicted cell concentration; distinct() then collapses
# the now-identical rows to one per pair.
Armitage.multobs.dt <- Armitage.multobs.dt_og %>%
  group_by(ChemicalName, NominalDose_uM) %>%
  mutate(mean_measuredccell = mean(CellConcentration_uM),
         mean_predictedccell = mean(ccells)) %>%
  select(ChemicalName, NominalDose_uM, mean_measuredccell,
         mean_predictedccell) %>%
  distinct()

Kramer.multobs.dt <- Kramer.multobs.dt_og %>%
  group_by(ChemicalName, NominalDose_uM) %>%
  mutate(mean_measuredccell = mean(CellConcentration_uM),
         mean_predictedccell = mean(concentration_cells)) %>%
  select(ChemicalName, NominalDose_uM, mean_measuredccell,
         mean_predictedccell) %>%
  distinct()

# for the nominal case the "prediction" is the nominal dose itself
Nominal.multobs.dt <- Nominal.multobs.dt_og %>%
  group_by(ChemicalName, NominalDose_uM) %>%
  mutate(mean_measuredccell = mean(CellConcentration_uM),
         mean_predictedccell = mean(NominalDose_uM)) %>%
  select(ChemicalName, NominalDose_uM, mean_measuredccell,
         mean_predictedccell) %>%
  distinct()
#down to 50 observations because the duplicates have been averaged
### Plotting ###
#plot set up
# Annotation anchors and counts for the multiple-observation subset; these
# are also referenced by the Kramer panel of this figure.
yrng_mult <- range(Armitage.multobs.dt$mean_measuredccell)
xrng_mult <- range(Armitage.multobs.dt$mean_predictedccell)
n_chems_mult <- n_distinct(Armitage.multobs.dt$ChemicalName)
n_data_mult <- nrow(Armitage.multobs.dt)

#ARMITAGE
#Armitage RMSLE:
arm_mult_rmsle_long <- rmsle(Armitage.multobs.dt$mean_measuredccell,
                             Armitage.multobs.dt$mean_predictedccell)
arm_mult_rmsle <- round(arm_mult_rmsle_long, 2)
#1.27

#r squared
ml_arm <- lm(mean_measuredccell ~ mean_predictedccell,
             data = Armitage.multobs.dt)
rsq_armitage_mult <- summary(ml_arm)$r.squared
#0.87

# now plot
Armitage_ccell_mult <- ggplot(Armitage.multobs.dt) +
  geom_point(aes(mean_predictedccell, mean_measuredccell,
                 colour = ChemicalName)) +
  # one fitted line per chemical (colour grouping), no confidence band
  geom_smooth(aes(mean_predictedccell, mean_measuredccell,
                  colour = ChemicalName), method = lm, se = FALSE) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_x_log10(limits = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  scale_y_log10(limits = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  xlab(expression("Armitage Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
  ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
  labs(colour = "Chemical") +
  annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
           label = parse(text = paste("N[data] ", "~'='~", n_data_mult)),
           hjust = 1, vjust = 0, size = 4) +
  annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
           label = parse(text = paste("N[chemicals] ", "~'='~", n_chems_mult)),
           hjust = 1, vjust = 1.2, size = 4) +
  annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
           label = paste("RMSLE =", arm_mult_rmsle),
           hjust = 1, vjust = 3, size = 4) +
  personal_theme() +
  scale_color_manual(values = plotcolors)
#KRAMER
#Kramer RMSLE:
kram_mult_rmsle_long <- rmsle(Kramer.multobs.dt$mean_measuredccell,
                              Kramer.multobs.dt$mean_predictedccell)
kram_mult_rmsle <- round(kram_mult_rmsle_long, 2)
# 1.58

#r squared
ml_kram <- lm(mean_measuredccell ~ mean_predictedccell,
              data = Kramer.multobs.dt)
rsq_kramer_mult <- summary(ml_kram)$r.squared
#0.87

# now plot
# xrng_mult, yrng_mult, n_data_mult and n_chems_mult are not defined here;
# they come from the plot set-up in the Armitage section of this figure.
Kramer_ccell_mult <- ggplot(Kramer.multobs.dt) +
  geom_point(aes(mean_predictedccell, mean_measuredccell,
                 colour = ChemicalName)) +
  # one fitted line per chemical (colour grouping), no confidence band
  geom_smooth(aes(mean_predictedccell, mean_measuredccell,
                  colour = ChemicalName), method = lm, se = FALSE) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_x_log10(limits = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  scale_y_log10(limits = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
                labels = label_log(digits = 2)) +
  xlab(expression("Kramer Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
  ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
  labs(colour = "Chemical") +
  annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
           label = parse(text = paste("N[data] ", "~'='~", n_data_mult)),
           hjust = 1, vjust = 0, size = 4) +
  annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
           label = parse(text = paste("N[chemicals] ", "~'='~", n_chems_mult)),
           hjust = 1, vjust = 1.2, size = 4) +
  annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
           label = paste("RMSLE =", kram_mult_rmsle),
           hjust = 1, vjust = 3, size = 4) +
  personal_theme() +
  scale_color_manual(values = plotcolors) +
  theme(legend.key.size = unit(2, "mm"),
        legend.text = element_text(size = 6))
#arrange the two panels side by side with a shared bottom legend
Ccells_comboplot_mult <- ggarrange(
  Armitage_ccell_mult, Kramer_ccell_mult,
  ncol = 2, legend = "bottom", common.legend = TRUE
)

# save (only when the notebook-level save_output flag is set)
if (save_output) {
  ggsave(
    filename = paste0("Figure3_", data.date, "_", Sys.Date(), ".png"),
    plot = Ccells_comboplot_mult,
    path = path_out,
    width = 8, height = 4, dpi = 300
  )
}
print(Ccells_comboplot_mult)
Figure 4:
Chemical partitioning into water, air, cell, and plastic compartments for the Armitage and Kramer models. Dashed line shows unity.
#Cwat_uM -------- Cwat_R
# Medium concentration: Kramer (concentration_medium) on x against
# Armitage (cwat_s) on y, both on log10 axes.
Cwater_comparison <- ggplot(combodata) +
  geom_point(aes(concentration_medium, cwat_s, colour = ChemicalName.x)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  ggtitle("Medium Concentration") +
  xlab(expression("Kramer Predicted "*italic("C"["medium"])~"(\u03BCM)")) +
  ylab(expression("Armitage Predicted "*italic(" C"["medium"])~"(\u03BCM)")) +
  scale_x_log10(labels = label_log(digits = 2)) +
  scale_y_log10(labels = label_log(digits = 2)) +
  personal_theme() +
  scale_color_manual(values = plotcolors)
#Cair_uM -------- Cair_R
# Air concentration: Kramer (concentration_air) on x against
# Armitage (cair) on y, both on log10 axes.
Cair_comparison <- ggplot(combodata) +
  geom_point(aes(concentration_air, cair, colour = ChemicalName.x)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  ggtitle("Air Concentration") +
  xlab(expression("Kramer Predicted "*italic("C"["air"])~"(\u03BCM)")) +
  ylab(expression("Armitage Predicted "*italic(" C"["air"])~"(\u03BCM)")) +
  scale_x_log10(labels = label_log(digits = 2)) +
  scale_y_log10(labels = label_log(digits = 2)) +
  personal_theme() +
  scale_color_manual(values = plotcolors)
#C_cells_uM ----- Ccells
# Cell concentration: Kramer (concentration_cells) on x against
# Armitage (ccells) on y; both axes share the same limits and breaks.
Ccells_comparison <- ggplot(combodata) +
  geom_point(aes(concentration_cells, ccells, colour = ChemicalName.x)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  ggtitle("Cell Concentration") +
  xlab(expression("Kramer Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
  ylab(expression("Armitage Predicted "*italic(" C"["cell"])~"(\u03BCM)")) +
  scale_x_log10(limits = c(10^-4, 10^5), labels = label_log(digits = 2),
                breaks = c(10^-2, 10^0, 10^2, 10^4)) +
  scale_y_log10(limits = c(10^-4, 10^5), labels = label_log(digits = 2),
                breaks = c(10^-2, 10^0, 10^2, 10^4)) +
  personal_theme() +
  scale_color_manual(values = plotcolors)
#Aplastic_uM_m2 - (Cplastic_R / Sarea_R)
# Plastic concentration: Kramer (concentration_plastic) on x against
# Armitage (cplastic) on y, both on log10 axes.
Aplastic_comparison <- ggplot(combodata) +
  geom_point(aes(concentration_plastic, cplastic, colour = ChemicalName.x)) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  ggtitle("Plastic Concentration") +
  xlab(expression("Kramer Predicted "*italic("C"["plastic"])~"(\u03BCM)")) +
  ylab(expression("Armitage Predicted "*italic(" C"["plastic"])~"(\u03BCM)")) +
  labs(colour = "Chemical") +
  scale_x_log10(labels = label_log(digits = 2),
                breaks = c(10^-6, 10^-4, 10^-2, 10^0)) +
  scale_y_log10(labels = label_log(digits = 2)) +
  personal_theme() +
  scale_color_manual(values = plotcolors) +
  theme(legend.key.size = unit(1, "mm"),
        legend.spacing = unit(2, "pt"),
        legend.text = element_text(size = 6))
# Arrange the four compartment panels; the legend is suppressed here
# (legend = "none").
compartment_plots <- ggarrange(
  Ccells_comparison, Aplastic_comparison,
  Cwater_comparison, Cair_comparison,
  common.legend = TRUE, legend = "none"
)

# save (only when the notebook-level save_output flag is set)
if (save_output) {
  ggsave(
    filename = paste0("Figure4_", data.date, "_", Sys.Date(), ".png"),
    plot = compartment_plots,
    path = path_out,
    width = 8, height = 6, dpi = 300
  )
}
print(compartment_plots)
Figure 5:
Comparison of Armitage model predictions using curated versus default (dashboard) physicochemical property values.
#load curated data info
original_curated.data.dt <- httk::Dimitrijevic.IVD

#first - run using the default/dashboard values
# Drop the curated property columns (and the stored prediction / FoA columns)
# so the model run below does not receive them as input.
default.dt <- copy(
  original_curated.data.dt %>%
    select(-c("Arnot_pka", "Arnot_pkb", "Chemaxon_pKa",
              "Chemaxon_pKb", "log.KOW.N", "log.KAW.N",
              "Predicted_Ccell_µM", "FoA"))
)

# run the model, output concentrations in umol/L (e.g. uM)
armitageOutput_default.dt <- armitage_eval(
  tcdata = default.dt,
  restrict.ion.partitioning = TRUE,
  surface.area.switch = FALSE
)
#next- run using curated values (EAS-E Suite)
# copy() so the := assignments below do not modify original_curated.data.dt
# by reference.
curated.data.dt <- copy(original_curated.data.dt)

#overwrite using curated physchem properties
# NOTE(review): the octanol-water column is written as `gkow` while the
# air-water column is `gkaw_n` and the output table below selects `gkow_n` --
# confirm which input column name armitage_eval() expects.
curated.data.dt[, pKa_Donor := as.character(Arnot_pka)] %>% #acidic
  .[, pKa_Accept := as.character(Arnot_pkb)] %>% #basic
  .[, gkow := log.KOW.N] %>%
  .[, gkaw_n := log.KAW.N]

# run the model, output concentrations in umol/L (e.g. uM)
armitageOutput_curated.data.dt <- armitage_eval(
  tcdata = curated.data.dt,
  restrict.ion.partitioning = TRUE,
  surface.area.switch = FALSE
)
#save output with these values (Supplemental Materials T9)
# Same property columns pulled from each model run so the two can be stacked.
defaultvals <- armitageOutput_default.dt %>%
  select(Name, casrn, gkow_n, gkaw_n, pKa_Donor, pKa_Accept)

curatedvals <- armitageOutput_curated.data.dt %>%
  select(Name, casrn, gkow_n, gkaw_n, pKa_Donor, pKa_Accept)

#tie the two tables together for the supplement (curated rows first)
s9_table <- rbind(curatedvals, defaultvals)
#calculate rmsle
#default values
#individual (one RMSLE per chemical, added by reference as a new column)
armitageOutput_default.dt[, rmsle_ccell_default :=
                            rmsle(Reported_Ccell_µM, ccells), by = Name]
#total
RMSLE_default.dt <- rmsle(armitageOutput_default.dt$Reported_Ccell_µM,
                          armitageOutput_default.dt$ccells)
RMSLE_default.dt <- round(RMSLE_default.dt, 3)
#0.586

#curated values
#individual (one RMSLE per chemical, added by reference as a new column)
armitageOutput_curated.data.dt[, rmsle_ccell_curated :=
                                 rmsle(Reported_Ccell_µM, ccells), by = Name]
#total
RMSLE_curated.data.dt <- rmsle(armitageOutput_curated.data.dt$Reported_Ccell_µM,
                               armitageOutput_curated.data.dt$ccells)
RMSLE_curated.data.dt <- round(RMSLE_curated.data.dt, 3)
#0.570

#improvement when using curated data:
RMSLE_default.dt - RMSLE_curated.data.dt
#0.016
## difference between the two on a per-chemical basis ##
# Columns present in both inputs get .x (default run) / .y (curated run)
# suffixes from merge(); the two per-chemical RMSLE columns have distinct
# names and keep them.
curated_data.dt <- merge(armitageOutput_default.dt,
                         armitageOutput_curated.data.dt, by = "Name")

#calculate rmsle difference for each chemical
# curated minus default: negative values mean the curated run had lower RMSLE
curated_data.dt[, rmsle_difference :=
                  rmsle_ccell_curated - rmsle_ccell_default, by = Name]

#which chemicals had the greatest magnitude of difference
#largest improvement when using curated data
head(curated_data.dt %>%
       select(rmsle_difference, Name) %>%
       arrange(rmsle_difference), 2)

#largest improvement when using default data
tail(curated_data.dt %>%
       select(rmsle_difference, Name) %>%
       arrange(rmsle_difference), 2)
#plot
# Points use the .y columns of the merged table (the curated run); each point
# is labelled with its per-chemical RMSLE difference.
curated_difference_httk_plot <- ggplot(curated_data.dt) +
  geom_point(aes(ccells.y, Reported_Ccell_µM.y, colour = Name), size = 4) +
  geom_label_repel(aes(ccells.y, Reported_Ccell_µM.y,
                       label = round(rmsle_difference, 2))) +
  # dashed 1:1 line (unity)
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  xlab(expression("Armitage Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
  ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
  labs(color = "Chemical") +
  scale_x_log10(limits = c(10, 10000), labels = label_log(digits = 2)) +
  scale_y_log10(limits = c(10, 10000), labels = label_log(digits = 2)) +
  personal_theme() +
  scale_color_viridis(discrete = TRUE) +
  theme(legend.position = "bottom",
        legend.key.size = unit(2, "mm"),
        legend.spacing = unit(10, "pt"),
        legend.text = element_text(size = 10))

# save the figure and the supplemental table (only when save_output is set)
if (save_output) {
  ggsave(
    filename = paste0("Figure5_", data.date, "_", Sys.Date(), ".png"),
    plot = curated_difference_httk_plot,
    path = path_out,
    width = 8, height = 5, dpi = 300
  )
  # file.path() joins directory and file name the same way ggsave() combines
  # `path` and `filename`, so both outputs land in path_out whether or not it
  # ends with a separator.
  write.csv(
    s9_table,
    file = file.path(
      path_out,
      paste0("table_s9_", data.date, "_", Sys.Date(), ".csv")
    )
  )
}