Main Article
E. coli clinical resistance models based on ten most common ARGs in E. coli
The first and maybe most obvious approach was to use the 10 most common ResFinder ARGs in publicly available E. coli genomes in predicting clinical resistance in E. coli. Results from beta regression models for the four resistance categories and Figure 1 from the article presented below.
Clinical aminopenicillin resistance
tmp <- ResData %>% filter(`AP_(n)`>100)
tmp$y = tmp$`AP_(R%)`/100
breg <- betareg(y ~ mean_topColi, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_topColi, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.2304 -0.5217 -0.0417 0.3678 2.9636
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.08767 0.12986 0.675 0.5
## mean_topColi 8.54151 1.52922 5.586 2.33e-08 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 13.715 3.289 4.17 3.04e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 23.48 on 3 Df
## Pseudo R-squared: 0.405
## Number of iterations: 22 (BFGS) + 3 (Fisher scoring)
new_int <- data.frame(mean_topColi=seq(min(tmp$mean_topColi), max(tmp$mean_topColi)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p1 <- ggplot(tmp, aes(mean_topColi, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_topColi, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.35)) +
labs(x="Relative ARG abundance", y="Proportion of resistant clinical isolates", title="A")
Clinical fluoroquinolone resistance
tmp <- ResData %>% filter(`FQ_(n)`>100)
tmp$y = tmp$`FQ_(R%)`/100
breg <- betareg(y ~ mean_topColi, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_topColi, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.7415 -0.4312 -0.0534 0.6384 2.0625
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6723 0.1004 -6.696 2.14e-11 ***
## mean_topColi 6.0567 1.0462 5.789 7.07e-09 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 13.323 3.149 4.23 2.33e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 26 on 3 Df
## Pseudo R-squared: 0.4906
## Number of iterations: 15 (BFGS) + 4 (Fisher scoring)
new_int <- data.frame(mean_topColi=seq(min(tmp$mean_topColi), max(tmp$mean_topColi)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p2 <- ggplot(tmp, aes(mean_topColi, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_topColi, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.35)) +
labs(x="Relative ARG abundance", y="Proportion of resistant clinical isolates", title="B")
Clinical 3rd generation cephalosporin resistance
tmp <- ResData %>% filter(`3GC_(n)`>100)
tmp$y = tmp$`3GC_(R%)`/100
breg <- betareg(y ~ mean_topColi, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_topColi, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -3.5030 -0.5427 -0.1212 0.4388 2.0227
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8483 0.1321 -6.421 1.35e-10 ***
## mean_topColi 6.3569 1.3598 4.675 2.94e-06 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 6.613 1.564 4.228 2.36e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 21.52 on 3 Df
## Pseudo R-squared: 0.3938
## Number of iterations: 12 (BFGS) + 5 (Fisher scoring)
new_int <- data.frame(mean_topColi=seq(min(tmp$mean_topColi), max(tmp$mean_topColi)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p3 <- ggplot(tmp, aes(mean_topColi, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_topColi, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.35)) +
labs(x="Relative ARG abundance", y="Proportion of resistant clinical isolates", title="C")
Clinical aminoglycoside resistance
tmp <- ResData %>% filter(`AG_(n)`>100)
tmp$y = tmp$`AG_(R%)`/100
breg <- betareg(y ~ mean_topColi, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_topColi, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -3.2747 -0.4971 -0.1390 0.3243 2.1603
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7967 0.0874 -9.115 < 2e-16 ***
## mean_topColi 2.1374 0.8162 2.619 0.00883 **
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 15.31 3.76 4.071 4.68e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 36.54 on 3 Df
## Pseudo R-squared: 0.2014
## Number of iterations: 15 (BFGS) + 4 (Fisher scoring)
new_int <- data.frame(mean_topColi=seq(min(tmp$mean_topColi), max(tmp$mean_topColi)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p4 <- ggplot(tmp, aes(mean_topColi, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_topColi, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.35)) +
labs(x="Relative ARG abundance", y="Proportion of resistant clinical isolates", title="D")
Figure 1
(p1 + p2) / (p3 + p4)
Fig. 1: E. coli clinical resistance models based on ten most common ARGs in E. coli
E. coli clinical resistance models based on intI1 integrase gene
Next spet was to use intI1 integrase gene as a proxy for antibiotic resistance gene prevalence in sewage samples. The results from this approach ands Figure 2 from the article below.
Clinical aminopenicillin resistance
tmp <- ResData %>% filter(`AP_(n)`>100)
tmp$y = tmp$`AP_(R%)`/100
breg <- betareg(y ~ mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.7796 -0.8594 -0.0775 0.8627 2.1615
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.02805 0.13275 0.211 0.833
## mean_int 10.84719 1.88498 5.755 8.69e-09 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 15.49 3.72 4.165 3.12e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 26.03 on 3 Df
## Pseudo R-squared: 0.5949
## Number of iterations: 43 (BFGS) + 3 (Fisher scoring)
new_int <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p1 <- ggplot(tmp, aes(mean_int, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_int, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.25)) +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates", title="A")
Clinical fluoroquinolone resistance
tmp <- ResData %>% filter(`FQ_(n)`>100)
tmp$y = tmp$`FQ_(R%)`/100
breg <- betareg(y ~ mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.0159 -0.6936 0.0341 0.9617 1.4457
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7660 0.1011 -7.579 3.49e-14 ***
## mean_int 8.3148 1.2936 6.428 1.30e-10 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 15.929 3.782 4.212 2.53e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 29.05 on 3 Df
## Pseudo R-squared: 0.6604
## Number of iterations: 15 (BFGS) + 4 (Fisher scoring)
new_int <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p2 <- ggplot(tmp, aes(mean_int, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_int, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.25)) +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates", title="B")
Clinical 3rd generation cephalosporin resistance
tmp <- ResData %>% filter(`3GC_(n)`>100)
tmp$y = tmp$`3GC_(R%)`/100
breg <- betareg(y ~ mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.5827 -0.7182 0.0696 0.6722 2.3899
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0358 0.1266 -8.180 2.85e-16 ***
## mean_int 9.6379 1.6151 5.968 2.41e-09 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 9.278 2.221 4.178 2.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 26.87 on 3 Df
## Pseudo R-squared: 0.6235
## Number of iterations: 21 (BFGS) + 3 (Fisher scoring)
new_int <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p3 <- ggplot(tmp, aes(mean_int, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_int, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.25)) +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates", title="C")
Clinical aminoglycoside resistance
tmp <- ResData %>% filter(`AG_(n)`>100)
tmp$y = tmp$`AG_(R%)`/100
breg <- betareg(y ~ mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.6903 -0.5976 -0.0897 0.4839 2.0737
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.89686 0.09074 -9.884 < 2e-16 ***
## mean_int 3.88204 1.07225 3.620 0.000294 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 17.760 4.368 4.066 4.78e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 38.83 on 3 Df
## Pseudo R-squared: 0.3058
## Number of iterations: 21 (BFGS) + 2 (Fisher scoring)
new_int <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p4 <- ggplot(tmp, aes(mean_int, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_int, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_size(limits=c(50,100), breaks=c(60, 80, 100)) + scale_x_continuous(limits=c(0, 0.25)) +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates", title="D")
Figure 2
(p1 + p2) / (p3 + p4)
Fig. 2: E. coli clinical resistance models based on intI1 integrase gene
Aggregated resistance model based on intI1 integrase gene
Although the two approaches performed equally well, the intI1 approach was chosen due to its simplicity compared to the top ARGs in E. coli. So intI1 integrase gene abundance was used to model the aggregated resistance index. Results from beta regression model and Figure 3 below. With all data and with Nigeria and Peru removed from the analysis.
With all data
tmp <- ResData
tmp$y = tmp$AverageAll/100
breg <- betareg(y ~ mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.5510 -0.8003 0.0782 0.8662 1.8720
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.55247 0.08251 -6.696 2.15e-11 ***
## mean_int 2.58303 0.62081 4.161 3.17e-05 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 9.422 1.885 4.998 5.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 31.44 on 3 Df
## Pseudo R-squared: 0.2439
## Number of iterations: 12 (BFGS) + 3 (Fisher scoring)
new_int <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p1 <- ggplot(tmp, aes(mean_int, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_int, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_y_continuous(limits=c(0,0.7)) +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Aggregated proportion of resistant isolates",
title="A")
With outliers removed
tmp <- ResData
tmp$y = tmp$AverageAll/100
tmp <- tmp %>% filter(!(Country=="Nigeria" | Country=="Peru"))
breg <- betareg(y ~ mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.7115 -0.7347 0.0049 0.8044 2.2584
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.75344 0.09013 -8.359 < 2e-16 ***
## mean_int 5.04575 0.89541 5.635 1.75e-08 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 12.12 2.50 4.847 1.25e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 36.09 on 3 Df
## Pseudo R-squared: 0.4337
## Number of iterations: 12 (BFGS) + 1 (Fisher scoring)
new_int <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05))
new_int$y <- predict(breg, newdata=new_int)
p2 <- ggplot(tmp, aes(mean_int, y)) + geom_point(pch=21, size=5, fill="skyblue") +
geom_line(aes(mean_int, y), data=new_int, linetype="twodash", color="blue", size=1) + theme_classic() +
scale_x_continuous(limits=c(0,0.3)) + scale_y_continuous(limits=c(0,0.7)) +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Aggregated proportion of resistant isolates",
title="B")
Figure 3
p1 + p2
Fig. 3: Aggregated resistance model based on intI1 integrase gene
E. coli clinical resistance models based on combined sewage and socioeconomical data
It is clear from the results that sewage data alone is not enough for modeling clinical resistance prevalence. To improve the performance, we included socioeconomic factors to the models. We used GDP, proportion of urban population and basic sanotation index in combination with the sewage data (intI1) to model clinical resistance prevalence in E. coli and more broadly using the aggregated resistance index.
Amniopenicillin resistance
tmp <- ResData %>% filter(`AP_(n)`>100)
tmp$y = tmp$`AP_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ log(GDP) + urban_population + basic_sanitation +
## mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.9702 -0.6482 -0.0020 0.5511 2.3723
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.631215 1.099727 6.030 1.64e-09 ***
## log(GDP) -0.277599 0.121371 -2.287 0.022184 *
## urban_population -0.007860 0.003898 -2.017 0.043745 *
## basic_sanitation -0.028746 0.008429 -3.410 0.000649 ***
## mean_int 4.202077 1.379923 3.045 0.002326 **
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 49.43 12.08 4.091 4.3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 44.5 on 6 Df
## Pseudo R-squared: 0.8474
## Number of iterations: 27 (BFGS) + 7 (Fisher scoring)
new_int0 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[2], basic_sanitation=quantile(tmp$basic_sanitation)[2],
GDP=quantile(tmp$GDP)[2])
new_int5 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[3], basic_sanitation=quantile(tmp$basic_sanitation)[3],
GDP=quantile(tmp$GDP)[3])
new_int9 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[4], basic_sanitation=quantile(tmp$basic_sanitation)[4],
GDP=quantile(tmp$GDP)[4])
new_int0$y <- predict(breg, newdata=new_int0)
new_int5$y <- predict(breg, newdata=new_int5)
new_int9$y <- predict(breg, newdata=new_int9)
p1 <- ggplot(tmp, aes(mean_int, y, size=basic_sanitation, fill=urban_population)) + geom_point(pch=21) +
geom_line(aes(mean_int, y), data=new_int0, linetype="twodash", color="darkblue", size=1) +
geom_line(aes(mean_int, y), data=new_int5, linetype="twodash", color="blue", size=1) +
geom_line(aes(mean_int, y), data=new_int9, linetype="twodash", color="lightblue", size=1) + theme_classic() +
scale_size(limits=c(min(tmp$basic_sanitation),max(tmp$basic_sanitation)),
breaks=c(60, 80, 100), name="Basic sanitation") +
scale_x_continuous(limits=c(0, 0.25)) +
scale_fill_gradient(limits=c(min(tmp$urban_population), max(tmp$urban_population)),
breaks=c(20, 40, 60, 80,100), name="Urban population") +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates",
title="A")
Fluoroquinolone resistance
tmp <- ResData %>% filter(`FQ_(n)`>100)
tmp$y = tmp$`FQ_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ log(GDP) + urban_population + basic_sanitation +
## mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.1288 -0.7550 0.0663 0.8374 1.8843
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.3588129 1.0537833 2.238 0.025193 *
## log(GDP) -0.2054641 0.1228022 -1.673 0.094302 .
## urban_population -0.0098242 0.0040179 -2.445 0.014481 *
## basic_sanitation -0.0008548 0.0068677 -0.124 0.900949
## mean_int 4.8309076 1.3844648 3.489 0.000484 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 28.50 6.83 4.172 3.02e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 38.85 on 6 Df
## Pseudo R-squared: 0.798
## Number of iterations: 27 (BFGS) + 2 (Fisher scoring)
new_int0 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[2], basic_sanitation=quantile(tmp$basic_sanitation)[2],
GDP=quantile(tmp$GDP)[2])
new_int5 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[3], basic_sanitation=quantile(tmp$basic_sanitation)[3],
GDP=quantile(tmp$GDP)[3])
new_int9 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[4], basic_sanitation=quantile(tmp$basic_sanitation)[4],
GDP=quantile(tmp$GDP)[4])
new_int0$y <- predict(breg, newdata=new_int0)
new_int5$y <- predict(breg, newdata=new_int5)
new_int9$y <- predict(breg, newdata=new_int9)
p2 <- ggplot(tmp, aes(mean_int, y, fill=urban_population, size=basic_sanitation)) + geom_point(pch=21) +
geom_line(aes(mean_int, y), data=new_int0, linetype="twodash", color="darkblue", size=1) +
geom_line(aes(mean_int, y), data=new_int5, linetype="twodash", color="blue", size=1) +
geom_line(aes(mean_int, y), data=new_int9, linetype="twodash", color="lightblue", size=1) + theme_classic() +
scale_size(limits=c(min(tmp$basic_sanitation),max(tmp$basic_sanitation)),
breaks=c(60, 80, 100), name="Basic sanitation") +
scale_x_continuous(limits=c(0, 0.25)) +
scale_fill_gradient(limits=c(min(tmp$urban_population), max(tmp$urban_population)),
breaks=c(20, 40, 60, 80,100), name="Urban population") +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates",
title="B")
3rd generation cephalosporins
tmp <- ResData %>% filter(`3GC_(n)`>100)
tmp$y = tmp$`3GC_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ log(GDP) + urban_population + basic_sanitation +
## mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -3.3211 -0.4718 0.0347 0.3623 2.6310
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.815852 1.117003 4.311 1.62e-05 ***
## log(GDP) -0.345795 0.130314 -2.654 0.00797 **
## urban_population -0.006785 0.004179 -1.624 0.10447
## basic_sanitation -0.015339 0.007446 -2.060 0.03940 *
## mean_int 3.577753 1.441305 2.482 0.01305 *
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 25.141 6.079 4.136 3.54e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 43.09 on 6 Df
## Pseudo R-squared: 0.8596
## Number of iterations: 26 (BFGS) + 2 (Fisher scoring)
new_int0 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[2], basic_sanitation=quantile(tmp$basic_sanitation)[2],
GDP=quantile(tmp$GDP)[2])
new_int5 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[3], basic_sanitation=quantile(tmp$basic_sanitation)[3],
GDP=quantile(tmp$GDP)[3])
new_int9 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[4], basic_sanitation=quantile(tmp$basic_sanitation)[4],
GDP=quantile(tmp$GDP)[4])
new_int0$y <- predict(breg, newdata=new_int0)
new_int5$y <- predict(breg, newdata=new_int5)
new_int9$y <- predict(breg, newdata=new_int9)
p3 <- ggplot(tmp, aes(mean_int, y, size=basic_sanitation, fill=urban_population)) + geom_point(pch=21) +
geom_line(aes(mean_int, y), data=new_int0, linetype="twodash", color="darkblue", size=1) +
geom_line(aes(mean_int, y), data=new_int5, linetype="twodash", color="blue", size=1) +
geom_line(aes(mean_int, y), data=new_int9, linetype="twodash", color="lightblue", size=1) + theme_classic() +
scale_size(limits=c(min(tmp$basic_sanitation),max(tmp$basic_sanitation)),
breaks=c(60, 80, 100), name="Basic sanitation") +
scale_x_continuous(limits=c(0, 0.25)) +
scale_fill_gradient(limits=c(min(tmp$urban_population), max(tmp$urban_population)),
breaks=c(20, 40, 60, 80,100), name="Urban population") +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates",
title="C")
Amoinoglycoside resistance
tmp <- ResData %>% filter(`AG_(n)`>100)
tmp$y = tmp$`AG_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ log(GDP) + urban_population + basic_sanitation +
## mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.9172 -0.7608 -0.1140 0.5325 2.6332
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.558608 0.909783 3.911 9.17e-05 ***
## log(GDP) -0.444008 0.102452 -4.334 1.47e-05 ***
## urban_population -0.004142 0.003330 -1.244 0.214
## basic_sanitation 0.006487 0.005199 1.248 0.212
## mean_int 0.469429 1.118507 0.420 0.675
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 39.90 9.84 4.055 5.02e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 51.44 on 6 Df
## Pseudo R-squared: 0.6815
## Number of iterations: 27 (BFGS) + 2 (Fisher scoring)
new_int0 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[2], basic_sanitation=quantile(tmp$basic_sanitation)[2],
GDP=quantile(tmp$GDP)[2])
new_int5 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[3], basic_sanitation=quantile(tmp$basic_sanitation)[3],
GDP=quantile(tmp$GDP)[3])
new_int9 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
urban_population=quantile(tmp$urban_population)[4], basic_sanitation=quantile(tmp$basic_sanitation)[4],
GDP=quantile(tmp$GDP)[4])
new_int0$y <- predict(breg, newdata=new_int0)
new_int5$y <- predict(breg, newdata=new_int5)
new_int9$y <- predict(breg, newdata=new_int9)
p4 <- ggplot(tmp, aes(mean_int, y, size=basic_sanitation, fill=urban_population)) + geom_point(pch=21) +
geom_line(aes(mean_int, y), data=new_int0, linetype="twodash", color="darkblue", size=1) +
geom_line(aes(mean_int, y), data=new_int5, linetype="twodash", color="blue", size=1) +
geom_line(aes(mean_int, y), data=new_int9, linetype="twodash", color="lightblue", size=1) + theme_classic() +
scale_size(limits=c(min(tmp$basic_sanitation),max(tmp$basic_sanitation)),
breaks=c(60, 80, 100), name="Basic sanitation") +
scale_x_continuous(limits=c(0, 0.25)) +
scale_fill_gradient(limits=c(min(tmp$urban_population), max(tmp$urban_population)),
breaks=c(20, 40, 60, 80,100), name="Urban population") +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Proportion of resistant clinical isolates",
title="D")
Figure 4
(p1 + p2) / (p3 + p4) +
plot_layout(guides = 'collect') & theme(legend.position='bottom')
Fig. 4: E. coli clinical resistance models based on combined sewage and socioeconomical data
Aggregated resistance index
tmp <- ResData
tmp$y = tmp$AverageAll/100
tmp <- tmp %>% filter(!(Country=="Nigeria" | Country=="Peru" | Country=="Kosovo"))
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
summary(breg)
##
## Call:
## betareg(formula = y ~ log(GDP) + urban_population + basic_sanitation +
## mean_int, data = tmp, link = "loglog")
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.9243 -0.8309 0.0747 0.6642 2.1436
##
## Coefficients (mean model with loglog link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.499088 0.745364 3.353 0.000800 ***
## log(GDP) -0.364228 0.099317 -3.667 0.000245 ***
## urban_population -0.008212 0.003016 -2.723 0.006478 **
## basic_sanitation 0.012280 0.004040 3.039 0.002371 **
## mean_int 3.333969 0.892383 3.736 0.000187 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 28.345 5.929 4.781 1.75e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 54.7 on 6 Df
## Pseudo R-squared: 0.7589
## Number of iterations: 14 (BFGS) + 4 (Fisher scoring)
new_int0 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
basic_sanitation=quantile(tmp$basic_sanitation)[2],
urban_population=quantile(tmp$urban_population)[2], GDP=quantile(tmp$GDP)[2])
new_int5 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
basic_sanitation=quantile(tmp$basic_sanitation)[3],
urban_population=quantile(tmp$urban_population)[3], GDP=quantile(tmp$GDP)[3])
new_int9 <- data.frame(mean_int=seq(min(tmp$mean_int), max(tmp$mean_int)+0.05, 0.05),
basic_sanitation=quantile(tmp$basic_sanitation)[4],
urban_population=quantile(tmp$urban_population)[4], GDP=quantile(tmp$GDP)[4])
new_int0$y <- predict(breg, newdata=new_int0)
new_int5$y <- predict(breg, newdata=new_int5)
new_int9$y <- predict(breg, newdata=new_int9)
Figure 5
ggplot(tmp, aes(mean_int, y, size=basic_sanitation, fill=urban_population)) + geom_point(pch=21) +
geom_line(aes(mean_int, y), data=new_int0, linetype="twodash", color="darkblue", size=1) +
geom_line(aes(mean_int, y), data=new_int5, linetype="twodash", color="blue", size=1) +
geom_line(aes(mean_int, y), data=new_int9, linetype="twodash", color="lightblue", size=1) + theme_classic() +
scale_size(limits=c(min(tmp$basic_sanitation),max(tmp$basic_sanitation)),
breaks=c(60, 80, 100), name="Basic sanitation") +
scale_x_continuous(limits=c(0, 0.25)) +
scale_fill_gradient(limits=c(min(tmp$urban_population), max(tmp$urban_population)),
breaks=c(20, 40, 60, 80,100), name="Urban population") +
labs(x=expression(paste("Relative ", italic("intI1"), " abundance", sep=" ")), y="Aggregated proportion of resistant isolates",
title="")
Fig. 5: Aggregated resistance model based on combined sewage and socioeconomical data
Predictions of clinical resistance prevalence for countries without clinical resistance data
After the promising results from our clinical resistance models, we were confident enough to try and predict clinical resistance prevalence in countries where we didn’t have any reliable data on clinical resistance prevalence.
Figure 6
no_data <- data.frame(ResData[!complete.cases(ResData[,c("AP_(R%)", "FQ_(R%)", "3GC_(R%)", "AG_(R%)", "AverageAll")]),])
no_data <- no_data %>% filter(!(Country=="Gambia" | Country=="Chad" | Country == "Kosovo"))
# AP
tmp <- ResData %>% filter(`AP_(n)`>100)
tmp$y = tmp$`AP_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
pred_AP <- predict(breg, no_data)
# FQ
tmp <- ResData %>% filter(`FQ_(n)`>100)
tmp$y = tmp$`FQ_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
pred_FQ <- predict(breg, no_data)
# 3GC
tmp <- ResData %>% filter(`3GC_(n)`>100)
tmp$y = tmp$`3GC_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
pred_3GC <- predict(breg, no_data)
# AG
tmp <- ResData %>% filter(`AG_(n)`>100)
tmp$y = tmp$`AG_(R%)`/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
pred_AG <- predict(breg, no_data)
# AverageAll
tmp <- ResData
tmp$y = tmp$AverageAll/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
pred_AA <- predict(breg, no_data)
pred_data <- cbind(no_data, pred_AP, pred_FQ, pred_3GC, pred_AG, pred_AA)
pred_data <- pred_data %>% filter(!is.na(pred_data[,"pred_AP"]))
row.names(pred_data) <- pred_data$Country
pred <- pred_data[,c("pred_AP", "pred_FQ", "pred_3GC", "pred_AG", "pred_AA")]
annotation_row <- data.frame(Continent=pred_data$Continent, row.names=row.names(pred_data))
orig_values <- data.frame(no_data[,c("AP_.R..", "FQ_.R..","X3GC_.R..", "AG_.R..", "AverageAll")], row.names=no_data$Country)
orig_values <- format(round(orig_values/100, digits=2), nsmall=2)
orig_values[orig_values<0] <- ""
clust_mat <- pred
clust_mat[orig_values>0] <- orig_values[orig_values>0]
countries <- c("Georgia", "Macedonia")
clust_mat[countries,] <- pred_data[countries,c("pred_AP", "pred_FQ", "pred_3GC", "pred_AG", "pred_AA")]
clust_obj <- hclust(dist(clust_mat))
pred[orig_values>0] <- NA
# put Georgia and Macedonia back to the model
countries <- c("Georgia", "Macedonia")
pred[countries,] <- pred_data[countries,c("pred_AP", "pred_FQ", "pred_3GC", "pred_AG", "pred_AA")]
pheatmap(pred, cluster_cols=FALSE, cluster_rows=clust_obj,annotation_row = annotation_row, display_numbers=orig_values,
labels_col=c("Pred AP", "Pred FQ", "Pred 3rd GC", "Pred AG", "Pred Agg"), gaps_col=c(0,0),
angle_col=0, cutree_rows=2, na_col="white")
Fig. 6: Predictions of clinical resistance prevalence for countries without clinical resistance data
Global predictions for aggregated resistance index
And for giving an overview of global clinical resistance situation, the same was done for all countries where we had sewage and socio-economical data.
Figure 7
all_data <- ResData %>% filter(!(Country=="Gambia" | Country=="Chad" | Country == "Kosovo")) %>% data.frame()
tmp <- ResData
tmp$y = tmp$AverageAll/100
breg <- betareg(y ~ log(GDP) + urban_population + basic_sanitation + mean_int, data=tmp, link="loglog")
pred_AA <- predict(breg, all_data)
all_data <- cbind(all_data, pred_AA)
RES_map <- map_data("world")
RES_map <- RES_map %>% filter(region!="Antarctica")
tmp <- data.frame(region=all_data$Country, pred_AA=all_data$pred_AA)
tmp$region <- plyr::revalue(tmp$region, c("Viet Nam" = "Vietnam"))
RES_map <- full_join(RES_map, tmp, by="region")
# main figure
ggplot() +
geom_polygon(data = RES_map, aes(x=long, y = lat, group = group, fill=pred_AA), color="white") +
theme(panel.grid = element_blank(), panel.background = element_blank(),
plot.title = element_text(size=10, face = "bold"), legend.position = "bottom") +
scale_x_continuous(limits=c(-170, 200)) + scale_y_continuous(limits=c(-60, 85)) +
labs(x="Longitude", y="Latitude", title = "", caption = "") +
scale_fill_gradient(name="Predicted aggregated\nresistance index", high="red", low="#FFFF99", na.value = "grey70")
Fig. 7: Global predictions for aggregated resistance index