############################################################################## # # File: meta-analysis-public 18 11 30 # # Project: Global patterns of mortality in international migrants # # Author: Rob Aldridge # # Data: A cleaned version of the data used in this analysis has been made available # via the following DOI https://doi.org/10.14324/000.ds.10062606 # # Notes: # 1) Some of the variables used in the scripsts below (e.g. Duplicate, Exclude) # have been removed from these shared files, but are preserved in the scripts below # in order to provide transparency in how we ran the analyses. # 2) We have cleaned these scripts prior to publication to improve their clarity and have # removed some code that was originally included to just to check the data # 3) We are unable to share several of the datasets used below because we do not have # permission to redistribute (e.g. input_country_and_region_codes_origin.csv). Please # contact the study team if further details on these required # ############################################################################## # # Purpose: This file undertakes meta-analysis of migrant mortality SMRs # ############################################################################## # this scrip used the following packages # googlesheets (version 0.3.0) # metafor (version 2.0-0 # tidyverse (version 1.2.1) # data.table (version 1.11.8) # initial data extraction undertaken in google sheets datasheet <- gs_title("data_extraction_sheet") # check the list of worksheets available in the google sheet gs_ws_ls(datasheet) # import the mortality data from the google sheet mortality <- gs_read(ss=datasheet, ws = "Mortality", col_types = cols( count = col_integer(), Outcome = col_number(), lci = col_number(), uci = col_number(), smr = col_integer(), uci_new = col_number(), lci_new = col_number()) ) #preview imported data dim (mortality) #several files are then used to clean country and population details and link these #to UN population and World Bank Data #the section below imports these files to clean country and population details destination_countries <- read_csv("input_destination_countries.csv") country_codes_destination <- read_csv("input_country_and_region_codes_destination.csv") origin_countries <- read_csv("input_origin_countries.csv") country_codes_orig <- read_csv("input_country_and_region_codes_origin.csv") population <- read_csv("input_population.csv") international_migrants_total_origin <- read_csv("input_international_migrants_total_origin.csv") input_icd10_labels<- read_csv("input_icd10_labels.csv") input_icd10_subgroup_labels<- read_csv("input_icd10_subgroup_labels.csv") international_migrants_total_receiving <- read_csv("input_international_migrants_total_receiving.csv") #add country code to destination countries mortality <- mortality %>% left_join(destination_countries, by = "Country_of_study") %>% left_join(country_codes_destination, by = "Country_of_study_new_name") %>% left_join(origin_countries, by = "Country_origin") %>% left_join(country_codes_orig, by = "Country_origin_new_name") %>% left_join(population, by = "Population") %>% left_join(international_migrants_total_origin, by = "country-code-country-origin") %>% left_join(input_icd10_labels, by = "ICD10_chapter") %>% left_join(input_icd10_subgroup_labels, by = "ICD10_subgroup") %>% left_join(international_migrants_total_receiving, by = "country-code") ################## #Creat all cause Forrest plots ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(smr==1, smr_all_cause_include==1, is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, Population_group, Country_of_study, region, First_author, Year_of_publication, bank_region_origin, Sex, Country_origin_forrest, Study_years, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 #create study identifier smr_filter_all_cause$study_id<-as.factor(smr_filter_all_cause$First_author) ### decrease margins so the full space is used par(mar=c(3,8,1,6)) ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Creat Forrest plot labels region <- paste(smr_filter_all_cause$bank_region_origin,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, slab=label, order=order(smr_filter_all_cause$Year_of_publication), ylim=c(-1, 122), atransf=exp) ### add column headings to the plot text(-4.8,122, "Region of origin, Country of study, [Author, ref]", cex=0.5, pos=4, font=2 ) ### add column headings to the plot text(2.4,122, "SMR [95%CIs]", cex=0.5, pos=4, font=2) ### output Figure dev.copy(pdf,'fig-all-cause.pdf') dev.off() ### create Funnel plot funnel(res_smr_all_cause, xlab="Log SMR") ### output Funnel plot dev.copy(pdf,'fig-all-cause-funnel.pdf') dev.off() ################## #All cause Forrest plots - Male ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(smr==1, Sex=="M", smr_all_cause_include==1, is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, Population_group, Country_of_study, First_author, Year_of_publication, bank_region_origin, Sex, Country_origin_forrest, Study_years, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 ### decrease margins so the full space is used par(mfrow=c(1,2)) ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Create Forest plot labels region <- paste(smr_filter_all_cause$bank_region_origin,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, slab=label, order=order(smr_filter_all_cause$Year_of_publication), ylim=c(-1, 56), atransf=exp, cex=0.25) ### add column headings to the plot text(-4.2,55, "Region of origin, Country of study, [Author, ref]", cex=0.25, pos=4, font=2 ) ### add column headings to the plot text(2.0,55, "SMR [95%CIs]", cex=0.25, pos=4, font=2) ### output Figure dev.copy(pdf,'fig-male-all-cause.pdf') dev.off() ################## #All cause Forrest plots - Female ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(smr==1, Sex=="F", smr_all_cause_include==1, is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, Population_group,Country_origin_new_name, First_author, Year_of_publication, bank_region_origin, Country_origin_forrest, Study_years, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Forest label region <- paste(smr_filter_all_cause$bank_region_origin,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, slab=label, order=order(smr_filter_all_cause$Year_of_publication), ylim=c(-1, 56), cex=0.25, atransf=exp) ### add column headings to the plot text(-4.9,55, "Region of origin, Country of study, [Author, ref]", cex=0.25, pos=4, font=2 ) ### add column headings to the plot text(2.2,55, "SMR [95%CIs]", cex=0.25, pos=4, font=2) dev.copy(pdf,'fig-all-cause-male-female.pdf') dev.off() ################## #All cause Forrest plots - By migrant group ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(ICD10_chapter=="All cause", smr_all_cause_include==1, is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, Population_group, Country_of_study, First_author, Year_of_publication, Sex, bank_region_origin, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 ### Notes for how this plot was created from here - http://www.metafor-project.org/doku.php/plots:forest_plot_with_subgroups ### decrease margins so the full space is used par(mar=c(3,8,1,6)) ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Forest label region <- paste(smr_filter_all_cause$bank_region_origin,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, order=order(smr_filter_all_cause$Population_group), atransf=exp, ylim=c(-1, 136), rows=c(3:6,11:109,114:129),slab=label) ### add column headings to the plot text(-4.9,135, "Region of origin, Country of study, [Author, ref]", cex=0.3, pos=4, font=2 ) ### add column headings to the plot text(2.5,135, "SMR [95%CIs]", cex=0.3, pos=4, font=2) ### add text for the subgroups text(-4.9, c(130,110,7),cex=0.30, pos=4, c("Refugees", "International migrants", "Asylum seekers")) ### fit random-effects model in the three subgroups res_smr_all_cause.r <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(Population_group=="Refugee"), method="REML") res_smr_all_cause.i <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(Population_group=="International"), method="REML") res_smr_all_cause.a <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(Population_group=="Asylum"), method="REML") ### add summary polygons for the three subgroups addpoly(res_smr_all_cause.r, row=112.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.i, row= 9.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.a, row= 1.5, cex=0.3, atransf=exp, mlab="") ### add text with Q-value, dfs, p-value, and I^2 statistic for subgroups text(-4.9, 112.5, pos=4, cex=0.3, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.r$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.r$I2, digits=1, format="f")), "%)"))) text(-4.9, 9.5, pos=4, cex=0.3, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.i$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.i$I2, digits=1, format="f")), "%)"))) text(-4.9, 1.5, pos=4, cex=0.3, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.a$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.a$I2, digits=1, format="f")), "%)"))) dev.copy(pdf,'fig-all-cause-migrant-group.pdf') dev.off() ################## #All cause Forrest plots by geographical region of origin ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(smr==1, lci>0, uci>lci,ICD10_chapter=="All cause", smr_all_cause_include==1, is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, bank_region_origin, Country_of_study,First_author,Population_group, Sex, Year_of_publication, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 ### decrease margins so the full space is used par(mar=c(3,8,1,6)) ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Forest label region <- paste(smr_filter_all_cause$bank_region_origin,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, order=order(smr_filter_all_cause$bank_region_origin), atransf=exp, ylim=c(-1, 155), rows=c(3:18,23:63,68:75,80:81,86:108,113:114,119:136,141:149), slab=label) ### add column headings to the plot text(-4.9,154, "Region of origin, Country of study, [Author, ref]", cex=0.3, pos=4, font=2 ) ### add column headings to the plot text(2.5,154, "SMR [95%CIs]", cex=0.3, pos=4, font=2) ### add text for the subgroups text(-4.9, c(19,64,76,82,109,115,137,150), cex=0.25, pos=4, c("East Asia & Pacific", "Europe & Central Asia", "Latin America & Caribbean", "Middle East & North Africa", "Multiple", "North America", "South Asia", "Sub-Saharan Africa")) ### fit random-effects model in the three subgroups res_smr_all_cause.ea <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="East Asia & Pacific"), method="REML") res_smr_all_cause.ec <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="Europe & Central Asia"), method="REML") res_smr_all_cause.la <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="Latin America & Caribbean"), method="REML") res_smr_all_cause.me <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="Middle East & North Africa"), method="REML") res_smr_all_cause.m <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="Multiple"), method="REML") res_smr_all_cause.na <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="North America"), method="REML") res_smr_all_cause.sa <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="South Asia"), method="REML") res_smr_all_cause.ssa <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_origin=="Sub-Saharan Africa"), method="REML") ### add summary polygons for the three subgroups addpoly(res_smr_all_cause.ea, row=1.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.ec, row=21.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.la, row=66.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.me, row=78.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.m, row=84.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.na, row=111.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.sa, row=117.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.ssa, row=139.5, cex=0.3, atransf=exp, mlab="") ### add text with Q-value, dfs, p-value, and I^2 statistic for subgroups text(-4.9, 1.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.ea$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.ea$I2, digits=1, format="f")), "%)"))) text(-4.9, 21.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.ec$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.ec$I2, digits=1, format="f")), "%)"))) text(-4.9, 66.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.la$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.la$I2, digits=1, format="f")), "%)"))) text(-4.9, 78.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.me$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.me$I2, digits=1, format="f")), "%)"))) text(-4.9, 84.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.m$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.m$I2, digits=1, format="f")), "%)"))) text(-4.9, 111.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.na$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.na$I2, digits=1, format="f")), "%)"))) text(-4.9, 117.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.sa$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.sa$I2, digits=1, format="f")), "%)"))) text(-4.9, 139.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.ssa$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.ssa$I2, digits=1, format="f")), "%)"))) dev.copy(pdf,'fig-all-cause-migrant-region-origin.pdf') dev.off() ################## #All cause Forrest plots by region of destination ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(smr==1, lci>0, uci>lci,ICD10_chapter=="All cause", smr_all_cause_include==1, !is.na(region), is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter,bank_region_origin, First_author,Population_group,Country_of_study, Sex, bank_region_destination, Year_of_publication, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 ### Notes for how this plot was created from here - http://www.metafor-project.org/doku.php/plots:forest_plot_with_subgroups ### decrease margins so the full space is used par(mar=c(3,8,1,6)) ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Forest label region <- paste(smr_filter_all_cause$bank_region_origin,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, order=order(smr_filter_all_cause$bank_region_destination), atransf=exp, ylim=c(-1, 139), rows=c(3:69,74:79,84:86,91:133), slab=label) ### add column headings to the plot text(-4.9,139, "Region of origin, Country of study, [Author, ref]", cex=0.3, pos=4, font=2 ) ### add column headings to the plot text(2.2,139, "SMR [95%CIs]", cex=0.3, pos=4, font=2) ### add text for the subgroups text(-4.9, c(134,87,80,70), cex=0.25, pos=4, c("North America", "Middle East & North Africa", "Latin America & Caribbean", "Europe & Central Asia")) ### fit random-effects model in the three subgroups res_smr_all_cause.dec <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_destination=="Europe & Central Asia"), method="REML") res_smr_all_cause.dla <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_destination=="Latin America & Caribbean"), method="REML") res_smr_all_cause.dme <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_destination=="Middle East & North Africa"), method="REML") res_smr_all_cause.dna <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(bank_region_destination=="North America"), method="REML") ### add summary polygons for the three subgroups addpoly(res_smr_all_cause.dec, row=1.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.dla, row=72.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.dme, row=82.5, cex=0.3, atransf=exp, mlab="") addpoly(res_smr_all_cause.dna, row=89.5, cex=0.3, atransf=exp, mlab="") ### add text with Q-value, dfs, p-value, and I^2 statistic for subgroups text(-4.9, 1.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.dec$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.dec$I2, digits=1, format="f")), "%)"))) text(-4.9, 72.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.dme$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.dme$I2, digits=1, format="f")), "%)"))) text(-4.9, 82.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.dla$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.dla$I2, digits=1, format="f")), "%)"))) text(-4.9, 89.5, pos=4, cex=0.25, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.dna$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.dna$I2, digits=1, format="f")), "%)"))) dev.copy(pdf,'fig-all-cause-migrant-region-destination.pdf') dev.off() ################## #All cause Forrest plots by world bank income group in origin countries ################## #filter the data to only include non-excluded or duplicate SMR data smr_filter_all_cause <- mortality %>% filter(ICD10_chapter=="All cause", smr_all_cause_include==1, is.na(Duplicate), is.na(Exclude), !is.na(income_group_number_origin)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, Country_origin_forrest,Country_of_study, Population_group, bank_region_origin, Sex, First_author, Year_of_publication, Country_origin_new_name, income_group_number_origin, Country_of_study_new_name) %>% as.data.frame(smr_filter_all_cause) #create standard error estimates from confidence intervals from study smr_filter_all_cause$sei <- (log(smr_filter_all_cause$uci/smr_filter_all_cause$lci))/3.92 ### decrease margins so the full space is used par(mar=c(3,4,1,3)) ### fit random-effects model res_smr_all_cause <- rma.uni(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, method="REML") ### Forest label region <- paste(smr_filter_all_cause$Country_origin_new_name,smr_filter_all_cause$Country_of_study_new_name, sep=", ") paper <- paste("[",smr_filter_all_cause$First_author,"]") label <- paste(region, paper) ### set up forest plot forest(res_smr_all_cause, order=order(smr_filter_all_cause$income_group_number_origin), atransf=exp, ylim=c(-1, 55), cex=0.5, rows=c(3:14,19:23,28:49), slab=label) ### add text for the subgroups text(-4.77, c(50,24,15), cex=0.5, pos=4, font=2, c("High Income Countries", "Upper Middle Income Countries", "Lower Middle Income Countries")) ### add column headings to the plot text(-4.85,52, "Country of origin, Country of study, [Author, ref]", cex=0.5, pos=4, font=2 ) ### add column headings to the plot text(2.3,52, "SMR [95%CIs]", cex=0.5, pos=4, font=2) ### fit random-effects model in the three subgroups res_smr_all_cause.lmic <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(income_group_number_origin==2), method="REML") res_smr_all_cause.umic <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(income_group_number_origin==3), method="REML") res_smr_all_cause.hic <- rma(yi = log(smr_filter_all_cause$Outcome), sei=smr_filter_all_cause$sei, data = smr_filter_all_cause, subset=(income_group_number_origin==4), method="REML") ### add summary polygons for the three subgroups addpoly(res_smr_all_cause.lmic, row=2, cex=0.5, atransf=exp, mlab="") addpoly(res_smr_all_cause.umic, row=18, cex=0.5, atransf=exp, mlab="") addpoly(res_smr_all_cause.hic, row=27, cex=0.5, atransf=exp, mlab="") text(-4.77, 27, pos=4, cex=0.5, font=4, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.hic$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.hic$I2, digits=1, format="f")), "%)"))) text(-4.77, 18, pos=4, cex=0.5, font=4, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.umic$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.umic$I2, digits=1, format="f")), "%)"))) text(-4.77, 2, pos=4, cex=0.5, font=4, bquote(paste("RE Model for Subgroup p = ", .(formatC(res_smr_all_cause.lmic$QEp, digits=2, format="f")), "; ", I^2, " = ", .(formatC(res_smr_all_cause.lmic$I2, digits=1, format="f")), "%)"))) dev.copy(pdf,'fig-all-cause-income.pdf') dev.off() ################## # ICD10 subgroup ################## #filter the data to only include SMR smr_filter_ICD <- mortality %>% filter(smr == 1, lci>0, is.na(Duplicate), is.na(Exclude), ICD10_chapter!="All cause", ICD10_chapter!="Unclassified",ICD10_chapter!="Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified") %>% select (smr, lci, uci, Outcome, ICD10_chapter_label) %>% as.data.frame(smr_filter_ICD) #create log standard error estimates from confidence intervals from study smr_filter_ICD$sei <- (log(smr_filter_ICD$uci/smr_filter_ICD$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD, f = smr_filter_ICD$ICD10_chapter_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # Extract combined effects results_icd <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd <- t(results_icd) a <- as.data.frame(results_icd) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$icd_names <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") a <- a[nrow(a):1,] par(xpd = F, mar = c(5, 8, 2, 10)) xas <- c(0.2, 0.3, 0.6, 1, 2, 5) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) par(xpd = T) text(x = log(0.2), y = seq(nrow(a)), a$icd_names, pos = 2, cex=0.75) text(x = log(5), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) dev.copy(pdf,'fig-icd10_all.pdf') dev.off() par(xpd = F) ################## # ICD10 subgroup - neoplasms ################## #filter the data to only include SMRs smr_filter_ICD_subgroup <- mortality %>% filter(smr==1, lci>0, ICD10_chapter=="Neoplasms", is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, ICD10_subgroup, ICD10_subgroup_label, Population, region, region_country_origin, Country_of_study, First_author) as.data.frame(smr_filter_ICD_subgroup) #create log standard error estimates from confidence intervals from study smr_filter_ICD_subgroup$sei <- (log(smr_filter_ICD_subgroup$uci/smr_filter_ICD_subgroup$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD_subgroup, f = smr_filter_ICD_subgroup$ICD10_subgroup_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # now let's extract the combined effects. results_icd_subgroup <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd_subgroup <- t(results_icd_subgroup) a <- as.data.frame(results_icd_subgroup) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$ICD10_subgroup_label <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") a <- a[nrow(a):1,] par(xpd = T, mfrow=c(3,2)) xas <- c(0.2, 0.3, 0.6, 1, 2, 3, 5, 15) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) a$fig_lab<- nrow(a) text(x = log(0.2), y = seq(nrow(a)), a$ICD10_subgroup_label, pos = 2, cex=0.75) text(x = log(15), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) text(x = log(0.11), y = a$fig_lab, "A (Neoplasms)", pos = 2, cex=0.75) dev.copy(pdf,'fig-icd10_neoplasms.pdf') dev.off() ################## # ICD10 subgroup - CVD ################## #filter the data to only include SMRs smr_filter_ICD_subgroup <- mortality %>% filter(smr==1, lci>0, ICD10_chapter=="Diseases of the circulatory system", is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, ICD10_subgroup, ICD10_subgroup_label) as.data.frame(smr_filter_ICD_subgroup) #create log standard error estimates from confidence intervals from study smr_filter_ICD_subgroup$sei <- (log(smr_filter_ICD_subgroup$uci/smr_filter_ICD_subgroup$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD_subgroup, f = smr_filter_ICD_subgroup$ICD10_subgroup_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # now let's extract the combined effects. Try attributes(y[[1]]) to see the names of all rma.uni results. ?rma.uni gives you the documentation) results_icd_subgroup <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd_subgroup <- t(results_icd_subgroup) a <- as.data.frame(results_icd_subgroup) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$icd_subgroup <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") a <- a[nrow(a):1,] xas <- c(0.2, 0.3, 0.6, 1, 2, 3, 5, 15) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) a$fig_lab<- nrow(a) text(x = log(0.2), y = seq(nrow(a)), a$icd_subgroup, pos = 2, cex=0.75) text(x = log(15), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) text(x = log(0.1), y = a$fig_lab, "B (CVD)", pos = 2, cex=0.75) dev.copy(pdf,'fig-icd10_cvd.pdf') dev.off() ################## # ICD10 subgroup - External ################## #filter the data to only include SMRs smr_filter_ICD_subgroup <- mortality %>% filter(smr==1, lci>0, ICD10_chapter=="External causes of morbidity and mortality", is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, ICD10_subgroup_label, ICD10_subgroup, Population, region_country_origin, Country_of_study, First_author, Title_paper) as.data.frame(smr_filter_ICD_subgroup) write_csv(smr_filter_ICD_subgroup, "results_ICD_ext_SMRs.csv") #create log standard error estimates from confidence intervals from study smr_filter_ICD_subgroup$sei <- (log(smr_filter_ICD_subgroup$uci/smr_filter_ICD_subgroup$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD_subgroup, f = smr_filter_ICD_subgroup$ICD10_subgroup_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # now let's extract the combined effects. results_icd_subgroup <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd_subgroup <- t(results_icd_subgroup) a <- as.data.frame(results_icd_subgroup) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$icd_subgroup <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") xas <- c(0.2, 0.3, 0.6, 1, 2, 3, 5, 15) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) a$fig_lab<- nrow(a) text(x = log(0.2), y = seq(nrow(a)), a$icd_subgroup, pos = 2, cex=0.75) text(x = log(15), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) text(x = log(0.09), y = a$fig_lab, "C (External)", pos = 2, cex=0.75) dev.copy(pdf,'fig-icd10_external.pdf') dev.off() ################## # ICD10 subgroup - Respiratory ################## #filter the data to only include SMRs smr_filter_ICD_subgroup <- mortality %>% filter(smr==1, lci>0, ICD10_chapter=="Diseases of the respiratory system", is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, ICD10_subgroup, ICD10_subgroup_label) as.data.frame(smr_filter_ICD_subgroup) #create log standard error estimates from confidence intervals from study smr_filter_ICD_subgroup$sei <- (log(smr_filter_ICD_subgroup$uci/smr_filter_ICD_subgroup$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD_subgroup, f = smr_filter_ICD_subgroup$ICD10_subgroup_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # now let's extract the combined effects. results_icd_subgroup <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd_subgroup <- t(results_icd_subgroup) a <- as.data.frame(results_icd_subgroup) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$icd_subgroup <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") a <- a[nrow(a):1,] xas <- c(0.2, 0.3, 0.6, 1, 2, 3, 5, 15) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) a$fig_lab<- nrow(a) text(x = log(0.2), y = seq(nrow(a)), a$icd_subgroup, pos = 2, cex=0.75) text(x = log(15), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) text(x = log(0.13), y = a$fig_lab, "D (Respiratory)", pos = 2, cex=0.75) dev.copy(pdf,'fig-icd10_respiratory.pdf') dev.off() ################## # ICD10 subgroup - Infection ################## #filter the data to only include SMRs smr_filter_ICD_subgroup <- mortality %>% filter(smr==1, lci>0, ICD10_chapter=="Certain infectious and parasitic diseases", is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, ICD10_subgroup, ICD10_subgroup_label) as.data.frame(smr_filter_ICD_subgroup) #create log standard error estimates from confidence intervals from study smr_filter_ICD_subgroup$sei <- (log(smr_filter_ICD_subgroup$uci/smr_filter_ICD_subgroup$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD_subgroup, f = smr_filter_ICD_subgroup$ICD10_subgroup_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # now let's extract the combined effects. results_icd_subgroup <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd_subgroup <- t(results_icd_subgroup) a <- as.data.frame(results_icd_subgroup) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$icd_subgroup <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") a <- a[nrow(a):1,] xas <- c(0.2, 0.3, 0.6, 1, 2, 3, 5, 15) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) a$fig_lab<- nrow(a) text(x = log(0.2), y = seq(nrow(a)), a$icd_subgroup, pos = 2, cex=0.75) text(x = log(15), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) text(x = log(0.1), y = a$fig_lab, "E (Infection)", pos = 2, cex=0.75) dev.copy(pdf,'fig-icd10_infection.pdf') dev.off() ################## # ICD10 subgroup - Endocrine ################## #filter the data to only include SMRs smr_filter_ICD_subgroup <- mortality %>% filter(smr==1, ICD10_chapter=="Endocrine, nutritional and metabolic diseases", is.na(Duplicate), is.na(Exclude)) %>% select (smr, lci, uci, Outcome, ICD10_chapter, ICD10_subgroup, ICD10_subgroup_label) as.data.frame(smr_filter_ICD_subgroup) #create log standard error estimates from confidence intervals from study smr_filter_ICD_subgroup$sei <- (log(smr_filter_ICD_subgroup$uci/smr_filter_ICD_subgroup$lci))/3.92 # ‘outcome’ is the name of the SMR variable and ‘SE’ is the name of the standard error variable y <- lapply(split(smr_filter_ICD_subgroup, f = smr_filter_ICD_subgroup$ICD10_subgroup_label), function(x) rma.uni(yi = log(Outcome), sei=sei, data = x)) # now let's extract the combined effects. results_icd_subgroup <- sapply (y, function(x) c(x$b, x$ci.lb, x$ci.ub, x$k, x$I2, x$pval)) results_icd_subgroup <- t(results_icd_subgroup) a <- as.data.frame(results_icd_subgroup) names(a) <- c('smr', 'lci', 'uci', 'N','I2','pval') a$icd_subgroup <- rownames(a) forrest_N <- a$N forrest_i2 <- sprintf("%3.2f", a$I2) forrest_pval <- sprintf("%3.2f", a$pval) forrest_smr <- sprintf("%3.2f", exp(a$smr)) forrest_lci <- sprintf("%3.2f", exp(a$lci)) forrest_uci <- sprintf("%3.2f", exp(a$uci)) a$label <- paste(forrest_smr, "(", forrest_lci, "-", forrest_uci, ") I2=", forrest_i2, "%;N=", forrest_N,"") a <- a[nrow(a):1,] #par(xpd = F, mar = c(5, 12, 15, 8)) xas <- c(0.2, 0.3, 0.6, 1, 2, 3, 5, 15) plot(a$smr, seq(nrow(a)), axes = F, xlim = log(c(min(xas), max(xas))), pch = 15, xlab = 'SMR', ylab = '') arrows(a$lci, seq(nrow(a)), a$uci, seq(nrow(a)), code = 3, angle = 90, length = 0.03) axis(1, at = log(xas), labels = xas) abline(v = 0, lty = 2) a$fig_lab<- nrow(a) text(x = log(0.2), y = seq(nrow(a)), a$icd_subgroup, pos = 2, cex=0.75) text(x = log(15), y = seq(nrow(a)), a$label, pos = 4, cex=0.5) text(x = log(0.1), y = a$fig_lab, "F (Endocrine)", pos = 2, cex=0.75) dev.copy(pdf,'fig-icd10_endocrine.pdf') dev.off()