From d19e83c35f4c62aea7834c5fcea51c683c8d2fff Mon Sep 17 00:00:00 2001 From: AnnaKS123 Date: Mon, 7 Aug 2023 15:40:31 +0100 Subject: [PATCH] added comments --- R/error_handling.R | 1 + R/health_burden.R | 23 ++++++--- R/injury_death_to_yll.R | 76 ++++++++++++++++++++++------ R/population_attributable_fraction.R | 2 +- multi_city_script.R | 9 ++-- 5 files changed, 83 insertions(+), 28 deletions(-) diff --git a/R/error_handling.R b/R/error_handling.R index b768b22e..ad1044f5 100644 --- a/R/error_handling.R +++ b/R/error_handling.R @@ -1,3 +1,4 @@ +#' CURRENTLY NOT BEING CALLED #' @export error_handling <- function(code, function_name, variable_name) { output <- diff --git a/R/health_burden.R b/R/health_burden.R index 292a84e1..83a815b8 100644 --- a/R/health_burden.R +++ b/R/health_burden.R @@ -20,8 +20,8 @@ #' - find the sum of the relative risks (RR) for the specific disease for each age and sex category for the non-reference scenario #' - calculate the PIF (potential impact fraction), i.e the proportional change in the sum of relative risks between the reference #' and the on-reference scenario for each age and sex category -#' - calculate the health burden for the non-reference scenario by multiplying the current burden of disease by the PIF -#' (combine_health_and_pif.R) +#' - calculate the health burden (deaths and ylls) for the non-reference scenario compared to the reference scenario by multiplying +#' the current burden of disease by the PIF (combine_health_and_pif.R) #' - if confidence intervals are required, loop through the upper and lower confidence interval limits #' and calculate the health burden for deaths and YLLs using the upper and lower confidence relative risks. If no upper and #' lower relative risk values exist, use the mean value instead @@ -127,12 +127,14 @@ health_burden <- function(ind_ap_pa, conf_int = F, combined_AP_PA = T){ # calculate PIF for this scenario and convert to vector pif_scen <- ((pif_ref[,2] - pif_temp[,2]) / pif_ref[,2]) %>% pull() - # Calculate ylls by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change + #Calculate the difference in ylls between the non-reference and the reference scenario + # by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change # between the reference scenario and the given scenario for each age and sex category yll_dfs <- combine_health_and_pif(pif_values=pif_scen, hc = gbd_ylls_disease) ylls[[yll_name]] <- yll_dfs - # Calculate deaths by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change + # Calculate the difference in deaths between the non-reference and the reference scenario + # by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change # between the reference scenario and the given scenario for each age and sex category death_dfs <- combine_health_and_pif(pif_values=pif_scen,hc=gbd_deaths_disease) deaths[[deaths_name]] <- death_dfs @@ -199,10 +201,15 @@ health_burden <- function(ind_ap_pa, conf_int = F, combined_AP_PA = T){ -#' Join disease health burden and injury +#' Join disease health burden and injury data #' #' Join the two data frames for health burden: that from disease, and that from road-traffic injury #' +#' This function performs the following steps: +#' - extract the yll and deaths data from the combined AP and PA pathways +#' - extract the yll and deaths data from the injury data +#' - create one dataframe for yll and one for deaths containing all the AP, PA and injury data +#' #' @param ind_ap_pa list (deaths, YLLs) of data frames of all demographic groups' burdens for diseases #' @param inj list (deaths, YLLs) of data frames of all demographic groups' burdens for road-traffic injury #' @@ -213,11 +220,11 @@ join_hb_and_injury <- function(ind_ap_pa,inj){ deaths <- ind_ap_pa$deaths ylls <- ind_ap_pa$ylls - # Select deaths columns + # Select deaths columns from injury data inj_deaths <- dplyr::select(inj, c(age_cat, sex, contains("deaths"))) - # Select yll columns + # Select yll columns from injury data inj_ylls <- dplyr::select(inj, c(age_cat, sex, contains("yll"))) - # Join injuries data to global datasets + # Join injuries data to global deaths and yll datasets deaths <- dplyr::left_join(deaths, inj_deaths, by = c("age_cat", "sex")) ylls <- dplyr::left_join(ylls, inj_ylls, by = c("age_cat", "sex")) list(deaths=deaths,ylls=ylls) diff --git a/R/injury_death_to_yll.R b/R/injury_death_to_yll.R index 50e4c2b6..0bce841d 100644 --- a/R/injury_death_to_yll.R +++ b/R/injury_death_to_yll.R @@ -1,37 +1,68 @@ -#' Map injury death burden to YLL burden +#' Map injury death burden to YLL (years of life lost) burden #' -#' Calculated the YLL burden from the death burden of injury based on the ratio in the GBD data. +#' Calculate the YLL burden from the death burden of injury based on the ratio in the GBD data. #' -#' @param injuries data frame of injury deaths +#' The function performs the following steps: +#' - join the estimated injury deaths with the global burden of disease (GBD) injury data by age and sex +#' - multiply the estimated injury deaths by the yll to injury death ratio in the GBD data to get YLL +#' from the estimated injury deaths +#' - extract and create matrices for deaths and ylls with one column for each scenario +#' - create dataframe A with ylls and deaths of reference scenario +#' - create dataframe B showing the differences in deaths and yll for each non-reference +#' scenario to the reference scenario +#' - if confidence intervals are required: +#' - create dataframe with ylls and deaths of reference scenario using both the lower and upper +#' relative risk boundary values +#' - create dataframe showing the differences in deaths and yll for each non-reference +#' scenario to the reference scenario using both the lower and upper relative +#' risk boundary values +#' - add the confidence upper and lower interval boundary values to the two output dataframes A and B +#' +#' +#' @param injuries data frame of injury deaths by age and sex category for each scenario incl. baseline #' #' @return list of injury deaths and YLLs (which are differences from reference scenario) plus the values in the reference scenario. #' #' @export injury_death_to_yll <- function(injuries){ - # injuries is a tibble, GBD_INJ_YLL is a data.frame, returns a tibble + + # join the injury deaths with the YLLs lost due to injuries in the global burden of disease data joined_injury <- dplyr::left_join(injuries, GBD_INJ_YLL[,c('sex_age','sex','yll_dth_ratio')], by=c("sex_age",'sex')) + # multiply the injury deaths by the yll to injury death ratio in the global burden of disease injury data joined_injury$YLL <- joined_injury$Deaths*joined_injury$yll_dth_ratio + # extract columns of interest death_and_yll <- dplyr::select(joined_injury, c('age_cat','sex','scenario','Deaths','YLL')) + # extract and create matrices for deaths and ylls with one column for each scenario x_deaths <- dplyr::select(death_and_yll, -YLL) - x_deaths <- spread(x_deaths,scenario, Deaths) + x_deaths <- spread(x_deaths,scenario, Deaths) # create one column for age_cat, sex and each scenario (incl baseline) for deaths x_yll <- dplyr::select(death_and_yll, -Deaths) - x_yll <- spread(x_yll,scenario, YLL) + x_yll <- spread(x_yll,scenario, YLL) # create one column for age_cat, sex and each scenario (incl baseline) for ylls + # set reference and other scenarios ref_scen <- REFERENCE_SCENARIO - if (REFERENCE_SCENARIO == 'Baseline'){ - ref_scen = 'baseline'} + if (REFERENCE_SCENARIO == 'Baseline'){ref_scen = 'baseline'} ref_scen_index <- which(SCEN==ref_scen) calc_scen <- SCEN[SCEN!=ref_scen] + # find indexes of columns in x_deaths (and therefore also x_yll) data that belong to the non-reference scenarios calc_scen_index <- which(colnames(x_deaths)%in%calc_scen) + # create dataframe with ylls and deaths of reference scenario ref_injuries <- as.data.frame(cbind(x_deaths[,1:2],deaths=x_deaths[[ref_scen]],ylls=x_yll[[ref_scen]])) + + # calculate the differences in injury deaths between the non-reference and the reference scenario deaths <- t(repmat(unlist(ref_injuries$deaths),NSCEN,1)) - x_deaths[,calc_scen_index,drop=F] + + # calculate the differences in injury ylls between the non-reference and the reference scenario ylls <- t(repmat(unlist(ref_injuries$ylls),NSCEN,1)) - x_yll[,calc_scen_index,drop=F] + + # create one dataframe showing the differences in deaths and yll for each non-reference scenario to the + # reference scenario deaths_yll_injuries <- as.data.frame(cbind(as.data.frame(x_deaths[,1:2]),deaths, ylls)) + # update columns names of deaths_yll_injuries to state whether the column shows injury deaths or ylls metric <- c("deaths", "yll") k <- 1 for (i in 1: 2) @@ -41,31 +72,36 @@ injury_death_to_yll <- function(injuries){ } # Repeat the above logic for lower and upper interval - - if (any(colnames(injuries) %in% c('Deaths_lb', 'Deaths_ub'))) - { + if (any(colnames(injuries) %in% c('Deaths_lb', 'Deaths_ub'))){ - # injuries is a tibble, GBD_INJ_YLL is a data.frame, returns a tibble + ## lower interval boundary + # join the injury deaths with the YLLs lost due to injuries in the global burden of disease data joined_injury_lb <- dplyr::left_join(injuries, GBD_INJ_YLL[,c('sex_age','sex','yll_dth_ratio')], by=c("sex_age",'sex')) + # multiply the injury deaths by the yll to injury death ratio in the global burden of disease injury data joined_injury_lb$YLL_lb <- joined_injury_lb$Deaths_lb * joined_injury_lb$yll_dth_ratio death_and_yll_lb <- dplyr::select(joined_injury_lb, c('age_cat','sex','scenario','Deaths_lb','YLL_lb')) + # extract and create matrices for deaths and ylls with one column for each scenario x_deaths_lb <- dplyr::select(death_and_yll_lb, -YLL_lb) x_deaths_lb <- spread(x_deaths_lb,scenario, Deaths_lb) x_yll_lb <- dplyr::select(death_and_yll_lb, -Deaths_lb) x_yll_lb <- spread(x_yll_lb,scenario, YLL_lb) + # set reference and other scenarios ref_scen_lb <- REFERENCE_SCENARIO ref_scen_index_lb <- which(SCEN==ref_scen_lb) calc_scen <- SCEN[SCEN!=ref_scen_lb] calc_scen_index <- which(colnames(x_deaths_lb)%in%calc_scen) + # create one dataframe showing the differences in deaths and yll for each non-reference scenario to the + # reference scenario ref_injuries_lb <- as.data.frame(cbind(x_deaths_lb[,1:2],deaths_lb=x_deaths_lb[[ref_scen_lb]],ylls_lb=x_yll_lb[[ref_scen_lb]])) deaths_lb <- t(repmat(unlist(ref_injuries_lb$deaths_lb),NSCEN,1)) - x_deaths_lb[,calc_scen_index,drop=F] ylls_lb <- t(repmat(unlist(ref_injuries_lb$ylls_lb),NSCEN,1)) - x_yll_lb[,calc_scen_index,drop=F] deaths_yll_injuries_lb <- as.data.frame(cbind(as.data.frame(x_deaths_lb[,1:2]),deaths_lb, ylls_lb)) + # update columns names of deaths_yll_injuries to state whether the column shows injury deaths or ylls metric <- c("deaths", "yll") k <- 1 for (i in 1: 2) @@ -75,27 +111,36 @@ injury_death_to_yll <- function(injuries){ k<-k+1 } - # injuries is a tibble, GBD_INJ_YLL is a data.frame, returns a tibble + + + ## upper confidence interval boundary + # join the injury deaths with the YLLs lost due to injuries in the global burden of disease data joined_injury_ub <- dplyr::left_join(injuries, GBD_INJ_YLL[,c('sex_age','sex','yll_dth_ratio')], by=c("sex_age",'sex')) + # multiply the injury deaths by the yll to injury death ratio in the global burden of disease injury data joined_injury_ub$YLL_ub <- joined_injury_ub$Deaths_ub * joined_injury_ub$yll_dth_ratio death_and_yll_ub <- dplyr::select(joined_injury_ub, c('age_cat','sex','scenario','Deaths_ub','YLL_ub')) + # extract and create matrices for deaths and ylls with one column for each scenario x_deaths_ub <- dplyr::select(death_and_yll_ub, -YLL_ub) x_deaths_ub <- spread(x_deaths_ub,scenario, Deaths_ub) x_yll_ub <- dplyr::select(death_and_yll_ub, -Deaths_ub) x_yll_ub <- spread(x_yll_ub,scenario, YLL_ub) + # set reference and other scenarios ref_scen_ub <- REFERENCE_SCENARIO ref_scen_index_ub <- which(SCEN==ref_scen_ub) calc_scen <- SCEN[SCEN!=ref_scen_ub] calc_scen_index <- which(colnames(x_deaths_ub)%in%calc_scen) - + + # create one dataframe showing the differences in deaths and yll for each non-reference scenario to the + # reference scenario ref_injuries_ub <- as.data.frame(cbind(x_deaths_ub[,1:2],deaths_ub=x_deaths_ub[[ref_scen_ub]],ylls_ub=x_yll_ub[[ref_scen_ub]])) deaths_ub <- t(repmat(unlist(ref_injuries_ub$deaths_ub),NSCEN,1)) - x_deaths_ub[,calc_scen_index,drop=F] ylls_ub <- t(repmat(unlist(ref_injuries_ub$ylls_ub),NSCEN,1)) - x_yll_ub[,calc_scen_index,drop=F] deaths_yll_injuries_ub <- as.data.frame(cbind(as.data.frame(x_deaths_ub[,1:2]),deaths_ub, ylls_ub)) + # update columns names of deaths_yll_injuries to state whether the column shows injury deaths or ylls metric <- c("deaths", "yll") k <- 1 for (i in 1: 2) @@ -105,8 +150,7 @@ injury_death_to_yll <- function(injuries){ k<-k+1 } - # Combine lower and upper datasets - + # add lower and upper boundary values to the reference and the yll and deaths injury differences datasets deaths_yll_injuries <- left_join(deaths_yll_injuries, deaths_yll_injuries_ub) deaths_yll_injuries <- left_join(deaths_yll_injuries, deaths_yll_injuries_lb) diff --git a/R/population_attributable_fraction.R b/R/population_attributable_fraction.R index 39720cdb..ab7a6a82 100644 --- a/R/population_attributable_fraction.R +++ b/R/population_attributable_fraction.R @@ -1,4 +1,4 @@ -#' Calculate population attributable fraction +#' Calculate population attributable fraction - CURRENTLY NOT BEING USED #' #' #' diff --git a/multi_city_script.R b/multi_city_script.R index 312f5345..68c3948c 100644 --- a/multi_city_script.R +++ b/multi_city_script.R @@ -85,16 +85,19 @@ comment <- "Added CO2 emission sampling" # scenario definition scenario_name <- "BOGOTA" # name of scenario to be called -reference_scenario <- 'Baseline' # scenario the other scenarios are compared to +# scenario the other scenarios are compared to, the reference scenario name should always +# be the name of the scenario corresponding to the actual baseline burden of disease and +# other input data for the city +reference_scenario <- 'Baseline' scenario_increase <- 0.05 # increase for each mode in each scenario (used in GLOBAL, BOGOTA, LATAM and AFRICA_INDIA scenarios) -n_scenarios <- 3 # number of scnenarios (not including the baseline scenario) +n_scenarios <- 3 # number of scenarios (not including the baseline scenario) # define which output results to plot, more than 6 cannot be plotted # potential outputs (in yll for all scenarios): c('pa_ap_all_cause', 'pa_ap_IHD', 'pa_total_cancer', 'pa_ap_lung_cancer', 'ap_COPD', # 'pa_ap_stroke', 'pa_ap_T2D', 'ap_LRI', 'pa_breast_cancer', 'pa_colon_cancer', 'pa_endo_cancer', # 'pa_liver_cancer', 'pa_ap_CVD', 'pa_total_dementia', 'pa_myeloma', 'pa_Parkinson', # 'pa_head_neck_cancer', 'pa_stomach_cancer', 'inj') -outputs_to_plot <- c('pa_ap_all_cause','inj','pa_ap_IHD', 'pa_total_cancer', 'pa_ap_lung_cancer', 'ap_COPD') +outputs_to_plot <- c('pa_ap_all_cause', 'ap_COPD', 'pa_total_dementia', 'pa_myeloma', 'ap_LRI','pa_stomach_cancer' ) ############################### No need to change the following ##################################