Skip to content

Commit

Permalink
added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
AnnaKS123 committed Aug 7, 2023
1 parent 9a4a1f3 commit d19e83c
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 28 deletions.
1 change: 1 addition & 0 deletions R/error_handling.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#' CURRENTLY NOT BEING CALLED
#' @export
error_handling <- function(code, function_name, variable_name) {
output <-
Expand Down
23 changes: 15 additions & 8 deletions R/health_burden.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#' - find the sum of the relative risks (RR) for the specific disease for each age and sex category for the non-reference scenario
#' - calculate the PIF (potential impact fraction), i.e the proportional change in the sum of relative risks between the reference
#' and the on-reference scenario for each age and sex category
#' - calculate the health burden for the non-reference scenario by multiplying the current burden of disease by the PIF
#' (combine_health_and_pif.R)
#' - calculate the health burden (deaths and ylls) for the non-reference scenario compared to the reference scenario by multiplying
#' the current burden of disease by the PIF (combine_health_and_pif.R)
#' - if confidence intervals are required, loop through the upper and lower confidence interval limits
#' and calculate the health burden for deaths and YLLs using the upper and lower confidence relative risks. If no upper and
#' lower relative risk values exist, use the mean value instead
Expand Down Expand Up @@ -127,12 +127,14 @@ health_burden <- function(ind_ap_pa, conf_int = F, combined_AP_PA = T){
# calculate PIF for this scenario and convert to vector
pif_scen <- ((pif_ref[,2] - pif_temp[,2]) / pif_ref[,2]) %>% pull()

# Calculate ylls by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change
#Calculate the difference in ylls between the non-reference and the reference scenario
# by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change
# between the reference scenario and the given scenario for each age and sex category
yll_dfs <- combine_health_and_pif(pif_values=pif_scen, hc = gbd_ylls_disease)
ylls[[yll_name]] <- yll_dfs

# Calculate deaths by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change
# Calculate the difference in deaths between the non-reference and the reference scenario
# by multiplying current burden of disease for particular disease by the PIF value, i.e the expected change
# between the reference scenario and the given scenario for each age and sex category
death_dfs <- combine_health_and_pif(pif_values=pif_scen,hc=gbd_deaths_disease)
deaths[[deaths_name]] <- death_dfs
Expand Down Expand Up @@ -199,10 +201,15 @@ health_burden <- function(ind_ap_pa, conf_int = F, combined_AP_PA = T){



#' Join disease health burden and injury
#' Join disease health burden and injury data
#'
#' Join the two data frames for health burden: that from disease, and that from road-traffic injury
#'
#' This function performs the following steps:
#' - extract the yll and deaths data from the combined AP and PA pathways
#' - extract the yll and deaths data from the injury data
#' - create one dataframe for yll and one for deaths containing all the AP, PA and injury data
#'
#' @param ind_ap_pa list (deaths, YLLs) of data frames of all demographic groups' burdens for diseases
#' @param inj list (deaths, YLLs) of data frames of all demographic groups' burdens for road-traffic injury
#'
Expand All @@ -213,11 +220,11 @@ join_hb_and_injury <- function(ind_ap_pa,inj){

deaths <- ind_ap_pa$deaths
ylls <- ind_ap_pa$ylls
# Select deaths columns
# Select deaths columns from injury data
inj_deaths <- dplyr::select(inj, c(age_cat, sex, contains("deaths")))
# Select yll columns
# Select yll columns from injury data
inj_ylls <- dplyr::select(inj, c(age_cat, sex, contains("yll")))
# Join injuries data to global datasets
# Join injuries data to global deaths and yll datasets
deaths <- dplyr::left_join(deaths, inj_deaths, by = c("age_cat", "sex"))
ylls <- dplyr::left_join(ylls, inj_ylls, by = c("age_cat", "sex"))
list(deaths=deaths,ylls=ylls)
Expand Down
76 changes: 60 additions & 16 deletions R/injury_death_to_yll.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,68 @@
#' Map injury death burden to YLL burden
#' Map injury death burden to YLL (years of life lost) burden
#'
#' Calculated the YLL burden from the death burden of injury based on the ratio in the GBD data.
#' Calculate the YLL burden from the death burden of injury based on the ratio in the GBD data.
#'
#' @param injuries data frame of injury deaths
#' The function performs the following steps:
#' - join the estimated injury deaths with the global burden of disease (GBD) injury data by age and sex
#' - multiply the estimated injury deaths by the yll to injury death ratio in the GBD data to get YLL
#' from the estimated injury deaths
#' - extract and create matrices for deaths and ylls with one column for each scenario
#' - create dataframe A with ylls and deaths of reference scenario
#' - create dataframe B showing the differences in deaths and yll for each non-reference
#' scenario to the reference scenario
#' - if confidence intervals are required:
#' - create dataframe with ylls and deaths of reference scenario using both the lower and upper
#' relative risk boundary values
#' - create dataframe showing the differences in deaths and yll for each non-reference
#' scenario to the reference scenario using both the lower and upper relative
#' risk boundary values
#' - add the confidence upper and lower interval boundary values to the two output dataframes A and B
#'
#'
#' @param injuries data frame of injury deaths by age and sex category for each scenario incl. baseline
#'
#' @return list of injury deaths and YLLs (which are differences from reference scenario) plus the values in the reference scenario.
#'
#' @export
injury_death_to_yll <- function(injuries){

# injuries is a tibble, GBD_INJ_YLL is a data.frame, returns a tibble

# join the injury deaths with the YLLs lost due to injuries in the global burden of disease data
joined_injury <- dplyr::left_join(injuries, GBD_INJ_YLL[,c('sex_age','sex','yll_dth_ratio')], by=c("sex_age",'sex'))

# multiply the injury deaths by the yll to injury death ratio in the global burden of disease injury data
joined_injury$YLL <- joined_injury$Deaths*joined_injury$yll_dth_ratio
# extract columns of interest
death_and_yll <- dplyr::select(joined_injury, c('age_cat','sex','scenario','Deaths','YLL'))

# extract and create matrices for deaths and ylls with one column for each scenario
x_deaths <- dplyr::select(death_and_yll, -YLL)
x_deaths <- spread(x_deaths,scenario, Deaths)
x_deaths <- spread(x_deaths,scenario, Deaths) # create one column for age_cat, sex and each scenario (incl baseline) for deaths
x_yll <- dplyr::select(death_and_yll, -Deaths)
x_yll <- spread(x_yll,scenario, YLL)
x_yll <- spread(x_yll,scenario, YLL) # create one column for age_cat, sex and each scenario (incl baseline) for ylls

# set reference and other scenarios
ref_scen <- REFERENCE_SCENARIO
if (REFERENCE_SCENARIO == 'Baseline'){
ref_scen = 'baseline'}
if (REFERENCE_SCENARIO == 'Baseline'){ref_scen = 'baseline'}
ref_scen_index <- which(SCEN==ref_scen)
calc_scen <- SCEN[SCEN!=ref_scen]
# find indexes of columns in x_deaths (and therefore also x_yll) data that belong to the non-reference scenarios
calc_scen_index <- which(colnames(x_deaths)%in%calc_scen)

# create dataframe with ylls and deaths of reference scenario
ref_injuries <- as.data.frame(cbind(x_deaths[,1:2],deaths=x_deaths[[ref_scen]],ylls=x_yll[[ref_scen]]))

# calculate the differences in injury deaths between the non-reference and the reference scenario
deaths <- t(repmat(unlist(ref_injuries$deaths),NSCEN,1)) - x_deaths[,calc_scen_index,drop=F]

# calculate the differences in injury ylls between the non-reference and the reference scenario
ylls <- t(repmat(unlist(ref_injuries$ylls),NSCEN,1)) - x_yll[,calc_scen_index,drop=F]

# create one dataframe showing the differences in deaths and yll for each non-reference scenario to the
# reference scenario
deaths_yll_injuries <- as.data.frame(cbind(as.data.frame(x_deaths[,1:2]),deaths, ylls))

# update columns names of deaths_yll_injuries to state whether the column shows injury deaths or ylls
metric <- c("deaths", "yll")
k <- 1
for (i in 1: 2)
Expand All @@ -41,31 +72,36 @@ injury_death_to_yll <- function(injuries){
}

# Repeat the above logic for lower and upper interval

if (any(colnames(injuries) %in% c('Deaths_lb', 'Deaths_ub')))
{
if (any(colnames(injuries) %in% c('Deaths_lb', 'Deaths_ub'))){

# injuries is a tibble, GBD_INJ_YLL is a data.frame, returns a tibble
## lower interval boundary
# join the injury deaths with the YLLs lost due to injuries in the global burden of disease data
joined_injury_lb <- dplyr::left_join(injuries, GBD_INJ_YLL[,c('sex_age','sex','yll_dth_ratio')], by=c("sex_age",'sex'))

# multiply the injury deaths by the yll to injury death ratio in the global burden of disease injury data
joined_injury_lb$YLL_lb <- joined_injury_lb$Deaths_lb * joined_injury_lb$yll_dth_ratio
death_and_yll_lb <- dplyr::select(joined_injury_lb, c('age_cat','sex','scenario','Deaths_lb','YLL_lb'))

# extract and create matrices for deaths and ylls with one column for each scenario
x_deaths_lb <- dplyr::select(death_and_yll_lb, -YLL_lb)
x_deaths_lb <- spread(x_deaths_lb,scenario, Deaths_lb)
x_yll_lb <- dplyr::select(death_and_yll_lb, -Deaths_lb)
x_yll_lb <- spread(x_yll_lb,scenario, YLL_lb)

# set reference and other scenarios
ref_scen_lb <- REFERENCE_SCENARIO
ref_scen_index_lb <- which(SCEN==ref_scen_lb)
calc_scen <- SCEN[SCEN!=ref_scen_lb]
calc_scen_index <- which(colnames(x_deaths_lb)%in%calc_scen)

# create one dataframe showing the differences in deaths and yll for each non-reference scenario to the
# reference scenario
ref_injuries_lb <- as.data.frame(cbind(x_deaths_lb[,1:2],deaths_lb=x_deaths_lb[[ref_scen_lb]],ylls_lb=x_yll_lb[[ref_scen_lb]]))
deaths_lb <- t(repmat(unlist(ref_injuries_lb$deaths_lb),NSCEN,1)) - x_deaths_lb[,calc_scen_index,drop=F]
ylls_lb <- t(repmat(unlist(ref_injuries_lb$ylls_lb),NSCEN,1)) - x_yll_lb[,calc_scen_index,drop=F]
deaths_yll_injuries_lb <- as.data.frame(cbind(as.data.frame(x_deaths_lb[,1:2]),deaths_lb, ylls_lb))

# update columns names of deaths_yll_injuries to state whether the column shows injury deaths or ylls
metric <- c("deaths", "yll")
k <- 1
for (i in 1: 2)
Expand All @@ -75,27 +111,36 @@ injury_death_to_yll <- function(injuries){
k<-k+1
}

# injuries is a tibble, GBD_INJ_YLL is a data.frame, returns a tibble


## upper confidence interval boundary
# join the injury deaths with the YLLs lost due to injuries in the global burden of disease data
joined_injury_ub <- dplyr::left_join(injuries, GBD_INJ_YLL[,c('sex_age','sex','yll_dth_ratio')], by=c("sex_age",'sex'))

# multiply the injury deaths by the yll to injury death ratio in the global burden of disease injury data
joined_injury_ub$YLL_ub <- joined_injury_ub$Deaths_ub * joined_injury_ub$yll_dth_ratio
death_and_yll_ub <- dplyr::select(joined_injury_ub, c('age_cat','sex','scenario','Deaths_ub','YLL_ub'))

# extract and create matrices for deaths and ylls with one column for each scenario
x_deaths_ub <- dplyr::select(death_and_yll_ub, -YLL_ub)
x_deaths_ub <- spread(x_deaths_ub,scenario, Deaths_ub)
x_yll_ub <- dplyr::select(death_and_yll_ub, -Deaths_ub)
x_yll_ub <- spread(x_yll_ub,scenario, YLL_ub)

# set reference and other scenarios
ref_scen_ub <- REFERENCE_SCENARIO
ref_scen_index_ub <- which(SCEN==ref_scen_ub)
calc_scen <- SCEN[SCEN!=ref_scen_ub]
calc_scen_index <- which(colnames(x_deaths_ub)%in%calc_scen)


# create one dataframe showing the differences in deaths and yll for each non-reference scenario to the
# reference scenario
ref_injuries_ub <- as.data.frame(cbind(x_deaths_ub[,1:2],deaths_ub=x_deaths_ub[[ref_scen_ub]],ylls_ub=x_yll_ub[[ref_scen_ub]]))
deaths_ub <- t(repmat(unlist(ref_injuries_ub$deaths_ub),NSCEN,1)) - x_deaths_ub[,calc_scen_index,drop=F]
ylls_ub <- t(repmat(unlist(ref_injuries_ub$ylls_ub),NSCEN,1)) - x_yll_ub[,calc_scen_index,drop=F]
deaths_yll_injuries_ub <- as.data.frame(cbind(as.data.frame(x_deaths_ub[,1:2]),deaths_ub, ylls_ub))

# update columns names of deaths_yll_injuries to state whether the column shows injury deaths or ylls
metric <- c("deaths", "yll")
k <- 1
for (i in 1: 2)
Expand All @@ -105,8 +150,7 @@ injury_death_to_yll <- function(injuries){
k<-k+1
}

# Combine lower and upper datasets

# add lower and upper boundary values to the reference and the yll and deaths injury differences datasets
deaths_yll_injuries <- left_join(deaths_yll_injuries, deaths_yll_injuries_ub)
deaths_yll_injuries <- left_join(deaths_yll_injuries, deaths_yll_injuries_lb)

Expand Down
2 changes: 1 addition & 1 deletion R/population_attributable_fraction.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Calculate population attributable fraction
#' Calculate population attributable fraction - CURRENTLY NOT BEING USED
#'
#'
#'
Expand Down
9 changes: 6 additions & 3 deletions multi_city_script.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,19 @@ comment <- "Added CO2 emission sampling"

# scenario definition
scenario_name <- "BOGOTA" # name of scenario to be called
reference_scenario <- 'Baseline' # scenario the other scenarios are compared to
# scenario the other scenarios are compared to, the reference scenario name should always
# be the name of the scenario corresponding to the actual baseline burden of disease and
# other input data for the city
reference_scenario <- 'Baseline'
scenario_increase <- 0.05 # increase for each mode in each scenario (used in GLOBAL, BOGOTA, LATAM and AFRICA_INDIA scenarios)
n_scenarios <- 3 # number of scnenarios (not including the baseline scenario)
n_scenarios <- 3 # number of scenarios (not including the baseline scenario)

# define which output results to plot, more than 6 cannot be plotted
# potential outputs (in yll for all scenarios): c('pa_ap_all_cause', 'pa_ap_IHD', 'pa_total_cancer', 'pa_ap_lung_cancer', 'ap_COPD',
# 'pa_ap_stroke', 'pa_ap_T2D', 'ap_LRI', 'pa_breast_cancer', 'pa_colon_cancer', 'pa_endo_cancer',
# 'pa_liver_cancer', 'pa_ap_CVD', 'pa_total_dementia', 'pa_myeloma', 'pa_Parkinson',
# 'pa_head_neck_cancer', 'pa_stomach_cancer', 'inj')
outputs_to_plot <- c('pa_ap_all_cause','inj','pa_ap_IHD', 'pa_total_cancer', 'pa_ap_lung_cancer', 'ap_COPD')
outputs_to_plot <- c('pa_ap_all_cause', 'ap_COPD', 'pa_total_dementia', 'pa_myeloma', 'ap_LRI','pa_stomach_cancer' )


############################### No need to change the following ##################################
Expand Down

0 comments on commit d19e83c

Please sign in to comment.