diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..b8f44d9
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,3 @@
+^RankinLabTools\.Rproj$
+^\.Rproj\.user$
+^LICENSE\.md$
diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000..e69de29
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cd67eac
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.Rproj.user
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..bd99932
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,26 @@
+Package: RankinLabTools
+Title: General Data Cleaning Tools for the Rankin Lab
+Version: 0.0.0.9000
+Date: 2020-04-27
+Authors@R: 
+    person(given = "Patrick",
+           family = "Callahan",
+           role = c("aut", "cre"),
+           email = "patricktcallahan18@gmail.com",
+           comment = c(ORCID = "0000-0003-1769-7580"))
+Maintainer: Patrick Callahan <patricktcallahan18@gmail.com>
+URL: https://orcid.org/0000-0003-1769-7580
+Description: General data cleaning tools for the Rankin lab, including helpers for combining MAC and UDS encounters, fuzzy-joining visits by date, pairing current and past observations and scoring their difference, reverse scoring, imputing scale scores when items are missing, and related convenience functions.
+License: MIT + file LICENSE
+Encoding: UTF-8
+LazyData: true
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.1.0
+Imports: 
+    dplyr,
+    readr,
+    rlang,
+    glue,
+    stringr,
+    fuzzyjoin,
+    lubridate
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ab1673c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2020
+COPYRIGHT HOLDER: Patrick Callahan
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..a5efdb6
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2020 Patrick Callahan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..bb41438
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,13 @@
+# Generated by roxygen2: do not edit by hand
+
+export(add_test_name_to_vars)
+export(calculate_current_past_difference)
+export(combine_current_and_past_observations)
+export(combine_mac_uds_encounters)
+export(custom_fuzzy_join)
+export(impute_missing_values)
+export(negative_values_to_na)
+export(read_csv_by_string_search)
+export(remove_rows_with_mult_NAs)
+export(reverse_scoring)
+export(select_by_test_name)
diff --git a/R/Calculate Current Past Difference.R b/R/Calculate Current Past Difference.R
new file mode 100644
index 0000000..b47f0d9
--- /dev/null
+++ b/R/Calculate Current Past Difference.R
@@ -0,0 +1,47 @@
+
+
+#' Calculate difference between vectors of columns
+#' This function accepts two equally sized sets of columns and calculates their difference (that is, past - current = difference).
+#' Intended to be used in conjunction with the combine_current_and_past_observations() function.
+#'
+#' @param dataframe dataframe object
+#' @param variables_past variables where the observation is in the past
+#' @param variables_current variables where the observation is current
+#' @param variables_output_prefix add a meaningful prefix to the name of the difference columns
+#' @param PIDN defaults to PIDN; can be any other ID
+#' @param DCDate defaults to DCDate; can be any other date variable
+#'
+#' @return dataframe
+#' @export
+#'
+#' @examples
+calculate_current_past_difference <- function(dataframe, variables_past, variables_current, variables_output_prefix, PIDN=PIDN, DCDate = DCDate) {
+  variables_current <- dplyr::enquo(variables_current)
+  variables_past <- dplyr::enquo(variables_past)
+  PIDN <- dplyr::enquo(PIDN)
+  DCDate <- dplyr::enquo(DCDate)
+
+  length1 <- dataframe %>%
+    dplyr::select(!!variables_past) %>%
+    length()
+  length2 <- dataframe %>%
+    dplyr::select(!!variables_current) %>%
+    length()
+  if (length1 != length2){stop("Your past and current variable sets differ in length; you are not using this function correctly. The map() call within this function will not work.")}
+
+  difference_names <- dataframe %>%
+    dplyr::select(!!variables_current) %>%
+    dplyr::rename_all(~gsub("current","difference",.)) %>%
+    colnames()
+
+  dataframe %<>%
+    dplyr::select(!!variables_past,!!variables_current,everything()) %>%
+    dplyr::bind_cols(purrr::map2(.x = .[, 1:(length1)], .y = .[, (1+length1):(length1+length2)], .f = ~.x - .y)) %>% # calculate difference between selected columns; PAST - CURRENT = DIFFERENCE
+    dplyr::rename_at(dplyr::vars(!colnames(dataframe)), ~paste(difference_names)) %>% # Rename new columns to indicate they're the difference columns
+    dplyr::select(everything(),!!variables_past,!!variables_current,ends_with("difference")) # arrange new columns
+
+
+  return(dataframe)
+}
+
+
diff --git a/R/Combine Current and Past Observations.R b/R/Combine Current and Past Observations.R
new file mode 100644
index 0000000..e00454c
--- /dev/null
+++ b/R/Combine Current and Past Observations.R
@@ -0,0 +1,35 @@
+
+
+#' Combine Current and Past Observations
+#'
+#' @param dataframe dataframe object
+#' @param CurrentPast variable indicating whether the observation is a "Current" or "Past" record; default column name expected is CurrentPast, but can be replaced with any column with "current" and "past"/"before" characters
+#' @param DCDate date column; defaults to DCDate
+#' @param PIDN identifier column; defaults to PIDN
+#'
+#' @return dataframe
+#' @export
+#'
+#' @examples
+combine_current_and_past_observations <- function(dataframe, CurrentPast = CurrentPast, DCDate=DCDate, PIDN=PIDN) {
+  CurrentPast <- dplyr::enquo(CurrentPast)
+  DCDate <- dplyr::enquo(DCDate)
+  PIDN <- dplyr::enquo(PIDN)
+
+  current_dataframe <- dataframe %>%
+    dplyr::filter(!!CurrentPast %in% c("CURRENT","Current"))
+
+  past_dataframe <- dataframe %>%
+    dplyr::filter(!!CurrentPast %in% c("PAST","Past","Before","BEFORE")) %>%
+    dplyr::mutate(DCDate = lubridate::as_date(DCDate)) %>%
+    dplyr::group_by(PIDN) %>% # These three lines act to select only the earliest instance of completing a PAST test within each PIDN
+    dplyr::filter(DCDate == min(DCDate)) %>%
+    dplyr::ungroup()
+
+  final_dataframe <- current_dataframe %>%
+    dplyr::left_join(past_dataframe, by=c("PIDN"="PIDN"), suffix=c("_current","_past"))
+
+  return(final_dataframe)
+}
+
+
diff --git a/R/Combine MAC and UDS encounters.R b/R/Combine MAC and UDS encounters.R
new file mode 100644
index 0000000..c084157
--- /dev/null
+++ b/R/Combine MAC and UDS encounters.R
@@ -0,0 +1,39 @@
+
+#' Combine MAC and UDS Encounters
+#' This function allows you to combine MAC and UDS encounters for a particular test where the
+#' encounters have been separated into different rows, but really occurred around the same time period.
+#' You can determine what range of days qualifies as a "single visit" using the day_range argument.
+#'
+#' @param dataframe dataframe object
+#' @param questions_to_combine select the variables that should be combined. This will be at least the item-level variables, and potentially your summary scores
+#' @param day_range range of days over which MAC and UDS visits can be considered the "same" visit; default of 30 days
+#' @param DCDate date identifier; defaults to DCDate variable
+#' @param PIDN person identifier; defaults to PIDN
+#'
+#' @return dataframe
+#' @export
+#'
+#' @examples
+combine_mac_uds_encounters <- function(dataframe, questions_to_combine, day_range = 30, DCDate = DCDate, PIDN = PIDN){
+  questions_to_combine <- dplyr::enquo(questions_to_combine)
+  DCDate <- dplyr::enquo(DCDate)
+  PIDN <- dplyr::enquo(PIDN)
+
+  dataframe %<>%
+    dplyr::mutate_at(dplyr::vars(!!questions_to_combine), ~dplyr::na_if(.,-6)) %>%
+    dplyr::mutate(DCDate = lubridate::as_date(!!DCDate)) %>%
+    dplyr::arrange(!!PIDN,DCDate) %>%
+    dplyr::group_by(!!PIDN) %>%
+    dplyr::mutate(closeness_lag = dplyr::near(DCDate,dplyr::lead(DCDate),tol=day_range)) %>%
+    dplyr::ungroup() %>%
+    dplyr::mutate(newDate = dplyr::case_when(closeness_lag == TRUE ~ dplyr::lead(DCDate),
+                                             closeness_lag == FALSE ~ DCDate,
+                                             is.na(closeness_lag) ~ DCDate)) %>%
+    dplyr::group_by(!!PIDN,newDate) %>%
+    dplyr::summarize_all(list(~dplyr::first(stats::na.omit(.)))) %>%
+    dplyr::ungroup()
+
+  return(dataframe)
+
+}
+
diff --git a/R/Custom Fuzzy Join.R b/R/Custom Fuzzy Join.R
new file mode 100644
index 0000000..3cd6c7a
--- /dev/null
+++ b/R/Custom Fuzzy Join.R
@@ -0,0 +1,43 @@
+
+#' Custom Fuzzy Join: joins two dataframes by participant ID and by dates that fall within a specified interval of one another
+#'
+#' @param df1 first dataframe argument
+#' @param DCDate1 first date argument
+#' @param PIDN1 first ID argument; defaults to PIDN
+#' @param df2 second dataframe argument
+#' @param DCDate2 second date argument
+#' @param PIDN2 second ID argument; defaults to PIDN
+#' @param mode used to specify if the join should be "left", "right", "full", "semi", or "anti"; defaults to "left"
+#' @param interval absolute value of days over which to match visits; default of +/-90 days
+#'
+#' @return
+#' @export
+#'
+#' @examples
+custom_fuzzy_join <- function(df1, DCDate1, PIDN1 = PIDN,
+                              df2, DCDate2, PIDN2 = PIDN,
+                              mode = "left", interval = 90) {
+
+  PIDN1 <- dplyr::enquo(PIDN1)
+  PIDN2 <- dplyr::enquo(PIDN2)
+  DCDate1 <- dplyr::enquo(DCDate1)
+  DCDate2 <- dplyr::enquo(DCDate2)
+
+  joining_pidn <- rlang::set_names(dplyr::quo_name(PIDN2),dplyr::quo_name(PIDN1))
+  joining_date <- rlang::set_names(dplyr::quo_name(DCDate2),dplyr::quo_name(DCDate1))
+
+  DCDate1 <- df1 %>%
+    dplyr::select(!!DCDate1) %>%
+    dplyr::mutate_at(dplyr::vars(!!DCDate1), ~lubridate::as_date(.))
+  DCDate2 <- df2 %>%
+    dplyr::select(!!DCDate2) %>%
+    dplyr::mutate_at(dplyr::vars(!!DCDate2), ~lubridate::as_date(.))
+
+  df1 %>%
+    fuzzyjoin::fuzzy_join(df2,
+                          by=c(joining_pidn,joining_date),
+                          match_fun = list(`==`, function(x,y)abs(x-y)%
+    colnames() %>%
+    stringr::str_subset(test_name, negate=TRUE)
+
+  if (location == 1){
+    dataframe %<>%
+      dplyr::rename_at(dplyr::vars(all_of(rename_me)), ~paste(test_name,.,sep="_"))
+  }
+  # Use the rename_me vector to rename columns only where needed
+
+  if (location == 0){
+    dataframe %<>%
+      dplyr::rename_at(dplyr::vars(all_of(rename_me)), ~paste(sep="_",.,test_name))
+  }
+
+  return(dataframe)
+
+}
+
diff --git a/R/Impute Missing Values.R b/R/Impute Missing Values.R
new file mode 100644
index 0000000..42129b8
--- /dev/null
+++ b/R/Impute Missing Values.R
@@ -0,0 +1,57 @@
+
+
+#' Impute for Missing Values
+#' This function calculates imputed scores where item-level data is missing, and allows the threshold for the proportion of missing questions to be adjusted
+#'
+#' @param dataframe dataframe object
+#' @param vars_to_impute vector of variables from which a score will be calculated/imputed
+#' @param scale_name output name of the scale being calculated
+#' @param missing_threshold maximum proportion of the scale questions allowed to be missing (i.e. the scale will not be calculated if the proportion of missing questions is greater than this value)
+#' @param toggle_warning set to FALSE to suppress the warning shown when missing_threshold is changed from its default
+#'
+#' @return
+#' @export
+#'
+#' @examples
+impute_missing_values <- function(dataframe, vars_to_impute, scale_name, missing_threshold = 0.2, toggle_warning = TRUE){
+  if(missing_threshold != 0.2 & toggle_warning){warning("Default threshold is 0.2 or 20% in LAVA. Do you have a compelling reason to change this? (You can turn this warning off; toggle_warning = FALSE)")}
+
+  vars_to_impute <- rlang::enquo(vars_to_impute)
+  length_imputed_columns <- dataframe %>%
+    dplyr::select(!!vars_to_impute) %>%
+    length()
+  quantity_missing <- dplyr::quo_name(scale_name) %>%
+    stringr::str_replace(., ":","_") %>%
+    paste0(.,"_quantity_missing")
+  prop_missing_Q <- dplyr::quo_name(scale_name) %>%
+    stringr::str_replace(.,":","_") %>%
+    paste0(.,"_prop_missing_Q")
+  row_Avg <- dplyr::quo_name(scale_name) %>%
+    stringr::str_replace(.,":","_") %>%
+    paste0(.,"_avg")
+  row_Sum <- dplyr::quo_name(scale_name) %>%
+    stringr::str_replace(.,":","_") %>%
+    paste0(.,"_imputed_sum")
+
+  dataframe %<>%
+    dplyr::mutate_at(dplyr::vars(!!vars_to_impute), ~dplyr::case_when(. >= 0 ~ .)) %>%
+    dplyr::mutate(!!row_Avg := dplyr::select(.,!!vars_to_impute) %>% rowMeans(.,na.rm=TRUE),
+                  !!quantity_missing := dplyr::select(.,!!vars_to_impute) %>% is.na() %>% rowSums(),
+                  !!prop_missing_Q := !!dplyr::sym(quantity_missing)/length_imputed_columns,
+                  !!row_Sum := dplyr::case_when(!!dplyr::sym(prop_missing_Q) <= missing_threshold ~ round(!!dplyr::sym(row_Avg)*length_imputed_columns))
+                  )
+# Lines below imputed averages to the empty columns and then calculated the sum of the rows for
+# the selected columns, but this item-level replacement should not be done!!
+#    dplyr::mutate_at(dplyr::vars(!!vars_to_impute), ~dplyr::case_when(!!dplyr::sym(prop_missing_Q) <= missing_threshold & is.na(.) ~ !!dplyr::sym(row_Avg), TRUE ~ .)) %>%
+#    dplyr::mutate(!!row_sum := dplyr::select(.,!!vars_to_impute) %>% rowSums(.))
+
+  return(dataframe)
+
+  # When imputing values, item-level values should not be replaced!!!
+  # To calculate new sub-scale scores: take the average of all existing values in the sub-scale, and multiply this
+  # average by the total number of items in the sub-scale (that is, in R terms, the length() of the columns used
+  # in the sub-scale)
+}
+
+
+
diff --git a/R/Read CSV by String.R b/R/Read CSV by String.R
new file mode 100644
index 0000000..5eb45ea
--- /dev/null
+++ b/R/Read CSV by String.R
@@ -0,0 +1,29 @@
+
+
+#' Read CSV By String Search
+#' This convenience function searches either the default directory or a specified directory for a single .csv file whose name at least partially matches the string_search argument. The file is then read into R.
+#'
+#' @param string_search string to search for in the directory
+#' @param file_parent can specify the parent directory of the file if needed
+#'
+#' @return
+#' @export
+#'
+#' @examples
+read_csv_by_string_search <- function(string_search, file_parent) {
+  if(rlang::is_missing(file_parent)){
+    destination <- here::here()
+  } else {
+    destination <- here::here(file_parent)
+  }
+
+
+  pathway <- fs::dir_info(destination) %>%
+    dplyr::filter_at(dplyr::vars(path), dplyr::all_vars(stringr::str_detect(.,pattern=glue::glue("{string_search}")))) %>%
+    dplyr::pull(path)
+  if (length(pathway) < 1){stop(glue::glue("No file with partial name = '{string_search}' found in this directory. Specify a different folder within this project using the file_parent argument."))}
+  if (length(pathway) > 1){stop(glue::glue("{length(pathway)} files with partial name = '{string_search}' found in this directory. Check folder contents for duplicate files, and ensure your string_search argument is specific."))}
+
+  return(readr::read_csv(paste0(pathway)))
+
+}
diff --git a/R/Remove Rows with Multiple NAs.R b/R/Remove Rows with Multiple NAs.R
new file mode 100644
index 0000000..9abafee
--- /dev/null
+++ b/R/Remove Rows with Multiple NAs.R
@@ -0,0 +1,30 @@
+
+
+
+#' Remove Rows with Multiple NAs
+#' This function allows you to remove rows if ALL specified columns are NA
+#'
+#' @param dataframe dataframe object
+#' @param columns variables to check for missingness
+#'
+#' @return
+#' @export
+#'
+#' @examples
+remove_rows_with_mult_NAs <- function(dataframe, columns){
+  columns <- dplyr::enquo(columns)
+
+  dataframe %<>%
+    dplyr::filter_at(dplyr::vars(!!columns), dplyr::any_vars(!is.na(.)))
+
+  return(dataframe)
+  # This function allows you to remove rows from a dataframe given that ALL variables specified in the *columns*
+  # argument are NA.
+
+  # Should rename this function to drop_if_all_NA()
+  # Should create another version of the function where a row is dropped if any variables in the list are
+  # missing, and this would be drop_if_any_NA(). It's counterintuitive, but I think I would just need to
+  # change the any_vars() call to all_vars() in the function to implement this change.
+}
+
+
diff --git a/R/Reverse Scoring.R b/R/Reverse Scoring.R
new file mode 100644
index 0000000..6dd22eb
--- /dev/null
+++ b/R/Reverse Scoring.R
@@ -0,0 +1,22 @@
+
+#' Reverse Scoring
+#' Allows for reverse-scoring of any specified columns
+#'
+#' @param dataframe dataframe object
+#' @param reversed_columns variables to have their score reversed
+#' @param extremum_max maximum attainable score on the variables selected; needs to be the same maximum for all vars
+#'
+#' @return
+#' @export
+#'
+#' @examples
+reverse_scoring <- function(dataframe, reversed_columns, extremum_max) {
+  reversed_columns <- dplyr::enquo(reversed_columns)
+
+  dataframe %<>%
+    dplyr::mutate_at(dplyr::vars(!!reversed_columns), ~(extremum_max+1 - .))
+
+  return(dataframe)
+}
+
+
diff --git a/R/Select Tests by Name.R b/R/Select Tests by Name.R
new file mode 100644
index 0000000..bf4767f
--- /dev/null
+++ b/R/Select Tests by Name.R
@@ -0,0 +1,23 @@
+
+#' Select Tests by Name: selects all columns whose names match any of the supplied test names
+#'
+#' @param dataframe dataframe object
+#' @param test_names character vector of all names of tests that one would like to select from the dataframe; assumes that the tests share a common string (e.g. "_DART" or "_BISBAS")
+#' @param ... any additional columns to be selected from the dataframe
+#'
+#' @return
+#' @export
+#'
+#' @examples
+select_by_test_name <- function(dataframe, test_names, ...){
+  test_names <- paste0(test_names,collapse="|")
+
+  column_list <- dataframe %>%
+    colnames() %>%
+    grep(test_names,.,ignore.case = TRUE, value = TRUE)
+
+  dataframe %>%
+    dplyr::select(...,dplyr::all_of(column_list))
+
+}
+
diff --git a/R/Set Negative Values to NA.R b/R/Set Negative Values to NA.R
new file mode 100644
index 0000000..40023db
--- /dev/null
+++ b/R/Set Negative Values to NA.R
@@ -0,0 +1,23 @@
+
+
+#' Negative Values to NA
+#' Allows the user to define any numeric variables they would like to change to NA if the value of that variable is less than 0
+#'
+#' @param dataframe dataframe object
+#' @param vars_to_clean any numeric variables where negative values should be set to NA
+#'
+#' @return
+#' @export
+#'
+#' @examples
+negative_values_to_na <- function(dataframe, vars_to_clean){
+
+  vars_to_clean <- dplyr::enquo(vars_to_clean)
+
+  dataframe %<>%
+    dplyr::mutate_at(dplyr::vars(!!vars_to_clean), ~dplyr::case_when(. >= 0 ~ .))
+  return(dataframe)
+
+}
+
+
diff --git a/RankinLabTools.Rproj b/RankinLabTools.Rproj
new file mode 100644
index 0000000..69fafd4
--- /dev/null
+++ b/RankinLabTools.Rproj
@@ -0,0 +1,22 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/man/add_test_name_to_vars.Rd b/man/add_test_name_to_vars.Rd
new file mode 100644
index 0000000..7108bbe
--- /dev/null
+++ b/man/add_test_name_to_vars.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Final Variable Naming.R
+\name{add_test_name_to_vars}
+\alias{add_test_name_to_vars}
+\title{Add Test Name to Variables
+Adds a name to all variables in the dataframe; can specify if the name appears at the beginning or end of the variables}
+\usage{
+add_test_name_to_vars(dataframe, test_name, location = 0)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{test_name}{name to be added to all variables; must be quoted (e.g. "name")}
+
+\item{location}{where to append the test_name; 0 = end of variables, 1 = beginning of variables; defaults to 0}
+}
+\value{
+
+}
+\description{
+Add Test Name to Variables
+Adds a name to all variables in the dataframe; can specify if the name appears at the beginning or end of the variables
+}
diff --git a/man/calculate_current_past_difference.Rd b/man/calculate_current_past_difference.Rd
new file mode 100644
index 0000000..ddbf596
--- /dev/null
+++ b/man/calculate_current_past_difference.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Calculate Current Past Difference.R
+\name{calculate_current_past_difference}
+\alias{calculate_current_past_difference}
+\title{Calculate difference between vectors of columns
+This function accepts two equally sized sets of columns and calculates their difference (that is, past - current = difference).
+Intended to be used in conjunction with the combine_current_and_past_observations() function.}
+\usage{
+calculate_current_past_difference(
+  dataframe,
+  variables_past,
+  variables_current,
+  variables_output_prefix,
+  PIDN = PIDN,
+  DCDate = DCDate
+)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{variables_past}{variables where the observation is in the past}
+
+\item{variables_current}{variables where the observation is current}
+
+\item{variables_output_prefix}{add a meaningful prefix to the name of the difference columns}
+
+\item{PIDN}{defaults to PIDN; can be any other ID}
+
+\item{DCDate}{defaults to DCDate; can be any other date variable}
+}
+\value{
+dataframe
+}
+\description{
+Calculate difference between vectors of columns
+This function accepts two equally sized sets of columns and calculates their difference (that is, past - current = difference).
+Intended to be used in conjunction with the combine_current_and_past_observations() function.
+}
diff --git a/man/combine_current_and_past_observations.Rd b/man/combine_current_and_past_observations.Rd
new file mode 100644
index 0000000..d3fd5c9
--- /dev/null
+++ b/man/combine_current_and_past_observations.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Combine Current and Past Observations.R
+\name{combine_current_and_past_observations}
+\alias{combine_current_and_past_observations}
+\title{Combine Current and Past Observations}
+\usage{
+combine_current_and_past_observations(
+  dataframe,
+  CurrentPast = CurrentPast,
+  DCDate = DCDate,
+  PIDN = PIDN
+)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{CurrentPast}{variable indicating whether the observation is a "Current" or "Past" record; default column name expected is CurrentPast, but can be replaced with any column with "current" and "past"/"before" characters}
+
+\item{DCDate}{date column; defaults to DCDate}
+
+\item{PIDN}{identifier column; defaults to PIDN}
+}
+\value{
+dataframe
+}
+\description{
+Combine Current and Past Observations
+}
diff --git a/man/combine_mac_uds_encounters.Rd b/man/combine_mac_uds_encounters.Rd
new file mode 100644
index 0000000..ea2aa96
--- /dev/null
+++ b/man/combine_mac_uds_encounters.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Combine MAC and UDS encounters.R
+\name{combine_mac_uds_encounters}
+\alias{combine_mac_uds_encounters}
+\title{Combine MAC and UDS Encounters
+This function allows you to combine MAC and UDS encounters for a particular test where the
+encounters have been separated into different rows, but really occurred around the same time period.
+You can determine what range of days qualifies as a "single visit" using the day_range argument.}
+\usage{
+combine_mac_uds_encounters(
+  dataframe,
+  questions_to_combine,
+  day_range = 30,
+  DCDate = DCDate,
+  PIDN = PIDN
+)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{questions_to_combine}{select the variables that should be combined. This will be at least the item-level variables, and potentially your summary scores}
+
+\item{day_range}{range of days over which MAC and UDS visits can be considered the "same" visit; default of 30 days}
+
+\item{DCDate}{date identifier; defaults to DCDate variable}
+
+\item{PIDN}{person identifier; defaults to PIDN}
+}
+\value{
+dataframe
+}
+\description{
+Combine MAC and UDS Encounters
+This function allows you to combine MAC and UDS encounters for a particular test where the
+encounters have been separated into different rows, but really occurred around the same time period.
+You can determine what range of days qualifies as a "single visit" using the day_range argument.
+}
diff --git a/man/custom_fuzzy_join.Rd b/man/custom_fuzzy_join.Rd
new file mode 100644
index 0000000..89dc24c
--- /dev/null
+++ b/man/custom_fuzzy_join.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Custom Fuzzy Join.R
+\name{custom_fuzzy_join}
+\alias{custom_fuzzy_join}
+\title{Custom Fuzzy Join: joins two dataframes by participant ID and by dates that fall within a specified interval of one another}
+\usage{
+custom_fuzzy_join(
+  df1,
+  DCDate1,
+  PIDN1 = PIDN,
+  df2,
+  DCDate2,
+  PIDN2 = PIDN,
+  mode = "left",
+  interval = 90
+)
+}
+\arguments{
+\item{df1}{first dataframe argument}
+
+\item{DCDate1}{first date argument}
+
+\item{PIDN1}{first ID argument; defaults to PIDN}
+
+\item{df2}{second dataframe argument}
+
+\item{DCDate2}{second date argument}
+
+\item{PIDN2}{second ID argument; defaults to PIDN}
+
+\item{mode}{used to specify if the join should be "left", "right", "full", "semi", or "anti"; defaults to "left"}
+
+\item{interval}{absolute value of days over which to match visits; default of +/-90 days}
+}
+\value{
+
+}
+\description{
+Custom Fuzzy Join: joins two dataframes by participant ID and by dates that fall within a specified interval of one another
+}
diff --git a/man/impute_missing_values.Rd b/man/impute_missing_values.Rd
new file mode 100644
index 0000000..0f2b842
--- /dev/null
+++ b/man/impute_missing_values.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Impute Missing Values.R
+\name{impute_missing_values}
+\alias{impute_missing_values}
+\title{Impute for Missing Values
+This function calculates imputed scores where item-level data is missing, and allows the threshold for the proportion of missing questions to be adjusted}
+\usage{
+impute_missing_values(
+  dataframe,
+  vars_to_impute,
+  scale_name,
+  missing_threshold = 0.2,
+  toggle_warning = TRUE
+)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{vars_to_impute}{vector of variables from which a score will be calculated/imputed}
+
+\item{scale_name}{output name of the scale being calculated}
+
+\item{missing_threshold}{maximum proportion of the scale questions allowed to be missing (i.e. the scale will not be calculated if the proportion of missing questions is greater than this value)}
+
+\item{toggle_warning}{set to FALSE to suppress the warning shown when missing_threshold is changed from its default}
+}
+\value{
+
+}
+\description{
+Impute for Missing Values
+This function calculates imputed scores where item-level data is missing, and allows the threshold for the proportion of missing questions to be adjusted
+}
diff --git a/man/negative_values_to_na.Rd b/man/negative_values_to_na.Rd
new file mode 100644
index 0000000..e7bd59f
--- /dev/null
+++ b/man/negative_values_to_na.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Set Negative Values to NA.R
+\name{negative_values_to_na}
+\alias{negative_values_to_na}
+\title{Negative Values to NA
+Allows the user to define any numeric variables they would like to change to NA if the value of that variable is less than 0}
+\usage{
+negative_values_to_na(dataframe, vars_to_clean)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{vars_to_clean}{any numeric variables where negative values should be set to NA}
+}
+\value{
+
+}
+\description{
+Negative Values to NA
+Allows the user to define any numeric variables they would like to change to NA if the value of that variable is less than 0
+}
diff --git a/man/read_csv_by_string_search.Rd b/man/read_csv_by_string_search.Rd
new file mode 100644
index 0000000..bd54503
--- /dev/null
+++ b/man/read_csv_by_string_search.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Read CSV by String.R
+\name{read_csv_by_string_search}
+\alias{read_csv_by_string_search}
+\title{Read CSV By String Search
+This convenience function searches either the default directory or a specified directory for a single .csv file whose name at least partially matches the string_search argument. The file is then read into R.}
+\usage{
+read_csv_by_string_search(string_search, file_parent)
+}
+\arguments{
+\item{string_search}{string to search for in the directory}
+
+\item{file_parent}{can specify the parent directory of the file if needed}
+}
+\value{
+
+}
+\description{
+Read CSV By String Search
+This convenience function searches either the default directory or a specified directory for a single .csv file whose name at least partially matches the string_search argument. The file is then read into R.
+}
diff --git a/man/remove_rows_with_mult_NAs.Rd b/man/remove_rows_with_mult_NAs.Rd
new file mode 100644
index 0000000..73038cd
--- /dev/null
+++ b/man/remove_rows_with_mult_NAs.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Remove Rows with Multiple NAs.R
+\name{remove_rows_with_mult_NAs}
+\alias{remove_rows_with_mult_NAs}
+\title{Remove Rows with Multiple NAs
+This function allows you to remove rows if ALL specified columns are NA}
+\usage{
+remove_rows_with_mult_NAs(dataframe, columns)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{columns}{variables to check for missingness}
+}
+\value{
+
+}
+\description{
+Remove Rows with Multiple NAs
+This function allows you to remove rows if ALL specified columns are NA
+}
diff --git a/man/reverse_scoring.Rd b/man/reverse_scoring.Rd
new file mode 100644
index 0000000..79561e9
--- /dev/null
+++ b/man/reverse_scoring.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Reverse Scoring.R
+\name{reverse_scoring}
+\alias{reverse_scoring}
+\title{Reverse Scoring
+Allows for reverse-scoring of any specified columns}
+\usage{
+reverse_scoring(dataframe, reversed_columns, extremum_max)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{reversed_columns}{variables to have their score reversed}
+
+\item{extremum_max}{maximum attainable score on the variables selected; needs to be the same maximum for all vars}
+}
+\value{
+
+}
+\description{
+Reverse Scoring
+Allows for reverse-scoring of any specified columns
+}
diff --git a/man/select_by_test_name.Rd b/man/select_by_test_name.Rd
new file mode 100644
index 0000000..f1c965f
--- /dev/null
+++ b/man/select_by_test_name.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Select Tests by Name.R
+\name{select_by_test_name}
+\alias{select_by_test_name}
+\title{Select Tests by Name: selects all columns whose names match any of the supplied test names}
+\usage{
+select_by_test_name(dataframe, test_names, ...)
+}
+\arguments{
+\item{dataframe}{dataframe object}
+
+\item{test_names}{character vector of all names of tests that one would like to select from the dataframe; assumes that the tests share a common string (e.g. "_DART" or "_BISBAS")}
+
+\item{...}{any additional columns to be selected from the dataframe}
+}
+\value{
+
+}
+\description{
+Select Tests by Name: selects all columns whose names match any of the supplied test names
+}
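
Usage sketch (illustrative, not part of the committed code): a minimal example of the scale-scoring helpers. It assumes the package is installed with its internal pipe operators available, and uses hypothetical item columns BIS1-BIS4 scored 0-4, with -6 standing in as a missing-data code.

library(RankinLabTools)
library(dplyr)

# Hypothetical item-level data; the column names are illustrative only
bis <- tibble::tibble(
  PIDN = c(1001, 1002, 1003),
  BIS1 = c(3, -6, 2),   # -6 used here as a missing-data code
  BIS2 = c(1, 2, NA),
  BIS3 = c(4, 0, 1),
  BIS4 = c(2, 3, 3)
)

bis_scored <- bis %>%
  negative_values_to_na(vars_to_clean = c(BIS1:BIS4)) %>%                  # negatives (including -6) become NA
  reverse_scoring(reversed_columns = c(BIS2, BIS4), extremum_max = 4) %>%  # 0-4 items become 4-0
  impute_missing_values(vars_to_impute = c(BIS1:BIS4),
                        scale_name = "BIS",
                        missing_threshold = 0.2)
# Adds BIS_avg, BIS_quantity_missing, BIS_prop_missing_Q, and BIS_imputed_sum;
# the imputed sum is only filled when at most 20% of the items are missing.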
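
A second sketch, under the same assumptions, for the current/past workflow: combine_current_and_past_observations() keeps each participant's "Current" record plus their earliest "Past" record, and calculate_current_past_difference() then scores past - current. PIDN, DCDate, and CurrentPast are the package's default column names; Q1 and Q2 are hypothetical.

library(RankinLabTools)
library(dplyr)

visits <- tibble::tibble(
  PIDN        = c(1001, 1001, 1001, 1002, 1002),
  DCDate      = c("2020-03-01", "2018-06-15", "2019-01-10", "2020-02-20", "2017-11-05"),
  CurrentPast = c("Current", "Past", "Past", "Current", "Past"),
  Q1          = c(5, 3, 4, 2, 2),
  Q2          = c(1, 4, 2, 3, 5)
)

# One row per PIDN: the Current visit joined to the earliest Past visit,
# with overlapping columns suffixed "_current" and "_past"
paired <- visits %>%
  combine_current_and_past_observations()

# Past - current differences for the item columns only
differences <- paired %>%
  calculate_current_past_difference(
    variables_past = c(Q1_past, Q2_past),
    variables_current = c(Q1_current, Q2_current),
    variables_output_prefix = "difference"
  )
# Q1_difference and Q2_difference are appended alongside the *_past/*_current columns.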
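
A last sketch for the file and naming helpers. The folder name, CSV name, and test strings are hypothetical; read_csv_by_string_search() assumes exactly one matching .csv exists under the project (or under file_parent).

library(RankinLabTools)
library(dplyr)

# Reads the single .csv under the project's "data" folder whose name contains "bisbas"
raw <- read_csv_by_string_search("bisbas", file_parent = "data")

# Keep the ID/date columns plus every column whose name contains "BIS" or "BAS"
bisbas <- raw %>%
  select_by_test_name(test_names = c("BIS", "BAS"), PIDN, DCDate)

# Append "_BISBAS" to every column whose name does not already contain "BISBAS",
# so the variables stay identifiable after merging with other tests
bisbas_labelled <- bisbas %>%
  add_test_name_to_vars(test_name = "BISBAS", location = 0)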