Skip to content

Commit

Permalink
Merge pull request #55 from FertigLab/54-recommended-function-length
Browse files Browse the repository at this point in the history
modularized input checks to utils.R
  • Loading branch information
jmitchell81 authored Dec 7, 2023
2 parents 7d0e8f7 + 823de02 commit 5162b90
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 63 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
name: main
on:
push:
branches: ['v0.2.2_development']
pull_request:
branches: ['master', 'v0.2.2']
workflow_dispatch:

jobs:
build-check:
uses: FertigLab/actions/.github/workflows/r-build-check.yml@v1
secrets: inherit
86 changes: 24 additions & 62 deletions R/import_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,83 +17,44 @@ NULL
#' @param database_name name of the database being used, stored in output
#' @param gene_conv a tuple of (from, to) or (source, target) if gene conversion to orthologs is desired; options are ENSMUSG, ENSG, MGI, or HGNC
#' @param gene_conv_host host for conversion; default ensembl, could also use mirrors if desired
#' @param alternate_convert boolean if you would like to use a non-ensembl method of conversion (must supply table; not recommended, use only if ensembl is down)
#' @param alternate_convert_table supplied table for non-ensembl method of conversion
#' @param alternate_convert_table supplied table for non-ensembl method of conversion (not recommended, use only if ensembl is down))
#' @return Data frame where each row describes a possible receptor-ligand interaction
#' @export create_rl_map_cellphonedb
#'
#'
create_rl_map_cellphonedb <- function(genes, proteins, interactions, complexes = NULL, database_name = "CellPhoneDB",
gene_conv = NULL, gene_conv_host = "https://www.ensembl.org", alternate_convert = FALSE, alternate_convert_table = NULL) {
gene_conv = NULL, gene_conv_host = "https://www.ensembl.org", alternate_convert_table = NULL) {

# Check input structures:
stopifnot(`genes argument must be file path or dataframe` = (is(genes, "data.frame") | is(genes,
"character")))
stopifnot(`proteins argument must be file path or dataframe` = (is(proteins, "data.frame") | is(proteins,
"character")))
stopifnot(`interactions argument must be file path or dataframe` = (is(interactions, "data.frame") |
is(interactions, "character")))
stopifnot(`complexes argument must be NULL, file path or dataframe` = (is.null(complexes) | is(complexes,
"data.frame") | is(complexes, "character")))
stopifnot(`Database name must be a string` = is(database_name, "character") & length(database_name) ==
1)
stopifnot(`Gene conversion must be NULL or a character vector with 2 items` = (is.null(gene_conv) |
(is(gene_conv, "character") & length(gene_conv) == 2)))
stopifnot(`Gene conversion host must be a string` = is(gene_conv_host, "character") & length(gene_conv_host) ==
1)
stopifnot(`Alternate conversion argument (not recommended) must be TRUE or FALSE` = is(alternate_convert,
"logical"))
if(alternate_convert & is.null(alternate_convert_table)) {
stop("If using alternate conversion table (not recommended), a table must be provided")
}
check_arg(genes, c("character", "data.frame"))
check_arg(proteins, c("character", "data.frame"))
check_arg(interactions, c("character", "data.frame"))
check_arg(complexes, c("character", "data.frame", "NULL"))
check_arg(database_name, c("character"), allow_len = c(1))
check_arg(gene_conv, c("NULL", "character"), allow_len = c(0, 2))
check_arg(gene_conv_host, c("character"), allow_len = c(1))

# Read in files if needed:
if (is(genes, "character")) {
genes <- read.csv(genes, stringsAsFactors = FALSE)
}
if (is(proteins, "character")) {
proteins <- read.csv(proteins, stringsAsFactors = FALSE)
}
if (is(interactions, "character")) {
interactions <- read.csv(interactions, stringsAsFactors = FALSE)
}
if (is(complexes, "character")) {
complexes <- read.csv(complexes, stringsAsFactors = FALSE)
}
genes <- read_if_char(genes)
proteins <- read_if_char(proteins)
interactions <- read_if_char(interactions)
complexes <- read_if_char(complexes)

# replace empty cells in columns annotating gene properties with 'False' There are some
# unannotated genes in database v2.0 that seem to have been fixed in v4.0
gene_features <- c("transmembrane", "peripheral", "secreted", "secreted_highlight", "receptor",
"integrin", "other")
proteins[proteins$receptor == "", colnames(proteins) %in% gene_features] <- "False"

# change cases of True/False syntax from Python to TRUE/FALSE R syntax
for (x in colnames(genes)) {
if (identical(unique(genes[[x]]), c("True", "False")) | identical(unique(genes[[x]]), c("False",
"True"))) {
genes[[x]] <- ifelse(genes[[x]] == "True", TRUE, FALSE)
}
}
for (x in colnames(proteins)) {
if (identical(unique(proteins[[x]]), c("True", "False")) | identical(unique(proteins[[x]]),
c("False", "True"))) {
proteins[[x]] <- ifelse(proteins[[x]] == "True", TRUE, FALSE)
}
}
for (x in colnames(interactions)) {
if (identical(unique(interactions[[x]]), c("True", "False")) | identical(unique(interactions[[x]]),
c("False", "True"))) {
interactions[[x]] <- ifelse(interactions[[x]] == "True", TRUE, FALSE)
}
}
if (!is.null(complexes)) {
for (x in colnames(complexes)) {
if (identical(unique(complexes[[x]]), c("True", "False")) | identical(unique(complexes[[x]]),
c("False", "True"))) {
complexes[[x]] <- ifelse(complexes[[x]] == "True", TRUE, FALSE)
}
}
}
genes <- conv_py_bools(genes)
proteins <- conv_py_bools(proteins)
interactions <- conv_py_bools(interactions)
complexes <- conv_py_bools(complexes)

# gene conversions
if (!is.null(gene_conv) & !identical(gene_conv[1], gene_conv[2])) {
# obtain conversion dictionary
if (alternate_convert) {
if (!is.null(alternate_convert_table)) {
conv_dict <- table_convert_genes(genes$gene_name, from = gene_conv[1], to = gene_conv[2],
alternate_convert_table)
} else {
Expand All @@ -102,6 +63,7 @@ create_rl_map_cellphonedb <- function(genes, proteins, interactions, complexes =
# column 1 is the source gene names used by the reference data base column 2 is the
# orthologous gene names for the organism to which the reference is being converted
}

# Step through the interactions and build rl connections.
rl_map <- NULL
for (i in 1:nrow(interactions)) {
Expand Down
52 changes: 51 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,52 @@
#' Check input arguments
#'
#' Accepts an object and rules for checking, stops if rules not met.
#'
#' @param arg The argument to check
#' @param allow_class Vector of allowed classes
#' @param allow_len Vector of allowed lengths
check_arg <- function(arg, allow_class = c("character"), allow_len = NULL) {
argname <- deparse(substitute(arg))
classes <- paste(allow_class, collapse = ",")
lengths <- paste(allow_len, collapse = ",")

if (!(class(arg) %in% allow_class)) {
stop(sprintf("Class of %s must be one of: %s", argname, classes))
}

if (!is.null(allow_len)) {
if (!(length(arg) %in% allow_len)) {
stop(sprintf("Length of %s must be one of: %s", argname, lengths))
}
}
}

#' Read in data if an object looks like path to it.
#'
#' @param obj
#' @return obj the object itself in case its not a character
read_if_char <- function(obj) {
if (is(obj, "character")) {
obj <- read.csv(obj, stringsAsFactors = FALSE)
}
return(obj)
}


#' Change cases of True/False syntax from Python to TRUE/FALSE R syntax
#'
#' @param obj Object that will be converted
#' @return obj The converted object
conv_py_bools <- function(obj) {
for (x in colnames(obj)) {
bools <- sort(unique(obj[[x]]))
if (identical(bools, c("False", "True"))) {
obj[[x]] <- ifelse(obj[[x]] == "True", TRUE, FALSE)
}
}
return(obj)
}

#' Access database
#'
#' A function to pull database information from a domino object
Expand Down Expand Up @@ -160,4 +209,5 @@ dom_info <- function(dom) {
info = slot(dom, "misc")
return(list("create" = info$create, "build"= info$build,
"build_variables"= info$build_vars))
}
}

36 changes: 36 additions & 0 deletions tests/testthat/test-import_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,39 @@ test_that("building a domino object under set parameters", {
expect_equal(pbmc_dom_built@signaling,
pbmc_dom_built_tiny@signaling)
})

test_that("create_rl_map_cellphonedb fails on wrong input arg type.", {
load(test_path("testdata", "sysdata.rda"))

expect_error(create_rl_map_cellphonedb(
genes = list(), proteins = proteins_test,
interactions = interactions_test, complexes = complexes_test
))

expect_error(create_rl_map_cellphonedb(
genes = genes_test, proteins = list(),
interactions = interactions_test, complexes = complexes_test
))

expect_error(create_rl_map_cellphonedb(
genes = genes_test, proteins = proteins_test,
interactions = list(), complexes = complexes_test
))

expect_error(create_rl_map_cellphonedb(
genes = genes_test, proteins = proteins_test,
interactions = interactions_test, complexes = list()
))

expect_error(create_rl_map_cellphonedb(
genes = genes_test, proteins = proteins_test,
interactions = interactions_test, complexes = complexes_test,
database_name = list()
))

expect_error(create_rl_map_cellphonedb(
genes = genes_test, proteins = proteins_test,
interactions = interactions_test, complexes = complexes_test,
database_name = c("length", ">1")
))
})
20 changes: 20 additions & 0 deletions tests/testthat/test_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
context("domino2")

test_that("bool conversion function works",{
df <- data.frame(list(c1 = c("True", "False"),
c2 = c("False", "True"),
c3 = c(1, 2)),
c4 = c("a", "b"))

c_df <- conv_py_bools(df)

expect_equal(class(c_df$c1), "logical")
expect_equal(class(c_df$c2), "logical")
expect_equal(class(c_df$c3), "numeric")
expect_equal(class(c_df$c4), "character")

})

test_that("read if char tries to read a file", {
expect_error(read_if_char("./file_that_not_exists.csv", "cannot open the connection"))
})

0 comments on commit 5162b90

Please sign in to comment.