Skip to content

Commit

Permalink
Merge pull request #179 from mrc-ide/refactor-expand-dirs
Browse files Browse the repository at this point in the history
Refactor directory expansion.
  • Loading branch information
plietar authored Sep 12, 2024
2 parents 625317d + e616d8e commit f19865c
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 56 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: orderly2
Title: Orderly Next Generation
Version: 1.99.36
Version: 1.99.37
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Robert", "Ashton", role = "aut"),
Expand Down
26 changes: 6 additions & 20 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -391,28 +391,14 @@ copy_shared_resource <- function(path_root, path_dest, config, files, call) {
shared_dir))
}

here <- files$here
there <- files$there

assert_file_exists_relative(there, workdir = shared_path,
assert_file_exists_relative(files$there, workdir = shared_path,
name = "Shared resource file", call = call)
src <- file.path(shared_path, there)
dst <- file.path(path_dest, here)

is_dir <- is_directory(file.path(shared_path, there))
fs::dir_create(file.path(path_dest, dirname(here)))
if (any(is_dir)) {
fs::dir_copy(src[is_dir], dst[is_dir])
## Update the names that will be used in the metadata:
files <- lapply(src[is_dir], dir)
here <- replace_ragged(here, is_dir, Map(file.path, here[is_dir], files))
there <- replace_ragged(there, is_dir, Map(file.path, there[is_dir], files))
}
if (any(!is_dir)) {
copy_files(src[!is_dir], dst[!is_dir], overwrite = TRUE)
}

data_frame(here = here, there = there)
files_expanded <- expand_dirs(files, shared_path)
copy_files(fs::path(shared_path, files_expanded$there),
fs::path(path_dest, files_expanded$here),
overwrite = TRUE)
files_expanded
}


Expand Down
28 changes: 10 additions & 18 deletions R/outpack_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ orderly_copy_files <- function(..., files, dest, overwrite = TRUE,
}
}

plan <- plan_copy_files(root, id, files$there, files$here, environment())
plan <- plan_copy_files(root, id, files, environment())
name <- outpack_metadata_core(id, root)$name

tryCatch(
Expand Down Expand Up @@ -149,25 +149,17 @@ orderly_copy_files <- function(..., files, dest, overwrite = TRUE,
}


plan_copy_files <- function(root, id, there, here, call = NULL) {
assert_relative_path(there, name = "File", workdir = id, call = call)
validate_packet_has_file(root, id, there, call)
is_dir <- grepl("/$", there)
if (any(is_dir)) {
meta <- outpack_metadata_core(id, root)
files <- meta$files$path
expanded <- lapply(which(is_dir), function(i) {
p <- there[[i]]
j <- string_starts_with(p, files)
nms <- file.path(sub("/+$", "", here[[i]]),
string_drop_prefix(p, files[j]))
set_names(files[j], nms)
})
plan_copy_files <- function(root, id, files, call = NULL) {
assert_relative_path(files$there, name = "File", workdir = id, call = call)
validate_packet_has_file(root, id, files$there, call)

there <- replace_ragged(there, is_dir, lapply(expanded, unname))
here <- replace_ragged(here, is_dir, lapply(expanded, names))
meta <- outpack_metadata_core(id, root)
is_dir <- function(p) grepl("/$", p)
list_files <- function(p) {
j <- string_starts_with(p, meta$files$path)
string_drop_prefix(p, meta$files$path[j])
}
data_frame(there, here)
expand_dirs_virtual(files, is_dir, list_files)
}


Expand Down
57 changes: 45 additions & 12 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,53 @@ vcapply <- function(X, FUN, ...) { # nolint
}


## TODO: also replace copy_shared with this
expand_dirs <- function(paths, workdir) {
if (length(paths) == 0) {
return(character())
#' Expand directories into their content lists.
#'
#' This function does not access the filesystem directly and instead calls the
#' given `is_dir` and `list_files` callback. This allows using the function with
#' files that do not exist on disk yet, such as those listed in a packet's
#' metadata.
#'
#' @param files either a character vector or a dataframe with columns `there`
#' and `here`.
#' @param is_dir a function from a character vector to a logical vector,
#' indicating whether each path is a directory needing expansion or not.
#' @param list_files a function from a character scalar to a character vector,
#' enumerating the contents of the directory. The return values must *not*
#' include the directory path as a prefix.
#' @return a modified version of `files`, where directories have been replaced
#' by their contents. If `files` was a data_frame, both the `there` and `here`
#' columns are modified.
#' @noRd
expand_dirs_virtual <- function(files, is_dir, list_files) {
if (is.character(files)) {
dirs <- is_dir(files)
expanded <- lapply(files[dirs], list_files)
replace_ragged(files, dirs, Map(fs::path, files[dirs], expanded))
} else {
dirs <- is_dir(files$there)
expanded <- lapply(files$there[dirs], list_files)

there <- replace_ragged(files$there, dirs,
Map(fs::path, files$there[dirs], expanded))
here <- replace_ragged(files$here, dirs,
Map(fs::path, files$here[dirs], expanded))

data_frame(here, there)
}
withr::local_dir(workdir)
i <- is_directory(paths)
if (any(i)) {
contents <- lapply(paths[i], function(p) {
as.character(fs::dir_ls(p, all = TRUE, type = "file", recurse = TRUE))
})
paths <- replace_ragged(paths, i, contents)
}


expand_dirs <- function(files, workdir) {
assert_scalar_character(workdir)

is_dir <- function(p) is_directory(fs::path(workdir, p))
list_files <- function(p) {
full_path <- fs::path(workdir, p)
files <- fs::dir_ls(full_path, all = TRUE, type = "file", recurse = TRUE)
fs::path_rel(files, full_path)
}
paths
expand_dirs_virtual(files, is_dir, list_files)
}


Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-outpack-packet.R
Original file line number Diff line number Diff line change
Expand Up @@ -583,15 +583,15 @@ test_that("can depend based on a simple query", {
p$depends[[1]],
list(packet = id$b[[3]],
query = "latest()",
files = data.frame(there = "data.rds", here = "1.rds")))
files = data.frame(here = "1.rds", there = "data.rds")))

query <- orderly_query("latest(parameter:i < 3)", name = "a")
outpack_packet_use_dependency(p, query, c("2.rds" = "data.rds"))
expect_mapequal(
p$depends[[2]],
list(packet = id$a[[2]],
query = 'latest(parameter:i < 3 && name == "a")',
files = data.frame(there = "data.rds", here = "2.rds")))
files = data.frame(here = "2.rds", there = "data.rds")))
})


Expand Down Expand Up @@ -790,8 +790,8 @@ test_that("can pull in directories", {
p2 <- outpack_packet_start_quietly(path_src2, "b", root = root)
outpack_packet_use_dependency(p2, 'latest(name == "a")', c(d = "data/"))
expect_equal(p2$depends[[1]]$files,
data_frame(there = file.path("data", letters[1:6]),
here = file.path("d", letters[1:6])))
data_frame(here = file.path("d", letters[1:6]),
there = file.path("data", letters[1:6])))
})


Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-run.R
Original file line number Diff line number Diff line change
Expand Up @@ -1347,7 +1347,7 @@ test_that("can read about dependencies", {
r <- readRDS(file.path(path, "archive", "depends", id2, "depends.rds"))
expect_equal(r$id, id1)
expect_equal(r$name, "data")
expect_equal(r$files, data_frame(there = "data.rds", here = "input.rds"))
expect_equal(r$files, data_frame(here = "input.rds", there = "data.rds"))

res <- withr::with_dir(
path_src,
Expand Down
98 changes: 98 additions & 0 deletions tests/testthat/test-util.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,104 @@ test_that("read_string strips newlines", {
expect_equal(result, "12345678")
})

describe("expand_dirs_virtual", {
files <- list(
"d1" = c("f2", "f3"),
"d2" = c("f4", "d3/f5"),
"d2/d3" = c("f5"))
is_dir <- function(p) p %in% c("d1", "d2", "d2/d3")
list_files <- function(p) files[[p]]

check <- function(object, expected) {
expect_equal(expand_dirs_virtual(object, is_dir, list_files), expected)
}

it("accepts a character vector", {
check("f1", "f1")
check("d1", c("d1/f2", "d1/f3"))
check(c("f1", "d1"), c("f1", "d1/f2", "d1/f3"))
check("d2", c("d2/f4", "d2/d3/f5"))
})

it("accepts a dataframe", {
check(
data_frame(here = "g1", there = "f1"),
data_frame(here = "g1", there = "f1"))

check(
data_frame(here = "dest", there = "d1"),
data_frame(here = c("dest/f2", "dest/f3"),
there = c("d1/f2", "d1/f3")))

check(
data_frame(here = c("g1", "dest"), there = c("f1", "d1")),
data_frame(here = c("g1", "dest/f2", "dest/f3"),
there = c("f1", "d1/f2", "d1/f3")))

check(
data_frame(here = "dest", there = "d2"),
data_frame(here = c("dest/f4", "dest/d3/f5"),
there = c("d2/f4", "d2/d3/f5")))

check(
data_frame(here = c("foo", "bar"), there = c("d2", "d2")),
data_frame(here = c("foo/f4", "foo/d3/f5", "bar/f4", "bar/d3/f5"),
there = c("d2/f4", "d2/d3/f5", "d2/f4", "d2/d3/f5")))
})
})

describe("expand_dirs", {
p <- withr::local_tempdir()
files <- c("f1", "d1/f2", "d1/f3", "d2/f4", "d2/d3/f5")
fs::dir_create(fs::path(p, unique(dirname(files))))
fs::file_create(fs::path(p, files))

it("accepts a character vector", {
check <- function(object, expected) {
expect_setequal(expand_dirs(object, p), expected)
}

check("f1", "f1")
check("d1", c("d1/f2", "d1/f3"))
check(c("f1", "d1"), c("f1", "d1/f2", "d1/f3"))
check("d2", c("d2/f4", "d2/d3/f5"))
})

it("accepts a dataframe", {
check <- function(object, expected) {
result <- expand_dirs(object, p)
# This compares the dataframes ignoring the order, which is not
# deterministic
expect_setequal(unname(split(result, seq_len(nrow(result)))),
unname(split(expected, seq_len(nrow(expected)))))
}

check(
data_frame(here = "g1", there = "f1"),
data_frame(here = "g1", there = "f1"))

check(
data_frame(here = "dest", there = "d1"),
data_frame(here = c("dest/f2", "dest/f3"), there = c("d1/f2", "d1/f3")))

check(
data_frame(here = c("g1", "dest"), there = c("f1", "d1")),
data_frame(here = c("g1", "dest/f2", "dest/f3"),
there = c("f1", "d1/f2", "d1/f3")))

check(
data_frame(here = "dest", there = "d2"),
data_frame(here = c("dest/f4", "dest/d3/f5"),
there = c("d2/f4", "d2/d3/f5")))

check(
data_frame(here = c("foo", "bar"), there = c("d2", "d2")),
data_frame(here = c("foo/f4", "foo/d3/f5", "bar/f4", "bar/d3/f5"),
there = c("d2/f4", "d2/d3/f5", "d2/f4", "d2/d3/f5")))
})
})


test_that("fill_missing_names works", {
expect_equal(fill_missing_names(NULL), NULL)
expect_equal(fill_missing_names(c("a", "b")), c(a = "a", b = "b"))
Expand Down

0 comments on commit f19865c

Please sign in to comment.