diff --git a/DESCRIPTION b/DESCRIPTION index 88a4bfd4..78256407 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 Remotes: - ropensci/jsonvalidate + ropensci/jsonvalidate, + r-lib/gert VignetteBuilder: knitr Language: en-GB diff --git a/NAMESPACE b/NAMESPACE index 9d08d1d8..cb3dd3cf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ S3method(format,outpack_query) export(orderly_artefact) +export(orderly_cleanup) +export(orderly_cleanup_status) export(orderly_dependency) export(orderly_description) export(orderly_global_resource) diff --git a/R/cleanup.R b/R/cleanup.R new file mode 100644 index 00000000..a661827f --- /dev/null +++ b/R/cleanup.R @@ -0,0 +1,119 @@ +##' Find, and delete, file that were generated by running a report. +##' Until you're comfortable with what this will do, you are strongly +##' recommended to run `orderly_cleanup_status` first to see what will +##' be deleted. +##' +##' After file deletion, we look through and remove all empty +##' directories; orderly2 has similar semantics here to git where +##' directories are never directly tracked. +##' +##' For recent `gert` (not yet on CRAN) we will ask git if files are +##' ignored; if ignored then they are good candidates for deletion! We +##' encourage you to keep a per-report `.gitignore` that lists files +##' that will copy into the source directory, and then we can use that +##' same information to clean up these files after generation. +##' Importantly, even if a file matches an ignore rule but has been +##' committed to your repository, it will no longer match the ignore +##' rule. +##' +##' @section Notes for user of orderly1: +##' +##' In orderly1 this function has quite different semantics, because +##' the full set of possible files is always knowable from the yaml +##' file. So there, we start from the point of view of the list of +##' files then compare that with the directory. +##' +##' @title Clean up source directory +##' +##' @param name Name of the report directory to clean (i.e., we look +##' at `src/` relative to your orderly root +##' +##' @inheritParams orderly_run +##' +##' @return An (currently unstable) object of class +##' `orderly_cleanup_status` within which the element `delete` +##' indicates files that would be deleted (for +##' `orderly_cleanup_status`) or that were deleted (for +##' `orderly_cleanup`) +##' +##' @export +orderly_cleanup <- function(name = NULL, root = NULL, locate = TRUE) { + status <- orderly_cleanup_status(name, root, locate) + if (length(status$delete) > 0) { + withr::with_dir(status$path, fs::file_delete(status$delete)) + } + delete_empty_directories(status$path) + invisible(status) +} + + +##' @export +##' @rdname orderly_cleanup +orderly_cleanup_status <- function(name = NULL, root = NULL, locate = TRUE) { + p <- get_active_packet() + is_active <- !is.null(p) + if (is_active) { + cli::cli_abort( + "Don't call 'orderly2::orderly_cleanup_status()' from a running packet", + i = "The orderly_cleanup* functions are for interactive use only") + } + + if (is.null(name)) { + path <- getwd() + root <- detect_orderly_interactive_path(path)$path + name <- basename(path) + } else { + root <- orderly_root(root, locate) + validate_orderly_directory(name, root, environment()) + path <- file.path(root$path, "src", name) + root <- root$path + } + + info <- orderly_read(path) + files <- withr::with_dir( + path, + dir(all.files = TRUE, recursive = TRUE, no.. = TRUE)) + + ## Slightly tricky construction here as we need to match all files + ## that are present as directory entries; this is explicit only for + ## dependencies, but we need to work it out ourselves for the rest. + matches_path <- function(x, path, add_slash = TRUE) { + path_dir <- if (add_slash) with_trailing_slash(path) else path + x %in% path | + row_any(vapply(path_dir, function(p) string_starts_with(p, x), + logical(length(x)))) + } + nms_resource <- info$resources + nms_artefact <- unlist(lapply(info$artefacts, "[[", "files")) + nms_dependency <- unlist(lapply(info$dependency, function(x) names(x$use))) + nms_global_resource <- names(info$global_resource) + + role <- cbind(orderly = files == "orderly.R", + resource = matches_path(files, nms_resource), + global_resource = matches_path(files, nms_global_resource), + dependency = matches_path(files, nms_dependency, FALSE), + artefact = matches_path(files, nms_artefact)) + rownames(role) <- files + + v_source <- c("orderly", "resource") + v_derived <- c("global_resource", "dependency", "artefact") + + is_source <- row_any(role[, v_source, drop = FALSE]) + is_derived <- !is_source & row_any(role[, v_derived, drop = FALSE]) + is_ignored <- path_is_git_ignored(file.path("src", name, files), path) + status <- cbind(source = is_source, + derived = is_derived, + ignored = is_ignored) + rownames(status) <- files + + to_delete <- (is_derived | (!is.na(is_ignored) & is_ignored)) & !is_source + delete <- files[to_delete] + + structure(list(name = name, + root = root, + path = path, + role = role, + status = status, + delete = delete), + class = "orderly_cleanup_status") +} diff --git a/R/interactive.R b/R/interactive.R index 4c76c80f..8bf57561 100644 --- a/R/interactive.R +++ b/R/interactive.R @@ -34,12 +34,8 @@ detect_orderly_interactive_path <- function(path = getwd()) { ##' with [orderly2::orderly_dependency]; see of Details section of ##' [orderly2::orderly_run]. ##' -##' @inheritParams orderly_run -##' ##' @return Nothing, called for its side effects ##' @export -orderly_interactive_set_search_options <- function(options = NULL, - root = NULL, - locate = TRUE) { +orderly_interactive_set_search_options <- function(options = NULL) { .interactive$search_options <- options } diff --git a/R/metadata.R b/R/metadata.R index 95fae63b..99126b84 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -283,10 +283,7 @@ static_orderly_dependency <- function(args) { use <- args$use name <- static_string(name) - - ## TODO: this is no longer ok, it might not easily be computable - ## really; see mrc-4398 - use <- static_character_vector(use) + use <- static_character_vector(use, TRUE) ## TODO: allow passing expressions directly in, that will be much ## nicer, but possibly needs some care as we do want a consistent ## approach to NSE here @@ -378,7 +375,7 @@ copy_global <- function(path_root, path_dest, config, files) { static_orderly_global_resource <- function(args) { - list(files = static_character_vector(args)) + unlist(lapply(args, static_character_vector, TRUE), FALSE, TRUE) } @@ -393,12 +390,12 @@ static_string <- function(x) { } -static_character_vector <- function(x) { +static_character_vector <- function(x, named) { if (is.character(x)) { x } else if (is_call(x, "c")) { - x <- lapply(x[-1], static_character_vector) - x <- if (all(vlapply(x, is.character))) unlist(x, FALSE, FALSE) else NULL + x <- lapply(x[-1], static_character_vector, named) + x <- if (all(vlapply(x, is.character))) unlist(x, FALSE, named) else NULL } else { x <- NULL } diff --git a/R/outpack_misc.R b/R/outpack_misc.R index 4d064e83..03a6ce19 100644 --- a/R/outpack_misc.R +++ b/R/outpack_misc.R @@ -47,7 +47,7 @@ not_found_error <- function(message, data) { ## Also note that there might be 0, 1, or more urls depending on the ## way that the repo is configured; this feels ok really. git_info <- function(path) { - repo <- tryCatch(gert::git_open(path), error = function(e) NULL) + repo <- git_open(path) if (is.null(repo)) { return(NULL) } diff --git a/R/outpack_root.R b/R/outpack_root.R index 957b1b17..d06b837a 100644 --- a/R/outpack_root.R +++ b/R/outpack_root.R @@ -422,7 +422,7 @@ validate_packet_has_file <- function(root, id, path) { ## Then, look to see if any of the missing ones are actually directories: msg <- path[!found] - found_if_dir <- vlapply(sub("(? 0) { ret$dependency <- drop_null(dat$dependency, empty = NULL) } + if (length(dat$global_resource) > 0) { + ret$global_resource <- unlist(dat$global_resource, FALSE, TRUE) + } ret } diff --git a/R/util.R b/R/util.R index dae25a66..90f7b17f 100644 --- a/R/util.R +++ b/R/util.R @@ -480,3 +480,50 @@ string_interpolate_simple1 <- function(x, environment, call) { quote_braces <- function(x) { gsub("{", "{{", x, fixed = TRUE) } + + +## Currently supported in gert 1.9000 (which is the 2.0 +## prerelease). Rather than depend on that version directly, requiring +## people to install it from source or r-universe, just sniff for the +## function we need, for now: +gert_git_ignore_path_is_ignored <- function() { + tryCatch(getExportedValue("gert", "git_ignore_path_is_ignored"), + error = function(e) NULL) +} + + +git_open <- function(path) { + tryCatch(gert::git_open(path), error = function(e) NULL) +} + + +path_is_git_ignored <- function(path, root) { + repo <- git_open(root) + gert_fn <- gert_git_ignore_path_is_ignored() + if (is.null(repo) || is.null(gert_fn)) { + rep_len(NA, length(path)) + } else { + gert_fn(path, repo) + } +} + + +row_any <- function(x) { + apply(x, 1, any) +} + + +delete_empty_directories <- function(path) { + paths <- fs::dir_ls(path, type = "directory", recurse = TRUE) + paths <- setdiff(paths[order(nchar(paths), decreasing = TRUE)], path) + for (p in paths) { + if (length(fs::dir_ls(p, all = TRUE)) == 0) { + fs::dir_delete(p) + } + } +} + + +with_trailing_slash <- function(x) { + sub("(?} relative to your orderly root} + +\item{root}{The path to an orderly root directory, or \code{NULL} +(the default) to search for one from the current working +directory if \code{locate} is \code{TRUE}.} + +\item{locate}{Logical, indicating if the configuration should be +searched for. If \code{TRUE} and \code{config} is not given, +then orderly looks in the working directory and up through its +parents until it finds an \code{.outpack} directory} +} +\value{ +An (currently unstable) object of class +\code{orderly_cleanup_status} within which the element \code{delete} +indicates files that would be deleted (for +\code{orderly_cleanup_status}) or that were deleted (for +\code{orderly_cleanup}) +} +\description{ +Find, and delete, file that were generated by running a report. +Until you're comfortable with what this will do, you are strongly +recommended to run \code{orderly_cleanup_status} first to see what will +be deleted. +} +\details{ +After file deletion, we look through and remove all empty +directories; orderly2 has similar semantics here to git where +directories are never directly tracked. + +For recent \code{gert} (not yet on CRAN) we will ask git if files are +ignored; if ignored then they are good candidates for deletion! We +encourage you to keep a per-report \code{.gitignore} that lists files +that will copy into the source directory, and then we can use that +same information to clean up these files after generation. +Importantly, even if a file matches an ignore rule but has been +committed to your repository, it will no longer match the ignore +rule. +} +\section{Notes for user of orderly1}{ + + +In orderly1 this function has quite different semantics, because +the full set of possible files is always knowable from the yaml +file. So there, we start from the point of view of the list of +files then compare that with the directory. +} + diff --git a/man/orderly_interactive_set_search_options.Rd b/man/orderly_interactive_set_search_options.Rd index 3dee55ee..5c34de97 100644 --- a/man/orderly_interactive_set_search_options.Rd +++ b/man/orderly_interactive_set_search_options.Rd @@ -4,25 +4,12 @@ \alias{orderly_interactive_set_search_options} \title{Set search options for interactive use} \usage{ -orderly_interactive_set_search_options( - options = NULL, - root = NULL, - locate = TRUE -) +orderly_interactive_set_search_options(options = NULL) } \arguments{ \item{options}{Optional control over locations, when used with \link{orderly_dependency}; see of Details section of \link{orderly_run}.} - -\item{root}{The path to an orderly root directory, or \code{NULL} -(the default) to search for one from the current working -directory if \code{locate} is \code{TRUE}.} - -\item{locate}{Logical, indicating if the configuration should be -searched for. If \code{TRUE} and \code{config} is not given, -then orderly looks in the working directory and up through its -parents until it finds an \code{.outpack} directory} } \value{ Nothing, called for its side effects diff --git a/tests/testthat/helper-orderly.R b/tests/testthat/helper-orderly.R index 8c3e3671..95b091c4 100644 --- a/tests/testthat/helper-orderly.R +++ b/tests/testthat/helper-orderly.R @@ -91,3 +91,9 @@ skip_on_solaris <- function() { is_windows <- function() { tolower(Sys.info()[["sysname"]]) == "windows" } + + +skip_if_older_gert <- function() { + testthat::skip_if(is.null(gert_git_ignore_path_is_ignored()), + "older gert") +} diff --git a/tests/testthat/test-cleanup.R b/tests/testthat/test-cleanup.R new file mode 100644 index 00000000..6c0bddd0 --- /dev/null +++ b/tests/testthat/test-cleanup.R @@ -0,0 +1,160 @@ +test_that("can cleanup explicit things quite well", { + path <- test_prepare_orderly_example("explicit") + env <- new.env() + path_src <- file.path(path, "src", "explicit") + withr::with_dir(path_src, + sys.source("orderly.R", env)) + status <- withr::with_dir(path_src, orderly_cleanup_status()) + + expect_s3_class(status, "orderly_cleanup_status") + expect_setequal(names(status), + c("name", "root", "path", "role", "status", "delete")) + expect_equal(status$name, "explicit") + expect_equal(status$root, orderly_root(path, FALSE)$path) + expect_equal(normalizePath(status$path, mustWork = TRUE), + normalizePath(file.path(status$root, "src", status$name))) + paths <- c("data.csv", "mygraph.png", "orderly.R") + expect_equal(status$role, + cbind(orderly = set_names(c(FALSE, FALSE, TRUE), paths), + resource = c(TRUE, FALSE, FALSE), + global_resource = FALSE, + dependency = FALSE, + artefact = c(FALSE, TRUE, FALSE))) + expect_equal(status$status, + cbind(source = set_names(c(TRUE, FALSE, TRUE), paths), + derived = c(FALSE, TRUE, FALSE), + ignored = NA)) + expect_equal(status$delete, "mygraph.png") + + status <- withr::with_dir(path_src, orderly_cleanup()) + expect_equal(dir(path_src), c("data.csv", "orderly.R")) +}) + + +test_that("can clean up unknown files if gitignored", { + skip_if_older_gert() + path <- test_prepare_orderly_example("explicit") + env <- new.env() + path_src <- file.path(path, "src", "explicit") + helper_add_git(path) + + fs::dir_create(file.path(path_src, c("a", "b/c", "b/d"))) + files <- c("a/x", "b/c/x", "b/c/y") + file.create(file.path(path_src, files)) + + status <- orderly_cleanup_status("explicit", path) + expect_equal( + status$status[c("a/x", "b/c/x", "b/c/y"), ], + cbind(source = set_names(rep(FALSE, 3), files), + derived = FALSE, + ignored = FALSE)) + expect_equal(status$delete, character()) + + writeLines(c("b/"), file.path(path_src, ".gitignore")) + status <- orderly_cleanup_status("explicit", path) + expect_equal( + status$status[c("a/x", "b/c/x", "b/c/y"), ], + cbind(source = set_names(rep(FALSE, 3), files), + derived = FALSE, + ignored = c(FALSE, TRUE, TRUE))) + expect_equal(status$delete, c("b/c/x", "b/c/y")) + + status2 <- orderly_cleanup("explicit", path) + expect_equal(status2, status) + expect_setequal( + dir(path_src, recursive = TRUE, include.dirs = TRUE), + c("a", "a/x", "data.csv", "orderly.R")) +}) + + +test_that("can clean up globals", { + path <- test_prepare_orderly_example("global") + path_src <- file.path(path, "src", "global") + file.create(file.path(path_src, "global_data.csv")) + status <- orderly_cleanup_status("global", path) + + files <- c("global_data.csv", "orderly.R") + expect_setequal(rownames(status$role), files) + expect_equal( + status$role, + cbind(orderly = set_names(c(FALSE, TRUE), files), + resource = FALSE, + global_resource = c(TRUE, FALSE), + dependency = FALSE, + artefact = FALSE)) + expect_equal( + status$status, + cbind(source = set_names(c(FALSE, TRUE), files), + derived = c(TRUE, FALSE), + ignored = NA)) + expect_equal(status$delete, "global_data.csv") +}) + + +test_that("can clean up dependencies", { + path <- test_prepare_orderly_example(c("data", "depends")) + path_src <- file.path(path, "src", "depends") + file.create(file.path(path_src, c("input.rds", "other.rds"))) + status <- orderly_cleanup_status("depends", path) + + files <- c("input.rds", "orderly.R", "other.rds") + expect_setequal(rownames(status$role), files) + expect_equal( + status$role, + cbind(orderly = set_names(c(FALSE, TRUE, FALSE), files), + resource = FALSE, + global_resource = FALSE, + dependency = c(TRUE, FALSE, FALSE), + artefact = FALSE)) + expect_equal( + status$status, + cbind(source = set_names(c(FALSE, TRUE, FALSE), files), + derived = c(TRUE, FALSE, FALSE), + ignored = NA)) + expect_equal(status$delete, "input.rds") +}) + + +test_that("can clean up directories", { + path <- test_prepare_orderly_example("directories") + env <- new.env() + path_src <- file.path(path, "src", "directories") + withr::with_dir(path_src, + sys.source("orderly.R", env)) + status <- orderly_cleanup_status("directories", path) + + files <- c("data/a.csv", "data/b.csv", "orderly.R", + "output/a.rds", "output/b.rds") + expect_equal(rownames(status$role), files) + expect_equal( + status$role, + cbind(orderly = set_names(files == "orderly.R", files), + resource = c(TRUE, TRUE, FALSE, FALSE, FALSE), + global_resource = FALSE, + dependency = FALSE, + artefact = c(FALSE, FALSE, FALSE, TRUE, TRUE))) + expect_equal( + status$status, + cbind(source = set_names(c(TRUE, TRUE, TRUE, FALSE, FALSE), files), + derived = c(FALSE, FALSE, FALSE, TRUE, TRUE), + ignored = NA)) + expect_equal(status$delete, c("output/a.rds", "output/b.rds")) + + status2 <- orderly_cleanup("directories", path) + expect_equal(status2, status) + expect_setequal( + dir(path_src, recursive = TRUE, include.dirs = TRUE), + c("data", "data/a.csv", "data/b.csv", "orderly.R")) +}) + + +test_that("Don't call cleanup on an active packet", { + path <- test_prepare_orderly_example("data") + path_src <- file.path(path, "src", "data") + append_lines(file.path(path_src, "orderly.R"), + "orderly2::orderly_cleanup_status()") + expect_error( + orderly_run("data", root = path), + "Don't call 'orderly2::orderly_cleanup_status()' from a running packet", + fixed = TRUE) +}) diff --git a/tests/testthat/test-read.R b/tests/testthat/test-read.R index 100d8c80..aaef877f 100644 --- a/tests/testthat/test-read.R +++ b/tests/testthat/test-read.R @@ -22,19 +22,24 @@ test_that("Skip over computed resources", { test_that("Can read string vector literals from expressions", { - expect_equal(static_character_vector(quote("x")), "x") - expect_equal(static_character_vector(quote(c("x"))), "x") - expect_equal(static_character_vector(quote(c("x", "y"))), c("x", "y")) - expect_equal(static_character_vector(quote(c("x", c("y", "z")))), + expect_equal(static_character_vector(quote("x"), FALSE), "x") + expect_equal(static_character_vector(quote(c("x")), FALSE), "x") + expect_equal(static_character_vector(quote(c("x", "y")), FALSE), c("x", "y")) + expect_equal(static_character_vector(quote(c("x", c("y", "z"))), FALSE), c("x", "y", "z")) - expect_null(static_character_vector(quote(a))) - expect_null(static_character_vector(quote(c(a)))) - expect_null(static_character_vector(quote(c(a, "x")))) - expect_null(static_character_vector(quote(c(a, b)))) - expect_null(static_character_vector(quote(c("x", c(a, b))))) - expect_null(static_character_vector(quote(c("x", c("y", b))))) - expect_null(static_character_vector(quote(c(a, c("x", "y"))))) + expect_equal(static_character_vector(quote(c(a = "x")), FALSE), + "x") + expect_equal(static_character_vector(quote(c(a = "x")), TRUE), + c(a = "x")) + + expect_null(static_character_vector(quote(a), FALSE)) + expect_null(static_character_vector(quote(c(a)), FALSE)) + expect_null(static_character_vector(quote(c(a, "x")), FALSE)) + expect_null(static_character_vector(quote(c(a, b)), FALSE)) + expect_null(static_character_vector(quote(c("x", c(a, b))), FALSE)) + expect_null(static_character_vector(quote(c("x", c("y", b))), FALSE)) + expect_null(static_character_vector(quote(c(a, c("x", "y"))), FALSE)) }) diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R index 19a1b2b5..094e381f 100644 --- a/tests/testthat/test-util.R +++ b/tests/testthat/test-util.R @@ -259,3 +259,11 @@ test_that("prevent problematic string interpolations", { c(x = msg$message, "i" = "Was interpolating string 'a/${f}'")) }) + + +test_that("fall back if gert does not support ignored files", { + mock_ns <- mockery::mock(identity, stop("not found")) + mockery::stub(gert_git_ignore_path_is_ignored, "getExportedValue", mock_ns) + expect_equal(gert_git_ignore_path_is_ignored(), identity) + expect_null(gert_git_ignore_path_is_ignored()) +})