Add write(), data_types() and comment to "read_write" options

SciViews · Aug 25, 2018 · 0b0743a · 0b0743a
1 parent 7ce085c
commit 0b0743a
Show file tree

Hide file tree

Showing 14 changed files with 387 additions and 70 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,16 @@
 Package: data.io
 Type: Package
-Version: 1.0.1
+Version: 1.1.0
 Title: Data Input/Output, Read or Write Data from Files or Datasets in R Packages in Different Formats
-Description: Read or write data from many different formats (tabular datasets, images,
-  ...) into R objects. Add labels and units in different languages.
+Description: Read or write data from many different formats (tabular datasets,
+  from statistic software, ...) into R objects. Add labels and units in
+  different languages.
 Authors@R: c(person("Philippe", "Grosjean", role = c("aut", "cre"),
   email = "[email protected]"))
 Maintainer: Philippe Grosjean <[email protected]>
 Depends: R (>= 3.3.0)
 Imports: tibble, tsibble, Hmisc, utils, readr, rlang, datasets, ggplot2, nycflights13
-Suggests: SciViews, readxl, haven, WriteXLS, openxlsx, covr, knitr, testthat
+Suggests: SciViews, readxl, haven, WriteXLS, writexl, covr, knitr, testthat
 License: GPL-2
 Encoding: UTF-8
 LazyData: true

diff --git a/NAMESPACE b/NAMESPACE
@@ -16,6 +16,7 @@ export(as.dataframe)
 export(as_dataframe)
 export(cl)
 export(data_example)
+export(data_types)
 export(hread_text)
 export(hread_xls)
 export(hread_xlsx)
@@ -30,6 +31,7 @@ export(relative_path)
 export(type_from_extension)
 export(unlabelise)
 export(unlabelize)
+export(write)
 importFrom(Hmisc,"label<-")
 importFrom(Hmisc,label)
 importFrom(readr,default_locale)

diff --git a/NEWS.md b/NEWS.md
@@ -1,8 +1,18 @@
 # data.io News
 
+## Changes in data.io 1.1.0
+
+- A basic version of write() is now available.
+
+- data_types() function added to easily get information about data types that
+  can be read() or write()
+
+- Description added into "read_write" options.
+
+
 ## Changes in data.io 1.0.1
 
-- bug corrected: forgot to change 'data' -> 'data.io' in 'read_write' options.
+- Bug corrected: forgot to change 'data' -> 'data.io' in 'read_write' options.
 
 
 ## Changes in data.io 1.0.0

diff --git a/R/data_types.R b/R/data_types.R
@@ -0,0 +1,56 @@
+#' List recognized file formats (types) for read() and write()
+#'
+#' @param types_only If `TRUE`, only a vector of types is returned, otherwise,
+#' a `tibble` with fll specifications is provided.
+#' @param view If `TRUE`, the result is "viewed" (displayed in a table in a
+#' separate window, if the user interface allows it, e.g., in RStudio) and
+#' returned invisibly. Otherwise, the results are returned normally.
+#'
+#' @description Dispaly information about data types that can read() and write()
+#' can use, as well as, the original functions that are delegated (see they
+#' respective help pages for more info and to know which additional parameters
+#' can be used in read() and write()).
+#'
+#' @return An `tibble` with `types_only = FALSE`, or a character vector.
+#' @details The function is mainly designed to be used interactively and to
+#' provide information about file types that can be read() or write(). This
+#' cannot be done through a man page because this list is dynamic and other
+#' packages could add or change entries there. With `view = FALSE`, the function
+#' can, nevertheless, be also used in a script or a R Markdown/Notebook
+#' document.
+#' @author Philippe Grosjean <[email protected]>
+#' @export
+#' @seealso [read()], [write()]
+#' @keywords utilities
+#' @concept list file types that can be read or write
+#' @examples
+#' \dontrun{
+#' data_types()
+#' data_types(TRUE)
+#' }
+#' # For non-interactive use, specify view = FALSE
+#' data_types(view = FALSE)
+#' data_types(TRUE, view = FALSE)
+data_types <- function(types_only = FALSE, view = TRUE) {
+  `data types` <- getOption("read_write")
+  # If not installed yet, do it now!
+  if (is.null(`data types`))
+    `data types` <- read_write_option()
+
+  if (isTRUE(types_only))
+    `data types` <- `data types`$type
+
+  if (isTRUE(view)) {
+    # We don't necessarily want to use utils::View(). For instance, RStudio
+    # defines another version of that function, and we ant to use it instead!
+    view <- get0("View", mode = "function")
+    if (is.null(view)) {
+      warning("'View' function not found, return the data instead")
+      `data types`
+    } else {
+      view(`data types`)
+      invisible(`data types`)
+    }
+  } else `data types`
+}
+
diff --git a/R/internal.R b/R/internal.R
@@ -1,3 +1,23 @@
 .onLoad <- function(libname, pkgname) {
   read_write_option()
 }
+
+.get_function <- function(fun) {
+  # In case we have ns::fun
+  fun <- strsplit(fun, "::", fixed = TRUE)[[1L]]
+  if (length(fun) == 2L) {
+    res <- try(getExportedValue(fun[1L], fun[2L]), silent = TRUE)
+    if (inherits(res, "try-error"))
+      stop("You need function '", fun[2L], "' from package '", fun[1L],
+        "' to read these data. Please, install the package first",
+        " and make sure the function is available there.")
+  } else {
+    if (is.na(fun[1L]))
+      return(NA)
+    res <- get0(fun[1L], envir = parent.frame(), mode = "function",
+      inherits = TRUE)
+    if (is.null(res))
+      stop("function '", fun[1], "' not found")
+  }
+  res
+}
diff --git a/R/read.R b/R/read.R
@@ -53,7 +53,7 @@
 #' accross several \R packages. See `getOption("read_write")`.
 #' @author Philippe Grosjean <[email protected]>
 #' @export
-#' @seealso [read_csv()]
+#' @seealso [data_types()], [write()], [read_csv()]
 #' @keywords utilities
 #' @concept read and import data
 #' @examples
@@ -303,32 +303,12 @@ sidecar_file = TRUE, fun_list = NULL, hfun = NULL, fun = NULL, ...) {
         } else fun_item <- fun_list[fun_list$type == type, ]
       }
 
-      get_function <- function(fun) {
-        # In case we have ns::fun
-        fun <- strsplit(fun, "::", fixed = TRUE)[[1L]]
-        if (length(fun) == 2L) {
-          res <- try(getExportedValue(fun[1L], fun[2L]), silent = TRUE)
-          if (inherits(res, "try-error"))
-            stop("You need function '", fun[2L], "' from package '", fun[1L],
-              "' to read these data. Please, install the package first",
-              " and make sure the function is available there.")
-        } else {
-          if (is.na(fun[1L]))
-            return(NA)
-          res <- get0(fun[1L], envir = parent.frame(), mode = "function",
-            inherits = TRUE)
-          if (is.null(res))
-            stop("function '", fun[1], "' not found")
-        }
-        res
-      }
-
       # If header is not NULL and a hread_xxx() function is available,
       # read as many lines as there are starting with this string
       # and decrypt header data/metadata
       attribs <- NULL
       if (is.null(hfun))
-          hfun <- get_function(fun_item$read_header)
+          hfun <- .get_function(fun_item$read_header)
       if (is.function(hfun) && !is.null(header) && header != "") {
         dat <- hfun(file = file, header.max = header.max, skip = skip,
           locale = locale)
@@ -398,7 +378,7 @@ sidecar_file = TRUE, fun_list = NULL, hfun = NULL, fun = NULL, ...) {
 
       # Do we have a function to read these data?
       if (is.null(fun))
-        fun <- get_function(fun_item$read_fun)
+        fun <- .get_function(fun_item$read_fun)
 
       # Read the data
       skip_all <- skip + n_header

diff --git a/R/read_write_option.R b/R/read_write_option.R
@@ -24,87 +24,91 @@
 #' (read_write_option())
 #' # To add a new type:
 #' tail(read_write_option(data.frame(type = "png", read_fun = "png::readPNG",
-#'   read_header = NA, write_fun = "png::writePNG")))
+#'   read_header = NA, write_fun = "png::writePNG", comment = "PNG image")))
 read_write_option <- function(new_type) {
   opts <- getOption("read_write", default = tibble::tribble(
     ~type,     ~read_fun,              ~read_header,
-    ~write_fun,
+    ~write_fun,               ~comment,
     "csv",     "readr::read_csv",      "data.io::hread_text",
-    "readr::write_csv",
+    "readr::write_csv",       "comma separated values",
     "csv2",    "readr::read_csv2",     "data.io::hread_text",
-    NA,
+    NA,                       "semicolon separated values",
     "xlcsv",   "readr::read_csv",      "data.io::hread_text",
-    "readr::write_excel_csv",
+    "readr::write_excel_csv",  "write a CSV file more easily readable by Excel",
     "tsv",     "readr::read_tsv",      "data.io::hread_text",
-    "readr::write_tsv",
+    "readr::write_tsv",        "tab separated values",
     "fwf",     "readr::read_fwf",      "data.io::hread_text",
-    NA, # TODO: a writer here!
+    NA,                         "fixed width file", # TODO: a writer here!
     "log",     "readr::read_log",      NA,
-    NA, # TODO: a writer here!
+    NA,                          "standard log file", # TODO: a writer here!
     "rds",     "readr::read_rds",      NA,
-    "readr::write_rds",
+    "readr::write_rds",          "R data file (no compression by default)",
     "txt",     "readr::read_file",     NA,
-    "readr::write_file",
+    "readr::write_file",         "text file (as length 1 character vector)",
     "raw",     "readr::read_file_raw", NA,
-    NA, # TODO: a writer here!
+    NA,                           "binary file (read as raw vector)",
+                                                     # TODO: a writer here!
     "ssv",     "readr::read_table",    "data.io::hread_text",
-    NA,#Space separated values
+    NA,                           "space separated values (strict)",
     "ssv2",    "readr::read_table2",   "data.io::hread_text",
-    NA,
+    NA,                           "space separated values (relaxed)",
     "csv.gz",  "readr::read_csv",      "data.io::hread_text",
-    "readr::write_csv",
+    "readr::write_csv",            "gz compressed comma separated values",
     "csv2.gz", "readr::read_csv2",     "data.io::hread_text",
-    NA,
+    NA,                            "gz compressed semicolon separated values",
     "tsv.gz",  "readr::read_tsv",      "data.io::hread_text",
-    "readr::write_tsv",
+    "readr::write_tsv",             "gz compressed tab separated values",
     "txt.gz",  "readr::read_file",     NA,
-    "readr::write_file",
+    "readr::write_file",            "gz compressed text file",
     "csv.bz2", "readr::read_csv",      "data.io::hread_text",
-    "readr::write_csv",
+    "readr::write_csv",             "bz2 compressed comma separated values",
     "csv2.bz2","readr::read_csv2",     "data.io::hread_text",
-    NA,
+    NA,                             "bz2 compressed semicolon separated values",
     "tsv.bz2", "readr::read_tsv",      "data.io::hread_text",
-    "readr::write_tsv",
+    "readr::write_tsv",             "bz2 compressed tab separated values",
     "txt.bz2", "readr::read_file",     "data.io::hread_text",
-    "readr::write_file",
+    "readr::write_file",            "bz2 compressed text file",
     "csv.xz",  "readr::read_csv",      "data.io::hread_text",
-    "readr::write_csv",
+    "readr::write_csv",             "xz compressed comma separated values",
     "csv2.xz", "readr::read_csv2",     "data.io::hread_text",
-    NA,
+    NA,                             "xz compressed semicolon separated values",
     "tsv.xz",  "readr::read_tsv",      "data.io::hread_text",
-    "readr::write_tsv",
+    "readr::write_tsv",             "xz compressed tab separated values",
     "txt.xz",  "readr::read_file",     NA,
-    "readr::write_file",
+    "readr::write_file",            "xz compressed text file",
     # Buggy right now!! "csvy",    "csvy::read_csvy",    NA, "csvy::write_csvy",
+    # "comma separated value with YAML header",
     "xls",     "readxl::read_excel",   "data.io::hread_xls",
-    "WriteXLS::WriteXLS",
+    "WriteXLS::WriteXLS",            "Excel old .xls format",
     "xlsx",    "readxl::read_excel",   "data.io::hread_xlsx",
-    "openxlsx::write.xlsx",
+    "writexl::write_xlsx",    "Excel new .xlsx format", #"openxlsx::write.xlsx",
     "dta",     "haven::read_dta",      NA,
-    "haven::write_dta",
+    "haven::write_dta",              "Stata DTA format",
     # read_dta() = read_stata()
     "sas",     "haven::read_sas",      NA,
-    "haven::write_sas",
+    "haven::write_sas",               "SAS format",
     "sas7bdat","haven::read_sas",      NA,
-    "haven::write_sas",
+    "haven::write_sas",                "SAS format (sas7bdat)",
     "sav",     "haven::read_sav",      NA,
-    "haven::write_sav",
+    "haven::write_sav",                "SPSS .sav format",
+    "zsav",    "haven::read_sav",      NA,
+    "haven::write_sav",                "SPSS .zsav format",
     "por",     "haven::read_por",      NA,
-    NA,
+    NA,                                "SPSS .por format",
     # read_por()/read_sav() = read_spss()
     "xpt",     "haven::read_xpt",      NA,
-    "haven::write_xpt"   #,
+    "haven::write_xpt",                "SPSS transport format (FDA compliant)"#,
     #"feather", "feather::read_feather",NA,
-    #"feather::write_feather"
+    #"feather::write_feather",        "transportable feather format"
   ))
 
   if (!missing(new_type)) {
     # Check it is in a correct format
     if (!is.data.frame(new_type))
       stop("new_type must be a data.frame")
-    if (ncol(new_type) != 4)
-      stop("new_type must contain 4 columns",
-        " (type, read_fun, read_header & write_fun")
+    if (ncol(new_type) != 5)
+      stop("new_type must contain 5 columns",
+        " (type, read_fun, read_header, write_fun & comment")
     names(new_type) <- names(opts)
     opts <- rbind(opts, new_type)
   }