diff --git a/DESCRIPTION b/DESCRIPTION index 87f26c8..938432a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: data.io Type: Package -Version: 1.1.0 +Version: 1.2.0 Title: Data Input/Output, Read or Write Data from Files or Datasets in R Packages in Different Formats Description: Read or write data from many different formats (tabular datasets, from statistic software, ...) into R objects. Add labels and units in diff --git a/NEWS.md b/NEWS.md index 67bdb7f..7be4eef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,14 @@ # data.io News +## Changes in data.io 1.2.0 + +- It is now possible to specify the default language to use for read() with + the option `data.io_lang`. + +- `lang` and `lang_encoding` are now recorded as attributes of the comment of + the imported object. + + ## Changes in data.io 1.1.0 - A basic version of write() is now available. diff --git a/R/read.R b/R/read.R index c1608a6..9e74c25 100644 --- a/R/read.R +++ b/R/read.R @@ -12,8 +12,10 @@ #' defaults as US English + UTF-8 encoding, and it is advised to be used as #' much as possible. #' @param lang The language to use (mainly for comment, label and units), but -#' also for factor levels or other chanracter strings if a translation exists +#' also for factor levels or other character strings if a translation exists #' and if the language is spelled with uppercase characters (e.g., `"FR"`). +#' The default value can be set with, e.g., `options(data.io_lang = "fr")` for +#' French. #' @param lang_encoding Encoding used by R scripts for translation. They should #' all be encoded as `UTF-8`, which is the default. However, this argument #' allows to specify a different encoding if needed. 
@@ -189,9 +191,10 @@ #' (afalfa <- read(data_example("afalfa.xpt"))) # SAS transport file #' } read <- structure(function(file, type = NULL, header = "#", header.max = 50L, -skip = 0L, locale = default_locale(), lang = "en", lang_encoding = "UTF-8", -as_dataframe = TRUE, as_labelled = FALSE, comments = NULL, package = NULL, -sidecar_file = TRUE, fun_list = NULL, hfun = NULL, fun = NULL, ...) { +skip = 0L, locale = default_locale(), lang = getOption("data.io_lang", "en"), +lang_encoding = "UTF-8", as_dataframe = TRUE, as_labelled = FALSE, +comments = NULL, package = NULL, sidecar_file = TRUE, fun_list = NULL, +hfun = NULL, fun = NULL, ...) { if (!is.null(lang)) { if (length(lang) != 1 || !is.character(lang)) stop("lang must be a single character string or NULL") @@ -436,18 +439,20 @@ sidecar_file = TRUE, fun_list = NULL, hfun = NULL, fun = NULL, ...) { } } - # Record the comments and origin of the data + # Record the comments, lang, lang_encoding and origin of the data cmt <- comment(res) cmt[] <- c(cmt, comments) if (is.null(cmt)) cmt2 <- "" else cmt2 <- cmt + attr(cmt2, "lang") <- lang + attr(cmt2, "lang_encoding") <- lang_encoding if (!is.null(srcfile)) { attr(cmt2, "srcfile") <- srcfile comment(res) <- cmt2 } else if (!is.null(src)) { attr(cmt2, "src") <- src comment(res) <- cmt2 - } else if (!is.null(cmt)) { - comment(res) <- cmt + } else { + comment(res) <- cmt2 } if (isTRUE(as_dataframe)) {# Try to convert the object into a dataframe diff --git a/man/read.Rd b/man/read.Rd index 5a39b5a..79e6a8c 100644 --- a/man/read.Rd +++ b/man/read.Rd @@ -10,10 +10,10 @@ \title{Read data in \R in different formats} \usage{ read(file, type = NULL, header = "#", header.max = 50L, skip = 0L, - locale = default_locale(), lang = "en", lang_encoding = "UTF-8", - as_dataframe = TRUE, as_labelled = FALSE, comments = NULL, - package = NULL, sidecar_file = TRUE, fun_list = NULL, hfun = NULL, - fun = NULL, ...) 
+ locale = default_locale(), lang = getOption("data.io_lang", "en"), + lang_encoding = "UTF-8", as_dataframe = TRUE, as_labelled = FALSE, + comments = NULL, package = NULL, sidecar_file = TRUE, fun_list = NULL, + hfun = NULL, fun = NULL, ...) type_from_extension(file) @@ -44,8 +44,10 @@ defaults as US English + UTF-8 encoding, and it is advised to be used as much as possible.} \item{lang}{The language to use (mainly for comment, label and units), but -also for factor levels or other chanracter strings if a translation exists -and if the language is spelled with uppercase characters (e.g., \code{"FR"}).} +also for factor levels or other character strings if a translation exists +and if the language is spelled with uppercase characters (e.g., \code{"FR"}). +The default value can be set with, e.g., \code{options(data.io_lang = "fr")} for +French.} \item{lang_encoding}{Encoding used by R scripts for translation. They should all be encoded as \code{UTF-8}, which is the default. However, this argument