Skip to content

Commit

Permalink
Merge branch 'master' into themis_pipeops
Browse files Browse the repository at this point in the history
  • Loading branch information
advieser authored Sep 24, 2024
2 parents 22f80a2 + f19fb8c commit d3402ef
Show file tree
Hide file tree
Showing 39 changed files with 8,286 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:

- name: Deploy
if: github.event_name != 'pull_request'
uses: JamesIves/[email protected].1
uses: JamesIves/[email protected].4
with:
clean: false
branch: gh-pages
Expand Down
17 changes: 16 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,16 @@ Authors@R:
person(given = "Susanne",
family = "Dandl",
role = "aut",
email = "[email protected]"))
email = "[email protected]"),
person(given = "Keno",
family = "Mersmann",
role = "ctb",
email = "[email protected]"),
person(given = "Maximilian",
family = "Mücke",
role = "ctb",
email = "[email protected]",
comment = c(ORCID = "0009-0000-9432-9795")))
Description: Dataflow programming toolkit that enriches 'mlr3' with a diverse
set of pipelining operators ('PipeOps') that can be composed into graphs.
Operations exist for data preprocessing, model fitting, and ensemble
Expand Down Expand Up @@ -99,6 +108,12 @@ Roxygen: list(markdown = TRUE, r6 = FALSE)
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Collate:
'CnfAtom.R'
'CnfClause.R'
'CnfFormula.R'
'CnfFormula_simplify.R'
'CnfSymbol.R'
'CnfUniverse.R'
'Graph.R'
'GraphLearner.R'
'mlr_pipeops.R'
Expand Down
55 changes: 55 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,40 @@
# Generated by roxygen2: do not edit by hand

S3method("!",CnfAtom)
S3method("!",CnfClause)
S3method("!",CnfFormula)
S3method("$",CnfUniverse)
S3method("%among%",CnfSymbol)
S3method("%among%",default)
S3method("&",CnfAtom)
S3method("&",CnfClause)
S3method("&",CnfFormula)
S3method("[",CnfClause)
S3method("|",CnfAtom)
S3method("|",CnfClause)
S3method("|",CnfFormula)
S3method(all,equal.CnfAtom)
S3method(all,equal.CnfClause)
S3method(all,equal.CnfFormula)
S3method(as.CnfAtom,CnfAtom)
S3method(as.CnfAtom,default)
S3method(as.CnfAtom,logical)
S3method(as.CnfClause,CnfAtom)
S3method(as.CnfClause,CnfClause)
S3method(as.CnfClause,default)
S3method(as.CnfClause,logical)
S3method(as.CnfFormula,CnfAtom)
S3method(as.CnfFormula,CnfClause)
S3method(as.CnfFormula,CnfFormula)
S3method(as.CnfFormula,default)
S3method(as.CnfFormula,logical)
S3method(as.data.table,DictionaryGraph)
S3method(as.data.table,DictionaryPipeOp)
S3method(as.list,CnfClause)
S3method(as.list,CnfFormula)
S3method(as.logical,CnfAtom)
S3method(as.logical,CnfClause)
S3method(as.logical,CnfFormula)
S3method(as_graph,Graph)
S3method(as_graph,default)
S3method(as_graph,list)
Expand All @@ -11,6 +44,11 @@ S3method(as_pipeop,Filter)
S3method(as_pipeop,Learner)
S3method(as_pipeop,PipeOp)
S3method(as_pipeop,default)
S3method(format,CnfAtom)
S3method(format,CnfClause)
S3method(format,CnfFormula)
S3method(format,CnfSymbol)
S3method(format,CnfUniverse)
S3method(marshal_model,Multiplicity)
S3method(marshal_model,graph_learner_model)
S3method(marshal_model,pipeop_impute_learner_state)
Expand All @@ -25,6 +63,11 @@ S3method(pos,"NULL")
S3method(pos,character)
S3method(pos,list)
S3method(predict,Graph)
S3method(print,CnfAtom)
S3method(print,CnfClause)
S3method(print,CnfFormula)
S3method(print,CnfSymbol)
S3method(print,CnfUniverse)
S3method(print,Multiplicity)
S3method(print,Selector)
S3method(set_validate,GraphLearner)
Expand All @@ -35,6 +78,12 @@ S3method(unmarshal_model,pipeop_impute_learner_state_marshaled)
S3method(unmarshal_model,pipeop_learner_cv_state_marshaled)
export("%>>!%")
export("%>>%")
export("%among%")
export(CnfAtom)
export(CnfClause)
export(CnfFormula)
export(CnfSymbol)
export(CnfUniverse)
export(Graph)
export(GraphLearner)
export(LearnerClassifAvg)
Expand Down Expand Up @@ -118,6 +167,9 @@ export(PipeOpUnbranch)
export(PipeOpVtreat)
export(PipeOpYeoJohnson)
export(add_class_hierarchy_cache)
export(as.CnfAtom)
export(as.CnfClause)
export(as.CnfFormula)
export(as.Multiplicity)
export(as.data.table)
export(as_graph)
Expand Down Expand Up @@ -159,6 +211,9 @@ export(selector_none)
export(selector_setdiff)
export(selector_type)
export(selector_union)
if (getRversion() >= "4.3.0") S3method(chooseOpsMethod,CnfAtom)
if (getRversion() >= "4.3.0") S3method(chooseOpsMethod,CnfClause)
if (getRversion() >= "4.3.0") S3method(chooseOpsMethod,CnfFormula)
import(checkmate)
import(data.table)
import(mlr3)
Expand Down
17 changes: 16 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,22 @@

# mlr3pipelines 0.7.0

* New PipeOp: `PipeOpRowApply` / `po("rowapply")`
* New PipeOp `PipeOpRowApply` / `po("rowapply")`
* Empty `PipeOp` IDs now explicitly forbidden.
* Bugfix: `Graph$train()` / `Graph$predict()` with `single_input = FALSE` now correctly handles `PipeOp`s with multiple inputs.
* `GraphLearner$base_learner()` now works with `PipeOpBranch`, and is generally more robust.
* `GraphLearner` now supports `$importance`, `$selected_features()`, `$oob_error()`, and `$loglik()`.
These are computed from the underlying `Learner`.
* `GraphLearner$impute_selected_features` option added:
`$selected_features()` is reported even if the underlying base learner does not report it; in this case, the full feature set as seen by that learner is returned.
* `GraphLearner$predict_type` handling more robust now.
* `PipeOpThreshold` and `PipeOpTuneThreshold` now have the `$predict_type` `"prob"`.
They can be set to `"response"`, in which case the probability predictions are discarded, potentially saving memory.
* Bugfix for handling multiplicities in PipeOps with vararg channels.
* Bugfix: `PipeOpImputeOOR` now retains the `.MISSING` level in factors during prediction that were imputed during training, but had no missing values during prediction.
* `as_data_table(po())` now works even when some `PipeOp`s can not be constructed.
For these `PipeOp`s, `NA` is reported in most columns.
* Compatibility with upcoming `mlr3` release.
* New PipeOps for handling imbalanced data: `PipeOpADAS` / `po("adas")`, `PipeOpBLSmote` / `po("blsmote")` and `PipeOpSmoteNC` / `po("smotenc")`

# mlr3pipelines 0.6.0
Expand Down
217 changes: 217 additions & 0 deletions R/CnfAtom.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# An expression of the form "X %in% {x1, x2, x3}"
#' @title Atoms for CNF Formulas
#'
#' @description
#' `CnfAtom` objects represent a single statement that is used to build up CNF formulae.
#' They are mostly intermediate, created using the [`%among%`] operator or [`CnfAtom()`]
#' directly, and combined into [`CnfClause`] and [`CnfFormula`] objects.
#' [`CnfClause`] and [`CnfFormula`] do not, however, contain [`CnfAtom`] objects directly.
#'
#' `CnfAtom`s contain an indirect reference to a [`CnfSymbol`] by referencing its name
#' and its [`CnfUniverse`]. They furthermore contain a set of values. A `CnfAtom`
#' represents a statement asserting that the given symbol takes up one of the
#' given values.
#'
#' If the set of values is empty, the `CnfAtom` represents a contradiction (FALSE).
#' If it is the full domain of the symbol, the `CnfAtom` represents a tautology (TRUE).
#' These values can be converted to, and from, `logical(1)` values using `as.logical()`
#' and `as.CnfAtom()`.
#'
#' `CnfAtom` objects can be negated using the `!` operator, which will return the `CnfAtom`
#' representing set membership in the complement of its values with respect to the symbol's domain.
#' `CnfAtom`s can furthermore be combined using the `|` operator to form a [`CnfClause`],
#' and using the `&` operator to form a [`CnfFormula`]. This happens even if the
#' resulting statement could be represented as a single `CnfAtom`.
#'
#' This is part of the CNF representation tooling, which is currently considered
#' experimental; it is for internal use.
#'
#' @details
#' We would have preferred to overload the `%in%` operator, but this is currently
#' not easily possible in R. We therefore created the `%among%` operator.
#'
#' The internal representation of a `CnfAtom` may change in the future.
#'
#' @param symbol ([`CnfSymbol`]) \cr
#' The symbol to which the atom refers.
#' @param values (`character`) \cr
#' The values that the symbol can take.
#' @param e1 (`CnfSymbol`) \cr
#' Left-hand side of the `%among%` operator.
#' Passed as `symbol` to `CnfAtom()`.
#' @param e2 (`character`) \cr
#' Right-hand side of the `%among%` operator.
#' Passed as `values` to `CnfAtom()`.
#' @param x (any) \cr
#' The object to be coerced to a `CnfAtom` by `as.CnfAtom`.
#' Only `logical(1)` and `CnfAtom` itself are currently supported.
#' @return A new `CnfAtom` object.
#' @examples
#' u = CnfUniverse()
#' X = CnfSymbol(u, "X", c("a", "b", "c"))
#'
#' CnfAtom(X, c("a", "b"))
#' X %among% "a"
#' X %among% character(0)
#' X %among% c("a", "b", "c")
#'
#' as.logical(X %among% character(0))
#' as.CnfAtom(TRUE)
#'
#' !(X %among% "a")
#'
#' X %among% "a" | X %among% "b" # creates a CnfClause
#'
#' X %among% "a" & X %among% c("a", "b") # creates a CnfFormula
#' @family CNF representation objects
#' @keywords internal
#' @export
CnfAtom = function(symbol, values) {
  assert_class(symbol, "CnfSymbol")
  universe = attr(symbol, "universe")
  # The universe is indexed by the symbol to obtain the symbol's domain.
  domain = universe[[symbol]]
  assert_subset(values, domain)

  # Constant atoms are stored as bare logicals: TRUE when `values` covers the
  # whole domain, FALSE when `values` is empty. The TRUE check runs first.
  # Every other atom stores the symbol (passed through `c()`) together with
  # the de-duplicated values.
  payload = if (all(domain %in% values)) {
    TRUE
  } else if (length(values) == 0) {
    FALSE
  } else {
    list(symbol = c(symbol), values = unique(values))
  }

  structure(payload, universe = universe, class = "CnfAtom")
}

#' @export
print.CnfAtom = function(x, ...) {
  # Build the complete output line first, then emit it with a single cat().
  # Constant atoms print as <TRUE> / <FALSE>; other atoms print their symbol
  # and the comma-separated values.
  line = if (isTRUE(x)) {
    "CnfAtom: <TRUE>\n"
  } else if (isFALSE(x)) {
    "CnfAtom: <FALSE>\n"
  } else {
    sprintf("CnfAtom: %s \U2208 {%s}.\n", x$symbol, paste(x$values, collapse = ", "))
  }
  cat(line)
  # Return the atom invisibly.
  invisible(x)
}

#' @export
format.CnfAtom = function(x, ...) {
  # Short one-string representation: constant atoms get a fixed label,
  # all other atoms are labelled by their symbol only (values are omitted).
  if (isTRUE(x)) return("CnfAtom: T")
  if (isFALSE(x)) return("CnfAtom: F")
  sprintf("CnfAtom(%s)", x$symbol)
}

# construct CnfAtom with `X %among% c("a", "b", "c")`
# we cannot overload `%in%`, unfortunately
#' @export
#' @rdname CnfAtom
`%among%` = function(e1, e2) {
  # S3 generic; the method is chosen by the class of the left-hand side `e1`.
  UseMethod("%among%", e1)
}

#' @export
`%among%.CnfSymbol` = function(e1, e2) {
  # Infix shorthand for the constructor: LHS is the symbol, RHS the values.
  CnfAtom(symbol = e1, values = e2)
}

#' @export
`%among%.default` = function(e1, e2) {
  # Fallback for left-hand sides without their own `%among%` method: always an error.
  msg = "%among% operation not defined for LHS. %among% should typically be used with a CnfSymbol."
  stop(msg)
}


#' @rdname CnfAtom
#' @export
as.CnfAtom = function(x) {
  # S3 generic for coercion to CnfAtom; the method is chosen by the class of `x`.
  UseMethod("as.CnfAtom", x)
}

#' @export
as.CnfAtom.default = function(x) {
  # Fallback for classes without a dedicated conversion method: always an error.
  msg = "Cannot convert object to CnfAtom."
  stop(msg)
}

#' @export
as.CnfAtom.logical = function(x) {
  # Only a single flag can be turned into a constant atom.
  assert_flag(x)
  # Re-use a "universe" attribute if `x` already carries one; for a plain
  # logical this lookup yields NULL.
  universe = attr(x, "universe")
  structure(x, universe = universe, class = "CnfAtom")
}

#' @export
as.CnfAtom.CnfAtom = function(x) {
# `x` is already a CnfAtom, so it is returned unchanged.
x
}

#' @export
as.logical.CnfAtom = function(x, ...) {
  # An atom that is not stored as a logical has no fixed truth value.
  if (!is.logical(x)) {
    return(NA)
  }
  # Constant atom: strip the class only; any other attributes are kept.
  unclass(x)
}

#' @export
all.equal.CnfAtom = function(target, current, ...) {
  target_is_const = is.logical(target)
  current_is_const = is.logical(current)

  if (target_is_const && current_is_const) {
    # Compare truth values only: c() drops the attributes, so atoms that
    # disagree on universe (logical atoms sometimes have it set to NULL)
    # still compare equal.
    if (identical(c(target), c(current))) {
      return(TRUE)
    }
    return("target and current are both logicals but not equal")
  }
  if (target_is_const || current_is_const) {
    return("target and current are not both logicals")
  }
  if (!inherits(current, "CnfAtom")) {
    return("current is not a CnfAtom")
  }

  # The order of values carries no meaning, so sort both sides before the
  # element-wise list comparison.
  target$values = sort(target$values)
  current$values = sort(current$values)
  all.equal.list(target, current, ...)
}

#' @rawNamespace if (getRversion() >= "4.3.0") S3method(chooseOpsMethod,CnfAtom)
chooseOpsMethod.CnfAtom = function(x, y, mx, my, cl, reverse) {
  # Unconditionally answers TRUE, whatever the arguments are.
  TRUE
}

#' @export
`&.CnfAtom` = function(e1, e2) {
# `&` method for CnfAtom; `e1` and `e2` are the two operands.
# Will return a CnfFormula, so we can just delegate to there.
# `&.CnfFormula` handles conversion.
`&.CnfFormula`(e1, e2)
}

#' @export
`|.CnfAtom` = function(e1, e2) {
  # `|` method for CnfAtom, written as one decision chain; the branch value
  # is the result.
  if (inherits(e2, "CnfFormula")) {
    # `|.CnfFormula` handles conversion
    `|.CnfFormula`(e1, e2)
  } else if (isFALSE(e1) || isTRUE(e2)) {
    # e1 adds nothing (FALSE), or e2 already makes the result TRUE: result is e2
    as.CnfClause(e2)
  } else if (isFALSE(e2) || isTRUE(e1)) {
    # mirrored case: the result is e1
    as.CnfClause(e1)
  } else {
    # either two proper CnfAtoms, or e2 is a CnfClause.
    CnfClause(list(e1, e2))
  }
}

#' @export
`!.CnfAtom` = function(x) {
  # Constant atoms: negate the underlying logical and wrap it again.
  if (is.logical(x)) {
    return(as.CnfAtom(!unclass(x)))
  }
  # Proper atoms: the negation holds exactly the values of the symbol's
  # domain that are not in `x`.
  universe = attr(x, "universe")
  domain = universe[[x$symbol]]
  complement = setdiff(domain, x$values)
  structure(
    list(symbol = x$symbol, values = complement),
    universe = universe,
    class = "CnfAtom"
  )
}
Loading

0 comments on commit d3402ef

Please sign in to comment.