Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add try_*_get() for each vector type, and fail more elegantly in Vector element extraction #503

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions crates/ark/src/variables/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1264,3 +1264,86 @@ pub fn plain_binding_force_with_rollback(binding: &Binding) -> anyhow::Result<RO
_ => Err(anyhow!("Unexpected binding type")),
}
}

#[cfg(test)]
mod tests {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

use regex::Regex;

use crate::test::r_test;
use crate::variables::variable::WorkspaceVariableDisplayValue;

/// Evaluates `code` in R's base environment and returns the display string
/// that the variables pane would show for the resulting object.
///
/// Panics if `code` fails to parse or evaluate.
fn get_display_value(code: &str) -> String {
    // Keep the evaluated object bound for the whole conversion so the
    // underlying SEXP stays alive while it is being formatted.
    let evaluated = harp::parse_eval_base(code).unwrap();
    let formatted = WorkspaceVariableDisplayValue::from(evaluated.sexp);
    formatted.display_value
}

/// Asserts that evaluating `code` produces exactly the display string `expected`.
fn expect_display_value(code: &str, expected: &str) {
    // `String` compares directly against `&str`, so no temporary is needed.
    assert_eq!(get_display_value(code), expected);
}

#[test]
fn test_simple_display_values() {
    r_test(|| {
        // Each pair is (R code to evaluate, expected display string).
        let cases = [
            ("1", "1"),
            ("1L", "1"),
            ("'a'", "\"a\""),
            ("NULL", "NULL"),
            ("TRUE", "TRUE"),
            ("FALSE", "FALSE"),
            ("1i", "0+1i"),
        ];
        for (code, expected) in cases {
            expect_display_value(code, expected);
        }
    })
}

#[test]
fn test_data_frame_display_value() {
    r_test(|| {
        // Rectangular objects: a data frame is summarised by its dimensions,
        // a matrix is rendered as nested bracketed groups.
        let cases = [
            ("datasets::mtcars", "[32 rows x 11 columns] <data.frame>"),
            ("matrix(1:4, ncol=2)", "[[1 2], [3 4]]"),
        ];
        for (code, expected) in cases {
            expect_display_value(code, expected);
        }
    })
}

#[test]
fn test_list_display_value() {
    r_test(|| {
        // Named list elements are shown as `name = value`, unnamed ones as the value alone.
        let cases = [
            ("list(x=1:4)", "[x = 1 2 3 4]"),
            ("list(1:4)", "[1 2 3 4]"),
        ];
        for (code, expected) in cases {
            expect_display_value(code, expected);
        }
    })
}

#[test]
fn test_functions_display_value() {
    r_test(|| {
        // Functions display as their signature followed by a trailing space; the body is omitted.
        let cases = [
            ("function() NULL", "function () "),
            ("function(a) NULL", "function (a) "),
            ("function(a, b) NULL", "function (a, b) "),
            ("function(a = 1, b) NULL", "function (a = 1, b) "),
        ];
        for (code, expected) in cases {
            expect_display_value(code, expected);
        }
    })
}

#[test]
// Checks that display values for ALTREP vectors can be produced both for a
// compact integer sequence and for deferred-string `names()`, accepting an
// `NA` fallback where element extraction may fail.
fn test_altrep_is_not_materialized() {
r_test(|| {
// Usage of `INTEGER_ELT()` combined with an ALTREP compact integer sequence
// should allow us to display this no matter what
let display = get_display_value("1:1e10");
assert!(Regex::new(r"^1 2 3.*").unwrap().is_match(display.as_str()));

// With ALTREP deferred string names used below, we use `STRING_ELT()` as we
// should to extract the values, but even that causes a full materialization
// of the STRSXP vector inside the ALTREP `Elt` method for deferred strings,
// which throws an OOM error when trying to look at elements of `names(x)`.
// We catch this, log an error, and return `NA` as the element value since
// we can't determine what it is.
let success = Regex::new(r#"^"1" "2" "3""#).unwrap();
let failure = Regex::new(r#"^NA NA NA"#).unwrap();

// Small, should always pass with `success`
let display = get_display_value("local({x = 1:1e3; names(x) = x; names(x)})");
// NOTE(review): the comment above says this case should always succeed, yet
// the assertion below also accepts the `NA` fallback; asserting on `success`
// alone would catch a regression in the small case — confirm and tighten.
assert!(success.is_match(display.as_str()) || failure.is_match(display.as_str()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert!(success.is_match(display.as_str()) || failure.is_match(display.as_str()));
assert!(success.is_match(display.as_str()));


// Extremely large, would only work if you have >32gb of RAM
let display = get_display_value("local({x = 1:1e10; names(x) = x; names(x)})");
// Either outcome is accepted here: real values when memory allows, or the
// `NA` fallback when extraction fails.
assert!(success.is_match(display.as_str()) || failure.is_match(display.as_str()));
})
}
}
64 changes: 63 additions & 1 deletion crates/harp/src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::exec::RFunctionExt;
use crate::protect::RProtect;
use crate::r_symbol;
use crate::size::r_size;
use crate::try_catch;
use crate::utils::r_assert_capacity;
use crate::utils::r_assert_length;
use crate::utils::r_assert_type;
Expand Down Expand Up @@ -158,20 +159,75 @@ pub fn r_int_get(x: SEXP, i: isize) -> i32 {
/// Reads element `i` of `x` through `REAL_ELT()`.
///
/// Neither the type of `x` nor the index `i` is validated here.
pub fn r_dbl_get(x: SEXP, i: isize) -> f64 {
unsafe { REAL_ELT(x, i) }
}
/// Reads element `i` of `x` through `RAW_ELT()`.
///
/// Neither the type of `x` nor the index `i` is validated here.
pub fn r_raw_get(x: SEXP, i: isize) -> Rbyte {
unsafe { RAW_ELT(x, i) }
}
/// Reads element `i` of `x` through `COMPLEX_ELT()`.
///
/// Neither the type of `x` nor the index `i` is validated here.
pub fn r_cpl_get(x: SEXP, i: isize) -> Rcomplex {
unsafe { COMPLEX_ELT(x, i) }
}
/// Reads element `i` of `x` through `STRING_ELT()`, returning the element as a `SEXP`.
///
/// Neither the type of `x` nor the index `i` is validated here.
pub fn r_chr_get(x: SEXP, i: isize) -> SEXP {
unsafe { STRING_ELT(x, i) }
}

/// Reads element `i` of the list `x` through `VECTOR_ELT()`.
///
// TODO: Once we have a Rust list type, move this back to unsafe.
// Should be unsafe because the type and bounds are not checked and
// will result in a crash if used incorrectly.
pub fn list_get(x: SEXP, i: isize) -> SEXP {
unsafe { VECTOR_ELT(x, i) }
}

// These methods guard against the potential for ALTREP `Elt` methods throwing errors
// (including OOM errors if they have to allocate).
// They don't check the validity of the index though.

/// Fallible counterpart of `r_lgl_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_lgl_get(x: SEXP, i: isize) -> harp::Result<i32> {
    if !r_is_altrep(x) {
        return Ok(r_lgl_get(x, i));
    }
    try_catch(|| r_lgl_get(x, i))
}
/// Fallible counterpart of `r_int_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_int_get(x: SEXP, i: isize) -> harp::Result<i32> {
    if !r_is_altrep(x) {
        return Ok(r_int_get(x, i));
    }
    try_catch(|| r_int_get(x, i))
}
/// Fallible counterpart of `r_dbl_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_dbl_get(x: SEXP, i: isize) -> harp::Result<f64> {
    if !r_is_altrep(x) {
        return Ok(r_dbl_get(x, i));
    }
    try_catch(|| r_dbl_get(x, i))
}
/// Fallible counterpart of `r_raw_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_raw_get(x: SEXP, i: isize) -> harp::Result<Rbyte> {
    if !r_is_altrep(x) {
        return Ok(r_raw_get(x, i));
    }
    try_catch(|| r_raw_get(x, i))
}
/// Fallible counterpart of `r_cpl_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_cpl_get(x: SEXP, i: isize) -> harp::Result<Rcomplex> {
    if !r_is_altrep(x) {
        return Ok(r_cpl_get(x, i));
    }
    try_catch(|| r_cpl_get(x, i))
}
/// Fallible counterpart of `r_chr_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_chr_get(x: SEXP, i: isize) -> harp::Result<SEXP> {
    if !r_is_altrep(x) {
        return Ok(r_chr_get(x, i));
    }
    try_catch(|| r_chr_get(x, i))
}
/// Fallible counterpart of `list_get()`.
///
/// Only ALTREP vectors are routed through `try_catch()`; for ordinary vectors
/// the element is read directly. The index `i` is not validated.
pub fn try_list_get(x: SEXP, i: isize) -> harp::Result<SEXP> {
    if !r_is_altrep(x) {
        return Ok(list_get(x, i));
    }
    try_catch(|| list_get(x, i))
}

/// Stores `value` at position `i` of the list `x` through `SET_VECTOR_ELT()`.
///
/// Neither the type of `x` nor the index `i` is validated here.
pub fn list_poke(x: SEXP, i: isize, value: SEXP) {
unsafe { SET_VECTOR_ELT(x, i, value) };
}
Expand All @@ -185,6 +241,12 @@ pub fn r_int_na() -> i32 {
/// Returns R's missing value for doubles, read from `R_NaReal`.
pub fn r_dbl_na() -> f64 {
unsafe { R_NaReal }
}
/// Returns a complex value whose real and imaginary parts are both the
/// double missing value from `r_dbl_na()`.
pub fn r_cpl_na() -> Rcomplex {
    let na = r_dbl_na();
    Rcomplex { r: na, i: na }
}
/// Returns R's missing string element, read from `R_NaString`.
pub fn r_str_na() -> SEXP {
unsafe { R_NaString }
}
Expand Down
2 changes: 1 addition & 1 deletion crates/harp/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub fn r_n_frame() -> crate::Result<i32> {
unsafe {
let ffi = harp::try_eval_silent(NFRAME_CALL.unwrap_unchecked(), R_ENVS.base)?;
let n_frame = IntegerVector::new(ffi)?;
Ok(n_frame.get_unchecked_elt(0))
Ok(n_frame.get_unchecked(0).unwrap())
}
}

Expand Down
11 changes: 8 additions & 3 deletions crates/harp/src/vector/character_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ use libr::R_xlen_t;
use libr::Rf_mkCharLenCE;
use libr::SET_STRING_ELT;
use libr::SEXP;
use libr::STRING_ELT;
use libr::STRSXP;

use crate::object::RObject;
use crate::r_str_na;
use crate::try_chr_get;
use crate::utils::r_str_to_owned_utf8_unchecked;
use crate::vector::FormatOptions;
use crate::vector::Vector;
Expand Down Expand Up @@ -73,8 +74,12 @@ impl Vector for CharacterVector {
unsafe { *x == R_NaString }
}

fn get_unchecked_elt(&self, index: isize) -> Self::UnderlyingType {
unsafe { STRING_ELT(self.data(), index as R_xlen_t) }
fn get_unchecked_elt(&self, index: isize) -> harp::Result<Self::UnderlyingType> {
try_chr_get(self.data(), R_xlen_t::from(index))
}

/// Returns the missing string element from `r_str_na()`.
// NOTE(review): presumably the placeholder used when `get_unchecked_elt()`
// returns an error — confirm against the `Vector` trait.
fn error_elt() -> Self::UnderlyingType {
r_str_na()
}

fn convert_value(x: &Self::UnderlyingType) -> Self::Type {
Expand Down
11 changes: 8 additions & 3 deletions crates/harp/src/vector/complex_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ use libr::R_IsNA;
use libr::R_xlen_t;
use libr::Rcomplex;
use libr::Rf_allocVector;
use libr::COMPLEX_ELT;
use libr::CPLXSXP;
use libr::DATAPTR;
use libr::SEXP;

use crate::object::RObject;
use crate::r_cpl_na;
use crate::try_cpl_get;
use crate::vector::FormatOptions;
use crate::vector::Vector;

Expand Down Expand Up @@ -74,8 +75,12 @@ impl Vector for ComplexVector {
unsafe { R_IsNA(x.r) == 1 || R_IsNA(x.i) == 1 }
}

fn get_unchecked_elt(&self, index: isize) -> Self::UnderlyingType {
unsafe { Complex::new(COMPLEX_ELT(self.data(), index as R_xlen_t)) }
fn get_unchecked_elt(&self, index: isize) -> harp::Result<Self::UnderlyingType> {
try_cpl_get(self.data(), R_xlen_t::from(index)).map(Complex::new)
}

/// Returns the complex missing value from `r_cpl_na()`, wrapped in `Complex`.
// NOTE(review): presumably the placeholder used when `get_unchecked_elt()`
// returns an error — confirm against the `Vector` trait.
fn error_elt() -> Self::UnderlyingType {
Complex::new(r_cpl_na())
}

fn convert_value(x: &Self::UnderlyingType) -> Self::Type {
Expand Down
13 changes: 9 additions & 4 deletions crates/harp/src/vector/factor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ use libr::R_xlen_t;
use libr::Rf_allocVector;
use libr::Rf_getAttrib;
use libr::DATAPTR;
use libr::INTEGER_ELT;
use libr::INTSXP;
use libr::SEXP;

use crate::object::RObject;
use crate::r_int_na;
use crate::r_symbol;
use crate::vector::FormatOptions;
use crate::try_int_get;
use crate::vector::CharacterVector;
use crate::vector::FormatOptions;
use crate::vector::Vector;

#[harp_macros::vector]
Expand Down Expand Up @@ -66,8 +67,12 @@ impl Vector for Factor {
unsafe { *x == R_NaInt }
}

fn get_unchecked_elt(&self, index: isize) -> Self::UnderlyingType {
unsafe { INTEGER_ELT(self.data(), index as R_xlen_t) }
fn get_unchecked_elt(&self, index: isize) -> harp::Result<Self::UnderlyingType> {
try_int_get(self.data(), R_xlen_t::from(index))
}

/// Returns the value of `r_int_na()` as the factor's underlying integer code.
// NOTE(review): presumably the placeholder used when `get_unchecked_elt()`
// returns an error — confirm against the `Vector` trait.
fn error_elt() -> Self::UnderlyingType {
r_int_na()
}

fn convert_value(x: &Self::UnderlyingType) -> Self::Type {
Expand Down
11 changes: 8 additions & 3 deletions crates/harp/src/vector/integer_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use libr::R_NaInt;
use libr::R_xlen_t;
use libr::Rf_allocVector;
use libr::DATAPTR;
use libr::INTEGER_ELT;
use libr::INTSXP;
use libr::SEXP;

use crate::object::RObject;
use crate::r_int_na;
use crate::try_int_get;
use crate::vector::FormatOptions;
use crate::vector::Vector;

Expand Down Expand Up @@ -61,8 +62,12 @@ impl Vector for IntegerVector {
unsafe { *x == R_NaInt }
}

fn get_unchecked_elt(&self, index: isize) -> Self::UnderlyingType {
unsafe { INTEGER_ELT(self.data(), index as R_xlen_t) }
fn get_unchecked_elt(&self, index: isize) -> harp::Result<Self::UnderlyingType> {
try_int_get(self.data(), R_xlen_t::from(index))
}

/// Returns the value of `r_int_na()`.
// NOTE(review): presumably the placeholder used when `get_unchecked_elt()`
// returns an error — confirm against the `Vector` trait.
fn error_elt() -> Self::UnderlyingType {
r_int_na()
}

fn convert_value(x: &Self::UnderlyingType) -> Self::Type {
Expand Down
9 changes: 7 additions & 2 deletions crates/harp/src/vector/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::object::list_cbegin;
use crate::object::r_length;
use crate::object::r_list_poke;
use crate::object::RObject;
use crate::r_null;
use crate::r_typeof;

pub struct List {
Expand Down Expand Up @@ -58,8 +59,12 @@ impl super::Vector for List {
false
}

fn get_unchecked_elt(&self, index: isize) -> Self::UnderlyingType {
unsafe { *self.ptr.wrapping_add(index as usize) }
fn get_unchecked_elt(&self, index: isize) -> harp::Result<Self::UnderlyingType> {
Ok(unsafe { *self.ptr.wrapping_add(index as usize) })
}

/// Returns the value of `r_null()`.
// NOTE(review): presumably the placeholder used when `get_unchecked_elt()`
// returns an error — confirm against the `Vector` trait. The list
// implementation of `get_unchecked_elt()` shown in this diff always returns `Ok`.
fn error_elt() -> Self::UnderlyingType {
r_null()
}

fn convert_value(x: &Self::UnderlyingType) -> Self::Type {
Expand Down
11 changes: 8 additions & 3 deletions crates/harp/src/vector/logical_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ use libr::R_xlen_t;
use libr::Rf_allocVector;
use libr::DATAPTR;
use libr::LGLSXP;
use libr::LOGICAL_ELT;
use libr::SEXP;

use crate::object::RObject;
use crate::r_lgl_na;
use crate::try_lgl_get;
use crate::vector::FormatOptions;
use crate::vector::Vector;

Expand Down Expand Up @@ -61,8 +62,12 @@ impl Vector for LogicalVector {
unsafe { *x == R_NaInt }
}

fn get_unchecked_elt(&self, index: isize) -> Self::UnderlyingType {
unsafe { LOGICAL_ELT(self.data(), index as R_xlen_t) }
fn get_unchecked_elt(&self, index: isize) -> harp::Result<Self::UnderlyingType> {
try_lgl_get(self.data(), R_xlen_t::from(index))
}

/// Returns the value of `r_lgl_na()`.
// NOTE(review): presumably the placeholder used when `get_unchecked_elt()`
// returns an error — confirm against the `Vector` trait.
fn error_elt() -> Self::UnderlyingType {
r_lgl_na()
}

fn convert_value(x: &Self::UnderlyingType) -> Self::Type {
Expand Down
Loading
Loading