diff --git a/src/CHANGELOG.md b/src/CHANGELOG.md index d17608e..a61937a 100644 --- a/src/CHANGELOG.md +++ b/src/CHANGELOG.md @@ -5,7 +5,7 @@ * Named vectors were added and can e.g. be constructed via `[a = 1, b = 2]` * The `is_null()` primitive was added * Setting a list value to `null` actually sets it to `null` and does not remove it. -* Stricter recycling rule are enforced (@98): +* Stricter recycling rules are enforced (@98): Vectorized operations on two vectors `v1` and `v2` now requires either of: * One of the vectors has length 1 and the other vector's length is not zero. * The vectors have the same length. @@ -20,6 +20,7 @@ This included a considerable refactor. * Iterating over references of a `Rep` was made much simpler and new methods were added and unused ones removed. +* The `RepType` struct that was introduced in 0.4.0 was removed again (#189). ## Notable Bugs Addressed diff --git a/src/callable/primitive/c.rs b/src/callable/primitive/c.rs index 91b5732..920a3f0 100644 --- a/src/callable/primitive/c.rs +++ b/src/callable/primitive/c.rs @@ -213,9 +213,10 @@ impl Callable for PrimitiveC { }; if let Some(names) = names { - v.set_names(names.into()) + Ok(Obj::Vector(v.set_names(names.into()))) + } else { + Ok(Obj::Vector(v)) } - Ok(Obj::Vector(v)) } } diff --git a/src/callable/primitive/sum.rs b/src/callable/primitive/sum.rs index 52470cb..11a8ab6 100644 --- a/src/callable/primitive/sum.rs +++ b/src/callable/primitive/sum.rs @@ -5,7 +5,7 @@ use crate::error::*; use crate::formals; use crate::internal_err; use crate::lang::*; -use crate::object::reptype::RepType; +use crate::object::rep::Rep; use crate::object::*; /// Calculate a Sum of Elements @@ -40,7 +40,7 @@ impl Callable for PrimitiveSum { let (_, ellipsis) = self.match_arg_exprs(args, stack)?; if ellipsis.is_empty() { - return EvalResult::Ok(Obj::Vector(Vector::from(RepType::from(vec![0.0])))); + return EvalResult::Ok(Obj::Vector(Vector::from(Rep::from(vec![0.0])))); } let objects: Vec = force_promises(ellipsis, stack)? @@ -78,8 +78,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x as i32 as f64, @@ -90,8 +90,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x as f64, @@ -102,8 +102,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x, @@ -116,7 +116,7 @@ impl Callable for PrimitiveSum { _ => return internal_err!(), } } - EvalResult::Ok(Obj::Vector(Vector::from(RepType::from(vec![sum])))) + EvalResult::Ok(Obj::Vector(Vector::from(Rep::from(vec![sum])))) } else { let mut sum: i32 = 0; @@ -128,8 +128,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x as i32, @@ -140,8 +140,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x, @@ -154,7 +154,7 @@ impl Callable for PrimitiveSum { _ => return internal_err!(), } } - EvalResult::Ok(Obj::Vector(Vector::from(RepType::from(vec![sum])))) + EvalResult::Ok(Obj::Vector(Vector::from(Rep::from(vec![sum])))) } } } diff --git a/src/object/vector/core.rs b/src/object/vector/core.rs index 1966e45..850eb84 100644 --- a/src/object/vector/core.rs +++ b/src/object/vector/core.rs @@ -8,9 +8,8 @@ use crate::object::CowObj; use crate::object::Obj; use super::coercion::CoercibleInto; +use super::rep::IterableValues; use super::rep::Rep; -use super::reptype::IterableValues; -use super::reptype::RepType; use super::subset::Subset; use super::types::*; @@ -139,13 +138,14 @@ impl Vector { } } - pub fn set_names(&self, names: CowObj>) { + pub fn set_names(&self, names: CowObj>) -> Self { + use super::Vector::*; match self { - Vector::Character(x) => x.set_names(names), - Vector::Logical(x) => x.set_names(names), - Vector::Integer(x) => x.set_names(names), - Vector::Double(x) => x.set_names(names), - }; + Character(x) => Character(x.set_names(names)), + Logical(x) => Logical(x.set_names(names)), + Integer(x) => Integer(x.set_names(names)), + Double(x) => Double(x.set_names(names)), + } } pub fn try_get(&self, index: Obj) -> EvalResult { @@ -323,30 +323,6 @@ impl From>> for Vector { } } -impl From> for Vector { - fn from(x: RepType) -> Self { - Vector::Double(x.into()) - } -} - -impl From> for Vector { - fn from(x: RepType) -> Self { - Vector::Integer(x.into()) - } -} - -impl From> for Vector { - fn from(x: RepType) -> Self { - Vector::Logical(x.into()) - } -} - -impl From> for Vector { - fn from(x: RepType) -> Self { - Vector::Character(x.into()) - } -} - impl From> for Vector { fn from(x: Rep) -> Self { Vector::Double(x) diff --git a/src/object/vector/mod.rs b/src/object/vector/mod.rs index 630c962..1db5ee6 100644 --- a/src/object/vector/mod.rs +++ b/src/object/vector/mod.rs @@ -7,7 +7,6 @@ pub mod coercion; pub mod iterators; pub mod rep; -pub mod reptype; pub mod types; mod subsets; diff --git a/src/object/vector/rep.rs b/src/object/vector/rep.rs index f790dc7..fc48e17 100644 --- a/src/object/vector/rep.rs +++ b/src/object/vector/rep.rs @@ -1,128 +1,351 @@ -use std::cell::{Ref, RefCell, RefMut}; -use std::fmt::{Debug, Display}; +use std::fmt::Debug; +use std::fmt::Display; use std::iter::repeat; use super::coercion::{AtomicMode, CoercibleInto, CommonCmp, CommonNum, MinimallyNumeric}; -use super::reptype::{ - IntoIterableRefNames, IntoIterableRefPairs, IntoIterableRefValues, IterablePairs, - IterableValues, Naming, RepType, -}; use super::subset::Subset; +use super::subsets::Subsets; use super::types::*; use super::{OptionNA, Pow, VecPartialCmp}; use crate::error::Error; use crate::lang::Signal; -use crate::object::{CowObj, Obj, Subsets, ViewMut}; +use crate::object::{CowObj, ViewMut}; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::rc::Rc; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct Naming { + // TODO: change this to usize and not Vec (after making names unique) + pub map: CowObj>>, + pub names: CowObj>>, +} -/// Vector Representation -/// -/// The ref-cell is used so vectors can change there internal representation, -/// e.g. by materializing. -#[derive(Debug, PartialEq)] -pub struct Rep(pub RefCell>); +impl Naming { + /// Create an empty `Naming` + pub fn new() -> Self { + Naming::default() + } -impl Clone for Rep { - fn clone(&self) -> Self { - match self.borrow().clone() { - RepType::Subset(v, s, n) => Rep(RefCell::new(RepType::Subset( - v.clone(), - s.clone(), - n.clone(), - ))), + // Allocates a new Naming with a capacity for `capacity` elements. + pub fn with_capacity(capacity: usize) -> Self { + Self { + map: HashMap::>::with_capacity(capacity).into(), + names: CowObj::from(Vec::::with_capacity(capacity)), } } + + /// Push a new name onto the `Naming`. + pub fn push_name(&self, name: OptionNA) { + self.names.with_inner_mut(|v| v.push(name.clone())); + if let OptionNA::Some(name) = name { + let n = self.names.len() - 1; + self.map.with_inner_mut(|map| { + let indices = map.entry(name.clone()).or_default(); + if !indices.contains(&n) { + indices.push(n); + }; + }); + }; + } + + /// Get mutable access to the internal data (map and names vector) via the passed closure. + pub fn with_inner_mut(&self, f: F) -> R + where + F: FnOnce(&mut HashMap>, &mut Vec>) -> R, + { + self.map + .with_inner_mut(|map| self.names.with_inner_mut(|names| f(map, names))) + } } -impl ViewMut for Rep { - fn view_mut(&self) -> Self { - Self(RefCell::new(self.borrow().view_mut())) +impl From> for Rep { + fn from(value: Vec<(Character, T)>) -> Self { + let mut names = Vec::with_capacity(value.len()); + let mut values = Vec::with_capacity(value.len()); + for (k, v) in value { + names.push(k); + values.push(v); + } + + Rep::Subset( + CowObj::new(Rc::new(RefCell::new(Rc::new(values)))), + Subsets::default(), + Option::Some(Naming::from(names)), + ) } } -impl Rep { - /// Get the inner value mutably. - /// This is used for assignments like `list(1)[[1]] = 10`. - pub fn try_get_inner_mut(&self, subset: Subset) -> Result { - self.borrow().try_get_inner_mut(subset) +impl From>> for Naming { + fn from(value: CowObj>) -> Self { + let mut map: HashMap> = HashMap::new(); + + value.iter().enumerate().for_each(|(i, maybe_name)| { + if let OptionNA::Some(name) = maybe_name { + let indices = map.entry(name.clone()).or_default(); + if !indices.contains(&i) { + indices.push(i); + }; + }; + }); + + Self { map: map.into(), names: value } + } +} + +/// Vector +#[derive(Debug, PartialEq)] +pub enum Rep { + // Vector::Subset encompasses a "raw" vector (no subsetting) + Subset(CowObj>, Subsets, Option), + // Iterator includes things like ranges 1:Inf, and lazily computed values + // Iter(Box>) +} + +impl Clone for Rep { + fn clone(&self) -> Self { + match self { + Rep::Subset(v, s, n) => Rep::Subset(v.clone(), s.clone(), n.clone()), + } + } +} + +impl Default for Rep { + fn default() -> Self { + Self::new() } +} +impl Rep { /// Get a cloned version of the inner value. /// This is used for accessing inner values like `list(1)[[1]]`. pub fn try_get_inner(&self, subset: Subset) -> Result { #[allow(clippy::map_clone)] self.try_get_inner_mut(subset).map(|x| x.clone()) } + /// Retrieve the internal data as a mutable view. + /// This is important for lists for things like `l$a[1:2] = c(10, 11)` + pub fn try_get_inner_mut(&self, subset: Subset) -> Result { + let new_subset = self.subset(subset); + match new_subset { + Rep::Subset(..) => { + let mut iter = new_subset.iter_subset_indices(); + + if let Some(i) = iter.next() { + if iter.next().is_some() { + return Error::Other("subset has length > 1".to_string()).into(); + } + + // TODO: subsetting with NA should not be possible. + let i = i.unwrap(); + + Ok(self.with_inner_mut(|values| values[i].view_mut())) + } else { + Error::Other("subset is empty".to_string()).into() + } + } + } + } } -impl Rep { - /// Iterate over the owned names and values of the vector. - pub fn iter_pairs(&self) -> IterablePairs { - self.0.borrow().clone().iter_pairs() +pub struct IntoIterableRefNames { + names: Rc>, + na_name: Character, + iter: Box>>, +} + +pub struct RepIterableNames<'a> { + names: &'a [Character], + na_name: &'a Character, + iter: &'a mut Box>>, +} + +impl IntoIterableRefNames { + pub fn iter(&mut self) -> RepIterableNames<'_> { + let names = &self.names[..]; + RepIterableNames { + names, + na_name: &self.na_name, + iter: &mut self.iter, + } } } -impl Rep -where - T: Clone + Default, -{ - /// Return the only value if the vector has length 1. - pub fn as_scalar(&self) -> Option { - let mut into_iter = self.values_ref(); - let mut iter = into_iter.iter(); - if let Some(x) = iter.next() { - if iter.next().is_none() { - return Some(x.clone()); - } - }; - None +impl<'a> Iterator for RepIterableNames<'a> { + type Item = &'a Character; + + fn next(&mut self) -> Option { + if let Some(i) = self.iter.next()? { + Some(&self.names[i]) + } else { + Some(self.na_name) + } } +} - pub fn borrow(&self) -> Ref> { - self.0.borrow() +pub struct IntoIterableRefValues { + values: Rc>, + na_value: T, + iter: Box>>, +} + +impl IntoIterableRefValues { + pub fn iter(&mut self) -> IterableRefValues<'_, T> { + let values = &self.values[..]; + + IterableRefValues { + values, + na_value: &self.na_value, + iter: &mut self.iter, + } } +} + +pub struct IntoIterableRefPairs { + values: Rc>, + names: Option>>, + na_value: T, + na_name: Character, + iter: Box>>, +} + +impl IntoIterableRefPairs { + pub fn iter(&mut self) -> IterableRefPairs<'_, T> { + let values = &self.values[..]; - pub fn borrow_mut(&mut self) -> RefMut> { - self.0.borrow_mut() + let names = self.names.as_ref().map(|names| &names[..]); + + IterableRefPairs { + values, + names, + na_value: &self.na_value, + na_name: &self.na_name, + iter: &mut self.iter, + } } +} - /// Iterate over the (owned) values of the vector. - pub fn iter_values(&self) -> IterableValues { - self.0.borrow().iter_values() +pub struct IterableRefValues<'a, T: Clone> { + values: &'a [T], + na_value: &'a T, + iter: &'a mut Box>>, +} + +pub struct IterableRefPairs<'a, T: Clone> { + values: &'a [T], + names: Option<&'a [Character]>, + na_value: &'a T, + na_name: &'a Character, + iter: &'a mut Box>>, +} + +impl<'a, T: Clone> Iterator for IterableRefPairs<'a, T> { + type Item = (&'a Character, &'a T); + + fn next(&mut self) -> Option { + if let Some(i) = self.iter.next()? { + if let Some(names) = self.names { + Option::Some((&names[i], &self.values[i])) + } else { + Option::Some((self.na_name, &self.values[i])) + } + } else { + Option::Some((self.na_name, self.na_value)) + } } +} - /// Iterate over the names of the vector (if they exist). - pub fn iter_names(&self) -> Option> { - self.0.borrow().iter_names() +impl<'a, T: Clone> Iterator for IterableRefValues<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + if let Some(i) = self.iter.next()? { + Some(&self.values[i]) + } else { + Some(self.na_value) + } + } +} + +impl ViewMut for Rep { + fn view_mut(&self) -> Self { + match self { + Rep::Subset(v, s, n) => Rep::Subset(v.view_mut(), s.clone(), n.clone()), + } } +} - fn materialize_inplace(&self) -> &Self { - // TODO: Rewrite this to avoid copying unnecessarily - let new_repr = { self.borrow().materialize() }; - self.0.replace(new_repr); +pub struct IterableValues { + values: Rc>, + iter: Box>>, +} - self +impl Iterator for IterableValues { + type Item = T; + fn next(&mut self) -> Option { + // FIXME: Already assumes no indexing with NA + let i = self.iter.next()?.unwrap(); + Some(self.values[i].clone()) } +} - /// Reindex the mapping from names to indices using the names vector from the `Naming`. - pub fn reindex(&mut self) { - self.borrow_mut().reindex() +pub struct IterablePairs { + values: Rc>, + names: Option>>, + iter: Box>>, +} + +impl Iterator for IterablePairs { + type Item = (Character, T); + fn next(&mut self) -> Option { + // FIXME: Already assumes no indexing with NA + let i = self.iter.next()?.unwrap(); + let value = self.values[i].clone(); + let name = if let Some(names) = &self.names { + names[i].clone() + } else { + Character::NA + }; + Some((name, value)) } +} - /// Set the names of the vector. - pub fn set_names(&self, names: CowObj>>) { - let new_repr = self.borrow().materialize().set_names(names); - self.0.replace(new_repr); +impl Rep { + /// Create an empty vector + /// + /// The primary use case for this function is to support testing, and there + /// are few expected use cases outside. It is used for creating a vector + /// of an explicit atomic type, likely to be tested with + /// `SameType::is_same_type_as`. + /// + /// ``` + /// use r::utils::*; + /// use r::object::Vector; + /// use r::object::OptionNA; + /// + /// let result = Vector::from(vec![1, 2, 3]); + /// let expect = Vector::from(Vec::>::new()); + /// + /// assert!(result.is_same_type_as(&expect)) + /// ``` + /// + pub fn new() -> Self { + Rep::Subset( + Vec::new().into(), + Subsets(Vec::new()), + Some(Naming::default()), + ) } /// Whether the vector representation has names. pub fn is_named(&self) -> bool { - matches!(*self.borrow(), RepType::Subset(.., Some(_))) + matches!(self, Rep::Subset(.., Some(_))) } /// Return the names of the vector if there are any. pub fn names(&self) -> Option>> { - match self.borrow().clone() { - RepType::Subset(_, s, n) => { + match self.clone() { + Rep::Subset(_, s, n) => { if s.is_empty() { n.map(|n| n.clone().names) } else if n.is_some() { @@ -139,31 +362,32 @@ where } } - pub fn dedup_last(self) -> Self { - self.0.into_inner().dedup_last().into() - } - - /// Constructs a new, empty `Rep` with at least the specified `capacity`. - /// Names are only include if `names` is true. - pub fn with_capacity(capacity: usize, names: bool) -> Self { - let naming = if names { - Some(Naming::with_capacity(capacity)) - } else { - None - }; - Self(RefCell::new(RepType::Subset( - CowObj::from(Vec::with_capacity(capacity)), - Subsets::default(), - naming, - ))) - } + /// Change a value at the location given by `subset` to the provided `value`. + /// If the `subset` does not have length `1`, an error is returned. + pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { + match &self { + Rep::Subset(..) => { + let err = Error::Other("subset must have length 1".to_string()); + + let mut iter = self.clone().subset(subset).iter_subset_indices(); + let i1 = iter.next(); + + // check that subset has exactly length 1 + // assumes no indexing with NA (unwrap the option) + let i = if let Some(i) = i1 { + if iter.next().is_some() { + return err.into(); + } + i + } else { + return err.into(); + } + .unwrap(); - /// Get an `RepTypeIntoIterablePairs` which in turn can be converted into an iterator over - /// pairs of references (&name, &value). - /// - /// Directly getting an iterator is not possible due to lifetime issues. - pub fn pairs_ref(&self) -> IntoIterableRefPairs { - self.0.borrow().pairs_ref() + self.with_inner_mut(|v| v[i] = value.clone()); + Ok(value.clone()) + } + } } /// Get an `Option>` which in turn can be converted into an iterator over @@ -172,7 +396,14 @@ where /// /// Directly getting an iterator is not possible due to lifetime issues. pub fn values_ref(&self) -> IntoIterableRefValues { - self.0.borrow().values_ref() + match self.clone() { + Rep::Subset(values, ..) => { + let iter = Box::new(self.iter_subset_indices()); + let values = values.inner_rc(); + + IntoIterableRefValues { values, na_value: T::default(), iter } + } + } } /// Get an `RepTypeIntoIterableValues` which in turn can be converted into an iterator over @@ -180,87 +411,379 @@ where /// /// Directly getting an iterator is not possible due to lifetime issues. pub fn names_ref(&self) -> Option { - self.0.borrow().names_ref() - } + match self.clone() { + Rep::Subset(.., naming) => { + let iter = Box::new(self.iter_subset_indices()); + let naming = naming?; + let names = naming.names.inner_rc(); - pub fn materialize(&self) -> Self { - self.borrow().materialize().into() + Some(IntoIterableRefNames { names, na_name: Character::default(), iter }) + } + } } - /// Create an empty vector - /// - /// The primary use case for this function is to support testing, and there - /// are few expected use cases outside. It is used for creating a vector - /// of an explicit atomic type, likely to be tested with - /// `SameType::is_same_type_as`. - /// - /// ``` - /// use r::utils::*; - /// use r::object::Vector; - /// use r::object::OptionNA; - /// - /// let result = Vector::from(vec![1, 2, 3]); - /// let expect = Vector::from(Vec::>::new()); - /// - /// assert!(result.is_same_type_as(&expect)) - /// ``` + /// Get an `RepTypeIntoIterablePairs` which in turn can be converted into an iterator over + /// pairs of references (&name, &value). /// - pub fn new() -> Self { - RepType::new().into() + /// Directly getting an iterator is not possible due to lifetime issues. + pub fn pairs_ref(&self) -> IntoIterableRefPairs { + match self.clone() { + Rep::Subset(values, _, maybe_naming) => { + let iter = Box::new(self.iter_subset_indices()); + let values = values.inner_rc(); + let names = maybe_naming.map(|x| x.names.inner_rc()); + + IntoIterableRefPairs { + values, + names, + na_value: T::default(), + na_name: Character::NA, + iter, + } + } + } + } + + pub fn iter_pairs(&self) -> IterablePairs { + match self.clone() { + Rep::Subset(values, _, maybe_naming) => { + let iter = Box::new(self.iter_subset_indices()); + let values = values.inner_rc(); + let names = maybe_naming.map(|x| x.names.inner_rc()); + + IterablePairs { values, names, iter } + } + } + } + + /// Iterate over the (owned) values of the vector. + pub fn iter_values(&self) -> IterableValues { + match self.clone() { + Rep::Subset(values, ..) => { + let iter = Box::new(self.iter_subset_indices()); + IterableValues { values: values.inner_rc(), iter } + } + } } + /// Iterate over the names of the vector (if they exist). + pub fn iter_names(&self) -> Option> { + match self.clone() { + Rep::Subset(.., maybe_naming) => { + let iter = Box::new(self.iter_subset_indices()); + let names = maybe_naming.map(|x| x.names.inner_rc())?; + + Some(IterableValues { values: names, iter }) + } + } + } + + pub fn push_value(&self, value: T) { + self.push_named(Character::NA, value); + } + + /// Push a named `value` with a given `name` onto the `Rep`. + pub fn push_named(&self, name: OptionNA, value: T) { + match self { + Rep::Subset(values, Subsets(subsets), maybe_naming) => match subsets.as_slice() { + [] => { + values.with_inner_mut(|values| values.push(value)); + if let Some(naming) = maybe_naming { + naming.push_name(name) + } + } + _ => unimplemented!(), + }, + } + } + + pub fn iter_subset_indices_exact(&self) -> ExactIterSubsetIndices { + // TODO(performance): Avoid the vector allocation + let iter = self.iter_subset_indices(); + let len = iter.count(); + let iter = self.iter_subset_indices(); + ExactIterSubsetIndices { iter, len } + } + + pub fn iter_subset_indices(&self) -> Box>> { + match self.clone() { + Rep::Subset(vals, subsets, maybe_naming) => { + if subsets.is_empty() { + return Box::new((0_usize..vals.len()).map(Some)); + } + + if let Some(naming) = maybe_naming { + Box::new(subsets.bind_names(naming.map).into_iter().map(|(_, y)| y)) + } else { + Box::new(subsets.into_iter().map(|(_, y)| y)) + } + } + } + } + + /// Reindex the mapping from names to indices. + pub fn reindex(&mut self) { + if let Rep::Subset(.., Some(naming)) = self { + naming.map.with_inner_mut(|map| { + map.drain(); + + for (i, maybe_name) in naming.names.borrow().iter().enumerate() { + if let OptionNA::Some(name) = maybe_name { + let indices = map.entry(name.clone()).or_default(); + if !indices.contains(&i) { + indices.push(i) + } + } + } + }) + } + } + + /// Constructs a new, empty `Rep` with at least the specified `capacity`. + /// Names are only include if `names` is true. + pub fn with_capacity(capacity: usize, names: bool) -> Self { + let naming = if names { + Some(Naming::with_capacity(capacity)) + } else { + None + }; + Rep::Subset( + CowObj::from(Vec::with_capacity(capacity)), + Subsets::default(), + naming, + ) + } + + pub fn dedup_last(self) -> Self { + match self { + Rep::Subset(values, subsets, Some(naming)) => { + naming.with_inner_mut(|map, names| { + let mut dups: Vec = map + .iter() + .flat_map(|(_, indices)| { + indices + .split_last() + .map_or(vec![], |(_, leading_dups)| leading_dups.to_vec()) + }) + .collect(); + + dups.sort(); + + values.with_inner_mut(|vs| { + for i in dups.into_iter().rev() { + vs.remove(i); + names.remove(i); + } + }); + + for (_, indices) in map.iter_mut() { + indices.drain(0..(indices.len())); + } + }); + Rep::Subset(values, subsets, Some(naming)) + } + Rep::Subset(.., None) => self, + } + } + + pub fn set_names(&self, names: CowObj>) -> Self { + match self { + Rep::Subset(v, s, _) => Rep::Subset(v.clone(), s.clone(), Option::Some(names.into())), + } + } + + /// Access a lazy copy of the internal vector data pub fn inner(&self) -> CowObj> { - self.borrow().inner() + match self.materialize() { + Rep::Subset(v, ..) => v.clone(), + } } - pub fn len(&self) -> usize { - // TODO: Only materialize when necessary - self.materialize_inplace(); - self.borrow().len() + /// Get mutable access to the internal vector through the passed closure. + pub fn with_inner_mut(&self, f: F) -> R + where + F: FnOnce(&mut Vec) -> R, + { + match self { + Rep::Subset(v, ..) => v.with_inner_mut(f), + } } /// Subsetting a Vector /// /// Introduce a new subset into the aggregate list of subset indices. - /// pub fn subset(&self, subset: Subset) -> Self { - (*self.borrow()).subset(subset).into() + match self { + Rep::Subset(v, Subsets(subsets), n) => { + let mut subsets = subsets.clone(); + subsets.push(subset); + Rep::Subset(v.view_mut(), Subsets(subsets), n.clone()) + } + } } + /// The length of the vector. + pub fn len(&self) -> usize { + match self { + Rep::Subset(v, Subsets(s), _) => match s.as_slice() { + [] => v.borrow().len(), + _ => self.values_ref().iter().count(), + }, + } + } + + /// Whether the vector has length 0. #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn get(&self, index: usize) -> Option { - let x = self.borrow().get(index); - x.map(|x| x.into()) + /// Get a single element from a vector + /// + /// Access a single element without materializing a new vector + /// + pub fn get(&self, index: usize) -> Option> + where + T: Clone, + { + match self { + Rep::Subset(v, subsets, _) => { + let vb = v.borrow(); + let index = subsets.get_index_at(index)?; + let elem = vb.get(index)?; + Some(Rep::Subset( + vec![elem.clone()].into(), + Subsets::new(), + Option::Some(Naming::new()), + )) + } + } } - /// Change a value at the location given by `subset` to the provided `value`. - /// If the `subset` does not have length `1`, an error is returned. - pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { - // Used for `[[`-assignment. - self.0.borrow_mut().set_subset(subset, value) + /// Assignment to Subset Indices + /// + /// Assignment to a vector from another. The aggregate subsetted indices + /// are iterated over while performing the assignment. + /// + pub fn assign(&mut self, value: Rep) -> Result + where + T: Clone + Default + From, + R: Default + Clone, + { + let l_indices = self.iter_subset_indices_exact(); + let mut r_indices = value.iter_subset_indices_exact(); + + // TODO(performance): When we clone the interior data of self (to which we write) + // we don't have to perform recycling checks + // and just start iterating. We can always discard the result afterwards again + // Maybe implement filter_exact on (named)subsets + if r_indices.len() == 1 { + // get the element from reptype value + let index = r_indices + .next() + .expect("index should exist") + .expect("No NA for subsetting"); + let elem = value.get_inner(index).expect("element should exist"); + match (self, value) { + (Rep::Subset(lv, ls, ln), Rep::Subset(..)) => { + lv.with_inner_mut(|lvb| { + for li in l_indices { + lvb[li.unwrap()] = elem.clone().into(); + } + }); + return Ok(Rep::Subset(lv.clone(), ls.clone(), ln.clone())); + } + } + } + + if l_indices.len() != r_indices.len() { + return Err(Signal::Error(Error::NonRecyclableLengths( + l_indices.len(), + r_indices.len(), + ))); + } + + match (self, value) { + (Rep::Subset(lv, ls, ln), Rep::Subset(rv, ..)) => { + lv.with_inner_mut(|lvb| { + let rvc = rv.clone(); + let rvb = rvc.borrow(); + + for (li, ri) in l_indices.zip(r_indices) { + match (li, ri) { + (Some(li), None) => lvb[li] = T::default(), + (Some(li), Some(ri)) => lvb[li] = rvb[ri % rvb.len()].clone().into(), + _ => (), + } + } + }); + + Ok(Rep::Subset(lv.clone(), ls.clone(), ln.clone())) + } + } } - /// Push a named `value` with a given `name` onto the `Rep`. - pub fn push_named(&self, name: OptionNA, value: T) { - self.borrow().push_named(name, value) + /// Return the only value if the vector has length 1. + pub fn as_scalar(&self) -> Option { + let mut into_iter = self.values_ref(); + let mut iter = into_iter.iter(); + if let Some(x) = iter.next() { + if iter.next().is_none() { + return Some(x.clone()); + } + }; + None } - /// Assign to the vector, often with a view through a Subset. - /// An error is thrown if the lengths are not compatible. - pub fn assign(&mut self, value: Rep) -> Result + /// Materialize a Vector + /// + /// Apply subsets and clone values into a new vector. + pub fn materialize(&self) -> Self where - T: From + Clone, - R: Clone + Default, + T: Clone, { - self.0 - .borrow_mut() - .assign(value.0.into_inner()) - .map(|x| x.into()) + match self { + Rep::Subset(v, subsets, naming) => { + // early exit when there is nothing to do + match subsets { + Subsets(s) => { + if s.as_slice().is_empty() { + return self.clone(); + } + } + } + + let vc = v.clone(); + let vb = vc.borrow(); + let mut res: Vec = vec![]; + let vb_len = vb.len(); + + let new_naming = Naming::new(); + + let iter = subsets.clone().into_iter().take_while(|(i, _)| i < &vb_len); + + for (_, i) in iter { + match i { + Some(i) => { + res.push(vb[i].clone()); + if let Option::Some(n) = naming { + new_naming.push_name(n.names.borrow()[i].clone()) + }; + } + // default is NA + None => { + res.push(T::default()); + // When we subset with NA, there is no name for this entry; + new_naming.push_name(OptionNA::NA); + } + } + } + + Rep::Subset(res.into(), Subsets(vec![]), Option::None) + } + } } + /// Test the mode of the internal vector type /// /// Internally, this is defined by the [crate::object::coercion::AtomicMode] @@ -272,6 +795,7 @@ where { T::is_double() } + /// See [Self::is_double] for more information pub fn is_logical(&self) -> bool where @@ -279,6 +803,7 @@ where { T::is_logical() } + /// See [Self::is_double] for more information pub fn is_integer(&self) -> bool where @@ -286,6 +811,7 @@ where { T::is_integer() } + /// See [Self::is_double] for more information pub fn is_character(&self) -> bool where @@ -320,16 +846,25 @@ where /// pub fn as_mode(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, Mode: Clone, { - Rep(RefCell::new(self.borrow().as_mode())) + match self { + Rep::Subset(v, subsets, naming) => { + let vc = v.clone(); + let vb = vc.borrow(); + + let num_vec: Vec = vb.iter().map(|i| (*i).clone().coerce_into()).collect(); + + Rep::Subset(num_vec.into(), subsets.clone(), naming.clone()) + } + } } /// See [Self::as_mode] for more information pub fn as_logical(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } @@ -337,7 +872,7 @@ where /// See [Self::as_mode] for more information pub fn as_integer(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } @@ -345,7 +880,7 @@ where /// See [Self::as_mode] for more information pub fn as_double(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } @@ -353,102 +888,154 @@ where /// See [Self::as_mode] for more information pub fn as_character(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } -} -impl Default for Rep -where - T: Clone + Default, -{ - fn default() -> Self { - Rep(RefCell::new(RepType::default())) + pub fn get_inner(&self, index: usize) -> Option { + match self { + Rep::Subset(v, subsets, maybe_naming) => { + if maybe_naming.is_some() { + // TODO(NOW) + unimplemented!() + } + let vb = v.borrow(); + let index = subsets.get_index_at(index)?; + vb.get(index).cloned() + } + } } } -impl From> for Rep -where - T: Clone + Default, -{ - fn from(rep: Vec) -> Self { - Rep(RefCell::new(RepType::from(CowObj::from(rep)))) - } +pub struct ExactIterSubsetIndices { + iter: Box>>, + len: usize, } -impl From>> for Rep -where - T: Clone + Default, -{ - fn from(rep: CowObj>) -> Self { - Rep(RefCell::new(rep.into())) +impl ExactSizeIterator for ExactIterSubsetIndices { + fn len(&self) -> usize { + self.len } } -impl From> for Rep -where - T: Clone + Default, -{ - fn from(rep: RepType) -> Self { - Rep(RefCell::new(rep)) +impl Iterator for ExactIterSubsetIndices { + type Item = Option; + fn next(&mut self) -> Option { + self.iter.next() } } -// TODO: I think this should err when rep has length > 1 impl TryInto for Rep> where OptionNA: AtomicMode + Clone + CoercibleInto>, - T: 'static, { type Error = (); fn try_into(self) -> Result { - self.iter_pairs() - .next() - .map(|(_, x)| x) - .map_or( - Err(()), - |i| match CoercibleInto::>::coerce_into(i) { - OptionNA::Some(x) => Ok(x), - OptionNA::NA => Err(()), - }, - ) + self.get_inner(0).map_or( + Err(()), + |i| match CoercibleInto::>::coerce_into(i) { + OptionNA::Some(x) => Ok(x), + OptionNA::NA => Err(()), + }, + ) } } -impl From> for Rep { - fn from(value: Vec<(Character, Obj)>) -> Self { - Rep(RefCell::new(value.into())) +impl From> for Naming { + fn from(value: Vec) -> Self { + let naming = Naming::new(); + for k in value { + naming.push_name(k); + } + naming + } +} + +impl From>> for Rep { + fn from(value: CowObj>) -> Self { + Rep::Subset(value, Subsets::default(), Option::None) + } +} + +impl From, T)>> for Rep { + fn from(value: Vec<(Option, T)>) -> Self { + let mut names = Vec::with_capacity(value.len()); + let mut values = Vec::with_capacity(value.len()); + for (k, v) in value.into_iter() { + names.push(k.map_or(Character::NA, Character::Some)); + values.push(v) + } + let naming = Naming::from(names); + Rep::Subset(values.into(), Subsets::default(), Some(naming)) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } -impl From, T)>> for Rep { - fn from(value: Vec<(Option, T)>) -> Self { - Rep(RefCell::new(value.into())) +impl From<(Vec, Subsets)> for Rep +where + Rep: From>, + T: Clone, +{ + fn from(value: (Vec, Subsets)) -> Self { + match Self::from(value.0) { + Rep::Subset(v, ..) => Rep::Subset(v, value.1, Option::None), + } } } @@ -578,7 +1165,7 @@ impl std::ops::Neg for Rep where L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, LNum: std::ops::Neg, - RepType: From>, + Rep: From>, O: Clone, { type Output = Result, Signal>; @@ -587,7 +1174,7 @@ where .iter_values() .map(|x| -(CoercibleInto::::coerce_into(x))) .collect(); - Ok(Rep(RefCell::new(result.into()))) + Ok(result.into()) } } @@ -716,7 +1303,7 @@ where .iter_values() .map(|x| !(CoercibleInto::::coerce_into(x))) .collect(); - Ok(Rep(RefCell::new(result.into()))) + Ok(result.into()) } } @@ -907,3 +1494,367 @@ where f(c1, c2) }) } +#[cfg(test)] +mod test { + use super::OptionNA::*; + use crate::object::rep::Rep; + use crate::object::{types::*, OptionNA, VecPartialCmp}; + use crate::r; + use crate::utils::SameType; + + #[test] + fn vector_add() { + let x = Rep::::from((1..=5).collect::>()); + let y = Rep::::from(vec![2, 5, 6, 2, 3]); + + let z = (x + y).unwrap(); + assert_eq!(z, Rep::from(vec![3, 7, 9, 6, 8])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_integer()); + } + + #[test] + fn vector_mul() { + let x = Rep::::from((1..=5).collect::>()); + let y = Rep::::from(vec![Some(2), NA, Some(6), NA, Some(3)]); + + let z = (x * y).unwrap(); + assert_eq!(z, Rep::from(vec![Some(2), NA, Some(18), NA, Some(15),])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_integer()); + } + + #[test] + fn vector_common_mul_f32_na() { + // expect that f32's do not get coerced into an OptionNA:: instead + // using std::f32::NAN as NA representation. + + let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); + let y = Rep::::from(vec![100, 10, 1]); + + let z = (x * y).unwrap(); + // assert_eq!(z, Vector::from(vec![0_f32, std::f32::NAN, 1_000_f32])); + // comparing floats is error prone + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_double()); + } + + #[test] + fn vector_and() { + // expect that f32's do not get coerced into an OptionNA:: instead + // using std::f32::NAN as NA representation. + + let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); + let y = Rep::::from(vec![100, 10, 1]); + + let z = (x & y).unwrap(); + assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_logical()); + } + + #[test] + fn vector_gt() { + // expect that f32's do not get coerced into an instead + // using std::f32::NAN as NA representation. + + let x = Rep::from(vec![Some(0_f64), NA, Some(10000_f64)]); + let y = Rep::::from(vec![100, 10, 1]); + + let z = x.vec_gt(y).unwrap(); + assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_logical()); + } + + #[test] + fn test_iter_values() { + // Create values as Vec + let values = vec![1, 2, 3, 4, 5]; + + // Create Rep from values + let rep = Rep::from(values.clone()); + + // Use iter_values to get an iterator and collect values + let collected_values: Vec = rep.iter_values().collect(); + + // Expected values as Vec> + let expected_values: Vec = values.into_iter().map(OptionNA::Some).collect(); + + // Assert collected values match expected values + assert_eq!(collected_values, expected_values); + } + + #[test] + fn test_iter_names() { + // Create values with names + let values_with_names = vec![ + (Character::Some(String::from("a")), 1), + (Character::Some(String::from("b")), 2), + (Character::NA, 3), + (Character::Some(String::from("d")), 4), + (Character::NA, 5), + ]; + + // Create Rep from values with names + let rep = Rep::from(values_with_names.clone()); + + // Use iter_names to get an iterator + let names_iter = rep.iter_names(); + + // Ensure iter_names is Some iterator + assert!(names_iter.is_some()); + + // Collect names + let collected_names: Vec = names_iter.unwrap().collect(); + + // Expected names + let expected_names: Vec = values_with_names + .iter() + .map(|(name_opt, _)| match name_opt { + Some(name) => Character::Some(name.clone()), + Character::NA => Character::NA, + }) + .collect(); + + // Assert collected names match expected names + assert_eq!(collected_names, expected_names); + } + + use crate::object::{Obj, Vector}; + // The tests below don't test the subsetting mechanism, which is instead tested in subsets.rs + #[test] + fn iter_pairs_mixed_names() { + let x = r!(c(a = 1, 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_pairs() + } else { + unreachable!() + }; + + assert_eq!( + x.next().unwrap(), + (Character::Some("a".to_string()), Double::Some(1.0)) + ); + assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); + assert_eq!(x.next(), None); + } + + #[test] + fn iter_pairs_no_names() { + let x = r!(c(1, 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_pairs() + } else { + unreachable!() + }; + + assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(1.0))); + assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); + assert_eq!(x.next(), None); + } + + #[test] + fn iter_values() { + let x = r!(c(1, 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_values() + } else { + unreachable!() + }; + + assert_eq!(x.next().unwrap(), Double::Some(1.0)); + assert_eq!(x.next().unwrap(), Double::Some(2.0)); + assert_eq!(x.next(), None); + } + + #[test] + fn iter_names_none() { + let x = r!(c(1, 2)).unwrap(); + + let x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_names() + } else { + unreachable!() + }; + + assert!(x.is_none()) + } + + #[test] + fn iter_names_some() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_names().unwrap() + } else { + unreachable!() + }; + + assert_eq!(x.next().unwrap(), Character::NA); + assert_eq!(x.next().unwrap(), Character::Some("b".to_string())); + assert_eq!(x.next(), None); + } + + #[test] + fn names_ref_iter_some() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.names_ref().unwrap() + } else { + unreachable!() + }; + + let mut x = x.iter(); + + assert_eq!(x.next().unwrap(), &Character::NA); + assert_eq!(x.next().unwrap(), &Character::Some("b".to_string())); + assert_eq!(x.next(), None); + } + + #[test] + #[should_panic] + fn names_ref_iter_none() { + let x = r!(c(1, 2)).unwrap(); + + if let Obj::Vector(Vector::Double(r)) = x { + r.names_ref().unwrap() + } else { + unreachable!() + }; + } + + #[test] + fn values_ref_iter() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.values_ref() + } else { + unreachable!() + }; + + let mut x = x.iter(); + + assert_eq!(x.next().unwrap(), &Double::Some(1.0)); + assert_eq!(x.next().unwrap(), &Double::Some(2.0)); + assert_eq!(x.next(), None); + } + + #[test] + fn pairs_ref_iter() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.pairs_ref() + } else { + unreachable!() + }; + + let mut x = x.iter(); + + assert_eq!(x.next().unwrap(), (&Character::NA, &Double::Some(1.0))); + assert_eq!( + x.next().unwrap(), + (&Character::Some("b".to_string()), &Double::Some(2.0)) + ); + assert_eq!(x.next(), None); + } + + use crate::error::Error; + use crate::lang::Signal; + + #[test] + fn assign_recycle_incompatible() { + let mut x = Rep::::from(vec![1, 2, 3]); + let y = Rep::::from(vec![99, 99]); + let result = x.assign(y); + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(3, 2)) + ); + } + #[test] + fn assign_recycle_length_one() { + let x = Rep::::from(vec![1, 2, 3]); + let y = Rep::::from(vec![99]); + let mut xview = x.subset(vec![0, 1].into()); + let _ = xview.assign(y).unwrap(); + let result_vec: Vec<_> = x.iter_values().collect(); + assert_eq!(result_vec, vec![Some(99), Some(99), Some(3)]) + } + #[test] + fn non_recyclable_lengths_3_2() { + let x = Rep::::from(vec![1, 2, 3]); + let y = Rep::::from(vec![99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(3, 2)) + ); + } + #[test] + fn non_recyclable_lengths_4_2() { + let x = Rep::::from(vec![1, 2, 3, 4]); + let y = Rep::::from(vec![99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(4, 2)) + ); + } + #[test] + fn non_recyclable_lengths_2_3() { + let x = Rep::::from(vec![1, 2]); + let y = Rep::::from(vec![99, 99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(2, 3)) + ); + } + #[test] + fn non_recyclable_lengths_2_4() { + let x = Rep::::from(vec![1, 2]); + let y = Rep::::from(vec![99, 99, 99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(2, 4)) + ); + } + #[test] + fn non_recyclable_lengths_0_1() { + let x = Rep::::from(Vec::::new()); + let y = Rep::::from(vec![99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(0, 1)) + ); + } + #[test] + fn non_recyclable_lengths_1_0() { + let x = Rep::::from(vec![99]); + let y = Rep::::from(Vec::::new()); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(1, 0)) + ); + } +} diff --git a/src/object/vector/reptype.rs b/src/object/vector/reptype.rs deleted file mode 100644 index 1b9f2f2..0000000 --- a/src/object/vector/reptype.rs +++ /dev/null @@ -1,1291 +0,0 @@ -use std::fmt::Debug; - -use super::coercion::{AtomicMode, CoercibleInto}; -use super::subset::Subset; -use super::subsets::Subsets; -use super::types::*; -use super::OptionNA; -use crate::error::Error; -use crate::lang::Signal; -use crate::object::{CowObj, ViewMut}; -use hashbrown::HashMap; -use std::cell::RefCell; -use std::rc::Rc; - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct Naming { - // TODO: change this to usize and not Vec (after making names unique) - pub map: CowObj>>, - pub names: CowObj>>, -} - -impl Naming { - /// Create an empty `Naming` - pub fn new() -> Self { - Naming::default() - } - - /// Create a naming with the given `capacity`. - pub fn with_capacity(capacity: usize) -> Self { - Self { - map: HashMap::>::with_capacity(capacity).into(), - names: CowObj::from(Vec::::with_capacity(capacity)), - } - } - - /// Push a new name onto the `Naming`. - pub fn push_name(&self, name: OptionNA) { - self.names.with_inner_mut(|v| v.push(name.clone())); - if let OptionNA::Some(name) = name { - let n = self.names.len() - 1; - self.map.with_inner_mut(|map| { - let indices = map.entry(name.clone()).or_default(); - if !indices.contains(&n) { - indices.push(n); - }; - }); - }; - } - - /// Get mutable access to the internal data (map and names vector) via the passed closure. - pub fn with_inner_mut(&self, f: F) -> R - where - F: FnOnce(&mut HashMap>, &mut Vec>) -> R, - { - self.map - .with_inner_mut(|map| self.names.with_inner_mut(|names| f(map, names))) - } -} - -impl From> for RepType { - fn from(value: Vec<(Character, T)>) -> Self { - let mut names = Vec::with_capacity(value.len()); - let mut values = Vec::with_capacity(value.len()); - for (k, v) in value { - names.push(k); - values.push(v); - } - - RepType::Subset( - CowObj::new(Rc::new(RefCell::new(Rc::new(values)))), - Subsets::default(), - Option::Some(Naming::from(names)), - ) - } -} - -impl From>> for Naming { - fn from(value: CowObj>) -> Self { - let mut map: HashMap> = HashMap::new(); - - value.iter().enumerate().for_each(|(i, maybe_name)| { - if let OptionNA::Some(name) = maybe_name { - let indices = map.entry(name.clone()).or_default(); - if !indices.contains(&i) { - indices.push(i); - }; - }; - }); - - Self { map: map.into(), names: value } - } -} - -/// Vector -#[derive(Debug, PartialEq)] -pub enum RepType { - // Vector::Subset encompasses a "raw" vector (no subsetting) - Subset(CowObj>, Subsets, Option), - // Iterator includes things like ranges 1:Inf, and lazily computed values - // Iter(Box>) -} - -impl Clone for RepType { - fn clone(&self) -> Self { - match self { - RepType::Subset(v, s, n) => RepType::Subset(v.clone(), s.clone(), n.clone()), - } - } -} - -impl Default for RepType { - fn default() -> Self { - Self::new() - } -} - -impl RepType { - /// Retrieve the internal data as a mutable view. - /// This is important for lists for things like `l$a[1:2] = c(10, 11)` - pub fn try_get_inner_mut(&self, subset: Subset) -> Result { - let new_subset = self.subset(subset); - match new_subset { - RepType::Subset(..) => { - let mut iter = new_subset.iter_subset_indices(); - - if let Some(i) = iter.next() { - if iter.next().is_some() { - return Error::Other("subset has length > 1".to_string()).into(); - } - - // TODO: subsetting with NA should not be possible. - let i = i.unwrap(); - - Ok(self.with_inner_mut(|values| values[i].view_mut())) - } else { - Error::Other("subset is empty".to_string()).into() - } - } - } - } -} - -pub struct IntoIterableRefNames { - names: Rc>, - na_name: Character, - iter: Box>>, -} - -pub struct RepTypeIterableNames<'a> { - names: &'a [Character], - na_name: &'a Character, - iter: &'a mut Box>>, -} - -impl IntoIterableRefNames { - pub fn iter(&mut self) -> RepTypeIterableNames<'_> { - let names = &self.names[..]; - RepTypeIterableNames { - names, - na_name: &self.na_name, - iter: &mut self.iter, - } - } -} - -impl<'a> Iterator for RepTypeIterableNames<'a> { - type Item = &'a Character; - - fn next(&mut self) -> Option { - if let Some(i) = self.iter.next()? { - Some(&self.names[i]) - } else { - Some(self.na_name) - } - } -} - -pub struct IntoIterableRefValues { - values: Rc>, - na_value: T, - iter: Box>>, -} - -impl IntoIterableRefValues { - pub fn iter(&mut self) -> IterableRefValues<'_, T> { - let values = &self.values[..]; - - IterableRefValues { - values, - na_value: &self.na_value, - iter: &mut self.iter, - } - } -} - -pub struct IntoIterableRefPairs { - values: Rc>, - names: Option>>, - na_value: T, - na_name: Character, - iter: Box>>, -} - -impl IntoIterableRefPairs { - pub fn iter(&mut self) -> IterableRefPairs<'_, T> { - let values = &self.values[..]; - - let names = self.names.as_ref().map(|names| &names[..]); - - IterableRefPairs { - values, - names, - na_value: &self.na_value, - na_name: &self.na_name, - iter: &mut self.iter, - } - } -} - -pub struct IterableRefValues<'a, T: Clone> { - values: &'a [T], - na_value: &'a T, - iter: &'a mut Box>>, -} - -pub struct IterableRefPairs<'a, T: Clone> { - values: &'a [T], - names: Option<&'a [Character]>, - na_value: &'a T, - na_name: &'a Character, - iter: &'a mut Box>>, -} - -impl<'a, T: Clone> Iterator for IterableRefPairs<'a, T> { - type Item = (&'a Character, &'a T); - - fn next(&mut self) -> Option { - if let Some(i) = self.iter.next()? { - if let Some(names) = self.names { - Option::Some((&names[i], &self.values[i])) - } else { - Option::Some((self.na_name, &self.values[i])) - } - } else { - Option::Some((self.na_name, self.na_value)) - } - } -} - -impl<'a, T: Clone> Iterator for IterableRefValues<'a, T> { - type Item = &'a T; - - fn next(&mut self) -> Option { - if let Some(i) = self.iter.next()? { - Some(&self.values[i]) - } else { - Some(self.na_value) - } - } -} - -impl ViewMut for RepType { - fn view_mut(&self) -> Self { - match self { - RepType::Subset(v, s, n) => RepType::Subset(v.view_mut(), s.clone(), n.clone()), - } - } -} - -pub struct IterableValues { - values: Rc>, - iter: Box>>, -} - -impl Iterator for IterableValues { - type Item = T; - fn next(&mut self) -> Option { - // FIXME: Already assumes no indexing with NA - let i = self.iter.next()?.unwrap(); - Some(self.values[i].clone()) - } -} - -pub struct IterablePairs { - values: Rc>, - names: Option>>, - iter: Box>>, -} - -impl Iterator for IterablePairs { - type Item = (Character, T); - fn next(&mut self) -> Option { - // FIXME: Already assumes no indexing with NA - let i = self.iter.next()?.unwrap(); - let value = self.values[i].clone(); - let name = if let Some(names) = &self.names { - names[i].clone() - } else { - Character::NA - }; - Some((name, value)) - } -} - -impl RepType { - /// Create an empty vector - /// - /// The primary use case for this function is to support testing, and there - /// are few expected use cases outside. It is used for creating a vector - /// of an explicit atomic type, likely to be tested with - /// `SameType::is_same_type_as`. - /// - /// ``` - /// use r::utils::*; - /// use r::object::Vector; - /// use r::object::OptionNA; - /// - /// let result = Vector::from(vec![1, 2, 3]); - /// let expect = Vector::from(Vec::>::new()); - /// - /// assert!(result.is_same_type_as(&expect)) - /// ``` - /// - pub fn new() -> Self { - RepType::Subset( - Vec::new().into(), - Subsets(Vec::new()), - Some(Naming::default()), - ) - } - - pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { - match &self { - RepType::Subset(..) => { - let err = Error::Other("subset must have length 1".to_string()); - - let mut iter = self.clone().subset(subset).iter_subset_indices(); - let i1 = iter.next(); - - // check that subset has exactly length 1 - // assumes no indexing with NA (unwrap the option) - let i = if let Some(i) = i1 { - if iter.next().is_some() { - return err.into(); - } - i - } else { - return err.into(); - } - .unwrap(); - - self.with_inner_mut(|v| v[i] = value.clone()); - Ok(value.clone()) - } - } - } - - pub fn values_ref(&self) -> IntoIterableRefValues { - match self.clone() { - RepType::Subset(values, ..) => { - let iter = Box::new(self.iter_subset_indices()); - let values = values.inner_rc(); - - IntoIterableRefValues { values, na_value: T::default(), iter } - } - } - } - - pub fn names_ref(&self) -> Option { - match self.clone() { - RepType::Subset(.., naming) => { - let iter = Box::new(self.iter_subset_indices()); - let naming = naming?; - let names = naming.names.inner_rc(); - - Some(IntoIterableRefNames { names, na_name: Character::default(), iter }) - } - } - } - - pub fn pairs_ref(&self) -> IntoIterableRefPairs { - match self.clone() { - RepType::Subset(values, _, maybe_naming) => { - let iter = Box::new(self.iter_subset_indices()); - let values = values.inner_rc(); - let names = maybe_naming.map(|x| x.names.inner_rc()); - - IntoIterableRefPairs { - values, - names, - na_value: T::default(), - na_name: Character::NA, - iter, - } - } - } - } - - pub fn iter_pairs(&self) -> IterablePairs { - match self.clone() { - RepType::Subset(values, _, maybe_naming) => { - let iter = Box::new(self.iter_subset_indices()); - let values = values.inner_rc(); - let names = maybe_naming.map(|x| x.names.inner_rc()); - - IterablePairs { values, names, iter } - } - } - } - - pub fn iter_values(&self) -> IterableValues { - match self.clone() { - RepType::Subset(values, ..) => { - let iter = Box::new(self.iter_subset_indices()); - IterableValues { values: values.inner_rc(), iter } - } - } - } - - pub fn iter_names(&self) -> Option> { - match self.clone() { - RepType::Subset(.., maybe_naming) => { - let iter = Box::new(self.iter_subset_indices()); - let names = maybe_naming.map(|x| x.names.inner_rc())?; - - Some(IterableValues { values: names, iter }) - } - } - } - - pub fn push_value(&self, value: T) { - self.push_named(Character::NA, value); - } - - pub fn push_named(&self, name: OptionNA, value: T) { - match self { - RepType::Subset(values, Subsets(subsets), maybe_naming) => match subsets.as_slice() { - [] => { - values.with_inner_mut(|values| values.push(value)); - if let Some(naming) = maybe_naming { - naming.push_name(name) - } - } - _ => unimplemented!(), - }, - } - } - - pub fn iter_subset_indices_exact(&self) -> ExactIterSubsetIndices { - // TODO(performance): Avoid the vector allocation - let iter = self.iter_subset_indices(); - let len = iter.count(); - let iter = self.iter_subset_indices(); - ExactIterSubsetIndices { iter, len } - } - - pub fn iter_subset_indices(&self) -> Box>> { - match self.clone() { - RepType::Subset(vals, subsets, maybe_naming) => { - if subsets.is_empty() { - return Box::new((0_usize..vals.len()).map(Some)); - } - - if let Some(naming) = maybe_naming { - Box::new(subsets.bind_names(naming.map).into_iter().map(|(_, y)| y)) - } else { - Box::new(subsets.into_iter().map(|(_, y)| y)) - } - } - } - } - - /// Reindex the mapping from names to indices. - pub fn reindex(&mut self) { - if let RepType::Subset(.., Some(naming)) = self { - naming.map.with_inner_mut(|map| { - map.drain(); - - for (i, maybe_name) in naming.names.borrow().iter().enumerate() { - if let OptionNA::Some(name) = maybe_name { - let indices = map.entry(name.clone()).or_default(); - if !indices.contains(&i) { - indices.push(i) - } - } - } - }) - } - } - - pub fn dedup_last(self) -> Self { - match self { - RepType::Subset(values, subsets, Some(naming)) => { - naming.with_inner_mut(|map, names| { - let mut dups: Vec = map - .iter() - .flat_map(|(_, indices)| { - indices - .split_last() - .map_or(vec![], |(_, leading_dups)| leading_dups.to_vec()) - }) - .collect(); - - dups.sort(); - - values.with_inner_mut(|vs| { - for i in dups.into_iter().rev() { - vs.remove(i); - names.remove(i); - } - }); - - for (_, indices) in map.iter_mut() { - indices.drain(0..(indices.len())); - } - }); - RepType::Subset(values, subsets, Some(naming)) - } - RepType::Subset(.., None) => self, - } - } - - pub fn set_names(&self, names: CowObj>) -> Self { - match self { - RepType::Subset(v, s, _) => { - RepType::Subset(v.clone(), s.clone(), Option::Some(names.into())) - } - } - } - - /// Access a lazy copy of the internal vector data - pub fn inner(&self) -> CowObj> { - match self.materialize() { - RepType::Subset(v, ..) => v.clone(), - } - } - - /// Get mutable access to the internal vector through the passed closure. - pub fn with_inner_mut(&self, f: F) -> R - where - F: FnOnce(&mut Vec) -> R, - { - match self { - RepType::Subset(v, ..) => v.with_inner_mut(f), - } - } - - /// Subsetting a Vector - /// - /// Introduce a new subset into the aggregate list of subset indices. - pub fn subset(&self, subset: Subset) -> Self { - match self { - RepType::Subset(v, Subsets(subsets), n) => { - let mut subsets = subsets.clone(); - subsets.push(subset); - RepType::Subset(v.view_mut(), Subsets(subsets), n.clone()) - } - } - } - - pub fn len(&self) -> usize { - match self { - RepType::Subset(v, Subsets(s), _) => match s.as_slice() { - [] => v.borrow().len(), - _ => unimplemented!(), - }, - } - } - #[must_use] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Get a single element from a vector - /// - /// Access a single element without materializing a new vector - /// - pub fn get(&self, index: usize) -> Option> - where - T: Clone, - { - match self { - RepType::Subset(v, subsets, _) => { - let vb = v.borrow(); - let index = subsets.get_index_at(index)?; - let elem = vb.get(index)?; - Some(RepType::Subset( - vec![elem.clone()].into(), - Subsets::new(), - Option::Some(Naming::new()), - )) - } - } - } - - /// Assignment to Subset Indices - /// - /// Assignment to a vector from another. The aggregate subsetted indices - /// are iterated over while performing the assignment. - /// - pub fn assign(&mut self, value: RepType) -> Result - where - T: Clone + Default + From, - R: Default + Clone, - { - let l_indices = self.iter_subset_indices_exact(); - let mut r_indices = value.iter_subset_indices_exact(); - - // TODO(performance): When we clone the interior data of self (to which we write) - // we don't have to perform recycling checks - // and just start iterating. We can always discard the result afterwards again - // Maybe implement filter_exact on (named)subsets - if r_indices.len() == 1 { - // get the element from reptype value - let index = r_indices - .next() - .expect("index should exist") - .expect("No NA for subsetting"); - let elem = value.get_inner(index).expect("element should exist"); - match (self, value) { - (RepType::Subset(lv, ls, ln), RepType::Subset(..)) => { - lv.with_inner_mut(|lvb| { - for li in l_indices { - lvb[li.unwrap()] = elem.clone().into(); - } - }); - return Ok(RepType::Subset(lv.clone(), ls.clone(), ln.clone())); - } - } - } - - if l_indices.len() != r_indices.len() { - return Err(Signal::Error(Error::NonRecyclableLengths( - l_indices.len(), - r_indices.len(), - ))); - } - - match (self, value) { - (RepType::Subset(lv, ls, ln), RepType::Subset(rv, ..)) => { - lv.with_inner_mut(|lvb| { - let rvc = rv.clone(); - let rvb = rvc.borrow(); - - for (li, ri) in l_indices.zip(r_indices) { - match (li, ri) { - (Some(li), None) => lvb[li] = T::default(), - (Some(li), Some(ri)) => lvb[li] = rvb[ri % rvb.len()].clone().into(), - _ => (), - } - } - }); - - Ok(RepType::Subset(lv.clone(), ls.clone(), ln.clone())) - } - } - } - - /// Materialize a Vector - /// - /// Apply subsets and clone values into a new vector. - pub fn materialize(&self) -> Self - where - T: Clone, - { - match self { - RepType::Subset(v, subsets, naming) => { - // early exit when there is nothing to do - match subsets { - Subsets(s) => { - if s.as_slice().is_empty() { - return self.clone(); - } - } - } - - let vc = v.clone(); - let vb = vc.borrow(); - let mut res: Vec = vec![]; - let vb_len = vb.len(); - - let new_naming = Naming::new(); - - let iter = subsets.clone().into_iter().take_while(|(i, _)| i < &vb_len); - - for (_, i) in iter { - match i { - Some(i) => { - res.push(vb[i].clone()); - if let Option::Some(n) = naming { - new_naming.push_name(n.names.borrow()[i].clone()) - }; - } - // default is NA - None => { - res.push(T::default()); - // When we subset with NA, there is no name for this entry; - new_naming.push_name(OptionNA::NA); - } - } - } - - RepType::Subset(res.into(), Subsets(vec![]), Option::None) - } - } - } - - pub fn is_double(&self) -> bool - where - T: AtomicMode, - { - T::is_double() - } - - pub fn is_logical(&self) -> bool - where - T: AtomicMode, - { - T::is_logical() - } - - pub fn is_integer(&self) -> bool - where - T: AtomicMode, - { - T::is_integer() - } - - pub fn is_character(&self) -> bool - where - T: AtomicMode, - { - T::is_character() - } - - pub fn as_mode(&self) -> RepType - where - T: CoercibleInto, - Mode: Clone, - { - match self { - RepType::Subset(v, subsets, naming) => { - let vc = v.clone(); - let vb = vc.borrow(); - - let num_vec: Vec = vb.iter().map(|i| (*i).clone().coerce_into()).collect(); - - RepType::Subset(num_vec.into(), subsets.clone(), naming.clone()) - } - } - } - - pub fn as_logical(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn as_integer(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn as_double(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn as_character(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn get_inner(&self, index: usize) -> Option { - match self { - RepType::Subset(v, subsets, maybe_naming) => { - if maybe_naming.is_some() { - // TODO(NOW) - unimplemented!() - } - let vb = v.borrow(); - let index = subsets.get_index_at(index)?; - vb.get(index).cloned() - } - } - } -} - -pub struct ExactIterSubsetIndices { - iter: Box>>, - len: usize, -} - -impl ExactSizeIterator for ExactIterSubsetIndices { - fn len(&self) -> usize { - self.len - } -} - -impl Iterator for ExactIterSubsetIndices { - type Item = Option; - fn next(&mut self) -> Option { - self.iter.next() - } -} - -impl TryInto for RepType> -where - OptionNA: AtomicMode + Clone + CoercibleInto>, -{ - type Error = (); - fn try_into(self) -> Result { - self.get_inner(0).map_or( - Err(()), - |i| match CoercibleInto::>::coerce_into(i) { - OptionNA::Some(x) => Ok(x), - OptionNA::NA => Err(()), - }, - ) - } -} - -impl From> for Naming { - fn from(value: Vec) -> Self { - let naming = Naming::new(); - for k in value { - naming.push_name(k); - } - naming - } -} - -impl From>> for RepType { - fn from(value: CowObj>) -> Self { - RepType::Subset(value, Subsets::default(), Option::None) - } -} - -impl From, T)>> for RepType { - fn from(value: Vec<(Option, T)>) -> Self { - let mut names = Vec::with_capacity(value.len()); - let mut values = Vec::with_capacity(value.len()); - for (k, v) in value.into_iter() { - names.push(k.map_or(Character::NA, Character::Some)); - values.push(v) - } - let naming = Naming::from(names); - RepType::Subset(values.into(), Subsets::default(), Some(naming)) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From<(Vec, Subsets)> for RepType -where - RepType: From>, - T: Clone, -{ - fn from(value: (Vec, Subsets)) -> Self { - match Self::from(value.0) { - RepType::Subset(v, ..) => RepType::Subset(v, value.1, Option::None), - } - } -} - -#[cfg(test)] -mod test { - use super::OptionNA::*; - use crate::object::rep::Rep; - use crate::object::reptype::RepType; - use crate::object::{types::*, OptionNA, VecPartialCmp}; - use crate::r; - use crate::utils::SameType; - - #[test] - fn vector_add() { - let x = Rep::::from((1..=5).collect::>()); - let y = Rep::::from(vec![2, 5, 6, 2, 3]); - - let z = (x + y).unwrap(); - assert_eq!(z, Rep::from(vec![3, 7, 9, 6, 8])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_integer()); - } - - #[test] - fn vector_mul() { - let x = Rep::::from((1..=5).collect::>()); - let y = Rep::::from(vec![Some(2), NA, Some(6), NA, Some(3)]); - - let z = (x * y).unwrap(); - assert_eq!(z, Rep::from(vec![Some(2), NA, Some(18), NA, Some(15),])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_integer()); - } - - #[test] - fn vector_common_mul_f32_na() { - // expect that f32's do not get coerced into an OptionNA:: instead - // using std::f32::NAN as NA representation. - - let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); - let y = Rep::::from(vec![100, 10, 1]); - - let z = (x * y).unwrap(); - // assert_eq!(z, Vector::from(vec![0_f32, std::f32::NAN, 1_000_f32])); - // comparing floats is error prone - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_double()); - } - - #[test] - fn vector_and() { - // expect that f32's do not get coerced into an OptionNA:: instead - // using std::f32::NAN as NA representation. - - let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); - let y = Rep::::from(vec![100, 10, 1]); - - let z = (x & y).unwrap(); - assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_logical()); - } - - #[test] - fn vector_gt() { - // expect that f32's do not get coerced into an instead - // using std::f32::NAN as NA representation. - - let x = Rep::from(vec![Some(0_f64), NA, Some(10000_f64)]); - let y = Rep::::from(vec![100, 10, 1]); - - let z = x.vec_gt(y).unwrap(); - assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_logical()); - } - - #[test] - fn test_iter_values() { - // Create values as Vec - let values = vec![1, 2, 3, 4, 5]; - - // Create RepType from values - let rep = RepType::from(values.clone()); - - // Use iter_values to get an iterator and collect values - let collected_values: Vec = rep.iter_values().collect(); - - // Expected values as Vec> - let expected_values: Vec = values.into_iter().map(OptionNA::Some).collect(); - - // Assert collected values match expected values - assert_eq!(collected_values, expected_values); - } - - #[test] - fn test_iter_names() { - // Create values with names - let values_with_names = vec![ - (Character::Some(String::from("a")), 1), - (Character::Some(String::from("b")), 2), - (Character::NA, 3), - (Character::Some(String::from("d")), 4), - (Character::NA, 5), - ]; - - // Create RepType from values with names - let rep = RepType::from(values_with_names.clone()); - - // Use iter_names to get an iterator - let names_iter = rep.iter_names(); - - // Ensure iter_names is Some iterator - assert!(names_iter.is_some()); - - // Collect names - let collected_names: Vec = names_iter.unwrap().collect(); - - // Expected names - let expected_names: Vec = values_with_names - .iter() - .map(|(name_opt, _)| match name_opt { - Some(name) => Character::Some(name.clone()), - Character::NA => Character::NA, - }) - .collect(); - - // Assert collected names match expected names - assert_eq!(collected_names, expected_names); - } - - use crate::object::{Obj, Vector}; - // The tests below don't test the subsetting mechanism, which is instead tested in subsets.rs - #[test] - fn iter_pairs_mixed_names() { - let x = r!(c(a = 1, 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_pairs() - } else { - unreachable!() - }; - - assert_eq!( - x.next().unwrap(), - (Character::Some("a".to_string()), Double::Some(1.0)) - ); - assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); - assert_eq!(x.next(), None); - } - - #[test] - fn iter_pairs_no_names() { - let x = r!(c(1, 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_pairs() - } else { - unreachable!() - }; - - assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(1.0))); - assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); - assert_eq!(x.next(), None); - } - - #[test] - fn iter_values() { - let x = r!(c(1, 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_values() - } else { - unreachable!() - }; - - assert_eq!(x.next().unwrap(), Double::Some(1.0)); - assert_eq!(x.next().unwrap(), Double::Some(2.0)); - assert_eq!(x.next(), None); - } - - #[test] - fn iter_names_none() { - let x = r!(c(1, 2)).unwrap(); - - let x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_names() - } else { - unreachable!() - }; - - assert!(x.is_none()) - } - - #[test] - fn iter_names_some() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_names().unwrap() - } else { - unreachable!() - }; - - assert_eq!(x.next().unwrap(), Character::NA); - assert_eq!(x.next().unwrap(), Character::Some("b".to_string())); - assert_eq!(x.next(), None); - } - - #[test] - fn names_ref_iter_some() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().names_ref().unwrap() - } else { - unreachable!() - }; - - let mut x = x.iter(); - - assert_eq!(x.next().unwrap(), &Character::NA); - assert_eq!(x.next().unwrap(), &Character::Some("b".to_string())); - assert_eq!(x.next(), None); - } - - #[test] - #[should_panic] - fn names_ref_iter_none() { - let x = r!(c(1, 2)).unwrap(); - - if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().names_ref().unwrap() - } else { - unreachable!() - }; - } - - #[test] - fn values_ref_iter() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().values_ref() - } else { - unreachable!() - }; - - let mut x = x.iter(); - - assert_eq!(x.next().unwrap(), &Double::Some(1.0)); - assert_eq!(x.next().unwrap(), &Double::Some(2.0)); - assert_eq!(x.next(), None); - } - - #[test] - fn pairs_ref_iter() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().pairs_ref() - } else { - unreachable!() - }; - - let mut x = x.iter(); - - assert_eq!(x.next().unwrap(), (&Character::NA, &Double::Some(1.0))); - assert_eq!( - x.next().unwrap(), - (&Character::Some("b".to_string()), &Double::Some(2.0)) - ); - assert_eq!(x.next(), None); - } - - use crate::error::Error; - use crate::lang::Signal; - - #[test] - fn assign_recycle_incompatible() { - let mut x = Rep::::from(vec![1, 2, 3]); - let y = Rep::::from(vec![99, 99]); - let result = x.assign(y); - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(3, 2)) - ); - } - #[test] - fn assign_recycle_length_one() { - let x = Rep::::from(vec![1, 2, 3]); - let y = Rep::::from(vec![99]); - let mut xview = x.subset(vec![0, 1].into()); - let _ = xview.assign(y).unwrap(); - let result_vec: Vec<_> = x.iter_values().collect(); - assert_eq!(result_vec, vec![Some(99), Some(99), Some(3)]) - } - #[test] - fn non_recyclable_lengths_3_2() { - let x = Rep::::from(vec![1, 2, 3]); - let y = Rep::::from(vec![99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(3, 2)) - ); - } - #[test] - fn non_recyclable_lengths_4_2() { - let x = Rep::::from(vec![1, 2, 3, 4]); - let y = Rep::::from(vec![99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(4, 2)) - ); - } - #[test] - fn non_recyclable_lengths_2_3() { - let x = Rep::::from(vec![1, 2]); - let y = Rep::::from(vec![99, 99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(2, 3)) - ); - } - #[test] - fn non_recyclable_lengths_2_4() { - let x = Rep::::from(vec![1, 2]); - let y = Rep::::from(vec![99, 99, 99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(2, 4)) - ); - } - #[test] - fn non_recyclable_lengths_0_1() { - let x = Rep::::from(Vec::::new()); - let y = Rep::::from(vec![99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(0, 1)) - ); - } - #[test] - fn non_recyclable_lengths_1_0() { - let x = Rep::::from(vec![99]); - let y = Rep::::from(Vec::::new()); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(1, 0)) - ); - } -} diff --git a/src/object/vector/subset.rs b/src/object/vector/subset.rs index 243ba73..2f4ba84 100644 --- a/src/object/vector/subset.rs +++ b/src/object/vector/subset.rs @@ -163,18 +163,13 @@ impl Subset { } } Subset::Mask(mask) => { - Box::new( - (**mask.borrow()) - .clone() - .into_iter() - .cycle() - .zip(iter) - .filter_map(|(mask, i @ (i_orig, _))| match mask { - OptionNA::Some(true) => Some(i), // accept index - OptionNA::NA => Some((i_orig, None)), // accept, but NA - _ => None, // filter falses - }), - ) + Box::new((**mask.borrow()).clone().into_iter().zip(iter).filter_map( + |(mask, i @ (i_orig, _))| match mask { + OptionNA::Some(true) => Some(i), // accept index + OptionNA::NA => Some((i_orig, None)), // accept, but NA + _ => None, // filter falses + }, + )) } Subset::Range(range) => Box::new( iter.skip(range.start)