Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Grounding space implementation with a more compact representation in memory #829

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
be37630
Add atom storage
vsbogd Jun 18, 2024
30960ef
Add atom token iterator
vsbogd Jun 20, 2024
091e0b3
Add atom index implementation
vsbogd Jun 20, 2024
872162a
Add AtomStorage::get_id() method to get id by atom
vsbogd Jun 20, 2024
2aecc9a
Stop modifying AtomIndex on query
vsbogd Jun 20, 2024
18d34f5
Add iterator over atoms in index
vsbogd Jun 20, 2024
740dea5
Rename AtomStorage::get to AtomStorage::get_atom
vsbogd Jun 20, 2024
4fa5e0e
Remove unused code
vsbogd Jun 20, 2024
3cef2f7
Rename variables while processing query
vsbogd Sep 16, 2024
27d0286
Optimize search for a case of the atom which is not in index storage
vsbogd Sep 19, 2024
ff18168
Rename HashAtom to HashableAtom
vsbogd Dec 24, 2024
a9c3a21
Implement GroundingSpace using AtomIndex
vsbogd Jun 21, 2024
9330ff3
Remove cloning on insert into AtomIndex, refactor code
vsbogd Dec 26, 2024
2f292a2
Add TODO about custom key entry matching issue
vsbogd Dec 26, 2024
5f71afa
Split IndexKey on InsertKey and QueryKey because
vsbogd Dec 26, 2024
8da9276
Minor AtomIndex::skip_atom() change
vsbogd Dec 26, 2024
aab9f5c
Simplify exact key matching code
vsbogd Dec 26, 2024
3845a91
Borrow values from AtomIndex when it is possible while iterating
vsbogd Dec 26, 2024
cd10a30
Improve code readability
vsbogd Dec 26, 2024
4a6df69
Implement AtomTrieNode iterator without collecting items
vsbogd Dec 26, 2024
303372f
Implement Display for AtomStorage and AtomTrieNode
vsbogd Dec 27, 2024
a5016a4
Eliminate expression buffer allocation on recursion
vsbogd Dec 27, 2024
75fd5e7
Allow using CustomMatch implementors in queries to the AtomIndex
vsbogd Dec 28, 2024
9e883c3
Add AtomIndex::remove method
vsbogd Dec 28, 2024
cd2f00b
Move AtomIndex implementation into grounding::index module
vsbogd Dec 28, 2024
f8a8f93
Fix nightly compiler warnings
vsbogd Dec 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ rand = "0.8.5"
bitset = "0.1.2"
dyn-fmt = "0.4.0"
itertools = "0.13.0"
bimap = "0.6.3"

# pkg_mgmt deps
xxhash-rust = {version="0.8.7", features=["xxh3"], optional=true }
Expand Down
16 changes: 15 additions & 1 deletion lib/src/atom/serial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,11 @@ pub trait ConvertingSerializer<T>: Serializer + Default {
/// Serialization result type
pub type Result = std::result::Result<(), Error>;

/// Private marker trait for [Hasher] implementations. It is used as the bound
/// of the blanket [Serializer] implementation that follows.
trait PrivHasher : Hasher {}
// `DefaultHasher` is the hasher opted in here.
impl PrivHasher for DefaultHasher {}

// there are much speedier hashers, but not sure if it's worth the extra dependency given the other options
impl Serializer for DefaultHasher {
impl<H: PrivHasher> Serializer for H {
fn serialize_bool(&mut self, v: bool) -> Result { Ok(self.write_u8(v as u8)) }
fn serialize_i64(&mut self, v: i64) -> Result { Ok(self.write_i64(v)) }
fn serialize_f64(&mut self, v: f64) -> Result { Ok(self.write_u64(v as u64)) }
Expand All @@ -95,3 +98,14 @@ impl Serializer for Vec<u8> {
fn serialize_f64(&mut self, v: f64) -> Result { Ok(self.extend(v.to_le_bytes())) }
fn serialize_str(&mut self, v: &str) -> Result { Ok(self.extend(v.bytes())) }
}

/// [Serializer] implementation which accepts every value and discards it.
#[derive(Default)]
pub struct NullSerializer();

impl Serializer for NullSerializer {
    /// Ignores the boolean value and reports success.
    fn serialize_bool(&mut self, _value: bool) -> Result {
        Ok(())
    }

    /// Ignores the integer value and reports success.
    fn serialize_i64(&mut self, _value: i64) -> Result {
        Ok(())
    }

    /// Ignores the floating point value and reports success.
    fn serialize_f64(&mut self, _value: f64) -> Result {
        Ok(())
    }

    /// Ignores the string value and reports success.
    fn serialize_str(&mut self, _value: &str) -> Result {
        Ok(())
    }
}

16 changes: 16 additions & 0 deletions lib/src/common/collections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,22 @@ impl<'a, T: 'a + Display> Display for VecDisplay<'a, T> {
}
}

/// Helper function to implement Display for all mapping like code structures.
/// Displays iterator over pairs in a format `{ <key>: <value>, ... }`.
///
/// An empty iterator is displayed as `{ }`.
///
/// # Errors
///
/// Returns the first error reported by the formatter. Writing stops at that
/// point and the remaining pairs are not pulled from `it`.
pub fn write_mapping<A, B, I>(f: &mut std::fmt::Formatter, it: I) -> std::fmt::Result
where
    A: Display,
    B: Display,
    I: Iterator<Item=(A, B)>
{
    write!(f, "{{")?;
    for (index, (key, value)) in it.enumerate() {
        // Every pair except the first one is preceded by a comma.
        let separator = if index == 0 { "" } else { "," };
        write!(f, "{} {}: {}", separator, key, value)?;
    }
    write!(f, " }}")
}


#[cfg(test)]
mod test {
Expand Down
4 changes: 2 additions & 2 deletions lib/src/common/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl<'a, T: 'a> Deref for RefHolder<'a, T> {
}

impl<T> LockBorrow<T> for &T {
fn borrow<'a>(&'a self) -> Box<dyn Deref<Target=T> + '_> {
fn borrow(&self) -> Box<dyn Deref<Target=T> + '_> {
Box::new(RefHolder(self))
}
}
Expand All @@ -64,7 +64,7 @@ impl<'a, T: 'a> Deref for RefHolderMut<'a, T> {
}

impl<T> LockBorrow<T> for &mut T {
fn borrow<'a>(&'a self) -> Box<dyn Deref<Target=T> + '_> {
fn borrow(&self) -> Box<dyn Deref<Target=T> + '_> {
Box::new(RefHolderMut(self))
}
}
Expand Down
4 changes: 2 additions & 2 deletions lib/src/metta/runner/environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ impl Environment {

/// Returns the [ModuleCatalog]s from the Environment, in search priority order
#[cfg(feature = "pkg_mgmt")]
pub fn catalogs<'a>(&'a self) -> impl Iterator<Item=&dyn ModuleCatalog> + 'a {
pub fn catalogs(&self) -> impl Iterator<Item=&'_ dyn ModuleCatalog> + '_ {
self.catalogs.iter().map(|catalog| &**catalog as &dyn ModuleCatalog)
}

/// Returns the [FsModuleFormat]s from the Environment, in priority order
#[cfg(feature = "pkg_mgmt")]
pub fn fs_mod_formats<'a>(&'a self) -> impl Iterator<Item=&dyn FsModuleFormat> + 'a {
pub fn fs_mod_formats(&self) -> impl Iterator<Item=&'_ dyn FsModuleFormat> + '_ {
self.fs_mod_formats.iter().map(|fmt| &**fmt as &dyn FsModuleFormat)
}

Expand Down
47 changes: 45 additions & 2 deletions lib/src/metta/runner/stdlib/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ use crate::space::*;
use crate::metta::*;
use crate::metta::text::Tokenizer;
use crate::metta::types::{get_atom_types, get_meta_type};
use crate::common::multitrie::MultiTrie;
use crate::space::grounding::atom_to_trie_key;
use crate::common::multitrie::{MultiTrie, TrieKey, TrieToken};
use crate::common::collections::ImmutableString;
use super::{grounded_op, regex};
use crate::metta::runner::number::*;

use std::convert::TryInto;
use std::hash::{DefaultHasher, Hasher};

#[derive(Clone, Debug)]
pub struct UniqueAtomOp {}
Expand Down Expand Up @@ -83,6 +84,35 @@ impl Grounded for IntersectionAtomOp {
}
}

/// Builds the [TrieKey] used to index `atom`.
///
/// The key is the sequence of tokens produced by walking the atom:
/// - a symbol becomes an exact token holding a clone of the symbol;
/// - an expression becomes a left parenthesis token, the tokens of each
///   child in order, and a right parenthesis token;
/// - a grounded atom for which `as_match()` returns `None` is serialized into
///   a hasher; on success the hash value rendered as a string becomes an
///   exact token, on failure a wildcard token is used;
/// - any other atom becomes a wildcard token.
fn atom_to_trie_key(atom: &Atom) -> TrieKey<SymbolAtom> {
    /// Appends the tokens of `atom` to `out`, recursing into expressions.
    fn push_tokens(atom: &Atom, out: &mut Vec<TrieToken<SymbolAtom>>) {
        match atom {
            Atom::Symbol(symbol) => out.push(TrieToken::Exact(symbol.clone())),
            Atom::Expression(expression) => {
                out.push(TrieToken::LeftPar);
                for child in expression.children().iter() {
                    push_tokens(child, out);
                }
                out.push(TrieToken::RightPar);
            },
            Atom::Grounded(gnd) if gnd.as_grounded().as_match().is_none() => {
                // TODO: Adding Hash on grounded atoms matched by equality is
                // required in order to make TrieToken::Exact be generated for
                // them.
                let mut hasher = DefaultHasher::new();
                let token = match (*gnd).serialize(&mut hasher) {
                    Ok(()) => {
                        let digest = hasher.finish().to_string();
                        TrieToken::Exact(SymbolAtom::new(ImmutableString::Allocated(digest)))
                    },
                    Err(_) => TrieToken::Wildcard,
                };
                out.push(token);
            },
            _ => out.push(TrieToken::Wildcard),
        }
    }

    let mut collected = Vec::new();
    push_tokens(atom, &mut collected);
    TrieKey::from(collected)
}


impl CustomExecute for IntersectionAtomOp {
fn execute(&self, args: &[Atom]) -> Result<Vec<Atom>, ExecError> {
let arg_error = || ExecError::from("intersection expects and executable LHS and RHS atom");
Expand Down Expand Up @@ -607,6 +637,19 @@ mod tests {
("A" ("B" "C")) "p" "p" ("Q" "a"))]);
}

// Checks the trie key built by atom_to_trie_key() for each kind of atom.
#[test]
fn index_atom_to_key() {
// A symbol is expected to produce a single exact token holding the symbol.
assert_eq!(atom_to_trie_key(&Atom::sym("A")), TrieKey::from([TrieToken::Exact(SymbolAtom::new("A".into()))]));
// The grounded value 1 is expected to produce a single wildcard token.
assert_eq!(atom_to_trie_key(&Atom::value(1)), TrieKey::from([TrieToken::Wildcard]));
// A variable is expected to produce a single wildcard token.
assert_eq!(atom_to_trie_key(&Atom::var("a")), TrieKey::from([TrieToken::Wildcard]));
// An expression is expected to produce the tokens of its children wrapped
// into the left and right parenthesis tokens.
assert_eq!(atom_to_trie_key(&expr!("A" "B")), TrieKey::from([
TrieToken::LeftPar,
TrieToken::Exact(SymbolAtom::new("A".into())),
TrieToken::Exact(SymbolAtom::new("B".into())),
TrieToken::RightPar
]));
}

#[test]
fn intersection_op() {
let intersection_op = IntersectionAtomOp{};
Expand Down
Loading
Loading