From 1846f42b78d3e3caaaf518c56b50030370d3006c Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 31 Oct 2024 11:22:43 +0100 Subject: [PATCH] feat: implement some helper functions required for common sub-expression elimination. --- lib/src/compiler/ir/mod.rs | 611 ++++++++++++++++-- lib/src/compiler/ir/tests/mod.rs | 42 +- .../compiler/ir/tests/testdata/1.folding.ir | 98 +-- .../ir/tests/testdata/1.no-folding.ir | 126 ++-- .../compiler/ir/tests/testdata/2.folding.ir | 86 +-- .../ir/tests/testdata/2.no-folding.ir | 86 +-- .../compiler/ir/tests/testdata/3.folding.ir | 34 +- .../ir/tests/testdata/3.no-folding.ir | 34 +- 8 files changed, 822 insertions(+), 295 deletions(-) diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index fe0a1e56..d4894e30 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -37,7 +37,8 @@ use std::ops::RangeInclusive; use bitmask::bitmask; use bstr::BString; -use rustc_hash::FxHasher; +use itertools::Itertools; +use rustc_hash::{FxHashMap, FxHasher}; use serde::{Deserialize, Serialize}; use yara_x_parser::ast::Ident; @@ -307,9 +308,25 @@ impl From for PatternIdx { } /// Identifies an expression in the IR tree. -#[derive(Debug, Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] pub(crate) struct ExprId(u32); +impl ExprId { + pub const fn none() -> Self { + ExprId(u32::MAX) + } +} + +impl Debug for ExprId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if self.0 == u32::MAX { + write!(f, "None") + } else { + write!(f, "{}", self.0) + } + } +} + impl From for ExprId { #[inline] fn from(value: usize) -> Self { @@ -344,14 +361,26 @@ impl Index for ExprHashes { /// which is an index in the vector. pub(crate) struct IR { constant_folding: bool, + /// The [`ExprId`] corresponding to the root node. root: Option, + /// Vector that contains all the nodes in the IR. An [`ExprId`] is an index + /// within this vector. 
nodes: Vec, + /// Vector that indicates the parent of a node. An [`ExprId`] is an index + /// within this vector. `parents[expr_id]` returns the parent of the expression + /// identified by `expr_id`. + parents: Vec, } impl IR { /// Creates a new [`IR`]. pub fn new() -> Self { - Self { nodes: Vec::new(), root: None, constant_folding: false } + Self { + nodes: Vec::new(), + parents: Vec::new(), + root: None, + constant_folding: false, + } } /// Enable constant folding. @@ -362,7 +391,8 @@ impl IR { /// Clears the tree, removing all nodes. pub fn clear(&mut self) { - self.nodes.clear() + self.nodes.clear(); + self.parents.clear(); } /// Returns a reference to the [`Expr`] at the given index in the tree. @@ -413,13 +443,68 @@ impl IR { None } + /// Returns an iterator that yields the ancestors of the given expression. + /// + /// The first item yielded by the iterator is the [`ExprId`] corresponding + /// to the parent of `expr`, and then keeps going up the ancestors chain + /// until it reaches the root expression. + pub fn ancestors(&self, expr: ExprId) -> Ancestors<'_> { + Ancestors { ir: self, current: expr } + } + + /// Finds the common ancestor of a given set of expressions in the IR tree. + /// + /// This function traverses the ancestor chain of each expression to identify + /// where they converge. In the worst-case scenario, the common ancestor will + /// be the root expression. + pub fn common_ancestor(&self, exprs: &[ExprId]) -> ExprId { + if exprs.is_empty() { + return ExprId::none(); + } + + // Vector where each item is an ancestors iterator for one of the + // expressions passed to this function. + let mut ancestor_iterators: Vec = + exprs.iter().map(|expr| self.ancestors(*expr)).collect(); + + let mut exprs = exprs.to_vec(); + + // In each iteration of this loop, we move one step up the ancestor + // chain for each expression, except for the expression with the highest + // ExprId.
This process continues until all ancestor chains converge at + // the same ExprId. + // + // This algorithm leverages the property that each node in the IR tree + // has a higher ExprId than any of its descendants. This means that if + // node A has a lower ExprId than node B, B cannot be a descendant of + // A. We can therefore traverse up A’s ancestor chain until finding B + // or some other node with an ExprId higher than B's. + while !exprs.iter().all_equal() { + let max = exprs.iter().cloned().max().unwrap(); + let expr_with_ancestors = + exprs.iter_mut().zip(&mut ancestor_iterators); + // Advance the ancestor iterators by one, except the iterator + // corresponding to the expression with the highest ExprId. + for (expr, ancestors) in expr_with_ancestors { + if *expr != max { + *expr = ancestors.next().unwrap(); + } + } + } + + // At this point all expressions have converged to the same ExprId, we + // can return any of them. + exprs[0] + } + /// Computes the hash corresponding to each expression in the IR. /// - /// Returns a [`ExprHashes`] type containing the hashes. This object - /// can be indexed by a [`ExprId`] for obtaining the hash corresponding - /// to a given expression. - pub fn compute_expr_hashes(&self, start: ExprId) -> ExprHashes { - let mut hashes = vec![0; self.nodes.len()]; + /// For each expression in the IR, the callback `f` is invoked with the [`ExprId`] + /// and the hash corresponding to the expression. + pub fn compute_expr_hashes(&self, start: ExprId, mut f: F) + where + F: FnMut(ExprId, u64), + { let mut hashers = Vec::new(); // Function that decides which expressions should be ignored. Some @@ -446,33 +531,87 @@ impl IR { Event::Leave((expr_id, expr)) => { if !ignore(expr) { let hasher = hashers.pop().unwrap(); - hashes[expr_id.0 as usize] = hasher.finish(); + f(expr_id, hasher.finish()); } } } } + } + + pub fn find_duplicates(&self) { + // Vector with expression hashes.
`hashes[ExprId]` is the hash for + // the expression identified by `ExprId`. + let mut hashes = vec![0; self.nodes.len()]; + + // A map where keys are expression hashes and values vectors with + // the ExprId of every expression with the hash indicated in the + // key. + let mut map: FxHashMap> = FxHashMap::default(); + + self.compute_expr_hashes(self.root.unwrap(), |expr_id, hash| { + hashes[expr_id.0 as usize] = hash; + map.entry(hash).or_default().push(expr_id); + }); + + let mut dfs = self.dfs_iter(self.root.unwrap()); - ExprHashes(hashes) + while let Some(evt) = dfs.next() { + match evt { + Event::Enter((expr_id, expr)) => { + // Get hash for the current expression. + let hash = hashes[expr_id.0 as usize]; + // Get vector with all the expressions that have the same + // hash as the current expression, including the current + // expression itself. + let exprs = map.get(&hash).unwrap(); + // `exprs` can not be empty, as it must have at least the + // current expression. + let first = exprs.first().unwrap(); + + // When the current expression is equal to some other + // expression, we don't want to traverse its children, as + // the children are going to be equal to the other + // expression's children. + if exprs.len() > 1 { + dfs.prune(); + } + + if exprs.len() > 1 && *first == expr_id { + todo!() + } + } + Event::Leave(_) => {} + } + } } } impl IR { /// Creates a new [`Expr::FileSize`]. pub fn filesize(&mut self) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents.push(ExprId::none()); self.nodes.push(Expr::Filesize); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Const`]. 
pub fn constant(&mut self, type_value: TypeValue) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents.push(ExprId::none()); self.nodes.push(Expr::Const(type_value)); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Ident`]. pub fn ident(&mut self, symbol: Symbol) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents.push(ExprId::none()); self.nodes.push(Expr::Ident { symbol: Box::new(symbol) }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Lookup`]. @@ -482,12 +621,17 @@ impl IR { primary: ExprId, index: ExprId, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[primary.0 as usize] = expr_id; + self.parents[index.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Lookup(Box::new(Lookup { type_value, primary, index, }))); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Not`]. @@ -497,8 +641,12 @@ impl IR { return self.constant(TypeValue::const_bool_from(!v)); } } + let expr_id = ExprId::from(self.nodes.len()); + self.parents[operand.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Not { operand }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::And`]. @@ -528,8 +676,14 @@ impl IR { } } + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::And { operands }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::Or`]. 
@@ -559,8 +713,14 @@ impl IR { } } + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::Or { operands }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::Minus`]. @@ -576,53 +736,91 @@ impl IR { _ => {} } } + + let expr_id = ExprId::from(self.nodes.len()); + self.parents[operand.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Minus { operand, is_float: matches!(self.get(operand).ty(), Type::Float), }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Defined`]. pub fn defined(&mut self, operand: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[operand.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Defined { operand }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::BitwiseNot`]. pub fn bitwise_not(&mut self, operand: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[operand.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::BitwiseNot { operand }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::BitwiseAnd`]. 
pub fn bitwise_and(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::BitwiseAnd { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::BitwiseOr`]. pub fn bitwise_or(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::BitwiseOr { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::BitwiseXor`]. pub fn bitwise_xor(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::BitwiseXor { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Shl`]. pub fn shl(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Shl { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Shr`]. 
pub fn shr(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Shr { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Add`]. @@ -641,8 +839,14 @@ impl IR { } } + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::Add { operands, is_float }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::Sub`]. @@ -661,8 +865,14 @@ impl IR { } } + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::Sub { operands, is_float }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::Mul`]. @@ -681,8 +891,14 @@ impl IR { } } + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::Mul { operands, is_float }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::Div`]. 
@@ -690,108 +906,195 @@ impl IR { let is_float = operands .iter() .any(|op| matches!(self.get(*op).ty(), Type::Float)); + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::Div { operands, is_float }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::Mod`]. pub fn modulus(&mut self, operands: Vec) -> Result { + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::Mod { operands }); - Ok(ExprId::from(self.nodes.len() - 1)) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + Ok(expr_id) } /// Creates a new [`Expr::FieldAccess`]. pub fn field_access(&mut self, operands: Vec) -> ExprId { let type_value = self.get(*operands.last().unwrap()).type_value(); + let expr_id = ExprId::from(self.nodes.len()); + for operand in operands.iter() { + self.parents[operand.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::FieldAccess(Box::new(FieldAccess { operands, type_value, }))); - ExprId::from(self.nodes.len() - 1) + expr_id } /// Creates a new [`Expr::Eq`]. pub fn eq(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Eq { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Ne`]. 
pub fn ne(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Ne { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Ge`]. pub fn ge(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Ge { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Gt`]. pub fn gt(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Gt { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Le`]. pub fn le(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Le { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Lt`]. 
pub fn lt(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Lt { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Contains`]. pub fn contains(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Contains { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::IContains`]. pub fn icontains(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::IContains { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::StartsWith`]. pub fn starts_with(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::StartsWith { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::IStartsWith`]. 
pub fn istarts_with(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::IStartsWith { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::EndsWith`]. pub fn ends_with(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::EndsWith { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::IEndsWith`]. pub fn iends_with(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::IEndsWith { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::IEquals`]. pub fn iequals(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::IEquals { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::Matches`]. 
pub fn matches(&mut self, lhs: ExprId, rhs: ExprId) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + self.parents[lhs.0 as usize] = expr_id; + self.parents[rhs.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::Matches { lhs, rhs }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternMatch`] @@ -800,8 +1103,21 @@ impl IR { pattern: PatternIdx, anchor: MatchAnchor, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + match &anchor { + MatchAnchor::None => {} + MatchAnchor::At(expr) => { + self.parents[expr.0 as usize] = expr_id; + } + MatchAnchor::In(range) => { + self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::PatternMatch { pattern, anchor }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternMatchVar`] @@ -810,9 +1126,22 @@ impl IR { symbol: Symbol, anchor: MatchAnchor, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + match &anchor { + MatchAnchor::None => {} + MatchAnchor::At(expr) => { + self.parents[expr.0 as usize] = expr_id; + } + MatchAnchor::In(range) => { + self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + } + self.parents.push(ExprId::none()); self.nodes .push(Expr::PatternMatchVar { symbol: Box::new(symbol), anchor }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternLength`] @@ -821,8 +1150,14 @@ impl IR { pattern: PatternIdx, index: Option, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + if let Some(index) = &index { + self.parents[index.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); 
self.nodes.push(Expr::PatternLength { pattern, index }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternLengthVar`] @@ -831,9 +1166,15 @@ impl IR { symbol: Symbol, index: Option, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + if let Some(index) = &index { + self.parents[index.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes .push(Expr::PatternLengthVar { symbol: Box::new(symbol), index }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternOffset`] @@ -842,8 +1183,14 @@ impl IR { pattern: PatternIdx, index: Option, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + if let Some(index) = &index { + self.parents[index.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::PatternOffset { pattern, index }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternOffsetVar`] @@ -852,9 +1199,15 @@ impl IR { symbol: Symbol, index: Option, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + if let Some(index) = &index { + self.parents[index.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes .push(Expr::PatternOffsetVar { symbol: Box::new(symbol), index }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternCount`] @@ -863,8 +1216,15 @@ impl IR { pattern: PatternIdx, range: Option, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + if let Some(range) = &range { + self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::PatternCount { pattern, range }); - 
ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::PatternCountVar`] @@ -873,9 +1233,16 @@ impl IR { symbol: Symbol, range: Option, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + if let Some(range) = &range { + self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + self.parents.push(ExprId::none()); self.nodes .push(Expr::PatternCountVar { symbol: Box::new(symbol), range }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::FuncCall`] @@ -886,13 +1253,20 @@ impl IR { type_value: TypeValue, signature_index: usize, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + for arg in args.iter() { + self.parents[arg.0 as usize] = expr_id + } + self.parents[callable.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::FuncCall(Box::new(FuncCall { callable, args, type_value, signature_index, }))); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::OfExprTuple`] @@ -904,6 +1278,27 @@ impl IR { items: Vec, anchor: MatchAnchor, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + match quantifier { + Quantifier::Percentage(expr) | Quantifier::Expr(expr) => { + self.parents[expr.0 as usize] = expr_id + } + _ => {} + } + for item in items.iter() { + self.parents[item.0 as usize] = expr_id; + } + match &anchor { + MatchAnchor::None => {} + MatchAnchor::At(expr) => { + self.parents[expr.0 as usize] = expr_id; + } + MatchAnchor::In(range) => { + self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::OfExprTuple(Box::new(OfExprTuple { quantifier, items, @@ -911,7 +1306,8 @@ impl IR { for_vars, 
next_expr_var, }))); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::OfPatternSet`] @@ -923,6 +1319,24 @@ impl IR { items: Vec, anchor: MatchAnchor, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + match quantifier { + Quantifier::Percentage(expr) | Quantifier::Expr(expr) => { + self.parents[expr.0 as usize] = expr_id + } + _ => {} + } + match &anchor { + MatchAnchor::None => {} + MatchAnchor::At(expr) => { + self.parents[expr.0 as usize] = expr_id; + } + MatchAnchor::In(range) => { + self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + } + self.parents.push(ExprId::none()); self.nodes.push(Expr::OfPatternSet(Box::new(OfPatternSet { quantifier, items, @@ -930,7 +1344,8 @@ impl IR { for_vars, next_pattern_var, }))); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::ForOf`] @@ -942,6 +1357,15 @@ impl IR { pattern_set: Vec, condition: ExprId, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + match quantifier { + Quantifier::Percentage(expr) | Quantifier::Expr(expr) => { + self.parents[expr.0 as usize] = expr_id + } + _ => {} + } + self.parents[condition.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::ForOf(Box::new(ForOf { quantifier, variable, @@ -949,7 +1373,8 @@ impl IR { condition, for_vars, }))); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::ForIn`] @@ -962,6 +1387,29 @@ impl IR { iterable: Iterable, condition: ExprId, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + match quantifier { + Quantifier::Percentage(expr) | Quantifier::Expr(expr) => { + self.parents[expr.0 as usize] = expr_id + } + _ => {} + } + match &iterable { + Iterable::Range(range) => { + 
self.parents[range.lower_bound.0 as usize] = expr_id; + self.parents[range.upper_bound.0 as usize] = expr_id; + } + Iterable::ExprTuple(exprs) => { + for expr in exprs.iter() { + self.parents[expr.0 as usize] = expr_id; + } + } + Iterable::Expr(expr) => { + self.parents[expr.0 as usize] = expr_id; + } + } + self.parents[condition.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::ForIn(Box::new(ForIn { quantifier, variables, @@ -970,7 +1418,8 @@ impl IR { iterable, condition, }))); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } /// Creates a new [`Expr::With`] @@ -979,8 +1428,15 @@ impl IR { declarations: Vec<(Var, ExprId)>, condition: ExprId, ) -> ExprId { + let expr_id = ExprId::from(self.nodes.len()); + for (_, expr) in declarations.iter() { + self.parents[expr.0 as usize] = expr_id; + } + self.parents[condition.0 as usize] = expr_id; + self.parents.push(ExprId::none()); self.nodes.push(Expr::With { declarations, condition }); - ExprId::from(self.nodes.len() - 1) + debug_assert_eq!(self.parents.len(), self.nodes.len()); + expr_id } } @@ -1042,7 +1498,11 @@ impl Debug for IR { if index.is_some() { " INDEX" } else { "" } }; - let expr_hashes = self.compute_expr_hashes(self.root.unwrap()); + let mut expr_hashes = vec![0; self.nodes.len()]; + + self.compute_expr_hashes(self.root.unwrap(), |expr_id, hash| { + expr_hashes[expr_id.0 as usize] = hash; + }); for event in self.dfs_iter(self.root.unwrap()) { match event { @@ -1051,8 +1511,9 @@ impl Debug for IR { for _ in 0..level { write!(f, " ")?; } + write!(f, "id:{:?} parent:{:?}: ", expr_id, self.parents[expr_id.0 as usize])?; level += 1; - let expr_hash = expr_hashes[expr_id]; + let expr_hash = expr_hashes[expr_id.0 as usize]; match expr { Expr::Const(c) => writeln!(f, "CONST {}", c)?, Expr::Filesize => writeln!(f, "FILESIZE")?, @@ -1160,6 +1621,32 @@ impl Debug for IR { } } +/// Iterator that returns the ancestors for a given 
expression in the +/// IR tree. +/// +/// The first item returned by the iterator is the parent of the original +/// expression, then the parent's parent, and so on until reaching the +/// root node. +pub(crate) struct Ancestors<'a> { + ir: &'a IR, + current: ExprId, +} + +impl<'a> Iterator for Ancestors<'a> { + type Item = ExprId; + + fn next(&mut self) -> Option { + if self.current == ExprId::none() { + return None; + } + self.current = self.ir.parents[self.current.0 as usize]; + if self.current == ExprId::none() { + return None; + } + Some(self.current) + } +} + /// Intermediate representation (IR) for an expression. pub(crate) enum Expr { /// Constant value (i.e: the value is known at compile time). diff --git a/lib/src/compiler/ir/tests/mod.rs b/lib/src/compiler/ir/tests/mod.rs index 500a75a4..b9963a4c 100644 --- a/lib/src/compiler/ir/tests/mod.rs +++ b/lib/src/compiler/ir/tests/mod.rs @@ -2,7 +2,8 @@ use std::fs; use std::io::BufWriter; use std::mem::size_of; -use crate::compiler::Expr; +use crate::compiler::{Expr, IR}; +use crate::types::TypeValue; use crate::Compiler; #[test] @@ -12,6 +13,45 @@ fn expr_size() { assert_eq!(size_of::(), 32); } +#[test] +fn ancestors() { + let mut ir = IR::new(); + + let const_1 = ir.constant(TypeValue::const_integer_from(1)); + let const_2 = ir.constant(TypeValue::const_integer_from(2)); + let const_3 = ir.constant(TypeValue::const_integer_from(2)); + let add = ir.add(vec![const_2, const_3]).unwrap(); + let root = ir.add(vec![const_1, add]).unwrap(); + + let mut ancestors = ir.ancestors(const_3); + assert_eq!(ancestors.next(), Some(add)); + assert_eq!(ancestors.next(), Some(root)); + assert_eq!(ancestors.next(), None); + + let mut ancestors = ir.ancestors(const_1); + assert_eq!(ancestors.next(), Some(root)); + assert_eq!(ancestors.next(), None); + + let mut ancestors = ir.ancestors(root); + assert_eq!(ancestors.next(), None); +} + +#[test] +fn common_ancestor() { + let mut ir = IR::new(); + + let const_1 = 
ir.constant(TypeValue::const_integer_from(1)); + let const_2 = ir.constant(TypeValue::const_integer_from(2)); + let const_3 = ir.constant(TypeValue::const_integer_from(2)); + let add = ir.add(vec![const_2, const_3]).unwrap(); + let root = ir.add(vec![const_1, add]).unwrap(); + + assert_eq!(ir.common_ancestor(&[const_1, const_3]), root); + assert_eq!(ir.common_ancestor(&[const_2, const_3]), add); + assert_eq!(ir.common_ancestor(&[const_1, const_1]), const_1); + assert_eq!(ir.common_ancestor(&[const_1, add, const_2]), root); +} + #[test] fn ir() { let files: Vec<_> = globwalk::glob("src/compiler/ir/tests/testdata/*.in") diff --git a/lib/src/compiler/ir/tests/testdata/1.folding.ir b/lib/src/compiler/ir/tests/testdata/1.folding.ir index c437ec06..904d1914 100644 --- a/lib/src/compiler/ir/tests/testdata/1.folding.ir +++ b/lib/src/compiler/ir/tests/testdata/1.folding.ir @@ -1,63 +1,63 @@ RULE test_1 - EQ [0x1e87f9ecd59c9f97] - CONST integer(1) - CONST integer(1) + id:12 parent:None: EQ [0x1e87f9ecd59c9f97] + id:10 parent:12: CONST integer(1) + id:11 parent:12: CONST integer(1) RULE test_2 - GT [0x936af95517c74992] - DIV [0x75c0510b526a94ab] - CONST integer(2) - CONST integer(1) - CONST integer(1) + id:6 parent:None: GT [0x936af95517c74992] + id:4 parent:6: DIV [0x75c0510b526a94ab] + id:2 parent:4: CONST integer(2) + id:3 parent:4: CONST integer(1) + id:5 parent:6: CONST integer(1) RULE test_3 - LE [0x63b2e857a1254c2b] - SHR [0x8dd6307742aa2311] - SHL [0x48eeb57428a0dbf0] - CONST integer(1) - CONST integer(2) - CONST integer(1) - CONST integer(2) + id:6 parent:None: LE [0x63b2e857a1254c2b] + id:4 parent:6: SHR [0x8dd6307742aa2311] + id:2 parent:4: SHL [0x48eeb57428a0dbf0] + id:0 parent:2: CONST integer(1) + id:1 parent:2: CONST integer(2) + id:3 parent:4: CONST integer(1) + id:5 parent:6: CONST integer(2) RULE test_4 - EQ [0x82d0fbda86ff9c76] - CONST integer(8) - CONST integer(8) + id:14 parent:None: EQ [0x82d0fbda86ff9c76] + id:12 parent:14: CONST integer(8) + id:13 
parent:14: CONST integer(8) RULE test_5 - AND [0xf8ebdbba687cc112] - EQ [0x78262602520cebf1] - FIELD_ACCESS [0x6d93cf8c80a38e93] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } - CONST integer(0) + id:12 parent:None: AND [0xf8ebdbba687cc112] + id:8 parent:12: EQ [0x78262602520cebf1] + id:6 parent:8: FIELD_ACCESS [0x6d93cf8c80a38e93] + id:4 parent:6: IDENT Field { index: 0, is_root: true, type_value: struct } + id:5 parent:6: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:7 parent:8: CONST integer(0) RULE test_6 - ADD [0xeb09fb0c289a1e4a] - FIELD_ACCESS [0x6d93cf8c80a38e93] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } - CONST integer(1) - CONST integer(2) + id:5 parent:None: ADD [0xeb09fb0c289a1e4a] + id:2 parent:5: FIELD_ACCESS [0x6d93cf8c80a38e93] + id:0 parent:2: IDENT Field { index: 0, is_root: true, type_value: struct } + id:1 parent:2: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:3 parent:5: CONST integer(1) + id:4 parent:5: CONST integer(2) RULE test_7 - AND [0xb21fa80def581aea] - CONTAINS [0x702eaf70b18909ff] - CONST string("foobar") - CONST string("bar") - ICONTAINS [0xe30a191883a20729] - CONST string("foobar") - CONST string("BAR") - STARTS_WITH [0xc963b1bf1e8d9506] - CONST string("foobar") - CONST string("foo") - ISTARTS_WITH [0x1ce5974aa4aaacaa] - CONST string("foobar") - CONST string("FOO") - ENDS_WITH [0x4394dbecb8c89956] - CONST string("foobar") - CONST string("bar") - IENDS_WITH [0xb67045948ae19680] - CONST string("foobar") - CONST string("BAR") + id:18 parent:None: AND [0xb21fa80def581aea] + id:2 parent:18: CONTAINS [0x702eaf70b18909ff] + id:0 parent:2: CONST string("foobar") + id:1 parent:2: CONST string("bar") + id:5 parent:18: ICONTAINS [0xe30a191883a20729] + id:3 parent:5: CONST string("foobar") + id:4 parent:5: 
CONST string("BAR") + id:8 parent:18: STARTS_WITH [0xc963b1bf1e8d9506] + id:6 parent:8: CONST string("foobar") + id:7 parent:8: CONST string("foo") + id:11 parent:18: ISTARTS_WITH [0x1ce5974aa4aaacaa] + id:9 parent:11: CONST string("foobar") + id:10 parent:11: CONST string("FOO") + id:14 parent:18: ENDS_WITH [0x4394dbecb8c89956] + id:12 parent:14: CONST string("foobar") + id:13 parent:14: CONST string("bar") + id:17 parent:18: IENDS_WITH [0xb67045948ae19680] + id:15 parent:17: CONST string("foobar") + id:16 parent:17: CONST string("BAR") diff --git a/lib/src/compiler/ir/tests/testdata/1.no-folding.ir b/lib/src/compiler/ir/tests/testdata/1.no-folding.ir index b1bcd50a..5919f4f6 100644 --- a/lib/src/compiler/ir/tests/testdata/1.no-folding.ir +++ b/lib/src/compiler/ir/tests/testdata/1.no-folding.ir @@ -1,77 +1,77 @@ RULE test_1 - EQ [0x65918958e229b41b] - SUB [0xbb5b2d3c003a978d] - ADD [0x32e284abcc26d05b] - CONST integer(1) - CONST integer(1) - CONST integer(1) - CONST integer(1) + id:12 parent:None: EQ [0x65918958e229b41b] + id:10 parent:12: SUB [0xbb5b2d3c003a978d] + id:8 parent:10: ADD [0x32e284abcc26d05b] + id:6 parent:8: CONST integer(1) + id:7 parent:8: CONST integer(1) + id:9 parent:10: CONST integer(1) + id:11 parent:12: CONST integer(1) RULE test_2 - GT [0x931bcdb2c7afb608] - DIV [0xf2b3602767487d08] - MUL [0x23a380280d1b4f32] - CONST integer(1) - CONST integer(2) - CONST integer(1) - CONST integer(1) + id:6 parent:None: GT [0x931bcdb2c7afb608] + id:4 parent:6: DIV [0xf2b3602767487d08] + id:2 parent:4: MUL [0x23a380280d1b4f32] + id:0 parent:2: CONST integer(1) + id:1 parent:2: CONST integer(2) + id:3 parent:4: CONST integer(1) + id:5 parent:6: CONST integer(1) RULE test_3 - LE [0x63b2e857a1254c2b] - SHR [0x8dd6307742aa2311] - SHL [0x48eeb57428a0dbf0] - CONST integer(1) - CONST integer(2) - CONST integer(1) - CONST integer(2) + id:6 parent:None: LE [0x63b2e857a1254c2b] + id:4 parent:6: SHR [0x8dd6307742aa2311] + id:2 parent:4: SHL [0x48eeb57428a0dbf0] + id:0 
parent:2: CONST integer(1) + id:1 parent:2: CONST integer(2) + id:3 parent:4: CONST integer(1) + id:5 parent:6: CONST integer(2) RULE test_4 - EQ [0x2240111cff945ff5] - SUB [0xa076dada583953db] - CONST integer(4) - MUL [0xd1635797e1f50589] - MINUS [0xb5577ca94105cb4b] - CONST integer(2) - CONST integer(2) - CONST integer(8) + id:14 parent:None: EQ [0x2240111cff945ff5] + id:12 parent:14: SUB [0xa076dada583953db] + id:7 parent:12: CONST integer(4) + id:11 parent:12: MUL [0xd1635797e1f50589] + id:9 parent:11: MINUS [0xb5577ca94105cb4b] + id:8 parent:9: CONST integer(2) + id:10 parent:11: CONST integer(2) + id:13 parent:14: CONST integer(8) RULE test_5 - AND [0x1e7c6065ed040c49] - EQ [0x78262602520cebf1] - FIELD_ACCESS [0x6d93cf8c80a38e93] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } - CONST integer(0) - CONST boolean(true) - NOT [0x2c3c4fda5217a5f3] - CONST boolean(false) + id:12 parent:None: AND [0x1e7c6065ed040c49] + id:8 parent:12: EQ [0x78262602520cebf1] + id:6 parent:8: FIELD_ACCESS [0x6d93cf8c80a38e93] + id:4 parent:6: IDENT Field { index: 0, is_root: true, type_value: struct } + id:5 parent:6: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:7 parent:8: CONST integer(0) + id:9 parent:12: CONST boolean(true) + id:11 parent:12: NOT [0x2c3c4fda5217a5f3] + id:10 parent:11: CONST boolean(false) RULE test_6 - ADD [0xeb09fb0c289a1e4a] - FIELD_ACCESS [0x6d93cf8c80a38e93] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } - CONST integer(1) - CONST integer(2) + id:5 parent:None: ADD [0xeb09fb0c289a1e4a] + id:2 parent:5: FIELD_ACCESS [0x6d93cf8c80a38e93] + id:0 parent:2: IDENT Field { index: 0, is_root: true, type_value: struct } + id:1 parent:2: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:3 parent:5: CONST integer(1) + id:4 parent:5: CONST 
integer(2) RULE test_7 - AND [0xb21fa80def581aea] - CONTAINS [0x702eaf70b18909ff] - CONST string("foobar") - CONST string("bar") - ICONTAINS [0xe30a191883a20729] - CONST string("foobar") - CONST string("BAR") - STARTS_WITH [0xc963b1bf1e8d9506] - CONST string("foobar") - CONST string("foo") - ISTARTS_WITH [0x1ce5974aa4aaacaa] - CONST string("foobar") - CONST string("FOO") - ENDS_WITH [0x4394dbecb8c89956] - CONST string("foobar") - CONST string("bar") - IENDS_WITH [0xb67045948ae19680] - CONST string("foobar") - CONST string("BAR") + id:18 parent:None: AND [0xb21fa80def581aea] + id:2 parent:18: CONTAINS [0x702eaf70b18909ff] + id:0 parent:2: CONST string("foobar") + id:1 parent:2: CONST string("bar") + id:5 parent:18: ICONTAINS [0xe30a191883a20729] + id:3 parent:5: CONST string("foobar") + id:4 parent:5: CONST string("BAR") + id:8 parent:18: STARTS_WITH [0xc963b1bf1e8d9506] + id:6 parent:8: CONST string("foobar") + id:7 parent:8: CONST string("foo") + id:11 parent:18: ISTARTS_WITH [0x1ce5974aa4aaacaa] + id:9 parent:11: CONST string("foobar") + id:10 parent:11: CONST string("FOO") + id:14 parent:18: ENDS_WITH [0x4394dbecb8c89956] + id:12 parent:14: CONST string("foobar") + id:13 parent:14: CONST string("bar") + id:17 parent:18: IENDS_WITH [0xb67045948ae19680] + id:15 parent:17: CONST string("foobar") + id:16 parent:17: CONST string("BAR") diff --git a/lib/src/compiler/ir/tests/testdata/2.folding.ir b/lib/src/compiler/ir/tests/testdata/2.folding.ir index e2ec5d8c..ac794492 100644 --- a/lib/src/compiler/ir/tests/testdata/2.folding.ir +++ b/lib/src/compiler/ir/tests/testdata/2.folding.ir @@ -1,45 +1,45 @@ RULE test - FOR_IN [0x99a2a38042db5122] - CONST integer(0) - PATTERN_COUNT PatternIdx(0) [0xa0a54267ad7f087f] - FOR_IN [0x72faaec8968d2cad] - CONST integer(0) - PATTERN_COUNT PatternIdx(1) [0x5378b902c7004afb] - FOR_IN [0xca11ac274b61c7b1] - FIELD_ACCESS [0x7226744cca3b46] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 19, is_root: 
false, type_value: array } - AND [0xb7cf927507bf6496] - LE [0x33cdc6e2f17a3eac] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] - IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } - LE [0xfceeb4c35fe4948] - PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] - IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } - ADD [0x10e80dd91c17496c] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - FIELD_ACCESS [0x571558e57b22c98b] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } - LE [0xa9d3de09cdf88a2e] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] - IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } - LE [0xd470a5399c77b4a4] - PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] - IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } - ADD [0x10e80dd91c17496c] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - FIELD_ACCESS [0x571558e57b22c98b] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:42 parent:None: 
FOR_IN [0x99a2a38042db5122] + id:0 parent:42: CONST integer(0) + id:1 parent:42: PATTERN_COUNT PatternIdx(0) [0xa0a54267ad7f087f] + id:41 parent:42: FOR_IN [0x72faaec8968d2cad] + id:2 parent:41: CONST integer(0) + id:3 parent:41: PATTERN_COUNT PatternIdx(1) [0x5378b902c7004afb] + id:40 parent:41: FOR_IN [0xca11ac274b61c7b1] + id:6 parent:40: FIELD_ACCESS [0x7226744cca3b46] + id:4 parent:6: IDENT Field { index: 0, is_root: true, type_value: struct } + id:5 parent:6: IDENT Field { index: 19, is_root: false, type_value: array } + id:39 parent:40: AND [0xb7cf927507bf6496] + id:12 parent:39: LE [0x33cdc6e2f17a3eac] + id:9 parent:12: FIELD_ACCESS [0xa441e24a6191870f] + id:7 parent:9: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:8 parent:9: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:11 parent:12: PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] + id:10 parent:11: IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } + id:22 parent:39: LE [0xfceeb4c35fe4948] + id:14 parent:22: PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] + id:13 parent:14: IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } + id:21 parent:22: ADD [0x10e80dd91c17496c] + id:17 parent:21: FIELD_ACCESS [0xa441e24a6191870f] + id:15 parent:17: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:16 parent:17: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:20 parent:21: FIELD_ACCESS [0x571558e57b22c98b] + id:18 parent:20: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:19 parent:20: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:28 parent:39: LE [0xa9d3de09cdf88a2e] + id:25 parent:28: FIELD_ACCESS [0xa441e24a6191870f] + id:23 parent:25: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } 
+ id:24 parent:25: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:27 parent:28: PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] + id:26 parent:27: IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } + id:38 parent:39: LE [0xd470a5399c77b4a4] + id:30 parent:38: PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] + id:29 parent:30: IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } + id:37 parent:38: ADD [0x10e80dd91c17496c] + id:33 parent:37: FIELD_ACCESS [0xa441e24a6191870f] + id:31 parent:33: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:32 parent:33: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:36 parent:37: FIELD_ACCESS [0x571558e57b22c98b] + id:34 parent:36: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:35 parent:36: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } diff --git a/lib/src/compiler/ir/tests/testdata/2.no-folding.ir b/lib/src/compiler/ir/tests/testdata/2.no-folding.ir index e2ec5d8c..ac794492 100644 --- a/lib/src/compiler/ir/tests/testdata/2.no-folding.ir +++ b/lib/src/compiler/ir/tests/testdata/2.no-folding.ir @@ -1,45 +1,45 @@ RULE test - FOR_IN [0x99a2a38042db5122] - CONST integer(0) - PATTERN_COUNT PatternIdx(0) [0xa0a54267ad7f087f] - FOR_IN [0x72faaec8968d2cad] - CONST integer(0) - PATTERN_COUNT PatternIdx(1) [0x5378b902c7004afb] - FOR_IN [0xca11ac274b61c7b1] - FIELD_ACCESS [0x7226744cca3b46] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 19, is_root: false, type_value: array } - AND [0xb7cf927507bf6496] - LE [0x33cdc6e2f17a3eac] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - PATTERN_OFFSET PatternIdx(0) 
INDEX [0xc12206c6ddd8d9c9] - IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } - LE [0xfceeb4c35fe4948] - PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] - IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } - ADD [0x10e80dd91c17496c] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - FIELD_ACCESS [0x571558e57b22c98b] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } - LE [0xa9d3de09cdf88a2e] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] - IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } - LE [0xd470a5399c77b4a4] - PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] - IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } - ADD [0x10e80dd91c17496c] - FIELD_ACCESS [0xa441e24a6191870f] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } - FIELD_ACCESS [0x571558e57b22c98b] - IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } - IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:42 parent:None: FOR_IN [0x99a2a38042db5122] + id:0 parent:42: CONST integer(0) + id:1 parent:42: PATTERN_COUNT PatternIdx(0) [0xa0a54267ad7f087f] + id:41 parent:42: FOR_IN [0x72faaec8968d2cad] + id:2 parent:41: CONST integer(0) + id:3 parent:41: PATTERN_COUNT PatternIdx(1) [0x5378b902c7004afb] + id:40 parent:41: 
FOR_IN [0xca11ac274b61c7b1] + id:6 parent:40: FIELD_ACCESS [0x7226744cca3b46] + id:4 parent:6: IDENT Field { index: 0, is_root: true, type_value: struct } + id:5 parent:6: IDENT Field { index: 19, is_root: false, type_value: array } + id:39 parent:40: AND [0xb7cf927507bf6496] + id:12 parent:39: LE [0x33cdc6e2f17a3eac] + id:9 parent:12: FIELD_ACCESS [0xa441e24a6191870f] + id:7 parent:9: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:8 parent:9: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:11 parent:12: PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] + id:10 parent:11: IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } + id:22 parent:39: LE [0xfceeb4c35fe4948] + id:14 parent:22: PATTERN_OFFSET PatternIdx(0) INDEX [0xc12206c6ddd8d9c9] + id:13 parent:14: IDENT Var { var: Var { frame_id: 1, ty: integer, index: 5 }, type_value: integer(unknown) } + id:21 parent:22: ADD [0x10e80dd91c17496c] + id:17 parent:21: FIELD_ACCESS [0xa441e24a6191870f] + id:15 parent:17: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:16 parent:17: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:20 parent:21: FIELD_ACCESS [0x571558e57b22c98b] + id:18 parent:20: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:19 parent:20: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } + id:28 parent:39: LE [0xa9d3de09cdf88a2e] + id:25 parent:28: FIELD_ACCESS [0xa441e24a6191870f] + id:23 parent:25: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:24 parent:25: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:27 parent:28: PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] + id:26 parent:27: IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } + id:38 parent:39: 
LE [0xd470a5399c77b4a4] + id:30 parent:38: PATTERN_OFFSET PatternIdx(1) INDEX [0x37281dedba57254c] + id:29 parent:30: IDENT Var { var: Var { frame_id: 2, ty: integer, index: 11 }, type_value: integer(unknown) } + id:37 parent:38: ADD [0x10e80dd91c17496c] + id:33 parent:37: FIELD_ACCESS [0xa441e24a6191870f] + id:31 parent:33: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:32 parent:33: IDENT Field { index: 0, is_root: false, type_value: integer(unknown) } + id:36 parent:37: FIELD_ACCESS [0x571558e57b22c98b] + id:34 parent:36: IDENT Var { var: Var { frame_id: 3, ty: struct, index: 17 }, type_value: struct } + id:35 parent:36: IDENT Field { index: 1, is_root: false, type_value: integer(unknown) } diff --git a/lib/src/compiler/ir/tests/testdata/3.folding.ir b/lib/src/compiler/ir/tests/testdata/3.folding.ir index 712765d0..2baf10ef 100644 --- a/lib/src/compiler/ir/tests/testdata/3.folding.ir +++ b/lib/src/compiler/ir/tests/testdata/3.folding.ir @@ -1,19 +1,19 @@ RULE test - OR [0x99788476ae53b807] - EQ [0x6994bc80c1a4dce0] - FN_CALL [0x168a4f6687a1363c] - FIELD_ACCESS [0x860e335dcd575606] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 4, is_root: false, type_value: function } - CONST integer(0) - FILESIZE - CONST string("feba6c919e3797e7778e8f2e85fa033d") - EQ [0x1acc396ba27115f6] - FN_CALL [0x168a4f6687a1363c] - FIELD_ACCESS [0x860e335dcd575606] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 4, is_root: false, type_value: function } - CONST integer(0) - FILESIZE - CONST string("275876e34cf609db118f3d84b799a790") + id:30 parent:None: OR [0x99788476ae53b807] + id:14 parent:30: EQ [0x6994bc80c1a4dce0] + id:12 parent:14: FN_CALL [0x168a4f6687a1363c] + id:9 parent:12: FIELD_ACCESS [0x860e335dcd575606] + id:7 parent:9: IDENT Field { index: 0, is_root: true, type_value: struct } + id:8 parent:9: IDENT Field { index: 4, is_root: false, type_value: function } + 
id:10 parent:12: CONST integer(0) + id:11 parent:12: FILESIZE + id:13 parent:14: CONST string("feba6c919e3797e7778e8f2e85fa033d") + id:29 parent:30: EQ [0x1acc396ba27115f6] + id:27 parent:29: FN_CALL [0x168a4f6687a1363c] + id:24 parent:27: FIELD_ACCESS [0x860e335dcd575606] + id:22 parent:24: IDENT Field { index: 0, is_root: true, type_value: struct } + id:23 parent:24: IDENT Field { index: 4, is_root: false, type_value: function } + id:25 parent:27: CONST integer(0) + id:26 parent:27: FILESIZE + id:28 parent:29: CONST string("275876e34cf609db118f3d84b799a790") diff --git a/lib/src/compiler/ir/tests/testdata/3.no-folding.ir b/lib/src/compiler/ir/tests/testdata/3.no-folding.ir index 712765d0..2baf10ef 100644 --- a/lib/src/compiler/ir/tests/testdata/3.no-folding.ir +++ b/lib/src/compiler/ir/tests/testdata/3.no-folding.ir @@ -1,19 +1,19 @@ RULE test - OR [0x99788476ae53b807] - EQ [0x6994bc80c1a4dce0] - FN_CALL [0x168a4f6687a1363c] - FIELD_ACCESS [0x860e335dcd575606] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 4, is_root: false, type_value: function } - CONST integer(0) - FILESIZE - CONST string("feba6c919e3797e7778e8f2e85fa033d") - EQ [0x1acc396ba27115f6] - FN_CALL [0x168a4f6687a1363c] - FIELD_ACCESS [0x860e335dcd575606] - IDENT Field { index: 0, is_root: true, type_value: struct } - IDENT Field { index: 4, is_root: false, type_value: function } - CONST integer(0) - FILESIZE - CONST string("275876e34cf609db118f3d84b799a790") + id:30 parent:None: OR [0x99788476ae53b807] + id:14 parent:30: EQ [0x6994bc80c1a4dce0] + id:12 parent:14: FN_CALL [0x168a4f6687a1363c] + id:9 parent:12: FIELD_ACCESS [0x860e335dcd575606] + id:7 parent:9: IDENT Field { index: 0, is_root: true, type_value: struct } + id:8 parent:9: IDENT Field { index: 4, is_root: false, type_value: function } + id:10 parent:12: CONST integer(0) + id:11 parent:12: FILESIZE + id:13 parent:14: CONST string("feba6c919e3797e7778e8f2e85fa033d") + id:29 parent:30: EQ 
[0x1acc396ba27115f6] + id:27 parent:29: FN_CALL [0x168a4f6687a1363c] + id:24 parent:27: FIELD_ACCESS [0x860e335dcd575606] + id:22 parent:24: IDENT Field { index: 0, is_root: true, type_value: struct } + id:23 parent:24: IDENT Field { index: 4, is_root: false, type_value: function } + id:25 parent:27: CONST integer(0) + id:26 parent:27: FILESIZE + id:28 parent:29: CONST string("275876e34cf609db118f3d84b799a790")