Skip to content

Commit

Permalink
Overhauls system macro table bootstrapping
Browse files Browse the repository at this point in the history
This patch modifies `EncodingContext` to have an empty macro table when
constructed for use in v1.0 readers.

This allows the Ion 1.0 reader to be used in the process of bootstrapping
the Ion 1.1 system macro table, which was previously not possible due to
the (not eliminated) circular dependency between macros and readers.

System macros that can be expressed in TDL are now constructed from
a template definition encoded in Ion 1.0. As a result, the system macro
table initialization logic is now substantially easier to read and
more straightforward to modify.
  • Loading branch information
zslayton committed Dec 6, 2024
1 parent 4131347 commit 73d1f1b
Show file tree
Hide file tree
Showing 13 changed files with 288 additions and 578 deletions.
2 changes: 1 addition & 1 deletion benches/read_many_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ mod benchmark {
let name = test_data_1_1.name.as_str();

let empty_context = EncodingContext::for_ion_version(IonVersion::v1_1);
let compiled_macro = TemplateCompiler::compile_from_text(
let compiled_macro = TemplateCompiler::compile_from_source(
empty_context.get_ref(),
&test_data_1_1.template_definition_text,
)
Expand Down
5 changes: 3 additions & 2 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1317,8 +1317,9 @@ mod tests {
encode_macro_fn: impl FnOnce(MacroAddress) -> Vec<u8>,
test_fn: impl FnOnce(BinaryEExpArgsIterator_1_1<'_>) -> IonResult<()>,
) -> IonResult<()> {
let mut context = EncodingContext::empty();
let template_macro = TemplateCompiler::compile_from_text(context.get_ref(), macro_source)?;
let mut context = EncodingContext::for_ion_version(IonVersion::v1_1);
let template_macro =
TemplateCompiler::compile_from_source(context.get_ref(), macro_source)?;
let macro_address = context.macro_table.add_macro(template_macro)?;
let opcode_byte = u8::try_from(macro_address).unwrap();
let binary_ion = encode_macro_fn(opcode_byte as usize);
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/encoder/text/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ mod tests {
let mut reader = LazyRawTextReader_1_1::new(encoded_text.as_bytes());
let mut context = EncodingContext::for_ion_version(IonVersion::v1_1);
let macro_foo =
TemplateCompiler::compile_from_text(context.get_ref(), "(macro foo (x*) null)")?;
TemplateCompiler::compile_from_source(context.get_ref(), "(macro foo (x*) null)")?;
context.macro_table.add_macro(macro_foo)?;
let context = context.get_ref();
let _marker = reader.next(context)?.expect_ivm()?;
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/encoder/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl<E: Encoding, Output: Write> Writer<E, Output> {
// TODO: LazyEncoder should define a method to construct a new symtab and/or macro table
let ion_version = E::ion_version();
let symbol_table = SymbolTable::new(ion_version);
let macro_table = MacroTable::with_system_macros();
let macro_table = MacroTable::with_system_macros(ion_version);
let context = WriterContext::new(symbol_table, macro_table);
let mut writer = Writer {
context,
Expand Down
156 changes: 99 additions & 57 deletions src/lazy/expanded/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! evaluation.
use crate::element::iterators::SymbolsIterator;
use crate::lazy::decoder::Decoder;
use crate::lazy::expanded::macro_table::ION_1_1_SYSTEM_MACROS;
use crate::lazy::expanded::template::{
ExprRange, MacroSignature, Parameter, ParameterCardinality, ParameterEncoding,
RestSyntaxPolicy, TemplateBody, TemplateBodyElement, TemplateBodyExpr, TemplateMacro,
Expand All @@ -14,7 +15,10 @@ use crate::lazy::text::raw::v1_1::reader::MacroIdRef;
use crate::lazy::value::LazyValue;
use crate::lazy::value_ref::ValueRef;
use crate::result::IonFailure;
use crate::{v1_1, IonError, IonResult, IonType, Macro, MacroTable, Reader, Symbol, SymbolRef};
use crate::{
AnyEncoding, IonError, IonInput, IonResult, IonType, Macro, MacroTable, Reader, Symbol,
SymbolRef,
};
use rustc_hash::FxHashMap;
use smallvec::SmallVec;
use std::ops::Range;
Expand Down Expand Up @@ -50,6 +54,17 @@ impl ExpansionAnalysis {
}
}

/// Returns an expansion analysis for a macro that will produce a single value that may or may
/// not be a system value.
pub const fn possible_system_value() -> Self {
ExpansionAnalysis {
could_produce_system_value: true,
must_produce_exactly_one_value: true,
can_be_lazily_evaluated_at_top_level: false,
expansion_singleton: None,
}
}

/// Returns an expansion analysis for a macro that always produces exactly one non-system value.
pub const fn single_application_value(ion_type: IonType) -> Self {
ExpansionAnalysis {
Expand All @@ -64,6 +79,19 @@ impl ExpansionAnalysis {
}
}

/// Returns an expansion analysis for a macro that always produces application values
/// (i.e. not system values), but the number of values produced is unspecified.
// TODO: It might be helpful to have an `empty_stream()` ExpansionAnalysis constructor
// to optimize `none`, `meta`, and template macros based on those.
pub const fn application_value_stream() -> Self {
ExpansionAnalysis {
could_produce_system_value: false,
must_produce_exactly_one_value: false,
can_be_lazily_evaluated_at_top_level: false, // Requires exactly-one output
expansion_singleton: None,
}
}

/// Produces a single system value: an s-expression annotated with `$ion`.
pub const fn directive() -> Self {
ExpansionAnalysis {
Expand Down Expand Up @@ -146,7 +174,7 @@ pub struct TemplateCompiler {}

impl TemplateCompiler {
/// Takes a TDL expression in the form:
/// ```ion_1_1
/// ```ion
/// (macro name (param1 param2 [...] paramN) body)
/// ```
/// and compiles it into a [`TemplateMacro`].
Expand All @@ -163,7 +191,7 @@ impl TemplateCompiler {
/// This arrangement--expressions storing their composite expression count--enables the reader
/// to skip the entire parent expression as desired. For example, in this macro:
///
/// ```ion_1_1
/// ```ion
/// (macro foo ()
/// // Template body expressions
/// [ // #0, num_expressions=5, range=0..5
Expand All @@ -185,12 +213,12 @@ impl TemplateCompiler {
/// The compiler recognizes the `(literal expr1 expr2 [...] exprN)` form, adding each subexpression
/// to the template without interpretation. `(literal ...)` does not appear in the compiled
/// template as there is nothing more for it to do at expansion time.
pub fn compile_from_text(
pub fn compile_from_source(
context: EncodingContextRef<'_>,
expression: &str,
source: impl IonInput,
) -> IonResult<TemplateMacro> {
// TODO: This is a rudimentary implementation that surfaces terse errors.
let mut reader = Reader::new(v1_1::Text, expression.as_bytes())?;
let mut reader = Reader::new(AnyEncoding, source)?;
let macro_def_sexp = reader.expect_next()?.read()?.expect_sexp()?;

Self::compile_from_sexp(context, &MacroTable::empty(), macro_def_sexp)
Expand Down Expand Up @@ -431,10 +459,7 @@ impl TemplateCompiler {
let macro_id = macro_id.into();
match module_name {
// If the module is `$ion`, this refers to the system module.
"$ion" => context
.system_module
.macro_table()
.clone_macro_with_id(macro_id),
"$ion" => ION_1_1_SYSTEM_MACROS.clone_macro_with_id(macro_id),
// If the module is `$ion_encoding`, this refers to the active encoding module.
"$ion_encoding" => context.macro_table().clone_macro_with_id(macro_id),
_ => todo!(
Expand All @@ -459,7 +484,61 @@ impl TemplateCompiler {
// The `params` clause of the macro definition is an s-expression enumerating the parameters
// that the macro accepts. For example: `(flex_uint::x, y*, z?)`.
let params_clause = Self::expect_sexp("an s-expression defining parameters", &mut values)?;
let signature = Self::compile_signature_from_sexp(context, pending_macros, params_clause)?;
let body = Self::expect_next("the template body", &mut values)?;
let expansion_analysis = Self::analyze_body_expr(body)?;
let mut compiled_body = TemplateBody {
expressions: Vec::new(),
annotations_storage: Vec::new(),
};
// Information that will be propagated to each subexpression
let tdl_context = TdlContext {
context,
pending_macros,
signature: &signature,
};
Self::compile_value(
tdl_context,
&mut compiled_body,
/*is_literal=*/ false,
/*target_parameter=*/ None, /*; this is not a macro invocation arg.*/
body,
)?;
// Make sure there aren't any unexpected expressions following the body.
match values.next() {
None => {}
Some(expr) => {
let name = template_name.unwrap_or_else(|| "<anonymous>".into());
return IonResult::decoding_error(format!(
"found unexpected expression following the body of macro '{name}': {:?}",
expr?
));
}
}
let template_macro = TemplateMacro {
name: template_name,
signature,
body: compiled_body,
expansion_analysis,
};
Ok(template_macro)
}

pub fn compile_signature(
context: EncodingContextRef<'_>,
source: impl IonInput,
) -> IonResult<MacroSignature> {
let mut reader = Reader::new(AnyEncoding, source)?;
let empty_macro_table = MacroTable::empty();
let params_clause = reader.expect_next()?.read()?.expect_sexp()?;
Self::compile_signature_from_sexp(context, &empty_macro_table, params_clause)
}

fn compile_signature_from_sexp<D: Decoder>(
context: EncodingContextRef<'_>,
pending_macros: &MacroTable,
params_clause: LazySExp<'_, D>,
) -> IonResult<MacroSignature> {
let mut compiled_params = Vec::new();
// `param_items` is a peekable iterator over the Ion values in `params_clause`. Because it
// is peekable, we can look ahead at each step to see if there are more values. This is
Expand Down Expand Up @@ -520,44 +599,7 @@ impl TemplateCompiler {
Parameter::new(name, parameter_encoding, cardinality, rest_syntax_policy);
compiled_params.push(compiled_param);
}
let signature = MacroSignature::new(compiled_params)?;
let body = Self::expect_next("the template body", &mut values)?;
let expansion_analysis = Self::analyze_body_expr(body)?;
let mut compiled_body = TemplateBody {
expressions: Vec::new(),
annotations_storage: Vec::new(),
};
// Information that will be propagated to each subexpression
let tdl_context = TdlContext {
context,
pending_macros,
signature: &signature,
};
Self::compile_value(
tdl_context,
&mut compiled_body,
/*is_literal=*/ false,
/*target_parameter=*/ None, /*; this is not a macro invocation arg.*/
body,
)?;
// Make sure there aren't any unexpected expressions following the body.
match values.next() {
None => {}
Some(expr) => {
let name = template_name.unwrap_or_else(|| "<anonymous>".into());
return IonResult::decoding_error(format!(
"found unexpected expression following the body of macro '{name}': {:?}",
expr?
));
}
}
let template_macro = TemplateMacro {
name: template_name,
signature,
body: compiled_body,
expansion_analysis,
};
Ok(template_macro)
MacroSignature::new(compiled_params)
}

/// The entry point for static analysis of a template body expression.
Expand Down Expand Up @@ -1412,7 +1454,7 @@ mod tests {
ExprRange, ParameterEncoding, TemplateBodyExpr, TemplateMacro, TemplateValue,
};
use crate::lazy::expanded::{EncodingContext, EncodingContextRef};
use crate::{Int, IntoAnnotations, IonResult, Macro, Symbol};
use crate::{Int, IntoAnnotations, IonResult, IonVersion, Macro, Symbol};
use rustc_hash::FxHashMap;
use std::sync::Arc;

Expand Down Expand Up @@ -1514,7 +1556,7 @@ mod tests {
impl TestResources {
fn new() -> Self {
Self {
context: EncodingContext::empty(),
context: EncodingContext::for_ion_version(IonVersion::v1_1),
}
}

Expand All @@ -1530,7 +1572,7 @@ mod tests {

let expression = "(macro foo () 42)";

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
assert_eq!(template.name(), "foo");
assert_eq!(template.signature().len(), 0);
expect_value(&template, 0, TemplateValue::Int(42.into()))?;
Expand All @@ -1544,7 +1586,7 @@ mod tests {

let expression = "(macro foo () [1, 2, 3])";

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
assert_eq!(template.name(), "foo");
assert_eq!(template.signature().len(), 0);
expect_value(&template, 0, TemplateValue::List)?;
Expand All @@ -1561,7 +1603,7 @@ mod tests {

let expression = r#"(macro foo () (.values 42 "hello" false))"#;

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
assert_eq!(template.name(), "foo");
assert_eq!(template.signature().len(), 0);
expect_macro(
Expand All @@ -1584,7 +1626,7 @@ mod tests {
let expression =
"(macro foo (x y z) [100, [200, a::b::300], (%x), {y: [true, false, (%z)]}])";

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
expect_value(&template, 0, TemplateValue::List)?;
expect_value(&template, 1, TemplateValue::Int(Int::from(100)))?;
expect_value(&template, 2, TemplateValue::List)?;
Expand All @@ -1610,7 +1652,7 @@ mod tests {

let expression = "(macro identity (x) (%x))";

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
assert_eq!(template.name(), "identity");
assert_eq!(template.signature().len(), 1);
expect_variable(&template, 0, 0)?;
Expand All @@ -1624,7 +1666,7 @@ mod tests {

let expression = "(macro identity (flex_uint::x) (%x))";

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
assert_eq!(template.name(), "identity");
assert_eq!(template.signature().len(), 1);
assert_eq!(
Expand Down Expand Up @@ -1656,7 +1698,7 @@ mod tests {
(.values (%x) ))))
"#;

let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?;
let template = TemplateCompiler::compile_from_source(context.get_ref(), expression)?;
assert_eq!(template.name(), "foo");
assert_eq!(template.signature().len(), 1);
// Outer `values`
Expand Down
10 changes: 2 additions & 8 deletions src/lazy/expanded/encoding_module.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::lazy::expanded::macro_table::MacroTable;
use crate::{IonVersion, SymbolTable};
use crate::SymbolTable;

#[derive(Debug, Clone)]
pub struct EncodingModule {
Expand All @@ -16,13 +16,7 @@ impl EncodingModule {
symbol_table,
}
}
pub fn ion_1_1_system_module() -> Self {
Self::new(
String::from("$ion"),
MacroTable::with_system_macros(),
SymbolTable::new(IonVersion::v1_1),
)
}

pub fn name(&self) -> &str {
&self.name
}
Expand Down
14 changes: 4 additions & 10 deletions src/lazy/expanded/macro_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2110,22 +2110,16 @@ mod tests {
(&[macro_id, 0x0B, 0x0D], (5, 6)), // TODO: non-required cardinalities
];

for test in tests {
let mut stream = vec![0xE0, 0x01, 0x00, 0xEA];
stream.extend_from_slice(test.0);
println!(
"stream {:02X?} -> pair ({}, {})",
test.0, test.1 .0, test.1 .1
);
let mut reader = Reader::new(v1_1::Binary, stream.as_slice())?;
for (stream, (num1, num2)) in tests.iter().copied() {
let mut reader = Reader::new(v1_1::Binary, stream)?;
reader.register_template_src(template_definition)?;
assert_eq!(
reader.next()?.unwrap().read()?.expect_int()?,
Int::from(test.1 .0)
Int::from(num1)
);
assert_eq!(
reader.next()?.unwrap().read()?.expect_int()?,
Int::from(test.1 .1)
Int::from(num2)
);
}
Ok(())
Expand Down
Loading

0 comments on commit 73d1f1b

Please sign in to comment.