Skip to content

Commit

Permalink
add support for include in a2ml
Browse files Browse the repository at this point in the history
  • Loading branch information
Louis Caron authored and DanielT committed Jun 17, 2024
1 parent 40e7b0c commit 4d49808
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 61 deletions.
161 changes: 126 additions & 35 deletions a2lfile/src/a2ml.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use super::{tokenizer, loader};
use super::writer::{TaggedItemInfo, Writer};
use std::collections::HashMap;
use std::path::Path;

// tokenizer types
#[derive(Debug, PartialEq)]
Expand Down Expand Up @@ -118,13 +120,14 @@ pub enum GenericIfData {

// tokenize()
// Tokenize the text of the a2ml section
fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
fn tokenize_a2ml(filename: String, input: &str, complete_string: &mut String) -> Result<Vec<TokenType>, String> {
let mut amltokens = Vec::<TokenType>::new();
let mut remaining = input;

while !remaining.is_empty() {
let mut chars = remaining.char_indices();
let (mut idx, mut c) = chars.next().unwrap();
let mut append_to_complete = true;

if c.is_ascii_whitespace() {
/* skip whitespace */
Expand All @@ -133,8 +136,6 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
idx = pair.0;
c = pair.1;
}
remaining = &remaining[idx..];
continue;
} else if remaining.starts_with("/*") {
/* get a block comment */
chars.next(); /* skip over the '*' char of the opening sequence */
Expand Down Expand Up @@ -162,7 +163,8 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
return Err(format!("unclosed block quote starting with \"{errtxt}\""));
}
}
remaining = &remaining[idx + 1..];
// chomp the last /
idx += 1;
} else if remaining.starts_with("//") {
/* get a line comment */
loop {
Expand All @@ -177,7 +179,75 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
break;
}
}
remaining = &remaining[idx + 1..];
// add the initial extra / in //
idx += 1;
} else if remaining.starts_with("/include") {
// skip over the rest of the "/include" keyword ('include' = 7 chars, iterator offsets 0..=6 after the leading '/')
chars.nth(6);
let mut state = 0;
let mut fname_idx_start = 0;
let fname_idx_end;

// skip the whitespaces
loop {
let pair = chars.next().unwrap_or((idx + 1, '\0'));
idx = pair.0;
c = pair.1;
if state == 0 && c.is_ascii_whitespace() {
// just skip whitespaces
} else if state == 0 && tokenizer::is_pathchar(c as u8) {
// start a non quoted filename
state = 1;
fname_idx_start = idx;
} else if state == 1 && tokenizer::is_pathchar(c as u8) {
// in non quoted filename
} else if state == 1 && (c.is_ascii_whitespace() || c == '\0') {
// end of non quoted filename
fname_idx_end = idx;
break;
} else if state == 0 && c == '"' {
// start a quoted filename
state = 2;
} else if state == 2 && tokenizer::is_pathchar(c as u8) {
// first byte of a quoted filename
state = 3;
fname_idx_start = idx;
} else if state == 3 && tokenizer::is_pathchar(c as u8) {
// in a quoted filename
} else if state == 3 && c == '"' {
// end of quoted filename
fname_idx_end = idx;
// chomp the '"'
idx = idx + 1;
break;
}
else {
let displaylen = if remaining.len() > 16 {
16
} else {
remaining.len()
};
// slicing remaining in arbitrary ways is not safe, the end might be in the middle of a utf-8 sequence, so from_utf8_lossy is needed
let errtxt = String::from_utf8_lossy(&remaining.as_bytes()[..displaylen]);
return Err(format!("failed parsing a2ml include filename in {errtxt}"));
}
}
// if the current filename was not provided (e.g. in unit tests), do not try to parse the include file
if !filename.is_empty() {
let incfilename = loader::make_include_filename(&remaining[fname_idx_start..fname_idx_end], &filename);

// check if incname is an accessible file
let incpathref = Path::new(&incfilename);
let loadresult = loader::load(incpathref);
if let Ok(incfiledata) = loadresult {
let mut tokresult = tokenize_a2ml(incpathref.display().to_string(), &incfiledata, complete_string)?;
// append the tokens from the included file(s)
amltokens.append(&mut tokresult);
} else {
return Err(format!("failed reading {}", incpathref.display()));
}
}
append_to_complete = false;
} else if c == '"' {
/* tag - it is enclosed in double quotes, but contains neither spaces nor escape characters */
loop {
Expand All @@ -191,7 +261,7 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
if c == '"' {
let tag = &remaining[1..idx];
amltokens.push(TokenType::Tag(tag.to_string()));
remaining = &remaining[idx + 1..];
idx += 1;
} else {
let displaylen = if remaining.len() > 16 {
16
Expand All @@ -204,34 +274,34 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
}
} else if c == ';' {
amltokens.push(TokenType::Semicolon);
remaining = &remaining[1..];
idx = 1;
} else if c == ',' {
amltokens.push(TokenType::Comma);
remaining = &remaining[1..];
idx = 1;
} else if c == '{' {
amltokens.push(TokenType::OpenCurlyBracket);
remaining = &remaining[1..];
idx = 1;
} else if c == '}' {
amltokens.push(TokenType::ClosedCurlyBracket);
remaining = &remaining[1..];
idx = 1;
} else if c == '[' {
amltokens.push(TokenType::OpenSquareBracket);
remaining = &remaining[1..];
idx = 1;
} else if c == ']' {
amltokens.push(TokenType::ClosedSquareBracket);
remaining = &remaining[1..];
idx = 1;
} else if c == '(' {
amltokens.push(TokenType::OpenRoundBracket);
remaining = &remaining[1..];
idx = 1;
} else if c == ')' {
amltokens.push(TokenType::ClosedRoundBracket);
remaining = &remaining[1..];
idx = 1;
} else if c == '*' {
amltokens.push(TokenType::Repeat);
remaining = &remaining[1..];
idx = 1;
} else if c == '=' {
amltokens.push(TokenType::Equals);
remaining = &remaining[1..];
idx = 1;
} else if c.is_ascii_digit() {
loop {
let pair = chars.next().unwrap_or((idx + 1, '\0'));
Expand All @@ -257,7 +327,6 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
return Err(format!("Invalid sequence in AML: {num_text}"));
}
}
remaining = &remaining[idx..];
} else if c.is_ascii_alphabetic() || c == '_' {
loop {
let pair = chars.next().unwrap_or((idx + 1, '\0'));
Expand Down Expand Up @@ -318,7 +387,6 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
amltokens.push(TokenType::Identifier(kw_or_ident.to_string()));
}
}
remaining = &remaining[idx..];
} else {
let displaylen = if remaining.len() > 16 {
16
Expand All @@ -329,6 +397,10 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
let errtxt = String::from_utf8_lossy(&remaining.as_bytes()[..displaylen]);
return Err(format!("Unable to tokenize: {errtxt}..."));
}
if append_to_complete {
complete_string.push_str(&remaining[..idx])
}
remaining = &remaining[idx..];
}

Ok(amltokens)
Expand All @@ -337,8 +409,9 @@ fn tokenize_a2ml(input: &str) -> Result<Vec<TokenType>, String> {
// parse an a2ml fragment in an a2l file
// The target data structure is the parsing definition used by the a2l parser, so that the
// a2ml can control the parsing of IF_DATA blocks
pub(crate) fn parse_a2ml(input: &str) -> Result<A2mlTypeSpec, String> {
let tok_result = tokenize_a2ml(input)?;
pub(crate) fn parse_a2ml(filename: String, input: &str) -> Result<(A2mlTypeSpec, String), String> {
let mut complete_string = String::with_capacity(input.len());
let tok_result = tokenize_a2ml(filename, input, &mut complete_string)?;
let mut tok_iter = tok_result.iter().peekable();

let mut ifdata_block: Option<A2mlTypeSpec> = None;
Expand Down Expand Up @@ -411,7 +484,7 @@ pub(crate) fn parse_a2ml(input: &str) -> Result<A2mlTypeSpec, String> {

// The integration point between the custom blocks in Aml and the A2l file is the IF_DATA block.
if let Some(ifdata_block) = ifdata_block {
Ok(ifdata_block)
Ok((ifdata_block, complete_string))
} else {
Err("The A2ML declaration was fully parsed. However it does not contain an IF_DATA block, so it is not usable.".to_string())
}
Expand Down Expand Up @@ -1286,39 +1359,57 @@ mod test {

#[test]
fn tokenize() {
let tokenvec = tokenize_a2ml(" ").unwrap();
let mut complete_string = String::new();
let tokenvec = tokenize_a2ml(String::new(), " ", &mut complete_string).unwrap();
assert!(tokenvec.is_empty());

let tokenvec = tokenize_a2ml("/* // */").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "/* // */", &mut complete_string).unwrap();
assert!(tokenvec.is_empty());
let tokenvec = tokenize_a2ml("/*/*/").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "/*/*/", &mut complete_string).unwrap();
assert!(tokenvec.is_empty());
let tokenvec = tokenize_a2ml("/***/").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "/***/", &mut complete_string).unwrap();
assert!(tokenvec.is_empty());
let tokenvec_err = tokenize_a2ml("/* ");
let tokenvec_err = tokenize_a2ml(String::new(), "/* ", &mut complete_string);
assert!(tokenvec_err.is_err());
let tokenvec = tokenize_a2ml("//*/").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "//*/", &mut complete_string).unwrap();
assert!(tokenvec.is_empty());

let tokenvec = tokenize_a2ml(r#""TAG""#).unwrap();
let tokenvec = tokenize_a2ml(String::new(), r#""TAG""#, &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 1);
let _tag = TokenType::Tag("TAG".to_string());
assert!(matches!(&tokenvec[0], _tag));

let tokenvec = tokenize_a2ml(";").unwrap();
let tokenvec = tokenize_a2ml(String::new(), ";", &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 1);
assert!(matches!(tokenvec[0], TokenType::Semicolon));

let tokenvec = tokenize_a2ml("0").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "0", &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 1);
assert!(matches!(tokenvec[0], TokenType::Constant(0)));

let tokenvec = tokenize_a2ml("0x03").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "0x03", &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 1);
assert!(matches!(tokenvec[0], TokenType::Constant(3)));

let tokenvec = tokenize_a2ml("123456").unwrap();
let tokenvec = tokenize_a2ml(String::new(), "123456", &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 1);
assert!(matches!(tokenvec[0], TokenType::Constant(123456)));

let err_result = tokenize_a2ml(r#" "unclosed "#);
let tokenvec = tokenize_a2ml(String::new(), r#"/include "testfile""#, &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 0);

let tokenvec = tokenize_a2ml(String::new(), r#"/include"testfile""#, &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 0);

let tokenvec = tokenize_a2ml(String::new(), r#"/include testfile"#, &mut complete_string).unwrap();
assert_eq!(tokenvec.len(), 0);

let err_result = tokenize_a2ml(String::new(), r#"/include "testfile_unclosed_quote"#, &mut complete_string);
assert!(err_result.is_err());

let err_result = tokenize_a2ml(String::new(), r#" "unclosed "#, &mut complete_string);
assert!(err_result.is_err());

}

#[test]
Expand Down Expand Up @@ -1436,9 +1527,9 @@ mod test {
A2mlTypeSpec::TaggedStruct(taggedstruct_hashmap),
]);

let parse_result = parse_a2ml(TEST_INPUT);
let parse_result = parse_a2ml(String::new(), TEST_INPUT);
assert!(parse_result.is_ok());
let a2ml_spec = parse_result.unwrap();
let (a2ml_spec, _complete_string) = parse_result.unwrap();
println!("{:?}", a2ml_spec);
assert_eq!(a2ml_spec, expected_parse_result);
}
Expand Down
2 changes: 1 addition & 1 deletion a2lfile/src/ifdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ mod ifdata_test {
&mut log_msgs,
false,
);
parser.builtin_a2mlspec = Some(a2lfile::a2ml::parse_a2ml(A2MLTEST_TEXT).unwrap());
parser.builtin_a2mlspec = Some(a2lfile::a2ml::parse_a2ml(String::new(), A2MLTEST_TEXT).unwrap().0);
super::parse_ifdata(
&mut parser,
&a2lfile::ParseContext {
Expand Down
4 changes: 2 additions & 2 deletions a2lfile/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ fn load_impl(
// if a built-in A2ml specification was passed as a string, then it is parsed here
if let Some(spec) = a2ml_spec {
parser.builtin_a2mlspec = Some(
a2ml::parse_a2ml(&spec)
.map_err(|parse_err| A2lError::InvalidBuiltinA2mlSpec { parse_err })?,
a2ml::parse_a2ml(path.to_string_lossy().to_string(), &spec)
.map_err(|parse_err| A2lError::InvalidBuiltinA2mlSpec { parse_err })?.0,
);
}

Expand Down
13 changes: 13 additions & 0 deletions a2lfile/src/loader.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
use std::ffi::OsString;
use crate::A2lError;
use std::fs::File;
use std::io::Read;
use std::path::Path;

/// Resolve an `/include` target relative to the file that referenced it.
///
/// If `base_filename` has a parent directory and `incname` exists inside that
/// directory, the joined path is returned; otherwise `incname` is returned
/// unchanged so that the caller's subsequent load reports the failure.
pub(crate) fn make_include_filename(incname: &str, base_filename: &str) -> OsString {
    std::path::Path::new(base_filename)
        .parent()
        .map(|base_dir| base_dir.join(incname))
        .filter(|candidate| candidate.exists())
        .map(OsString::from)
        .unwrap_or_else(|| OsString::from(incname))
}

pub fn load(path: &Path) -> Result<String, A2lError> {
let mut file = match File::open(path) {
Ok(file) => file,
Expand Down
Loading

0 comments on commit 4d49808

Please sign in to comment.