diff --git a/a2lfile/src/a2ml.rs b/a2lfile/src/a2ml.rs index 7256d81..69067ec 100644 --- a/a2lfile/src/a2ml.rs +++ b/a2lfile/src/a2ml.rs @@ -1,5 +1,7 @@ +use super::{tokenizer, loader}; use super::writer::{TaggedItemInfo, Writer}; use std::collections::HashMap; +use std::path::Path; // tokenizer types #[derive(Debug, PartialEq)] @@ -118,13 +120,14 @@ pub enum GenericIfData { // tokenize() // Tokenize the text of the a2ml section -fn tokenize_a2ml(input: &str) -> Result, String> { +fn tokenize_a2ml(filename: String, input: &str, complete_string: &mut String) -> Result, String> { let mut amltokens = Vec::::new(); let mut remaining = input; while !remaining.is_empty() { let mut chars = remaining.char_indices(); let (mut idx, mut c) = chars.next().unwrap(); + let mut append_to_complete = true; if c.is_ascii_whitespace() { /* skip whitespace */ @@ -133,8 +136,6 @@ fn tokenize_a2ml(input: &str) -> Result, String> { idx = pair.0; c = pair.1; } - remaining = &remaining[idx..]; - continue; } else if remaining.starts_with("/*") { /* get a block comment */ chars.next(); /* skip over the '*' char of the opening sequence */ @@ -162,7 +163,8 @@ fn tokenize_a2ml(input: &str) -> Result, String> { return Err(format!("unclosed block quote starting with \"{errtxt}\"")); } } - remaining = &remaining[idx + 1..]; + // chomp the last / + idx += 1; } else if remaining.starts_with("//") { /* get a line comment */ loop { @@ -177,7 +179,75 @@ fn tokenize_a2ml(input: &str) -> Result, String> { break; } } - remaining = &remaining[idx + 1..]; + // add the initial extra / in // + idx += 1; + } else if remaining.starts_with("/include") { + // skip the first elements (include = 0..6) + chars.nth(6); + let mut state = 0; + let mut fname_idx_start = 0; + let fname_idx_end; + + // skip the whitespaces + loop { + let pair = chars.next().unwrap_or((idx + 1, '\0')); + idx = pair.0; + c = pair.1; + if state == 0 && c.is_ascii_whitespace() { + // just skip whitespaces + } else if state == 0 && 
tokenizer::is_pathchar(c as u8) { + // start a non quoted filename + state = 1; + fname_idx_start = idx; + } else if state == 1 && tokenizer::is_pathchar(c as u8) { + // in non quoted filename + } else if state == 1 && (c.is_ascii_whitespace() || c == '\0') { + // end of non quoted filename + fname_idx_end = idx; + break; + } else if state == 0 && c == '"' { + // start a quoted filename + state = 2; + } else if state == 2 && tokenizer::is_pathchar(c as u8) { + // first byte of a quoted filename + state = 3; + fname_idx_start = idx; + } else if state == 3 && tokenizer::is_pathchar(c as u8) { + // in a quoted filename + } else if state == 3 && c == '"' { + // end of quoted filename + fname_idx_end = idx; + // chomp the '"' + idx = idx + 1; + break; + } + else { + let displaylen = if remaining.len() > 16 { + 16 + } else { + remaining.len() + }; + // slicing remaining in arbitrary ways is not safe, the end might be in the middle of a utf-8 sequence, so from_utf8_lossy is needed + let errtxt = String::from_utf8_lossy(&remaining.as_bytes()[..displaylen]); + return Err(format!("failed parsing a2ml include filename in {errtxt}")); + } + } + // if the current filename was not provided (unit tests..), do not try to parse the include file + if !filename.is_empty() { + let incfilename = loader::make_include_filename(&remaining[fname_idx_start..fname_idx_end], &filename); + + // check if incname is an accessible file + let incpathref = Path::new(&incfilename); + let loadresult = loader::load(incpathref); + if let Ok(incfiledata) = loadresult { + let mut tokresult = tokenize_a2ml(incpathref.display().to_string(), &incfiledata, complete_string)?; + // append the tokens from the included file(s) + amltokens.append(&mut tokresult); + } else { + return Err(format!("failed reading {}", incpathref.display())); + } + } + append_to_complete = false; } else if c == '"' { /* tag - it is enclosed in double quotes, but contains neither spaces nor escape characters */ loop { @@
-191,7 +261,7 @@ fn tokenize_a2ml(input: &str) -> Result, String> { if c == '"' { let tag = &remaining[1..idx]; amltokens.push(TokenType::Tag(tag.to_string())); - remaining = &remaining[idx + 1..]; + idx += 1; } else { let displaylen = if remaining.len() > 16 { 16 @@ -204,34 +274,34 @@ fn tokenize_a2ml(input: &str) -> Result, String> { } } else if c == ';' { amltokens.push(TokenType::Semicolon); - remaining = &remaining[1..]; + idx = 1; } else if c == ',' { amltokens.push(TokenType::Comma); - remaining = &remaining[1..]; + idx = 1; } else if c == '{' { amltokens.push(TokenType::OpenCurlyBracket); - remaining = &remaining[1..]; + idx = 1; } else if c == '}' { amltokens.push(TokenType::ClosedCurlyBracket); - remaining = &remaining[1..]; + idx = 1; } else if c == '[' { amltokens.push(TokenType::OpenSquareBracket); - remaining = &remaining[1..]; + idx = 1; } else if c == ']' { amltokens.push(TokenType::ClosedSquareBracket); - remaining = &remaining[1..]; + idx = 1; } else if c == '(' { amltokens.push(TokenType::OpenRoundBracket); - remaining = &remaining[1..]; + idx = 1; } else if c == ')' { amltokens.push(TokenType::ClosedRoundBracket); - remaining = &remaining[1..]; + idx = 1; } else if c == '*' { amltokens.push(TokenType::Repeat); - remaining = &remaining[1..]; + idx = 1; } else if c == '=' { amltokens.push(TokenType::Equals); - remaining = &remaining[1..]; + idx = 1; } else if c.is_ascii_digit() { loop { let pair = chars.next().unwrap_or((idx + 1, '\0')); @@ -257,7 +327,6 @@ fn tokenize_a2ml(input: &str) -> Result, String> { return Err(format!("Invalid sequence in AML: {num_text}")); } } - remaining = &remaining[idx..]; } else if c.is_ascii_alphabetic() || c == '_' { loop { let pair = chars.next().unwrap_or((idx + 1, '\0')); @@ -318,7 +387,6 @@ fn tokenize_a2ml(input: &str) -> Result, String> { amltokens.push(TokenType::Identifier(kw_or_ident.to_string())); } } - remaining = &remaining[idx..]; } else { let displaylen = if remaining.len() > 16 { 16 @@ -329,6 +397,10 
@@ fn tokenize_a2ml(input: &str) -> Result, String> { let errtxt = String::from_utf8_lossy(&remaining.as_bytes()[..displaylen]); return Err(format!("Unable to tokenize: {errtxt}...")); } + if append_to_complete { + complete_string.push_str(&remaining[..idx]) + } + remaining = &remaining[idx..]; } Ok(amltokens) @@ -337,8 +409,9 @@ fn tokenize_a2ml(input: &str) -> Result, String> { // parse an a2ml fragment in an a2l file // The target data structure is the parsing definition used by the a2l parser, so that the // a2ml can control the parsing of IF_DATA blocks -pub(crate) fn parse_a2ml(input: &str) -> Result { - let tok_result = tokenize_a2ml(input)?; +pub(crate) fn parse_a2ml(filename: String, input: &str) -> Result<(A2mlTypeSpec, String), String> { + let mut complete_string = String::with_capacity(input.len()); + let tok_result = tokenize_a2ml(filename, input, &mut complete_string)?; let mut tok_iter = tok_result.iter().peekable(); let mut ifdata_block: Option = None; @@ -411,7 +484,7 @@ pub(crate) fn parse_a2ml(input: &str) -> Result { // The integration point between the custom blocks in Aml and the A2l file is the IF_DATA block. if let Some(ifdata_block) = ifdata_block { - Ok(ifdata_block) + Ok((ifdata_block, complete_string)) } else { Err("The A2ML declaration was fully parsed. 
However it does not contain an IF_DATA block, so it is not usable.".to_string()) } @@ -1286,39 +1359,57 @@ mod test { #[test] fn tokenize() { - let tokenvec = tokenize_a2ml(" ").unwrap(); + let mut complete_string = String::new(); + let tokenvec = tokenize_a2ml(String::new(), " ", &mut complete_string).unwrap(); assert!(tokenvec.is_empty()); - let tokenvec = tokenize_a2ml("/* // */").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "/* // */", &mut complete_string).unwrap(); assert!(tokenvec.is_empty()); - let tokenvec = tokenize_a2ml("/*/*/").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "/*/*/", &mut complete_string).unwrap(); assert!(tokenvec.is_empty()); - let tokenvec = tokenize_a2ml("/***/").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "/***/", &mut complete_string).unwrap(); assert!(tokenvec.is_empty()); - let tokenvec_err = tokenize_a2ml("/* "); + let tokenvec_err = tokenize_a2ml(String::new(), "/* ", &mut complete_string); assert!(tokenvec_err.is_err()); - let tokenvec = tokenize_a2ml("//*/").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "//*/", &mut complete_string).unwrap(); assert!(tokenvec.is_empty()); - let tokenvec = tokenize_a2ml(r#""TAG""#).unwrap(); + let tokenvec = tokenize_a2ml(String::new(), r#""TAG""#, &mut complete_string).unwrap(); assert_eq!(tokenvec.len(), 1); let _tag = TokenType::Tag("TAG".to_string()); assert!(matches!(&tokenvec[0], _tag)); - let tokenvec = tokenize_a2ml(";").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), ";", &mut complete_string).unwrap(); + assert_eq!(tokenvec.len(), 1); assert!(matches!(tokenvec[0], TokenType::Semicolon)); - let tokenvec = tokenize_a2ml("0").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "0", &mut complete_string).unwrap(); + assert_eq!(tokenvec.len(), 1); assert!(matches!(tokenvec[0], TokenType::Constant(0))); - let tokenvec = tokenize_a2ml("0x03").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "0x03", &mut complete_string).unwrap(); 
+ assert_eq!(tokenvec.len(), 1); assert!(matches!(tokenvec[0], TokenType::Constant(3))); - let tokenvec = tokenize_a2ml("123456").unwrap(); + let tokenvec = tokenize_a2ml(String::new(), "123456", &mut complete_string).unwrap(); + assert_eq!(tokenvec.len(), 1); assert!(matches!(tokenvec[0], TokenType::Constant(123456))); - let err_result = tokenize_a2ml(r#" "unclosed "#); + let tokenvec = tokenize_a2ml(String::new(), r#"/include "testfile""#, &mut complete_string).unwrap(); + assert_eq!(tokenvec.len(), 0); + + let tokenvec = tokenize_a2ml(String::new(), r#"/include"testfile""#, &mut complete_string).unwrap(); + assert_eq!(tokenvec.len(), 0); + + let tokenvec = tokenize_a2ml(String::new(), r#"/include testfile"#, &mut complete_string).unwrap(); + assert_eq!(tokenvec.len(), 0); + + let err_result = tokenize_a2ml(String::new(), r#"/include "testfile_unclosed_quote"#, &mut complete_string); assert!(err_result.is_err()); + + let err_result = tokenize_a2ml(String::new(), r#" "unclosed "#, &mut complete_string); + assert!(err_result.is_err()); + } #[test] @@ -1436,9 +1527,9 @@ mod test { A2mlTypeSpec::TaggedStruct(taggedstruct_hashmap), ]); - let parse_result = parse_a2ml(TEST_INPUT); + let parse_result = parse_a2ml(String::new(), TEST_INPUT); assert!(parse_result.is_ok()); - let a2ml_spec = parse_result.unwrap(); + let (a2ml_spec, _complete_string) = parse_result.unwrap(); println!("{:?}", a2ml_spec); assert_eq!(a2ml_spec, expected_parse_result); } diff --git a/a2lfile/src/ifdata.rs b/a2lfile/src/ifdata.rs index 96c3277..65503a6 100644 --- a/a2lfile/src/ifdata.rs +++ b/a2lfile/src/ifdata.rs @@ -753,7 +753,7 @@ mod ifdata_test { &mut log_msgs, false, ); - parser.builtin_a2mlspec = Some(a2lfile::a2ml::parse_a2ml(A2MLTEST_TEXT).unwrap()); + parser.builtin_a2mlspec = Some(a2lfile::a2ml::parse_a2ml(String::new(), A2MLTEST_TEXT).unwrap().0); super::parse_ifdata( &mut parser, &a2lfile::ParseContext { diff --git a/a2lfile/src/lib.rs b/a2lfile/src/lib.rs index 93fbe41..b4fe142 
100644 --- a/a2lfile/src/lib.rs +++ b/a2lfile/src/lib.rs @@ -206,8 +206,8 @@ fn load_impl( // if a built-in A2ml specification was passed as a string, then it is parsed here if let Some(spec) = a2ml_spec { parser.builtin_a2mlspec = Some( - a2ml::parse_a2ml(&spec) - .map_err(|parse_err| A2lError::InvalidBuiltinA2mlSpec { parse_err })?, + a2ml::parse_a2ml(path.to_string_lossy().to_string(), &spec) + .map_err(|parse_err| A2lError::InvalidBuiltinA2mlSpec { parse_err })?.0, ); } diff --git a/a2lfile/src/loader.rs b/a2lfile/src/loader.rs index e4e0f3f..52cc612 100644 --- a/a2lfile/src/loader.rs +++ b/a2lfile/src/loader.rs @@ -1,8 +1,21 @@ +use std::ffi::OsString; use crate::A2lError; use std::fs::File; use std::io::Read; use std::path::Path; +pub(crate) fn make_include_filename(incname: &str, base_filename: &str) -> OsString { + let base = std::path::Path::new(base_filename); + if let Some(basedir) = base.parent() { + let joined = basedir.join(incname); + if joined.exists() { + return OsString::from(joined); + } + } + + OsString::from(incname) +} + pub fn load(path: &Path) -> Result { let mut file = match File::open(path) { Ok(file) => file, diff --git a/a2lfile/src/specification.rs b/a2lfile/src/specification.rs index aa0f812..951c6c5 100644 --- a/a2lfile/src/specification.rs +++ b/a2lfile/src/specification.rs @@ -32074,6 +32074,7 @@ impl VirtualCharacteristic { #[derive(Clone)] pub struct A2ml { pub a2ml_text: String, + merged_a2ml_text: String, pub(crate) __block_info: BlockInfo<(u32, ())>, } @@ -32088,8 +32089,10 @@ impl std::fmt::Debug for A2ml { impl A2ml { #[must_use] pub fn new(a2ml_text: String) -> Self { + let merged_a2ml_text = a2ml_text.clone(); Self { a2ml_text, + merged_a2ml_text, __block_info: BlockInfo { incfile: None, line: 0, @@ -32111,13 +32114,21 @@ impl A2ml { let __a2ml_text_location = parser.get_current_line_offset(); let token = parser.expect_token(context, A2lTokenType::String)?; let a2ml_text = parser.get_token_text(token).to_string(); - match 
a2ml::parse_a2ml(&a2ml_text) { - Ok(a2mlspec) => parser.file_a2mlspec = Some(a2mlspec), - Err(errmsg) => parser.error_or_log(ParserError::A2mlError { - filename: parser.filenames[context.fileid].clone(), - error_line: parser.last_token_position, - errmsg, - })?, + let filename = &parser.filenames[context.fileid]; + let merged_a2ml_text; + match a2ml::parse_a2ml(filename.to_string(), &a2ml_text) { + Ok((a2mlspec, computed_merged_a2ml_text)) => { + parser.file_a2mlspec = Some(a2mlspec); + merged_a2ml_text = computed_merged_a2ml_text; + }, + Err(errmsg) => { + parser.error_or_log(ParserError::A2mlError { + filename: filename.to_string(), + error_line: parser.last_token_position, + errmsg, + })?; + merged_a2ml_text = String::from(""); + } } parser.expect_token(context, A2lTokenType::End)?; let ident = parser.get_identifier(context)?; @@ -32132,6 +32143,7 @@ impl A2ml { } Ok(A2ml { a2ml_text, + merged_a2ml_text, __block_info: BlockInfo { incfile: fileid, line, @@ -32167,6 +32179,7 @@ impl A2lObject<(u32, ())> for A2ml { } fn merge_includes(&mut self) { self.__block_info.incfile = None; + self.a2ml_text = self.merged_a2ml_text.clone(); } fn get_line(&self) -> u32 { self.__block_info.line diff --git a/a2lfile/src/tokenizer.rs b/a2lfile/src/tokenizer.rs index 2ee78b4..14df6a1 100644 --- a/a2lfile/src/tokenizer.rs +++ b/a2lfile/src/tokenizer.rs @@ -1,4 +1,4 @@ -use std::{ffi::OsString, path::Path}; +use std::path::Path; use thiserror::Error; use super::loader; @@ -117,7 +117,7 @@ pub(crate) fn tokenize( } // incname is the include filename from the filetext without the surrounding quotes let incname = &filetext[filename_start..filename_end]; - let incfilename = make_include_filename(incname, &filenames[0]); + let incfilename = loader::make_include_filename(incname, &filenames[0]); // check if incname is an accessible file let incpathref = Path::new(&incfilename); @@ -548,13 +548,13 @@ fn count_newlines(text: &[u8]) -> u32 { // is_pathchar() // is this char allowed in a file 
path, extension of is_identchar() -fn is_pathchar(c: u8) -> bool { +pub(crate) fn is_pathchar(c: u8) -> bool { is_identchar(c) || c == b'\\' || c == b'/' } // is_identchar() // is this char allowed in an identifier -fn is_identchar(c: u8) -> bool { +pub(crate) fn is_identchar(c: u8) -> bool { c.is_ascii_alphanumeric() || c == b'.' || c == b'[' || c == b']' || c == b'_' } @@ -565,18 +565,6 @@ fn is_numchar(c: u8) -> bool { c.is_ascii_hexdigit() || c == b'x' || c == b'X' || c == b'.' || c == b'+' || c == b'-' } -fn make_include_filename(incname: &str, base_filename: &str) -> OsString { - let base = std::path::Path::new(base_filename); - if let Some(basedir) = base.parent() { - let joined = basedir.join(incname); - if joined.exists() { - return OsString::from(joined); - } - } - - OsString::from(incname) -} - /*************************************************************************************************/ #[cfg(test)]