From 28d4134eb4dea2413a1c650ca39289f4a9ba1361 Mon Sep 17 00:00:00 2001 From: Eytan Singher Date: Tue, 30 May 2023 12:17:34 +0300 Subject: [PATCH 1/5] Added convinience methods to Sexp and Atom --- src/lib.rs | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index d514af7..069154a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,76 @@ pub enum Atom { F(f64), } + +impl Atom { + /// Returns true if this atom is a string. + pub fn is_string(&self) -> bool { + match self { + &Atom::S(_) => true, + _ => false, + } + } + + /// Returns true if this atom is an integer. + pub fn is_int(&self) -> bool { + match self { + &Atom::I(_) => true, + _ => false, + } + } + + /// Returns true if this atom is a float. + pub fn is_float(&self) -> bool { + match self { + &Atom::F(_) => true, + _ => false, + } + } + + /// Return the string contained in this atom, panic if it is not a string. + pub fn string(&self) -> &str { + self.try_string().expect("not a string") + } + + /// Try to return the string contained in this atom, or None if it is not a + /// string. + pub fn try_string(&self) -> Option<&str> { + match self { + &Atom::S(ref s) => Some(s), + _ => None, + } + } + + /// Return the integer contained in this atom, panic if it is not an integer. + pub fn int(&self) -> i64 { + self.try_int().expect("not an int") + } + + /// Try to return the integer contained in this atom, or None if it is not an + /// integer. + pub fn try_int(&self) -> Option { + match self { + &Atom::I(i) => Some(i), + _ => None, + } + } + + /// Return the float contained in this atom, panic if it is not a float. + pub fn float(&self) -> f64 { + self.try_float().expect("not a float") + } + + /// Try to return the float contained in this atom, or None if it is not a + /// float. + pub fn try_float(&self) -> Option { + match self { + &Atom::F(f) => Some(f), + _ => None, + } + } +} + + /// An s-expression is either an atom or a list of s-expressions. This is /// similar to the data format used by lisp. #[derive(PartialEq, Clone, PartialOrd)] @@ -32,6 +102,51 @@ pub enum Sexp { List(Vec), } +impl Sexp { + /// Returns true if this s-expression is an atom. + pub fn is_atom(&self) -> bool { + match self { + Sexp::Atom(_) => true, + _ => false, + } + } + + /// Returns true if this s-expression is a list. + pub fn is_list(&self) -> bool { + match *self { + Sexp::List(_) => true, + _ => false, + } + } + + /// Return the atom contained in this s-expression, panic if it is a list. + pub fn atom(&self) -> &Atom { + self.try_atom().expect("not an atom") + } + + /// Try to return the atom contained in this s-expression, or None if it is a + pub fn try_atom(&self) -> Option<&Atom> { + match self { + &Sexp::Atom(ref a) => Some(a), + _ => None, + } + } + + /// Return the list contained in this s-expression, panic if it is an atom. + pub fn list(&self) -> &Vec { + self.try_list().expect("not a list") + } + + /// Try to return the list contained in this s-expression, or None if it is an + /// atom. + pub fn try_list(&self) -> Option<&Vec> { + match self { + &Sexp::List(ref l) => Some(l), + _ => None, + } + } +} + #[test] fn sexp_size() { // I just want to see when this changes, in the diff. From 2b0197d097748069b526af37463af5946f5790a7 Mon Sep 17 00:00:00 2001 From: Eytan Singher Date: Tue, 30 May 2023 18:39:04 +0300 Subject: [PATCH 2/5] Improved error reporting --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 069154a..2b334a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,7 +121,7 @@ impl Sexp { /// Return the atom contained in this s-expression, panic if it is a list. pub fn atom(&self) -> &Atom { - self.try_atom().expect("not an atom") + self.try_atom().expect(&format!("Expecting an atom, got: {}", self)) } /// Try to return the atom contained in this s-expression, or None if it is a @@ -134,7 +134,7 @@ impl Sexp { /// Return the list contained in this s-expression, panic if it is an atom. pub fn list(&self) -> &Vec { - self.try_list().expect("not a list") + self.try_list().expect(&format!("Expecting a list, got: {}", self)) } /// Try to return the list contained in this s-expression, or None if it is an From 0513d6613643698ab30ae05ebaa3dffc76c964ba Mon Sep 17 00:00:00 2001 From: Eytan Singher Date: Fri, 2 Jun 2023 20:35:51 +0300 Subject: [PATCH 3/5] Added into methods and map interpretation --- Cargo.toml | 3 +++ src/lib.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 26c6fc8..e66219f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,6 @@ keywords = [ "sexp", "parsing", "s-expression", "file-format" ] description = "A small, simple, self-contained, s-expression parser and pretty-printer." license = "MIT" + +[dependencies] +itertools = "0.10.5" diff --git a/src/lib.rs b/src/lib.rs index 2b334a0..74721ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,10 +7,14 @@ use std::borrow::Cow; use std::cmp; +use std::collections::BTreeMap; use std::error; use std::fmt; use std::str::{self, FromStr}; +extern crate itertools; +use itertools::Itertools; + /// A single data element in an s-expression. Floats are excluded to ensure /// atoms may be used as keys in ordered and hashed data structures. /// @@ -63,6 +67,14 @@ impl Atom { } } + /// Consume this atom and return its string, or None if it is not a string. + pub fn into_string(self) -> Option { + match self { + Atom::S(s) => Some(s), + _ => None, + } + } + /// Return the integer contained in this atom, panic if it is not an integer. pub fn int(&self) -> i64 { self.try_int().expect("not an int") @@ -77,6 +89,15 @@ impl Atom { } } + /// Consume this atom and return its integer, or None if it is not an + /// integer. + pub fn into_int(self) -> Option { + match self { + Atom::I(i) => Some(i), + _ => None, + } + } + /// Return the float contained in this atom, panic if it is not a float. pub fn float(&self) -> f64 { self.try_float().expect("not a float") @@ -90,6 +111,14 @@ impl Atom { _ => None, } } + + /// Consume this atom and return its float, or None if it is not a float. + pub fn into_float(self) -> Option { + match self { + Atom::F(f) => Some(f), + _ => None, + } + } } @@ -132,6 +161,14 @@ impl Sexp { } } + /// Consume this s-expression and return its atom, or None if it is a list. + pub fn into_atom(self) -> Option { + match self { + Sexp::Atom(a) => Some(a), + _ => None, + } + } + /// Return the list contained in this s-expression, panic if it is an atom. pub fn list(&self) -> &Vec { self.try_list().expect(&format!("Expecting a list, got: {}", self)) @@ -145,6 +182,33 @@ impl Sexp { _ => None, } } + + /// Consume this s-expression and return its list, or None if it is an atom. + pub fn into_list(self) -> Option> { + match self { + Sexp::List(l) => Some(l), + _ => None, + } + } + + /// Turn s-expression list into a map from key value pairs. + pub fn into_map(self) -> Option> { + match self { + Sexp::List(l) => { + if l.len() % 2 != 0 { + return None; + } + let mut map = BTreeMap::new(); + for (key, value) in l.into_iter().tuple_windows::<(Sexp, Sexp)>() { + let key = key.into_atom()?.into_string()?; + assert!(!map.contains_key(&key)); + map.insert(key, value); + } + Some(map) + }, + _ => None, + } + } } #[test] From ce3c0f811e25f6ab1e9317c0471654c242877172 Mon Sep 17 00:00:00 2001 From: Eytan Singher Date: Sat, 3 Jun 2023 00:18:23 +0300 Subject: [PATCH 4/5] Fixed into_map --- Cargo.toml | 3 --- src/lib.rs | 12 +++++------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e66219f..26c6fc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,3 @@ keywords = [ "sexp", "parsing", "s-expression", "file-format" ] description = "A small, simple, self-contained, s-expression parser and pretty-printer." license = "MIT" - -[dependencies] -itertools = "0.10.5" diff --git a/src/lib.rs b/src/lib.rs index 74721ed..5eb25b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,9 +12,6 @@ use std::error; use std::fmt; use std::str::{self, FromStr}; -extern crate itertools; -use itertools::Itertools; - /// A single data element in an s-expression. Floats are excluded to ensure /// atoms may be used as keys in ordered and hashed data structures. /// @@ -195,11 +192,12 @@ impl Sexp { pub fn into_map(self) -> Option> { match self { Sexp::List(l) => { - if l.len() % 2 != 0 { - return None; - } let mut map = BTreeMap::new(); - for (key, value) in l.into_iter().tuple_windows::<(Sexp, Sexp)>() { + for sub_l in l.into_iter() { + assert!(sub_l.is_list() && sub_l.list().len() == 2); + let mut sub_l = sub_l.into_list().unwrap(); + let value = sub_l.remove(1); + let key = sub_l.remove(0); let key = key.into_atom()?.into_string()?; assert!(!map.contains_key(&key)); map.insert(key, value); From aed9d536e3300aa831e4b5d99875b35089a1cd72 Mon Sep 17 00:00:00 2001 From: Eytan Singher Date: Tue, 6 Jun 2023 12:05:52 +0300 Subject: [PATCH 5/5] Fixed warnings better error in into_map --- src/lib.rs | 57 +++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5eb25b5..f02fc11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -194,7 +194,12 @@ impl Sexp { Sexp::List(l) => { let mut map = BTreeMap::new(); for sub_l in l.into_iter() { - assert!(sub_l.is_list() && sub_l.list().len() == 2); + assert!(sub_l.is_list() && sub_l.list().len() == 2, + "Assertion to map failed (is_list {} len {:?}) on: {}", + sub_l.is_list(), + sub_l.try_list().map(|x| x.len()), + sub_l.to_string().chars().take(100).collect::() + ); let mut sub_l = sub_l.into_list().unwrap(); let value = sub_l.remove(1); let key = sub_l.remove(0); @@ -230,7 +235,7 @@ pub struct Error { impl error::Error for Error { fn description(&self) -> &str { self.message } - fn cause(&self) -> Option<&error::Error> { None } + fn cause(&self) -> Option<&dyn error::Error> { None } } /// Since errors are the uncommon case, they're boxed. This keeps the size of @@ -342,7 +347,7 @@ fn peek(s: &str, pos: &usize) -> ERes<(char, usize)> { fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { dbg("expect", pos); - let (ch, next) = try!(peek(s, pos)); + let (ch, next) = peek(s, pos)?; *pos = next; if ch == c { Ok(()) } else { err("unexpected character", s, pos) } } @@ -350,7 +355,7 @@ fn expect(s: &str, pos: &mut usize, c: char) -> ERes<()> { fn consume_until_newline(s: &str, pos: &mut usize) -> ERes<()> { loop { if *pos == s.len() { return Ok(()) } - let (ch, next) = try!(peek(s, pos)); + let (ch, next) = peek(s, pos)?; *pos = next; if ch == '\n' { return Ok(()) } } @@ -361,9 +366,9 @@ fn zspace(s: &str, pos: &mut usize) -> ERes<()> { dbg("zspace", pos); loop { if *pos == s.len() { return Ok(()) } - let (ch, next) = try!(peek(s, pos)); + let (ch, next) = peek(s, pos)?; - if ch == ';' { try!(consume_until_newline(s, pos)) } + if ch == ';' { consume_until_newline(s, pos)? } else if ch.is_whitespace() { *pos = next; } else { return Ok(()) } } @@ -373,15 +378,15 @@ fn parse_quoted_atom(s: &str, pos: &mut usize) -> ERes { dbg("parse_quoted_atom", pos); let mut cs: String = String::new(); - try!(expect(s, pos, '"')); + expect(s, pos, '"')?; loop { - let (ch, next) = try!(peek(s, pos)); + let (ch, next) = peek(s, pos)?; if ch == '"' { *pos = next; break; } else if ch == '\\' { - let (postslash, nextnext) = try!(peek(s, &next)); + let (postslash, nextnext) = peek(s, &next)?; if postslash == '"' || postslash == '\\' { cs.push(postslash); } else { @@ -405,9 +410,9 @@ fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes { loop { if *pos == s.len() { break } - let (c, next) = try!(peek(s, pos)); + let (c, next) = peek(s, pos)?; - if c == ';' { try!(consume_until_newline(s, pos)); break } + if c == ';' { consume_until_newline(s, pos)?; break } if c.is_whitespace() || c == '(' || c == ')' { break } cs.push(c); *pos = next; @@ -418,7 +423,7 @@ fn parse_unquoted_atom(s: &str, pos: &mut usize) -> ERes { fn parse_atom(s: &str, pos: &mut usize) -> ERes { dbg("parse_atom", pos); - let (ch, _) = try!(peek(s, pos)); + let (ch, _) = peek(s, pos)?; if ch == '"' { parse_quoted_atom (s, pos) } else { parse_unquoted_atom(s, pos) } @@ -426,34 +431,34 @@ fn parse_atom(s: &str, pos: &mut usize) -> ERes { fn parse_list(s: &str, pos: &mut usize) -> ERes> { dbg("parse_list", pos); - try!(zspace(s, pos)); - try!(expect(s, pos, '(')); + zspace(s, pos)?; + expect(s, pos, '(')?; let mut sexps: Vec = Vec::new(); loop { - try!(zspace(s, pos)); - let (c, next) = try!(peek(s, pos)); + zspace(s, pos)?; + let (c, next) = peek(s, pos)?; if c == ')' { *pos = next; break; } - sexps.push(try!(parse_sexp(s, pos))); + sexps.push(parse_sexp(s, pos)?); } - try!(zspace(s, pos)); + zspace(s, pos)?; Ok(sexps) } fn parse_sexp(s: &str, pos: &mut usize) -> ERes { dbg("parse_sexp", pos); - try!(zspace(s, pos)); - let (c, _) = try!(peek(s, pos)); + zspace(s, pos)?; + let (c, _) = peek(s, pos)?; let r = - if c == '(' { Ok(Sexp::List(try!(parse_list(s, pos)))) } - else { Ok(Sexp::Atom(try!(parse_atom(s, pos)))) }; - try!(zspace(s, pos)); + if c == '(' { Ok(Sexp::List(parse_list(s, pos)?)) } + else { Ok(Sexp::Atom(parse_atom(s, pos)?)) }; + zspace(s, pos)?; r } @@ -481,7 +486,7 @@ pub fn list(xs: &[Sexp]) -> Sexp { #[inline(never)] pub fn parse(s: &str) -> Result> { let mut pos = 0; - let ret = try!(parse_sexp(s, &mut pos)); + let ret = parse_sexp(s, &mut pos)?; if pos == s.len() { Ok(ret) } else { err("unrecognized post-s-expression data", s, &pos) } } @@ -529,10 +534,10 @@ impl fmt::Display for Sexp { match *self { Sexp::Atom(ref a) => write!(f, "{}", a), Sexp::List(ref xs) => { - try!(write!(f, "(")); + write!(f, "(")?; for (i, x) in xs.iter().enumerate() { let s = if i == 0 { "" } else { " " }; - try!(write!(f, "{}{}", s, x)); + write!(f, "{}{}", s, x)?; } write!(f, ")") },