Skip to content

Commit

Permalink
Started with stems
Browse files Browse the repository at this point in the history
  • Loading branch information
labra committed Nov 20, 2023
1 parent 89ab1ef commit 6facf1b
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 17 deletions.
3 changes: 1 addition & 2 deletions examples/sample_parser.shex
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
prefix e: <http://example.org/>
e:S {}
<S> EXTRA <p> <q> {}
38 changes: 38 additions & 0 deletions shex_ast/src/ast/lang_or_wildcard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use std::{result, str::FromStr};

use serde::{Serialize, Serializer};
use serde_derive::{Deserialize, Serialize};
use srdf::lang::Lang;
use void::Void;

use super::serde_string_or_struct::SerializeStringOrStruct;

/// Either a concrete language tag or a struct-shaped wildcard marker.
///
/// The enum is `#[serde(untagged)]`, so no variant name is written out: the
/// `Lang` variant is (de)serialized exactly as the inner `Lang` is, and the
/// `Wildcard` variant as an object with a single `type` field.
#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
#[serde(untagged)]
pub enum LangOrWildcard {
/// A language tag.
Lang(Lang),
/// A wildcard object; `type_` is exposed under the key `type` in serialized form.
// NOTE(review): the expected content of `type_` is not visible here — confirm against the ShExJ schema.
Wildcard {
#[serde(rename = "type")]
type_: String,
},
}

/// String conversion for `LangOrWildcard`.
///
/// The error type is `Void` and the implementation always returns `Ok`.
impl FromStr for LangOrWildcard {
    type Err = Void;

    /// Wraps `s` in a `Lang` (via `Lang::new`) and returns it as the `Lang`
    /// variant. The `Wildcard` variant is never produced by this conversion.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let lang = Lang::new(s);
        Ok(Self::Lang(lang))
    }
}

impl SerializeStringOrStruct for LangOrWildcard {
    /// Serializes a language tag through the inner `Lang`'s own `Serialize`
    /// implementation, and a wildcard through the enum's derived
    /// (untagged) `Serialize` implementation.
    fn serialize_string_or_struct<S>(&self, serializer: S) -> result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            // Delegate to the wrapped language tag.
            Self::Lang(lang) => lang.serialize(serializer),
            // Struct-shaped variant: use the derived serializer of the enum itself.
            Self::Wildcard { .. } => self.serialize(serializer),
        }
    }
}
5 changes: 5 additions & 0 deletions shex_ast/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub mod deref;
pub mod iri;
pub mod iri_ref;
pub mod iri_ref_or_wildcard;
pub mod lang_or_wildcard;
pub mod node_constraint;
pub mod node_kind;
pub mod numeric_literal;
Expand Down Expand Up @@ -33,6 +34,7 @@ pub use bnode::*;
pub use iri::*;
pub use iri_ref::*;
pub use iri_ref_or_wildcard::*;
pub use lang_or_wildcard::*;
pub use node_constraint::*;
pub use node_kind::*;
pub use numeric_literal::*;
Expand Down Expand Up @@ -81,6 +83,7 @@ mod tests {
let expected = ShapeExpr::Shape(Shape::default().with_expression(
TripleExpr::TripleConstraint {
id: None,
negated: None,
inverse: None,
predicate: IriS::new_unchecked("http://a.example/p1").into(),
value_expr: None,
Expand All @@ -107,6 +110,7 @@ mod tests {
let expected = ShapeExpr::Shape(Shape::default().with_expression(
TripleExpr::TripleConstraint {
id: None,
negated: None,
inverse: None,
predicate: IriS::new_unchecked("http://a.example/p1").into(),
value_expr: Some(Box::new(ShapeExpr::Ref(Ref::IriRef {
Expand All @@ -133,6 +137,7 @@ mod tests {
let S5 = IriS::from_str("http://all.example/S5").unwrap();
let expected = TripleExpr::TripleConstraint {
id: None,
negated: None,
inverse: None,
predicate: p1.into(),
value_expr: Some(Box::new(ShapeExpr::Ref(Ref::IriRef {
Expand Down
87 changes: 80 additions & 7 deletions shex_ast/src/ast/value_set_value.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::{result, str::FromStr};
use std::{result, str::FromStr, fmt};

use crate::{ast::serde_string_or_struct::*, Deref, DerefError};
use crate::{ast::serde_string_or_struct::*, Deref, DerefError, LangOrWildcard};
use iri_s::{IriS, IriSError};
use serde::{Serialize, Serializer};
use serde::{Serialize, Serializer, de::Visitor};
use serde_derive::{Deserialize, Serialize};
use srdf::{lang::Lang, literal::Literal};

Expand Down Expand Up @@ -57,11 +57,30 @@ pub enum ValueSetValue {
#[serde(rename = "type")]
type_: String,

#[serde(rename = "languageTag")]
language_tag: String,
#[serde(rename = "languageTag",
serialize_with = "serialize_lang",
deserialize_with = "deserialize_lang")]
language_tag: Lang,
},
LanguageStem {
#[serde(rename = "type")]
type_: String,

stem: Lang,
},
LanguageStemRange {
#[serde(rename = "type")]
type_: String,

#[serde(
serialize_with = "serialize_string_or_struct",
deserialize_with = "deserialize_string_or_struct"
)]
stem: LangOrWildcard,

#[serde(skip_serializing_if = "Option::is_none")]
exclusions: Option<Vec<StringOrLiteralStemWrapper>>,
},
LanguageStem,
LanguageStemRange,
ObjectValue(ObjectValueWrapper),
}

Expand All @@ -84,6 +103,21 @@ impl ValueSetValue {
pub fn object_value(value: ObjectValue) -> ValueSetValue {
ValueSetValue::ObjectValue(ObjectValueWrapper { ov: value })
}

pub fn language(lang: Lang) -> ValueSetValue {
ValueSetValue::Language {
type_: "Language".to_string(),
language_tag: lang
}
}

pub fn language_stem(lang: Lang) -> ValueSetValue {
ValueSetValue::LanguageStem {
type_: "LanguageStem".to_string(),
stem: lang
}
}

}

impl Deref for ValueSetValue {
Expand All @@ -99,6 +133,12 @@ impl Deref for ValueSetValue {
ValueSetValue::ObjectValue(ov) => {
let ov = ov.deref(base, prefixmap)?;
Ok(ValueSetValue::ObjectValue(ov))
},
ValueSetValue::Language { type_, language_tag } => {
Ok(ValueSetValue::Language { type_: type_.clone(), language_tag: language_tag.clone() })
}
ValueSetValue::LanguageStem { type_, stem } => {
Ok(ValueSetValue::LanguageStem { type_: type_.clone(), stem: stem.clone() })
}
_ => {
todo!()
Expand Down Expand Up @@ -165,3 +205,36 @@ impl FromStr for ValueSetValue {
}))
}
}

/// Serde `serialize_with` helper: writes a `Lang` as a plain string, using the
/// text returned by `Lang::value`.
fn serialize_lang<S>(p: &Lang, serializer: S) -> result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let tag = p.value();
    serializer.serialize_str(&tag)
}

/// Serde `deserialize_with` helper: reads a string and turns it into a `Lang`
/// with `Lang::new`.
fn deserialize_lang<'de, D>(deserializer: D) -> Result<Lang, D::Error>
where
    D: serde::Deserializer<'de>,
{
    /// Visitor that accepts a string and wraps it in a `Lang`.
    struct TagVisitor;

    impl<'de> Visitor<'de> for TagVisitor {
        type Value = Lang;

        // Text used by serde when reporting an unexpected input type.
        fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
            out.write_str("Lang")
        }

        fn visit_str<E>(self, text: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            let lang = Lang::new(text);
            Ok(lang)
        }
    }

    deserializer.deserialize_str(TagVisitor)
}
4 changes: 2 additions & 2 deletions shex_compact/src/compact_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,8 @@ where

fn pp_value_set_value(&self, v: &ValueSetValue) -> DocBuilder<'a, Arena<'a, A>, A> {
match v {
ValueSetValue::LanguageStem => todo!(),
ValueSetValue::LanguageStemRange => todo!(),
ValueSetValue::LanguageStem { type_, stem } => todo!(),
ValueSetValue::LanguageStemRange { type_, stem, exclusions } => todo!(),
ValueSetValue::ObjectValue(ov) => self.pp_object_value(&ov.ov),
ValueSetValue::IriStem { type_, stem } => todo!(),
ValueSetValue::IriStemRange {
Expand Down
60 changes: 54 additions & 6 deletions shex_compact/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,8 @@ fn closed<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Qualifier> {
fn extra_property_set<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, Qualifier> {
traced("extra_property_set",
map_error(move |i| {
let (i, (_, ps)) = tuple((token_tws("EXTRA"), cut(many1(predicate))))(i)?;
let (i, (_, ps)) = tuple((token_tws("EXTRA"), cut(many1(tuple((predicate, tws0))))))(i)?;
let ps = ps.into_iter().map(|(p,_)| p).collect();
Ok((i, Qualifier::Extra(ps)))
}, || ShExParseError::ExpectedEXTRAPropertySet))
}
Expand Down Expand Up @@ -1085,7 +1086,7 @@ fn value_set_value(i: Span) -> IRes<ValueSetValue> {
// Pending
iri_range,
literal_range,
// language_range,
language_range(),
// exclusion_plus
))(i)
}
Expand Down Expand Up @@ -1116,7 +1117,7 @@ fn exclusion(i: Span) -> IRes<Exclusion> {
fn exc(i: Span) -> IRes<Exclusion> {
let (i, e) = alt((
iri,
// literal,
// literal(),
// lang_tag
))(i)?;
Ok((i, ()))
Expand Down Expand Up @@ -1151,6 +1152,53 @@ fn literal_exclusion(i: Span) -> IRes<Exclusion> {
// Ok((i, Exclusion::))
}

/// `[55] languageRange ::= LANGTAG ('~' languageExclusion*)?`
/// ` | '@' '~' languageExclusion*`
///
/// Returns a parser that offers two alternatives to `alt`, in this order:
/// `language_range1` (the `LANGTAG ...` form) and `language_range2` (the
/// `'@' '~' ...` form).
///
/// The parser is wrapped in `traced("language_range", ..)` and `map_error`
/// with `ShExParseError::LanguageRange`.
// NOTE(review): presumably `map_error` replaces a failure of the inner parser
// with the given error and `traced` only adds tracing — confirm against the helpers.
fn language_range<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
traced("language_range", map_error(move |i| { alt((
language_range1(),
language_range2()
))(i) }, || ShExParseError::LanguageRange))
}

/// `From [55] languageRange1 = LANGTAG ('~' languageExclusion*)?`
///
/// Parses a language tag, optional whitespace, and an optional `~` followed
/// by zero or more language exclusions, then builds:
///
/// * `ValueSetValue::language(tag)` when no `~` is present;
/// * `ValueSetValue::language_stem(tag)` when `~` is present with no exclusions.
///
/// # Panics
///
/// Reaches `todo!()` when `~` is followed by at least one exclusion: stem
/// ranges with exclusions are not implemented yet.
fn language_range1<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
    traced(
        "language_range1",
        map_error(
            move |i| {
                let (rest, (lang, _, stem_part)) = tuple((
                    lang_tag,
                    tws0,
                    opt(tuple((token_tws("~"), language_exclusions))),
                ))(i)?;
                let value: ValueSetValue = match stem_part {
                    // Plain tag, e.g. `@en`.
                    None => ValueSetValue::language(lang),
                    // Tag followed by a bare `~`, e.g. `@en~`.
                    Some((_, excluded)) if excluded.is_empty() => {
                        ValueSetValue::language_stem(lang)
                    }
                    // `~` followed by exclusions: not implemented yet.
                    Some(_) => todo!(),
                };
                Ok((rest, value))
            },
            || ShExParseError::LanguageRange,
        ),
    )
}

/// `From [55] languageRange2 = '@' '~' languageExclusion*`
///
/// Parses the tokens `@` and `~` followed by zero or more language exclusions.
///
/// # Panics
///
/// Not implemented yet: once the input has been parsed successfully the body
/// reaches `todo!()`, so no value is ever returned for this alternative.
fn language_range2<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ValueSetValue> {
traced("language_range2", map_error(move |i| {
// The parsed exclusions are currently unused; a parse failure propagates through `?`.
let (i, (_,_, exclusions)) = tuple((token_tws("@"), token_tws("~"), language_exclusions))(i)?;
todo!()
}, || ShExParseError::LanguageRange))
}

/// `from [55] language_exclusions = languageExclusion*`
///
/// Applies `language_exclusion` repeatedly through `many0` and collects the
/// results into a `Vec`.
fn language_exclusions(i: Span) -> IRes<Vec<LanguageExclusion>> {
many0(language_exclusion)(i)
}

type LanguageExclusion = ();

/// `[56] languageExclusion ::= '-' LANGTAG '~'?`
///
/// Consumes a `-`, a language tag and an optional `~`. The parsed pieces are
/// discarded for now: the result is always the unit placeholder
/// `LanguageExclusion`.
fn language_exclusion(i: Span) -> IRes<LanguageExclusion> {
    // Pending: the tag and the optional stem marker are parsed but not yet
    // turned into an exclusion value.
    let (rest, (_, _lang_tag, _maybe_stem)) =
        tuple((token_tws("-"), lang_tag, opt(token_tws("~"))))(i)?;
    Ok((rest, ()))
}
/// `[57] include ::= '&' tripleExprLabel`
fn include_<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, TripleExpr> {
traced("include", map_error(move |i| {
Expand Down Expand Up @@ -1241,7 +1289,7 @@ fn rdf_literal<'a>() -> impl FnMut(Span<'a>) -> IRes<'a, ObjectValue> {
traced("rdf_literal", map_error(move |i| {
let (i, str) = string()(i)?;
let (i, maybe_value) = opt(alt((
map(lang_string, |lang| ObjectValue::ObjectLiteral {
map(lang_tag, |lang| ObjectValue::ObjectLiteral {
value: str.fragment().to_string(),
language: Some(lang),
type_: None
Expand Down Expand Up @@ -1369,8 +1417,8 @@ fn echar(input: Span) -> IRes<Span> {
recognize(preceded(token(r"\"), one_of(r#"tbnrf"'\"#)))(input)
}

///
fn lang_string(i: Span) -> IRes<Lang> {
/// `[145s] <LANGTAG> ::= "@" ([a-zA-Z])+ ("-" ([a-zA-Z0-9])+)*`
fn lang_tag(i:Span) -> IRes<Lang> {
let (i, lang_str) = preceded(
token("@"),
recognize(tuple((alpha1, many0(preceded(token("-"), alphanumeric1))))),
Expand Down
3 changes: 3 additions & 0 deletions shex_compact/src/parser_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ pub enum ParseError {
#[error("Expected IRI or Literal")]
ExpectedIriOrLiteral,

#[error("Expected language range")]
LanguageRange,

#[error("Expected Literal")]
Literal,

Expand Down
4 changes: 4 additions & 0 deletions srdf/src/lang.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ impl Lang {
lang: str.to_string(),
}
}

/// Returns an owned copy of the `lang` field (the string is cloned on every call).
pub fn value(&self) -> String {
self.lang.clone()
}
}

impl Display for Lang {
Expand Down

0 comments on commit 6facf1b

Please sign in to comment.