From 43b43475e18eae00a58830aec0165050f27c9a32 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Arnaud Date: Mon, 10 Jun 2024 11:26:24 +0200 Subject: [PATCH] feat: implement SHACL to RDF --- iri_s/src/lib.rs | 2 +- shacl_ast/Cargo.toml | 2 + shacl_ast/src/ast/message_map.rs | 38 ++++++- shacl_ast/src/ast/node_shape.rs | 48 ++++++-- shacl_ast/src/ast/property_shape.rs | 107 +++++++++++++++--- shacl_ast/src/ast/shape.rs | 25 +++- .../converter/shacl_to_rdf/shacl_writer.rs | 39 +++---- shacl_ast/src/lib.rs | 3 - srdf/src/srdf_basic.rs | 3 + srdf/src/srdf_graph/README.md | 2 - srdf/src/srdf_graph/srdfgraph.rs | 15 ++- srdf/src/srdf_sparql/srdfsparql.rs | 4 + 12 files changed, 230 insertions(+), 58 deletions(-) delete mode 100644 srdf/src/srdf_graph/README.md diff --git a/iri_s/src/lib.rs b/iri_s/src/lib.rs index 16b38add..3d76c65a 100644 --- a/iri_s/src/lib.rs +++ b/iri_s/src/lib.rs @@ -44,7 +44,7 @@ macro_rules! iri { ( $lit: tt ) => { - IriS::new_unchecked($lit) + $crate::IriS::new_unchecked($lit) }; } diff --git a/shacl_ast/Cargo.toml b/shacl_ast/Cargo.toml index 6b3b78f3..5a5af186 100644 --- a/shacl_ast/Cargo.toml +++ b/shacl_ast/Cargo.toml @@ -26,4 +26,6 @@ serde_json = "1" const_format = "0.2" itertools = "0.13" +oxrdf = { version = "0.2.0-alpha.5", features = ["oxsdatatypes"] } + [dev-dependencies] diff --git a/shacl_ast/src/ast/message_map.rs b/shacl_ast/src/ast/message_map.rs index d97a14b6..6d63cf20 100644 --- a/shacl_ast/src/ast/message_map.rs +++ b/shacl_ast/src/ast/message_map.rs @@ -1,10 +1,46 @@ +use oxrdf::{Literal as OxLiteral, Term as OxTerm}; +use srdf::lang::Lang; +use std::collections::HashMap; +use std::str::FromStr; + #[derive(Debug, Default, Clone)] pub struct MessageMap { - // mmap: HashMap, String> + messages: HashMap, String>, } impl MessageMap { pub fn new() -> Self { Self::default() } + + pub fn with_message(mut self, lang: Option, message: String) -> Self { + self.messages.insert(lang, message); + self + } + + pub fn messages(&self) -> &HashMap, String> { + &self.messages + } + + pub fn to_term_iter(&self) -> impl Iterator + '_ { + self.messages.iter().map(|(lang, message)| { + let literal = if let Some(lang) = lang { + OxLiteral::new_language_tagged_literal(message, lang.value()).unwrap() + } else { + OxLiteral::new_simple_literal(message) + }; + + OxTerm::Literal(literal) + }) + } +} + +impl FromStr for MessageMap { + type Err = (); + + fn from_str(s: &str) -> Result { + Ok(Self { + messages: HashMap::from([(None, s.to_string())]), + }) + } } diff --git a/shacl_ast/src/ast/node_shape.rs b/shacl_ast/src/ast/node_shape.rs index 45c5373a..4537dcd8 100644 --- a/shacl_ast/src/ast/node_shape.rs +++ b/shacl_ast/src/ast/node_shape.rs @@ -1,8 +1,11 @@ -use srdf::RDFNode; +use crate::{ + component::Component, message_map::MessageMap, target::Target, SH_DESCRIPTION_STR, SH_NAME_STR, + SH_NODE_SHAPE, +}; +use iri_s::iri; +use srdf::{RDFNode, SRDFBuilder}; use std::fmt::Display; -use crate::{component::Component, target::Target}; - #[derive(Debug, Clone)] pub struct NodeShape { id: RDFNode, @@ -14,9 +17,8 @@ pub struct NodeShape { // deactivated: bool, // message: MessageMap, // severity: Option, - // name: MessageMap, - // description: MessageMap, - + name: MessageMap, + description: MessageMap, // SHACL spec says that the values of sh:order should be decimals but in the examples they use integers. `NumericLiteral` also includes doubles. // order: Option, @@ -36,8 +38,8 @@ impl NodeShape { // deactivated: false, // message: MessageMap::new(), // severity: None, - // name: MessageMap::new(), - // description: MessageMap::new(), + name: MessageMap::new(), + description: MessageMap::new(), // order: None, // group: None, // source_iri: None, @@ -71,6 +73,36 @@ impl NodeShape { self.closed = closed; self } + + pub fn write(&self, rdf: &mut RDF) -> Result<(), RDF::Err> + where + RDF: SRDFBuilder, + { + rdf.add_type(&self.id, RDF::iri_s2term(&SH_NODE_SHAPE))?; + + self.name + .to_term_iter() + .map(|term| { + rdf.add_triple( + &RDF::object_as_subject(&self.id).unwrap(), + &RDF::iri_s2iri(&iri!(SH_NAME_STR)), + &RDF::term_s2term(&term), + ) + }) + .collect::>()?; + + self.description + .to_term_iter() + .map(|term| { + rdf.add_triple( + &RDF::object_as_subject(&self.id).unwrap(), + &RDF::iri_s2iri(&iri!(SH_DESCRIPTION_STR)), + &RDF::term_s2term(&term), + ) + }) + .collect::>()?; + Ok(()) + } } impl Display for NodeShape { diff --git a/shacl_ast/src/ast/property_shape.rs b/shacl_ast/src/ast/property_shape.rs index f6ea33de..33b54176 100644 --- a/shacl_ast/src/ast/property_shape.rs +++ b/shacl_ast/src/ast/property_shape.rs @@ -1,11 +1,16 @@ -use srdf::{RDFNode, SHACLPath}; +use iri_s::iri; +use oxrdf::{Literal as OxLiteral, NamedNode, Term as OxTerm}; +use srdf::{numeric_literal::NumericLiteral, RDFNode, SHACLPath, SRDFBuilder, XSD_DECIMAL_STR}; use std::fmt::Display; -use crate::{component::Component, target::Target}; +use crate::{ + component::Component, message_map::MessageMap, target::Target, SH_DESCRIPTION_STR, SH_NAME_STR, + SH_ORDER_STR, SH_PATH_STR, SH_PROPERTY_SHAPE, +}; #[derive(Debug, Clone)] pub struct PropertyShape { - // id: RDFNode, + id: RDFNode, path: SHACLPath, components: Vec, targets: Vec, @@ -15,21 +20,19 @@ pub struct PropertyShape { // deactivated: bool, // message: MessageMap, // severity: Option, - // name: MessageMap, - // description: MessageMap, - + name: MessageMap, + description: MessageMap, // SHACL spec says that the values of sh:order should be decimals but in the examples they use integers. `NumericLiteral` also includes doubles. - // order: Option, - + order: Option, // group: Option, // source_iri: Option, // annotations: Vec<(IriRef, RDFNode)>, } impl PropertyShape { - pub fn new(_id: RDFNode, path: SHACLPath) -> Self { + pub fn new(id: RDFNode, path: SHACLPath) -> Self { PropertyShape { - // id, + id, path, components: Vec::new(), targets: Vec::new(), @@ -39,15 +42,29 @@ impl PropertyShape { // deactivated: false, // message: MessageMap::new(), // severity: None, - // name: MessageMap::new(), - // description: MessageMap::new(), - // order: None, + name: MessageMap::new(), + description: MessageMap::new(), + order: None, // group: None, // source_iri: None, // annotations: Vec::new() } } + pub fn with_name(mut self, name: MessageMap) -> Self { + self.name = name; + self + } + pub fn with_description(mut self, description: MessageMap) -> Self { + self.description = description; + self + } + + pub fn with_order(mut self, order: Option) -> Self { + self.order = order; + self + } + pub fn with_targets(mut self, targets: Vec) -> Self { self.targets = targets; self @@ -67,6 +84,70 @@ impl PropertyShape { self.closed = closed; self } + + pub fn path(&self) -> &SHACLPath { + &self.path + } + + pub fn name(&self) -> &MessageMap { + &self.name + } + + pub fn description(&self) -> &MessageMap { + &self.description + } + + pub fn write(&self, rdf: &mut RDF) -> Result<(), RDF::Err> + where + RDF: SRDFBuilder, + { + rdf.add_type(&self.id, RDF::iri_s2term(&SH_PROPERTY_SHAPE))?; + + self.name + .to_term_iter() + .map(|term| { + rdf.add_triple( + &RDF::object_as_subject(&self.id).unwrap(), + &RDF::iri_s2iri(&iri!(SH_NAME_STR)), + &RDF::term_s2term(&term), + ) + }) + .collect::>()?; + + self.description + .to_term_iter() + .map(|term| { + rdf.add_triple( + &RDF::object_as_subject(&self.id).unwrap(), + &RDF::iri_s2iri(&iri!(SH_DESCRIPTION_STR)), + &RDF::term_s2term(&term), + ) + }) + .collect::>()?; + + if let Some(order) = &self.order { + let decimal_type = NamedNode::new(XSD_DECIMAL_STR).unwrap(); + + let term = OxTerm::Literal(OxLiteral::new_typed_literal(order.to_string(), decimal_type)); + + rdf.add_triple( + &RDF::object_as_subject(&self.id).unwrap(), + &RDF::iri_s2iri(&iri!(SH_ORDER_STR)), + &RDF::term_s2term(&term))?; + } + + if let SHACLPath::Predicate { pred } = &self.path { + rdf.add_triple( + &RDF::object_as_subject(&self.id).unwrap(), + &RDF::iri_s2iri(&iri!(SH_PATH_STR)), + &RDF::iri_s2term(pred), + )?; + } else { + unimplemented!() + } + + Ok(()) + } } impl Display for PropertyShape { diff --git a/shacl_ast/src/ast/shape.rs b/shacl_ast/src/ast/shape.rs index e6383db0..dcd5aad6 100644 --- a/shacl_ast/src/ast/shape.rs +++ b/shacl_ast/src/ast/shape.rs @@ -1,3 +1,4 @@ +use srdf::SRDFBuilder; use std::fmt::Display; use crate::{node_shape::NodeShape, property_shape::PropertyShape}; @@ -8,12 +9,28 @@ pub enum Shape { PropertyShape(PropertyShape), } +impl Shape { + pub fn write(&self, rdf: &mut RDF) -> Result<(), RDF::Err> + where + RDF: SRDFBuilder, + { + match self { + Shape::NodeShape(ns) => { + ns.write(rdf)?; + } + Shape::PropertyShape(ps) => { + ps.write(rdf)?; + } + } + Ok(()) + } +} + impl Display for Shape { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self { - Shape::NodeShape(ns) => write!(f, "{ns}")?, - Shape::PropertyShape(ps) => write!(f, "{ps}")?, - }; - Ok(()) + Shape::NodeShape(ns) => write!(f, "{ns}"), + Shape::PropertyShape(ps) => write!(f, "{ps}"), + } } } diff --git a/shacl_ast/src/converter/shacl_to_rdf/shacl_writer.rs b/shacl_ast/src/converter/shacl_to_rdf/shacl_writer.rs index 0007a640..6906d278 100644 --- a/shacl_ast/src/converter/shacl_to_rdf/shacl_writer.rs +++ b/shacl_ast/src/converter/shacl_to_rdf/shacl_writer.rs @@ -1,7 +1,8 @@ -use srdf::{RDFFormat, SRDFBasic, SRDFBuilder}; +use crate::{Schema, SH_STR}; +use srdf::{RDF, RDFFormat, SRDFBuilder, XSD}; use std::io::Write; - -use crate::{shape::Shape, Schema, SH_NODE_SHAPE, SH_PROPERTY_SHAPE}; +use std::str::FromStr; +use iri_s::IriS; pub struct ShaclWriter where @@ -19,14 +20,19 @@ where } pub fn write(&mut self, schema: &Schema) -> Result<(), RDF::Err> { - self.rdf.add_prefix_map(schema.prefix_map())?; + let mut prefix_map = schema.prefix_map(); + prefix_map.insert("rdf", &IriS::from_str(RDF).unwrap()); + prefix_map.insert("xsd", &IriS::from_str(XSD).unwrap()); + prefix_map.insert("sh", &IriS::from_str(SH_STR).unwrap()); + + self.rdf.add_prefix_map(prefix_map)?; self.rdf.add_base(&schema.base())?; - for (node, shape) in schema.iter() { - match shape { - Shape::NodeShape(_) => self.rdf.add_type(node, node_shape::())?, - Shape::PropertyShape(_) => self.rdf.add_type(node, property_shape::())?, - } - } + + schema + .iter() + .map(|(_, shape)| shape.write(&mut self.rdf)) + .collect::>()?; + Ok(()) } @@ -43,16 +49,3 @@ where Self::new() } } -fn node_shape() -> RDF::Term -where - RDF: SRDFBasic, -{ - RDF::iri_s2term(&SH_NODE_SHAPE) -} - -fn property_shape() -> RDF::Term -where - RDF: SRDFBasic, -{ - RDF::iri_s2term(&SH_PROPERTY_SHAPE) -} diff --git a/shacl_ast/src/lib.rs b/shacl_ast/src/lib.rs index 90138923..1b9ec025 100644 --- a/shacl_ast/src/lib.rs +++ b/shacl_ast/src/lib.rs @@ -13,6 +13,3 @@ pub mod shacl_vocab; pub use ast::*; pub use converter::*; pub use shacl_vocab::*; - -#[cfg(test)] -mod tests {} diff --git a/srdf/src/srdf_basic.rs b/srdf/src/srdf_basic.rs index 6c03b46c..a1ad69a8 100644 --- a/srdf/src/srdf_basic.rs +++ b/srdf/src/srdf_basic.rs @@ -2,6 +2,7 @@ use std::fmt::{Debug, Display}; use std::hash::Hash; use iri_s::IriS; +use oxrdf::Term as OxTerm; use prefixmap::{PrefixMap, PrefixMapError}; use crate::Object; @@ -103,6 +104,8 @@ pub trait SRDFBasic { } fn iri_s2iri(iri_s: &IriS) -> Self::IRI; + + fn term_s2term(term: &OxTerm) -> Self::Term; fn bnode_id2bnode(id: &str) -> Self::BNode; fn iri_s2subject(iri_s: &IriS) -> Self::Subject { diff --git a/srdf/src/srdf_graph/README.md b/srdf/src/srdf_graph/README.md deleted file mode 100644 index 139597f9..00000000 --- a/srdf/src/srdf_graph/README.md +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/srdf/src/srdf_graph/srdfgraph.rs b/srdf/src/srdf_graph/srdfgraph.rs index bfb277ce..f7376e5d 100644 --- a/srdf/src/srdf_graph/srdfgraph.rs +++ b/srdf/src/srdf_graph/srdfgraph.rs @@ -235,6 +235,10 @@ impl SRDFBasic for SRDFGraph { IriS::from_named_node(iri) } + fn term_s2term(term: &OxTerm) -> Self::Term { + term.clone() + } + fn term_as_object(term: &OxTerm) -> Object { match term { OxTerm::BlankNode(bn) => Object::BlankNode(bn.as_str().to_string()), @@ -577,10 +581,10 @@ impl SRDFBuilder for SRDFGraph { Ok(()) } - fn add_type(&mut self, node: &crate::RDFNode, type_: Self::Term) -> Result<(), Self::Err> { + fn add_type(&mut self, node: &crate::RDFNode, r#type: Self::Term) -> Result<(), Self::Err> { match Self::object_as_subject(node) { Some(subj) => { - let triple = OxTriple::new(subj, rdf_type(), type_.clone()); + let triple = OxTriple::new(subj, rdf_type(), r#type.clone()); self.graph.insert(&triple); Ok(()) } @@ -600,7 +604,12 @@ impl SRDFBuilder for SRDFGraph { } fn serialize(&self, format: RDFFormat, write: W) -> Result<(), Self::Err> { - let serializer = RdfSerializer::from_format(cnv_rdf_format(format)); + let mut serializer = RdfSerializer::from_format(cnv_rdf_format(format)); + + for (prefix, iri) in &self.pm.map { + serializer = serializer.with_prefix(prefix, iri.as_str()).unwrap(); + } + let mut writer = serializer.serialize_to_write(write); for triple in self.graph.iter() { writer.write_triple(triple)?; diff --git a/srdf/src/srdf_sparql/srdfsparql.rs b/srdf/src/srdf_sparql/srdfsparql.rs index d5b8e70b..892b043f 100644 --- a/srdf/src/srdf_sparql/srdfsparql.rs +++ b/srdf/src/srdf_sparql/srdfsparql.rs @@ -171,6 +171,10 @@ impl SRDFBasic for SRDFSparql { iri_s.as_named_node().clone() } + fn term_s2term(term: &OxTerm) -> Self::Term { + term.clone() + } + fn term_as_object(term: &Self::Term) -> Object { match term { Self::Term::BlankNode(bn) => Object::BlankNode(bn.to_string()),