diff --git a/1.0/404.html b/1.0/404.html index 245753f..c0321bd 100644 --- a/1.0/404.html +++ b/1.0/404.html @@ -317,6 +317,8 @@ + + @@ -370,7 +372,7 @@ - Serialization + Serialization format @@ -425,6 +427,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/contributing/index.html b/1.0/contributing/index.html index 5e179b8..ee7d004 100644 --- a/1.0/contributing/index.html +++ b/1.0/contributing/index.html @@ -328,6 +328,8 @@ + + @@ -381,7 +383,7 @@ - Serialization + Serialization format @@ -436,6 +438,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/index.html b/1.0/index.html index 4ff4b92..82cef6b 100644 --- a/1.0/index.html +++ b/1.0/index.html @@ -329,6 +329,8 @@ + + @@ -382,7 +384,7 @@ - Serialization + Serialization format @@ -437,6 +439,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/getting-started/index.html b/1.0/jvm/getting-started/index.html index 876403f..4c5bc5a 100644 --- a/1.0/jvm/getting-started/index.html +++ b/1.0/jvm/getting-started/index.html @@ -323,6 +323,8 @@ + + @@ -376,7 +378,7 @@ - Serialization + Serialization format @@ -431,6 +433,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/grpc/index.html b/1.0/jvm/grpc/index.html index 295cbc4..5f54443 100644 --- a/1.0/jvm/grpc/index.html +++ b/1.0/jvm/grpc/index.html @@ -328,6 +328,8 @@ + + @@ -381,7 +383,7 @@ - Serialization + Serialization format @@ -436,6 +438,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/implementation/index.html b/1.0/jvm/implementation/index.html index 5626a28..e0cc49c 100644 --- a/1.0/jvm/implementation/index.html +++ b/1.0/jvm/implementation/index.html @@ -328,6 +328,8 @@ + + @@ -381,7 +383,7 @@ - Serialization + Serialization format @@ -436,6 +438,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/index.html b/1.0/jvm/index.html index c71f7a4..5e5cd2a 100644 --- a/1.0/jvm/index.html +++ b/1.0/jvm/index.html @@ -328,6 +328,8 @@ + + @@ -381,7 +383,7 @@ - Serialization + Serialization format @@ -436,6 +438,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/jena/index.html b/1.0/jvm/jena/index.html index 9f730fa..d95a1ed 100644 --- a/1.0/jvm/jena/index.html +++ b/1.0/jvm/jena/index.html @@ -323,6 +323,8 @@ + + @@ -376,7 +378,7 @@ - Serialization + Serialization format @@ -431,6 +433,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/rdf4j/index.html b/1.0/jvm/rdf4j/index.html index d318784..42a993c 100644 --- a/1.0/jvm/rdf4j/index.html +++ b/1.0/jvm/rdf4j/index.html @@ -323,6 +323,8 @@ + + @@ -376,7 +378,7 @@ - Serialization + Serialization format @@ -431,6 +433,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/reactive/index.html b/1.0/jvm/reactive/index.html index aa148e9..702fac5 100644 --- a/1.0/jvm/reactive/index.html +++ b/1.0/jvm/reactive/index.html @@ -328,6 +328,8 @@ + + @@ -381,7 +383,7 @@ - Serialization + Serialization format @@ -436,6 +438,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/jvm/releases/index.html b/1.0/jvm/releases/index.html index f42aae3..d870643 100644 --- a/1.0/jvm/releases/index.html +++ b/1.0/jvm/releases/index.html @@ -328,6 +328,8 @@ + + @@ -381,7 +383,7 @@ - Serialization + Serialization format @@ -436,6 +438,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/licensing/index.html b/1.0/licensing/index.html index 3c26019..23b378a 100644 --- a/1.0/licensing/index.html +++ b/1.0/licensing/index.html @@ -326,6 +326,8 @@ + + @@ -379,7 +381,7 @@ - Serialization + Serialization format @@ -434,6 +436,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/performance/index.html b/1.0/performance/index.html index 1a429d6..9a98740 100644 --- a/1.0/performance/index.html +++ b/1.0/performance/index.html @@ -331,6 +331,8 @@ + + @@ -384,7 +386,7 @@ - Serialization + Serialization format @@ -439,6 +441,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/search/search_index.json b/1.0/search/search_index.json index 18600f8..aaad6c3 100644 --- a/1.0/search/search_index.json +++ b/1.0/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":"

    Test \u2013 index.md

    "},{"location":"contributing/","title":"Contributing","text":"

    TODO

    "},{"location":"licensing/","title":"Licensing and citation","text":"

    TODO

    "},{"location":"user-guide/","title":"Jelly user guide","text":"

    Jelly is a high-performance protocol for streaming and non-streaming RDF data. It is designed to be simple, fast, and easy to implement. This guide will help you get started with Jelly.

    Jelly uses Protocol Buffers 3 as the basis of its serialization. This means that you can quickly create a new Jelly implementation using code generation. You can also use an existing implementation, such as the JVM (Scala) implementation.

    "},{"location":"user-guide/#what-can-it-do","title":"What can it do?","text":"

    Jelly is designed to be a protocol for streaming RDF data, but it can also be used with \"classic\", static RDF data. The main design goals of Jelly are speed, simplicity, and wide coverage of use cases.

    "},{"location":"user-guide/#how-to-use-it","title":"How to use it?","text":"

    To use Jelly, you first need an implementation of the protocol. There is currently one implementation available: Jelly JVM (Scala), which supports both Apache Jena and Eclipse RDF4J. It also has support for reactive streams and gRPC.

    The implementation will support several stream types and patterns that you can use. Which stream type you choose depends on your use case (see stream types below).

    All stream types use the same concept of stream frames \u2013 discrete elements into which the stream is divided. Each frame contains a number of rows, which are the actual RDF data (RDF triples, quads, etc.). Jelly does not define the semantics of stream frames \u2013 it's up to you to decide what they mean (see examples below).

    Why doesn't Jelly define the semantics of stream frames?

    There are many, many ways in which streams of RDF data can be used \u2013 there are different use cases, network protocols, QoS settings, ordering guarantees, stream semantics, etc. Picking specific semantics for stream frames would hopelessly overcomplicate the protocol and make it less useful in some use cases.

    Jelly tries to make as few assumptions as possible about the streams to ensure it is widely applicable. It is the responsibility of the end users to define the semantics of stream frames for their use case. To help with that, this user guide contains some common patterns and examples.

    "},{"location":"user-guide/#stream-types","title":"Stream types","text":""},{"location":"user-guide/#common-patterns-cookbook","title":"Common patterns cookbook","text":"

    Below you will find some common patterns for using Jelly. These are just examples \u2013 you can use Jelly in many other ways. All of the presented patterns are supported in the Jelly JVM (Scala) implementation with the Reactive Streaming module.

    "},{"location":"user-guide/#triple-stream-just-a-bunch-of-triples","title":"Triple stream \u2013 \"just a bunch of triples\"","text":"

    Let's say you want to stream a lot of triples from A to B \u2013 maybe you're doing some kind of data migration, or you're sending data to a data lake. You don't care about the graph they belong to \u2013 you just want to send a bunch of triples.

    You can use a triple stream, batching the triples into frames of an arbitrary size (let's say, 1000 triples each):

    Example (click to expand)

    You can then send these frames one-by-one over gRPC or Kafka, or write them to a file. The consumer will be able to read the triples one frame at a time, without having to know how many triples there are in total.

    "},{"location":"user-guide/#triple-stream-a-stream-of-graphs","title":"Triple stream \u2013 \"a stream of graphs\"","text":"

    In this case we have an IoT sensor that periodically emits an RDF graph that describes what the sensor saw (something like SOSA/SSN). The graphs may be of different sizes (depending on what the sensor saw) and they can be emitted at different rates (depending on how often the sensor is triggered). We want to stream these graphs to a server that will process them in real-time with no additional latency.

    You can use a triple stream, where the stream frames correspond to different unnamed (default) graphs:

    Example (click to expand)

    The consumer will be able to read the graphs one frame at a time, without having to know how many graphs there are in total.

    RiverBench uses this pattern for distributing its triple streams (see example). Note that in RiverBench the stream may be equivalently considered \"just a bunch of triples\" \u2013 the serialization is the same, it only depends on the interpretation on the side of the consumer.

    "},{"location":"user-guide/#quad-stream-just-a-bunch-of-quads","title":"Quad stream \u2013 \"just a bunch of quads\"","text":"

    You want to stream a lot of quads \u2013 similar to the \"just a bunch of triples\" case above, but you also want to include the graph node. You can use a quad stream, batching the quads into frames of an arbitrary size (let's say, 1000 quads each):

    Example (click to expand)

    The mechanism is exactly the same as with a triple stream.

    "},{"location":"user-guide/#quad-stream-a-stream-of-datasets","title":"Quad stream \u2013 \"a stream of datasets\"","text":"

    You want to stream RDF datasets \u2013 similar to the \"a stream of graphs\" case above, but your elements are entire datasets. You can use a quad stream, where the stream frames correspond to different datasets:

    Example (click to expand)

    The mechanism is exactly the same as with a triple stream of graphs.

    RiverBench uses this pattern for distributing its quad and graph streams (see example). Note that in RiverBench the stream may be equivalently considered \"just a bunch of quads\" \u2013 the serialization is the same, it only depends on the interpretation on the side of the consumer.

    "},{"location":"user-guide/#graph-stream-just-a-bunch-of-named-graphs","title":"Graph stream \u2013 \"just a bunch of named graphs\"","text":"

    This is a slightly different take on the problem of \"just a bunch of quads\" \u2013 you also want to transmit what is essentially an RDF dataset, but instead of sending individual quads, you want to send it graph-by-graph. This makes most sense if your data changes on a per-graph basis, or you are streaming a static RDF dataset.

    You can use a graph stream, batching the triples in the graphs into frames of an arbitrary size (let's say, 1000 triples each):

    Example (click to expand)

    Notice that one graph can span multiple stream frames, and one stream frame can contain multiple graphs. The consumer will be able to read the graphs one frame at a time, without having to know how many graphs there are in total.

    "},{"location":"user-guide/#graph-stream-a-stream-of-datasets","title":"Graph stream \u2013 \"a stream of datasets\"","text":"

    You want to stream RDF datasets \u2013 for example using the RSP Data Model, where each element is a named graph and a bunch of statements about this graph in the default graph. You can use a graph stream, where the stream frames correspond to different datasets:

    Example (click to expand)

    Of course each stream frame can contain more than one named graph, and the graphs can be of different sizes.

    "},{"location":"user-guide/#ordering-and-delivery-guarantees","title":"Ordering and delivery guarantees","text":"

    To be able to compress RDF streams on-the-fly, Jelly requires that stream frames are kept strictly in order (see also the spec). This is because the compression algorithm updates its lookup tables dynamically over the course of the stream, and a given frame depends on the lookups defined in previous frames. If the frames are out of order, the compression may fail.

    There are use cases where it's hard to guarantee strict ordering of messages, such as IoT messaging (e.g., MQTT with QoS 0) or high-throughput streams with parallel partitions (e.g., Kafka). In these cases you may want to employ one of these strategies:

    Note that Jelly by default also assumes that frames are delivered at least once. At-least-once delivery is good enough (as long as the order is kept), as lookup updates are idempotent \u2013 you may only need to de-duplicate the frames afterwards. At-most-once delivery requires you to make the frames independent of each other, such as with the IoT strategy above.

    "},{"location":"user-guide/#implementing-jelly","title":"Implementing Jelly","text":"

    Note

    This section is intended only for those who want to write a new Jelly implementation from scratch. It's much easier to use an existing implementation, such as the JVM (Scala) implementation.

    Implementing Jelly from scratch is greatly simplified by the existing Protobuf and RDF libraries. Essentially, the only thing you'll need to do is to glue them together:

    That's it! You may also want to implement streaming facilities, such as Reactive Streams in Java/Scala. Implementing the gRPC publish/subscribe mechanism follows a very similar procedure \u2013 many Protobuf libraries have built-in support for gRPC with code generation.

    "},{"location":"jvm/","title":"Jelly JVM (Scala) implementation","text":"

    javadoc link

    maven

    "},{"location":"jvm/getting-started/","title":"Getting started","text":"

    Compatibility \u2013 Java 11 \u2013 21. 11, 17, 21 are tested in CI.

    "},{"location":"jvm/grpc/","title":"User guide \u2013 gRPC","text":""},{"location":"jvm/grpc/#example-grpc-pubsub","title":"Example \u2013 gRPC pub/sub","text":""},{"location":"jvm/implementation/","title":"Developer guide \u2013 implementing conversions for other libraries","text":"

    Currently converters for the two most popular RDF JVM libraries are implemented \u2013 RDF4J and Jena. But it is possible to implement your own converters and adapt the Jelly serialization code to any RDF library with little effort.

    To do this, you will need to implement three traits (interfaces in Java) from the jelly-core module: ProtoEncoder, ProtoDecoderConverter, and ConverterFactory.

    "},{"location":"jvm/reactive/","title":"User guide \u2013 reactive streaming","text":"

    TODO

    "},{"location":"jvm/reactive/#example-streaming-with-kafka","title":"Example: streaming with Kafka","text":""},{"location":"jvm/reactive/#byte-streams","title":"Byte streams","text":"

    TODO

    (referenced by specification/serialization.md)

    "},{"location":"jvm/releases/","title":"Developer guide \u2013 releases","text":""},{"location":"jvm/releases/#full-versioned-releases","title":"Full (versioned) releases","text":"

    Full (versioned) releases are created manually and follow the Semantic Versioning scheme for binary compatibility.

    To create a new tagged release (example for version 1.2.3):

    $ git checkout main\n$ git pull\n$ git tag v1.2.3\n$ git push origin v1.2.3\n

    The rest (packaging and release creation) will be handled automatically by the CI. The release will be pushed to Maven Central.

    "},{"location":"jvm/releases/#snapshot-releases","title":"Snapshot releases","text":"

    Snapshot releases are triggered automatically by commits in the main branch. Snapshots are pushed to the Sonatype snapshot repository.

    "},{"location":"specification/","title":"Jelly protocol specification","text":"

    The Jelly protocol consists of two parts: the gRPC streaming protocol and the serialization format. The serialization format is the basis for Jelly, specifying how to turn RDF data into bytes and back. The gRPC streaming protocol defines a publish/subscribe mechanism for exchanging RDF data between a client and a server, using gRPC.

    See the user guide for a friendly introduction to Jelly.

    See the specification pages for more details:

    "},{"location":"specification/media-type/","title":"File extension and media type","text":"

    Jelly is not tied to any specific file extension and does not have a registered media type. However, you can use the following:

    The files should be saved in the delimited variant of Jelly.

    "},{"location":"specification/media-type/#see-also","title":"See also","text":""},{"location":"specification/reference/","title":"Protocol Documentation","text":""},{"location":"specification/reference/#table-of-contents","title":"Table of Contents","text":"

    Top

    "},{"location":"specification/reference/#grpcproto","title":"grpc.proto","text":""},{"location":"specification/reference/#rdfstreamreceived","title":"RdfStreamReceived","text":"

    Acknowledgement of receiving a stream sent by the server to the client.

    "},{"location":"specification/reference/#rdfstreamsubscribe","title":"RdfStreamSubscribe","text":"

    Subscribe command sent by the client to the server.

    Field Type Label Description topic string The topic to which the client wants to subscribe (UTF-8 encoded). requested_options RdfStreamOptions Optional: the stream options requested by the client. The server should respond with a stream that matches these options. In case that is not possible, the server must respond with the INVALID_ARGUMENT error.

    "},{"location":"specification/reference/#rdfstreamservice","title":"RdfStreamService","text":"

    Pub/Sub service for RDF streams, to be implemented by the server.

    Method Name Request Type Response Type Description SubscribeRdf RdfStreamSubscribe RdfStreamFrame stream Subscribe to an RDF stream. PublishRdf RdfStreamFrame stream RdfStreamReceived Publish an RDF stream. In case the server cannot process the stream, it must respond with the INVALID_ARGUMENT error.

    Top

    "},{"location":"specification/reference/#rdfproto","title":"rdf.proto","text":""},{"location":"specification/reference/#rdfdatatypeentry","title":"RdfDatatypeEntry","text":"

    Entry in the datatype lookup table

    Field Type Label Description id uint32 1-based identifier value string Value of the datatype (UTF-8 encoded)

    "},{"location":"specification/reference/#rdfdefaultgraph","title":"RdfDefaultGraph","text":"

    Empty message indicating the default RDF graph.

    "},{"location":"specification/reference/#rdfgraph","title":"RdfGraph","text":"

    RDF graph nodes

    Field Type Label Description iri RdfIri IRI bnode string Blank node literal RdfLiteral Literal \u2013 only valid for generalized RDF streams default_graph RdfDefaultGraph Default graph repeat RdfRepeat Repeated term \u2013 only valid in a QUADS stream

    "},{"location":"specification/reference/#rdfgraphend","title":"RdfGraphEnd","text":"

    End of a graph in a GRAPHS stream

    "},{"location":"specification/reference/#rdfgraphstart","title":"RdfGraphStart","text":"

    Start of a graph in a GRAPHS stream

    Field Type Label Description graph RdfGraph

    "},{"location":"specification/reference/#rdfiri","title":"RdfIri","text":"

    RDF IRIs. Either prefix_id or name_id can be zero if the prefix or the suffix is not used.

    Field Type Label Description prefix_id uint32 1-based, refers to an entry in the prefix lookup. name_id uint32 1-based, refers to an entry in the name lookup.

    "},{"location":"specification/reference/#rdfliteral","title":"RdfLiteral","text":"

    RDF literals

    Field Type Label Description lex string The lexical form of the literal. simple RdfLiteralSimple Simple literal with datatype xsd:string. langtag string Language-tagged string. datatype uint32 Typed literal. The datatype is a reference to an entry in the datatype lookup.

    "},{"location":"specification/reference/#rdfliteralsimple","title":"RdfLiteralSimple","text":"

    Empty message indicating a simple literal

    "},{"location":"specification/reference/#rdfnameentry","title":"RdfNameEntry","text":"

    Entry in the name lookup table

    Field Type Label Description id uint32 1-based identifier value string Value of the name (UTF-8 encoded)

    "},{"location":"specification/reference/#rdfprefixentry","title":"RdfPrefixEntry","text":"

    Entry in the prefix lookup table

    Field Type Label Description id uint32 1-based identifier value string Value of the prefix (UTF-8 encoded)

    "},{"location":"specification/reference/#rdfquad","title":"RdfQuad","text":"

    RDF quad

    Field Type Label Description s RdfTerm Quad subject p RdfTerm Quad predicate o RdfTerm Quad object g RdfGraph Quad graph node

    "},{"location":"specification/reference/#rdfrepeat","title":"RdfRepeat","text":"

    Empty message indicating a repeated term from the previous statement.

    "},{"location":"specification/reference/#rdfstreamframe","title":"RdfStreamFrame","text":"

    RDF stream frame

    Field Type Label Description rows RdfStreamRow repeated Stream rows

    "},{"location":"specification/reference/#rdfstreamoptions","title":"RdfStreamOptions","text":"

    RDF stream options

    Field Type Label Description stream_name string Name of the stream (completely optional). This may be used for, e.g., topic names in a pub/sub system. stream_type RdfStreamType Type of the stream (required) generalized_statements bool Whether the stream may contain generalized triples, quads, or datasets use_repeat bool Whether RdfRepeat will be used rdf_star bool Whether the stream may contain RDF-star statements max_name_table_size uint32 Maximum size of the name lookup table max_prefix_table_size uint32 Maximum size of the prefix lookup table max_datatype_table_size uint32 Maximum size of the datatype lookup table version uint32 Protocol version (required) For Jelly 1.0.x value must be 1. For custom extensions, the value must be 1000 or higher.

    "},{"location":"specification/reference/#rdfstreamrow","title":"RdfStreamRow","text":"

    RDF stream row

    Field Type Label Description options RdfStreamOptions Stream options. Must occur at the start of the stream. triple RdfTriple RDF triple statement. Valid in TRIPLES and GRAPHS streams. quad RdfQuad RDF quad statement. Only valid in a QUADS stream. graph_start RdfGraphStart Graph boundary: ends the currently transmitted graph and starts a new one Only valid in a GRAPHS stream. graph_end RdfGraphEnd Explicit end of a graph. Signals the consumer that the transmitted graph is complete. Only valid in a GRAPHS stream. name RdfNameEntry Entry in the name lookup table. prefix RdfPrefixEntry Entry in the prefix lookup table. datatype RdfDatatypeEntry Entry in the datatype lookup table.

    "},{"location":"specification/reference/#rdfterm","title":"RdfTerm","text":"

    RDF terms

    Field Type Label Description iri RdfIri IRI bnode string Blank node literal RdfLiteral Literal triple_term RdfTriple RDF-star quoted triple repeat RdfRepeat Repeated term from the previous statement. Only valid in statements, not quoted triples.

    "},{"location":"specification/reference/#rdftriple","title":"RdfTriple","text":"

    RDF triple

    Field Type Label Description s RdfTerm Triple subject p RdfTerm Triple predicate o RdfTerm Triple object

    "},{"location":"specification/reference/#rdfstreamtype","title":"RdfStreamType","text":"

    RDF stream type

    Name Number Description RDF_STREAM_TYPE_UNSPECIFIED 0 Unspecified stream type \u2013 invalid RDF_STREAM_TYPE_TRIPLES 1 RDF triples RDF_STREAM_TYPE_QUADS 2 RDF quads RDF_STREAM_TYPE_GRAPHS 3 RDF triples grouped in graphs"},{"location":"specification/reference/#scalar-value-types","title":"Scalar Value Types","text":".proto Type Notes C++ Java Python Go C# PHP Ruby double double double float float64 double float Float float float float float float32 float float Float int32 Uses variable-length encoding. Inefficient for encoding negative numbers \u2013 if your field is likely to have negative values, use sint32 instead. int32 int int int32 int integer Bignum or Fixnum (as required) int64 Uses variable-length encoding. Inefficient for encoding negative numbers \u2013 if your field is likely to have negative values, use sint64 instead. int64 long int/long int64 long integer/string Bignum uint32 Uses variable-length encoding. uint32 int int/long uint32 uint integer Bignum or Fixnum (as required) uint64 Uses variable-length encoding. uint64 long int/long uint64 ulong integer/string Bignum or Fixnum (as required) sint32 Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int32s. int32 int int int32 int integer Bignum or Fixnum (as required) sint64 Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int64s. int64 long int/long int64 long integer/string Bignum fixed32 Always four bytes. More efficient than uint32 if values are often greater than 2^28. uint32 int int uint32 uint integer Bignum or Fixnum (as required) fixed64 Always eight bytes. More efficient than uint64 if values are often greater than 2^56. uint64 long int/long uint64 ulong integer/string Bignum sfixed32 Always four bytes. int32 int int int32 int integer Bignum or Fixnum (as required) sfixed64 Always eight bytes. 
int64 long int/long int64 long integer/string Bignum bool bool boolean boolean bool bool boolean TrueClass/FalseClass string A string must always contain UTF-8 encoded or 7-bit ASCII text. string String str/unicode string string string String (UTF-8) bytes May contain any arbitrary sequence of bytes. string ByteString str []byte ByteString string String (ASCII-8BIT)"},{"location":"specification/serialization/","title":"Jelly serialization format specification","text":"

    This document is the specification of the Jelly serialization format. It is intended for implementers of Jelly libraries and applications. If you are looking for a user-friendly introduction to Jelly, see the Jelly index page.

    This document is accompanied by the Jelly Protobuf reference and the Protobuf definition itself (rdf.proto).

    The following assumptions are used in this document:

    Author: Piotr Sowi\u0144ski (Ostrzyciel)

    Version: 1.0.0

    Document status: Draft specification

    Info

    The key words \"MUST\", \"MUST NOT\", \"REQUIRED\", \"SHOULD\", \"SHOULD NOT\", \"RECOMMENDED\", \"MAY\", and \"OPTIONAL\" in this document are to be interpreted as described in RFC 2119.

    Note

    The \"Note\" blocks in this document are not part of the specification, but rather provide additional information for implementers.

    Note

    The \"Example\" blocks in this document are not part of the specification, but rather provide informal examples of the serialization format. The examples use the Protocol Buffers Text Format Language.

    "},{"location":"specification/serialization/#conformance","title":"Conformance","text":"

    Implementations MAY choose to implement only a subset of the following specification. In this case, they SHOULD clearly specify which parts of the specification they implement. In the rest of this specification, the keywords \"MUST\", \"MUST NOT\", etc. refer to full (not partial) implementations.

    Note

    Implementations may in particular choose to not implement features that are not supported on the target platform (e.g., RDF datasets, RDF-star, generalized RDF terms, etc.).

    Implementations MAY also choose to extend Jelly with additional features that SHOULD NOT interfere with the serialization being readable by implementations which follow the specification.

    "},{"location":"specification/serialization/#versioning","title":"Versioning","text":"

    The protocol follows the Semantic Versioning 2.0 scheme. Each MAJOR.MINOR semantic version corresponds to an integer version tag in the protocol. The version tag is encoded in the version field of the RdfStreamOptions message. See also the section on stream options for more information on how to handle the version tags in serialized streams.

    The following versions of the protocol are defined:

    Version tag Semantic version 1 1.0.x (current)

    Note

    Releases of the protocol are published on GitHub.

    "},{"location":"specification/serialization/#backward-compatibility","title":"Backward compatibility","text":"

    Implementations SHOULD ensure backward compatibility. To achieve backward compatibility, the implementation MUST be able to read all messages from the previous releases of the protocol with the same MAJOR version. The implementation MAY also be able to read messages from previous releases of the protocol with a different MAJOR version.

    Note

    The protocol is designed in such a way that you don't need to worry about backward compatibility. The only thing you need to do is to implement the latest version of the protocol, and you will automatically get backward compatibility with all previous versions (of the same MAJOR).

    "},{"location":"specification/serialization/#forward-compatibility","title":"Forward compatibility","text":"

    Forward compatibility is not guaranteed. Implementations MAY be able to read messages from future releases of the protocol with the same MAJOR version. Implementations MAY also be able to read messages from future releases of the protocol with a different MAJOR version.

    "},{"location":"specification/serialization/#actors-and-implementations","title":"Actors and implementations","text":"

    Jelly assumes there to be two actors involved in processing the stream: the producer (serializer) and the consumer (parser). The producer is responsible for serializing the RDF data into the Jelly format, and the consumer is responsible for parsing the Jelly format into RDF data.

    Implementations may include only the producer, only the consumer, or both.

    "},{"location":"specification/serialization/#format-specification","title":"Format specification","text":"

    The Jelly serialization format uses Protocol Buffers version 3 as the underlying serialization format. All implementations MUST use a compliant Protocol Buffers implementation. The Protocol Buffers schema for Jelly serialization is defined in rdf.proto (source code, reference).

    The Jelly format is a stream (i.e., an ordered sequence) of stream frames. The frames may be sent one-by-one using a dedicated streaming protocol (e.g., gRPC, MQTT, Kafka) or written in sequence to a byte stream (e.g., a file or socket). When writing to a byte stream, the frames MUST be delimited \u2013 see the delimited variant.

    Jelly supports several distinct types of streams, and uses a simple and configurable compression mechanism using lookup tables.

    "},{"location":"specification/serialization/#stream-frames","title":"Stream frames","text":"

    A stream frame is a message of type RdfStreamFrame (reference). The message has only one field (rows), which is a repeated field of type RdfStreamRow (reference). A stream frame may contain any number of rows, however it is RECOMMENDED to keep the size of the frames below 1 MB. The semantics for the frames are not defined by the protocol. The end users are free to define their own semantics for the frames.

    Note

    A stream frame in a \"simple flat file\" is just a batch of RDF statements \u2013 the stream frames may carry no semantics in this case. You can make the stream frame as long as the file itself, but this is not recommended, as it would make the file harder to process.

    Note

    Stream frames can also be used to indicate individual stream elements. For example, in the case of a stream of RDF datasets, each frame may contain one dataset. RiverBench datasets use this convention in their distributions.

    "},{"location":"specification/serialization/#ordering","title":"Ordering","text":"

    Stream frames MUST be processed strictly in order to preserve the semantics of the stream. Each stream frame MUST be processed in its entirety before the next stream frame is processed.

    Implementations MAY choose to adopt a non-standard solution where the order or delivery of the frames is not guaranteed and the stream can be read in more than one order or without some frames. The implementation MUST clearly specify in the documentation that it uses such a non-standard solution.

    Note

    An example where not adhering to the strict ordering may be useful is when you are dealing with a network streaming protocol that does not guarantee the order of the messages (e.g., MQTT).

    Note

    The main thing you will need to worry about is the order of the lookup tables. If you can, emit all lookup tables at the beginning of the stream. When using stream partitions (e.g., in Kafka), you should ensure that the lookups are emitted to each partition. Alternatively, you can transmit the lookup tables separately from the stream.

    "},{"location":"specification/serialization/#stream-rows","title":"Stream rows","text":"

    A stream row is a message of type RdfStreamRow. It has one of the following fields set:

    Stream rows MUST be processed strictly in order to preserve the semantics of the stream.

    "},{"location":"specification/serialization/#stream-types","title":"Stream types","text":"

    The type of the stream MUST be explicitly specified in the stream options header. The type of the stream is defined by the RdfStreamType enum (reference). The following types are defined:

    Note

    See also a more human explanation of the available stream types.

    "},{"location":"specification/serialization/#stream-options","title":"Stream options","text":"

    The stream options is a message of type RdfStreamOptions (reference). It MUST be the first row in the stream. It MAY appear more than once in the stream (also after other rows), but it MUST be identical to all previous occurrences. Implementations MAY throw an error if the stream options header is not present at the start of the stream; alternatively, they MAY use the default options. Implementations SHOULD NOT throw an error if the stream options header is present more than once in the stream.

    The stream options header instructs the consumer of the stream (parser) on the size of the needed lookups to decode the stream and the features used by the stream.

    The stream options header contains the following fields:

    "},{"location":"specification/serialization/#prefix-name-and-datatype-lookups","title":"Prefix, name, and datatype lookups","text":"

    Jelly uses a common mechanism of lookup tables for IRI prefixes, IRI names (postfixes), and datatypes. The lookups are used to compress the IRIs and datatypes in the stream. All lookups function in the same way:

    Note

    The spec does not specify what strategy the producer should use to update the lookup. You can use the LRU strategy (as used in the Scala implementation), LFU, or something more complex. You can also have a fixed lookup in the producer and communicate it at the start of the stream. This is possible if you are using a fixed set of prefixes, names, or datatypes and want to conserve computing power (e.g., in IoT devices).

    The simplest way to implement the consumer's lookup is to just use an indexed array of fixed size. The workload on the consumer's side is much lower than on the producer's side, so your choice of the strategy depends largely on the producer.

    "},{"location":"specification/serialization/#rdf-statements-and-graphs","title":"RDF statements and graphs","text":"

    RDF statements (triples or quads) are communicated in three different ways, depending on the type of the stream:

    Note

    If the stream is meant to represent a single RDF dataset, then the graphs should be able to stretch across several stream frames. If the stream is meant to represent a stream of RDF datasets, then the graphs should be contained within a single stream frame.

    "},{"location":"specification/serialization/#rdf-terms","title":"RDF terms","text":"

    RDF terms are encoded using the RdfTerm message. The message has one of the following fields set: iri, bnode, literal, triple_term, repeat, corresponding to RDF IRIs, blank nodes, literals, RDF-star quoted triples, and repeated terms, respectively. Exactly one of these fields MUST be set.

    "},{"location":"specification/serialization/#iris","title":"IRIs","text":"

    The IRIs are encoded using the RdfIri message. The message has two fields that together make up the IRI:

    At least one of the prefix_id and name_id fields MUST be set to a non-default, positive value. The IRI is then constructed by concatenating the prefix and the name. The IRI SHOULD be a valid IRI, as defined in RFC 3987.

    Example (click to expand)

    Assume the following lookup entries were defined in the stream (wrapping RdfStreamRows were omitted for brevity):

    RdfPrefixEntry {\n    id: 1\n    prefix: \"http://example.com/\"\n}\nRdfNameEntry {\n    id: 4\n    name: \"example\"\n}\nRdfNameEntry {\n    id: 1\n    name: \"http://test.com/test\"\n}\n

    Then the following IRIs are encoded as follows:

    # http://example.com/example\nRdfIri {\n    prefix_id: 1\n    name_id: 4\n} \n\n# http://example.com/\nRdfIri {\n    prefix_id: 1\n}\n\n# http://test.com/test\nRdfIri {\n    name_id: 1\n}\n

    Note

    The spec does not specify how to split the IRIs into names and prefixes. You can use any strategy you want, as long as you follow the rules above. The simplest way is to split the IRI at the last occurrence of the # or / character \u2013 this is what the Scala implementation uses. The prefixes are not meant to be user-facing, but you can also use user-defined prefixes (e.g., @prefix in Turtle) to split the IRIs.

    "},{"location":"specification/serialization/#blank-nodes","title":"Blank nodes","text":"

    RDF blank nodes are represented using simple strings. The string is the identifier of the blank node. The identifier may be any valid UTF-8 string.

    Because the spec does not define the semantics of the stream frames, blank node identifiers are not guaranteed to be unique across the stream frames. The consumer MAY choose to treat the blank nodes as unique across the stream (and thus treat all occurrences of the identifier as a single node), or it MAY choose to treat them as unique only within a single stream frame. The producer SHOULD specify in the documentation which strategy it uses.

    Note

    If the stream is meant to represent a single RDF graph or dataset, then the blank node identifiers should be unique across the stream so that you can refer to them across stream frame boundaries. If the frames refer to different graphs or datasets, then the blank node identifiers should be unique only within a single frame.

    Note

    Many RDF libraries (e.g., RDF4J, Apache Jena) use internal identifiers for blank nodes, which can be used as the identifiers in Jelly streams. You can also use a different format, for example with shorter identifiers to preserve space.

    "},{"location":"specification/serialization/#literals","title":"Literals","text":"

    RDF literals are represented using the RdfLiteral message (reference). The message has the following fields:

    "},{"location":"specification/serialization/#quoted-triples-rdf-star","title":"Quoted triples (RDF-star)","text":"

    RDF-star quoted triples are represented using the RdfTriple message (reference). Quoted triples are encoded in the same manner as triple statements, with the only difference being that repeated terms (RdfRepeat) MUST NOT be used in quoted triples. The consumer SHOULD throw an error if a repeated term is encountered in a quoted triple.

    Quoted triples may be nested up to arbitrary depth. The consumer SHOULD throw an error if the depth of the nesting exceeds the capabilities of the implementation.

    "},{"location":"specification/serialization/#repeated-terms","title":"Repeated terms","text":"

    Repeated terms indicate that a term in a given position (subject, predicate, object, or graph node in quads) is the same as the term in the same position in the previous row. The repeated terms are encoded using the RdfRepeat message (reference). The message does not have any fields.

    Example (click to expand)

    In the example the wrapping RdfStreamRows were omitted for brevity:

    # First row\nRdfTriple {\n    s: RdfTerm {\n        iri: RdfIri {\n            prefix_id: 1\n            name_id: 1\n        }\n    }\n    p: RdfTerm {\n        iri: RdfIri {\n            prefix_id: 1\n            name_id: 2\n        }\n    }\n    o: RdfTerm {\n        bnode: \"b1\"\n    }\n}\n\n# Second row \u2013 repeating the subject and predicate\nRdfTriple {\n    s: RdfRepeat {} # RdfTerm(iri: RdfIri(1, 1))\n    p: RdfRepeat {} # RdfTerm(iri: RdfIri(1, 2))\n    o: RdfTerm {\n        bnode: \"b2\"\n    }\n}\n\n# Third row \u2013 repeating the subject and object\nRdfTriple {\n    s: RdfRepeat {} # RdfTerm(iri: RdfIri(1, 1))\n    p: RdfTerm {\n        iri: RdfIri {\n            prefix_id: 2\n            name_id: 3\n        }\n    }\n    o: RdfRepeat {} # RdfTerm(bnode = \"b2\")\n}\n

    Note

    Repeated terms can be simply implemented with four variables (s, p, o, g) holding the last non-repeated value of a term in that position. This O(1) solution is what the Scala implementation uses.

    Note

    Although repeated terms can stretch across stream frame boundaries (i.e., refer to values last seen in the previous stream frame), you don't have to use this feature. If your use case requires the stream frames to be more independent of each other (see: stream frame ordering), you can just reset the repeated terms at the start of each stream frame.

    "},{"location":"specification/serialization/#rdf-graph-nodes","title":"RDF graph nodes","text":"

    RDF graph nodes are encoded using the RdfGraph message. The message is used both in the RdfGraphStart message for GRAPHS streams and in the RdfQuad message for QUADS streams. The message MUST have exactly one of the following fields set:

    "},{"location":"specification/serialization/#delimited-variant-of-jelly","title":"Delimited variant of Jelly","text":"

    Note

    Protobuf messages are not delimited, so if you write multiple messages to the same file / socket / byte stream, you need to add some kind of delimiter between them. Jelly uses the convention already implemented in some protobuf libraries of prepending a varint before the message, to specify the length of the message.

    A byte stream (or file) in the delimited variant MUST consist of a series of delimited RdfStreamFrame messages. A delimited message is a message that has a varint prepended before it, specifying the length of the message in bytes.

    Implementing the delimited variant is OPTIONAL.

    "},{"location":"specification/serialization/#delimited-variant-implementations","title":"Delimited variant implementations","text":"

    The delimiting convention is implemented in Protobuf libraries for:

    The JVM (Scala) implementation of Jelly also supports the delimited variant \u2013 see the documentation.

    "},{"location":"specification/serialization/#internet-media-type-and-file-extension","title":"Internet media type and file extension","text":"

    The RECOMMENDED media type for Jelly is application/x-jelly-rdf. The RECOMMENDED file extension is .jelly.

    The files SHOULD be saved in the delimited variant of Jelly.

    "},{"location":"specification/serialization/#implementations","title":"Implementations","text":"

    This section is not part of the specification.

    The following implementations of the Jelly serialization format specification are available:

    "},{"location":"specification/streaming/","title":"Jelly gRPC streaming protocol specification","text":"

    This document is the specification of the Jelly gRPC streaming protocol (publish/subscribe mechanism). It is intended for implementers of Jelly libraries and applications. If you are looking for a user-friendly introduction to Jelly, see the Jelly index page.

    This document is accompanied by the Jelly Protobuf reference and the Protobuf definition itself (grpc.proto).

    The following assumptions are used in this document:

    Author: Piotr Sowi\u0144ski (Ostrzyciel)

    Version: 1.0.0

    Document status: Draft specification

    Info

    The key words \"MUST\", \"MUST NOT\", \"REQUIRED\", \"SHOULD\", \"SHOULD NOT\", \"RECOMMENDED\", \"MAY\", and \"OPTIONAL\" in this document are to be interpreted as described in RFC 2119.

    Note

    The \"Note\" blocks in this document are not part of the specification, but rather provide additional information for implementers.

    Note

    The \"Example\" blocks in this document are not part of the specification, but rather provide informal examples of the serialization format. The examples use the Protocol Buffers Text Format Language.

    "},{"location":"specification/streaming/#conformance","title":"Conformance","text":"

    Implementations MAY choose to implement only a subset of the following specification. In this case, they SHOULD clearly specify which parts of the specification they implement. In the rest of this specification, the keywords \"MUST\", \"MUST NOT\", etc. refer to full (not partial) implementations.

    "},{"location":"specification/streaming/#versioning","title":"Versioning","text":"

    The streaming protocol follows the Semantic Versioning 2.0 scheme. The version of the gRPC streaming protocol is equal to the version of the corresponding serialization format (1:1 equivalence). The version of the protocol is specified in the stream options \u2013 see the serialization specification for details.

    Note

    Releases of the protocol are published on GitHub.

    "},{"location":"specification/streaming/#backward-compatibility","title":"Backward compatibility","text":"

    Implementations SHOULD ensure backward compatibility. To achieve backward compatibility, the implementation MUST be able to respond to all RPCs from the previous releases of the protocol with the same MAJOR version. The implementation MAY also be able to respond to RPCs from previous releases of the protocol with a different MAJOR version.

    Note

    The protocol is designed in such a way that you don't need to worry about backward compatibility. The only thing you need to do is to implement the latest version of the protocol, and you will automatically get backward compatibility with all previous versions (of the same MAJOR).

    "},{"location":"specification/streaming/#forward-compatibility","title":"Forward compatibility","text":"

    Forward compatibility is not guaranteed. Implementations MAY be able to respond to RPCs from future releases of the protocol with the same MAJOR version. Implementations MAY also be able to respond to RPCs from future releases of the protocol with a different MAJOR version.

    "},{"location":"specification/streaming/#actors-and-implementations","title":"Actors and implementations","text":"

    The Jelly gRPC streaming protocol assumes there to be two actors: the server and the client. These actors can both play the role of the producer or the consumer of the stream (see serialization specification), depending on the direction of the stream.

    Implementations may include only the server, only the client, or both.

    "},{"location":"specification/streaming/#protocol-specification","title":"Protocol specification","text":"

    The protocol specifies a simple publish/subscribe mechanism with topics identified by UTF-8 strings. The client can subscribe to a topic and receive messages published to that topic by the server. The client can also publish messages to a topic.

    The described protocol is implemented as a gRPC service RdfStreamService (reference).

    Note

    The protocol does not specify what happens to the messages on the server \u2013 this is NOT a broker or message queue specification. The protocol is meant to enable point-to-point communication, but can also be used to implement a broker or a similar service (see Usage notes below).

    You can also ignore the topics and use the protocol as a simple streaming protocol.

    "},{"location":"specification/streaming/#topics","title":"Topics","text":"

    Topics are identified with UTF-8 strings. The topic string MUST be valid UTF-8. There are no further restrictions on the topic string.

    Note

    The topic can be whatever you like \u2013 it can also be empty. It is up to the user to decide what to use the topics for, or whether to use them at all.

    "},{"location":"specification/streaming/#subscribing-to-a-stream","title":"Subscribing to a stream","text":"

    The client subscribes to a stream from the server with the SubscribeRdf RPC (reference). The RPC is initiated with an RdfStreamSubscribe message (reference) from the client. The message includes two OPTIONAL fields:

    The server MUST respond with either a stream of RdfStreamFrame messages or an error.

    "},{"location":"specification/streaming/#stream-options-handling","title":"Stream options handling","text":"

    The client MAY request specific options for the stream it subscribes to. In that case, the client MUST include the options field in the RdfStreamSubscribe message. The server SHOULD respond with a stream that uses options that are compatible with the options requested by the client. If the server cannot respond with a stream that uses options that are compatible with the options requested by the client, the server MUST respond with the INVALID_ARGUMENT error.

    The following rules are used to determine if the options are compatible. All rules MUST be satisfied for the options to be compatible.

    Option Client request Server response stream_name x MAY be x stream_type x MUST be x generalized_statements x MUST be x or false use_repeat x MUST be x or false rdf_star x MUST be x or false max_name_table_size x MUST be <= x max_prefix_table_size x MUST be <= x max_datatype_table_size x MUST be <= x version x MUST be <= x

    Notes

    The server should implement some limits for the stream options it supports, for example the maximum size of the name table. Otherwise, a client may request a name table that takes up all the server's memory.

    "},{"location":"specification/streaming/#publishing-a-stream","title":"Publishing a stream","text":"

    The client publishes a stream to the server with the PublishRdf RPC (reference). The RPC is initiated with a stream of RdfStreamFrame messages from the client. The stream MUST include at least one message. The first frame MUST include a row with the stream options as the first row. After the stream successfully completes, the server MUST respond with the RdfStreamReceived message (reference).

    If the server cannot handle the stream with the specified options, the server MUST respond with the INVALID_ARGUMENT error.

    "},{"location":"specification/streaming/#usage-notes","title":"Usage notes","text":"

    This section is not part of the specification.

    The protocol is deliberately very general and unrestrictive. The pub/sub mechanism can be used in a number of ways, possibly extending the existing base protocol. The following are some examples of how the protocol can be used:

    These use cases can be implemented with the protocol as-is, or by extending the protocol with additional messages and/or RPCs. In either case, the protocol provides a base layer for compatibility between different implementations.

    "},{"location":"specification/streaming/#implementations","title":"Implementations","text":"

    This section is not part of the specification.

    The following implementations of the Jelly gRPC streaming protocol specification are available:

    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":"

    Test \u2013 index.md

    "},{"location":"contributing/","title":"Contributing","text":"

    TODO

    "},{"location":"licensing/","title":"Licensing and citation","text":"

    TODO

    "},{"location":"user-guide/","title":"Jelly user guide","text":"

    Jelly is a high-performance protocol for streaming and non-streaming RDF data. It is designed to be simple, fast, and easy to implement. This guide will help you get started with Jelly.

    Jelly uses Protocol Buffers 3 as the basis of its serialization. This means that you can quickly create a new Jelly implementation using code generation. You can also use an existing implementation, such as the JVM (Scala) implementation.

    "},{"location":"user-guide/#what-can-it-do","title":"What can it do?","text":"

    Jelly is designed to be a protocol for streaming RDF data, but it can also be used with \"classic\", static RDF data. The main design goals of Jelly are speed, simplicity, and wide coverage of use cases.

    "},{"location":"user-guide/#how-to-use-it","title":"How to use it?","text":"

    To use Jelly you first need an implementation of the protocol. There is currently one implementation available: Jelly JVM (Scala), which supports both Apache Jena and Eclipse RDF4J. It also has support for reactive streams and gRPC.

    The implementation will support several stream types and patterns that you can use. Which stream type you choose depends on your use case (see stream types below).

    All stream types use the same concept of stream frames \u2013 discrete elements into which the stream is divided. Each frame contains a number of rows, which are the actual RDF data (RDF triples, quads, etc.). Jelly does not define the semantics of stream frames \u2013 it's up to you to decide what they mean (see examples below).

    Why doesn't Jelly define the semantics of stream frames?

    There are many, many ways in which streams of RDF data can be used \u2013 there are different use cases, network protocols, QoS settings, ordering guarantees, stream semantics, etc. Picking specific semantics for stream frames would hopelessly overcomplicate the protocol and make it less useful in some use cases.

    Jelly tries to make as few assumptions as possible about the streams to ensure it is widely applicable. It is the responsibility of the end users to define the semantics of stream frames for their use case. To help with that, this user guide contains some common patterns and examples.

    "},{"location":"user-guide/#stream-types","title":"Stream types","text":""},{"location":"user-guide/#common-patterns-cookbook","title":"Common patterns cookbook","text":"

    Below you will find some common patterns for using Jelly. These are just examples \u2013 you can use Jelly in many other ways. All of the presented patterns are supported in the Jelly JVM (Scala) implementation with the Reactive Streaming module.

    "},{"location":"user-guide/#triple-stream-just-a-bunch-of-triples","title":"Triple stream \u2013 \"just a bunch of triples\"","text":"

    Let's say you want to stream a lot of triples from A to B \u2013 maybe you're doing some kind of data migration, or you're sending data to a data lake. You don't care about the graph they belong to \u2013 you just want to send a bunch of triples.

    You can use a triple stream, batching the triples into frames of an arbitrary size (let's say, 1000 triples each):

    Example (click to expand)

    You can then send these frames one-by-one over gRPC or Kafka, or write them to a file. The consumer will be able to read the triples one frame at a time, without having to know how many triples there are in total.

    "},{"location":"user-guide/#triple-stream-a-stream-of-graphs","title":"Triple stream \u2013 \"a stream of graphs\"","text":"

    In this case we have an IoT sensor that periodically emits an RDF graph that describes what the sensor saw (something like SOSA/SSN). The graphs may be of different sizes (depending on what the sensor saw) and they can be emitted at different rates (depending on how often the sensor is triggered). We want to stream these graphs to a server that will process them in real-time with no additional latency.

    You can use a triple stream, where the stream frames correspond to different unnamed (default) graphs:

    Example (click to expand)

    The consumer will be able to read the graphs one frame at a time, without having to know how many graphs there are in total.

    RiverBench uses this pattern for distributing its triple streams (see example). Note that in RiverBench the stream may be equivalently considered \"just a bunch of triples\" \u2013 the serialization is the same, it only depends on the interpretation on the side of the consumer.

    "},{"location":"user-guide/#quad-stream-just-a-bunch-of-quads","title":"Quad stream \u2013 \"just a bunch of quads\"","text":"

    You want to stream a lot of quads \u2013 similar to the \"just a bunch of triples\" case above, but you also want to include the graph node. You can use a quad stream, batching the quads into frames of an arbitrary size (let's say, 1000 quads each):

    Example (click to expand)

    The mechanism is exactly the same as with a triple stream.

    "},{"location":"user-guide/#quad-stream-a-stream-of-datasets","title":"Quad stream \u2013 \"a stream of datasets\"","text":"

    You want to stream RDF datasets \u2013 similar to the \"a stream of graphs\" case above, but your elements are entire datasets. You can use a quad stream, where the stream frames correspond to different datasets:

    Example (click to expand)

    The mechanism is exactly the same as with a triple stream of graphs.

    RiverBench uses this pattern for distributing its quad and graph streams (see example). Note that in RiverBench the stream may be equivalently considered \"just a bunch of quads\" \u2013 the serialization is the same, it only depends on the interpretation on the side of the consumer.

    "},{"location":"user-guide/#graph-stream-just-a-bunch-of-named-graphs","title":"Graph stream \u2013 \"just a bunch of named graphs\"","text":"

    This is a slightly different take on the problem of \"just a bunch of quads\" \u2013 you also want to transmit what is essentially an RDF dataset, but instead of sending individual quads, you want to send it graph-by-graph. This makes most sense if your data changes on a per-graph basis, or you are streaming a static RDF dataset.

    You can use a graph stream, batching the triples in the graphs into frames of an arbitrary size (let's say, 1000 triples each):

    Example (click to expand)

    Notice that one graph can span multiple stream frames, and one stream frame can contain multiple graphs. The consumer will be able to read the graphs one frame at a time, without having to know how many graphs there are in total.

    "},{"location":"user-guide/#graph-stream-a-stream-of-datasets","title":"Graph stream \u2013 \"a stream of datasets\"","text":"

    You want to stream RDF datasets \u2013 for example using the RSP Data Model, where each element is a named graph and a bunch of statements about this graph in the default graph. You can use a graph stream, where the stream frames correspond to different datasets:

    Example (click to expand)

    Of course each stream frame can contain more than one named graph, and the graphs can be of different sizes.

    "},{"location":"user-guide/#ordering-and-delivery-guarantees","title":"Ordering and delivery guarantees","text":"

    To be able to compress RDF streams on-the-fly, Jelly requires that stream frames are kept strictly in order (see also the spec). This is because the compression algorithm updates its lookup tables dynamically over the course of the stream, and a given frame depends on the lookups defined in previous frames. If the frames are out of order, the compression may fail.

    There are use cases where it's hard to guarantee strict ordering of messages, such as IoT messaging (e.g., MQTT with QoS 0) or high-throughput streams with parallel partitions (e.g., Kafka). In these cases you may want to employ one of these strategies:

    Note that Jelly by default also assumes that frames are delivered at least once. At-least-once delivery is good enough (as long as the order is kept), as lookup updates are idempotent \u2013 you may only need to de-duplicate the frames afterwards. At-most-once delivery requires you to make the frames independent of each other, such as with the IoT strategy above.

    "},{"location":"user-guide/#implementing-jelly","title":"Implementing Jelly","text":"

    Note

    This section is intended only for those who want to write a new Jelly implementation from scratch. It's much easier to use an existing implementation, such as the JVM (Scala) implementation.

    Implementing Jelly from scratch is greatly simplified by the existing Protobuf and RDF libraries. Essentially, the only thing you'll need to do is to glue them together:

    That's it! You may also want to implement streaming facilities, such as Reactive Streams in Java/Scala. Implementing the gRPC publish/subscribe mechanism follows a very similar procedure \u2013 many Protobuf libraries have built-in support for gRPC with code generation.

    "},{"location":"jvm/","title":"Jelly JVM (Scala) implementation","text":"

    javadoc link

    maven

    "},{"location":"jvm/getting-started/","title":"Getting started","text":"

    Compatibility \u2013 Java 11 \u2013 21. 11, 17, 21 are tested in CI.

    "},{"location":"jvm/grpc/","title":"User guide \u2013 gRPC","text":""},{"location":"jvm/grpc/#example-grpc-pubsub","title":"Example \u2013 gRPC pub/sub","text":""},{"location":"jvm/implementation/","title":"Developer guide \u2013 implementing conversions for other libraries","text":"

    Currently converters for the two most popular RDF JVM libraries are implemented \u2013 RDF4J and Jena. But it is possible to implement your own converters and adapt the Jelly serialization code to any RDF library with little effort.

    To do this, you will need to implement three traits (interfaces in Java) from the jelly-core module: ProtoEncoder, ProtoDecoderConverter, and ConverterFactory.

    "},{"location":"jvm/reactive/","title":"User guide \u2013 reactive streaming","text":"

    TODO

    "},{"location":"jvm/reactive/#example-streaming-with-kafka","title":"Example: streaming with Kafka","text":""},{"location":"jvm/reactive/#byte-streams","title":"Byte streams","text":"

    TODO

    (referenced by specification/serialization.md)

    "},{"location":"jvm/releases/","title":"Developer guide \u2013 releases","text":""},{"location":"jvm/releases/#full-versioned-releases","title":"Full (versioned) releases","text":"

    Full (versioned) releases are created manually and follow the Semantic Versioning scheme for binary compatibility.

    To create a new tagged release (example for version 1.2.3):

    $ git checkout main\n$ git pull\n$ git tag v1.2.3\n$ git push origin v1.2.3\n

    The rest (packaging and release creation) will be handled automatically by the CI. The release will be pushed to Maven Central.

    "},{"location":"jvm/releases/#snapshot-releases","title":"Snapshot releases","text":"

    Snapshot releases are triggered automatically by commits in the main branch. Snapshots are pushed to the Sonatype snapshot repository.

    "},{"location":"specification/","title":"Jelly protocol specification","text":"

    The Jelly protocol consists of two parts: the gRPC streaming protocol and the serialization format. The serialization format is the basis for Jelly, specifying how to turn RDF data into bytes and back. The gRPC streaming protocol defines a publish/subscribe mechanism for exchanging RDF data between a client and a server, using gRPC.

    See the user guide for a friendlier introduction to Jelly.

    See the specification pages for more details:

    "},{"location":"specification/media-type/","title":"File extension and media type","text":"

    Jelly is not tied to any specific file extension and does not have a registered media type. However, you can use the following:

    The files should be saved in the delimited variant of Jelly.

    "},{"location":"specification/media-type/#see-also","title":"See also","text":""},{"location":"specification/protobuf-source/","title":"Protobuf sources","text":"

    Below you will find the Protocol Buffers definitions for the Jelly serialization format and the Jelly gRPC streaming protocol. The original files are hosted on GitHub and all releases can be found here.

    Human-readable reference for these definitions can be found here.

    The following code is licensed under the Apache License, Version 2.0.

    "},{"location":"specification/protobuf-source/#rdfproto","title":"rdf.proto","text":"
    syntax = \"proto3\";\npackage eu.ostrzyciel.jelly.core.proto.v1;\n\n// Jelly RDF serialization with Protocol Buffers.\n// Protocol version: 1.0.0\n\n// RDF IRIs\n// Either prefix_id or name_id can be zero if the prefix or the suffix are not used.\nmessage RdfIri {\n  // 1-based, refers to an entry in the prefix lookup.\n  uint32 prefix_id = 1;\n  // 1-based, refers to an entry in the name lookup.\n  uint32 name_id = 2;\n}\n\n// RDF literals\nmessage RdfLiteral {\n  // The lexical form of the literal.\n  string lex = 1;\n\n  // Literal kind \u2013 exactly one of these fields must be set.\n  oneof literalKind {\n    // Simple literal with datatype xsd:string.\n    RdfLiteralSimple simple = 2;\n    // Language-tagged string.\n    string langtag = 3;\n    // Typed literal. The datatype is a reference to an entry in the datatype lookup.\n    uint32 datatype = 4;\n  }\n}\n\n// Empty message indicating a simple literal\nmessage RdfLiteralSimple {\n}\n\n// Empty message indicating a repeated term from the previous statement.\nmessage RdfRepeat {\n}\n\n// RDF terms\nmessage RdfTerm {\n  // Exactly one of these fields must be set.\n  oneof term {\n    // IRI\n    RdfIri        iri = 1;\n    // Blank node\n    string        bnode = 2;\n    // Literal\n    RdfLiteral    literal = 3;\n    // RDF-star quoted triple\n    RdfTriple     triple_term = 4;\n    // Repeated term from the previous statement. 
Only valid in statements, not quoted triples.\n    RdfRepeat     repeat = 10;\n  }\n}\n\n// Empty message indicating the default RDF graph.\nmessage RdfDefaultGraph {\n}\n\n// RDF graph nodes\nmessage RdfGraph {\n  // Exactly one of these fields must be set.\n  oneof graph {\n    // IRI\n    RdfIri           iri = 1;\n    // Blank node\n    string           bnode = 2;\n    // Literal \u2013 only valid for generalized RDF streams\n    RdfLiteral       literal = 3;\n    // Default graph\n    RdfDefaultGraph  default_graph = 4;\n    // Repeated term \u2013 only valid in a QUADS stream\n    RdfRepeat        repeat = 10;\n  }\n}\n\n// RDF triple\nmessage RdfTriple {\n  // Triple subject\n  RdfTerm s = 1;\n  // Triple predicate\n  RdfTerm p = 2;\n  // Triple object\n  RdfTerm o = 3;\n}\n\n// RDF quad\nmessage RdfQuad {\n  // Quad subject\n  RdfTerm  s = 1;\n  // Quad predicate\n  RdfTerm  p = 2;\n  // Quad object\n  RdfTerm  o = 3;\n  // Quad graph node\n  RdfGraph g = 4;\n}\n\n// Start of a graph in a GRAPHS stream\nmessage RdfGraphStart {\n  RdfGraph graph = 1;\n}\n\n// End of a graph in a GRAPHS stream\nmessage RdfGraphEnd {\n}\n\n// Entry in the name lookup table\nmessage RdfNameEntry {\n  // 1-based identifier\n  uint32 id = 1;\n  // Value of the name (UTF-8 encoded)\n  string value = 2;\n}\n\n// Entry in the prefix lookup table\nmessage RdfPrefixEntry {\n  // 1-based identifier\n  uint32 id = 1;\n  // Value of the prefix (UTF-8 encoded)\n  string value = 2;\n}\n\n// Entry in the datatype lookup table\nmessage RdfDatatypeEntry {\n  // 1-based identifier\n  uint32 id = 1;\n  // Value of the datatype (UTF-8 encoded)\n  string value = 2;\n}\n\n// RDF stream options\nmessage RdfStreamOptions {\n  // Name of the stream (completely optional).\n  // This may be used for, e.g., topic names in a pub/sub system.\n  string stream_name = 1;\n  // Type of the stream (required)\n  RdfStreamType stream_type = 2;\n  // Whether the stream may contain generalized triples, quads, or 
datasets\n  bool generalized_statements = 3;\n  // Whether RdfRepeat will be used\n  bool use_repeat = 4;\n  // Whether the stream may contain RDF-star statements\n  bool rdf_star = 5;\n  // Maximum size of the name lookup table\n  uint32 max_name_table_size = 9;\n  // Maximum size of the prefix lookup table\n  uint32 max_prefix_table_size = 10;\n  // Maximum size of the datatype lookup table\n  uint32 max_datatype_table_size = 11;\n  // Protocol version (required)\n  // For Jelly 1.0.x value must be 1.\n  // For custom extensions, the value must be 1000 or higher.\n  uint32 version = 15;\n}\n\n// RDF stream type\nenum RdfStreamType {\n  // Unspecified stream type \u2013 invalid\n  RDF_STREAM_TYPE_UNSPECIFIED = 0;\n  // RDF triples\n  RDF_STREAM_TYPE_TRIPLES = 1;\n  // RDF quads\n  RDF_STREAM_TYPE_QUADS = 2;\n  // RDF triples grouped in graphs\n  RDF_STREAM_TYPE_GRAPHS = 3;\n}\n\n// RDF stream row\nmessage RdfStreamRow {\n  // Exactly one of these fields must be set.\n  oneof row {\n    // Stream options. Must occur at the start of the stream.\n    RdfStreamOptions options = 1;\n    // RDF triple statement.\n    // Valid in TRIPLES and GRAPHS streams.\n    RdfTriple triple = 2;\n    // RDF quad statement.\n    // Only valid in a QUADS stream.\n    RdfQuad quad = 3;\n    // Graph boundary: ends the currently transmitted graph and starts a new one\n    // Only valid in a GRAPHS stream.\n    RdfGraphStart graph_start = 4;\n    // Explicit end of a graph.\n    // Signals the consumer that the transmitted graph is complete.\n    // Only valid in a GRAPHS stream.\n    RdfGraphEnd graph_end = 5;\n    // Entry in the name lookup table.\n    RdfNameEntry name = 9;\n    // Entry in the prefix lookup table.\n    RdfPrefixEntry prefix = 10;\n    // Entry in the datatype lookup table.\n    RdfDatatypeEntry datatype = 11;\n  }\n}\n\n// RDF stream frame\nmessage RdfStreamFrame {\n  // Stream rows\n  repeated RdfStreamRow rows = 1;\n}\n
    "},{"location":"specification/protobuf-source/#grpcproto","title":"grpc.proto","text":"
    syntax = \"proto3\";\npackage eu.ostrzyciel.jelly.core.proto.v1;\n\n// gRPC service specifications for RDF streaming.\n// Protocol version: 1.0.0\n\nimport \"rdf.proto\";\n\n// Subscribe command sent by the client to the server.\nmessage RdfStreamSubscribe {\n  // The topic to which the client wants to subscribe (UTF-8 encoded).\n  string topic = 1;\n  // Optional: the stream options requested by the client.\n  // The server should respond with a stream that matches these options.\n  // In case that is not possible, the server must respond with the\n  // INVALID_ARGUMENT error.\n  RdfStreamOptions requested_options = 2;\n}\n\n// Acknowledgement of receiving a stream sent by the server to the client.\nmessage RdfStreamReceived {\n}\n\n// Pub/Sub service for RDF streams, to be implemented by the server.\nservice RdfStreamService {\n  // Subscribe to an RDF stream.\n  rpc SubscribeRdf (RdfStreamSubscribe) returns (stream RdfStreamFrame);\n  // Publish an RDF stream.\n  // In case the server cannot process the stream, it must respond with\n  // the INVALID_ARGUMENT error.\n  rpc PublishRdf (stream RdfStreamFrame) returns (RdfStreamReceived);\n}\n
    "},{"location":"specification/protobuf-source/#see-also","title":"See also","text":""},{"location":"specification/reference/","title":"Protocol Documentation","text":""},{"location":"specification/reference/#table-of-contents","title":"Table of Contents","text":"

    Top

    "},{"location":"specification/reference/#grpcproto","title":"grpc.proto","text":""},{"location":"specification/reference/#rdfstreamreceived","title":"RdfStreamReceived","text":"

    Acknowledgement of receiving a stream sent by the server to the client.

    "},{"location":"specification/reference/#rdfstreamsubscribe","title":"RdfStreamSubscribe","text":"

    Subscribe command sent by the client to the server.

    Field Type Label Description topic string The topic to which the client wants to subscribe (UTF-8 encoded). requested_options RdfStreamOptions Optional: the stream options requested by the client. The server should respond with a stream that matches these options. In case that is not possible, the server must respond with the INVALID_ARGUMENT error.

    "},{"location":"specification/reference/#rdfstreamservice","title":"RdfStreamService","text":"

    Pub/Sub service for RDF streams, to be implemented by the server.

    Method Name Request Type Response Type Description SubscribeRdf RdfStreamSubscribe RdfStreamFrame stream Subscribe to an RDF stream. PublishRdf RdfStreamFrame stream RdfStreamReceived Publish an RDF stream. In case the server cannot process the stream, it must respond with the INVALID_ARGUMENT error.

    Top

    "},{"location":"specification/reference/#rdfproto","title":"rdf.proto","text":""},{"location":"specification/reference/#rdfdatatypeentry","title":"RdfDatatypeEntry","text":"

    Entry in the datatype lookup table

    Field Type Label Description id uint32 1-based identifier value string Value of the datatype (UTF-8 encoded)

    "},{"location":"specification/reference/#rdfdefaultgraph","title":"RdfDefaultGraph","text":"

    Empty message indicating the default RDF graph.

    "},{"location":"specification/reference/#rdfgraph","title":"RdfGraph","text":"

    RDF graph nodes

    Field Type Label Description iri RdfIri IRI bnode string Blank node literal RdfLiteral Literal \u2013 only valid for generalized RDF streams default_graph RdfDefaultGraph Default graph repeat RdfRepeat Repeated term \u2013 only valid in a QUADS stream

    "},{"location":"specification/reference/#rdfgraphend","title":"RdfGraphEnd","text":"

    End of a graph in a GRAPHS stream

    "},{"location":"specification/reference/#rdfgraphstart","title":"RdfGraphStart","text":"

    Start of a graph in a GRAPHS stream

    Field Type Label Description graph RdfGraph

    "},{"location":"specification/reference/#rdfiri","title":"RdfIri","text":"

    RDF IRIs Either prefix_id or name_id can be zero if the prefix or the suffix are not used.

    Field Type Label Description prefix_id uint32 1-based, refers to an entry in the prefix lookup. name_id uint32 1-based, refers to an entry in the name lookup.

    "},{"location":"specification/reference/#rdfliteral","title":"RdfLiteral","text":"

    RDF literals

    Field Type Label Description lex string The lexical form of the literal. simple RdfLiteralSimple Simple literal with datatype xsd:string. langtag string Language-tagged string. datatype uint32 Typed literal. The datatype is a reference to an entry in the datatype lookup.

    "},{"location":"specification/reference/#rdfliteralsimple","title":"RdfLiteralSimple","text":"

    Empty message indicating a simple literal

    "},{"location":"specification/reference/#rdfnameentry","title":"RdfNameEntry","text":"

    Entry in the name lookup table

    Field Type Label Description id uint32 1-based identifier value string Value of the name (UTF-8 encoded)

    "},{"location":"specification/reference/#rdfprefixentry","title":"RdfPrefixEntry","text":"

    Entry in the prefix lookup table

    Field Type Label Description id uint32 1-based identifier value string Value of the prefix (UTF-8 encoded)

    "},{"location":"specification/reference/#rdfquad","title":"RdfQuad","text":"

    RDF quad

    Field Type Label Description s RdfTerm Quad subject p RdfTerm Quad predicate o RdfTerm Quad object g RdfGraph Quad graph node

    "},{"location":"specification/reference/#rdfrepeat","title":"RdfRepeat","text":"

    Empty message indicating a repeated term from the previous statement.

    "},{"location":"specification/reference/#rdfstreamframe","title":"RdfStreamFrame","text":"

    RDF stream frame

    Field Type Label Description rows RdfStreamRow repeated Stream rows

    "},{"location":"specification/reference/#rdfstreamoptions","title":"RdfStreamOptions","text":"

    RDF stream options

    Field Type Label Description stream_name string Name of the stream (completely optional). This may be used for, e.g., topic names in a pub/sub system. stream_type RdfStreamType Type of the stream (required) generalized_statements bool Whether the stream may contain generalized triples, quads, or datasets use_repeat bool Whether RdfRepeat will be used rdf_star bool Whether the stream may contain RDF-star statements max_name_table_size uint32 Maximum size of the name lookup table max_prefix_table_size uint32 Maximum size of the prefix lookup table max_datatype_table_size uint32 Maximum size of the datatype lookup table version uint32 Protocol version (required) For Jelly 1.0.x value must be 1. For custom extensions, the value must be 1000 or higher.

    "},{"location":"specification/reference/#rdfstreamrow","title":"RdfStreamRow","text":"

    RDF stream row

    Field Type Label Description options RdfStreamOptions Stream options. Must occur at the start of the stream. triple RdfTriple RDF triple statement. Valid in TRIPLES and GRAPHS streams. quad RdfQuad RDF quad statement. Only valid in a QUADS stream. graph_start RdfGraphStart Graph boundary: ends the currently transmitted graph and starts a new one Only valid in a GRAPHS stream. graph_end RdfGraphEnd Explicit end of a graph. Signals the consumer that the transmitted graph is complete. Only valid in a GRAPHS stream. name RdfNameEntry Entry in the name lookup table. prefix RdfPrefixEntry Entry in the prefix lookup table. datatype RdfDatatypeEntry Entry in the datatype lookup table.

    "},{"location":"specification/reference/#rdfterm","title":"RdfTerm","text":"

    RDF terms

    Field Type Label Description iri RdfIri IRI bnode string Blank node literal RdfLiteral Literal triple_term RdfTriple RDF-star quoted triple repeat RdfRepeat Repeated term from the previous statement. Only valid in statements, not quoted triples.

    "},{"location":"specification/reference/#rdftriple","title":"RdfTriple","text":"

    RDF triple

    Field Type Label Description s RdfTerm Triple subject p RdfTerm Triple predicate o RdfTerm Triple object

    "},{"location":"specification/reference/#rdfstreamtype","title":"RdfStreamType","text":"

    RDF stream type

    Name Number Description RDF_STREAM_TYPE_UNSPECIFIED 0 Unspecified stream type \u2013 invalid RDF_STREAM_TYPE_TRIPLES 1 RDF triples RDF_STREAM_TYPE_QUADS 2 RDF quads RDF_STREAM_TYPE_GRAPHS 3 RDF triples grouped in graphs"},{"location":"specification/reference/#scalar-value-types","title":"Scalar Value Types","text":".proto Type Notes C++ Java Python Go C# PHP Ruby double double double float float64 double float Float float float float float float32 float float Float int32 Uses variable-length encoding. Inefficient for encoding negative numbers \u2013 if your field is likely to have negative values, use sint32 instead. int32 int int int32 int integer Bignum or Fixnum (as required) int64 Uses variable-length encoding. Inefficient for encoding negative numbers \u2013 if your field is likely to have negative values, use sint64 instead. int64 long int/long int64 long integer/string Bignum uint32 Uses variable-length encoding. uint32 int int/long uint32 uint integer Bignum or Fixnum (as required) uint64 Uses variable-length encoding. uint64 long int/long uint64 ulong integer/string Bignum or Fixnum (as required) sint32 Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int32s. int32 int int int32 int integer Bignum or Fixnum (as required) sint64 Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int64s. int64 long int/long int64 long integer/string Bignum fixed32 Always four bytes. More efficient than uint32 if values are often greater than 2^28. uint32 int int uint32 uint integer Bignum or Fixnum (as required) fixed64 Always eight bytes. More efficient than uint64 if values are often greater than 2^56. uint64 long int/long uint64 ulong integer/string Bignum sfixed32 Always four bytes. int32 int int int32 int integer Bignum or Fixnum (as required) sfixed64 Always eight bytes. 
int64 long int/long int64 long integer/string Bignum bool bool boolean boolean bool bool boolean TrueClass/FalseClass string A string must always contain UTF-8 encoded or 7-bit ASCII text. string String str/unicode string string string String (UTF-8) bytes May contain any arbitrary sequence of bytes. string ByteString str []byte ByteString string String (ASCII-8BIT)"},{"location":"specification/serialization/","title":"Jelly serialization format specification","text":"

    This document is the specification of the Jelly serialization format. It is intended for implementers of Jelly libraries and applications. If you are looking for a user-friendly introduction to Jelly, see the Jelly index page.

    This document is accompanied by the Jelly Protobuf reference and the Protobuf definition itself (rdf.proto).

    The following assumptions are used in this document:

    Author: Piotr Sowi\u0144ski (Ostrzyciel)

    Version: 1.0.0

    Document status: Draft specification

    Info

    The key words \"MUST\", \"MUST NOT\", \"REQUIRED\", \"SHOULD\", \"SHOULD NOT\", \"RECOMMENDED\", \"MAY\", and \"OPTIONAL\" in this document are to be interpreted as described in RFC 2119.

    Note

    The \"Note\" blocks in this document are not part of the specification, but rather provide additional information for implementers.

    Note

    The \"Example\" blocks in this document are not part of the specification, but rather provide informal examples of the serialization format. The examples use the Protocol Buffers Text Format Language.

    "},{"location":"specification/serialization/#conformance","title":"Conformance","text":"

    Implementations MAY choose to implement only a subset of the following specification. In this case, they SHOULD clearly specify which parts of the specification they implement. In the rest of this specification, the keywords \"MUST\", \"MUST NOT\", etc. refer to full (not partial) implementations.

    Note

    Implementations may in particular choose to not implement features that are not supported on the target platform (e.g., RDF datasets, RDF-star, generalized RDF terms, etc.).

    Implementations MAY also choose to extend Jelly with additional features that SHOULD NOT interfere with the serialization being readable by implementations which follow the specification.

    "},{"location":"specification/serialization/#versioning","title":"Versioning","text":"

    The protocol follows the Semantic Versioning 2.0 scheme. Each MAJOR.MINOR semantic version corresponds to an integer version tag in the protocol. The version tag is encoded in the version field of the RdfStreamOptions message. See also the section on stream options for more information on how to handle the version tags in serialized streams.

    The following versions of the protocol are defined:

    Version tag Semantic version 1 1.0.x (current)

    Note

    Releases of the protocol are published on GitHub.

    "},{"location":"specification/serialization/#backward-compatibility","title":"Backward compatibility","text":"

    Implementations SHOULD ensure backward compatibility. To achieve backward compatibility, the implementation MUST be able to read all messages from the previous releases of the protocol with the same MAJOR version. The implementation MAY also be able to read messages from previous releases of the protocol with a different MAJOR version.

    Note

    The protocol is designed in such a way that you don't need to worry about backward compatibility. The only thing you need to do is to implement the latest version of the protocol, and you will automatically get backward compatibility with all previous versions (of the same MAJOR).

    "},{"location":"specification/serialization/#forward-compatibility","title":"Forward compatibility","text":"

    Forward compatibility is not guaranteed. Implementations MAY be able to read messages from future releases of the protocol with the same MAJOR version. Implementations MAY also be able to read messages from future releases of the protocol with a different MAJOR version.

    "},{"location":"specification/serialization/#actors-and-implementations","title":"Actors and implementations","text":"

    Jelly assumes there to be two actors involved in processing the stream: the producer (serializer) and the consumer (parser). The producer is responsible for serializing the RDF data into the Jelly format, and the consumer is responsible for parsing the Jelly format into RDF data.

    Implementations may include only the producer, only the consumer, or both.

    "},{"location":"specification/serialization/#format-specification","title":"Format specification","text":"

    The Jelly serialization format uses Protocol Buffers version 3 as the underlying serialization format. All implementations MUST use a compliant Protocol Buffers implementation. The Protocol Buffers schema for Jelly serialization is defined in rdf.proto (source code, reference).

    The Jelly format is a stream (i.e., an ordered sequence) of stream frames. The frames may be sent one-by-one using a dedicated streaming protocol (e.g., gRPC, MQTT, Kafka) or written in sequence to a byte stream (e.g., a file or socket). When writing to a byte stream, the frames MUST be delimited \u2013 see the delimited variant.

    Jelly supports several distinct types of streams, and uses a simple and configurable compression mechanism using lookup tables.

    "},{"location":"specification/serialization/#stream-frames","title":"Stream frames","text":"

    A stream frame is a message of type RdfStreamFrame (reference). The message has only one field (rows), which is a repeated field of type RdfStreamRow (reference). A stream frame may contain any number of rows, however it is RECOMMENDED to keep the size of the frames below 1 MB. The semantics for the frames are not defined by the protocol. The end users are free to define their own semantics for the frames.

    Note

    A stream frame in a \"simple flat file\" is just a batch of RDF statements \u2013 the stream frames may carry no semantics in this case. You can make the stream frame as long as the file itself, but this is not recommended, as it would make the file harder to process.

    Note

    Stream frames can also be used to indicate individual stream elements. For example, in the case of a stream of RDF datasets, each frame may contain one dataset. RiverBench datasets use this convention in their distributions.

    "},{"location":"specification/serialization/#ordering","title":"Ordering","text":"

    Stream frames MUST be processed strictly in order to preserve the semantics of the stream. Each stream frame MUST be processed in its entirety before the next stream frame is processed.

    Implementations MAY choose to adopt a non-standard solution where the order or delivery of the frames is not guaranteed and the stream can be read in more than one order or without some frames. The implementation MUST clearly specify in the documentation that it uses such a non-standard solution.

    Note

    An example where not adhering to the strict ordering may be useful is when you are dealing with a network streaming protocol that does not guarantee the order of the messages (e.g., MQTT).

    Note

    The main thing you will need to worry about is the order of the lookup tables. If you can, emit all lookup tables at the beginning of the stream. When using stream partitions (e.g., in Kafka), you should ensure that the lookups are emitted to each partition. Alternatively, you can transmit the lookup tables separately from the stream.

    "},{"location":"specification/serialization/#stream-rows","title":"Stream rows","text":"

    A stream row is a message of type RdfStreamRow. It has one of the following fields set:

    Stream rows MUST be processed strictly in order to preserve the semantics of the stream.

    "},{"location":"specification/serialization/#stream-types","title":"Stream types","text":"

    The type of the stream MUST be explicitly specified in the stream options header. The type of the stream is defined by the RdfStreamType enum (reference). The following types are defined:

    Note

    See also a more human explanation of the available stream types.

    "},{"location":"specification/serialization/#stream-options","title":"Stream options","text":"

    The stream options is a message of type RdfStreamOptions (reference). It MUST be the first row in the stream. It MAY appear more than once in the stream (also after other rows), but it MUST be identical to all previous occurrences. Implementations MAY throw an error if the stream options header is not present at the start of the stream; alternatively, they MAY use the default options. Implementations SHOULD NOT throw an error if the stream options header is present more than once in the stream.

    The stream options header instructs the consumer of the stream (parser) on the size of the needed lookups to decode the stream and the features used by the stream.

    The stream options header contains the following fields:

    "},{"location":"specification/serialization/#prefix-name-and-datatype-lookups","title":"Prefix, name, and datatype lookups","text":"

    Jelly uses a common mechanism of lookup tables for IRI prefixes, IRI names (postfixes), and datatypes. The lookups are used to compress the IRIs and datatypes in the stream. All lookups function in the same way:

    Note

    The spec does not specify what strategy the producer should use to update the lookup. You can use the LRU strategy (as used in the Scala implementation), LFU, or something more complex. You can also have a fixed lookup in the producer and communicate it at the start of the stream. This is possible if you are using a fixed set of prefixes, names, or datatypes and want to conserve computing power (e.g., in IoT devices).

    The simplest way to implement the consumer's lookup is to just use an indexed array of fixed size. The workload on the consumer's side is much lower than on the producer's side, so your choice of the strategy depends largely on the producer.

    "},{"location":"specification/serialization/#rdf-statements-and-graphs","title":"RDF statements and graphs","text":"

    RDF statements (triples or quads) are communicated in three different ways, depending on the type of the stream:

    Note

    If the stream is meant to represent a single RDF dataset, then the graphs should be able to stretch across several stream frames. If the stream is meant to represent a stream of RDF datasets, then the graphs should be contained within a single stream frame.

    "},{"location":"specification/serialization/#rdf-terms","title":"RDF terms","text":"

    RDF terms are encoded using the RdfTerm message. The message has one of the following fields set: iri, bnode, literal, triple_term, repeat, corresponding to RDF IRIs, blank nodes, literals, RDF-star quoted triples, and repeated terms, respectively. Exactly one of these fields MUST be set.

    "},{"location":"specification/serialization/#iris","title":"IRIs","text":"

    The IRIs are encoded using the RdfIri message. The message has two fields that together make up the IRI:

    At least one of the prefix_id and name_id fields MUST be set to a non-default, positive value. The IRI is then constructed by concatenating the prefix and the name. The IRI SHOULD be a valid IRI, as defined in RFC 3987.

    Example (click to expand)

    Assume the following lookup entries were defined in the stream (wrapping RdfStreamRows were omitted for brevity):

    RdfPrefixEntry {\n    id: 1\n    value: \"http://example.com/\"\n}\nRdfNameEntry {\n    id: 4\n    value: \"example\"\n}\nRdfNameEntry {\n    id: 1\n    value: \"http://test.com/test\"\n}\n

    Then the following IRIs are encoded as follows:

    # http://example.com/example\nRdfIri {\n    prefix_id: 1\n    name_id: 4\n} \n\n# http://example.com/\nRdfIri {\n    prefix_id: 1\n}\n\n# http://test.com/test\nRdfIri {\n    name_id: 1\n}\n

    Note

    The spec does not specify how to split the IRIs into names and prefixes. You can use any strategy you want, as long as you follow the rules above. The simplest way is to split the IRI at the last occurrence of the # or / character \u2013 this is what the Scala implementation uses. The prefixes are not meant to be user-facing, but you can also use user-defined prefixes (e.g., @prefix in Turtle) to split the IRIs.

    "},{"location":"specification/serialization/#blank-nodes","title":"Blank nodes","text":"

    RDF blank nodes are represented using simple strings. The string is the identifier of the blank node. The identifier may be any valid UTF-8 string.

    Because the spec does not define the semantics of the stream frames, blank node identifiers are not guaranteed to be unique across the stream frames. The consumer MAY choose to treat the blank nodes as unique across the stream (and thus treat all occurrences of the identifier as a single node), or it MAY choose to treat them as unique only within a single stream frame. The producer SHOULD specify in the documentation which strategy it uses.

    Note

    If the stream is meant to represent a single RDF graph or dataset, then the blank node identifiers should be unique across the stream so that you can refer to them across stream frame boundaries. If the frames refer to different graphs or datasets, then the blank node identifiers should be unique only within a single frame.

    Note

    Many RDF libraries (e.g., RDF4J, Apache Jena) use internal identifiers for blank nodes, which can be used as the identifiers in Jelly streams. You can also use a different format, for example with shorter identifiers to preserve space.

    "},{"location":"specification/serialization/#literals","title":"Literals","text":"

    RDF literals are represented using the RdfLiteral message (reference). The message has the following fields:

    "},{"location":"specification/serialization/#quoted-triples-rdf-star","title":"Quoted triples (RDF-star)","text":"

    RDF-star quoted triples are represented using the RdfTriple message (reference). Quoted triples are encoded in the same manner as triple statements, with the only difference being that repeated terms (RdfRepeat) MUST NOT be used in quoted triples. The consumer SHOULD throw an error if a repeated term is encountered in a quoted triple.

    Quoted triples may be nested up to arbitrary depth. The consumer SHOULD throw an error if the depth of the nesting exceeds the capabilities of the implementation.

    "},{"location":"specification/serialization/#repeated-terms","title":"Repeated terms","text":"

    Repeated terms indicate that a term in a given position (subject, predicate, object, or graph node in quads) is the same as the term in the same position in the previous row. The repeated terms are encoded using the RdfRepeat message (reference). The message does not have any fields.

    Example (click to expand)

    In the example the wrapping RdfStreamRows were omitted for brevity:

    # First row\nRdfTriple {\n    s: RdfTerm {\n        iri: RdfIri {\n            prefix_id: 1\n            name_id: 1\n        }\n    }\n    p: RdfTerm {\n        iri: RdfIri {\n            prefix_id: 1\n            name_id: 2\n        }\n    }\n    o: RdfTerm {\n        bnode: \"b1\"\n    }\n}\n\n# Second row \u2013 repeating the subject and predicate\nRdfTriple {\n    s: RdfRepeat {} # RdfTerm(iri: RdfIri(1, 1))\n    p: RdfRepeat {} # RdfTerm(iri: RdfIri(1, 2))\n    o: RdfTerm {\n        bnode: \"b2\"\n    }\n}\n\n# Third row \u2013 repeating the subject and object\nRdfTriple {\n    s: RdfRepeat {} # RdfTerm(iri: RdfIri(1, 1))\n    p: RdfTerm {\n        iri: RdfIri {\n            prefix_id: 2\n            name_id: 3\n        }\n    }\n    o: RdfRepeat {} # RdfTerm(bnode = \"b2\")\n}\n

    Note

    Repeated terms can be simply implemented with four variables (s, p, o, g) holding the last non-repeated value of a term in that position. This O(1) solution is what the Scala implementation uses.

    Note

    Although repeated terms can stretch across stream frame boundaries (i.e., refer to values last seen in the previous stream frame), you don't have to use this feature. If your use case requires the stream frames to be more independent of each other (see: stream frame ordering), you can just reset the repeated terms at the start of each stream frame.

    "},{"location":"specification/serialization/#rdf-graph-nodes","title":"RDF graph nodes","text":"

    RDF graph nodes are encoded using the RdfGraph message. The message is used both in the RdfGraphStart message for GRAPHS streams and in the RdfQuad message for QUADS streams. The message MUST have exactly one of the following fields set:

    "},{"location":"specification/serialization/#delimited-variant-of-jelly","title":"Delimited variant of Jelly","text":"

    Note

    Protobuf messages are not delimited, so if you write multiple messages to the same file / socket / byte stream, you need to add some kind of delimiter between them. Jelly uses the convention already implemented in some protobuf libraries of prepending a varint before the message, to specify the length of the message.

    A byte stream (or file) in the delimited variant MUST consist of a series of delimited RdfStreamFrame messages. A delimited message is a message that has a varint prepended before it, specifying the length of the message in bytes.

    Implementing the delimited variant is OPTIONAL.

    "},{"location":"specification/serialization/#delimited-variant-implementations","title":"Delimited variant implementations","text":"

    The delimiting convention is implemented in Protobuf libraries for:

    The JVM (Scala) implementation of Jelly also supports the delimited variant \u2013 see the documentation.

    "},{"location":"specification/serialization/#internet-media-type-and-file-extension","title":"Internet media type and file extension","text":"

    The RECOMMENDED media type for Jelly is application/x-jelly-rdf. The RECOMMENDED file extension is .jelly.

    The files SHOULD be saved in the delimited variant of Jelly.

    "},{"location":"specification/serialization/#implementations","title":"Implementations","text":"

    This section is not part of the specification.

    The following implementations of the Jelly serialization format specification are available:

    "},{"location":"specification/streaming/","title":"Jelly gRPC streaming protocol specification","text":"

    This document is the specification of the Jelly gRPC streaming protocol (publish/subscribe mechanism). It is intended for implementers of Jelly libraries and applications. If you are looking for a user-friendly introduction to Jelly, see the Jelly index page.

    This document is accompanied by the Jelly Protobuf reference and the Protobuf definition itself (grpc.proto).

    The following assumptions are used in this document:

    Author: Piotr Sowi\u0144ski (Ostrzyciel)

    Version: 1.0.0

    Document status: Draft specification

    Info

    The key words \"MUST\", \"MUST NOT\", \"REQUIRED\", \"SHOULD\", \"SHOULD NOT\", \"RECOMMENDED\", \"MAY\", and \"OPTIONAL\" in this document are to be interpreted as described in RFC 2119.

    Note

    The \"Note\" blocks in this document are not part of the specification, but rather provide additional information for implementers.

    Note

    The \"Example\" blocks in this document are not part of the specification, but rather provide informal examples of the serialization format. The examples use the Protocol Buffers Text Format Language.

    "},{"location":"specification/streaming/#conformance","title":"Conformance","text":"

    Implementations MAY choose to implement only a subset of the following specification. In this case, they SHOULD clearly specify which parts of the specification they implement. In the rest of this specification, the keywords \"MUST\", \"MUST NOT\", etc. refer to full (not partial) implementations.

    "},{"location":"specification/streaming/#versioning","title":"Versioning","text":"

    The streaming protocol follows the Semantic Versioning 2.0 scheme. The version of the gRPC streaming protocol is equal to the version of the corresponding serialization format (1:1 equivalence). The version of the protocol is specified in the stream options \u2013 see the serialization specification for details.

    Note

    Releases of the protocol are published on GitHub.

    "},{"location":"specification/streaming/#backward-compatibility","title":"Backward compatibility","text":"

    Implementations SHOULD ensure backward compatibility. To achieve backward compatibility, the implementation MUST be able to respond to all RPCs from the previous releases of the protocol with the same MAJOR version. The implementation MAY also be able to respond to RPCs from previous releases of the protocol with a different MAJOR version.

    Note

    The protocol is designed in such a way that you don't need to worry about backward compatibility. The only thing you need to do is to implement the latest version of the protocol, and you will automatically get backward compatibility with all previous versions (of the same MAJOR).

    "},{"location":"specification/streaming/#forward-compatibility","title":"Forward compatibility","text":"

    Forward compatibility is not guaranteed. Implementations MAY be able to respond to RPCs from future releases of the protocol with the same MAJOR version. Implementations MAY also be able to respond to RPCs from future releases of the protocol with a different MAJOR version.

    "},{"location":"specification/streaming/#actors-and-implementations","title":"Actors and implementations","text":"

    The Jelly gRPC streaming protocol assumes there to be two actors: the server and the client. These actors can both play the role of the producer or the consumer of the stream (see serialization specification), depending on the direction of the stream.

    Implementations may include only the server, only the client, or both.

    "},{"location":"specification/streaming/#protocol-specification","title":"Protocol specification","text":"

    The protocol specifies a simple publish/subscribe mechanism with topics identified with UTF-8 strings. The client can subscribe to a topic and receive messages published to that topic by the server. The client can also publish messages to a topic.

    The described protocol is implemented as a gRPC service RdfStreamService (reference).

    Note

    The protocol does not specify what happens to the messages on the server \u2013 this is NOT a broker or message queue specification. The protocol is meant to enable point-to-point communication, but can also be used to implement a broker or a similar service (see Usage notes below).

    You can also ignore the topics and use the protocol as a simple streaming protocol.

    "},{"location":"specification/streaming/#topics","title":"Topics","text":"

    Topics are identified with UTF-8 strings. The topic string MUST be valid UTF-8. There are no further restrictions on the topic string.

    Note

    The topic can be whatever you like \u2013 it can also be empty. It is up to the user to decide what to use the topics for, or whether to use them at all.

    "},{"location":"specification/streaming/#subscribing-to-a-stream","title":"Subscribing to a stream","text":"

    The client subscribes to a stream from the server with the SubscribeRdf RPC (reference). The RPC is initiated with an RdfStreamSubscribe message (reference) from the client. The message includes two OPTIONAL fields:

    The server MUST respond with either a stream of RdfStreamFrame messages or an error.

    "},{"location":"specification/streaming/#stream-options-handling","title":"Stream options handling","text":"

    The client MAY request specific options for the stream it subscribes to. In that case, the client MUST include the options field in the RdfStreamSubscribe message. The server SHOULD respond with a stream that uses options that are compatible with the options requested by the client. If the server cannot respond with a stream that uses options that are compatible with the options requested by the client, the server MUST respond with the INVALID_ARGUMENT error.

    The following rules are used to determine if the options are compatible. All rules MUST be satisfied for the options to be compatible.

    Option Client request Server response stream_name x MAY be x stream_type x MUST be x generalized_statements x MUST be x or false use_repeat x MUST be x or false rdf_star x MUST be x or false max_name_table_size x MUST be <= x max_prefix_table_size x MUST be <= x max_datatype_table_size x MUST be <= x version x MUST be <= x

    Notes

    The server should implement some limits for the stream options it supports, for example the maximum size of the name table. Otherwise, a client may request a name table that takes up all the server's memory.

    "},{"location":"specification/streaming/#publishing-a-stream","title":"Publishing a stream","text":"

    The client publishes a stream to the server with the PublishRdf RPC (reference). The RPC is initiated with a stream of RdfStreamFrame messages from the client. The stream MUST include at least one message. The first frame MUST include a row with the stream options as the first row. After the stream successfully completes, the server MUST respond with the RdfStreamReceived message (reference).

    If the server cannot handle the stream with the specified options, the server MUST respond with the INVALID_ARGUMENT error.

    "},{"location":"specification/streaming/#usage-notes","title":"Usage notes","text":"

    This section is not part of the specification.

    The protocol is deliberately very general and unrestrictive. The pub/sub mechanism can be used in a number of ways, possibly extending the existing base protocol. The following are some examples of how the protocol can be used:

    These use cases can be implemented with the protocol as-is, or by extending the protocol with additional messages and/or RPCs. In either case, the protocol provides a base layer for compatibility between different implementations.

    "},{"location":"specification/streaming/#implementations","title":"Implementations","text":"

    This section is not part of the specification.

    The following implementations of the Jelly gRPC streaming protocol specification are available:

    "}]} \ No newline at end of file diff --git a/1.0/sitemap.xml b/1.0/sitemap.xml index 07dc9bf..002061b 100644 --- a/1.0/sitemap.xml +++ b/1.0/sitemap.xml @@ -75,6 +75,11 @@ 2023-10-13 daily + + https://jelly-rdf.github.io/1.0/specification/protobuf-source/ + 2023-10-13 + daily + https://jelly-rdf.github.io/1.0/specification/reference/ 2023-10-13 diff --git a/1.0/sitemap.xml.gz b/1.0/sitemap.xml.gz index 52dd7cf..22817f3 100644 Binary files a/1.0/sitemap.xml.gz and b/1.0/sitemap.xml.gz differ diff --git a/1.0/specification/index.html b/1.0/specification/index.html index 3368b5f..0ffa390 100644 --- a/1.0/specification/index.html +++ b/1.0/specification/index.html @@ -330,6 +330,8 @@ + + @@ -383,7 +385,7 @@ - Serialization + Serialization format @@ -438,6 +440,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • @@ -845,11 +867,13 @@

    Jelly protocol specification

    The Jelly protocol consists of two parts: the gRPC streaming protocol and the serialization format. The serialization format is the basis for Jelly, specifying how to turn RDF data into bytes and back. The gRPC streaming protocol defines a publish/subscribe mechanism for exchanging RDF data between a client and a server, using gRPC.

    -

    See the user guide for a friendly introduction to Jelly.

    +

    See the user guide for a friendlier introduction to Jelly.

    See the specification pages for more details:

    diff --git a/1.0/specification/media-type/index.html b/1.0/specification/media-type/index.html index 4d29628..cd85b73 100644 --- a/1.0/specification/media-type/index.html +++ b/1.0/specification/media-type/index.html @@ -13,7 +13,7 @@ - + @@ -330,6 +330,8 @@ + + @@ -383,7 +385,7 @@ - Serialization + Serialization format @@ -437,6 +439,26 @@ + +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + diff --git a/1.0/specification/proto/grpc.proto b/1.0/specification/proto/grpc.proto new file mode 100644 index 0000000..0cb6ac2 --- /dev/null +++ b/1.0/specification/proto/grpc.proto @@ -0,0 +1,32 @@ +syntax = "proto3"; +package eu.ostrzyciel.jelly.core.proto.v1; + +// gRPC service specifications for RDF streaming. +// Protocol version: 1.0.0 + +import "rdf.proto"; + +// Subscribe command sent by the client to the server. +message RdfStreamSubscribe { + // The topic to which the client wants to subscribe (UTF-8 encoded). + string topic = 1; + // Optional: the stream options requested by the client. + // The server should respond with a stream that matches these options. + // In case that is not possible, the server must respond with the + // INVALID_ARGUMENT error. + RdfStreamOptions requested_options = 2; +} + +// Acknowledgement of receiving a stream sent by the server to the client. +message RdfStreamReceived { +} + +// Pub/Sub service for RDF streams, to be implemented by the server. +service RdfStreamService { + // Subscribe to an RDF stream. + rpc SubscribeRdf (RdfStreamSubscribe) returns (stream RdfStreamFrame); + // Publish an RDF stream. + // In case the server cannot process the stream, it must respond with + // the INVALID_ARGUMENT error. + rpc PublishRdf (stream RdfStreamFrame) returns (RdfStreamReceived); +} diff --git a/1.0/specification/proto/rdf.proto b/1.0/specification/proto/rdf.proto new file mode 100644 index 0000000..776f9dd --- /dev/null +++ b/1.0/specification/proto/rdf.proto @@ -0,0 +1,202 @@ +syntax = "proto3"; +package eu.ostrzyciel.jelly.core.proto.v1; + +// Jelly RDF serialization with Protocol Buffers. +// Protocol version: 1.0.0 + +// RDF IRIs +// Either prefix_id or name_id can be zero if the prefix or the suffix are not used. +message RdfIri { + // 1-based, refers to an entry in the prefix lookup. + uint32 prefix_id = 1; + // 1-based, refers to an entry in the name lookup. 
+ uint32 name_id = 2; +} + +// RDF literals +message RdfLiteral { + // The lexical form of the literal. + string lex = 1; + + // Literal kind – exactly one of these fields must be set. + oneof literalKind { + // Simple literal with datatype xsd:string. + RdfLiteralSimple simple = 2; + // Language-tagged string. + string langtag = 3; + // Typed literal. The datatype is a reference to an entry in the datatype lookup. + uint32 datatype = 4; + } +} + +// Empty message indicating a simple literal +message RdfLiteralSimple { +} + +// Empty message indicating a repeated term from the previous statement. +message RdfRepeat { +} + +// RDF terms +message RdfTerm { + // Exactly one of these fields must be set. + oneof term { + // IRI + RdfIri iri = 1; + // Blank node + string bnode = 2; + // Literal + RdfLiteral literal = 3; + // RDF-star quoted triple + RdfTriple triple_term = 4; + // Repeated term from the previous statement. Only valid in statements, not quoted triples. + RdfRepeat repeat = 10; + } +} + +// Empty message indicating the default RDF graph. +message RdfDefaultGraph { +} + +// RDF graph nodes +message RdfGraph { + // Exactly one of these fields must be set. 
+ oneof graph { + // IRI + RdfIri iri = 1; + // Blank node + string bnode = 2; + // Literal – only valid for generalized RDF streams + RdfLiteral literal = 3; + // Default graph + RdfDefaultGraph default_graph = 4; + // Repeated term – only valid in a QUADS stream + RdfRepeat repeat = 10; + } +} + +// RDF triple +message RdfTriple { + // Triple subject + RdfTerm s = 1; + // Triple predicate + RdfTerm p = 2; + // Triple object + RdfTerm o = 3; +} + +// RDF quad +message RdfQuad { + // Quad subject + RdfTerm s = 1; + // Quad predicate + RdfTerm p = 2; + // Quad object + RdfTerm o = 3; + // Quad graph node + RdfGraph g = 4; +} + +// Start of a graph in a GRAPHS stream +message RdfGraphStart { + RdfGraph graph = 1; +} + +// End of a graph in a GRAPHS stream +message RdfGraphEnd { +} + +// Entry in the name lookup table +message RdfNameEntry { + // 1-based identifier + uint32 id = 1; + // Value of the name (UTF-8 encoded) + string value = 2; +} + +// Entry in the prefix lookup table +message RdfPrefixEntry { + // 1-based identifier + uint32 id = 1; + // Value of the prefix (UTF-8 encoded) + string value = 2; +} + +// Entry in the datatype lookup table +message RdfDatatypeEntry { + // 1-based identifier + uint32 id = 1; + // Value of the datatype (UTF-8 encoded) + string value = 2; +} + +// RDF stream options +message RdfStreamOptions { + // Name of the stream (completely optional). + // This may be used for, e.g., topic names in a pub/sub system. 
+ string stream_name = 1; + // Type of the stream (required) + RdfStreamType stream_type = 2; + // Whether the stream may contain generalized triples, quads, or datasets + bool generalized_statements = 3; + // Whether RdfRepeat will be used + bool use_repeat = 4; + // Whether the stream may contain RDF-star statements + bool rdf_star = 5; + // Maximum size of the name lookup table + uint32 max_name_table_size = 9; + // Maximum size of the prefix lookup table + uint32 max_prefix_table_size = 10; + // Maximum size of the datatype lookup table + uint32 max_datatype_table_size = 11; + // Protocol version (required) + // For Jelly 1.0.x value must be 1. + // For custom extensions, the value must be 1000 or higher. + uint32 version = 15; +} + +// RDF stream type +enum RdfStreamType { + // Unspecified stream type – invalid + RDF_STREAM_TYPE_UNSPECIFIED = 0; + // RDF triples + RDF_STREAM_TYPE_TRIPLES = 1; + // RDF quads + RDF_STREAM_TYPE_QUADS = 2; + // RDF triples grouped in graphs + RDF_STREAM_TYPE_GRAPHS = 3; +} + +// RDF stream row +message RdfStreamRow { + // Exactly one of these fields must be set. + oneof row { + // Stream options. Must occur at the start of the stream. + RdfStreamOptions options = 1; + // RDF triple statement. + // Valid in TRIPLES and GRAPHS streams. + RdfTriple triple = 2; + // RDF quad statement. + // Only valid in a QUADS stream. + RdfQuad quad = 3; + // Graph boundary: ends the currently transmitted graph and starts a new one + // Only valid in a GRAPHS stream. + RdfGraphStart graph_start = 4; + // Explicit end of a graph. + // Signals the consumer that the transmitted graph is complete. + // Only valid in a GRAPHS stream. + RdfGraphEnd graph_end = 5; + // Entry in the name lookup table. + RdfNameEntry name = 9; + // Entry in the prefix lookup table. + RdfPrefixEntry prefix = 10; + // Entry in the datatype lookup table. 
+ RdfDatatypeEntry datatype = 11; + } +} + +// RDF stream frame +message RdfStreamFrame { + // Stream rows + repeated RdfStreamRow rows = 1; +} diff --git a/1.0/specification/protobuf-source/index.html b/1.0/specification/protobuf-source/index.html new file mode 100644 index 0000000..1b41816 --- /dev/null +++ b/1.0/specification/protobuf-source/index.html @@ -0,0 +1,1278 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Protobuf source - Jelly + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    + + + + + + + +

    Protobuf sources

    +

    Below you will find the Protocol Buffers definitions for the Jelly serialization format and the Jelly gRPC streaming protocol. The original files are hosted on GitHub and all releases can be found here.

    +

    A human-readable reference for these definitions can be found here.

    +

    The following code is licensed under the Apache License, Version 2.0.

    +

    rdf.proto

    +
    syntax = "proto3";
    +package eu.ostrzyciel.jelly.core.proto.v1;
    +
    +// Jelly RDF serialization with Protocol Buffers.
    +// Protocol version: 1.0.0
    +
    +// RDF IRIs
    +// Either prefix_id or name_id can be zero if the prefix or the suffix are not used.
    +message RdfIri {
    +  // 1-based, refers to an entry in the prefix lookup.
    +  uint32 prefix_id = 1;
    +  // 1-based, refers to an entry in the name lookup.
    +  uint32 name_id = 2;
    +}
    +
    +// RDF literals
    +message RdfLiteral {
    +  // The lexical form of the literal.
    +  string lex = 1;
    +
    +  // Literal kind – exactly one of these fields must be set.
    +  oneof literalKind {
    +    // Simple literal with datatype xsd:string.
    +    RdfLiteralSimple simple = 2;
    +    // Language-tagged string.
    +    string langtag = 3;
    +    // Typed literal. The datatype is a reference to an entry in the datatype lookup.
    +    uint32 datatype = 4;
    +  }
    +}
    +
    +// Empty message indicating a simple literal
    +message RdfLiteralSimple {
    +}
    +
    +// Empty message indicating a repeated term from the previous statement.
    +message RdfRepeat {
    +}
    +
    +// RDF terms
    +message RdfTerm {
    +  // Exactly one of these fields must be set.
    +  oneof term {
    +    // IRI
    +    RdfIri        iri = 1;
    +    // Blank node
    +    string        bnode = 2;
    +    // Literal
    +    RdfLiteral    literal = 3;
    +    // RDF-star quoted triple
    +    RdfTriple     triple_term = 4;
    +    // Repeated term from the previous statement. Only valid in statements, not quoted triples.
    +    RdfRepeat     repeat = 10;
    +  }
    +}
    +
    +// Empty message indicating the default RDF graph.
    +message RdfDefaultGraph {
    +}
    +
    +// RDF graph nodes
    +message RdfGraph {
    +  // Exactly one of these fields must be set.
    +  oneof graph {
    +    // IRI
    +    RdfIri           iri = 1;
    +    // Blank node
    +    string           bnode = 2;
    +    // Literal – only valid for generalized RDF streams
    +    RdfLiteral       literal = 3;
    +    // Default graph
    +    RdfDefaultGraph  default_graph = 4;
    +    // Repeated term – only valid in a QUADS stream
    +    RdfRepeat        repeat = 10;
    +  }
    +}
    +
    +// RDF triple
    +message RdfTriple {
    +  // Triple subject
    +  RdfTerm s = 1;
    +  // Triple predicate
    +  RdfTerm p = 2;
    +  // Triple object
    +  RdfTerm o = 3;
    +}
    +
    +// RDF quad
    +message RdfQuad {
    +  // Quad subject
    +  RdfTerm  s = 1;
    +  // Quad predicate
    +  RdfTerm  p = 2;
    +  // Quad object
    +  RdfTerm  o = 3;
    +  // Quad graph node
    +  RdfGraph g = 4;
    +}
    +
    +// Start of a graph in a GRAPHS stream
    +message RdfGraphStart {
    +  RdfGraph graph = 1;
    +}
    +
    +// End of a graph in a GRAPHS stream
    +message RdfGraphEnd {
    +}
    +
    +// Entry in the name lookup table
    +message RdfNameEntry {
    +  // 1-based identifier
    +  uint32 id = 1;
    +  // Value of the name (UTF-8 encoded)
    +  string value = 2;
    +}
    +
    +// Entry in the prefix lookup table
    +message RdfPrefixEntry {
    +  // 1-based identifier
    +  uint32 id = 1;
    +  // Value of the prefix (UTF-8 encoded)
    +  string value = 2;
    +}
    +
    +// Entry in the datatype lookup table
    +message RdfDatatypeEntry {
    +  // 1-based identifier
    +  uint32 id = 1;
    +  // Value of the datatype (UTF-8 encoded)
    +  string value = 2;
    +}
    +
    +// RDF stream options
    +message RdfStreamOptions {
    +  // Name of the stream (completely optional).
    +  // This may be used for, e.g., topic names in a pub/sub system.
    +  string stream_name = 1;
    +  // Type of the stream (required)
    +  RdfStreamType stream_type = 2;
    +  // Whether the stream may contain generalized triples, quads, or datasets
    +  bool generalized_statements = 3;
    +  // Whether RdfRepeat will be used
    +  bool use_repeat = 4;
    +  // Whether the stream may contain RDF-star statements
    +  bool rdf_star = 5;
    +  // Maximum size of the name lookup table
    +  uint32 max_name_table_size = 9;
    +  // Maximum size of the prefix lookup table
    +  uint32 max_prefix_table_size = 10;
    +  // Maximum size of the datatype lookup table
    +  uint32 max_datatype_table_size = 11;
    +  // Protocol version (required)
    +  // For Jelly 1.0.x value must be 1.
    +  // For custom extensions, the value must be 1000 or higher.
    +  uint32 version = 15;
    +}
    +
    +// RDF stream type
    +enum RdfStreamType {
    +  // Unspecified stream type – invalid
    +  RDF_STREAM_TYPE_UNSPECIFIED = 0;
    +  // RDF triples
    +  RDF_STREAM_TYPE_TRIPLES = 1;
    +  // RDF quads
    +  RDF_STREAM_TYPE_QUADS = 2;
    +  // RDF triples grouped in graphs
    +  RDF_STREAM_TYPE_GRAPHS = 3;
    +}
    +
    +// RDF stream row
    +message RdfStreamRow {
    +  // Exactly one of these fields must be set.
    +  oneof row {
    +    // Stream options. Must occur at the start of the stream.
    +    RdfStreamOptions options = 1;
    +    // RDF triple statement.
    +    // Valid in TRIPLES and GRAPHS streams.
    +    RdfTriple triple = 2;
    +    // RDF quad statement.
    +    // Only valid in a QUADS stream.
    +    RdfQuad quad = 3;
    +    // Graph boundary: ends the currently transmitted graph and starts a new one
    +    // Only valid in a GRAPHS stream.
    +    RdfGraphStart graph_start = 4;
    +    // Explicit end of a graph.
    +    // Signals the consumer that the transmitted graph is complete.
    +    // Only valid in a GRAPHS stream.
    +    RdfGraphEnd graph_end = 5;
    +    // Entry in the name lookup table.
    +    RdfNameEntry name = 9;
    +    // Entry in the prefix lookup table.
    +    RdfPrefixEntry prefix = 10;
    +    // Entry in the datatype lookup table.
    +    RdfDatatypeEntry datatype = 11;
    +  }
    +}
    +
    +// RDF stream frame
    +message RdfStreamFrame {
    +  // Stream rows
    +  repeated RdfStreamRow rows = 1;
    +}
    +
    +

    grpc.proto

    +
    syntax = "proto3";
    +package eu.ostrzyciel.jelly.core.proto.v1;
    +
    +// gRPC service specifications for RDF streaming.
    +// Protocol version: 1.0.0
    +
    +import "rdf.proto";
    +
    +// Subscribe command sent by the client to the server.
    +message RdfStreamSubscribe {
    +  // The topic to which the client wants to subscribe (UTF-8 encoded).
    +  string topic = 1;
    +  // Optional: the stream options requested by the client.
    +  // The server should respond with a stream that matches these options.
    +  // In case that is not possible, the server must respond with the
    +  // INVALID_ARGUMENT error.
    +  RdfStreamOptions requested_options = 2;
    +}
    +
    +// Acknowledgement of receiving a stream sent by the server to the client.
    +message RdfStreamReceived {
    +}
    +
    +// Pub/Sub service for RDF streams, to be implemented by the server.
    +service RdfStreamService {
    +  // Subscribe to an RDF stream.
    +  rpc SubscribeRdf (RdfStreamSubscribe) returns (stream RdfStreamFrame);
    +  // Publish an RDF stream.
    +  // In case the server cannot process the stream, it must respond with
    +  // the INVALID_ARGUMENT error.
    +  rpc PublishRdf (stream RdfStreamFrame) returns (RdfStreamReceived);
    +}
    +
    +

    See also

    + + + + + + + +
    +
    + + +
    + + + +
    + + + +
    +
    +
    +
    + +
    + + + + + + + + + + \ No newline at end of file diff --git a/1.0/specification/reference/index.html b/1.0/specification/reference/index.html index 5140c25..ffec787 100644 --- a/1.0/specification/reference/index.html +++ b/1.0/specification/reference/index.html @@ -16,7 +16,7 @@ - + @@ -330,6 +330,8 @@ + + @@ -383,7 +385,7 @@ - Serialization + Serialization format @@ -665,6 +667,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/specification/serialization/index.html b/1.0/specification/serialization/index.html index af062ff..a835338 100644 --- a/1.0/specification/serialization/index.html +++ b/1.0/specification/serialization/index.html @@ -24,7 +24,7 @@ - Serialization - Jelly + Serialization format - Jelly @@ -119,7 +119,7 @@
    - Serialization + Serialization format
    @@ -330,6 +330,8 @@ + + @@ -392,7 +394,7 @@ - Serialization + Serialization format @@ -403,7 +405,7 @@ - Serialization + Serialization format @@ -676,6 +678,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/specification/streaming/index.html b/1.0/specification/streaming/index.html index 17f0b9e..085eee9 100644 --- a/1.0/specification/streaming/index.html +++ b/1.0/specification/streaming/index.html @@ -330,6 +330,8 @@ + + @@ -383,7 +385,7 @@ - Serialization + Serialization format @@ -580,6 +582,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/1.0/user-guide/index.html b/1.0/user-guide/index.html index 7303d86..15c5e06 100644 --- a/1.0/user-guide/index.html +++ b/1.0/user-guide/index.html @@ -464,6 +464,8 @@ + + @@ -517,7 +519,7 @@ - Serialization + Serialization format @@ -572,6 +574,26 @@ +
  • + + + + + Protobuf source + + + + +
  • + + + + + + + + +
  • diff --git a/latest/specification/protobuf-source/index.html b/latest/specification/protobuf-source/index.html new file mode 100644 index 0000000..9fca3f2 --- /dev/null +++ b/latest/specification/protobuf-source/index.html @@ -0,0 +1,16 @@ + + + + + Redirecting + + + + + Redirecting to ../../../1.0/specification/protobuf-source/... + + \ No newline at end of file