diff --git a/tests/input/datasets.yaml b/tests/input/datasets.yaml
deleted file mode 100644
index badd922..0000000
--- a/tests/input/datasets.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
----
-"@context": https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json
-
-# This extends the list of prefixes that are already defined in the context
-prefixes:
-  sem: "https://w3id.com/emmo/domain/sem/0.1#"
-  semdata: "http://sintef.no/data/matchmaker/SEM/"
-  dm: "http://onto-ns.com/meta/characterisation/0.1/SEMImage#"
-
-# List of documented datasets
-datasets:
-    # unique ID of the dataset
-  - "@id": "semdata:sample3/pos1_01_grid_200x"
-    # Dataset type as defined in the SEM domain ontology
-    "@type": "https://w3id.com/emmo/domain/sem/0.1#"
-    title: SEM image of cement
-    description: Back-scattered SEM image of cement sample 3, polished with 1 µm diamond compound.
-    creator: Sigurd Wenner
-    contactPoint: "Sigurd Wenner "
-
-    datamodel: http://onto-ns.com/meta/characterisation/0.1/SEMImage
-    mappingURL: https://raw.githubusercontent.com/HEU-MatCHMaker/DataDocumentation/refs/heads/master/SEM/datamodels/SEMImage.ttl
-
-    # Contextual documentation of the dataset
-    statements:
-      - ["@id", "sem:fromSample", "semdata:sample3"]
-      - ["@id", "emmo:isDescriptionOf", "semdata:concrete1"]
-
-    # A dataset can have several distributions, hence a list
-    distribution:
-      - downloadURL: https://github.com/HEU-MatCHMaker/DataDocumentation/raw/refs/heads/master/SEM/example_data/(ThermoFisher)%20pos1_01_grid_200x.tif
-        mediaType: image/tiff
-        parser:
-          parserType: application/vnd.dlite-parse
-          configuration:
-            driver: image
-            options: "plugin=tiffile"
-
-    # List of consumers of this dataset
-    datasink:
-        # This simple consumer wants the data in png format
-      - storeURL: mydata.png
-        mediaType: image/png
-        generator:
-          functionType: application/vnd.dlite-generate
-          configuration:
-            driver: image
-            options: format=png
diff --git a/tests/input/semdata.yaml b/tests/input/semdata.yaml
index ec2a839..e640d93 100644
--- a/tests/input/semdata.yaml
+++ b/tests/input/semdata.yaml
@@ -34,7 +34,6 @@ datasets:
       downloadURL: https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif
       mediaType: image/tiff
       parser: parser:sem_hitachi
-      generator: gen:sem_hitachi
 
   - "@id": semdata:SEM_cement_batch2/77600-23-001
     "@type": sem:SEMImageSeries
diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index f321c03..7682633 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -1,25 +1,34 @@
 """Module for documenting datasets with Tripper.
 
-The dataset documentation follows the DCAT structure and is exposed as
-Python dicts with attribute access in this module. This dict
-structure is used by the functions:
-  - `read_datadoc()`: Read documentation from YAML file and return it as dict.
-  - `save_dict()`: Save dict documentation to the triplestore.
-  - `load_dict()`: Load dict documentation from the triplestore.
-
-YAML documentation can also be stored directly to the triplestore with
-  - `save_datadoc()`: Save documentation from YAML file to the triplestore.
+The dataset documentation follows the [DCAT] structure and is exposed
+as Python dicts with attribute access in this module. The semantic
+meaning of the keywords in this dict is defined by a [JSON-LD context].
 
-For accessing and storing actual data, the following functions can be used:
+High-level functions for accessing and storing actual data:
   - `load()`: Load documented dataset from its source.
   - `save()`: Save documented dataset to a data resource.
 
-For searching the triplestore:
+High-level function for populating the triplestore from YAML documentation:
+  - `save_datadoc()`: Save documentation from YAML file to the triplestore.
+
+Functions for searching the triplestore:
   - `list_dataset_iris()`: Get IRIs of matching datasets.
 
-For interaction with OTEAPI:
+Functions for working with the dict-representation:
+  - `read_datadoc()`: Read documentation from YAML file and return it as dict.
+  - `save_dict()`: Save dict documentation to the triplestore.
+  - `load_dict()`: Load dict documentation from the triplestore.
+
+Functions for interaction with OTEAPI:
   - `get_partial_pipeline()`: Returns a OTELib partial pipeline.
 
+---
+
+__TODO__: Update the URL to the JSON-LD context when merged to master
+
+[DCAT]: https://www.w3.org/TR/vocab-dcat-3/
+[JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/dataset/tripper/context/0.2/context.json
+
 """
 
 # pylint: disable=invalid-name,redefined-builtin,import-outside-toplevel
@@ -107,20 +116,23 @@ def save(
     generator: "Optional[str]" = None,
     prefixes: "Optional[dict]" = None,
     use_sparql: "Optional[bool]" = None,
-) -> None:
-    """Saves a documented dataset to a data resource.
+) -> str:
+    """Saves data to a data resource and documents it in the triplestore.
 
     Arguments:
         ts: Triplestore to load data from.
-        data: Bytes representation of the dataset to save.
+        data: Bytes representation of the data to save.
         class_iri: IRI of a class in the ontology (e.g. an `emmo:DataSet`
             subclass) that describes the dataset that is saved.
            Is used to select the `distribution` if that is not given.
            If `distribution` is also given, a `dcat:distribution value `
            restriction will be added to `class_iri`
-        dataset: IRI of dataset for the data to be saved.
-            Or a dict with additional documentation of the dataset.
+        dataset: Either the IRI of the dataset individual standing for
+            the data to be saved or a dict with additional
+            documentation of the dataset.
+            If the dataset already exists, a new distribution will be added
+            to it. Otherwise, a new random blank node IRI will be created.
         distribution: IRI of distribution for the data to be saved.
            Or a dict additional documentation of the distribution,
            like media type, parsers, generators etc...
@@ -131,6 +143,9 @@ def save(
         use_sparql: Whether to access the triplestore with SPARQL.
            Defaults to `ts.prefer_sparql`.
 
+    Returns:
+        IRI of the dataset.
+
     """
     # pylint: disable=too-many-locals,too-many-branches,too-many-statements
     # Use the Protocol plugin system from DLite. Should we move it to tripper?
@@ -245,6 +260,8 @@ def save(
     elif save_distribution:
         save_dict(ts, "distribution", distribution, prefixes=prefixes)
 
+    return dataset["@id"]
+
 
 def load(
     ts: Triplestore,
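
The reorganised module docstring groups the public API into high-level and lower-level functions. A typical round trip with the high-level functions might look like the sketch below; the import path `tripper.dataset`, the `load(ts, iri)` argument order and the acceptance of a prefixed IRI are assumptions not confirmed by this diff, and the YAML file is the one from tests/input/semdata.yaml.

# Hedged sketch of the documented workflow: populate the triplestore from a
# YAML data-documentation file, then load the bytes of a documented dataset.
from tripper import Triplestore
from tripper.dataset import load, save_datadoc  # import path assumed

ts = Triplestore(backend="rdflib")

# Store the YAML documentation (datasets, distributions, prefixes, ...)
# as triples in the triplestore.
save_datadoc(ts, "tests/input/semdata.yaml")

# Load the actual data of one of the documented datasets from its source;
# the IRI below is taken from the semdata.yaml hunk in this diff.
data = load(ts, "semdata:SEM_cement_batch2/77600-23-001")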
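
Since `save()` now returns the IRI of the dataset individual (the new `return dataset["@id"]`), callers can capture that IRI for later queries. A minimal sketch, assuming the positional order `(ts, data, ...)`, the import path `tripper.dataset`, and that the keywords of the `distribution` dict mirror those used in the YAML examples above; the file names are hypothetical.

# Hypothetical sketch: save raw bytes and capture the returned dataset IRI.
from tripper import Triplestore
from tripper.dataset import save  # import path assumed

ts = Triplestore(backend="rdflib")

with open("mydata.png", "rb") as f:  # hypothetical local file
    data = f.read()

dataset_iri = save(
    ts,
    data,
    distribution={  # keywords assumed to mirror the YAML examples
        "downloadURL": "file:///tmp/mydata.png",
        "mediaType": "image/png",
    },
)
print(dataset_iri)  # a random blank node IRI if no existing dataset was given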