From 1700c21e6bd2a4766ee8b5554c66a82407fe9226 Mon Sep 17 00:00:00 2001
From: Arne Binder
Date: Fri, 22 Dec 2023 15:16:57 +0100
Subject: [PATCH] tokenization: don't regard missed partitions (don't show them
 if verbose and don't break if strict_span_conversion)

---
 src/pie_modules/document/processing/tokenization.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/pie_modules/document/processing/tokenization.py b/src/pie_modules/document/processing/tokenization.py
index 099d52582..172b11b52 100644
--- a/src/pie_modules/document/processing/tokenization.py
+++ b/src/pie_modules/document/processing/tokenization.py
@@ -343,11 +343,14 @@ def tokenize_document(
     missed_annotations = defaultdict(set)
     if strict_span_conversion or verbose:
         for annotation_field in doc.annotation_fields():
-            current_missed_annotations = set(doc[annotation_field.name]) - set(
-                added_annotations[annotation_field.name]
-            )
-            if len(current_missed_annotations) > 0:
-                missed_annotations[annotation_field.name] = current_missed_annotations
+            # do not check the partition layer because the partitions are not required later on
+            # and their entries are quite likely removed when windowing is applied, so this just pollutes the logs
+            if annotation_field.name != partition_layer:
+                current_missed_annotations = set(doc[annotation_field.name]) - set(
+                    added_annotations[annotation_field.name]
+                )
+                if len(current_missed_annotations) > 0:
+                    missed_annotations[annotation_field.name] = current_missed_annotations
 
     if len(missed_annotations) > 0:
         missed_annotations_simplified = {k: str(v) for k, v in missed_annotations.items()}
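
For context, the following is a minimal, self-contained sketch of the effect of the new check: once the partition layer is excluded, partitions dropped by windowing no longer show up as missed annotations (so strict_span_conversion does not fail and verbose mode does not log them). The layer names, the string placeholders for annotations, and the plain-dict doc_annotations/added_annotations structures are simplified stand-ins for illustration, not the pie_modules API.

from collections import defaultdict

# Stand-in data: per-layer annotation sets for the original document and for
# the annotations that were successfully carried over to the tokenized result.
doc_annotations = {
    "labeled_partitions": {"partition(0, 100)", "partition(100, 200)"},
    "entities": {"ent(5, 12)", "ent(110, 118)"},
}
added_annotations = {
    # the second partition is lost because windowing split the document
    "labeled_partitions": {"partition(0, 100)"},
    "entities": {"ent(5, 12)", "ent(110, 118)"},
}
partition_layer = "labeled_partitions"

missed_annotations = defaultdict(set)
for layer_name, annotations in doc_annotations.items():
    # skip the partition layer: its entries are expected to be dropped by
    # windowing and are not required later on
    if layer_name == partition_layer:
        continue
    missed = annotations - added_annotations.get(layer_name, set())
    if missed:
        missed_annotations[layer_name] = missed

print(dict(missed_annotations))  # {} -> no spurious "missed partition" report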