diff --git a/README.md b/README.md
index e7b3c7b..240f017 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,8 @@ dinglehopper is an OCR evaluation tool and reads
[ALTO](https://github.com/altoxml),
[PAGE](https://github.com/PRImA-Research-Lab/PAGE-XML) and text files. It
compares a ground truth (GT) document page with a OCR result page to compute
-metrics and a word/character differences report.
+metrics and a word/character differences report. It also supports batch processing by
+generating, aggregating and summarizing multiple reports.
[![Build Status](https://circleci.com/gh/qurator-spk/dinglehopper.svg?style=svg)](https://circleci.com/gh/qurator-spk/dinglehopper)
@@ -27,7 +28,7 @@ sudo pip install .
Usage
-----
~~~
-Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX]
+Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX] [REPORTS_FOLDER]
Compare the PAGE/ALTO/text document GT against the document OCR.
@@ -35,19 +36,23 @@ Usage: dinglehopper [OPTIONS] GT OCR [REPORT_PREFIX]
their text and falls back to plain text if no ALTO or PAGE is detected.
The files GT and OCR are usually a ground truth document and the result of
- an OCR software, but you may use dinglehopper to compare two OCR results.
- In that case, use --no-metrics to disable the then meaningless metrics and
- also change the color scheme from green/red to blue.
+ an OCR software, but you may use dinglehopper to compare two OCR results. In
+ that case, use --no-metrics to disable the then meaningless metrics and also
+ change the color scheme from green/red to blue.
- The comparison report will be written to $REPORT_PREFIX.{html,json}, where
- $REPORT_PREFIX defaults to "report". The reports include the character
- error rate (CER) and the word error rate (WER).
+ The comparison report will be written to
+ $REPORTS_FOLDER/$REPORT_PREFIX.{html,json}, where $REPORTS_FOLDER defaults
+ to the current working directory and $REPORT_PREFIX defaults to "report".
+ The reports include the character error rate (CER) and the word error rate
+ (WER).
By default, the text of PAGE files is extracted on 'region' level. You may
use "--textequiv-level line" to extract from the level of TextLine tags.
Options:
--metrics / --no-metrics Enable/disable metrics and green/red
+ --differences BOOLEAN Enable reporting character and word level
+ differences
--textequiv-level LEVEL PAGE TextEquiv level to extract text from
--progress Show progress bar
--help Show this message and exit.
@@ -61,6 +66,43 @@ This generates `report.html` and `report.json`.
![dinglehopper displaying metrics and character differences](.screenshots/dinglehopper.png?raw=true)
+Batch comparison between folders of GT and OCR files can be done by simply providing
+folders:
+~~~
+dinglehopper gt/ ocr/ report output_folder/
+~~~
+This assumes that you have files with exactly the same name in both folders, e.g.
+`gt/00000001.xml` and `ocr/00000001.xml`; files without a counterpart are skipped.
+
+The example generates a report pair `<file name>-report.{html,json}` for each pair of
+files, in the (automatically created) folder `output_folder/`.
+
+By default, the JSON report does not contain the character and word differences, only
+the calculated metrics. If you want to include the differences, use the
+`--differences` flag:
+
+~~~
+dinglehopper gt/ ocr/ report output_folder/ --differences
+~~~
+
+### dinglehopper-summarize
+A set of (JSON) reports can be summarized into a single set of
+reports. This is useful after having generated reports in batch.
+Example:
+~~~
+dinglehopper-summarize output_folder/
+~~~
+This generates `summary.html` and `summary.json` in the same `output_folder`.
+
+If you are summarizing many reports and have used the `--differences` flag while
+generating them, it may be useful to limit the number of differences reported by using
+the `--occurrences-threshold` parameter. This will reduce the size of the generated HTML
+report, making it easier to open and navigate. Note that the JSON report will still
+contain all differences. Example:
+~~~
+dinglehopper-summarize output_folder/ --occurrences-threshold 10
+~~~
+
### dinglehopper-line-dirs
You also may want to compare a directory of GT text files (i.e. `gt/line0001.gt.txt`)
with a directory of OCR text files (i.e. `ocr/line0001.some-ocr.txt`) with a separate
diff --git a/dinglehopper/cli.py b/dinglehopper/cli.py
index 72d428d..b22aafc 100644
--- a/dinglehopper/cli.py
+++ b/dinglehopper/cli.py
@@ -1,4 +1,5 @@
import os
+from collections import Counter
import click
from jinja2 import Environment, FileSystemLoader
@@ -6,15 +7,15 @@
from uniseg.graphemecluster import grapheme_clusters
from ocrd_utils import initLogging
-from .character_error_rate import character_error_rate_n
-from .word_error_rate import word_error_rate_n, words_normalized
-from .align import seq_align
-from .extracted_text import ExtractedText
-from .ocr_files import extract
-from .config import Config
+from dinglehopper.character_error_rate import character_error_rate_n
+from dinglehopper.word_error_rate import word_error_rate_n, words_normalized
+from dinglehopper.align import seq_align
+from dinglehopper.extracted_text import ExtractedText
+from dinglehopper.ocr_files import extract
+from dinglehopper.config import Config
-def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none):
+def gen_diff_report(gt_in, ocr_in, css_prefix, joiner, none, differences=False):
gtx = ""
ocrx = ""
@@ -54,6 +55,8 @@ def format_thing(t, css_classes=None, id_=None):
g_pos = 0
o_pos = 0
+ found_differences = []
+
for k, (g, o) in enumerate(seq_align(gt_things, ocr_things)):
css_classes = None
gt_id = None
@@ -66,6 +69,9 @@ def format_thing(t, css_classes=None, id_=None):
# Deletions and inserts only produce one id + None, UI must
# support this, i.e. display for the one id produced
+ if differences:
+ found_differences.append(f'{g} :: {o}')
+
gtx += joiner + format_thing(g, css_classes, gt_id)
ocrx += joiner + format_thing(o, css_classes, ocr_id)
@@ -74,6 +80,8 @@ def format_thing(t, css_classes=None, id_=None):
if o is not None:
o_pos += len(o)
+ found_differences = dict(Counter(elem for elem in found_differences))
+
return """
{}
@@ -81,7 +89,7 @@ def format_thing(t, css_classes=None, id_=None):
""".format(
gtx, ocrx
- )
+ ), found_differences
def json_float(value):
@@ -97,7 +105,8 @@ def json_float(value):
return str(value)
-def process(gt, ocr, report_prefix, *, metrics=True, textequiv_level="region"):
+def process(gt, ocr, report_prefix, reports_folder='.', *, metrics=True,
+ differences=False, textequiv_level="region"):
"""Check OCR result against GT.
The @click decorators change the signature of the decorated functions, so we keep this undecorated version and use
@@ -110,14 +119,15 @@ def process(gt, ocr, report_prefix, *, metrics=True, textequiv_level="region"):
cer, n_characters = character_error_rate_n(gt_text, ocr_text)
wer, n_words = word_error_rate_n(gt_text, ocr_text)
- char_diff_report = gen_diff_report(
- gt_text, ocr_text, css_prefix="c", joiner="", none="·"
- )
+ char_diff_report, diff_c = gen_diff_report(gt_text, ocr_text, css_prefix="c",
+ joiner="",
+ none="·", differences=differences)
gt_words = words_normalized(gt_text)
ocr_words = words_normalized(ocr_text)
- word_diff_report = gen_diff_report(
- gt_words, ocr_words, css_prefix="w", joiner=" ", none="⋯"
+ word_diff_report, diff_w = gen_diff_report(
+ gt_words, ocr_words, css_prefix="w", joiner=" ", none="⋯",
+ differences=differences
)
env = Environment(
@@ -129,7 +139,11 @@ def process(gt, ocr, report_prefix, *, metrics=True, textequiv_level="region"):
for report_suffix in (".html", ".json"):
template_fn = "report" + report_suffix + ".j2"
- out_fn = report_prefix + report_suffix
+
+ if not os.path.isdir(reports_folder):
+ os.mkdir(reports_folder)
+
+ out_fn = os.path.join(reports_folder, report_prefix + report_suffix)
template = env.get_template(template_fn)
template.stream(
@@ -142,16 +156,42 @@ def process(gt, ocr, report_prefix, *, metrics=True, textequiv_level="region"):
char_diff_report=char_diff_report,
word_diff_report=word_diff_report,
metrics=metrics,
+ differences=differences,
+ diff_c=diff_c,
+ diff_w=diff_w,
).dump(out_fn)
+def process_dir(gt, ocr, report_prefix, reports_folder, metrics, differences,
+ textequiv_level):
+ for gt_file in os.listdir(gt):
+ gt_file_path = os.path.join(gt, gt_file)
+ ocr_file_path = os.path.join(ocr, gt_file)
+
+ if os.path.isfile(gt_file_path) and os.path.isfile(ocr_file_path):
+ process(gt_file_path, ocr_file_path,
+ f"{gt_file}-{report_prefix}",
+ reports_folder=reports_folder,
+ metrics=metrics,
+ differences=differences,
+ textequiv_level=textequiv_level)
+ else:
+ print("Skipping {0} and {1}".format(gt_file_path, ocr_file_path))
+
+
@click.command()
@click.argument("gt", type=click.Path(exists=True))
@click.argument("ocr", type=click.Path(exists=True))
@click.argument("report_prefix", type=click.Path(), default="report")
+@click.argument("reports_folder", type=click.Path(), default=".")
@click.option(
"--metrics/--no-metrics", default=True, help="Enable/disable metrics and green/red"
)
+@click.option(
+ "--differences",
+ default=False,
+ help="Enable reporting character and word level differences"
+)
@click.option(
"--textequiv-level",
default="region",
@@ -159,7 +199,8 @@ def process(gt, ocr, report_prefix, *, metrics=True, textequiv_level="region"):
metavar="LEVEL",
)
@click.option("--progress", default=False, is_flag=True, help="Show progress bar")
-def main(gt, ocr, report_prefix, metrics, textequiv_level, progress):
+def main(gt, ocr, report_prefix, reports_folder, metrics, differences, textequiv_level,
+ progress):
"""
Compare the PAGE/ALTO/text document GT against the document OCR.
@@ -171,7 +212,8 @@ def main(gt, ocr, report_prefix, metrics, textequiv_level, progress):
that case, use --no-metrics to disable the then meaningless metrics and also
change the color scheme from green/red to blue.
- The comparison report will be written to $REPORT_PREFIX.{html,json}, where
+ The comparison report will be written to $REPORTS_FOLDER/$REPORT_PREFIX.{html,json},
+ where $REPORTS_FOLDER defaults to the current working directory and
$REPORT_PREFIX defaults to "report". The reports include the character error
rate (CER) and the word error rate (WER).
@@ -180,7 +222,17 @@ def main(gt, ocr, report_prefix, metrics, textequiv_level, progress):
"""
initLogging()
Config.progress = progress
- process(gt, ocr, report_prefix, metrics=metrics, textequiv_level=textequiv_level)
+ if os.path.isdir(gt):
+ if not os.path.isdir(ocr):
+ raise click.BadParameter(
+ "OCR must be a directory if GT is a directory", param_hint="ocr"
+ )
+ else:
+ process_dir(gt, ocr, report_prefix, reports_folder, metrics,
+ differences, textequiv_level)
+ else:
+ process(gt, ocr, report_prefix, reports_folder, metrics=metrics,
+ differences=differences, textequiv_level=textequiv_level)
if __name__ == "__main__":
diff --git a/dinglehopper/cli_summarize.py b/dinglehopper/cli_summarize.py
new file mode 100644
index 0000000..1cf1c91
--- /dev/null
+++ b/dinglehopper/cli_summarize.py
@@ -0,0 +1,101 @@
+import json
+import os
+
+import click
+from ocrd_utils import initLogging
+from jinja2 import Environment, FileSystemLoader
+
+from dinglehopper.cli import json_float
+
+
+def process(reports_folder, occurrences_threshold=1):
+ cer_list = []
+ wer_list = []
+ cer_sum = 0
+ wer_sum = 0
+ diff_c = {}
+ diff_w = {}
+
+ for report in os.listdir(reports_folder):
+ if report.endswith(".json"):
+ with open(os.path.join(reports_folder, report), "r") as f:
+ report_data = json.load(f)
+
+ if "cer" not in report_data or "wer" not in report_data:
+ click.echo(
+ f"Skipping {report} because it does not contain CER and WER")
+ continue
+
+ cer = report_data["cer"]
+ wer = report_data["wer"]
+ cer_list.append(cer)
+ wer_list.append(wer)
+ cer_sum += cer
+ wer_sum += wer
+
+ for key, value in report_data["differences"]["character_level"].items():
+ diff_c[key] = diff_c.get(key, 0) + value
+ for key, value in report_data["differences"]["word_level"].items():
+ diff_w[key] = diff_w.get(key, 0) + value
+
+ if len(cer_list) == 0:
+ click.echo(f"No reports found in folder '{os.path.abspath(reports_folder)}'")
+ return
+
+ cer_avg = cer_sum / len(cer_list)
+ wer_avg = wer_sum / len(wer_list)
+
+ print(f"Number of reports: {len(cer_list)}")
+ print(f"Average CER: {cer_avg}")
+ print(f"Average WER: {wer_avg}")
+ print(f"Sum of common mistakes: {cer_sum}")
+ print(f"Sum of common mistakes: {wer_sum}")
+
+ env = Environment(
+ loader=FileSystemLoader(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates")
+ )
+ )
+ env.filters["json_float"] = json_float
+ for report_suffix in (".html", ".json"):
+ template_fn = "summary" + report_suffix + ".j2"
+
+ out_fn = os.path.join(reports_folder, 'summary' + report_suffix)
+ template = env.get_template(template_fn)
+ template.stream(
+ num_reports=len(cer_list),
+ cer_avg=cer_avg,
+ wer_avg=wer_avg,
+ diff_c=diff_c,
+ diff_w=diff_w,
+ occurrences_threshold=occurrences_threshold,
+ ).dump(out_fn)
+
+
+@click.command()
+@click.argument("reports_folder",
+ type=click.Path(exists=True),
+ default="./reports"
+ )
+@click.option("--occurrences-threshold",
+ type=int,
+ default=1,
+ help="Only show differences that occur at least this many times.")
+def main(reports_folder, occurrences_threshold):
+ """
+ Summarize the results from multiple reports generated earlier by dinglehopper.
+ It calculates the average CER and WER, as well as a sum of common mistakes.
+ Reports include lists of mistakes and their occurrences.
+
+ You may use a threshold to reduce the file size of the HTML report by only showing
+ mistakes whose number of occurrences is above the threshold. The JSON report will
+ always contain all mistakes.
+
+ All JSON files in the provided folder will be gathered and summarized.
+ """
+ initLogging()
+ process(reports_folder, occurrences_threshold)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dinglehopper/templates/report.html.j2 b/dinglehopper/templates/report.html.j2
index 0c2f464..435b98a 100644
--- a/dinglehopper/templates/report.html.j2
+++ b/dinglehopper/templates/report.html.j2
@@ -26,6 +26,22 @@
border: 2px solid;
border-radius: 5px;
}
+
+ .row {
+ margin-bottom: 20px;
+ }
+
+ table {
+ width: 100%;
+ }
+
+ th {
+ cursor: pointer;
+ }
+
+ th:hover {
+ background-color: #eee;
+ }
@@ -50,6 +66,32 @@
Word differences
{{ word_diff_report }}
+{%- if differences %}
+{% set sections = [{'title': 'Found differences (character)', 'data': diff_c}, {'title': 'Found differences (word)', 'data': diff_w}] %}
+
+
+{% for section in sections %}
+
+
{{ section['title'] }}
+
+
+
+ GT
+ OCR
+ Occurrences
+
+ {% for gt_ocr, occurrences in section['data'].items() %}
+
+ {{ gt_ocr.split("::")[0] }}
+ {{ gt_ocr.split("::")[1] }}
+ {{ occurrences }}
+
+ {% endfor %}
+
+
+{% endfor %}
+
+{%- endif %}
diff --git a/dinglehopper/templates/report.html.js b/dinglehopper/templates/report.html.js
index 4c2ba28..f47cee7 100644
--- a/dinglehopper/templates/report.html.js
+++ b/dinglehopper/templates/report.html.js
@@ -12,4 +12,28 @@ $(document).ready(function() {
$('.diff').mouseout(function() {
find_diff_class($(this).attr('class')).removeClass('diff-highlight');
});
+
+ /* Sort this column of the table */
+ $('th').click(function () {
+ var table = $(this).closest('table');
+ var rows = table.find('tbody > tr').toArray().sort(compareRows($(this).index()));
+ this.asc = !this.asc;
+ if (!this.asc) {
+ rows = rows.reverse();
+ }
+ for (var i = 0; i < rows.length; i++) {
+ table.children('tbody').append(rows[i]);
+ }
+ });
+
+ function compareRows(index) {
+ return function (row1, row2) {
+ var cell1 = $(row1).children('td').eq(index).text().toLowerCase();
+ var cell2 = $(row2).children('td').eq(index).text().toLowerCase();
+ return cell1.localeCompare(cell2, undefined, {
+ numeric: true,
+ sensitivity: 'base'
+ });
+ }
+ }
});
diff --git a/dinglehopper/templates/report.json.j2 b/dinglehopper/templates/report.json.j2
index 0e8af03..64dd8d4 100644
--- a/dinglehopper/templates/report.json.j2
+++ b/dinglehopper/templates/report.json.j2
@@ -4,6 +4,12 @@
{% if metrics %}
"cer": {{ cer|json_float }},
"wer": {{ wer|json_float }},
+{% endif %}
+{% if differences %}
+ "differences": {
+ "character_level": {{ diff_c|tojson }},
+ "word_level": {{ diff_w|tojson }}
+ },
{% endif %}
"n_characters": {{ n_characters }},
"n_words": {{ n_words }}
diff --git a/dinglehopper/templates/summary.html.j2 b/dinglehopper/templates/summary.html.j2
new file mode 100644
index 0000000..e61e808
--- /dev/null
+++ b/dinglehopper/templates/summary.html.j2
@@ -0,0 +1,136 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Summary of all reports
+
+
+
+
Number of reports: {{ num_reports }}
+
+
+{% if cer_avg and wer_avg -%}
+
+
Metrics
+
+
+
+
Average CER: {{ cer_avg|round(4) }}
+
Average WER: {{ wer_avg|round(4) }}
+
+{% endif %}
+
+{%- if diff_c and diff_w %}
+{%- set sections = [{'title': 'Found differences (character)', 'data': diff_c}, {'title': 'Found differences (word)', 'data': diff_w}] %}
+
+
+{%- for section in sections %}
+
+
{{ section['title'] }}
+
+
+ GT OCR Occurrences
+
+ {%- set num_omitted = namespace(value=0) -%}
+ {% for gt_ocr, occurrences in section['data'].items() -%}
+ {% if occurrences < occurrences_threshold -%}
+ {%- set num_omitted.value = num_omitted.value + 1 %}
+ {%- else -%}
+ {%- set gt = gt_ocr.split(" :: ")[0] %}
+ {%- set ocr = gt_ocr.split(" :: ")[1] %}
+
+ {{ gt }} {# display the unicode character #}
+ {{ ocr }}
+ {{ occurrences }}
+
+ {%- endif %}
+ {%- endfor %}
+
+ {% if num_omitted.value > 0 and occurrences_threshold > 1 -%}
+ Skipped {{ num_omitted.value }} diffs with fewer than {{ occurrences_threshold }} occurrences. The complete list of diffs is available in the accompanying JSON file.
+ {%- set num_omitted.value = 0 %}
+ {%- endif %}
+
+
+{%- endfor %}
+
+{%- endif %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dinglehopper/templates/summary.json.j2 b/dinglehopper/templates/summary.json.j2
new file mode 100644
index 0000000..bb45f4e
--- /dev/null
+++ b/dinglehopper/templates/summary.json.j2
@@ -0,0 +1,15 @@
+{
+"num_reports": {{ num_reports }}
+{%- if cer_avg is number and wer_avg is number %}{# 0.0 is a valid average #}
+    ,
+    "cer_avg": {{ cer_avg|json_float }},
+    "wer_avg": {{ wer_avg|json_float }}
+{%- endif %}
+{%- if diff_c or diff_w %}{# emit as soon as either level has entries #}
+    ,
+    "differences": {
+        "character_level": {{ diff_c|tojson }},
+        "word_level": {{ diff_w|tojson }}
+    }
+{%- endif %}
+}
diff --git a/dinglehopper/tests/data/directory-test/gt/1.xml b/dinglehopper/tests/data/directory-test/gt/1.xml
new file mode 100644
index 0000000..c0dc183
--- /dev/null
+++ b/dinglehopper/tests/data/directory-test/gt/1.xml
@@ -0,0 +1,3394 @@
+
+
+
+ doculibtopagexml
+ 2019-01-08T10:25:36
+ 2019-04-26T07:11:05
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+ ber
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ v
+
+
+
+ i
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+ vielen
+
+
+
+
+
+
+ S
+
+
+
+ o
+
+
+
+ r
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ Sorgen
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ wegen
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+ e
+
+
+
+ n
+
+ deelben
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ a
+
+
+
+ ß
+
+ vergaß
+
+
+ ber die vielen Sorgen wegen deelben vergaß
+
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+ ihr
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ n
+
+
+
+ o
+
+
+
+
+
+ no
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ m
+
+
+
+ .
+
+ aem.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ ihr do no an aem. —
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+
+
+ ,
+
+ Hartkopf,
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ der
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ⸗
+
+ ver⸗
+
+
+ Hartkopf, der Frau Amtmnnin das ver⸗
+
+
+
+
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ ſproene
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ i
+
+
+
+ e
+
+
+
+ f
+
+
+
+ e
+
+
+
+ r
+
+
+
+ n
+
+
+
+ .
+
+ berliefern.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ i
+
+
+
+ n
+
+ Ein
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+
+
+ p
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+ r
+
+
+
+ e
+
+ Erpreer
+
+
+ ſproene zu berliefern. — Ein Erpreer
+
+
+
+
+
+
+
+ w
+
+
+
+ d
+
+
+
+ e
+
+
+
+ u
+
+
+
+ r
+
+ wurde
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ a
+
+
+
+ b
+
+
+
+ g
+
+
+
+ e
+
+
+
+ ſ
+
+
+
+
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ abgeſit,
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+ um
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+
+
+ s
+
+ ums
+
+
+
+
+
+
+ H
+
+
+
+ i
+
+
+
+ m
+
+
+
+ ⸗
+
+ Him⸗
+
+
+ wurde an ihn abgeſit, um ihn ums Him⸗
+
+
+
+
+
+
+
+ m
+
+
+
+ e
+
+
+
+ l
+
+
+
+ s
+
+
+
+ w
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+ melswien
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ ſagen,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ V
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ Verſproene
+
+
+ melswien zu ſagen, daß er das Verſproene
+
+
+
+
+
+
+
+ g
+
+
+
+ l
+
+
+
+ e
+
+
+
+ i
+
+
+
+
+
+ glei
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ i
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ berbringen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ ,
+
+ mte,
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+ glei den Augenbli berbringen mte, die
+
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ h
+
+
+
+
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+ htte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ f
+
+ auf
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ verlaen,
+
+
+ Frau Amtmnnin htte auf ihn verlaen,
+
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ n
+
+
+
+ u
+
+
+
+ n
+
+ nun
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ wßte
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ n
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ nit,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ f
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ anfangen
+
+
+ und nun wßte e nit, was e anfangen
+
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ ſote.
+
+
+
+
+
+
+ D
+
+
+
+ e
+
+
+
+ n
+
+ Den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ ſote
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ kommen,
+
+
+ ſote. Den Augenbli ſote er kommen,
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ i
+
+
+
+ n
+
+ in
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+
+
+ e
+
+
+
+ r
+
+ ihrer
+
+
+
+
+
+
+ A
+
+
+
+ n
+
+
+
+ g
+
+
+
+
+
+
+
+ .
+
+ Ang.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ D
+
+
+
+ i
+
+
+
+ e
+
+ Die
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+ n
+
+
+
+
+
+ ſon
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ vergieng
+
+
+ ſon vergieng e in ihrer Ang. — Die
+
+
+
+
+
+
+
+ G
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ Ge
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ n
+
+ wren
+
+
+
+
+
+
+ ſ
+
+
+
+
+
+
+
+ o
+
+
+
+ n
+
+ ſon
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ angekommen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ f
+
+
+
+ e
+
+
+
+ h
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+ fehlte
+
+
+ Ge wren ſon angekommen, und es fehlte
+
+ ber die vielen Sorgen wegen deelben vergaß
+Hartkopf, der Frau Amtmnnin das ver⸗
+ſproene zu berliefern. — Ein Erpreer
+wurde an ihn abgeſit, um ihn ums Him⸗
+melswien zu ſagen, daß er das Verſproene
+glei den Augenbli berbringen mte, die
+Frau Amtmnnin htte auf ihn verlaen,
+und nun wßte e nit, was e anfangen
+ſote. Den Augenbli ſote er kommen,
+ſon vergieng e in ihrer Ang. — Die
+Ge wren ſon angekommen, und es fehlte
+ihr do no an aem. —
+
+
+
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ f
+
+
+
+ p
+
+ Hartkopf
+
+
+
+
+
+
+ m
+
+
+
+ u
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ mußte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ bennen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+ Hartkopf mußte er bennen, und
+
+
+
+
+
+
+
+ m
+
+
+
+ i
+
+
+
+ t
+
+ mit
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ a
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ berbrate
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ .
+
+ es.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ mit und berbrate es. —
+
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ m
+
+ langem
+
+
+
+
+
+
+ N
+
+
+
+ a
+
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+
+
+ k
+
+
+
+ e
+
+
+
+ n
+
+ Nadenken
+
+
+
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+ fiel
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ m
+
+ ihm
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ d
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ endli
+
+
+
+
+
+
+ n
+
+
+
+ a
+
+
+
+
+
+ na
+
+
+ endli na langem Nadenken fiel es ihm er
+
+
+
+
+
+
+
+ w
+
+
+
+ i
+
+
+
+ e
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ wieder
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ .
+
+ ein.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+ Er
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ langte
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ Z
+
+
+
+ e
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+
+
+ l
+
+ Zettel
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+ aus
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ m
+
+ dem
+
+
+ wieder ein. — Er langte den Zettel aus dem
+
+
+
+
+
+
+
+ A
+
+
+
+ c
+
+
+
+ c
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ b
+
+
+
+ u
+
+ Accisbue
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+
+
+ ,
+
+ heraus,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ ſagte
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ e
+
+
+
+ r
+
+ ſeiner
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ ,
+
+ Frau,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+ Accisbue heraus, und ſagte ſeiner Frau, daß
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+
+
+ ,
+
+ das,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+ da
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ ,
+
+ wre,
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ e
+
+
+
+ y
+
+
+
+ ſ
+
+
+
+
+
+
+
+ a
+
+
+
+ ff
+
+
+
+ e
+
+
+
+ n
+
+ herbeyſaffen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ mte.
+
+
+ e das, was da wre, herbeyſaffen mte.
+
+
+
+
+
+
+
+ J
+
+
+
+ n
+
+
+
+ d
+
+
+
+ e
+
+
+
+ ß
+
+ Jndeß
+
+
+
+
+
+
+ m
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+
+
+ n
+
+ mangelten
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ i
+
+
+
+ g
+
+
+
+ e
+
+ einige
+
+
+
+
+
+
+ G
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+
+
+ l
+
+
+
+ i
+
+
+
+ a
+
+
+
+ ,
+
+
+
+ r
+
+
+
+ a
+
+ Generalia,
+
+
+ Jndeß mangelten do einige Generalia, die
+
+
+
+
+
+
+
+ a
+
+
+
+ l
+
+
+
+ ſ
+
+
+
+ o
+
+ alſo
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+
+
+ .
+
+ wegfielen.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+ Hartkopf
+
+
+
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ gieng
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+
+
+ ſelb
+
+
+ alſo wegfielen. — Hartkopf gieng ſelb
+
+ Hartkopf mußte er bennen, und
+endli na langem Nadenken fiel es ihm er
+wieder ein. — Er langte den Zettel aus dem
+Accisbue heraus, und ſagte ſeiner Frau, daß
+e das, was da wre, herbeyſaffen mte.
+Jndeß mangelten do einige Generalia, die
+alſo wegfielen. — Hartkopf gieng ſelb
+mit und berbrate es. —
+
+
+
+
diff --git a/dinglehopper/tests/data/directory-test/gt/2.xml b/dinglehopper/tests/data/directory-test/gt/2.xml
new file mode 100644
index 0000000..c0dc183
--- /dev/null
+++ b/dinglehopper/tests/data/directory-test/gt/2.xml
@@ -0,0 +1,3394 @@
+
+
+
+ doculibtopagexml
+ 2019-01-08T10:25:36
+ 2019-04-26T07:11:05
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+ ber
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ v
+
+
+
+ i
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+ vielen
+
+
+
+
+
+
+ S
+
+
+
+ o
+
+
+
+ r
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ Sorgen
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ wegen
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+ e
+
+
+
+ n
+
+ deelben
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ a
+
+
+
+ ß
+
+ vergaß
+
+
+ ber die vielen Sorgen wegen deelben vergaß
+
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+ ihr
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ n
+
+
+
+ o
+
+
+
+
+
+ no
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ m
+
+
+
+ .
+
+ aem.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ ihr do no an aem. —
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+
+
+ ,
+
+ Hartkopf,
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ der
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ⸗
+
+ ver⸗
+
+
+ Hartkopf, der Frau Amtmnnin das ver⸗
+
+
+
+
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ ſproene
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ i
+
+
+
+ e
+
+
+
+ f
+
+
+
+ e
+
+
+
+ r
+
+
+
+ n
+
+
+
+ .
+
+ berliefern.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ i
+
+
+
+ n
+
+ Ein
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+
+
+ p
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+ r
+
+
+
+ e
+
+ Erpreer
+
+
+ ſproene zu berliefern. — Ein Erpreer
+
+
+
+
+
+
+
+ w
+
+
+
+ d
+
+
+
+ e
+
+
+
+ u
+
+
+
+ r
+
+ wurde
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ a
+
+
+
+ b
+
+
+
+ g
+
+
+
+ e
+
+
+
+ ſ
+
+
+
+
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ abgeſit,
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+ um
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+
+
+ s
+
+ ums
+
+
+
+
+
+
+ H
+
+
+
+ i
+
+
+
+ m
+
+
+
+ ⸗
+
+ Him⸗
+
+
+ wurde an ihn abgeſit, um ihn ums Him⸗
+
+
+
+
+
+
+
+ m
+
+
+
+ e
+
+
+
+ l
+
+
+
+ s
+
+
+
+ w
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+ melswien
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ ſagen,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ V
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ Verſproene
+
+
+ melswien zu ſagen, daß er das Verſproene
+
+
+
+
+
+
+
+ g
+
+
+
+ l
+
+
+
+ e
+
+
+
+ i
+
+
+
+
+
+ glei
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ i
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ berbringen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ ,
+
+ mte,
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+ glei den Augenbli berbringen mte, die
+
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ h
+
+
+
+
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+ htte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ f
+
+ auf
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ verlaen,
+
+
+ Frau Amtmnnin htte auf ihn verlaen,
+
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ n
+
+
+
+ u
+
+
+
+ n
+
+ nun
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ wßte
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ n
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ nit,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ f
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ anfangen
+
+
+ und nun wßte e nit, was e anfangen
+
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ ſote.
+
+
+
+
+
+
+ D
+
+
+
+ e
+
+
+
+ n
+
+ Den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ ſote
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ kommen,
+
+
+ ſote. Den Augenbli ſote er kommen,
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ i
+
+
+
+ n
+
+ in
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+
+
+ e
+
+
+
+ r
+
+ ihrer
+
+
+
+
+
+
+ A
+
+
+
+ n
+
+
+
+ g
+
+
+
+
+
+
+
+ .
+
+ Ang.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ D
+
+
+
+ i
+
+
+
+ e
+
+ Die
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+ n
+
+
+
+
+
+ ſon
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ vergieng
+
+
+ ſon vergieng e in ihrer Ang. — Die
+
+
+
+
+
+
+
+ G
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ Ge
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ n
+
+ wren
+
+
+
+
+
+
+ ſ
+
+
+
+
+
+
+
+ o
+
+
+
+ n
+
+ ſon
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ angekommen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ f
+
+
+
+ e
+
+
+
+ h
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+ fehlte
+
+
+ Ge wren ſon angekommen, und es fehlte
+
+ ber die vielen Sorgen wegen deelben vergaß
+Hartkopf, der Frau Amtmnnin das ver⸗
+ſproene zu berliefern. — Ein Erpreer
+wurde an ihn abgeſit, um ihn ums Him⸗
+melswien zu ſagen, daß er das Verſproene
+glei den Augenbli berbringen mte, die
+Frau Amtmnnin htte auf ihn verlaen,
+und nun wßte e nit, was e anfangen
+ſote. Den Augenbli ſote er kommen,
+ſon vergieng e in ihrer Ang. — Die
+Ge wren ſon angekommen, und es fehlte
+ihr do no an aem. —
+
+
+
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ f
+
+
+
+ p
+
+ Hartkopf
+
+
+
+
+
+
+ m
+
+
+
+ u
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ mußte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ bennen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+ Hartkopf mußte er bennen, und
+
+
+
+
+
+
+
+ m
+
+
+
+ i
+
+
+
+ t
+
+ mit
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ a
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ berbrate
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ .
+
+ es.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ mit und berbrate es. —
+
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ m
+
+ langem
+
+
+
+
+
+
+ N
+
+
+
+ a
+
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+
+
+ k
+
+
+
+ e
+
+
+
+ n
+
+ Nadenken
+
+
+
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+ fiel
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ m
+
+ ihm
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ d
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ endli
+
+
+
+
+
+
+ n
+
+
+
+ a
+
+
+
+
+
+ na
+
+
+ endli na langem Nadenken fiel es ihm er
+
+
+
+
+
+
+
+ w
+
+
+
+ i
+
+
+
+ e
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ wieder
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ .
+
+ ein.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+ Er
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ langte
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ Z
+
+
+
+ e
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+
+
+ l
+
+ Zettel
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+ aus
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ m
+
+ dem
+
+
+ wieder ein. — Er langte den Zettel aus dem
+
+
+
+
+
+
+
+ A
+
+
+
+ c
+
+
+
+ c
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ b
+
+
+
+ u
+
+ Accisbue
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+
+
+ ,
+
+ heraus,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ ſagte
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ e
+
+
+
+ r
+
+ ſeiner
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ ,
+
+ Frau,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+ Accisbue heraus, und ſagte ſeiner Frau, daß
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+
+
+ ,
+
+ das,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+ da
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ ,
+
+ wre,
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ e
+
+
+
+ y
+
+
+
+ ſ
+
+
+
+
+
+
+
+ a
+
+
+
+ ff
+
+
+
+ e
+
+
+
+ n
+
+ herbeyſaffen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ mte.
+
+
+ e das, was da wre, herbeyſaffen mte.
+
+
+
+
+
+
+
+ J
+
+
+
+ n
+
+
+
+ d
+
+
+
+ e
+
+
+
+ ß
+
+ Jndeß
+
+
+
+
+
+
+ m
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+
+
+ n
+
+ mangelten
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ i
+
+
+
+ g
+
+
+
+ e
+
+ einige
+
+
+
+
+
+
+ G
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+
+
+ l
+
+
+
+ i
+
+
+
+ a
+
+
+
+ ,
+
+
+
+ r
+
+
+
+ a
+
+ Generalia,
+
+
+ Jndeß mangelten do einige Generalia, die
+
+
+
+
+
+
+
+ a
+
+
+
+ l
+
+
+
+ ſ
+
+
+
+ o
+
+ alſo
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+
+
+ .
+
+ wegfielen.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+ Hartkopf
+
+
+
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ gieng
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+
+
+ ſelb
+
+
+ alſo wegfielen. — Hartkopf gieng ſelb
+
+ Hartkopf mußte er bennen, und
+endli na langem Nadenken fiel es ihm er
+wieder ein. — Er langte den Zettel aus dem
+Accisbue heraus, und ſagte ſeiner Frau, daß
+e das, was da wre, herbeyſaffen mte.
+Jndeß mangelten do einige Generalia, die
+alſo wegfielen. — Hartkopf gieng ſelb
+mit und berbrate es. —
+
+
+
+
diff --git a/dinglehopper/tests/data/directory-test/ocr/1.xml b/dinglehopper/tests/data/directory-test/ocr/1.xml
new file mode 100644
index 0000000..0e62647
--- /dev/null
+++ b/dinglehopper/tests/data/directory-test/ocr/1.xml
@@ -0,0 +1,3394 @@
+
+
+
+ doculibtopagexml
+ 2019-01-08T10:25:36
+ 2019-04-26T07:11:05
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+ ber
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ v
+
+
+
+ i
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+ vielen
+
+
+
+
+
+
+ S
+
+
+
+ o
+
+
+
+ r
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ Sorgen
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ wegen
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+ e
+
+
+
+ n
+
+ deelben
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ a
+
+
+
+ ß
+
+ vergaß
+
+
+ ber die vielen Sorgen wegen deelben vergaß
+
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+ ihr
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ n
+
+
+
+ o
+
+
+
+
+
+ no
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ m
+
+
+
+ .
+
+ aem.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ ihr do no an aem. —
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+
+
+ ,
+
+ Hartkopf,
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ der
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ⸗
+
+ ver⸗
+
+
+ Hartkopf, der Frau Amtmnnin das ver⸗
+
+
+
+
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ ſproene
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ i
+
+
+
+ e
+
+
+
+ f
+
+
+
+ e
+
+
+
+ r
+
+
+
+ n
+
+
+
+ .
+
+ berliefern.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ i
+
+
+
+ n
+
+ Ein
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+
+
+ p
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+ r
+
+
+
+ e
+
+ Erpreer
+
+
+ ſproene zu berliefern. — Ein Erpreer
+
+
+
+
+
+
+
+ w
+
+
+
+ d
+
+
+
+ e
+
+
+
+ u
+
+
+
+ r
+
+ wurde
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ a
+
+
+
+ b
+
+
+
+ g
+
+
+
+ e
+
+
+
+ ſ
+
+
+
+
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ abgeſit,
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+ um
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+
+
+ s
+
+ ums
+
+
+
+
+
+
+ H
+
+
+
+ i
+
+
+
+ m
+
+
+
+ ⸗
+
+ Him⸗
+
+
+ wurde an ihn abgeſit, um ihn ums Him⸗
+
+
+
+
+
+
+
+ m
+
+
+
+ e
+
+
+
+ l
+
+
+
+ s
+
+
+
+ w
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+ melswien
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ ſagen,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ V
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ Verſproene
+
+
+ melswien zu ſagen, daß er das Verſproene
+
+
+
+
+
+
+
+ g
+
+
+
+ l
+
+
+
+ e
+
+
+
+ i
+
+
+
+
+
+ glei
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ i
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ berbringen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ ,
+
+ mte,
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+ glei den Augenbli berbringen mte, die
+
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ h
+
+
+
+
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+ htte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ f
+
+ auf
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ verlaen,
+
+
+ Frau Amtmnnin htte auf ihn verlaen,
+
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ n
+
+
+
+ u
+
+
+
+ n
+
+ nun
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ wßte
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ n
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ nit,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ f
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ anfangen
+
+
+ und nun wßte e nit, was e anfangen
+
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ ſote.
+
+
+
+
+
+
+ D
+
+
+
+ e
+
+
+
+ n
+
+ Den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ ſote
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ kommen,
+
+
+ ſote. Den Augenbli ſote er kommen,
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ i
+
+
+
+ n
+
+ in
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+
+
+ e
+
+
+
+ r
+
+ ihrer
+
+
+
+
+
+
+ A
+
+
+
+ n
+
+
+
+ g
+
+
+
+
+
+
+
+ .
+
+ Ang.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ D
+
+
+
+ i
+
+
+
+ e
+
+ Die
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+ n
+
+
+
+
+
+ ſon
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ vergieng
+
+
+ ſon vergieng e in ihrer Ang. — Die
+
+
+
+
+
+
+
+ G
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ Ge
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ n
+
+ wren
+
+
+
+
+
+
+ ſ
+
+
+
+
+
+
+
+ o
+
+
+
+ n
+
+ ſon
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ angekommen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ f
+
+
+
+ e
+
+
+
+ h
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+ fehlte
+
+
+ Ge wren ſon angekommen, und es fehlte
+
+ ber die vielen Sorgen wegen deelben vergaß
+Hartkopf, der Frau Amtmnnin das ver⸗
+ſproene zu berliefern. — Ein Erpreer
+wurde an ihn abgeſit, um ihn ums Him⸗
+melswien zu ſagen, daß er das Verfproene
+glei den Augenbli berbringen mte, die
+Frau Amtmnnin htte auf ihn verlaen,
+und nun wßte e nit, was e anfangen
+ſote. Den Augembli ſote er kommen,
+ſon vergieng e in ihrer Ang. — Die
+Ge wren ſon angekommen, und es fehlte
+ihr do no an aem. —
+
+
+
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ f
+
+
+
+ p
+
+ Hartkopf
+
+
+
+
+
+
+ m
+
+
+
+ u
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ mußte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ bennen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+ Hartkopf mußte er bennen, und
+
+
+
+
+
+
+
+ m
+
+
+
+ i
+
+
+
+ t
+
+ mit
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ a
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ berbrate
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ .
+
+ es.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ mit und berbrate es. —
+
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ m
+
+ langem
+
+
+
+
+
+
+ N
+
+
+
+ a
+
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+
+
+ k
+
+
+
+ e
+
+
+
+ n
+
+ Nadenken
+
+
+
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+ fiel
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ m
+
+ ihm
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ d
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ endli
+
+
+
+
+
+
+ n
+
+
+
+ a
+
+
+
+
+
+ na
+
+
+ endli na langem Nadenken fiel es ihm er
+
+
+
+
+
+
+
+ w
+
+
+
+ i
+
+
+
+ e
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ wieder
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ .
+
+ ein.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+ Er
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ langte
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ Z
+
+
+
+ e
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+
+
+ l
+
+ Zettel
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+ aus
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ m
+
+ dem
+
+
+ wieder ein. — Er langte den Zettel aus dem
+
+
+
+
+
+
+
+ A
+
+
+
+ c
+
+
+
+ c
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ b
+
+
+
+ u
+
+ Accisbue
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+
+
+ ,
+
+ heraus,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ ſagte
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ e
+
+
+
+ r
+
+ ſeiner
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ ,
+
+ Frau,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+ Accisbue heraus, und ſagte ſeiner Frau, daß
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+
+
+ ,
+
+ das,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+ da
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ ,
+
+ wre,
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ e
+
+
+
+ y
+
+
+
+ ſ
+
+
+
+
+
+
+
+ a
+
+
+
+ ff
+
+
+
+ e
+
+
+
+ n
+
+ herbeyſaffen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ mte.
+
+
+ e das, was da wre, herbeyſaffen mte.
+
+
+
+
+
+
+
+ J
+
+
+
+ n
+
+
+
+ d
+
+
+
+ e
+
+
+
+ ß
+
+ Jndeß
+
+
+
+
+
+
+ m
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+
+
+ n
+
+ mangelten
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ i
+
+
+
+ g
+
+
+
+ e
+
+ einige
+
+
+
+
+
+
+ G
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+
+
+ l
+
+
+
+ i
+
+
+
+ a
+
+
+
+ ,
+
+
+
+ r
+
+
+
+ a
+
+ Generalia,
+
+
+ Jndeß mangelten do einige Generalia, die
+
+
+
+
+
+
+
+ a
+
+
+
+ l
+
+
+
+ ſ
+
+
+
+ o
+
+ alſo
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+
+
+ .
+
+ wegfielen.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+ Hartkopf
+
+
+
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ gieng
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+
+
+ ſelb
+
+
+ alſo wegfielen. — Hartkopf gieng ſelb
+
+ Hartkopf mußte er bennen, und
+endli na langem Nadenken fiel es ihm er
+wieder ein. — Er langte den Zettel aus dem
+Accisbue heraus, und ſagte ſeiner Frau, daß
+e das, was da wre, herbeyſaffen mte.
+Jndeß mangelten do einige Generalia, die
+alſo wegfielen. — Hartkopf gieng ſelb
+mit und berbrate es. —
+
+
+
+
diff --git a/dinglehopper/tests/data/directory-test/ocr/2.xml b/dinglehopper/tests/data/directory-test/ocr/2.xml
new file mode 100644
index 0000000..0e62647
--- /dev/null
+++ b/dinglehopper/tests/data/directory-test/ocr/2.xml
@@ -0,0 +1,3394 @@
+
+
+
+ doculibtopagexml
+ 2019-01-08T10:25:36
+ 2019-04-26T07:11:05
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+ ber
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ v
+
+
+
+ i
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+ vielen
+
+
+
+
+
+
+ S
+
+
+
+ o
+
+
+
+ r
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ Sorgen
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ wegen
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+ e
+
+
+
+ n
+
+ deelben
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ a
+
+
+
+ ß
+
+ vergaß
+
+
+ ber die vielen Sorgen wegen deelben vergaß
+
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+ ihr
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ n
+
+
+
+ o
+
+
+
+
+
+ no
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ m
+
+
+
+ .
+
+ aem.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ ihr do no an aem. —
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+
+
+ ,
+
+ Hartkopf,
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ der
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ⸗
+
+ ver⸗
+
+
+ Hartkopf, der Frau Amtmnnin das ver⸗
+
+
+
+
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ ſproene
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ i
+
+
+
+ e
+
+
+
+ f
+
+
+
+ e
+
+
+
+ r
+
+
+
+ n
+
+
+
+ .
+
+ berliefern.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ i
+
+
+
+ n
+
+ Ein
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+
+
+ p
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+ r
+
+
+
+ e
+
+ Erpreer
+
+
+ ſproene zu berliefern. — Ein Erpreer
+
+
+
+
+
+
+
+ w
+
+
+
+ d
+
+
+
+ e
+
+
+
+ u
+
+
+
+ r
+
+ wurde
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ a
+
+
+
+ b
+
+
+
+ g
+
+
+
+ e
+
+
+
+ ſ
+
+
+
+
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ abgeſit,
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+ um
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+
+
+ s
+
+ ums
+
+
+
+
+
+
+ H
+
+
+
+ i
+
+
+
+ m
+
+
+
+ ⸗
+
+ Him⸗
+
+
+ wurde an ihn abgeſit, um ihn ums Him⸗
+
+
+
+
+
+
+
+ m
+
+
+
+ e
+
+
+
+ l
+
+
+
+ s
+
+
+
+ w
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+ melswien
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ ſagen,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ V
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ Verſproene
+
+
+ melswien zu ſagen, daß er das Verſproene
+
+
+
+
+
+
+
+ g
+
+
+
+ l
+
+
+
+ e
+
+
+
+ i
+
+
+
+
+
+ glei
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ i
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ berbringen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ ,
+
+ mte,
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+ glei den Augenbli berbringen mte, die
+
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ h
+
+
+
+
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+ htte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ f
+
+ auf
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ verlaen,
+
+
+ Frau Amtmnnin htte auf ihn verlaen,
+
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ n
+
+
+
+ u
+
+
+
+ n
+
+ nun
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ wßte
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ n
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ nit,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ f
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ anfangen
+
+
+ und nun wßte e nit, was e anfangen
+
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ ſote.
+
+
+
+
+
+
+ D
+
+
+
+ e
+
+
+
+ n
+
+ Den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ ſote
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ kommen,
+
+
+ ſote. Den Augenbli ſote er kommen,
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ i
+
+
+
+ n
+
+ in
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+
+
+ e
+
+
+
+ r
+
+ ihrer
+
+
+
+
+
+
+ A
+
+
+
+ n
+
+
+
+ g
+
+
+
+
+
+
+
+ .
+
+ Ang.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ D
+
+
+
+ i
+
+
+
+ e
+
+ Die
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+ n
+
+
+
+
+
+ ſon
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ vergieng
+
+
+ ſon vergieng e in ihrer Ang. — Die
+
+
+
+
+
+
+
+ G
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ Ge
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ n
+
+ wren
+
+
+
+
+
+
+ ſ
+
+
+
+
+
+
+
+ o
+
+
+
+ n
+
+ ſon
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ angekommen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ f
+
+
+
+ e
+
+
+
+ h
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+ fehlte
+
+
+ Ge wren ſon angekommen, und es fehlte
+
+ ber die vielen Sorgen wegen deelben vergaß
+Hartkopf, der Frau Amtmnnin das ver⸗
+ſproene zu berliefern. — Ein Erpreer
+wurde an ihn abgeſit, um ihn ums Him⸗
+melswien zu ſagen, daß er das Verfproene
+glei den Augenbli berbringen mte, die
+Frau Amtmnnin htte auf ihn verlaen,
+und nun wßte e nit, was e anfangen
+ſote. Den Augembli ſote er kommen,
+ſon vergieng e in ihrer Ang. — Die
+Ge wren ſon angekommen, und es fehlte
+ihr do no an aem. —
+
+
+
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ f
+
+
+
+ p
+
+ Hartkopf
+
+
+
+
+
+
+ m
+
+
+
+ u
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ mußte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ bennen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+ Hartkopf mußte er bennen, und
+
+
+
+
+
+
+
+ m
+
+
+
+ i
+
+
+
+ t
+
+ mit
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ a
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ berbrate
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ .
+
+ es.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ mit und berbrate es. —
+
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ m
+
+ langem
+
+
+
+
+
+
+ N
+
+
+
+ a
+
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+
+
+ k
+
+
+
+ e
+
+
+
+ n
+
+ Nadenken
+
+
+
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+ fiel
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ m
+
+ ihm
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ d
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ endli
+
+
+
+
+
+
+ n
+
+
+
+ a
+
+
+
+
+
+ na
+
+
+ endli na langem Nadenken fiel es ihm er
+
+
+
+
+
+
+
+ w
+
+
+
+ i
+
+
+
+ e
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ wieder
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ .
+
+ ein.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+ Er
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ langte
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ Z
+
+
+
+ e
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+
+
+ l
+
+ Zettel
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+ aus
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ m
+
+ dem
+
+
+ wieder ein. — Er langte den Zettel aus dem
+
+
+
+
+
+
+
+ A
+
+
+
+ c
+
+
+
+ c
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ b
+
+
+
+ u
+
+ Accisbue
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+
+
+ ,
+
+ heraus,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ ſagte
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ e
+
+
+
+ r
+
+ ſeiner
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ ,
+
+ Frau,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+ Accisbue heraus, und ſagte ſeiner Frau, daß
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+
+
+ ,
+
+ das,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+ da
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ ,
+
+ wre,
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ e
+
+
+
+ y
+
+
+
+ ſ
+
+
+
+
+
+
+
+ a
+
+
+
+ ff
+
+
+
+ e
+
+
+
+ n
+
+ herbeyſaffen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ mte.
+
+
+ e das, was da wre, herbeyſaffen mte.
+
+
+
+
+
+
+
+ J
+
+
+
+ n
+
+
+
+ d
+
+
+
+ e
+
+
+
+ ß
+
+ Jndeß
+
+
+
+
+
+
+ m
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+
+
+ n
+
+ mangelten
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ i
+
+
+
+ g
+
+
+
+ e
+
+ einige
+
+
+
+
+
+
+ G
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+
+
+ l
+
+
+
+ i
+
+
+
+ a
+
+
+
+ ,
+
+
+
+ r
+
+
+
+ a
+
+ Generalia,
+
+
+ Jndeß mangelten do einige Generalia, die
+
+
+
+
+
+
+
+ a
+
+
+
+ l
+
+
+
+ ſ
+
+
+
+ o
+
+ alſo
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+
+
+ .
+
+ wegfielen.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+ Hartkopf
+
+
+
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ gieng
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+
+
+ ſelb
+
+
+ alſo wegfielen. — Hartkopf gieng ſelb
+
+ Hartkopf mußte er bennen, und
+endli na langem Nadenken fiel es ihm er
+wieder ein. — Er langte den Zettel aus dem
+Accisbue heraus, und ſagte ſeiner Frau, daß
+e das, was da wre, herbeyſaffen mte.
+Jndeß mangelten do einige Generalia, die
+alſo wegfielen. — Hartkopf gieng ſelb
+mit und berbrate es. —
+
+
+
+
diff --git a/dinglehopper/tests/data/directory-test/ocr/3-has-no-gt.xml b/dinglehopper/tests/data/directory-test/ocr/3-has-no-gt.xml
new file mode 100644
index 0000000..0e62647
--- /dev/null
+++ b/dinglehopper/tests/data/directory-test/ocr/3-has-no-gt.xml
@@ -0,0 +1,3394 @@
+
+
+
+ doculibtopagexml
+ 2019-01-08T10:25:36
+ 2019-04-26T07:11:05
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+ ber
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ v
+
+
+
+ i
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+ vielen
+
+
+
+
+
+
+ S
+
+
+
+ o
+
+
+
+ r
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ Sorgen
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ wegen
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+ e
+
+
+
+ n
+
+ deelben
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ a
+
+
+
+ ß
+
+ vergaß
+
+
+ ber die vielen Sorgen wegen deelben vergaß
+
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+ ihr
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ n
+
+
+
+ o
+
+
+
+
+
+ no
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ m
+
+
+
+ .
+
+ aem.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ ihr do no an aem. —
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+
+
+ ,
+
+ Hartkopf,
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ der
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ⸗
+
+ ver⸗
+
+
+ Hartkopf, der Frau Amtmnnin das ver⸗
+
+
+
+
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ ſproene
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ i
+
+
+
+ e
+
+
+
+ f
+
+
+
+ e
+
+
+
+ r
+
+
+
+ n
+
+
+
+ .
+
+ berliefern.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ i
+
+
+
+ n
+
+ Ein
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+
+
+ p
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+ r
+
+
+
+ e
+
+ Erpreer
+
+
+ ſproene zu berliefern. — Ein Erpreer
+
+
+
+
+
+
+
+ w
+
+
+
+ d
+
+
+
+ e
+
+
+
+ u
+
+
+
+ r
+
+ wurde
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+ an
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ a
+
+
+
+ b
+
+
+
+ g
+
+
+
+ e
+
+
+
+ ſ
+
+
+
+
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ abgeſit,
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+ um
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ u
+
+
+
+ m
+
+
+
+ s
+
+ ums
+
+
+
+
+
+
+ H
+
+
+
+ i
+
+
+
+ m
+
+
+
+ ⸗
+
+ Him⸗
+
+
+ wurde an ihn abgeſit, um ihn ums Him⸗
+
+
+
+
+
+
+
+ m
+
+
+
+ e
+
+
+
+ l
+
+
+
+ s
+
+
+
+ w
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+ melswien
+
+
+
+
+
+
+ z
+
+
+
+ u
+
+ zu
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ ſagen,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+ das
+
+
+
+
+
+
+ V
+
+
+
+ e
+
+
+
+ r
+
+
+
+ ſ
+
+
+
+ p
+
+
+
+ r
+
+
+
+ o
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+ Verſproene
+
+
+ melswien zu ſagen, daß er das Verſproene
+
+
+
+
+
+
+
+ g
+
+
+
+ l
+
+
+
+ e
+
+
+
+ i
+
+
+
+
+
+ glei
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ i
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ berbringen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ ,
+
+ mte,
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+ glei den Augenbli berbringen mte, die
+
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+ Frau
+
+
+
+
+
+
+ A
+
+
+
+ m
+
+
+
+ t
+
+
+
+ m
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ i
+
+
+
+ n
+
+ Amtmnnin
+
+
+
+
+
+
+ h
+
+
+
+
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+ htte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ f
+
+ auf
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ n
+
+ ihn
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ l
+
+
+
+ a
+
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ verlaen,
+
+
+ Frau Amtmnnin htte auf ihn verlaen,
+
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ n
+
+
+
+ u
+
+
+
+ n
+
+ nun
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ wßte
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ n
+
+
+
+ i
+
+
+
+
+
+
+
+ t
+
+
+
+ ,
+
+ nit,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ f
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+ anfangen
+
+
+ und nun wßte e nit, was e anfangen
+
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ ſote.
+
+
+
+
+
+
+ D
+
+
+
+ e
+
+
+
+ n
+
+ Den
+
+
+
+
+
+
+ A
+
+
+
+ u
+
+
+
+ g
+
+
+
+ e
+
+
+
+ n
+
+
+
+ b
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ Augenbli
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ ſote
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+ er
+
+
+
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ kommen,
+
+
+ ſote. Den Augenbli ſote er kommen,
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ i
+
+
+
+ n
+
+ in
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ r
+
+
+
+ e
+
+
+
+ r
+
+ ihrer
+
+
+
+
+
+
+ A
+
+
+
+ n
+
+
+
+ g
+
+
+
+
+
+
+
+ .
+
+ Ang.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ D
+
+
+
+ i
+
+
+
+ e
+
+ Die
+
+
+
+
+
+
+ ſ
+
+
+
+ o
+
+
+
+ n
+
+
+
+
+
+ ſon
+
+
+
+
+
+
+ v
+
+
+
+ e
+
+
+
+ r
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ vergieng
+
+
+ ſon vergieng e in ihrer Ang. — Die
+
+
+
+
+
+
+
+ G
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ Ge
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ n
+
+ wren
+
+
+
+
+
+
+ ſ
+
+
+
+
+
+
+
+ o
+
+
+
+ n
+
+ ſon
+
+
+
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ k
+
+
+
+ o
+
+
+
+ m
+
+
+
+ m
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ angekommen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ f
+
+
+
+ e
+
+
+
+ h
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+ fehlte
+
+
+ Ge wren ſon angekommen, und es fehlte
+
+ ber die vielen Sorgen wegen deelben vergaß
+Hartkopf, der Frau Amtmnnin das ver⸗
+ſproene zu berliefern. — Ein Erpreer
+wurde an ihn abgeſit, um ihn ums Him⸗
+melswien zu ſagen, daß er das Verfproene
+glei den Augenbli berbringen mte, die
+Frau Amtmnnin htte auf ihn verlaen,
+und nun wßte e nit, was e anfangen
+ſote. Den Augembli ſote er kommen,
+ſon vergieng e in ihrer Ang. — Die
+Ge wren ſon angekommen, und es fehlte
+ihr do no an aem. —
+
+
+
+
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ f
+
+
+
+ p
+
+ Hartkopf
+
+
+
+
+
+
+ m
+
+
+
+ u
+
+
+
+ ß
+
+
+
+ t
+
+
+
+ e
+
+ mußte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+
+
+
+
+ n
+
+
+
+ n
+
+
+
+ e
+
+
+
+ n
+
+
+
+ ,
+
+ bennen,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+ Hartkopf mußte er bennen, und
+
+
+
+
+
+
+
+ m
+
+
+
+ i
+
+
+
+ t
+
+ mit
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+
+
+
+
+ b
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ r
+
+
+
+ a
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+ berbrate
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ .
+
+ es.
+
+
+
+
+
+
+ —
+
+ —
+
+
+ mit und berbrate es. —
+
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ m
+
+ langem
+
+
+
+
+
+
+ N
+
+
+
+ a
+
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+
+
+ k
+
+
+
+ e
+
+
+
+ n
+
+ Nadenken
+
+
+
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+ fiel
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+ es
+
+
+
+
+
+
+ i
+
+
+
+ h
+
+
+
+ m
+
+ ihm
+
+
+
+
+
+
+ e
+
+
+
+ r
+
+
+
+
+
+ er
+
+
+
+
+
+
+ e
+
+
+
+ n
+
+
+
+ d
+
+
+
+ l
+
+
+
+ i
+
+
+
+
+
+ endli
+
+
+
+
+
+
+ n
+
+
+
+ a
+
+
+
+
+
+ na
+
+
+ endli na langem Nadenken fiel es ihm er
+
+
+
+
+
+
+
+ w
+
+
+
+ i
+
+
+
+ e
+
+
+
+ d
+
+
+
+ e
+
+
+
+ r
+
+ wieder
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ .
+
+ ein.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ E
+
+
+
+ r
+
+ Er
+
+
+
+
+
+
+ l
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ langte
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ n
+
+ den
+
+
+
+
+
+
+ Z
+
+
+
+ e
+
+
+
+ t
+
+
+
+ t
+
+
+
+ e
+
+
+
+ l
+
+ Zettel
+
+
+
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+ aus
+
+
+
+
+
+
+ d
+
+
+
+ e
+
+
+
+ m
+
+ dem
+
+
+ wieder ein. — Er langte den Zettel aus dem
+
+
+
+
+
+
+
+ A
+
+
+
+ c
+
+
+
+ c
+
+
+
+ i
+
+
+
+
+
+
+
+ e
+
+
+
+ s
+
+
+
+ b
+
+
+
+ u
+
+ Accisbue
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ s
+
+
+
+ ,
+
+ heraus,
+
+
+
+
+
+
+ u
+
+
+
+ n
+
+
+
+ d
+
+ und
+
+
+
+
+
+
+ ſ
+
+
+
+ a
+
+
+
+ g
+
+
+
+ t
+
+
+
+ e
+
+ ſagte
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ e
+
+
+
+ r
+
+ ſeiner
+
+
+
+
+
+
+ F
+
+
+
+ r
+
+
+
+ a
+
+
+
+ u
+
+
+
+ ,
+
+ Frau,
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ ß
+
+ daß
+
+
+ Accisbue heraus, und ſagte ſeiner Frau, daß
+
+
+
+
+
+
+
+
+
+
+
+ e
+
+ e
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+
+
+ s
+
+
+
+ ,
+
+ das,
+
+
+
+
+
+
+ w
+
+
+
+ a
+
+
+
+ s
+
+ was
+
+
+
+
+
+
+ d
+
+
+
+ a
+
+ da
+
+
+
+
+
+
+ w
+
+
+
+
+
+
+
+ r
+
+
+
+ e
+
+
+
+ ,
+
+ wre,
+
+
+
+
+
+
+ h
+
+
+
+ e
+
+
+
+ r
+
+
+
+ b
+
+
+
+ e
+
+
+
+ y
+
+
+
+ ſ
+
+
+
+
+
+
+
+ a
+
+
+
+ ff
+
+
+
+ e
+
+
+
+ n
+
+ herbeyſaffen
+
+
+
+
+
+
+ m
+
+
+
+
+
+
+
+
+
+
+
+ t
+
+
+
+ e
+
+
+
+ .
+
+ mte.
+
+
+ e das, was da wre, herbeyſaffen mte.
+
+
+
+
+
+
+
+ J
+
+
+
+ n
+
+
+
+ d
+
+
+
+ e
+
+
+
+ ß
+
+ Jndeß
+
+
+
+
+
+
+ m
+
+
+
+ a
+
+
+
+ n
+
+
+
+ g
+
+
+
+ e
+
+
+
+ l
+
+
+
+ t
+
+
+
+ e
+
+
+
+ n
+
+ mangelten
+
+
+
+
+
+
+ d
+
+
+
+ i
+
+
+
+ e
+
+ die
+
+
+
+
+
+
+ d
+
+
+
+ o
+
+
+
+
+
+ do
+
+
+
+
+
+
+ e
+
+
+
+ i
+
+
+
+ n
+
+
+
+ i
+
+
+
+ g
+
+
+
+ e
+
+ einige
+
+
+
+
+
+
+ G
+
+
+
+ e
+
+
+
+ n
+
+
+
+ e
+
+
+
+ l
+
+
+
+ i
+
+
+
+ a
+
+
+
+ ,
+
+
+
+ r
+
+
+
+ a
+
+ Generalia,
+
+
+ Jndeß mangelten do einige Generalia, die
+
+
+
+
+
+
+
+ a
+
+
+
+ l
+
+
+
+ ſ
+
+
+
+ o
+
+ alſo
+
+
+
+
+
+
+ w
+
+
+
+ e
+
+
+
+ g
+
+
+
+ fi
+
+
+
+ e
+
+
+
+ l
+
+
+
+ e
+
+
+
+ n
+
+
+
+ .
+
+ wegfielen.
+
+
+
+
+
+
+ —
+
+ —
+
+
+
+
+
+
+ H
+
+
+
+ a
+
+
+
+ r
+
+
+
+ t
+
+
+
+ k
+
+
+
+ o
+
+
+
+ p
+
+
+
+ f
+
+ Hartkopf
+
+
+
+
+
+
+ g
+
+
+
+ i
+
+
+
+ e
+
+
+
+ n
+
+
+
+ g
+
+ gieng
+
+
+
+
+
+
+ ſ
+
+
+
+ e
+
+
+
+ l
+
+
+
+ b
+
+
+
+
+
+ ſelb
+
+
+ alſo wegfielen. — Hartkopf gieng ſelb
+
+ Hartkopf mußte er bennen, und
+endli na langem Nadenken fiel es ihm er
+wieder ein. — Er langte den Zettel aus dem
+Accisbue heraus, und ſagte ſeiner Frau, daß
+e das, was da wre, herbeyſaffen mte.
+Jndeß mangelten do einige Generalia, die
+alſo wegfielen. — Hartkopf gieng ſelb
+mit und berbrate es. —
+
+
+
+
diff --git a/dinglehopper/tests/test_integ_cli_dir.py b/dinglehopper/tests/test_integ_cli_dir.py
new file mode 100644
index 0000000..435b452
--- /dev/null
+++ b/dinglehopper/tests/test_integ_cli_dir.py
@@ -0,0 +1,41 @@
+import os
+import pytest
+from ocrd_utils import initLogging
+from dinglehopper.cli import process_dir
+
+data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+
+
+@pytest.mark.integration
+def test_cli_directory(tmp_path):
+ """
+ Test that the cli/process_dir() processes a directory of files and
+ yields JSON and HTML reports.
+ """
+
+ initLogging()
+ process_dir(os.path.join(data_dir, "directory-test", "gt"),
+ os.path.join(data_dir, "directory-test", "ocr"),
+ "report", str(tmp_path / "reports"), False, True,
+ "line")
+
+ assert os.path.exists(tmp_path / "reports/1.xml-report.json")
+ assert os.path.exists(tmp_path / "reports/1.xml-report.html")
+ assert os.path.exists(tmp_path / "reports/2.xml-report.json")
+ assert os.path.exists(tmp_path / "reports/2.xml-report.html")
+
+
+@pytest.mark.integration
+def test_cli_fail_without_gt(tmp_path):
+ """
+ Test that the cli/process_dir skips a file if there is no corresponding file
+ in the other directory.
+ """
+
+ initLogging()
+ process_dir(os.path.join(data_dir, "directory-test", "gt"),
+ os.path.join(data_dir, "directory-test", "ocr"),
+ "report", str(tmp_path / "reports"), False, True,
+ "line")
+
+ assert len(os.listdir(tmp_path / "reports")) == 2 * 2
diff --git a/dinglehopper/tests/test_integ_differences.py b/dinglehopper/tests/test_integ_differences.py
new file mode 100644
index 0000000..3590317
--- /dev/null
+++ b/dinglehopper/tests/test_integ_differences.py
@@ -0,0 +1,27 @@
+import json
+import os
+import pytest
+from ocrd_utils import initLogging
+from dinglehopper.cli import process
+
+data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+
+
+@pytest.mark.integration
+def test_cli_differences(tmp_path):
+ """Test that the cli/process() yields a JSON report that includes
+ the differences found between the GT and OCR text"""
+
+ initLogging()
+ process(os.path.join(data_dir, "test-gt.page2018.xml"),
+ os.path.join(data_dir, "test-fake-ocr.page2018.xml"),
+ "report", tmp_path, differences=True)
+
+ assert os.path.exists(tmp_path / "report.json")
+
+ with open(tmp_path / "report.json", "r") as jsonf:
+ j = json.load(jsonf)
+
+ assert j["differences"] == {"character_level": {'n :: m': 1, 'ſ :: f': 1},
+ "word_level": {'Augenblick :: Augemblick': 1,
+ 'Verſprochene :: Verfprochene': 1}}
diff --git a/dinglehopper/tests/test_integ_summarize.py b/dinglehopper/tests/test_integ_summarize.py
new file mode 100644
index 0000000..0908152
--- /dev/null
+++ b/dinglehopper/tests/test_integ_summarize.py
@@ -0,0 +1,101 @@
+import json
+import os
+import pytest
+from .util import working_directory
+from .. import cli_summarize
+
+expected_cer_avg = (0.05 + 0.10) / 2  # mean "cer" of the two mock reports
+expected_wer_avg = (0.15 + 0.20) / 2  # mean "wer" of the two mock reports
+expected_diff_c = {"a": 30, "b": 50}  # summed character-level difference counts
+expected_diff_w = {"c": 70, "d": 90}  # summed word-level difference counts
+
+
+@pytest.fixture
+def create_summaries(tmp_path):
+ """Create two summary reports with mock data"""
+ reports_dirname = tmp_path / "reports"
+ reports_dirname.mkdir()
+
+ report1 = {"cer": 0.05, "wer": 0.15,
+ "differences": {
+ "character_level": {"a": 10, "b": 20},
+ "word_level": {"c": 30, "d": 40}
+ }}
+ report2 = {"cer": 0.10, "wer": 0.20,
+ "differences": {
+ "character_level": {"a": 20, "b": 30},
+ "word_level": {"c": 40, "d": 50}
+ }}
+
+ with open(os.path.join(reports_dirname, "report1.json"), "w") as f:
+ json.dump(report1, f)
+ with open(os.path.join(reports_dirname, "report2.json"), "w") as f:
+ json.dump(report2, f)
+
+ return str(reports_dirname)
+
+
+@pytest.mark.integration
+def test_cli_summarize_json(tmp_path, create_summaries):
+ """Test that the cli/process() yields a summarized JSON report"""
+ with working_directory(tmp_path):
+ reports_dirname = create_summaries
+ cli_summarize.process(reports_dirname)
+
+ with open(os.path.join(reports_dirname, "summary.json"), "r") as f:
+ summary_data = json.load(f)
+
+
+ assert summary_data["num_reports"] == 2
+ assert summary_data["cer_avg"] == expected_cer_avg
+ assert summary_data["wer_avg"] == expected_wer_avg
+ assert summary_data["differences"]["character_level"] == expected_diff_c
+ assert summary_data["differences"]["word_level"] == expected_diff_w
+
+
+@pytest.mark.integration
+def test_cli_summarize_html(tmp_path, create_summaries):
+ """Test that the cli/process() yields an HTML report"""
+ with working_directory(tmp_path):
+ reports_dirname = create_summaries
+ cli_summarize.process(reports_dirname)
+
+ html_file = os.path.join(reports_dirname, "summary.html")
+ assert os.path.isfile(html_file)
+
+ with open(html_file, "r") as f:
+ contents = f.read()
+
+ assert len(contents) > 0
+ assert "Number of reports: 2" in contents
+ assert f"Average CER: {round(expected_cer_avg, 4)}" in contents
+ assert f"Average WER: {round(expected_wer_avg, 4)}" in contents
+
+
+@pytest.mark.integration
+def test_cli_summarize_html_skip_invalid(tmp_path, create_summaries):
+ """
+ Test that the cli/process() does not include reports that are missing a WER value.
+ """
+ with working_directory(tmp_path):
+ reports_dirname = create_summaries
+
+ # This third report has no WER value and should not be included in the summary
+ report3 = {"cer": 0.10,
+ "differences": {
+ "character_level": {"a": 20, "b": 30},
+ "word_level": {"c": 40, "d": 50}
+ }}
+
+ with open(os.path.join(reports_dirname, "report3-missing-wer.json"), "w") as f:
+ json.dump(report3, f)
+
+ cli_summarize.process(reports_dirname)
+
+ html_file = os.path.join(reports_dirname, "summary.html")
+ assert os.path.isfile(html_file)
+
+ with open(html_file, "r") as f:
+ contents = f.read()
+
+ assert "Number of reports: 2" in contents # report3 is not included
diff --git a/setup.py b/setup.py
index d7a3776..476ec8f 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@
"dinglehopper=dinglehopper.cli:main",
"dinglehopper-line-dirs=dinglehopper.cli_line_dirs:main",
"dinglehopper-extract=dinglehopper.cli_extract:main",
+ "dinglehopper-summarize=dinglehopper.cli_summarize:main",
"ocrd-dinglehopper=dinglehopper.ocrd_cli:ocrd_dinglehopper",
]
},