From d034dde63a830de78df8e057b873cc3a964db377 Mon Sep 17 00:00:00 2001 From: Paul Lam Date: Tue, 10 Oct 2023 08:33:37 +0900 Subject: [PATCH] fixed _gen_document_dict() to fall back gracefully if a doc is missing a DOI --- mind_palace/extract.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mind_palace/extract.py b/mind_palace/extract.py index 7100c9b..46eebda 100644 --- a/mind_palace/extract.py +++ b/mind_palace/extract.py @@ -7,7 +7,9 @@ def _gen_document_dict(file_path) -> dict[str, TextNode]: xml = docs.load_tei_xml(file_path) doi = xml.header.doi - assert doi is not None + if doi is None: + print(f"DOI is None for {file_path}. Replacing with title instead.") + doi = xml.header.title try: title_node = docs.title(xml, doi)