Skip to content

Commit

Permalink
add docs.cite_authors()
Browse files Browse the repository at this point in the history
  • Loading branch information
Quantisan committed Sep 26, 2023
1 parent cc42a96 commit db6696b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
10 changes: 9 additions & 1 deletion mind_palace/docs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from llama_index.schema import TextNode, NodeRelationship, RelatedNodeInfo
import grobid_tei_xml
from llama_index.schema import NodeRelationship, RelatedNodeInfo, TextNode


def load_tei_xml(file_path):
Expand All @@ -8,6 +8,14 @@ def load_tei_xml(file_path):
return grobid_tei_xml.parse_document_xml(xml_file.read())


def cite_authors(xml):
    """Return a short citation string naming the document's first author.

    The first entry of ``xml.header.authors`` is rendered as
    ``"<surname>, <given_name>"``. If there are further authors the result
    ends with ``", et al."``; otherwise it ends with ``"."``.

    Args:
        xml: Parsed document exposing ``header.authors``, a sequence of
            objects with ``surname`` and ``given_name`` attributes.

    Returns:
        The citation string, or ``""`` when the document lists no authors.
    """
    authors = xml.header.authors
    if not authors:
        # Nothing to cite; avoid an IndexError on documents without authors.
        return ""

    first_author = authors[0]
    # Skip missing name parts so a None surname/given name is never
    # rendered literally as "None" in the citation.
    name_parts = [
        part for part in (first_author.surname, first_author.given_name) if part
    ]
    display_name = ", ".join(name_parts)
    suffix = ", et al." if len(authors) > 1 else "."
    return display_name + suffix


def title(xml, doc_id):
return TextNode(
text=xml.header.title,
Expand Down
19 changes: 17 additions & 2 deletions tests/unit/test_docs.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
from tests.context import docs
from unittest.mock import MagicMock

import grobid_tei_xml.types as grobid_types
from llama_index.schema import TextNode, NodeRelationship
from llama_index.schema import NodeRelationship, TextNode

from tests.context import docs

XML_PATH = "./resources/xmls/12-pdfs-from-steve-aug-22/"


def test_cite_authors():
    """cite_authors names the first author and marks any co-authors."""
    john = MagicMock(surname="Doe", given_name="John")
    jane = MagicMock(surname="Smith", given_name="Jane")

    # Two authors: the first is named and the rest collapse into "et al."
    multi_author_doc = MagicMock()
    multi_author_doc.header.authors = [john, jane]
    assert docs.cite_authors(multi_author_doc) == "Doe, John, et al."

    # A sole author: the name is simply terminated with a period.
    single_author_doc = MagicMock()
    single_author_doc.header.authors = [john]
    assert docs.cite_authors(single_author_doc) == "Doe, John."


def gen_nodes():
return [
TextNode(text="this is first"),
Expand Down

0 comments on commit db6696b

Please sign in to comment.