-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from TheDataGuild/refactor/docs-module
Refactor/docs module
- Loading branch information
Showing
31 changed files
with
66 additions
and
90 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from llama_index.schema import TextNode, NodeRelationship, RelatedNodeInfo | ||
import grobid_tei_xml | ||
|
||
|
||
def load_tei_xml(file_path):
    """Read a TEI XML file from disk and parse it with ``grobid_tei_xml``.

    Args:
        file_path: Path of the TEI XML file to load.

    Returns:
        The result of ``grobid_tei_xml.parse_document_xml`` applied to the
        file's full text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    print(f"Loading {file_path}")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as xml_file:
        return grobid_tei_xml.parse_document_xml(xml_file.read())
|
||
|
||
def title(xml, doc_id):
    """Build the title node of a parsed document.

    The node text is ``xml.header.title`` and the node id is
    ``"<doc_id>-title"``.
    """
    title_text = xml.header.title
    node_id = f"{doc_id}-title"
    return TextNode(text=title_text, id_=node_id)
|
||
|
||
def abstract(xml, doc_id):
    """Build the abstract node of a parsed document.

    The node text is ``xml.abstract`` and the node id is
    ``"<doc_id>-abstract"``.
    """
    abstract_text = xml.abstract
    node_id = f"{doc_id}-abstract"
    return TextNode(text=abstract_text, id_=node_id)
|
||
|
||
def set_relationships(title_node, abstract_node):
    """Register the title node as the parent of the abstract node.

    Mutates ``abstract_node.relationships`` in place and returns None.
    """
    parent_info = RelatedNodeInfo(node_id=title_node.node_id)
    abstract_node.relationships[NodeRelationship.PARENT] = parent_info
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file removed
BIN
-2.41 MB
...sating Tandem Microbubble for Localized and Directional Single-Cell Membrane Poration.pdf
Binary file not shown.
Binary file removed
BIN
-1.42 MB
...ound Med Biol_Sonoporation as a Cellular Stress Induction of Morphological Repression.pdf
Binary file not shown.
Binary file removed
BIN
-5.04 MB
...from-steve-aug-22/2013_Ultrasound_in_Medicine_and_Biol_Hu_ Sonoporation_Pore_Dynamics.pdf
Binary file not shown.
Binary file removed
BIN
-1.35 MB
...om-steve-aug-22/2014_J_Royal_Soc_Interface_Yu_Single-site_sonoporation_disrupts_actin.pdf
Binary file not shown.
Binary file removed
BIN
-1.25 MB
...dfs-from-steve-aug-22/2015_Royal_Society_Interface_Yu_Membrane blebbing as a recovery.pdf
Binary file not shown.
Binary file removed
BIN
-1.19 MB
...ed Release_Mechanistic understanding the bioeffects of ultrasound-driven microbubbles.pdf
Binary file not shown.
Binary file removed
BIN
-2.4 MB
...nsity Pulsed Ultrasound and Sonoporation_ Platform Design and Flow Cytometry Protocol.pdf
Binary file not shown.
Binary file removed
BIN
-1.47 MB
...ics_Evaluation of the Properties of Daughter Bubbles Generated by Inertial Cavitation.pdf
Binary file not shown.
Binary file removed
BIN
-8.28 MB
...urces/pdfs/12-pdfs-from-steve-aug-22/2020_Ultrasound in Medicine and BioIogy_Yu_Blebs.pdf
Binary file not shown.
Binary file removed
BIN
-2.3 MB
.../pdfs/12-pdfs-from-steve-aug-22/2021_Nature Scientific Reports_Sonoporation generates.pdf
Binary file not shown.
Binary file removed
BIN
-2.33 MB
...g-22/2021_Scientific Reports Nature_Sonoporation generates downstream impact in vitro.pdf
Binary file not shown.
Binary file removed
BIN
-2.98 MB
...-aug-22/2022_BME Frontiers_Ultrasound-Mediated Drug Delivery Sonoporation Mechanisms,.pdf
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,4 @@ | |
) | ||
|
||
import extract | ||
import docs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from .context import docs | ||
import grobid_tei_xml.types as grobid_types | ||
from llama_index.schema import TextNode, NodeRelationship | ||
|
||
XML_PATH = "./resources/xmls/12-pdfs-from-steve-aug-22/" | ||
|
||
|
||
def test_node_relationships():
    """set_relationships returns None and links the abstract to the title."""
    parent = TextNode(text="this is title")
    child = TextNode(text="this is abstract")

    outcome = docs.set_relationships(parent, child)

    assert outcome is None
    linked_id = child.relationships[NodeRelationship.PARENT].node_id
    assert linked_id == parent.node_id
|
||
|
||
def test_load_tei_xml():
    """Loading a sample TEI XML file yields a GrobidDocument."""
    sample_path = (
        XML_PATH
        + "2010_PhysRevLett_Pulsating Tandem Microbubble for Localized and Directional Single-Cell Membrane Poration.pdf.tei.xml"
    )
    parsed = docs.load_tei_xml(sample_path)
    assert isinstance(parsed, grobid_types.GrobidDocument)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,18 @@ | ||
from .context import extract | ||
import grobid_tei_xml.types as grobid_types | ||
from llama_index.schema import TextNode, NodeRelationship | ||
|
||
XML_PATH = "./resources/pdfs/12-pdfs-from-steve-aug-22/xml/" | ||
|
||
|
||
def test_node_relationships(): | ||
title_node = TextNode(text="this is title") | ||
abstract_node = TextNode(text="this is abstract") | ||
assert extract.set_relationships(title_node, abstract_node) is None | ||
assert ( | ||
abstract_node.relationships[NodeRelationship.PARENT].node_id | ||
== title_node.node_id | ||
) | ||
|
||
|
||
def test_load_tei_xml(): | ||
xml = extract._load_tei_xml( | ||
XML_PATH | ||
+ "2010_PhysRevLett_Pulsating Tandem Microbubble for Localized and Directional Single-Cell Membrane Poration.pdf.tei.xml" | ||
) | ||
assert isinstance(xml, grobid_types.GrobidDocument) | ||
from . import test_docs | ||
|
||
|
||
def test_gen_document_dict():
    """Every value produced by _gen_document_dict for a sample file is a TextNode."""
    # Post-commit form: _gen_document_dict is given the XML path directly;
    # the earlier separate extract._load_tei_xml step was removed.
    nodes_dict = extract._gen_document_dict(
        test_docs.XML_PATH
        + "2010_PhysRevLett_Pulsating Tandem Microbubble for Localized and Directional Single-Cell Membrane Poration.pdf.tei.xml"
    )
    for node in nodes_dict.values():
        assert isinstance(node, extract.TextNode)
|
||
|
||
def test_seed_nodes():
    """seed_nodes returns a list whose items are all TextNode instances."""
    # Post-commit form: the shared XML_PATH from test_docs is passed straight
    # in, replacing the old local ``input_dir`` alias.
    nodes = extract.seed_nodes(test_docs.XML_PATH)
    assert isinstance(nodes, list)
    for node in nodes:
        assert isinstance(node, extract.TextNode)