Skip to content

Commit

Permalink
update tests: make_xml takes input arg
Browse files: browse the repository at this point in the history
  • Loading branch information
linxOD committed Sep 19, 2022
1 parent 889515d commit 471b9e4
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 29 deletions.
65 changes: 38 additions & 27 deletions freud_api_crawler/freud_api_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,32 +1203,16 @@ def yield_works(url, simple=True):
yield item


def make_xml(workpath, out_dir, save=False):
def make_xml(workpath, out_dir, dump, save=False, test=False):
"""serializes a manifestation as XML/TEI document
:param save: if set, a XML/TEI file `{workpath}` is saved
:param type: bool
:return: A lxml.etree
"""
data = glob.glob(os.path.join(out_dir, workpath, "data", "*.json"))
doc = []
for x in data:
try:
with open(x, 'r', encoding='utf8') as f:
json_dump = json.load(f)
except FileNotFoundError:
print(f"file {x} not found, run get_man_json_dump() function first")
json_dump['publicationHistory'] = []
history = glob.glob(os.path.join(out_dir, workpath, "data", "*.json"))
for x in history:
try:
with open(x, 'r', encoding='utf8') as f:
json_dump['publicationHistory'].append(
json.load(f)
)
except FileNotFoundError:
print("no json dump found")
if test:
json_dump = dump
templateLoader = jinja2.PackageLoader(
"freud_api_crawler", "templates"
)
Expand All @@ -1239,11 +1223,38 @@ def make_xml(workpath, out_dir, save=False):
tei = ET.fromstring(tei)
transform = ET.XSLT(XSL_DOC)
tei = transform(tei)
if save:
signatur = json_dump["signature"]
filename = signatur.replace("/", "__")
savepath = os.path.join(out_dir, workpath)
with open(os.path.join(savepath, f"sfe-{filename}.xml"), 'wb') as f:
f.write(ET.tostring(tei, pretty_print=True, encoding="utf-8"))
doc.append(tei)
return doc[0]
else:
data = glob.glob(os.path.join(out_dir, workpath, "data", "*.json"))
for x in data:
try:
with open(x, 'r', encoding='utf8') as f:
json_dump = json.load(f)
except FileNotFoundError:
print(f"file {x} not found, run get_man_json_dump() function first")
json_dump['publicationHistory'] = []
history = glob.glob(os.path.join(out_dir, workpath, "data", "*.json"))
for x in history:
try:
with open(x, 'r', encoding='utf8') as f:
json_dump['publicationHistory'].append(
json.load(f)
)
except FileNotFoundError:
print("no json dump found")
templateLoader = jinja2.PackageLoader(
"freud_api_crawler", "templates"
)
templateEnv = jinja2.Environment(loader=templateLoader)
template = templateEnv.get_template('./tei.xml')
tei = template.render({"objects": [json_dump]})
tei = re.sub(r'\s+$', '', tei, flags=re.MULTILINE)
tei = ET.fromstring(tei)
transform = ET.XSLT(XSL_DOC)
tei = transform(tei)
if save:
signatur = json_dump["signature"]
filename = signatur.replace("/", "__")
savepath = os.path.join(out_dir, workpath)
with open(os.path.join(savepath, f"sfe-{filename}.xml"), 'wb') as f:
f.write(ET.tostring(tei, pretty_print=True, encoding="utf-8"))
return tei
4 changes: 2 additions & 2 deletions tests/test_freud_api_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ def test_013_check_dummy_tei(self):
def test_014_check_tei_serialiazer(self):
"""test tei serialisation"""
frd_obj = FRD_MANIFESTATION
frd_obj.get_man_json_dump(lmt=False)
xml = frd.make_xml(save=False, out_dir=SAVE_DIR, workpath=WERK_PATH)
dump = frd_obj.get_man_json_dump(lmt=False)
xml = frd.make_xml(save=False, out_dir=SAVE_DIR, dump=dump, workpath=WERK_PATH, test=True)
xml_str = ET.tostring(xml).decode('utf-8')
print(type(xml), type(xml_str))
self.assertTrue(frd_obj.manifestation_id in xml_str)
Expand Down

0 comments on commit 471b9e4

Please sign in to comment.