Skip to content

Commit

Permalink
Merge pull request #109 from Auden-Musulin-Papers/dev
Browse files (browse the repository at this point in the history)
Release v0.5.0
  • Loading branch information
linxOD authored Dec 14, 2023
2 parents (0c8eb5e + eb1d665), commit 1bdecba
Show file tree
Hide file tree
Showing 35 changed files with 1,901 additions and 1,007 deletions.
9 changes: 9 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[flake8]
ignore = D203 W504 W503
max-line-length = 120
exclude =
.git
.tox
build
dist
env
107 changes: 102 additions & 5 deletions amp-app.xpr
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@
<options>
<serialized xml:space="preserve">
<serializableOrderedMap>
<entry>
<String>additional.frameworks.directories</String>
<String-array>
<String>${pd}/framework</String>
</String-array>
</entry>
<entry>
<String>frameworks.directory.url.with.editor.variables</String>
<String>file:/home/daniel/Documents/ERC-TibSchol/TEI-curation/framework</String>
</entry>
<entry>
<String>key.editor.document.type.custom.locations.option.pane</String>
<Boolean>true</Boolean>
</entry>
<entry>
<String>key.editor.format.xml.option.pane</String>
<Boolean>true</Boolean>
Expand Down Expand Up @@ -34,11 +48,11 @@
</scenarioAssociation>
<scenarioAssociation>
<field name="url">
<String>html/css/style.css</String>
<String>data/editions/</String>
</field>
<field name="scenarioIds">
<list>
<String>search</String>
<String>editions</String>
</list>
</field>
<field name="scenarioTypes">
Expand All @@ -54,11 +68,11 @@
</scenarioAssociation>
<scenarioAssociation>
<field name="url">
<String>data/editions/</String>
<String>html/css/style.css</String>
</field>
<field name="scenarioIds">
<list>
<String>editions</String>
<String>search</String>
</list>
</field>
<field name="scenarioTypes">
Expand Down Expand Up @@ -526,6 +540,89 @@
<String-array/>
</field>
</scenario>
<scenario>
<field name="advancedOptionsMap">
<null/>
</field>
<field name="name">
<String>cv</String>
</field>
<field name="baseURL">
<String></String>
</field>
<field name="footerURL">
<String></String>
</field>
<field name="fOPMethod">
<String>pdf</String>
</field>
<field name="fOProcessorName">
<String>Apache FOP</String>
</field>
<field name="headerURL">
<String></String>
</field>
<field name="inputXSLURL">
<String>${pdu}/xslt/computer-vision.xsl</String>
</field>
<field name="inputXMLURL">
<String>${currentFileURL}</String>
</field>
<field name="defaultScenario">
<Boolean>false</Boolean>
</field>
<field name="isFOPPerforming">
<Boolean>false</Boolean>
</field>
<field name="type">
<String>XSL</String>
</field>
<field name="saveAs">
<Boolean>true</Boolean>
</field>
<field name="openInBrowser">
<Boolean>false</Boolean>
</field>
<field name="outputResource">
<String>${pd}/html/cv.html</String>
</field>
<field name="openOtherLocationInBrowser">
<Boolean>false</Boolean>
</field>
<field name="locationToOpenInBrowserURL">
<null/>
</field>
<field name="openInEditor">
<Boolean>false</Boolean>
</field>
<field name="showInHTMLPane">
<Boolean>false</Boolean>
</field>
<field name="showInXMLPane">
<Boolean>false</Boolean>
</field>
<field name="showInSVGPane">
<Boolean>false</Boolean>
</field>
<field name="showInResultSetPane">
<Boolean>false</Boolean>
</field>
<field name="useXSLTInput">
<Boolean>true</Boolean>
</field>
<field name="xsltParams">
<list/>
</field>
<field name="cascadingStylesheets">
<String-array/>
</field>
<field name="xslTransformer">
<String>Saxon-PE</String>
</field>
<field name="extensionURLs">
<String-array/>
</field>
</scenario>
<scenario>
<field name="advancedOptionsMap">
<null/>
Expand Down Expand Up @@ -650,7 +747,7 @@
<Boolean>true</Boolean>
</field>
<field name="openInBrowser">
<Boolean>true</Boolean>
<Boolean>false</Boolean>
</field>
<field name="outputResource">
<String>${pd}/html/${cfn}.html</String>
Expand Down
6 changes: 3 additions & 3 deletions build_app/ant/copy-task.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<property name="data_editions_am" value="${basedir}/data/editions/additional-materials"/> -->
<property name="data_editions_c" value="${basedir}/data/editions/correspondence"/>
<property name="data_editions_p" value="${basedir}/data/editions/photos"/>
<property name="xsl_editions" value="${basedir}/xslt/preprocess.xsl"/>
<!-- <property name="xsl_editions" value="${basedir}/xslt/preprocess.xsl"/> -->
<property name="indices" value="${basedir}/data/indices"/>
<property name="geojson" value="${basedir}/html/geo"/>

Expand All @@ -19,10 +19,10 @@
</fileset>
</copy> -->

<xslt style="${xsl_editions}" basedir="${data_editions}" destdir="${data_editions}" includes="*.xml">
<!-- <xslt style="${xsl_editions}" basedir="${data_editions}" destdir="${data_editions}" includes="*.xml">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
<classpath location="${basedir}/saxon/saxon9he.jar"/>
</xslt>
</xslt> -->

<move todir="${data_editions}" includeemptydirs="false">
<fileset dir="${data_editions}">
Expand Down
4 changes: 2 additions & 2 deletions build_app/python/convert2jp2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os, glob
import os
import glob
from PIL import Image

savepath = "./data/images/update/"
Expand All @@ -10,7 +11,6 @@
outfile = f + ".jpg"
outfile = outfile.split("/")[-1]
outfile = f"{savepath}{outfile}"

print(outfile)
if infile != outfile:
try:
Expand Down
15 changes: 9 additions & 6 deletions build_app/python/make_ts_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
},
{
'name': 'title',
'type': 'string'
'type': 'string',
},
{
'name': 'full_text',
'type': 'string'
'type': 'string',
},
{
'name': 'year',
Expand Down Expand Up @@ -70,12 +70,13 @@

client.collections.create(current_schema)


def get_entities(ent_type, ent_node, ent_name):
entities = []
e_path = f'.//tei:rs[@type="{ent_type}"]/@ref'
for p in body:
ent = p.xpath(e_path, namespaces={'tei': "http://www.tei-c.org/ns/1.0"})
ref = [ref.replace("#", "") for e in ent if len(ent) > 0 for ref in e.split()]
ref = [ref.replace("#", "") for e in ent if len(ent) > 0 for ref in e.split()]
for r in ref:
p_path = f'.//tei:{ent_node}[@xml:id="{r}"]//tei:{ent_name}[1]'
en = doc.any_xpath(p_path)
Expand All @@ -88,14 +89,16 @@ def get_entities(ent_type, ent_node, ent_name):
f.write(f"{r} in {record['id']}\n")
return [ent for ent in sorted(set(entities))]


records = []
cfts_records = []
for x in tqdm(files, total=len(files)):
doc = TeiReader(xml=x,xsl='./xslt/preprocess_typesense.xsl')
doc = TeiReader(xml=x, xsl='./xslt/preprocess_typesense.xsl')
facs = doc.any_xpath('.//tei:body/tei:div/tei:pb/@facs')
pages = 0
for v in facs:
p_group = f".//tei:body/tei:div/tei:p[preceding-sibling::tei:pb[1]/@facs='{v}']|.//tei:body/tei:div/tei:lg[preceding-sibling::tei:pb[1]/@facs='{v}']"
p_group = f""".//tei:body/tei:div/tei:p[preceding-sibling::tei:pb[1]/@facs='{v}']|
.//tei:body/tei:div/tei:lg[preceding-sibling::tei:pb[1]/@facs='{v}']"""
body = doc.any_xpath(p_group)
pages += 1
cfts_record = {
Expand Down Expand Up @@ -160,4 +163,4 @@ def get_entities(ent_type, ent_node, ent_name):

make_index = CFTS_COLLECTION.documents.import_(cfts_records, {'action': 'upsert'})
print(make_index)
print('done with cfts-index amp')
print('done with cfts-index amp')
15 changes: 9 additions & 6 deletions build_app/python/make_ts_index_local.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import glob
import os

import lxml.etree as ET
from typesense import Client
from typesense.api_call import ObjectNotFound
# import lxml.etree as ET
# from typesense import Client
# from typesense.api_call import ObjectNotFound
from acdh_tei_pyutils.tei import TeiReader
from tqdm import tqdm

Expand Down Expand Up @@ -78,12 +78,13 @@

# client.collections.create(current_schema)


def get_entities(ent_type, ent_node, ent_name):
entities = []
e_path = f'.//tei:rs[@type="{ent_type}"]/@ref'
for p in body:
ent = p.xpath(e_path, namespaces={'tei': "http://www.tei-c.org/ns/1.0"})
ref = [ref.replace("#", "") for e in ent if len(ent) > 0 for ref in e.split()]
ref = [ref.replace("#", "") for e in ent if len(ent) > 0 for ref in e.split()]
for r in ref:
p_path = f'.//tei:{ent_node}[@xml:id="{r}"]//tei:{ent_name}[1]'
en = doc.any_xpath(p_path)
Expand All @@ -96,13 +97,15 @@ def get_entities(ent_type, ent_node, ent_name):
f.write(f"{r} in {record['id']}\n")
return [ent for ent in sorted(set(entities))]


records = []
for x in tqdm(files, total=len(files)):
doc = TeiReader(xml=x,xsl='./xslt/preprocess_typesense.xsl')
doc = TeiReader(xml=x, xsl='./xslt/preprocess_typesense.xsl')
facs = doc.any_xpath('.//tei:body/tei:div/tei:pb/@facs')
pages = 0
for v in facs:
p_group = f".//tei:body/tei:div/tei:p[preceding-sibling::tei:pb[1]/@facs='{v}']|.//tei:body/tei:div/tei:lg[preceding-sibling::tei:pb[1]/@facs='{v}']"
p_group = f""".//tei:body/tei:div/tei:p[preceding-sibling::tei:pb[1]/@facs='{v}']|
.//tei:body/tei:div/tei:lg[preceding-sibling::tei:pb[1]/@facs='{v}']"""
body = doc.any_xpath(p_group)
pages += 1
record = {}
Expand Down
12 changes: 1 addition & 11 deletions build_app/shell/fetch_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,4 @@ mv amp-data-main/data ./data
rm -rf amp-data-main
rm main.zip

rm -rf ./data/tmp

# rm dev.zip
# rm -rf ./data
# wget https://github.com/Auden-Musulin-Papers/amp-data/archive/refs/heads/dev.zip
# unzip dev
# mv amp-data-dev/data ./data
# rm -rf amp-data-dev
# rm dev.zip

# rm -rf ./data/tmp
rm -rf ./data/tmp
Loading

0 comments on commit 1bdecba

Please sign in to comment.