Skip to content

Commit

Permalink
polished up
Browse files (browse the repository at this point in the history)
  • Loading branch information
dirkroorda committed May 10, 2023
1 parent d09d7ae commit d1847f5
Show file tree
Hide file tree
Showing 56 changed files with 51,118 additions and 116,541 deletions.
4 changes: 2 additions & 2 deletions app/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ provenanceSpec:
corpus: '{org} - {repo}{relative}'
doi: 10.5281/zenodo.notyet
moduleSpecs: []
version: 0.3.0
version: 0.3.1
webBase: https://public.{org}.org/{repo}
webHint: Show this on the website
webLang: en
Expand All @@ -29,5 +29,5 @@ typeDisplay:
sentence:
condense: true
w:
featuresBare: word_num
featuresBare: num
writing: grc
2 changes: 1 addition & 1 deletion app/config_custom.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
typeDisplay:
w:
featuresBare: word_num
featuresBare: num
sentence:
condense: true
writing: 'grc'
Expand Down
127 changes: 69 additions & 58 deletions programs/lowfat.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,70 +32,75 @@ def convertTaskCustom(self):
otext = {
"fmt:text-orig-full": "{text}{after}",
"sectionTypes": "book,chapter,verse",
"sectionFeatures": "book_short,chapter,verse",
"sectionFeatures": "book,chapter,verse",
}
intFeatures = {
"appositioncontainer",
"articular",
"chapter",
"verse",
"book_num",
"sentence_number",
"discontinuous",
"nodeId",
"num",
"strong",
"word_in_verse",
"empty",
"verse",
}
featureMeta = dict(
book_num=dict(
description="NT book number (Matthew=1, Mark=2, ..., Revelation=27)"
monoAtts = {"appositioncontainer", "articular", "discontinuous"}
featureMeta = (
("after", "material after the end of the word"),
("appositioncontainer", "1 if it is an apposition container"),
("articular", "1 if the wg has an article"),
("book", "book name (abbreviated), from ref attribute in xml"),
("case", "grammatical case"),
("chapter", "chapter number, from ref attribute in xml"),
("class", "morphological class (on w); syntactical class (on wg)"),
("clauseType", "clause type"),
("cltype", "clause type"),
("crule", "clause rule (from xml attribute Rule)"),
("degree", "grammatical degree"),
("discontinuous", "1 if the word is out of sequence in the xml"),
("domain", "domain"),
("frame", "frame"),
("gender", "grammatical gender"),
("gloss", "short translation"),
("id", "xml id"),
("junction", "type of junction"),
("lang", "language the text is in"),
("lemma", "lexical lemma"),
("ln", "ln"),
("mood", "verbal mood"),
("morph", "morphological code"),
("nodeId", "node id (as in the XML source data)"),
("normalized", "lemma normalized"),
(
"num",
(
"generated number (not in xml): "
"book: (Matthew=1, Mark=2, ..., Revelation=27); "
"sentence: numbered per chapter; "
"word: numbered per verse."
),
),
book_short=dict(description="Book name (abbreviated)"),
sentence_number=dict(description="Sentence number (counted per chapter)"),
Rule=dict(description="Clause rule"),
appositioncontainer=dict(description="Apposition container"),
articular=dict(description="Articular"),
class_wg=dict(description="Syntactical class"),
clauseType=dict(description="Type of clause"),
cltype=dict(description="Type of clause"),
junction=dict(description="Type of junction"),
nodeId=dict(description="Node ID (as in the XML source data"),
role_wg=dict(description="Role"),
rule=dict(description="Syntactical rule"),
type_wg=dict(description="Syntactical type"),
text=dict(description="the text of a word"),
after=dict(description="After the end of the word"),
book=dict(description="Book name (abbreviated)"),
case=dict(description="Type of case"),
chapter=dict(description="Number of the chapter"),
class_w=dict(description="Morphological class"),
degree=dict(description="Degree"),
discontinuous=dict(description="Discontinuous"),
domain=dict(description="domain"),
frame=dict(description="frame"),
gender=dict(description="gender"),
gloss=dict(description="gloss"),
id=dict(description="xml iD"),
lemma=dict(description="lemma"),
ln=dict(description="ln"),
mood=dict(description="verbal mood"),
morph=dict(description="morph"),
normalized=dict(description="lemma normalized"),
number=dict(description="number"),
person=dict(description="person"),
ref=dict(description="biblical reference with word counting"),
referent=dict(description="number of referent"),
role_w=dict(description="role"),
strong=dict(description="strong number"),
subjref=dict(description="number"),
tense=dict(description="Verbal tense"),
type_w=dict(description="Morphological type"),
unicode=dict(description="lemma in unicode characters"),
verse=dict(description="verse"),
voice=dict(description="Verbal voice"),
word_in_verse=dict(description="number of word"),
empty=dict(description="whether a slot has been inserted in an empty element"),
("number", "grammatical number"),
("note", "annotation of linguistic nature"),
("person", "grammatical person"),
("ref", "biblical reference with word counting"),
("referent", "number of referent"),
("strong", "strong number"),
("subjref", "number of subject referent"),
("role", "role"),
("rule", "syntactical rule"),
("text", "the text of a word"),
("tense", "verbal tense"),
("type", "morphological type (on w), syntactical type (on wg)"),
("unicode", "word in unicode characters plus material after it"),
("verse", "verse number, from ref attribute in xml"),
("voice", "verbal voice"),
)
featureMeta = {k: dict(description=v) for (k, v) in featureMeta}

self.intFeatures = intFeatures
self.featureMeta = featureMeta
self.monoAtts = monoAtts

tfVersion = self.tfVersion
xmlVersion = self.xmlVersion
Expand Down Expand Up @@ -154,6 +159,8 @@ def getDirector(self):
xmlPath = self.xmlPath
featureMeta = self.featureMeta
transform = self.transform
renameAtts = self.renameAtts
monoAtts = self.monoAtts

transformFunc = (
(lambda x: BytesIO(x.encode("utf-8")))
Expand Down Expand Up @@ -211,6 +218,10 @@ def beforeChildren(cv, cur, node, tag):
return

atts = {etree.QName(k).localname: v for (k, v) in node.attrib.items()}
atts = {renameAtts.get(k, k): v for (k, v) in atts.items()}
for m in monoAtts:
if atts.get(m, None) == "true":
atts[m] = 1

if tag == "w":
# atts["text"] = atts["unicode"]
Expand All @@ -221,7 +232,7 @@ def beforeChildren(cv, cur, node, tag):
atts["book"] = bRef
atts["chapter"] = chRef
atts["verse"] = vRef
atts["word_num"] = wRef
atts["num"] = wRef
thisChapterNum = atts["chapter"]
thisVerseNum = atts["verse"]
if thisChapterNum != cv.get("chapter", cur["chapter"]):
Expand Down Expand Up @@ -253,13 +264,13 @@ def beforeChildren(cv, cur, node, tag):
else:
if tag == "book":
cur["bookNum"] += 1
atts["book_num"] = cur["bookNum"]
atts["book_short"] = atts["id"]
atts["num"] = cur["bookNum"]
atts["book"] = atts["id"]
del atts["id"]

elif tag == "sentence":
cur["sentNum"] += 1
atts["sent_num"] = cur["sentNum"]
atts["num"] = cur["sentNum"]

curNode = cv.node(tag)
cur["elems"].append(curNode)
Expand Down
Loading

0 comments on commit d1847f5

Please sign in to comment.