diff --git a/CHANGELOG.md b/CHANGELOG.md index aa17d2df..c45dd3ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # A→Z+T Changelog +# Version 0.8.7 +- added referify data for current subgroup +- give warning if user is going to undo analysis regroupings +- mark a sensid to sort again without losing the sound file attached to it. +- major overhaul to LIFT urls, including class to generate them and catalog to store them +- major overhaul to framed data, including class to generate and catalog to store +- Added comparison group button for rename framed group window (to help with transcriptions) +- added actual groups by datapoint for non-default tone reports +- Reworked and multiple improvements to naming groups: + - playable buttons + - praat link to sound file + - comparison button(s) + - navigation buttons to continue through groups +- added interpretation of glottal stop into sdistinctions. +- added settings for interpretation of trigraphs and digraphs +- multiple fixes to segment interpretation settings +- set up mail of bug report, and links to webpage documentation + # Version 0.8.6 - reworked buttons and UI on transcription window - comparison option for transcription window diff --git a/ROADMAP.md b/ROADMAP.md index 7320d3db..a6c94df1 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -4,7 +4,6 @@ standarize fns of exit buttons from config changing screens (exit should make ch maybe add backwards epsilon to vowels? ## release stoppers - put image creation after splash creation? -- give warning if user is going to undo analysis regroupings - check out why tooltips aren't working before first refresh ## Documentation @@ -20,6 +19,8 @@ maybe add backwards epsilon to vowels? - make font changes more general, tied to +/- ## Simplify (non-OOP related) +- Set means for user to check verification stage again. This will require invalidating all the data to be redone (not currently implemented). + - Once done, there is currently no AZT way to redo it. 
- on import, check for entries without: - citation, copy over from lexical-unit (all langs) - gloss, copy over from definitions (truncate, all langs) @@ -40,7 +41,6 @@ maybe add backwards epsilon to vowels? - transcribe (but not sort, etc) ## Migrate further toward OOP -- mark a sensid to sort again without losing the sound file attached to it. - distinguish between frames to do for sorting (with unsorted data) and frames to do for other tasks... - make 'next' go to next frame, if done sorting, or not, as appropriate - make tone analysis one thing, and tone report another thing, and call analysis if not done since data was last added/modified (store this info somewhere, clear after analysis completes) @@ -106,13 +106,7 @@ maybe add backwards epsilon to vowels? - add progress of recording (on its own tab?) ## For Future Versions -- Add comparison group button for rename framed group window (to help with transcriptions) -- consider adding actual groups by datapoint for non-default tone reports - - I.e., which data points are different -- think through ease of use questions: - - How to make naming groups straightforward from the main interface? - implement XML2XLP.txt (to produce PDF without further user input) -- add interpretation of glottal stop into sdistinctions. - fix zip problem on Windows: "OSError: [Errno 22] Invalid argument: 'log_-:.7z'" - extra space being added for None forms in Frame construction - Sort out why not all nouns show in ad hoc selection list. @@ -179,11 +173,8 @@ maybe add backwards epsilon to vowels? - widen "Do" menu item (give all a minimum width) ## For some time -- set up mail of bug report (better than WeSay) - look into having hg commit changes to verification status file - don't track checkdefaults? (make per user file?) -- Set means for user to check verification stage again. - - Once done, there is currently no AZT way to redo it. - Look up how to get real required heights and widths, availablexy isn't working correctly. 
- fix reconfigure scrolling window frame problem (remove need for if self.configured <1:) - constrain frames with less data, to only scroll as needed. diff --git a/lift.py b/lift.py index 90d1f6a4..dbfd5d82 100644 --- a/lift.py +++ b/lift.py @@ -14,6 +14,8 @@ import rx import logging import ast #For string list interpretation +import copy +import collections log = logging.getLogger(__name__) try: #Allow this module to be used without translation _ @@ -22,6 +24,8 @@ def _(x): return x """This returns the root node of an ElementTree tree (the entire tree as nodes), to edit the XML.""" +class Object(object): + pass class TreeParsed(object): def __init__(self, lift): self=Tree(lift).parsed @@ -40,6 +44,7 @@ def __init__(self, filename,nsyls=None): self.debug=False self.filename=filename #lift_file.liftstr() self.logfile=filename+".changes" + self.urls={} #store urls generated """Problems reading a valid LIFT file are dealt with in main.py""" try: self.read() #load and parse the XML file. (Should this go to check?) 
@@ -49,20 +54,19 @@ def __init__(self, filename,nsyls=None): datetime.datetime.utcnow().isoformat()[:-16], #once/day '.txt'] self.backupfilename=''.join(backupbits) - self.initattribs() self.getguids() #sets: self.guids and self.nguids self.getsenseids() #sets: self.senseids and self.nsenseids - log.info("Working on {} with {}, entries " + log.info("Working on {} with {} entries " "and {} senses".format(filename,self.nguids,self.nsenseids)) """These three get all possible langs by type""" self.glosslangs() #sets: self.glosslangs self.analangs() #sets: self.analangs, self.audiolangs self.pss=self.pss() #log.info(self.pss) - self.getformstosearch() #sets: self.formstosearch[lang][ps] #no guids """This is very costly on boot time, so this one line is not used:""" # self.getguidformstosearch() #sets: self.guidformstosearch[lang][ps] - self.citationforms=self.citationforms() - self.lexemes=self.lexemes() + self.lcs=self.citations() + self.lxs=self.lexemes() + self.locations=self.getlocations() self.defaults=[ #these are lift related defaults 'analang', 'glosslang', @@ -74,583 +78,69 @@ def __init__(self, filename,nsyls=None): # self.findduplicateforms() # self.findduplicateexamples() """Think through where this belongs; what classes/functions need it?""" + self.morphtypes=self.getmorphtypes() log.info("Language initialization done.") - def initattribs(self): - # """This dictionary defines where to find each node in the xml {url}, - # and what we're looking for in each case (node, node text, or attribute - # value.""" - # """Make all urls fully specified for guid, lang, etc; they will be - # removed if the relevant variable is None.""" - # """In the following language attributes, the field
can - # exist in multiple fields, so pay attention to the difference - # between - # form='{analang}' (always in the language to be analyzed: - # lexeme, citation) - # form='{glosslang}' (always in a gloss language: - # gloss, definition) - # form='{lang}' (either: field --under entry, sense, or - # pronunciation) - # Controlling this difference allows for things like getting an entry - # with a form in a particular language, and a gloss in a particular - # (other) language, and/or a tone description () in a - # particular (yet another) language. - # For now, I'm just going to assume people write meta descriptions - # in their primary gloss language. - # url here is a tuple with a base URL and a list of variable names - # that will be added to it later (and/or removed, if None). The URL - # should have each in {braces}, and the variable list each in - # 'quotes', as those are strings/names of variables, to be assigned - # values later. In any case, the names and number of arguments - # should match --except for duplicates in the URL, which - # should occur only once in the variable list. I'm keeping the order - # the same as far as possible, but that doesn't ultimately matter.""" - a=self.attribdict={} - a['template']={ - 'cm': "Give a prose description here", - 'url': (("url in the XML file, variables OK" - ),['guid','senseid','ps']), - 'attr': 'script'} - a['entry']= { - 'cm': 'use to get entries with a given guid or senseid', - 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']/.." - ),['guid','senseid']), - 'attr':'node'} - a['example']={ - 'cm': 'use to get examples with a given guid or senseid', - 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']/example" - ),['guid','senseid']), - 'attr':'node'} - a['examplebylocation']={ - 'cm': 'use to get examples with a given guid or senseid', - 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']/example" - "/field[@type='location']/form[text='{location}']/../.." 
- ),['guid','senseid','location']), - 'attr':'node'} - a['guidbyps']={ - 'cm': 'use to get guids of entries with a given ps', - 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - ),['guid','senseid','ps']), - 'attr':'guid'} - a['senseidbyps']={ - 'cm': 'use to get ids of senses with a given ps', - 'url':(("entry/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - ),['senseid','ps']), - 'attr':'id'} - a['guidwanyps']={ - 'cm': 'use to get guids of entries with any ps', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']/grammatical-info[@value]/../.." - ),['guid','analang','senseid']), - 'attr':'guid'} - a['senseidwanyps']={ - 'cm': 'use to get ids of senses with any ps', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']/grammatical-info[@value]/.." - ),['guid','analang','senseid']), - 'attr':'id'} - a['guidbypronfield']={ - 'cm': 'use to get guids of entries with fields at the ' - 'pronunciation level', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']/grammatical-info[@value='{ps}']/../.." - "/pronunciation" - "/trait[@name='location'][@value='{location}']/.." - "/form[@lang='{analang}']/.." - #lang could be any: - "/field[@type='{fieldtype}']/form[@lang='{lang}']/../../.." - ),['guid','analang','senseid','ps','location', - 'fieldtype','lang']), - 'attr':'guid'} - a['guidbypronfieldvalue']={ - 'cm': 'use to get guids of entries with fields at the ' - 'pronunciation level', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']/grammatical-info[@value='{ps}']/../.." - "/pronunciation" - "/trait[@name='location'][@value='{location}']/.." - "/form[@lang='{analang}']/.." - "/field[@type='{fieldtype}']" - "/form[@lang='{lang}'][text='{fieldvalue}']" - "/../../.." 
# ^ lang could be any - ),['guid','analang','senseid','ps','location', - 'fieldtype','lang','fieldvalue']), - 'attr':'guid'} - a['senseidbyexfieldvalue']={ - 'cm': 'use to get guids of entries with fields at the ' - 'example level', - 'url':(("entry[@guid='{guid}']" - # "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/example" - "/field[@type='location']" - "/form[@lang='{glosslang}'][text='{location}']" - "/../.." - "/field[@type='{fieldtype}']" - "/form[@lang='{glosslang}']" - "[text='{fieldvalue}']/../../.." - ),['guid','analang','senseid','ps','glosslang', - 'location','fieldtype','fieldvalue']), - 'attr':'id'} - a['guidbyexfieldvalue']={ - 'cm': 'use to get guids of entries with fields at the ' - 'example level', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense" - "/grammatical-info[@value='{ps}']/.." - "/example" - "/field[@type='location']" - "/form[@lang='{glosslang}'][text='{location}']" - "/../.." - "/field[@type='{fieldtype}']" - "/form[@lang='{glosslang}']" - "[text='{fieldvalue}']/../../../.." - ),['guid','analang','ps','location','glosslang', - 'fieldtype','fieldvalue']), - 'attr':'guid'} - a['guidbysensefield']={ - 'cm': 'use to get guids of entries with fields at the ' - 'sense level', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense" - "/grammatical-info[@value='{ps}']/.." - "/field[@type='{fieldtype}']/../.." - ),['guid','analang','ps','fieldtype']), - 'attr':'guid'} - a['guidbyentryfield']={ - 'cm': 'use to get guids of entries with fields at the ' - 'entry level', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/field[@type='{fieldtype}']/.." 
- ),['guid','analang','senseid','ps','fieldtype']), - 'attr':'guid'} - a['guidbylang']={ - 'cm': 'use to get guids of all entries with lexeme of a ' - 'given lang (or not)', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." - ),['guid','analang']), - 'attr':'guid'} - a['guidbysenseid']={ - 'cm': 'use to get guids of sense with particular id', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']/.." - ),['guid','senseid']), - 'attr':'guid'} - a['guid']={ - 'cm': 'use to get guids of all entries (no qualifications)', - 'url':(("entry[@guid='{guid}']" - ),['guid']), - 'attr':'guid'} - a['senseid']={ - 'cm': 'use to get ids of all senses (no qualifications)', - 'url':(("entry" - "/sense[@id='{senseid}']" - ),['senseid']), - 'attr':'id'} - a['senseidbytoneUFgroup']={ - 'cm': 'use to get ids of all senses by tone group', - 'url':(("entry" - "/sense[@id='{senseid}']" - "/field[@type='{fieldtype}']" - "/form[@lang='{lang}'][text='{form}']/../.." - ),['senseid','fieldtype','lang','form']), - 'attr':'id'} - a['guidbylexeme']={ - 'cm': 'use to get guid by ps and lexeme in the specified ' - 'language (no reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/lexical-unit" - "/form[@lang='{analang}'][text='{form}']" - "/../.." # ^ [.=’text'] not until python 3.7 - ),['guid','senseid','ps','analang','form']), - 'attr':'guid'} - a['guidbysense']={ - 'cm': 'use to get guid by ps and citation form in the ' - 'specified language (no reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']/.." - ),['guid','senseid']), - 'attr':'guid'} - a['senseidbylexeme']={ - 'cm': 'use to get senseid by ps and lexeme in the ' - 'specified language (no reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit" - "/form[@lang='{analang}'][text='{form}']/../.." - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." 
- ),['guid','analang','form','senseid','ps']), - 'attr':'id'} - a['guidbycitation']={ - 'cm': 'use to get guid by ps and citation form in the ' - 'specified language (no reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/citation" - "/form[@lang=guid'{analang}'][text='{form}']" - "/../.." # ^ [].=’text'] not until python 3.7 - ),['guid','senseid','ps','analang','form']), - 'attr':'guid'} - a['toneUFfieldvalue']={ - 'cm': 'use to get tone UF values of all senses within the ' - 'constraints specified.', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/field[@type='{fieldtype}']" - "/form[@lang='{lang}']/text" - ),['guid','senseid','ps','fieldtype','lang']), - 'attr':'nodetext'} - a['lexemenode']={ - 'cm': 'use to get lexemes of all entries with a form ' - 'in the specified language (no reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/lexical-unit/form[@lang='{analang}']" - ),['guid','senseid','ps','analang']), - 'attr':'node'} - a['lexeme']={ - 'cm': 'use to get lexemes of all entries with a form in ' - 'the specified language (no reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/lexical-unit/form[@lang='{analang}']/text" - ),['guid','senseid','ps','analang']), - 'attr':'nodetext'} - a['citationnode']={ - 'cm': 'use to get citation forms of one or all entries ' - 'with a form in the specified language (no ' - 'reference to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." 
- "/citation/form[@lang='{analang}']" - ),['guid','senseid','ps','analang']), - 'attr':'node'} - a['citation']={ - 'cm': 'use to get citation forms of one or all entries ' - 'with a form in the specified language (no reference ' - 'to fields)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/citation/form[@lang='{analang}']/text" - ),['guid','senseid','ps','analang']), - 'attr':'nodetext'} - a['definitionnode']={ - 'cm': 'use to get definition nodes of entries', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/definition" - "/form[@lang='{glosslang}']" - ),['guid','senseid','ps','glosslang']), - 'attr':'node'} - a['definition']={ - 'cm': 'use to get definitions of entries', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/definition" - "/form[@lang='{glosslang}']/text" - ),['guid','senseid','ps','glosslang']), - 'attr':'nodetext'} - a['glossnode']={ - 'cm': 'use to get gloss nodes', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/gloss[@lang='{glosslang}']" - ),['guid','senseid','ps','glosslang']), - 'attr':'node'} - a['gloss']={ - 'cm': 'use to get glosses of entries', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." 
- "/gloss[@lang='{glosslang}']/text" - ),['guid','senseid','ps','glosslang']), - 'attr':'nodetext'} - a['glossofexample']={ - 'cm': 'use to get glosses/translations of examples', - 'url':(("translation[@type='Frame translation']" - "/form[@lang='{glosslang}']/text" - ),['glosslang']), - 'attr':'nodetext'} - a['formofexample']={ - 'cm': 'use to get analang forms of examples', - 'url':(("form[@lang='{lang}']/text" - ),['lang']), - 'attr':'nodetext'} - a['fieldnode']={ - 'cm': 'use to get whole field nodes (to modify)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/field[@type='{fieldtype}']/form[@lang='{lang}']" - "/.." - ),['guid','senseid','ps','fieldtype','lang']), - 'attr':'node'} - a['fieldname']={ - 'cm': 'use to get value(s) for type of field in sense', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/field" - ),['guid','senseid','ps']), - 'attr':'type'} - a['fieldvalue']={ - 'cm': 'use to get value(s) for field(s) of a specified ' - '(or all) type(s) with a form in the specified (or ' - 'any) language for one or all entries (no ' - 'reference to fields, nor to lexeme form language)', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/field[@type='{fieldtype}']" - "/form[@lang='{lang}']/text" #This can be ANY lang. - ),['guid','senseid','ps','fieldtype','lang']), - 'attr':'nodetext'} - a['pronunciationbylocation']={ - 'cm': 'use to get value(s) for pronunciation information ' - 'for a given location', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." 
- "/pronunciation" - "/trait[@name='location'][@value='{location}']" - "/../form[@lang='{analang}']/text" - ),['guid','senseid','ps','location','analang']), - 'attr':'nodetext'} - a['pronunciationfieldname']={ - 'cm': 'use to get value(s) for a field type of a specified ' - '(or not) location', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/pronunciation" - "/trait[@name='location'][@value='{location}']" - "/../field"),['guid','senseid','ps','location']), - 'attr':'type'} - a['pronunciationfieldvalue']={ - 'cm': 'use to get value(s) for <>', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/pronunciation" - "/trait[@name='location'][@value='{location}']/.." - "/field[@type='{fieldtype}']" - "/form[@lang='{lang}']/text" - ),['guid','senseid','ps','location','fieldtype', - 'lang']), #not necessarily glosslang or analang... - 'attr':'nodetext'} - a['exfieldvalue']={ - 'cm': 'use to get values of fields at the example level', - 'url':(("entry[@guid='{guid}']" - # "/lexical-unit/form[@lang='{analang}']/../.." - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/example" - "/field[@type='location']" - "/form[@lang='{glosslang}']" - "[text='{location}']/../.." - "/field[@type='{fieldtype}']" - "/form[@lang='{glosslang}']/text" - ),['guid','analang','senseid','ps','glosslang', - 'location','fieldtype']), - 'attr':'nodetext'} - a['examplewfieldlocvaluefromsense']={ - 'cm': 'use to get an example with a given tone/exfield ' - 'when you have the sense node.', - 'url':(("example/field[@type='location']" - "/form[text='{location}']/../.." - "/field[@type='{fieldtype}']" - "/form[text='{fieldvalue}']/../.." - ),['location','fieldtype','fieldvalue']), - 'attr':'nodetext'} - a['exfieldlocation']={ - 'cm': 'use to get location of fields at the example level', - 'url':(("entry[@guid='{guid}']" - "/lexical-unit/form[@lang='{analang}']/../.." 
- "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/example" - "/field[@type='location']" - "/form[@lang='{glosslang}']/text" - ),['guid','analang','senseid','ps','glosslang']), - 'attr':'nodetext'} - a['pronunciationfieldlocation']={ - 'cm': 'use to get value(s) for pronunciation location' - '/context', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/pronunciation" - "/field[@type='{fieldtype}']/.." - "/trait[@name='location']" - ),['guid','senseid','ps','fieldtype']), - 'attr':'value'} - a['pronunciation']={ - 'cm': 'use to get value(s) for pronunciation in fields ' - 'with location specified', - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/pronunciation" - "/trait[@name='location'][@value='{location}']/.." - "/form[@lang='{glosslang}']/text" - ),['guid','senseid','ps','location','glosslang']), - 'attr':'nodetext'} - a['lexemelang']={ - 'cm': "analysis languages used in lexemes", - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/lexical-unit/form" - ),['guid','senseid','ps']), - 'attr': 'lang'} - a['citationlang']={ - 'cm': "analysis languages used in citation forms", - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/citation/form" - ),['guid','senseid','ps']), - 'attr': 'lang'} - a['pronunciationlang']={ - 'cm': "analysis languages used in citation forms", - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/../.." - "/pronunciation/form" - ),['guid','senseid','ps']), - 'attr': 'lang'} - a['glosslang']={ - 'cm': "gloss languages used in glosses", - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." 
- "/gloss" - ),['guid','senseid','ps']), - 'attr': 'lang'} - a['defnlang']={ - 'cm': "gloss languages used in definitions", - 'url':(("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info[@value='{ps}']/.." - "/definition" - "/form"),['guid','senseid','ps']), - 'attr': 'lang'} - a['illustration']={ - 'cm': "Illustration by entry", - 'url': (("entry[@guid='{guid}']" - "/sense[@id='{senseid}']/illustration" - ),['guid','senseid','ps']), - 'attr': 'href'} - a['ps']={ - 'cm': "Part of speech, or grammatical category", - 'url': (("entry[@guid='{guid}']" - "/sense[@id='{senseid}']" - "/grammatical-info" - ),['guid','senseid']), - 'attr': 'value'} - def geturlnattr(self, attribute, **kwargs): - if attribute == 'attributes': - return self.attribdict.keys() - if attribute not in self.attribdict: - log.info("Sorry, {} isn't defined yet. This is what's " - "available:".format(attribute)) - for line in list(self.attribdict.keys()): - try: - log.info(' '.join(line, '\t',self.attribdict[line]['cm'])) - except: - log.info('{}\tUNDOCUMENTED?!?!'.format(line)) - log.info("This is where that key was called; fix it, and try again:") - """I should also add/remove pronunciation or other systematic things here""" - url=self.attribdict[attribute]['url'] - for field in url[1]: - if field not in kwargs: - kwargs[field]=None - url=(url[0],kwargs) - url=buildurl(url) - log.log(2,'After buildurl: {}'.format(url)) - url=removenone(url) - log.log(2,'After removenone: {}'.format(url)) - log.log(1,'Ori: {}'.format(self.attribdict[attribute]['url'])) - return {'url':url,'attr':self.attribdict[attribute]['attr']} - def get(self, attribute, **kwargs): - """kwargs are guid=None, senseid=None, analang=None, - glosslang=None, lang=None, ps=None, form=None, fieldtype=None, - location=None, fieldvalue=None, showurl=False):""" - """This needs to work when there are multiple languages, etc. - I think we should iterate over possibilities, if none are specified. - over lang, what else? 
- NB: this would create nested dictionaries... We need to be able to - access them consistently later.""" - """I need to be careful to not mix up lang=glosslang and lang=analang: - @lang should only be used here when referring to a field.""" - if attribute == 'Test': - from random import randint - guid=self.guids[randint(0, len(self.guids))-1] - log.info("Showing info for randomly selected *entry*: {}".format(guid)) - for attribute in self.geturlnattr('attributes'): - log.info('{}: {}'.format(attribute,self.get(attribute,guid=guid)))#,showurl=True - senseid=self.senseids[randint(0, len(self.senseids))-1] - log.info("Showing info for randomly selected *sense*: {}".format(senseid)) - for attribute in self.geturlnattr('attributes'): - log.info('{}: {}'.format(attribute,self.get(attribute,senseid=senseid)))#,showurl=True - return - log.log(3,'kwargs: {}'.format(kwargs)) - """pull output from urlnattr, where first is string with {}, second - is strings naming fields. convert those names to field values here, - once those fields/values have been defined. + def retarget(self,urlobj,target,showurl=False): + k=self.urlkey(urlobj.kwargs) + urlobj.kwargs['retarget']=target + k2=self.urlkey(urlobj.kwargs) + if k2 not in self.urls: + self.urls[k2]=copy.deepcopy(urlobj) + self.urls[k2].retarget(target) + if showurl: + log.info("URL for retarget: {}".format(self.urls[k2].url)) + return self.urls[k2] + def urlkey(self,kwargs): + kwargscopy=kwargs.copy() #for only differences that change the URL + kwargscopy.pop('showurl',False) + k=tuple(sorted([(str(x),str(y)) for (x,y) in kwargscopy.items()])) + return k + def get(self, target, node=None, **kwargs): + """This method calls for a URL object, if it's not already there. 
+ Followup methods include + LiftURL.get() on that object: to get the desired text/attr/node + lift.retarget(): to move from one url to a parent (e.g., the sense + node containing a found example) before LiftURL.get() again + get entries: lift.get("entry").get() + lift.get("entry").get('guid') + get senses: lift.get("sense").get() + lift.get("sense").get('senseid') + get *all* pss: lift.get("ps").get('value') + Never ask for just the form! give the parent, to get a particular form: + lift.get("lexeme/form").get('text') + lift.get("example/form").get('text') + lift.get("example/translation/form").get('text') + lift.get("citation/form").get('text') + just 1 of each pss: dict.fromkeys(lift.get("ps").get('value')) + get tone value (from example): + lift.get("example/tonefield/form/text",location=location).get('text') + get tone value (from sense, UF): + lift.get('tonefield/form/text', senseid=senseid).get('text') + lift.get('sense/tonefield/form/text', senseid=senseid).get('text') + location: lift.get('locationfield', senseid=senseid).get('text') """ - """This is slightly faster than kwargs""" - # was: urlnattr=attributesettings(attribute,guid,senseid,analang, - # glosslang, lang,ps,form, - # fieldtype,location, - # fieldvalue=fieldvalue - urlnattr=self.geturlnattr(attribute, **kwargs) - url=urlnattr['url'] - if 'showurl' in kwargs and kwargs['showurl']==True: - log.info(url) - try: - nodeset=self.nodes.findall(url) #This is the only place we need self=lift - except BaseException as e: - log.error("Problem getting url: {} ({})".format(url,e)) - return - output=[] - attr=urlnattr['attr'] - for node in nodeset: - if attr == 'nodetext': - if node is not None: - log.log(1,"Returning node text") - output+=[node.text] - elif attr == 'node': - if node is not None: - log.log(1,"Returning whole node") - output+=[node] + if node is None: + node=self.nodes + what=kwargs.get('what','node') + path=kwargs.get('path',[]) + if type(path) is not list: + path=kwargs['path']=[path] + 
showurl=kwargs.get('showurl',False) + kwargs['target']=target # we want target to be kwarg for LiftURL + k=self.urlkey(kwargs) + if k not in self.urls: + self.urls[k]=LiftURL(base=node,**kwargs) #needs base and target to be sensible; attribute? + else: + log.info("URL key found, using: {} ({})".format(k,self.urls[k].url)) + if self.urls[k].base == node: + log.log(4,"Same base {}, moving on.".format(node)) else: - log.log(1,"Returning node attribute") - output+=[node.get(attr)] - return output + log.log(4,"Different base of same tag ({}; {}≠{}), rebasing." + "".format(node.tag,self.urls[k].base,node)) + self.urls[k].rebase(node) + if showurl: + log.info("Using URL {}".format(self.urls[k].url)) + return self.urls[k] #These are LiftURL objects def makenewguid(self): from random import randint log.info("Making a new unique guid") @@ -714,33 +204,36 @@ def addentry(self, showurl=False, **kwargs): self.getguids() self.getsenseids() return senseid - def modverificationnode(self,senseid,vtype,add=None,rm=None,addifrmd=False): - nodes=self.addverificationnode(senseid,vtype=vtype) - vf=nodes[0] - sensenode=nodes[1] - if vf is None: - log.info("Sorry, this didn't return a node: {}".format(senseid)) + def evaluatenode(self,node): + if node is None: + log.info("Sorry, this didn't return a node: {}".format(node)) return - if vf.text is not None: - log.log(2,"{}; {}".format(vf.text, type(vf.text))) + if node.text is not None: try: - l=ast.literal_eval(vf.text) + l=ast.literal_eval(node.text) except SyntaxError: #if the literal eval doesn't work, it's a string - l=vf.text - log.log(2,"{}; {}".format(l, type(l))) + l=node.text if type(l) != list: #in case eval worked, but didn't produce a list log.log(2,"One item: {}; {}".format(l, type(l))) l=[l,] else: log.log(2,"empty verification list found") l=list() + return l + def modverificationnode(self,senseid,vtype,add=None,rms=[],addifrmd=False): + nodes=self.addverificationnode(senseid,vtype=vtype) + vf=nodes[0] + sensenode=nodes[1] + 
l=self.evaluatenode(vf) + log.log(2,"{}; {}".format(vf.text, type(vf.text))) + log.log(2,"{}; {}".format(l, type(l))) changed=False - if rm != None and rm in l: - i=l.index(rm) #be ready to replace - l.remove(rm) - changed=True - else: - i=len(l) + i=len(l) + for rm in rms: + if rm != None and rm in l: + i=l.index(rm) #be ready to replace + l.remove(rm) + changed=True if (add != None and add not in l #if there, v-if rmd, or not changing and (addifrmd == False or changed == True)): l.insert(i,add) #put where removed from, if done. @@ -754,6 +247,19 @@ def modverificationnode(self,senseid,vtype,add=None,rm=None,addifrmd=False): sensenode.remove(vf) else: log.log(2,"Not removing empty node") + def getverificationnodevaluebyframe(self,senseid,vtype,frame): + nodes=self.addverificationnode(senseid,vtype=vtype) + vf=nodes[0] + # sensenode=nodes[1] + l=self.evaluatenode(vf) + log.info("text: {}; vf: {}".format(l,vf)) + values=[] + if l is not None: + for field in l: + log.info("field value: {}".format(field)) + if frame in field: + values.append(field) + return values def addverificationnode(self,senseid,vtype): node=self.getsensenode(senseid=senseid) if node is None: @@ -765,495 +271,114 @@ def addverificationnode(self,senseid,vtype): attrib={'type':"{} verification".format(vtype)}) return (vf,node) def getentrynode(self,senseid,showurl=False): - """Get the sense node""" - urlnattr=self.geturlnattr('entry',senseid=senseid) - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - return node + return self.get('entry',senseid=senseid).get() def getsensenode(self,senseid,showurl=False): - """Get the sense node""" - urlnattr=self.geturlnattr('senseid',senseid=senseid) - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - return node - def addexamplefields(self,showurl=False,**kwargs): - # 
,guid,senseid,analang,glosslang,glosslang2,forms, - # fieldtype,location,fieldvalue,ps=None - # This fuction will add an XML node to the lift tree, like a new - # example field. - # The program should know before calling this, that there isn't - # already the relevant node --since it is agnostic of what is already - # there. + return self.get('sense',senseid=senseid).get()[0] + def addmodexamplefields(self,**kwargs): log.info(_("Adding values (in lift.py) : {}").format(kwargs)) - node=self.getsensenode(senseid=kwargs['senseid']) - if node is None: - log.info("Sorry, this didn't return a node: {}".format( - kwargs['senseid'])) - return - # Logic to check if this example already here - # This function returns a text node (from any one of a number of - # example nodes, which match form, gloss and location) containing a - # tone sorting value, or None (if no example nodes match form, gloss - # and location) - #We're adding a node to kwargs here. - exfieldvalue=self.exampleissameasnew(**kwargs,node=node,showurl=False) - if exfieldvalue is None: #If not already there, make it. + #These should always be there: + senseid=kwargs.get('senseid') + location=kwargs.get('location') + fieldtype=kwargs.get('fieldtype','tone') # needed? ever not 'tone'? + exfieldvalue=self.get("example/tonefield/form/text", senseid=senseid, + location=location).get('node') + # Set values for any duplicates, too. Don't leave inconsisted data. + tonevalue=kwargs.get('fieldvalue') #don't test for this above + if len(exfieldvalue) > 0: + for e in exfieldvalue: + e.text=tonevalue + else: #If not already there, make it. 
log.info("Didn't find that example already there, creating it...") - p=ET.SubElement(node, 'example') - form=ET.SubElement(p,'form',attrib={'lang':kwargs['analang']}) - t=ET.SubElement(form,'text') - t.text=kwargs['forms'][kwargs['analang']] + sensenode=self.getsensenode(senseid=senseid) + if sensenode is None: + log.info("Sorry, this didn't return a node: {}".format(senseid)) + return + analang=kwargs.get('analang') + db=kwargs.get('db') #This an object with values + forms=db.framed #because this should always be framed + glosslangs=db.glosslangs + p=Node(sensenode, tag='example') + p.makeformnode(analang,forms[analang]) """Until I have reason to do otherwise, I'm going to assume these fields are being filled in in the glosslang language.""" - fieldgloss=ET.SubElement(p,'translation',attrib={'type': - 'Frame translation'}) - for lang in [kwargs['glosslang'],kwargs['glosslang2']]: - if lang != None and lang in kwargs['forms']: - form=ET.SubElement(fieldgloss,'form', - attrib={'lang':lang}) - glosstext=ET.SubElement(form,'text') - glosstext.text=kwargs['forms'][lang] - exfield=ET.SubElement(p,'field', - attrib={'type':kwargs['fieldtype']}) - form=ET.SubElement(exfield,'form', - attrib={'lang':kwargs['glosslang']}) - exfieldvalue=ET.SubElement(form,'text') - locfield=ET.SubElement(p,'field',attrib={'type':'location'}) - form=ET.SubElement(locfield,'form', - attrib={'lang':kwargs['glosslang']}) - fieldlocation=ET.SubElement(form,'text') - fieldlocation.text=kwargs['location'] - else: - log.debug("=> Found that example already there") - exfieldvalue.text=kwargs['fieldvalue'] #change this *one* value, either way. 
- if 'guid' in kwargs: - self.updatemoddatetime(guid=kwargs['guid'],senseid=kwargs['senseid']) - else: - self.updatemoddatetime(senseid=kwargs['senseid']) - if self.debug == True: - log.info("add langform: {}".format(kwargs['forms'][kwargs['analang']])) - log.info("add tone: {}".format(['fieldvalue'])) - log.info("add gloss: {}".format(kwargs['forms'][kwargs['glosslang']])) - if glosslang2 != None: - log.info(' '.join("add gloss2:", kwargs['forms'][kwargs['glosslang2']])) + fieldgloss=Node(p,'translation',attrib={'type':'Frame translation'}) + for lang in glosslangs: + if lang in forms: + fieldgloss.makeformnode(lang,forms[lang]) + exfieldvalue=p.makefieldnode(fieldtype,glosslangs[0],text=tonevalue, + gimmetext=True) + p.makefieldnode('location',glosslangs[0],text=location) + self.write() + self.updatemoddatetime(senseid=senseid) def forminnode(self,node,value): - # Returns True if `value` is in *any* text node child of any form child - # of node: [node/'form'/'text' = value] + """Returns True if `value` is in *any* text node child of any form child + of node: [node/'form'/'text' = value] Is this needed?""" for f in node.findall('form'): for t in f.findall('text'): if t.text == value: return True return False def convertalltodecomposed(self): + """Do we want/need this? Not using anywhere...""" for form in self.nodes.findall('.//form'): if form.get('lang') in self.analangs: for t in form.findall('.//text'): t.text=rx.makeprecomposed(t.text) - def exampleisnotsameasnew(self, showurl=False, **kwargs): - # guid,senseid,analang, glosslang, glosslang2, forms, fieldtype, - # location,fieldvalue,example,ps=None, - # """This checks all the above information, to see if we're dealing with - # the same example or not. Stop and return nothing at first node that - # doesn't match (from form, translation and location). If they all match, - # then return the tone value node to change.""" - tonevalue='' # set now, will change later, or not... 
- log.log(2,"Looking for bits that don't match") - if kwargs['example'] == None: - log.info("Hey! You gave me an empty example!") - return - for node in kwargs['example']: - try: - log.log(2,'Node: {} ; {}'.format(node.tag, - node.find('.//text').text)) - except: - log.log(2,'Node: {} ; Likely no text node!'.format(node.tag)) - if (node.tag == 'form'): - if ((node.get('lang') == kwargs['analang']) - and (node.find('text').text != kwargs['forms'][kwargs['analang']])): - log.log(2,'{} == {}; {} != {}'.format(node.get('lang'), - kwargs['analang'], node.find('text').text, kwargs['forms'][kwargs['analang']])) - return - elif ((node.tag == 'translation') and - (node.get('type') == 'Frame translation')): - for form in node.find('form'): - if (((form.get('lang') == kwargs['analang']) and - (not self.forminnode(node, - kwargs['forms'][kwargs['analang']]))) or - (('glosslang2' in kwargs) and - (kwargs['glosslang2'] != None) and - (form.get('lang') == kwargs['glosslang2']) and - (not self.forminnode(node, - kwargs['forms'][kwargs['glosslang2']])))): - log.log(2,'translation {} != {}'.format( - node.find('form/text').text, kwargs['forms'])) - return - elif (node.tag == 'field'): - if (node.get('type') == 'location'): - if not self.forminnode(node,kwargs['location']): - log.log(2,'location {} not in {}'.format( - kwargs['location'],node)) - return - if (node.get('type') == 'tone'): - for form in node: - if ((form.get('lang') == kwargs['glosslang']) - or (form.get('lang') == kwargs['glosslang2'])): - """This is set once per example, since this - function runs on an example node""" - tonevalue=form.find('text') - log.debug('tone value found: {}'.format( - tonevalue.text)) - else: - log.log(2,"Not the same lang for tone form: {}" - "".format(form.get('lang'))) - return - else: - log.debug("Not sure what kind of node I'm dealing with! 
({})" - "".format(node.tag)) - return tonevalue - def exampleissameasnew(self,showurl=False, **kwargs): - # ,guid,senseid,analang, glosslang,glosslang2,forms, fieldtype, - # location,fieldvalue,node,ps=None - """This looks for any example in the given sense node, with the same - form, gloss, and location values""" - log.info('Looking for an example node matching these form and gloss' - 'elements: {}'.format(kwargs['forms'])) - examples=kwargs['node'].findall('example') - for example in examples: - log.info(_("Looking at example {} ({} of {})").format(example, - examples.index(example), len(examples))) - valuenode=self.exampleisnotsameasnew(**kwargs, example=example - # guid,senseid,analang, - # glosslang,glosslang2,forms, - # fieldtype, - # location,fieldvalue,example,ps=None - ,showurl=False) - if type(valuenode) is ET.Element: #None: #i.e., they *are* the same node - log.info(_("Found it! {}: {}".format(type(valuenode),valuenode.text))) - return valuenode #if you find the example, we're done looking - else: #if not, just keep looking, at next example node - log.debug('=> This is not the example we are looking ' - 'for: {}'.format(valuenode)) def findduplicateforms(self): + """This removes duplicate form nodes in lx or lc nodes, not much point. 
+ """ + dup=False for entry in self.nodes: - for node in entry: - if ((node.tag == 'lexical-unit') or (node.tag == 'citation')): - forms=node.findall('form') - removed=list() - for form in forms: - f1i=forms.index(form) - for form2 in forms: - f2i=forms.index(form2) - if (f2i <= f1i) or (form2 in removed): - log.log(3,"{} <= {} or form {} already removed; " - "continuing.".format(f2i,f2i,f1i)) - continue - if (form.get('lang') == form2.get('lang') and - form.find('text').text == form2.find('text').text): - log.debug("Found {} {} {}".format(f1i, - form.get('lang'), - form.find('text').text, - )) - log.debug("Found {} {} {}".format(f2i, - form2.get('lang'), - form2.find('text').text - )) - log.debug("Removing form {} {} {}".format(f2i, - form2.get('lang'), - form2.find('text').text - )) - node.remove(form2) - removed.append(form2) - else: - log.log(3,"Not removing form") - self.write() + formparents=entry.findall('lexical-unit')+entry.findall('citation') + for fp in formparents: + for lang in self.analangs: + forms=fp.findall('form[@lang="{}"]'.format(lang)) + if len(forms) >1: + log.info("Found multiple form fields in an entry: {}" + "".format([x.find('text').text for x in forms])) + dup=True + if not dup: + log.info("No duplicate form fields were found in the lexicon.") def findduplicateexamples(self): - def getexdict(example): - analangs=[] - otheranalangs=[] - glosslangs=[] - forms={} - analang='' - glosslang='' - glosslang2='' - location='' - tonevalue='' - log.log(3,"Working on example {} (this sense: {})".format(exn,exindex)) - formnodes=example.findall('form') #multiple/sense - translationformnodes=example.findall( - "translation[@type='Frame translation']/form") #just one/sense - if formnodes != None: - for formnode in formnodes: - lang=formnode.get('lang') - formnodetext=formnode.find('text') - if formnodetext is not None: - analangs.append(lang) - forms[lang]=formnodetext.text - else: - log.log(3,"No formnodetext! 
(lang: {})".format(lang)) - if analangs != []: - log.log(2,"Analangs: {}".format(analangs)) - analang=analangs[0] - if len(analangs) >1: - otheranalangs=analangs[1:] - else: - analang=None - else: - log.error("No form node!") - if translationformnodes != None: - for formnode in translationformnodes: - lang=formnode.get('lang') - formnodetext=formnode.find('text') - if formnodetext != None: - glosslangs.append(lang) - forms[lang]=formnodetext.text - else: - log.log(4,"No glossformnodetext! ({}; lang: {})".format( - lang,formnodetext)) - log.log(2,"glosslangs: {}".format(glosslangs)) - if len(glosslangs) >1: - glosslang2=glosslangs[1] - else: - glosslang2=None - log.log(3,"No glosslang2formnodetext; hope that's OK!") - if len(glosslangs) >0: - glosslang=glosslangs[0] - else: - glosslang=None - log.log(4,"No glosslangformnodetext!") - else: - log.log(4,"No translation node!") - #We don't care about form[@lang] for these: - locationnode=example.find("field[@type='location']/form/text") - valuenode=example.find("field[@type='tone']/form/text") - if locationnode != None: - location=locationnode.text - else: - location=None - if valuenode != None: - tonevalue=valuenode.text - else: - tonevalue='' #this means an empty/missing tone node only. 
- return (forms, analang, glosslang, glosslang2, location, tonevalue, - otheranalangs) - def compare(x,y): - same=0 - for i in range(len(x)): - if x[i] == y[i]: - log.log(2,"{} =".format(x[i])) - log.log(2,"{}".format(y[i])) - same+=1 - else: - log.log(2,"{} ≠".format(x[i])) - log.log(2,"{}!".format(y[i])) - same=False - return same - forms={} - entries=self.nodes.findall('entry') - exn=0 - for entry in entries: - entindex=entries.index(entry) - log.log(2,"Working on entry {}: {}".format(entindex, - entry.get('guid'))) - senses=entry.findall('sense') - for sense in senses: - senseindex=senses.index(sense) - log.log(3,"Working on sense {}: {}".format(senseindex, - sense.get('id'))) - examples=sense.findall('example') - for example in examples: - #If empty node, remove it - if len(example) == 0: - log.info("Deleting Empty example in {}".format( - entry.get('guid'))) - sense.remove(example) - continue - exn+=1 - exindex=list(examples).index(example) - ex1=getexdict(example) - if ex1[1] == None: - log.info("No analang found for example {} in {}" - "".format(ex2index,sense.get('id'))) - continue - for example2 in examples: - ex2index=list(examples).index(example2) - if exindex >= ex2index: - log.log(2,"Comparing example {} with example {}; " - "skipping.".format(exindex,ex2index)) - continue - else: - log.log(2,"Comparing example {} with example {}." 
- "".format(exindex,ex2index)) - #here we replicate/skip exampleissameasnew - ex2=getexdict(example2) - if ex2[1] == None: - log.info("No analang found for example {} in {}" - "".format(ex2index,sense.get('id'))) - continue - othertonevalue=self.exampleisnotsameasnew( - node=sense, - example=example2, - forms=ex1[0], - analang=ex1[1], - glosslang=ex1[2], - glosslang2=ex1[3], - location=ex1[4] - # ,showurl=True - ) - if othertonevalue == None: - log.log(2,"No same node found!") - elif not ((othertonevalue == ex1[5]) or - ((type(othertonevalue) is ET.Element) and - (othertonevalue.text == ex1[5]))): - try: - log.log(3,"Same node, different value! ({}!={})" - "; copying over {}?{}".format( - othertonevalue.text, - ex1[5],type(othertonevalue.text), - type(ex1[5]))) - if ((othertonevalue.text == '') or - (othertonevalue.text == None)): - log.log(3,"empty othertonevalue node") - othertonevalue.text == ex1[5] - ex2=getexdict(example) - elif ((ex1[5] == '') or (ex1[5] == None)): - log.log(3,"empty ex1 tonevalue node") - t=example.find("field[@type='tone']/form/" - "text") - if t is not None: - log.log(2,"tone node there, adding " - "tonevalue") - t.text=othertonevalue.text - else: - log.log(2,"No tone node there, adding") - fld=ET.SubElement(example,'field', - attrib={'type':'tone'}) - f=ET.SubElement(fld,'form', - attrib={'lang':ex1[2]}) - g=ET.SubElement(f,'text') - g.text=othertonevalue.text - ex1=getexdict(example) - else: - log.error("Huh? tonevalue nodes: {}; {} in " - "{}".format(othertonevalue, - ex1[5], sense.get('id'))) - except: - log.log(2,"Same text, different value! 
({}!={})" - "; copying over {}?{}".format( - othertonevalue, - ex1[5],type(othertonevalue), - type(ex1[5]))) - if (ex2[5] == '') or (ex2[5] == None): - log.log(2,"empty ex2 tonevalue node") - t=example2.find("field[@type='tone']/" - "form/text") - if t is not None: - t.text=ex1[5] - else: - fld=ET.SubElement(example2,'field', - attrib={'type':'tone'}) - f=ET.SubElement(fld,'form', - attrib={'lang':ex1[2]}) - g=ET.SubElement(f,'text') - g.text=ex1[5] - ex2=getexdict(example) - else: - log.error("Huh? tonevalues: {}; {} in {}" - "".format(othertonevalue, - ex1[5], sense.get('id'))) - compare(ex1,ex2) #This compares tuples - if (othertonevalue != None) and (ex2[5] == ex1[5]): - try: - log.log(2,"Same node, same value! ({}={}-{})" - "".format( - ex1[5],othertonevalue.text,ex2[5])) - except: - log.log(2,"Same text, same value! ({}={}-{})" - "".format( - ex1[5],othertonevalue,ex2[5])) - compare(ex1,ex2) #This compares tuples - if (ex1[6] == []) and (ex2[6] == []): - log.info("No second analang; removing second " - "example from {}.".format(sense.get('id'))) - if example2 in sense: - sense.remove(example2) - for lang in ex1[6]: - if lang in ex2[6]: - log.log(2,"Language {} found in both " - "examples.".format(lang)) - if ex1[0][lang] == ex2[0][lang]: - log.log(2,"Language {} content same in " - "both examples.".format(lang)) - if example2 in sense: - try: - log.info("Removing second " - "example from {}." 
- "".format( - sense.get('id'))) - except: - log.info("Removing second " - "example from {}".format( - sense.get('id'))) - sense.remove(example2) - else: - #Don't remove here - log.log(3,"Language {} content " - "DIFFERENT".format(lang)) - else: - if example2 in sense: - try: - log.info("Removing second example " - "from {}".format( - sense.get('id'))) - except: - log.info("Removing second example " - "from {}".format( - sense.get('id'))) - sense.remove(example2) - for lang in ex2[6]: - if lang not in ex1[6]: - if example in sense: - try: - log.info("Removing first example." - "from {}".format( - sense.get('id'))) - except: - log.info("Removing first example." - "from {}".format( - sense.get('id'))) - sense.remove(example) - self.write() + dup=False + senses=self.nodes.findall('entry/sense') + for sense in senses: + for l in self.locations: + examples=sense.findall('example/field[@type="location"]/' + 'form[text="{}"]/../..'.format(l)) #'senselocations' + if len(examples)>1: + log.error("Found multiple/duplicate examples (of the same " + "location ({}) in the same sense: {})" + "".format(l,[x.find('form/text').text for x in examples])) + """Before implementing the following, we need a test for + presence of audio file link, and different tone values, + including which to preserve if different (i.e., not '')""" + # for e in examples[1:]: + # sense.remove(e) + dup=True + if dup: + pass #not yet: self.write() + else: + log.info("No duplicate examples (same sense and location) were " + "found in the lexicon.") def addtoneUF(self,senseid,group,analang,guid=None,showurl=False): - urlnattr=self.geturlnattr('senseid',senseid=senseid) #give the sense. - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - if node is None: - log.info(' '.join("Sorry, this didn't return a node:",guid,senseid)) + node=self.get('sense',senseid=senseid).get() #give the sense. 
+ if node == []: + log.info("Sorry, this didn't return a node: guid {}; senseid {}" + "".format(guid,senseid)) return - t=None - for field in node.findall('field'): - if field.get('type') == 'tone': - f=field.findall('form') - f2=field.find('form') - t=f2.find('text') - for fs in f: - t2=fs.find('text') - if t is None: - p=ET.SubElement(node, 'field',attrib={'type':'tone'}) - f=ET.SubElement(p,'form',attrib={'lang':analang}) - t=ET.SubElement(f,'text') - t.text=group + t=self.get('field/form/text',node=node[0],ftype='tone').get() + if t == []: + p=Node(node[0],'field',attrib={'type':'tone'}) + p.makeformnode(analang,text=group) + else: + t[0].text=group self.updatemoddatetime(guid=guid,senseid=senseid) - # self.write() + self.write() """ toneinfo for sense. """ - def addmediafields(self,node, url,lang, showurl=False):#lang=Check.audiolang + def addmediafields(self,node, url,lang, showurl=False): """This fuction will add an XML node to the lift tree, like a new example field.""" """The program should know before calling this, that there isn't @@ -1262,63 +387,51 @@ def addmediafields(self,node, url,lang, showurl=False):#lang=Check.audiolang log.info("Adding {} value to {} location".format(url,node)) possibles=node.findall("form[@lang='{lang}']/text".format(lang=lang)) for possible in possibles: - log.debug(possibles.index(possible)) + log.debug("Checking possible: {} (index: {})".format(possible, + possibles.index(possible))) if hasattr(possible,'text'): if possible.text == url: log.debug("This one is already here; not adding.") return - form=ET.SubElement(node,'form',attrib={'lang':lang}) - t=ET.SubElement(form,'text') - t.text=url + form=Node(node,'form',attrib={'lang':lang}) + t=form.maketextnode(text=url) prettyprint(node) """Can't really do this without knowing what entry or sense I'm in...""" self.write() def addmodcitationfields(self,entry,langform,lang): citation=entry.find('citation') if citation is None: - citation=ET.SubElement(entry, 'citation') - 
form=citation.find("form[@lang='{lang}']".format(lang=lang)) - if form is None: - form=ET.SubElement(citation,'form',attrib={'lang':lang}) - t=form.find('text') - if t is None: - t=ET.SubElement(form,'text') - t.text=langform - def addpronunciationfields(self,guid,senseid,analang, - glosslang,glosslang2, - lang,forms, - # langform,glossform,gloss2form, - fieldtype, - location,fieldvalue,ps=None,showurl=False): + citation=Node(entry, 'citation') + citation.makeformnode(lang=lang,text=langform) + def addpronunciationfields(self,**kwargs): """This fuction will add an XML node to the lift tree, like a new pronunciation field.""" """The program should know before calling this, that there isn't already the relevant node.""" - # urlnattr=attributesettings(attribute,guid,analang,glosslang,lang,ps,form, - # fieldtype,location) - urlnattr=self.geturlnattr('guid',guid=guid) #just give me the entry. - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - # nodes=self.nodes.findall(url) #this is a list - p=ET.SubElement(node, 'pronunciation') - form=ET.SubElement(p,'form',attrib={'lang':analang}) - t=ET.SubElement(form,'text') - t.text=langform - field=ET.SubElement(p,'field',attrib={'type':fieldtype}) - form=ET.SubElement(field,'form',attrib={'lang':lang}) - t2=ET.SubElement(form,'text') - t2.text=fieldvalue - fieldgloss=ET.SubElement(p,'field',attrib={'type':'gloss'}) - form=ET.SubElement(fieldgloss,'form',attrib={'lang':glosslang}) - t3=ET.SubElement(form,'text') - t3.text=glossform - trait=ET.SubElement(p,'trait',attrib={'name':'location', 'value':location}) - self.updatemoddatetime(guid=guid,senseid=senseid) - # self.write() + guid=kwargs.get('guid',None) + senseid=kwargs.get('senseid',None) + if guid is not None: + node=self.get('entry',guid=guid,showurl=True).get()[0] + elif senseid is not None: + node=self.get('entry',senseid=senseid,showurl=True).get()[0] + analang=kwargs.get('analang') + 
glosslang=kwargs.get('glosslang') + langform=kwargs.get('langform') + glossform=kwargs.get('glossform') + fieldtype=kwargs.get('fieldtype','tone') + fieldvalue=kwargs.get('fieldvalue') + location=kwargs.get('location') + p=Node(node, 'pronunciation') + p.makeformnode(lang=analang,text=langform) + p.makefieldnode(type=fieldtype,lang=glosslang,text=fieldvalue) + p.makefieldnode(type='gloss',lang=glosslang,text=glossform) + p.maketraitnode(type='location',value=location) + if senseid is not None: + self.updatemoddatetime(senseid=senseid) + elif guid is not None: + self.updatemoddatetime(guid=guid) + self.write() """End here:""" #build up, or down? - #node.append('pronunciation') """
dìve
@@ -1333,74 +446,19 @@ def addpronunciationfields(self,guid,senseid,analang,
""" - def rmexfields(self,guid=None,senseid=None,analang=None, - glosslang=None,langform=None,glossform=None,fieldtype=None, - location=None,fieldvalue=None,ps=None,showurl=False): - #We need fieldvalue here to be able to remove 'NA'. - urlnattr=self.geturlnattr('senseid',senseid=senseid) #just give me the sense. - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - log.debug("removing LIFT fields location={},fieldtype={},fieldvalue={}" - "".format(location,fieldtype,fieldvalue)) - urlnattr2=self.geturlnattr('examplewfieldlocvaluefromsense', - location=location, - fieldtype=fieldtype,fieldvalue=fieldvalue - ) - url2=urlnattr2['url'] - if showurl==True: - log.info("url for examples: {} (n={}".format(url2,len(node.findall( - url2)))) - for example in node.findall(url2): - node.remove(example) - # """
1
""" - # """
Plural
""" - # for child in node: - # print (child.tag, child.attrib) - # for child in example: - # print ('child:',child.tag, child.attrib, child.text) - # for grandchild in child: - # print ('grandchild:',grandchild.tag, grandchild.attrib, grandchild.text) - # log.info("Continuing on to the next example node now:") - # log.info("Continuing again to the next example node now:") - self.updatemoddatetime(guid=guid,senseid=senseid) - # self.write() - def updateexfieldvalue(self,guid=None,senseid=None,analang=None, - glosslang=None,langform=None,glossform=None,fieldtype=None, - location=None,fieldvalue=None,ps=None, - newfieldvalue=None,showurl=False): - """This updates the fieldvalue, based on current value. It assumes - there is a field already there; use addexamplefields if not""" - urlnattr=self.geturlnattr('exfieldvalue',senseid=senseid, - fieldtype=fieldtype, - location=location, - fieldvalue=fieldvalue - ) #just give me the sense. - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - # for value in node.findall(f"field[@type=location]/" - # f"form[text='{location}']" - # f"[@type='{fieldtype}']/" - # f"form[text='{fieldvalue}']/text"): - # # """
1
""" - # # """
Plural
""" - node.text=newfieldvalue #remove(example) - self.updatemoddatetime(guid=guid,senseid=senseid) - # self.write() - def updatemoddatetime(self,guid=None,senseid=None,analang=None, - glosslang=None,langform=None,glossform=None,fieldtype=None, - location=None,fieldvalue=None,ps=None, - newfieldvalue=None,showurl=False): + def updatemoddatetime(self,guid=None,senseid=None): """This updates the fieldvalue, ignorant of current value.""" - urlnattr=self.geturlnattr('entry',guid=guid,senseid=senseid) #just entry - url=urlnattr['url'] - if showurl==True: - log.info(url) - node=self.nodes.find(url) #this should always find just one node - node.attrib['dateModified']=getnow() + if senseid is not None: + surl=self.get('sense',senseid=senseid) #url object + for s in surl.get(): + s.attrib['dateModified']=getnow() #node + eurl=self.get('entry',senseid=senseid) #url object + for e in eurl.get(): + e.attrib['dateModified']=getnow() #node + elif guid is not None: #only if no senseid given + for e in self.get('entry',guid=guid).get(): + e.attrib['dateModified']=getnow() + self.write() def read(self): """this parses the lift file into an entire ElementTree tree, for reading or writing the LIFT file.""" @@ -1469,114 +527,57 @@ def write(self,filename=None): pathlib.Path(filename).name, pathlib.Path(filename).parent, os.listdir(pathlib.Path(filename).parent))) def analangs(self): - log.log(1,_("Looking for analangs in lift file")) + """These are ordered by frequency in the database""" self.audiolangs=[] self.analangs=[] - possibles=list(dict.fromkeys(self.get('lexemelang')+self.get('citation' - 'lang')+self.get('pronunciationlang'))) - log.info(_("Possible analysis language codes found: {}".format(possibles))) - for glang in ['fr','en']: - if glang in possibles: - for form in ['citation','lexeme']: - gforms=self.get(form,analang=glang) - if 0< len(gforms): - log.info("LWC lang {} found in {} field: {}".format( - glang,form,self.get(form,analang=glang))) - """For Saxwe, and others 
who have fr or en encoding errors""" - if len(gforms) <= 10: - log.info("Only {} examples of LWC lang {} found " - "in {} field; is this correct?".format( - len(gforms),glang,form)) - # possibles.remove(glang) #not anymore - for lang in possibles: + lxl=self.get('lexeme/form').get('lang') + lcl=self.get('citation/form').get('lang') + pronl=self.get('pronunciation/form').get('lang') + langsbycount=collections.Counter(lxl+lcl+pronl) + self.analangs=[i[0] for i in langsbycount.most_common()] + log.info(_("Possible analysis language codes found: {}".format( + self.analangs))) + for glang in set(['fr','en']) & set(self.analangs): + c=langsbycount[glang] + if 0< c: + """For Saxwe, and others who have fr or en encoding errors""" + if c <= 10: + log.info("Only {} examples of LWC lang {} found; is this " + "correct?".format(c,glang)) + for lang in self.analangs: if 'audio' in lang: log.debug(_("Audio language {} found.".format(lang))) self.audiolangs+=[lang] - else: - self.analangs+=[lang] + self.analangs.remove(lang) if self.audiolangs == []: log.debug(_('No audio languages found in Database; creating one ' 'for each analysis language.')) for self.analang in self.analangs: self.audiolangs+=[f'{self.analang}-Zxxx-x-audio'] - log.debug('Audio languages: {}'.format(self.audiolangs)) - log.debug('Analysis languages: {}'.format(self.analangs)) + log.info('Audio languages: {}'.format(self.audiolangs)) + log.info('Analysis languages: {}'.format(self.analangs)) def glosslangs(self): - self.glosslangs=list(dict.fromkeys(self.get('glosslang')+self.get( - 'defnlang'))) - log.debug(_("gloss languages found: {}".format(self.glosslangs))) - def glossordefn(self,guid=None,senseid=None,lang='ALL',ps=None - ,showurl=False): - if lang == None: #This allows for a specified None='give me nothing' - return - elif lang == 'ALL': - lang=None #this is how the script gives all, irrespective of lang. 
- forms=self.get('gloss',guid=guid,senseid=senseid,glosslang=lang,ps=ps, - showurl=showurl) #,showurl=True - if forms == []: - formsd=self.get('definition',guid=guid,senseid=senseid, - glosslang=lang, - showurl=showurl) - forms=list() - for form in formsd: - forms.append(rx.glossifydefn(form)) - return forms - def citationorlexeme(self,guid=None,senseid=None,lang=None,ps=None - ,showurl=False): - """I think this was a nice idea, but unnecessary; ability to use guid - or senseid is more important.""" - # if guid is None: - # try: - # for guid in self.guidsvalidwps: - # return self.citationorlexeme(guid=guid,senseid=senseid, - # lang=lang,ps=ps, - # showurl=showurl) - # except: - # for guid in self.guids: - # return self.citationorlexeme(guid=guid,senseid=senseid, - # lang=lang,ps=ps, - # showurl=showurl) - # else: - forms=self.get('citation',guid=guid,senseid=senseid,analang=lang, - showurl=showurl, - ps=ps - ) #,showurl=True - if forms == []: #for the whole db this will not work if even one gloss is filled out - forms=self.get('lexeme',guid=guid,senseid=senseid,analang=lang, - showurl=showurl, - ps=ps - ) - return forms - def fields(self,guid=None,lang=None): #get all the field types in a given entry - return list(dict.fromkeys(self.get('fieldname',guid=guid,lang=lang)))#nfields=0 - def getsenseids(self): #get the number entries in a lift file. - self.senseids=self.get('senseid') #,showurl=True - self.nsenseids=len(self.senseids) #,guid,lang,fieldtype,location - def getguids(self): #get the number entries in a lift file. 
- self.guids=self.get('guid') #,showurl=True - self.nguids=len(self.guids) #,guid,lang,fieldtype,location + """These are ordered by frequency in the database""" + g=self.get('gloss').get('lang') + d=self.get('definition/form').get('lang') + self.glosslangs=[i[0] for i in collections.Counter(g+d).most_common()] + log.info(_("gloss languages found: {}".format(self.glosslangs))) + def fields(self,guid=None,lang=None): # all field types in a given entry + f=list(dict.fromkeys(self.get('field').get('type'))) + return f + def getlocations(self,guid=None,lang=None): # all field locations in a given entry + l=list(dict.fromkeys(self.get('example/locationfield').get('text'))) + log.info('Locations found in Examples: {}'.format(l)) + return l + def getsenseids(self): + self.senseids=self.get('sense').get('senseid') + self.nsenseids=len(self.senseids) + def getguids(self): + self.guids=self.get('entry').get('guid') + self.nguids=len(self.guids) + """Set up""" def nc(self): nounclasses="1 2 3 4 5 6 7 8 9 10 11 12 13 14" - # def nlist(self): #This variable gives lists, to iterate over. - # # prenasalized=['mb','mp','mbh','mv','mf','nd','ndz','ndj','nt','ndh','ng','ŋg','ŋg','nk','ngb','npk','ngy','nj','nch','ns','nz'] #(graphs that preceede a consonant) - # ntri=["ng'"] - # ndi=['mm','ny','ŋŋ'] - # nm=['m','m','M','n','n','ŋ','ŋ','ɲ'] - # nasals=ntri+ndi+nm - # actuals={} - # for lang in self.analangs: - # unsorted=self.inxyz(lang,nasals) - # """Make digraphs appear first, so they are matched if present""" - # actuals[lang]=sorted(unsorted,key=len, reverse=True) - # return actuals - # def glist(self): #This variable gives lists, to iterate over. - # glides=['ẅ','y','Y','w','W'] - # actuals={} - # for lang in self.analangs: - # unsorted=self.inxyz(lang,glides) #remove the symbols which are not in the data. 
- # """Make digraphs appear first, so they are matched if present""" - # actuals[lang]=sorted(unsorted,key=len, reverse=True) - # return actuals def clist(self): #This variable gives lists, to iterate over. log.log(2,"Creating CV lists from scratch") """These are all possible forms, that I have ever run across. @@ -1637,37 +638,18 @@ def clist(self): #This variable gives lists, to iterate over. x[dconsvar]+=c[stype][nglyphs] else: x[consvar]+=c[stype][nglyphs] - # s['g']={} - # x['NC']=['mbh','ndz','ndj','ndh','ngb','npk','ngy','nch','mb','mp', - # 'mv','mf','nd','nt','ng','ŋg','ŋg','nk','nj','ns','nz'] x['ʔ']=['ʔ', "ꞌ", #Latin Small Letter Saltillo "'", #Tag Apostrophe 'ʼ' #modifier letter apostrophe ] x['G']=['ẅ','y','Y','w','W'] - # x['CG']=list((char+g for char in x['C'] for g in x['G'])) x['N']=['m','M','n','ŋ','ɲ','ɱ'] #'N', messed with profiles x['Ndg']=['mm','ŋŋ','ny'] x['Ntg']=["ng'"] - # x['NC']=list((n+char for char in x['C'] for n in x['N'])) - # x['NCG']=list((n+char+g for char in x['C'] for n in x['N'] - # for g in x['G'])) """Non-Nasal/Glide Sonorants""" x['S']=['l','r'] x['Sdg']=['rh','wh'] - # x['CS']=list((char+s for char in x['C'] for s in x['S'])) - # x['NCS']=list((n+char+s for char in x['C'] for n in x['N'] - # for s in x['S'])) - # self.treatlabializepalatalizedasC=False - # if self.treatlabializepalatalizedasC==True: - # lp={} - # lp['lab']=list(char+'w' for char in c) - # lp['pal']=list(char+'y' for char in c) - # lp['labpal']=list(char+'y' for char in lp['lab']) - # lp['labpal']+=list(char+'w' for char in lp['pal']) - # for stype in sorted(lp.keys()): #larger graphs first - # c=lp[stype]+c x['V']=[ #decomposed first: #tilde (decomposed): @@ -1716,112 +698,103 @@ def clist(self): #This variable gives lists, to iterate over. 
self.s[lang][stype]=rx.inxyz(self,lang,x[stype]) log.debug('hypotheticals[{}][{}]: {}'.format(lang,stype, str(x[stype]))) - log.debug('actuals[{}][{}]: {}'.format(lang,stype, + log.debug('Actual Segments found [{}][{}]: {}'.format(lang,stype, str(self.s[lang][stype]))) + log.info('Actual Segments found: {}'.format(self.s)) def slists(self): self.segmentsnotinregexes={} self.clist() - # def segmentin(self, lang, glyph): - # """This actually allows for dygraphs, etc., so I'm keeping it.""" - # """check each form and lexeme in the lift file (not all files - # use both).""" - # for form in self.citationforms[lang] + self.lexemes[lang]: - # if re.search(glyph,form): #see if the glyph is there - # return glyph #find it and stop looking, or return nothing - # def inxyz(self, lang, segmentlist): #This calls the above script for each character. - # actuals=list() - # for i in segmentlist: - # s=self.segmentin(lang,i) - # #log.info(s) #to see the following run per segment - # if s is not None: - # actuals.append(s) - # return list(dict.fromkeys(actuals)) - def getguidformstosearchbyps(self,ps,lang=None): - if lang is None: - lang=self.analang - self.guidformstosearch[lang][ps]={} #Erases all previous data!! - for guid in self.get('guidbyps',lang=lang,ps=ps): - form=self.get('citation',guid=guid,lang=lang,ps=ps) - if len(form) == 0: #no items returned - form=self.get('lexeme',guid=guid,lang=lang,ps=ps) - self.guidformstosearch[lang][ps][guid]=form - def getsenseidformstosearchbyps(self,ps,lang=None): - if lang is None: - lang=self.analang - self.senseidformstosearch[lang][ps]={} #Erases all previous data!! 
- for senseid in self.get('senseidbyps',lang=lang,ps=ps): - form=self.get('citation',senseid=senseid,lang=lang,ps=ps) - if len(form) == 0: #no items returned - form=self.get('lexeme',senseid=senseid,lang=lang,ps=ps) - self.senseidformstosearch[lang][ps][senseid]=form - def getguidformstosearch(self): - # import time - """This outputs a dictionary of form {analang: {guid:form}*}*, where - form is citation if available, or else lexeme. This is to be flexible - for entries in process of analysis, and to have a dictionary to check - with regexes for output.""" - self.guidformstosearch={} - self.senseidformstosearch={} - for lang in self.analangs: - self.guidformstosearch[lang]={} #This will erase all previous data!! - self.senseidformstosearch[lang]={} - for ps in self.pss: #I need to break this up. - # start_time=time.time() - self.getguidformstosearchbyps(ps,lang=lang) - self.getsenseidformstosearchbyps(ps,lang=lang) - #"n",str(time.time() - start_time),"seconds.") - #log.info(self.guidformstosearch) - def getformstosearchbyps(self,ps,lang=None): - if lang is None: - lang=self.analang - self.formstosearch[lang][ps]={} #Erases all previous data!! - #for guid in self.get('guidbynofield',lang=lang,ps=ps): - # forms=self.citationorlexeme(lang=lang,ps=ps) - """This actually needs this logic here, since formstosearch hasn't - been made yet.""" - forms=self.get('citation',analang=lang,ps=ps) - # if len(forms) == 0: #no items returned, I should probably combine - #this at some point, list(dict.fromkeys(form1+form2)) - forms+=self.get('lexeme',analang=lang,ps=ps) - # forms1=self.get('citation',lang=lang,ps=ps) - # forms2=self.get('lexeme',lang=lang,ps=ps) - # forms=list(dict.fromkeys(forms1+forms2)) - self.formstosearch[lang][ps]=forms - def getformstosearch(self): - # import time - """This outputs a dictionary of form {analang: {guid:form}*}*, where - form is citation if available, or else lexeme. 
This is to be flexible - for entries in process of analysis, and to have a dictionary to check - with regexes for output.""" - self.formstosearch={} - for lang in self.analangs: - self.formstosearch[lang]={} #This will erase all previous data!! - for ps in self.pss+[None]: #I need to break this up. - # start_time=time.time() - self.getformstosearchbyps(ps,lang=lang) - #"n",str(time.time() - start_time),"seconds.") - # log.info(self.formstosearch) - def citationforms(self): #outputs generator object with each form in LIFT file. - """This produces a dictionary, of forms for each language.""" - #return self.get('citationform') - output={} + # def getformstosearch(self): + # """This outputs a dictionary of form {analang: {guid:form}*}*, where + # form is citation if available, or else lexeme. This is to be flexible + # for entries in process of analysis, and to have a dictionary to check + # with regexes for output.""" + # fts={} + # for lang in self.analangs: + # fts[lang]={} #This will erase all previous data!! + # for ps in self.pss+[None]: #I need to break this up. 
+ # fts[lang][ps]={} + # for s in self.get('sense',analang=lang,ps=ps).get('senseid'): + # f=self.citation(senseid=s,analang=lang) + # # if f == []: + # # f=self.lexeme(senseid=s,analang=lang) + # for fi in f: + # if fi in fts[lang][ps]: + # fts[lang][ps][fi].append(s) + # else: + # fts[lang][ps][fi]=[s] + # log.debug("Found the following forms to search: {}".format( + # fts)) + # return fts + """Get stuff""" + def gloss(self,**kwargs): + return self.get('gloss/text', **kwargs).get('text') + def glosses(self,**kwargs): + output={} # This produces a dictionary, of forms for each language + for lang in self.glosslangs: + kwargs['glosslang']=lang + output[lang]=self.gloss(**kwargs)#.get('text') + return output + def definition(self,**kwargs): + truncate=kwargs.pop('truncate',False) + forms=self.get('definition', **kwargs).get('text') + if truncate: + forms=[rx.glossifydefn(f) for f in forms] + return forms + def definitions(self,**kwargs): + output={} # This produces a dictionary, of forms for each language + for lang in self.glosslangs: + kwargs['glosslang']=lang + output[lang]=self.definition(**kwargs) + return output + def glossordefn(self,**kwargs): + forms=self.gloss(**kwargs) + if forms == []: + log.info("Missing gloss form; looking for definition form.({})" + "".format(kwargs)) + kwargs['truncate']=True + forms=self.definition(**kwargs) + return forms + def glossesordefns(self,**kwargs): + output={} # This produces a dictionary, of forms for each language + for lang in self.glosslangs: + kwargs['glosslang']=lang + output[lang]=self.gloss(**kwargs) + if output[lang] == []: + kwargs['truncate']=True + forms=self.definition(**kwargs) + return output + def citationorlexeme(self,**kwargs): + """This produces a list; specify senseid and analang as you like.""" + output=self.citation(**kwargs) + if output == []: + output=self.lexeme(**kwargs) + log.info("Missing citation form; looking for lexeme form.({})" + "".format(kwargs)) + return output + def 
citation(self,**kwargs): + """This produces a list; specify senseid and analang as you like.""" + output=self.get('citation/form/text',**kwargs).get('text') + return output + def citations(self,**kwargs): + output={} # This produces a dictionary, of forms for each language for lang in self.analangs: - output[lang]=self.get('citation',analang=lang) - #output[lang]=list() - #for form in self.nodes.findall(f"entry/citation/form[@lang='{lang}']/text"): - # output[lang]+=[form.text] #print the text of the node above - #log.info(output.keys()) #to see which languages are found + kwargs['analang']=lang + output[lang]=self.citation(**kwargs) #.get('text') + log.info("Found the following citation forms: {}".format(output)) + return output + def lexeme(self,**kwargs): + """This produces a list; specify senseid and analang as you like.""" + output=self.get('lexeme/form/text',**kwargs).get('text') return output - def lexemes(self): - output={} + def lexemes(self,**kwargs): + output={} # This produces a dictionary, of forms for each language. for lang in self.analangs: - output[lang]=self.get('lexeme',analang=lang) #list() - #for form in self.nodes.findall(f"entry/lexical-unit/form[@lang='{lang}']/text"): - # output[lang]+=[form.text] #print the text of the node above - #log.info(output.keys()) #to see which languages are found + kwargs['analang']=lang + output[lang]=self.lexeme(**kwargs) + log.info("Found the following lexemes: {}".format(output)) return output def extrasegments(self): - # start_time=time.time() #this enables boot time evaluation for lang in self.analangs: self.segmentsnotinregexes[lang]=list() extras=list() @@ -1831,7 +804,7 @@ def extrasegments(self): # nonwordforming=re.compile('[() \[\]\|,\-!@#$*?]') invalid=['(',')',' ','[',']','|',',','-','!','@','#','$','*','?' 
,'\n'] - for form in [x for x in self.citationforms[lang]+self.lexemes[lang] + for form in [x for x in self.lcs[lang]+self.lxs[lang] if x != None]: for x in form: if ((x not in invalid) and @@ -1852,207 +825,42 @@ def extrasegments(self): "complex segments which should be counted as a single " "segment.") log.info("--those may not be covered by your regexes.") - print("No problems!") - - # log.log(2,"{} (lift.extrasegments run time): {}".format( - # time.time()-start_time,self.segmentsnotinregexes)) + def ps(self,**kwargs): #get POS values, limited as you like + return self.get('ps',**kwargs).get('value') def pss(self): #get all POS values in the LIFT file - return list(dict.fromkeys(self.get('ps'))) - #pss=list() - #for ps in self.nodes.findall(f"entry/sense/grammatical-info"): - # thisps=ps.attrib.get('value') - # if thisps not in pss and thisps is not None: - # pss.append(thisps) - #return pss #return the list + p=list(dict.fromkeys(self.ps())) + log.info("Found these ps values: {}".format(p)) + return p + def getmorphtypes(self): #get all morph-type values in the LIFT file + m=collections.Counter(self.get('morphtype',showurl=True).get('value') + ).most_common() + log.info("Found these morph-type values: {}".format(m)) + return m """CONTINUE HERE: Making things work for the new lift.get() paradigm.""" - def formsbyps(self,ps): #self is LIFT! 
#should be entriesbyps - """This function just pulls all entries of a particular - grammatical category""" - """This function, and others like it, should pull from the profiles - data variable, which should be redesigned so it can recompile - quickly for changes in form.""" - output=[] - winfoentries=[] - x=0 - y=0 - z=0 - zz=0 - if ps is None: - entries=self.nodes.findall(f"entry") - #gi1=self.nodes.findall(f".//grammatical-info") - #log.info(' '.join('Entries found:',len(entries)))#for gi in entry.find(f"grammatical-info"): - #log.info(' '.join('Ps found:',len(gi1)))#for gi in entry.find(f"grammatical-info"): - for entry in entries: - gi=entry.find(f".//grammatical-info") #.get('value') - #if gi is not None and len(gi)>1: - # log.info(gi) - # log.info(' '.join(x,y,z,zz)) - # exit() - # x+=1 - if gi is None: #entry.find(f".//grammatical-info") is None: - # y+=1 - output+=[entry] #add this item to a list, not it's elements - #elif gi is not None: #entry.find(f".//grammatical-info") is not None: - # z+=1 - # log.info(type(gi)) - # log.info(gi.get('value')) - # log.info(len(gi)) - # winfoentries+=[entry] - #else: - # zz+=1 - # log.info("Huh?") - else: - #log.info(ps) - # for self.db.get('') - for entry in self.nodes.findall(f"entry/sense/grammatical-info[@value='{ps}']/../.."): - #log.info('Skipping this for now...') - output+=[entry] #add this item to a list, not it's elements - #log.info(' '.join('Entries without ps:',len(output))) - #log.info(' '.join('Entries with ps:',len(winfoentries))) - #log.info(' '.join('Total entries found (Should be 2468):',len(output)+len(winfoentries))) - #log.info(' '.join('multiple:',x,'no ps:',y,'wps',z,'totalps:',y+z,'huh:',zz)) - #exit() - return output #list of entry nodes - def guidformsbyregex(self,regex,ps=None,analang=None): #self is LIFT! 
- # from multiprocessing.dummy import Pool as ThreadPool - """This function takes in a ps and compiled regex, - and outputs a dictionary of {guid:form} form.""" - if analang is None: - analang=self.analang - #log.info(regex) - output={} - def checkformsbyps(self,analang,ps): - for form in self.formstosearch[analang][ps]: - if regex.search(form): #re.search(regex,form): #,showurl=True - for guid in self.get('guidbylexeme',form=form,ps=ps): - output[guid]=form - return output - if ps == 'All': #When I'm looking through each ps, not ps=None (e.g., invalid). - for ps in self.pss+[None]: - output.update(checkformsbyps(self,analang,ps)) #adds dict entries - return output - else: - output.update(checkformsbyps(self,analang,ps)) - return output - for entry in entries: - def debug(): - log.info(len(entry)) - log.info(str(entry.tag)) - log.info(str(entry.get('guid'))) - log.info(entry.attrib) #('value')) - log.info(str(entry.find('lexical-unit'))) - log.info(str(entry.find('citation'))) - log.info(str(entry.find('form'))) - log.info(self.get('lexeme',guid=entry.get('guid'))) - log.info(self.get('lexeme',guid=entry.get('guid'))[0]) - exit() - #debug() - """FIX THIS!!!""" - form=self.get('lexeme',guid=entry.get('guid')) #self.formbyid(entry.get('guid'))[0] #just looking for one at this point. - log.info("Apparently there are no/multiple forms for this entry...") - return output - def senseidformsbyregex(self,regex,analang,ps=None): #self is LIFT! - """This function takes in a ps and compiled regex, - and outputs a dictionary of {senseid:form} form.""" - # if analang is None: - # analang=self.analang - output={} - def checkformsbyps(self,analang,ps): - for form in self.formstosearch[analang][ps]: - if regex.search(form): #re.search(regex,form): #,showurl=True - for senseid in self.get('senseidbylexeme',form=form,ps=ps): - output[senseid]=form - return output - if ps == 'All': #When I'm looking through each ps, not ps=None (e.g., invalid). 
- for ps in self.pss+[None]: - output.update(checkformsbyps(self,analang,ps)) #adds dict entries - return output - else: - output.update(checkformsbyps(self,analang,ps)) - return output - for entry in entries: - def debug(): - log.info(len(entry)) - log.info(str(entry.tag)) - log.info(str(entry.get('guid'))) - log.info(entry.attrib) #('value')) - log.info(str(entry.find('lexical-unit'))) - log.info(str(entry.find('citation'))) - log.info(str(entry.find('form'))) - log.info(self.get('lexeme',guid=entry.get('guid'))) - log.info(self.get('lexeme',guid=entry.get('guid'))[0]) - exit() - #debug() - """FIX THIS!!!""" - form=self.get('lexeme',guid=entry.get('guid')) #self.formbyid(entry.get('guid'))[0] #just looking for one at this point. - log.info("Apparently there are no/multiple forms for this entry...") - return output - def formbyid(self,guid,lang=None): #This is the language version, use without entry. - """bring this logic into get()""" - form=self.get('citation',guid=guid,lang=lang) #self.nodes.findall(f"entry[@guid='{guid}']/citation/form[@lang='{self.xyz}']/text") - if form == []: #default to lexical form for missing citation forms. - form=self.get('lexeme',guid=guid,lang=lang) #form=self.nodes.findall(f"entry[@guid='{guid}']/lexical-unit/form[@lang='{self.xyz}']/text") - if form == []: - return None - #log.info(form) - return form #[0].text #print the text of the node above - def psbyid(self,guid): #This is the language version, use without entry. - #return self.nodes.find(f"entry[@guid='{guid}']/sense/grammatical-info").get('value') - #return ps.attrib.get('value') - ps=self.nodes.find(f"entry[@guid='{guid}']/sense/grammatical-info") - if ps is not None: - return ps.attrib.get('value') - def formsnids(self): #outputs [guid, form] tuples for each entry in the lexicon. Is this more efficient than using idsbyformregex? 
- for entry in self.nodes.findall(f"entry"): - for form in entry.findall(f"./citation/form[@lang='{self.xyz}']/text"): #Not lexeme-unit..… - yield entry.get('guid'), form.text #print the text of the node above - #figure out how to filter this by part of speech - def idsbylexemeregex(self,regex): #outputs [guid, ps, form] tuples for each entry in the LIFT file lexeme which matches the regex. - for ps in pss(): #for each POS - for entry in self.nodes.findall(f"entry/sense/grammatical-info[@value='{ps}']/../.."): #for each entry - for form in entry.findall(f"./lexical-unit/form[@lang='{self.xyz}']/text"): #for each CITATION form this needs to see lexeme forms, too..… - #for form in entry.findall(f"./citation/form[@lang='{xyz}']/text"): #for each CITATION form this needs to see lexeme forms, too..… - # if re.search(regex,form.text): #check if the form matches the regex - if regex.search(form.text): #check if the form matches the regex - yield entry.get('guid'), ps, form.text #print the tuple (may want to augment this some day to include other things) - def idsbylexemeregexnps(self,ps,regex): #outputs [guid, ps, form] tuples for each entry in the LIFT file lexeme which matches the regex and ps. - """This puts out a dictionary with guid keys and (ps,form) tuples - for values. I need to rework this. 
I think not use it anymore...""" - output={} - for form in self.formstosearch[self.analang][ps]: - # form=self.formstosearch[self.analang][ps][guid] - # log.info(form) - if regex.search(form): - # if len(form) == 1 and regex.search(form): - output[guid]=form - return output - #exit() - - for entry in self.nodes.findall(f"entry/sense/grammatical-info[@value='{ps}']/../../lexical-unit/form[@lang='{self.analang}']/../.."): - form=entry.find(f"lexical-unit/form[@lang='{self.analang}']/text") - if regex.search(form.text): #re.search(regex,form.text): - output[entry.get('guid')]=(ps, form.text) - return output - def wordcountbyps(self,ps): - count=0 - if ps is None: - #entries= - #gi1=self.nodes.findall(f".//grammatical-info") - #log.info(' '.join('Entries found:',len(entries)))#for gi in entry.find(f"grammatical-info"): - #log.info(' '.join('Ps found:',len(gi1)))#for gi in entry.find(f"grammatical-info"): - for entry in self.nodes.findall(f"entry"): - #gi= #.get('value') - if entry.find(f".//grammatical-info") is None: #entry.find(f".//grammatical-info") is None: - # y+=1 - count+=1 - else: - for entry in self.nodes.findall(f"entry/sense/grammatical-info[@value='{ps}']/../.."): #for each entry - count+=1 - return count - def formsregex(self,regex): #UNUSED? outputs [guid, form] tuples where the form matches a regex. This might make sense for a tuple with lexeme-unit, citation, and plural. 
- for entry in formsnids(): - if regex.search(entry[1]): #check regex against the form part of the tuple output by formsnids - yield entry[0] #print id part of the tuple output by formsnids +class Node(ET.Element): + def makefieldnode(self,type,lang,text=None,gimmetext=False): + n=Node(self,'field',attrib={'type':type}) + nn=n.makeformnode(lang,text,gimmetext=gimmetext) + if gimmetext: + return nn + def makeformnode(self,lang,text=None,gimmetext=False): + n=Node(self,'form',attrib={'lang':lang}) + nn=n.maketextnode(text,gimmetext=gimmetext) #Node(n,'text') + if gimmetext: + return nn + def maketextnode(self,text=None,gimmetext=False): + n=Node(self,'text') + if text is not None: + n.text=str(text) + if gimmetext: + return n.text + def maketraitnode(self,type,value,gimmenode=False): + n=Node(self,'trait',attrib={'name':type, 'value':str(value)}) + if gimmenode: + return n + def __init__(self, parent, tag, attrib={}, **kwargs): + super(Node, self).__init__(tag, attrib, **kwargs) + parent.append(self) class Entry(object): # what does "object do here?" #import lift.put as put #class put: #import get #class put: @@ -2075,7 +883,7 @@ def __init__(self, db, guid=None, *args, **kwargs): """get(self,attribute,guid=None,analang=None,glosslang=None,lang=None, ps=None,form=None,fieldtype=None,location=None,showurl=False)""" # self.lexeme=db.get('lexeme',guid=guid) #don't use this! 
- self.citation=db.citationorlexeme(guid=guid,lang=self.analang) + self.lc=db.citationorlexeme(guid=guid,lang=self.analang) self.gloss=db.glossordefn(guid=guid,lang=self.glosslang) self.gloss2=db.glossordefn(guid=guid,lang=self.glosslang2) # self.citation=get.citation(self,self.analang) @@ -2106,6 +914,585 @@ def __init__(self, xyz): class Unused(): def removedups(x): #This removes duplicates from a list return list(dict.fromkeys(x)) +class LiftURL(): + def get(self,what='node'): + log.log(4,self.__dict__) + n=self.base.findall(self.url) + if n != []: + log.log(4,"found: {} (x{}), looking for {}".format(n[:1],len(n),what)) + what=self.unalias(what) + if n == [] or what is None or what == 'node': + return n + elif what == 'text': + r=[i.text for i in n] + log.log(4,r) + return r + else: + r=[i.get(what) for i in n] + log.log(4,r) + return r + def build(self,tag,liftattr=None,myattr=None,attrs=None): + buildanother=False + noseparator=False + log.log(4,"building {}, @dict:{}, @{}={}, on top of {}".format(tag, + attrs,liftattr,myattr, self.currentnodename())) + b=tag + if attrs is None: + attrs={liftattr: myattr} + for attr in attrs: + if (None not in [attr,attrs[attr]] and attrs[attr] in self.kwargs + and self.kwargs[attrs[attr]] is not None): + if "'" in self.kwargs[attrs[attr]]: + b+="[@{}=\"{}\"]".format(attr,self.kwargs[attrs[attr]]) + else: + b+="[@{}='{}']".format(attr,self.kwargs[attrs[attr]]) + if ((liftattr is None or (liftattr in self.kwargs #no lift attribute + and self.kwargs[liftattr] is None)) + and tag == 'text' and myattr in self.kwargs #text value to match + and self.kwargs[myattr] is not None): + b="[{}='{}']".format(tag,self.kwargs[myattr]) + noseparator=True + if tag == 'text' and tag in self.targettail: + buildanother=True #the only way to get text node w/o value + self.url+=[b] + if noseparator: + l=self.url + self.url=[i for i in l[:len(l)-2]]+[''.join([i for i in l[len(l)-2:]])] + else: + self.level['cur']+=1 + 
self.level[self.alias.get(tag,tag)]=self.level['cur'] + log.log(4,"Path so far: {}".format(self.drafturl())) + if buildanother: + self.build(tag) + def parent(self): + self.level['cur']-=2 #go up for this and its parent + self.build("..") + def entry(self): + self.build("entry","guid","guid") + self.bearchildrenof("entry") + def text(self,value=None): + self.baselevel() + self.build("text",myattr=value) + def form(self,value=None,lang=None): + self.baselevel() + self.kwargs['value']=self.kwargs.get(value,None) #location and tonevalue + log.log(4,"form kwargs: {}".format(self.kwargs)) + self.build("form","lang",lang) #OK if lang is None + if value is not None: + self.text("value") + def citation(self): + self.baselevel() + self.build("citation") + self.form("lcform","analang") + def lexeme(self): + self.baselevel() + self.build("lexical-unit") + self.form("lxform","analang") + def pronunciation(self): + self.baselevel() + self.build("pronunciation") + self.kwargs['ftype']='location' + attrs={"name":"ftype",'value':'location'} + self.trait(attrs=attrs) + self.form("pronunciation",'analang') + def trait(self,attrs={}): + self.baselevel() + self.build("trait",attrs=attrs) + def sense(self): + self.baselevel() + self.build("sense","id","senseid") + self.bearchildrenof("sense") + def ps(self): + self.baselevel() + self.build("grammatical-info","value","ps") + def gloss(self): + self.baselevel() + self.build("gloss","lang","glosslang") + def definition(self): + self.baselevel() + self.build("definition","lang","glosslang") + self.form("definition","glosslang") + def example(self): + self.baselevel() + self.build("example") + self.maybeshow('form') + self.maybeshow('translation') + self.maybeshow('locationfield') + self.maybeshow('tonefield') + def translation(self): + self.baselevel() + self.kwargs['ftype']='Frame translation' + self.kwargs['formtext']='translationvalue' + self.build("translation","type","ftype") + # self.form("translationvalue",'glosslang') + def 
field(self): + self.baselevel() + self.build("field","type","ftype") + def locationfield(self): + self.baselevel() + self.kwargs['ftype']='location' + self.kwargs['formtext']='location' + self.field() + self.form("location",'glosslang') + def tonefield(self): + self.baselevel() + self.kwargs['ftype']='tone' + """I assume we will never use sense/tonefield and example/tonefield + in the same url...""" + self.level['tonefield']=self.level['cur']+1 #so this won't repeat + self.field() + if 'tonevalue' in self.kwargs: + self.kwargs['formtext']='tonevalue' + self.form("tonevalue",'glosslang') + else: #dont' force a text node with no text value + self.kwargs['formtext']=None + self.form(lang='glosslang') + def morphtype(self,attrs={}): + if 'morphtype' in self.kwargs: + attrs={'name':"morph-type",'value':self.kwargs[morphtype]} + self.trait(attrs) # + def attrdonothing(self): + pass + def maybeshow(self,nodename,parent=None): + # for arg in args: + # log.info("maybeshow arg: {}".format(arg)) + if self.shouldshow(nodename): #We need it for a child to show, etc + self.show(nodename,parent) + def show(self,nodename,parent=None): #call this directly if you know you want it + if nodename == 'form': #args:value,lang + if parent is None: + log.error("Sorry, I can't tell what form to pass to this field;" + "\nWhat is its parent?") + return + else: + args=self.formargsbyparent(parent) + elif nodename == 'text': #args:value,lang + args=['formtext'] #This needs to be smarter; different kinds of formtext + else: + args=list() + for arg in args: + log.log(4,"show arg: {}".format(arg)) + if len(args) == 0: + getattr(self,nodename)() + else: + getattr(self,nodename)(*args) + def formargsbyparent(self,parent): + args=list() + if parent in ['gloss', 'definition','translation']: + args.append(parent) + args.append('glosslang') + if parent in ['lexeme', 'citation', 'example']: + if parent == 'example': + if 'audiolang' not in self.kwargs: + args.append('exampleform') + else: + 
args.append('exampleaudio') + args.append('analang') + return args + else: + args.append(parent) + args.append('analang') + return args + def lift(self): + log.error("LiftURL is trying to make a lift node; this should never " + "happen; exiting!") + exit() + def bearchildrenof(self,parent): + log.log(4,"bearing children of {} ({})".format(parent, + self.children[parent])) + for i in self.children[parent]: + log.log(4,"bearchildrenof i: {}".format(i)) + self.maybeshow(i,parent) + def levelup(self,target): + while self.level.get(target,self.level['cur']+1) < self.level['cur']: + self.parent() + def baselevel(self): + parents=self.parentsof(self.callerfn()) + for target in parents: #self.levelsokfor[self.callerfn()]: #targets: #targets should be ordered, with best first + if target in self.level and self.level[target] == self.level['cur']: + return #if we're on an acceptable level, just stop + elif target in self.level: + self.levelup(target) + return + elif parents.index(target) < len(parents)-1: + log.log(4,"level {} not in {}; checking the next one...".format( + target,self.level)) + else: + log.error("last level {} (of {}) not in {}; this is a problem!" + "".format(target,parents,self.level)) + log.error("this is where we're at: {}\n {}".format(self.kwargs, + self.drafturl())) + exit() + def maybeshowtarget(self,parent): + # parent here is a node ancestor to the current origin, which may + # or may not be an ancestor of targethead. If it is, show it. 
+ f=self.getfamilyof(parent,x=[]) + log.log(4,"Maybeshowtarget: {} (family: {})".format(parent,f)) + if self.targethead in f: + if parent in self.level: + log.log(4,"Maybeshowtarget: leveling up to {}".format(parent)) + self.levelup(parent) + else: + log.log(4,"Maybeshowtarget: showing {}".format(parent)) + self.show(parent) + self.showtargetinhighestdecendance(parent) + return True + def showtargetinlowestancestry(self,nodename): + log.log(4,"Running showtargetinlowestancestry for {}/{} on {}".format( + self.targethead,self.targettail,nodename)) + #If were still empty at this point, just do the target if we can + if nodename == [] and self.targethead in self.children[self.basename]: + self.show(self.targethead) + return + gen=nodename + g=1 + r=giveup=False + while not r and giveup is False: + log.log(4,"Trying generation {}".format(g)) + gen=self.parentsof(gen) + for p in gen: + r=self.maybeshowtarget(p) + if r: + break + g+=1 + if g>10: + giveup=True + if giveup is True: + log.error("Hey, I've looked back {} generations, and I don't see " + "an ancestor of {} (target) which is also an ancestor of " + "{} (current node).".format(g,self.targethead,nodename)) + def showtargetinhighestdecendance(self,nodename): + log.log(4,"Running showtargetinhighestdecendance for {} on {}".format( + self.targethead,nodename)) + if nodename in self.children: + children=self.children[nodename] + else: + log.log(4,"Node {} has no children, so not looking further for " + "descendance.".format(nodename)) + return + grandchildren=[i for child in children + if child in self.children + for i in self.children[child] + ] + greatgrandchildren=[i for child in children + if child in self.children + for grandchild in self.children[child] + if grandchild in self.children + for i in self.children[grandchild] + ] + gggrandchildren=[i for child in children + if child in self.children + for grandchild in self.children[child] + if grandchild in self.children + for ggrandchild in 
self.children[grandchild] + if ggrandchild in self.children + for i in self.children[ggrandchild] + ] + log.log(4,"Looking for {} in children of {}: {}".format( + self.targethead,nodename,children)) + log.log(4,"Grandchildren of {}: {}".format(nodename,grandchildren)) + log.log(4,"Greatgrandchildren of {}: {}".format( + nodename,greatgrandchildren)) + if self.targethead in children: + log.log(4,"Showing '{}', child of {}".format(self.targethead,nodename)) + self.show(self.targethead,nodename) + elif self.targethead in grandchildren: + log.log(4,"Found target ({}) in grandchildren of {}: {}".format( + self.targethead,nodename,grandchildren)) + for c in children: + if c in self.children and self.targethead in self.children[c]: + log.log(4,"Showing '{}', nearest ancenstor".format(c)) + self.show(c,nodename) #others will get picked up below + self.showtargetinhighestdecendance(c) + elif self.targethead in greatgrandchildren: + log.log(4,"Found target ({}) in gr8grandchildren of {}: {}".format( + self.targethead,nodename,greatgrandchildren)) + for c in children: + for cc in grandchildren: + if cc in self.children and self.targethead in self.children[cc]: + log.log(4,"Showing '{}', nearest ancenstor".format(c)) + self.show(c,nodename) #others will get picked up below + self.showtargetinhighestdecendance(c) + elif self.targethead in gggrandchildren: + log.log(4,"Found target ({}) in gggrandchildren of {}: {}".format( + self.targethead,nodename,gggrandchildren)) + for c in children: + for cc in grandchildren: + for ccc in greatgrandchildren: + if ccc in self.children and self.targethead in self.children[ccc]: + log.log(4,"Showing '{}', nearest ancenstor".format(c)) + self.show(c,nodename) #others will get picked up below + self.showtargetinhighestdecendance(c) + else: + log.error("Target not found in children, grandchildren, or " + "greatgrandchildren!") + def nodesatlevel(self,levelname='cur'): + if levelname not in self.level: + return [] + cur=[x for x,y in 
def parsetargetlineage(self):
    """Split self.target into its head, tail and complete list of bits.

    A target may be given as a lineage ('example/form') or as a single
    node name ('form'). This sets:
        self.targetbits: every node name in the lineage, in order
        self.targethead: the first node name
        self.targettail: any remaining node names (an empty list if none)

    Raises SystemExit (after logging an error) if the head contains 'form'
    while 'form' is not a child of the base node, as that head is ambiguous.
    """
    self.targetbits=self.target.split('/') #one bit if no lineage is given
    if '/' in self.target: #if target lineage is given
        log.log(4,"{} : {}".format(self.target,self.targetbits))
    self.targethead=self.targetbits[0]
    self.targettail=self.targetbits[1:]
    if 'form' in self.targethead and 'form' not in self.children[
                                            self.getalias(self.basename)]:
        log.error("Looking for {} as the head of a target is going to "
        "cause problems, as it appears in too many places, and is likely "
        "to not give the desired results. Fix this, and try again. (whole "
        "target: {})".format(self.targethead,self.target))
        # exit() is added by the site module for interactive use, and may
        # not exist (python -S, frozen builds); raise what it would raise.
        raise SystemExit
This is needed + to target form, with example/form distinct from example/field/form. + Without target='example/form', target form will always find field/form, + even if field has a sibling form (as it typically would) under example. + Once the level of the lineage head is decided, the rest of the lineage + is added.""" + # Now operate on the head of the target lineage + log.log(4,"URL (before {} target): {}".format(self.target,self.drafturl())) + if self.getalias(self.targethead) not in self.level: #If the target hasn't been made yet. + log.log(4,self.url) + i=self.currentnodename() + log.log(4,"URL base: {}; i: {}".format(self.basename,i)) + if i is None: #if it is, skip down in any case. + i=self.basename + log.log(4,"URL bit list: {}; i: {}".format(self.url,i)) + if type(i) == list: + i=i[0] #This should be a string + f=self.getfamilyof(i,x=[]) + log.log(4,"Target: {}; {} family: {}".format(self.targethead,i,f)) + if self.targethead in f: + self.showtargetinhighestdecendance(i) #should get targethead + # return #only do this for the first you find (last placed). + else:#Continue here if target not a current level node decendent: + self.showtargetinlowestancestry(i) + # Either way, we finish by making the target tail, and leveling up. 
+ if self.targettail is not None: + log.log(4,"Adding targettail {} to url: {}".format(self.targettail, + self.drafturl())) + for b in self.targettail: + log.log(4,"Adding targetbit {} to url: {}".format(b,self.drafturl())) + n=self.targetbits.index(b) + bp=self.tagonly(self.targetbits[n-1]) #.split('[')[0]#just the node, not attrs + afterbp=self.drafturl().split(self.unalias(bp)) + log.log(4,"b: {}; bp: {}; afterbp: {}".format(b,bp,afterbp)) + log.log(4,"showing target element {}: {} (of {})".format(n,b,bp)) + if len(afterbp) <=1 or b not in afterbp[-1]: + log.log(4,"showing target element {}: {} (of {})".format(n,b,bp)) + self.levelup(bp) + self.show(b,parent=bp) + self.levelup(self.targetbits[-1])#leave last in target, whatever else + def drafturl(self): + return '/'.join(self.url) + def makeurl(self): + self.url='/'.join(self.url) + def printurl(self): + print(self.url) + def usage(self): + log.info("Basic usage of this class includes the following kwargs:\n" + "\tbase: node from which we are pulling (should be supplied)\n" + "\ttarget: node we are looking for\n" + "\tget: thing we want: node (default)/'text'/attribute name\n" + "Below here implies an entry node:\n" + "\tguid: id used to identify a lift entry\n" + "\tlxform: form to find in lexeme form fields\n" + "\tlcform: form to find in citation form fields\n" + "\tmorphtype: type of morpheme (stem, affix, etc)\n" + "\tpronunciation: form to find in pronunciation form fields\n" + "Below here implies a sense node:\n" + "\tsenseid: id used to identify a lift sense\n" + "\tdefinition: definition of sense\n" + "\tgloss: gloss (one word definition) of sense\n" + "Below here implies an example node:\n" + "\ttranslation: Translation of example forms\n" + "\ttonevalue: value of an example tone group (from sorting)\n" + "" + ) + def shouldshow(self,node): + c=self.getfamilyof(node,x=[]) + # This fn is not called by showtargetinhighestgeneration or maketarget + if node in self.level: + return False + elif node == 
def getfamilyof(self,node,x=None):
    """Return every descendant of node, according to self.children.

    node: one node name, or a list of node names
    x: list to which descendants are added (in place); a new list is
        started if this is not given.

    Descendants are added depth first: the children of a node, then the
    descendants of each of those children, in turn. A node name reachable
    by more than one path appears once per path. Returns x.
    """
    if x is None:
        x=[]
    log.log(4,"running getfamilyof on '{}'".format(node))
    if isinstance(node,str):
        node=[node]
    for i in node:
        log.log(4,"running getfamilyof on '{}'".format(i))
        if i != '': #compare by value; identity with a literal isn't reliable
            ii=self.children.get(i,'')
            log.log(4,"Found '{}' this time!".format(ii))
            if ii != '':
                x+=ii
                self.getfamilyof(ii,x)
    return x
def parentsof(self,nodenames):
    """Return the nodes which list any of nodenames among their children.

    nodenames: one node name, or a list of node names.

    For each name, the parents found in self.children are taken in reverse
    of their order there; the results for each name are then put together,
    keeping only the first occurrence of any parent. Returns a list.
    """
    kinship=self.children.items()
    log.log(4,"children: {}".format(kinship))
    keypairs=' '.join(str(pair) for pair in kinship if nodenames in pair[1])
    log.log(4,"key pair: {}".format(keypairs))
    if type(nodenames) is not list:
        nodenames=[nodenames]
    found={} #a dict keeps insertion order, and drops repeats
    for name in nodenames:
        holders=[parent for parent,kids in kinship if name in kids]
        for parent in reversed(holders):
            found.setdefault(parent)
    plist=list(found)
    log.log(4,"parents of {}: {}".format(nodenames,plist))
    return plist
def getnow():
    """Return the current UTC time as 'YYYY-MM-DDTHH:MM:SSZ' (to the second).

    The stamp is formatted explicitly, rather than by trimming isoformat(),
    because isoformat() leaves off the fraction when the microsecond is 0,
    and trimming would then cut into the time itself.
    """
    return datetime.datetime.now(datetime.timezone.utc).strftime(
                                                    '%Y-%m-%dT%H:%M:%SZ')
+ a=attribdict={} + a['template']={ + 'cm': "Give a prose description here", + 'url': (("url in the XML file, variables OK" + ),['guid','senseid','ps']), + 'attr': 'script'} + a['entry']= { + 'cm': 'use to get entries with a given guid or senseid', + 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']/.." + ),['guid','senseid']), + 'attr':'node'} + a['example']={ + 'cm': 'use to get examples with a given guid or senseid', + 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']/example" + ),['guid','senseid']), + 'attr':'node'} + a['examplebylocation']={ + 'cm': 'use to get examples with a given guid or senseid', + 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']/example" + "/field[@type='location']/form[text='{location}']/../.." + ),['guid','senseid','location']), + 'attr':'node'} + a['guidbyps']={ + 'cm': 'use to get guids of entries with a given ps', + 'url':(("entry[@guid='{guid}']/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + ),['guid','senseid','ps']), + 'attr':'guid'} + a['senseidbyps']={ + 'cm': 'use to get ids of senses with a given ps', + 'url':(("entry/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + ),['senseid','ps']), + 'attr':'id'} + a['guidwanyps']={ + 'cm': 'use to get guids of entries with any ps', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']/grammatical-info[@value]/../.." + ),['guid','analang','senseid']), + 'attr':'guid'} + a['senseidwanyps']={ + 'cm': 'use to get ids of senses with any ps', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']/grammatical-info[@value]/.." + ),['guid','analang','senseid']), + 'attr':'id'} + a['guidbypronfield']={ + 'cm': 'use to get guids of entries with fields at the ' + 'pronunciation level', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']/grammatical-info[@value='{ps}']/../.." 
+ "/pronunciation" + "/trait[@name='location'][@value='{location}']/.." + "/form[@lang='{analang}']/.." + #lang could be any: + "/field[@type='{fieldtype}']/form[@lang='{lang}']/../../.." + ),['guid','analang','senseid','ps','location', + 'fieldtype','lang']), + 'attr':'guid'} + a['guidbypronfieldvalue']={ + 'cm': 'use to get guids of entries with fields at the ' + 'pronunciation level', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']/grammatical-info[@value='{ps}']/../.." + "/pronunciation" + "/trait[@name='location'][@value='{location}']/.." + "/form[@lang='{analang}']/.." + "/field[@type='{fieldtype}']" + "/form[@lang='{lang}'][text='{fieldvalue}']" + "/../../.." # ^ lang could be any + ),['guid','analang','senseid','ps','location', + 'fieldtype','lang','fieldvalue']), + 'attr':'guid'} + a['senseidbyexfieldvalue']={ + 'cm': 'use to get guids of entries with fields at the ' + 'example level', + 'url':(("entry[@guid='{guid}']" + # "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/example" + "/field[@type='location']" + "/form[@lang='{glosslang}'][text='{location}']" + "/../.." + "/field[@type='{fieldtype}']" + "/form[@lang='{glosslang}']" + "[text='{fieldvalue}']/../../.." + ),['guid','analang','senseid','ps','glosslang', + 'location','fieldtype','fieldvalue']), + 'attr':'id'} + a['guidbyexfieldvalue']={ + 'cm': 'use to get guids of entries with fields at the ' + 'example level', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense" + "/grammatical-info[@value='{ps}']/.." + "/example" + "/field[@type='location']" + "/form[@lang='{glosslang}'][text='{location}']" + "/../.." + "/field[@type='{fieldtype}']" + "/form[@lang='{glosslang}']" + "[text='{fieldvalue}']/../../../.." 
+ ),['guid','analang','ps','location','glosslang', + 'fieldtype','fieldvalue']), + 'attr':'guid'} + a['guidbysensefield']={ + 'cm': 'use to get guids of entries with fields at the ' + 'sense level', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense" + "/grammatical-info[@value='{ps}']/.." + "/field[@type='{fieldtype}']/../.." + ),['guid','analang','ps','fieldtype']), + 'attr':'guid'} + a['guidbyentryfield']={ + 'cm': 'use to get guids of entries with fields at the ' + 'entry level', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/field[@type='{fieldtype}']/.." + ),['guid','analang','senseid','ps','fieldtype']), + 'attr':'guid'} + a['guidbylang']={ + 'cm': 'use to get guids of all entries with lexeme of a ' + 'given lang (or not)', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + ),['guid','analang']), + 'attr':'guid'} + a['guidbysenseid']={ + 'cm': 'use to get guids of sense with particular id', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']/.." + ),['guid','senseid']), + 'attr':'guid'} + a['guid']={ + 'cm': 'use to get guids of all entries (no qualifications)', + 'url':(("entry[@guid='{guid}']" + ),['guid']), + 'attr':'guid'} + a['senseid']={ + 'cm': 'use to get ids of all senses (no qualifications)', + 'url':(("entry" + "/sense[@id='{senseid}']" + ),['senseid']), + 'attr':'id'} + a['senseidbytoneUFgroup']={ + 'cm': 'use to get ids of all senses by tone group', + 'url':(("entry" + "/sense[@id='{senseid}']" + "/field[@type='{fieldtype}']" + "/form[@lang='{lang}'][text='{form}']/../.." + ),['senseid','fieldtype','lang','form']), + 'attr':'id'} + a['guidbylexeme']={ + 'cm': 'use to get guid by ps and lexeme in the specified ' + 'language (no reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." 
+ "/lexical-unit" + "/form[@lang='{analang}'][text='{form}']" + "/../.." # ^ [.=’text'] not until python 3.7 + ),['guid','senseid','ps','analang','form']), + 'attr':'guid'} + a['guidbysense']={ + 'cm': 'use to get guid by ps and citation form in the ' + 'specified language (no reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']/.." + ),['guid','senseid']), + 'attr':'guid'} + a['senseidbylexeme']={ + 'cm': 'use to get senseid by ps and lexeme in the ' + 'specified language (no reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit" + "/form[@lang='{analang}'][text='{form}']/../.." + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + ),['guid','analang','form','senseid','ps']), + 'attr':'id'} + a['guidbycitation']={ + 'cm': 'use to get guid by ps and citation form in the ' + 'specified language (no reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/citation" + "/form[@lang=guid'{analang}'][text='{form}']" + "/../.." # ^ [].=’text'] not until python 3.7 + ),['guid','senseid','ps','analang','form']), + 'attr':'guid'} + a['toneUFfieldvalue']={ + 'cm': 'use to get tone UF values of all senses within the ' + 'constraints specified.', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/field[@type='{fieldtype}']" + "/form[@lang='{lang}']/text" + ),['guid','senseid','ps','fieldtype','lang']), + 'attr':'nodetext'} + a['lexemenode']={ + 'cm': 'use to get lexemes of all entries with a form ' + 'in the specified language (no reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." 
+ "/lexical-unit/form[@lang='{analang}']" + ),['guid','senseid','ps','analang']), + 'attr':'node'} + a['lexeme']={ + 'cm': 'use to get lexemes of all entries with a form in ' + 'the specified language (no reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/lexical-unit/form[@lang='{analang}']/text" + ),['guid','senseid','ps','analang']), + 'attr':'nodetext'} + a['citationnode']={ + 'cm': 'use to get citation forms of one or all entries ' + 'with a form in the specified language (no ' + 'reference to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/citation/form[@lang='{analang}']" + ),['guid','senseid','ps','analang']), + 'attr':'node'} + a['citation']={ + 'cm': 'use to get citation forms of one or all entries ' + 'with a form in the specified language (no reference ' + 'to fields)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/citation/form[@lang='{analang}']/text" + ),['guid','senseid','ps','analang']), + 'attr':'nodetext'} + a['definitionnode']={ + 'cm': 'use to get definition nodes of entries', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/definition" + "/form[@lang='{glosslang}']" + ),['guid','senseid','ps','glosslang']), + 'attr':'node'} + a['definition']={ + 'cm': 'use to get definitions of entries', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/definition" + "/form[@lang='{glosslang}']/text" + ),['guid','senseid','ps','glosslang']), + 'attr':'nodetext'} + a['glossnode']={ + 'cm': 'use to get gloss nodes', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." 
+ "/gloss[@lang='{glosslang}']" + ),['guid','senseid','ps','glosslang']), + 'attr':'node'} + a['gloss']={ + 'cm': 'use to get glosses of entries', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/gloss[@lang='{glosslang}']/text" + ),['guid','senseid','ps','glosslang']), + 'attr':'nodetext'} + a['fieldnode']={ + 'cm': 'use to get whole field nodes (to modify)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/field[@type='{fieldtype}']/form[@lang='{lang}']" + "/.." + ),['guid','senseid','ps','fieldtype','lang']), + 'attr':'node'} + a['fieldname']={ + 'cm': 'use to get value(s) for type of field in sense', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/field" + ),['guid','senseid','ps']), + 'attr':'type'} + a['fieldvalue']={ + 'cm': 'use to get value(s) for field(s) of a specified ' + '(or all) type(s) with a form in the specified (or ' + 'any) language for one or all entries (no ' + 'reference to fields, nor to lexeme form language)', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/field[@type='{fieldtype}']" + "/form[@lang='{lang}']/text" #This can be ANY lang. + ),['guid','senseid','ps','fieldtype','lang']), + 'attr':'nodetext'} + a['pronunciationbylocation']={ + 'cm': 'use to get value(s) for pronunciation information ' + 'for a given location', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." 
+ "/pronunciation" + "/trait[@name='location'][@value='{location}']" + "/../form[@lang='{analang}']/text" + ),['guid','senseid','ps','location','analang']), + 'attr':'nodetext'} + a['pronunciationfieldname']={ + 'cm': 'use to get value(s) for a field type of a specified ' + '(or not) location', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/pronunciation" + "/trait[@name='location'][@value='{location}']" + "/../field"),['guid','senseid','ps','location']), + 'attr':'type'} + a['pronunciationfieldvalue']={ + 'cm': 'use to get value(s) for <>', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/pronunciation" + "/trait[@name='location'][@value='{location}']/.." + "/field[@type='{fieldtype}']" + "/form[@lang='{lang}']/text" + ),['guid','senseid','ps','location','fieldtype', + 'lang']), #not necessarily glosslang or analang... + 'attr':'nodetext'} + a['exfieldvaluenode']={ + 'cm': 'use to get values of fields at the example level', + 'url':(("entry[@guid='{guid}']" + # "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/example" + "/field[@type='location']" + "/form[@lang='{glosslang}']" + "[text='{location}']/../.." + "/field[@type='{fieldtype}']" + "/form[@lang='{glosslang}']/text" + ),['guid','analang','senseid','ps','glosslang', + 'location','fieldtype']), + 'attr':'node'} + a['exfieldlocation']={ + 'cm': 'use to get location of fields at the example level', + 'url':(("entry[@guid='{guid}']" + "/lexical-unit/form[@lang='{analang}']/../.." + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." 
+ "/example" + "/field[@type='location']" + "/form[@lang='{glosslang}']/text" + ),['guid','analang','senseid','ps','glosslang']), + 'attr':'nodetext'} + a['pronunciationfieldlocation']={ + 'cm': 'use to get value(s) for pronunciation location' + '/context', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/pronunciation" + "/field[@type='{fieldtype}']/.." + "/trait[@name='location']" + ),['guid','senseid','ps','fieldtype']), + 'attr':'value'} + a['pronunciation']={ + 'cm': 'use to get value(s) for pronunciation in fields ' + 'with location specified', + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/pronunciation" + "/trait[@name='location'][@value='{location}']/.." + "/form[@lang='{glosslang}']/text" + ),['guid','senseid','ps','location','glosslang']), + 'attr':'nodetext'} + a['lexemelang']={ + 'cm': "analysis languages used in lexemes", + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/lexical-unit/form" + ),['guid','senseid','ps']), + 'attr': 'lang'} + a['citationlang']={ + 'cm': "analysis languages used in citation forms", + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/citation/form" + ),['guid','senseid','ps']), + 'attr': 'lang'} + a['pronunciationlang']={ + 'cm': "analysis languages used in citation forms", + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/../.." + "/pronunciation/form" + ),['guid','senseid','ps']), + 'attr': 'lang'} + a['glosslang']={ + 'cm': "gloss languages used in glosses", + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." 
+ "/gloss" + ),['guid','senseid','ps']), + 'attr': 'lang'} + a['defnlang']={ + 'cm': "gloss languages used in definitions", + 'url':(("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info[@value='{ps}']/.." + "/definition" + "/form"),['guid','senseid','ps']), + 'attr': 'lang'} + a['illustration']={ + 'cm': "Illustration by entry", + 'url': (("entry[@guid='{guid}']" + "/sense[@id='{senseid}']/illustration" + ),['guid','senseid','ps']), + 'attr': 'href'} + a['ps']={ + 'cm': "Part of speech, or grammatical category", + 'url': (("entry[@guid='{guid}']" + "/sense[@id='{senseid}']" + "/grammatical-info" + ),['guid','senseid']), + 'attr': 'value'} + #URLs for sense nodes: + a['senselocations']={ + 'cm': 'use to get location of fields at the example level, for a ' + 'given sense', + 'url':(("example" + "/field[@type='location']" + "/form[@lang='{glosslang}']/text" + ),['glosslang']), + 'attr':'text'} + a['examplewfieldlocvaluefromsense']={ + 'cm': 'use to get an example with a given tone/exfield ' + 'when you have the sense node.', + 'url':(("example/field[@type='location']" + "/form[text='{location}']/../.." + "/field[@type='{fieldtype}']" + "/form[text='{fieldvalue}']/../.." + ),['location','fieldtype','fieldvalue']), + 'attr':'nodetext'} + #URLs for example nodes: + a['exampletest']={ + 'cm': 'use to get an example with a given tone/exfield ' + 'when you have the sense node.', + 'url':(("field[@type='location']" + "/form[text='{location}']/../.." + "/field[@type='{fieldtype}']" + "/form[text='{fieldvalue}']/../.." 
+ ),['location','fieldtype','fieldvalue']), + 'attr':'nodetext'} + a['glossofexample']={ + 'cm': 'use to get glosses/translations of examples', + 'url':(("translation[@type='Frame translation']" + "/form[@lang='{glosslang}']/text" + ),['glosslang']), + 'attr':'nodetext'} + a['formofexample']={ + 'cm': 'use to get analang forms of examples', + 'url':(("form[@lang='{lang}']/text" + ),['lang']), + 'attr':'nodetext'} +def printurllog(lift): + log.info('\n'+'\n'.join([str(x)+'\n '+str(y.url) for x,y in lift.urls.items()])) if __name__ == '__main__': import time #for testing; remove in production # def _(x): # return str(x) """To Test:""" - loglevel=5 + # loglevel='Debug' + loglevel='INFO' + print('loglevel=',loglevel) from logsetup import * log=logsetup(loglevel) - filename="/home/kentr/Assignment/Tools/WeSay/dkx/MazHidi_Lift.lift" - filename="/home/kentr/Assignment/Tools/WeSay/gnd/gnd.lift.bak.txt" - filename="/home/kentr/Assignment/Tools/WeSay/bfj/bfj.lift" - filename="/home/kentr/Assignment/Tools/WeSay/gnd/gnd.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/dkx/MazHidi_Lift.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/bse/SIL CAWL Wushi.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/bfj/bfj.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/gnd/gnd.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/eto/eto.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/tsp/TdN.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/eto/eto.lift" + # filename="/home/kentr/Assignment/Tools/WeSay/bqg/Kusuntu.lift" + filename="/home/kentr/Assignment/Tools/WeSay/CAWL_demo/SILCAWL.lift" lift=Lift(filename,nsyls=2) - senseid='26532c2e-fedf-4111-85d2-75b34ed31dd8' - lift.modverificationnode(senseid,add="another value3",rm="Added value") - lift.modverificationnode(senseid,rm="another value3",add="another value2") - lift.modverificationnode(senseid,rm="another value3",add="another value4") + senseids=["begin_7c6fe6a9-9918-48a8-bc3a-e88e61efa8fa", + 
'widen_fceb550d-fc99-40af-a288-0433add4f15', + 'flatten_9fb3d2b4-bc9e-4451-b475-36ee10316e40', + 'swallow_af9c3f8f-71e6-4b9a-805c-f6a148dcab8c', + 'frighten_ecffd944-2861-495f-ae38-e7e9cdad45db', + 'prevent_929504ce-35bb-48fe-ae95-8674a97e625f'] + guids=['dd3c93bb-0019-4dce-8d7d-21c1cb8a6d4d', + '09926cec-8be1-4f66-964e-4fdd8fa75fdc', + '2902d6b3-89be-4723-a0bb-97925a905e7f', + '9ba02d67-3a44-4b7f-8f39-ea8e510df402', + 'eece7037-3d55-45c7-b765-95546e5fccc6'] + locations=['Progressive','Isolation']#,'Progressive','Isolation'] + glosslang='en' + pss=["Verb","Noun"] + analang='bfj' + def test(): + for fieldvalue in [2,2]: + for location in locations: + # # for guid in guids: + for senseid in ['prevent_929504ce-35bb-48fe-ae95-8674a97e625f']: + url=lift.get('example/field/form/text', + path=['location','tonefield'], #get this one first + senseid=senseid, + fieldtype='tone',location=location, + tonevalue=fieldvalue, + showurl=True# what='node' + ) #'text' + exfieldvalue=url.get('text') + for e in exfieldvalue: + log.info("exfieldvalue: {}".format(e)) + url_sense=lift.retarget(url,"sense") + # Bind lift object to each url object; or can we store + # this in a way that allows for non-recursive storage + # only of the url object by the lift object? 
+ ids=url_sense.get('senseid') + # log.info("senseids: {}".format(ids)) + for id in [x for x in ids if x is not None]: + log.info("senseid: {}".format(id)) + url_entry=lift.retarget(url,"entry") + idsentry=url_entry.get('guid') + for id in [x for x in idsentry if x is not None]: + log.info("guid: {}".format(id)) + + # example=lift.get('example', + # path=['location','tonefield'], #get this one first + # senseid=senseid, + # fieldtype='tone',location=location, + # tonevalue=fieldvalue, + # what='node') + # sense=lift.get('sense', + # path=['location','tonefield'], #get this one first + # senseid=senseid, + # fieldtype='tone',location=location, + # tonevalue=fieldvalue, + # what='node') + return + for subcheck in range(5): + b=lift.get('sense',fieldtype='tone',location=locations[0], + tonevalue=subcheck,showurl=True).get('senseid') + print(b) + # lift.get("sense", location=locations[0], tonevalue=subcheck, + # path=['tonefield'],showurl=True).get('senseid') + exit() + for senseid in senseids: + exnode=lift.get('example',showurl=True,senseid=senseid, + location=locations[1]).get() + print('exnode:',exnode) + # if len(exnode)>0: + prettyprint(exnode) + for e in exnode: + # print('e:',e) + # prettyprint(e) + audio=lift.get('form/text',node=e,showurl=True,#en-Zxxx-x-audio + analang='en-Zxxx-x-audio').get('text') + print('audio:',audio) + exit() + g=lift.glossordefn(#self,guid, + # senseid, + analang='en', + senseid='continue, resume_d174612b-b3c0-4073-bff0-58fd098252a9', + glosslang='fr',glosslang2=None, + lang='swh',#forms, + # langform="TestForm", + # glossform="testgloss",#gloss2form, + # fieldtype='tone', + # location='Plural', + # fieldvalue=45, + # ps=None, + showurl=True) + # lift.write() + # analang=kwargs.get('analang') + # glosslang=kwargs.get('glosslang') + # langform=kwargs.get('langform') + # glossform=kwargs.get('glossform') + # fieldtype=kwargs.get('fieldtype','tone') + # fieldvalue=kwargs.get('fieldvalue') + # location=kwargs.get('location') + + print(g) 
+ for i in g: + print(i) + quit() + import timeit + def timetest(): + times=50 + out1=timeit.timeit(test, number=times) + print(out1) + timetest() + # log.info(lift.urls) + # log.info('\n'.join([str(x) for x in lift.urls.items()])) + exit()# print('l:',l) + showurl=True + for i in l: + ll=lift.getfrom(i,'example',location="1ss",showurl=showurl) + if ll != []: + # print("ll:",ll) + for ii in ll: + lll=lift.getfrom(ii,'text',analang='en', + path=['example/form'], + # exampleform="to begin", + what='text', + showurl=showurl) + # print(lll) + # for iii in lll: + # print(iii.text) + # lllt=lift.getfrom(ii,'text',analang='en',glosslang='fr', + # path='translation',what='text', + # showurl=showurl) + # if lll != []: + # print("lll:",', '.join(lll)) + showurl=False + # showurl=False + log.info(lift.urls) + # log.info("Done with above") + # fieldtype='tone' + # fieldvalue='1' + # for i in l: + # r=i.findall("field[@type='location']" + # "/form[text='{}']/../.." + # "/field[@type='{}']" + # "/form[text='{}']/../..".format(location,fieldtype,fieldvalue) + # ) + # for ii in r: + # loc=i.find("field[@type='location']/form/text") + # val=i.find("field[@type='{}']/form/text".format(fieldtype)) + # log.info("{}: {}, {}".format(i,loc.text,val.text)) + # print(b.url) + # print(bb.url) + exit() + # senseid='26532c2e-fedf-4111-85d2-75b34ed31dd8' + senseid='skin (of man)_d56b5a5d-7cbf-49b9-a2dd-24eebb0ae462' + lift.modverificationnode(senseid,vtype="V",add="another value3",rm="Added value") + lift.modverificationnode(senseid,vtype="V",rm="another value3",add="another value2") + lift.modverificationnode(senseid,vtype="V",rm="another value3",add="another value4") """Functions to run on a database from time to time""" # lift.findduplicateforms() # lift.findduplicateexamples() diff --git a/main.py b/main.py index c2bf9455..7edd5ebb 100755 --- a/main.py +++ b/main.py @@ -3,8 +3,8 @@ """This file runs the actual GUI for lexical file manipulation/checking""" program={'name':'A→Z+T'} 
program['tkinter']=True -program['production']=False #True for making screenshots -program['version']='0.8.6' #This is a string... +program['production']=True#False #True for making screenshots +program['version']='0.8.6oop' #This is a string... program['url']='https://github.com/kent-rasmussen/azt' program['Email']='kent_rasmussen@sil.org' import platform @@ -168,6 +168,7 @@ def __init__(self, parent, frame, nsyls=None): self.invalidregex='( |\.|,|\)|\()+' # self.profilelegit=['#','̃','C','N','G','S','V','o'] #In 'alphabetical' order self.profilelegit=['#','̃','N','G','S','D','C','Ṽ','V','ʔ','ː',"̀",'=','<'] #'alphabetical' order + """Are we OK without these?""" # self.guidtriage() #sets: self.guidswanyps self.guidswops self.guidsinvalid self.guidsvalid # self.guidtriagebyps() #sets self.guidsvalidbyps (dictionary keyed on ps) @@ -183,7 +184,7 @@ def __init__(self, parent, frame, nsyls=None): self.loadsettingsfile(setting='profiledata') """I think I need this before setting up regexs""" self.guessanalang() #needed for regexs - log.debug("analang guessed: {} (If you don't like this, change it in " + log.info("analang guessed: {} (If you don't like this, change it in " "the menus)".format(self.analang)) self.maxprofiles=5 # how many profiles to check before moving on to another ps self.maxpss=2 #don't automatically give more than two grammatical categories @@ -219,9 +220,16 @@ def __init__(self, parent, frame, nsyls=None): # hasattr(self,'profile') and (self.profile is not None) and # hasattr(self,'name') and (self.name is not None)): # self.sortingstatus() #because this won't get set later #>checkdefaults? 
+ self.guessglosslangs() #needed for the following + self.datadict=FramedDataDict(self) log.info("Done initializing check; running first check check.") """Testing Zone""" #set None to make labels, else "raised" "groove" "sunken" "ridge" "flat" + # n=self.db.getsensenode() + # senseid="begin_7c6fe6a9-9918-48a8-bc3a-e88e61efa8fa" + # self.name='Progressive' + # RecordButtonFrame.makefilenames(check=self,senseid=senseid) + # log.info(n) self.mainlabelrelief() self.checkcheck() def notifyuserofextrasegments(self): @@ -238,23 +246,11 @@ def notifyuserofextrasegments(self): """Guessing functions""" def guessanalang(self): #have this call set()? - langspriority=collections.Counter(self.db.get('lexemelang')+ - self.db.get('citationlang')).most_common() - try: - self.analang=langspriority[0][0] - log.debug(_("Analysis language with the most fields ({}): {} ({})" - "".format(langspriority[0][1],self.analang,langspriority))) - except: - self.analang=None - log.info(_("Are there any languages in this database? {}").format( - langspriority)) - return """if there's only one analysis language, use it.""" nlangs=len(self.db.analangs) log.debug(_("Found {} analangs: {}".format(nlangs,self.db.analangs))) - if nlangs == 1: # print('Only one analang in database!') + if nlangs == 1: self.analang=self.db.analangs[0] - self.analangdefault=self.db.analangs[0] #In case the above gets changed. log.debug(_('Only one analang in file; using it: ({})'.format( self.db.analangs[0]))) """If there are more than two analangs in the database, check if one @@ -265,19 +261,17 @@ def guessanalang(self): log.debug(_('Looks like I found an iso code for analang! ' '({})'.format(self.db.analangs[0]))) self.analang=self.db.analangs[0] #assume this is the iso code + self.analangdefault=self.db.analangs[0] #In case it gets changed. elif ((len(self.db.analangs[1]) == 3) and (len(self.db.analangs[0]) != 3)): log.debug(_('Looks like I found an iso code for analang! 
' '({})'.format(self.db.analangs[1]))) self.analang=self.db.analangs[1] #assume this is the iso code + self.analangdefault=self.db.analangs[1] #In case it gets changed. else: - langspriority=collections.Counter(self.db.get('lexemelang')+ - self.db.get('citationlang')).most_common() - log.debug("All: {}".format(self.db.get('lexemelang')+ - self.db.get('citationlang'))) - log.debug(collections.Counter(self.db.get('lexemelang')+ - self.db.get('citationlang'))) - log.debug('Found the following analangs: {}'.format(langspriority)) + self.analang=self.db.analangs[0] + log.debug('Neither analang looks like an iso code, taking the ' + 'first one: {}'.format(self.db.analangs)) else: #for three or more analangs, take the first plausible iso code for n in range(nlangs): if len(self.db.analangs[n]) == 3: @@ -285,6 +279,9 @@ def guessanalang(self): log.debug(_('Looks like I found an iso code for analang! ' '({})'.format(self.db.analangs[n]))) return + log.debug('None of more than three analangs look like an iso code, ' + 'taking the first one: {}'.format(self.db.analangs)) + self.analang=self.db.analangs[0] def guessaudiolang(self): nlangs=len(self.db.audiolangs) """if there's only one audio language, use it.""" @@ -315,17 +312,19 @@ def guessaudiolang(self): return def guessglosslangs(self): """if there's only one gloss language, use it.""" + if not hasattr(self,'glosslangs'): + self.glosslangs=Glosslangs(None,None) if len(self.db.glosslangs) == 1: log.info('Only one glosslang!') - self.glosslang=self.db.glosslangs[0] - self.glosslang2=None + self.glosslangs[0]=self.glosslang=self.db.glosslangs[0] + self.glosslangs[1]=self.glosslang2=None """if there are two or more gloss languages, just pick the first two, and the user can select something else later (the gloss languages are not for CV profile analaysis, but for info after checking, when this can be reset.""" elif len(self.db.glosslangs) > 1: - self.glosslang=self.db.glosslangs[0] - self.glosslang2=self.db.glosslangs[1] + 
self.glosslangs[0]=self.glosslang=self.db.glosslangs[0] + self.glosslangs[1]=self.glosslang2=self.db.glosslangs[1] else: print("Can't tell how many glosslangs!",len(self.db.glosslangs)) def getpss(self): @@ -885,9 +884,10 @@ def submitform(): vars[idn].set(id) else: vars[idn].set(0) - framed=self.getframeddata(id, noframe=True) - log.debug("forms: {}".format(framed['formatted'])) - CheckButton(scroll.content, text = framed['formatted'], + framed=self.datadict.getframeddata(id) + forms=framed.formatted(noframe=True) + log.debug("forms: {}".format(forms)) + CheckButton(scroll.content, text = forms, variable = vars[allpssensids.index(id)], onvalue = id, offvalue = 0, ).grid(row=row,column=0,sticky='ew') @@ -1016,11 +1016,11 @@ def chk(): db['before'][lang]['text']+'__'+db['after'][lang]['text']) senseid=self.gimmesenseid() # This needs self.toneframes - framed=self.getframeddata(senseid,truncdefn=True) #after defn above, before below! + framed=self.datadict.getframeddata(senseid) + framed.setframe(self.name) #At this point, remove this frame (in case we don't submit it) del self.toneframes[self.ps][self.name] self.name=self.nameori - print(frame,framed) """Display framed data""" if hasattr(self.addwindow,'framechk'): self.addwindow.framechk.destroy() @@ -1039,9 +1039,8 @@ def chk(): padx=padx,pady=pady) for lang in langs: row+=1 - print('frame[{}]:'.format(lang),frame[lang]) tf[lang]=('form[{}]: {}'.format(lang,frame[lang])) - tfd[lang]=('(ex: '+framed[lang]+')') + tfd[lang]=('(ex: '+framed.forms.framed[lang]+')') l1=Label(self.addwindow.framechk, text=tf[lang], font=self.fonts['read'], @@ -1252,7 +1251,7 @@ def set(self,attribute,choice,window=None,refresh=True): from self.defaultstoclear[attribute]""" self.cleardefaults(attribute) if attribute in ['glosslang','glosslang2']: - pass #Nothing to change here + self.glosslangs=[self.glosslang,self.glosslang2] #Nothing else to change here elif attribute in ['analang', #do the last two cause problems? 
'interpret','distinguish']: self.reloadprofiledata() @@ -1465,6 +1464,7 @@ def settingsbyfile(self): "scount", "sextracted", "profilesbysense", + "formstosearch" ]}, 'status':{ 'file':'statusfile', @@ -1713,17 +1713,21 @@ def updatestatuslift(self,name=None,subcheck=None,verified=False,refresh=True): subcheck=self.subcheck if name is None: name=self.name - senseids=self.db.get('senseidbyexfieldvalue',fieldtype='tone', - location=name,fieldvalue=subcheck) + senseids=self.db.get("sense", location=name, tonevalue=subcheck, + path=['tonefield']).get('senseid') value=self.verifictioncode(name,subcheck) if verified == True: add=value - rm=None + rms=[] else: add=None - rm=value + rms=[value] for senseid in self.senseidsincheck(senseids): #only for this ps-profile - self.db.modverificationnode(senseid,vtype=self.profile,add=add,rm=rm) + rms+=self.db.getverificationnodevaluebyframe(senseid, + vtype=self.profile, frame=name) + log.info("Removing {}".format(rms)) + self.db.modverificationnode(senseid,vtype=self.profile,add=add, + rms=rms) if refresh == True: self.db.write() #for when not iterated over, or on last repeat def updatestatus(self,subcheck=None,verified=False,refresh=True): @@ -1780,10 +1784,10 @@ def getprofileofsense(self,senseid): #Convert to iterate over local variables profileori=self.profile #We iterate across this here psori=self.ps #We iterate across this here - forms=self.db.citationorlexeme(senseid=senseid,lang=self.analang) + forms=self.db.citationorlexeme(senseid=senseid,analang=self.analang) if forms == []: self.profile='Invalid' - for self.ps in self.db.get('ps',senseid=senseid): + for self.ps in self.db.ps(senseid=senseid): self.addtoprofilesbysense(senseid) self.ps=psori return None,'Invalid' @@ -1793,8 +1797,14 @@ def getprofileofsense(self,senseid): self.profile=self.profileofform(form) if not set(self.profilelegit).issuperset(self.profile): self.profile='Invalid' - for self.ps in self.db.get('ps',senseid=senseid): + for self.ps in 
self.db.ps(senseid=senseid): self.addtoprofilesbysense(senseid) + if self.ps not in self.formstosearch: + self.formstosearch[self.ps]={} + if form in self.formstosearch[self.ps]: + self.formstosearch[self.ps][form].append(senseid) + else: + self.formstosearch[self.ps][form]=[senseid] profile=self.profile self.profile=profileori self.ps=psori @@ -1805,6 +1815,7 @@ def getprofiles(self): self.profilesbysense['Invalid']=[] self.profiledguids=[] self.profiledsenseids=[] + self.formstosearch={} self.sextracted={} #Will store matching segments here for ps in self.db.pss: self.sextracted[ps]={} @@ -2027,7 +2038,7 @@ def gimmeguid(self): idsbyps=self.db.get('guidbyps',lang=self.analang,ps=self.ps) return idsbyps[randint(0, len(idsbyps))] def gimmesenseid(self): - idsbyps=self.db.get('senseidbyps',lang=self.analang,ps=self.ps) + idsbyps=self.db.get('sense',ps=self.ps).get('senseid') return idsbyps[randint(0, len(idsbyps)-1)] def framenamesbyps(self,ps): """Names for all tone frames defined for the language.""" @@ -2056,181 +2067,6 @@ def framevaluesbynamepsprofile(self,ps,profile,name): l+=group return list(dict.fromkeys(l)) """Mediating between LIFT and the user""" - def getframeddata(self,source,noframe=False,notonegroup=False,truncdefn=False): - """This generates a dictionary of form {'form':outputform, - 'gloss':outputgloss} for display, by senseid""" - """Sometimes this script is called to make the example fields, other - times to display it. If source is a senseid, it pulls form/gloss/etc - information from the entry. If source is an example, it pulls that info - from the example. 
The info is formatted uniformly in either case.""" - output={} - log.log(2,"{} {}".format(source, type(source))) - log.log(2,'self.glosslang: {}'.format(self.glosslang)) - log.log(2,'self.glosslang2: {}'.format(self.glosslang2)) - """Just in case there's a problem later...""" - forms={} - glosses={} - gloss={} - tonegroups=None - """Build dual logic here:""" - if isinstance(source,lift.ET.Element): - #This is an example element, not a sense or entry element... - element=source - for node in element: - if (node.tag == 'form') and ((node.get('lang') == self.analang) - or (node.get('lang') == self.audiolang)): - forms[node.get('lang')]=node.findall('text') - if (((node.tag == 'translation') and - (node.get('type') == 'Frame translation')) or - ((node.tag == 'gloss') and - (node.get('lang') == self.glosslang))): - for subnode in node: - if (subnode.tag == 'form'): - glosses[subnode.get('lang')]=subnode.findall('text') - if ((node.tag == 'field') and (node.get('type') == 'tone')): - #This should always be only one value: - tonegroups=node.findall('form/text') - log.log(2,'forms: {}'.format(forms)) - for lang in glosses: - log.log(2,'gloss[{}]: {}'.format(lang,glosses[lang])) - """convert from lists to single items without loosing data, - then pull text from nodes""" - if self.analang in forms: - form=t(firstoflist(forms[self.analang])) - else: - form=None - if self.audiolang in forms: - voice=t(firstoflist(forms[self.audiolang])) - else: - voice=None - for lang in glosses: - if (lang == self.glosslang) or (lang == self.glosslang2): - gloss[lang]=t(firstoflist(glosses[lang])) - """This is what we're pulling from: - -
ga təv
- -
lieu (m), place (f) (pl)
-
- -
1
-
- -
Plural
-
-
- """ - elif (type(source) is str): # and (len(source) == 36): #source is sensedid - #some dbs have senseids with form info, too... - #Asking for a sense, you get all tone groups, if self.name isn't set - log.log(3,'36 character senseid string!') - senseid=source - output['senseid']=senseid - forms[self.analang]=self.db.citationorlexeme(senseid=senseid, - lang=self.analang, - ps=self.ps) - forms[self.audiolang]=self.db.citationorlexeme(senseid=senseid, - lang=self.audiolang, - ps=self.ps) - for lang in [self.glosslang,self.glosslang2]: - if lang is not None: - glosses[lang]=self.db.glossordefn(senseid=senseid,lang=lang, - ps=self.ps) - #If frame is not defined (in self.name) this will output ALL values - #for this sense! - tonegroups=self.db.get('exfieldvalue', senseid=senseid, - fieldtype='tone', location=self.name) - """convert from lists to single items without loosing data""" - form=firstoflist(forms[self.analang]) - voice=firstoflist(forms[self.audiolang]) - for lang in glosses: - gloss[lang]=firstoflist(glosses[lang]) - log.log(2,'gloss[{}]: {}'.format(lang,gloss[lang])) - else: - log.error('Neither Element nor senseid was found!' - '\nThis is almost certainly not what you want!' - '\nFYI, I was looking for {}'.format(source)) - return source - log.log(2,'form: {}'.format(form)) - for lang in gloss: - log.log(2,'gloss:'.format(gloss[lang])) - """The following is the same for senses or examples""" - if notonegroup == False: - #If I haven't defined self.name nor set notonegroup=True, this will - # throw an error on a senseid above. 
- tonegroup=t(firstoflist(tonegroups)) - log.log(2,'tonegroup: {}'.format(tonegroup)) - if tonegroup is not None: - try: - int(tonegroup) - except: - output['tonegroup']=tonegroup #this is only for named groups - if self.glosslang2 in gloss and (self.glosslang2 is None or - gloss[self.glosslang2] is None): - del gloss[self.glosslang2] #remove this now, and lose checks later - output[self.analang]=None - for lang in list(gloss.keys())+[self.glosslang]: - output[lang]=None - text=('noform','nogloss') - if noframe == False: - frame=self.toneframes[self.ps][self.name] - """Forms and glosses have to be strings, or the regex fails""" - if form is None: - form=nn(form) #'noform' - for lang in gloss: - if gloss[lang] is None: - gloss[lang]=nn(gloss[lang]) #'nogloss' - log.log(2,frame) - output[self.analang]=rx.framerx.sub(form,frame[self.analang]) - for lang in gloss: - """only give these if the frame has this gloss, *and* if - the gloss is in the data (user selection is above)""" - if ((lang in frame) and (lang in gloss) and ( - None not in [gloss[lang],frame[lang]])): - output[lang]=rx.framerx.sub(gloss[lang],frame[lang]) - else: - output[self.analang]=nn(form) #for non-segmental forms - for lang in gloss: - output[lang]=gloss[lang] - if voice is not None: - output[self.audiolang]=voice - text=[str(output[self.analang]),"‘"+str(output[self.glosslang])+"’"] - if self.glosslang2 in output and output[self.glosslang2] is not None: - text+=["‘"+str(output[self.glosslang2])+"’"] - if 'tonegroup' in output: - text=[str(output['tonegroup'])]+text - output['formatted']=' '.join(text) #used to be '\t'... - if None in output: - log.error("Apparently None is an output key! 
{}".format(output)) - return output - def getframedentry(self,guid): - """This is most likely obsolete""" - """This generates output for selection and verification, by ps""" - glosses={} - form=firstoflist(self.db.citationorlexeme(guid,lang=self.analang, - ps=self.ps)) - glosses[self.glosslang]=firstoflist(self.db.glossordefn(guid, - lang=self.glosslang, ps=self.ps)) - if self.glosslang2 is not None: - glosses[self.glosslang2]=firstoflist(self.db.glossordefn(guid, - lang=self.glosslang2,ps=self.ps)) - frame=self.toneframes[self.ps][self.name] - if self.debug ==True: - print(forms,glosses,frame) - outputform=None - outputgloss={} - outputform=rx.framerx.sub(form,frame[self.analang]) - for lang in glosses: - if (lang != self.glosslang2) or (self.glosslang2 is not None): - outputgloss[lang]=rx.framerx.sub(glosses[lang], - frame[lang]) - printoutput=(' ',outputform, - "‘"+str(outputgloss[self.glosslang])+"’") - if self.glosslang2 is not None: - printoutput+="‘"+str(outputgloss[self.glosslang2])+"’" - print(printoutput) - return {self.analang:outputform,self.glosslang:outputgloss, - self.glosslang2:outputgloss} def senseidtriage(self): # import time # print("Doing senseid triage... This takes awhile...") @@ -2245,7 +2081,7 @@ def senseidtriage(self): if senseid not in self.senseidsinvalid: self.senseidsvalid+=[senseid] print(len(self.senseidsvalid),'senses with valid data remaining.') - self.senseidswanyps=self.db.get('senseidwanyps') #any ps value works here. + self.senseidswanyps=self.db.get('sense',path=['ps'],showurl=True).get('senseid') #any ps value works here. 
print(len(self.senseidswanyps),'senses with ps data found.') self.senseidsvalidwops=[] self.senseidsvalidwps=[] @@ -2473,8 +2309,11 @@ def button(opts,text,fn=None,column=opts['labelcolumn'],**kwargs): t=(_("Checking {},").format(self.typedict[self.type]['pl'])) proselabel(opts,t,cmd='gettype',parent=tf) opts['columnplus']=1 - if self.name not in self.toneframes[self.ps]: - t=_("no defined tone frame yet.") + if len(self.toneframes[self.ps]) == 0: + t=_("no tone frames defined.") + self.name=None + elif self.name not in self.toneframes[self.ps]: + t=_("no tone frame selected.") self.name=None else: t=(_("working on ‘{}’ tone frame").format(self.name)) @@ -3255,8 +3094,8 @@ def getC(self,window,event=None): def getlocations(self): self.locations=[] for senseid in self.senseidstosort: - for location in self.db.get('exfieldlocation', - senseid=senseid, fieldtype='tone'): + for location in [i for i in self.db.get('locationfield', + senseid=senseid, showurl=True).get('text') if i is not None]: self.locations+=[location] self.locations=list(dict.fromkeys(self.locations)) def topprofiles(self,x='ALL'): @@ -3331,9 +3170,7 @@ def wordsbypsprofilechecksubcheckp(self,parent='NoXLPparent',t="NoText!"): self.buildregex() log.log(2,"self.regex: {}; self.regexCV: {}".format(self.regex, self.regexCV)) - matches=set(self.db.senseidformsbyregex(self.regex, - self.analang, - ps=self.ps).keys()) + matches=set(self.senseidformsbyregex(self.regex)) for typenum in self.typenumsRun: # this removes senses already reported (e.g., in V1=V2) matches-=self.basicreported[typenum] @@ -3371,8 +3208,7 @@ def wordsbypsprofilechecksubcheckp(self,parent='NoXLPparent',t="NoText!"): for senseid in matches: for typenum in self.typenumsRun: self.basicreported[typenum].add(senseid) - framed=self.getframeddata(senseid,noframe=True) - print('\t',framed['formatted']) + framed=self.datadict.getframeddata(senseid) self.framedtoXLP(framed,parent=ex,listword=True) def 
wordsbypsprofilechecksubcheck(self,parent='NoXLPparent'): """This function iterates across self.name and self.subcheck values @@ -3440,9 +3276,11 @@ def wordsbypsprofilechecksubcheck(self,parent='NoXLPparent'): self.subcheck=subcheckori def idXLP(self,framed): id='x' #string! - bits=[self.ps,self.profile,self.name,self.subcheck,framed[self.analang]] - if self.glosslang in framed and framed[self.glosslang] is not None: - bits+=framed[self.glosslang] + bits=[self.ps,self.profile,self.name,self.subcheck, + framed.forms[self.analang]] + for lang in self.glosslangs: + if lang in framed.forms and framed.forms[lang] is not None: + bits+=framed.forms[lang] for x in bits: if x is not None: id+=x @@ -3458,18 +3296,18 @@ def framedtoXLP(self,framed,parent,listword=False,groups=True): else: exx=xlp.Example(parent,id) #the id goes here... ex=xlp.Word(exx) #This doesn't have an id - if self.audiolang in framed: - url=file.getdiredrelURL(self.reporttoaudiorelURL,framed[self.audiolang]) - el=xlp.LinkedData(ex,self.analang,framed[self.analang],str(url)) + if self.audiolang in framed.forms: + url=file.getdiredrelURL(self.reporttoaudiorelURL, + framed.forms[self.audiolang]) + el=xlp.LinkedData(ex,self.analang,framed.forms[self.analang], + str(url)) else: - el=xlp.LangData(ex,self.analang,framed[self.analang]) - if 'tonegroup' in framed and groups is True: #joined groups show each - elt=xlp.LangData(ex,self.analang,framed['tonegroup']) - if self.glosslang in framed: - eg=xlp.Gloss(ex,self.glosslang,framed[self.glosslang]) - if ((self.glosslang2 != None) and (self.glosslang2 in framed) - and (framed[self.glosslang2] is not None)): - eg2=xlp.Gloss(ex,self.glosslang2,framed[self.glosslang2]) + el=xlp.LangData(ex,self.analang,framed.forms[self.analang]) + if hasattr(framed,'tonegroup') and groups is True: #joined groups show each + elt=xlp.LangData(ex,self.analang,framed.tonegroup) + for lang in self.glosslangs: + if lang in framed.forms: + xlp.Gloss(ex,lang,framed.forms[lang]) def 
makecountssorted(self): # This iterates across self.profilesbysense to provide counts for each # ps-profile combination (aggravated for profile='Invalid') @@ -3532,10 +3370,9 @@ def senseidsincheck(self,senseids): return senseidstochange def getexsall(self,value): #This returns all the senseids with a given tone value - senseids=self.db.get('senseidbyexfieldvalue',location=self.name, - fieldtype='tone', - fieldvalue=value - ) + senseids=self.db.get("sense", location=self.name, path=['tonefield'], + tonevalue=value + ).get('senseid') senseidsincheck=self.senseidsincheck(senseids) return list(senseidsincheck) def getex(self,value,notonegroup=True,truncdefn=False,renew=False): @@ -3560,11 +3397,9 @@ def getex(self,value,notonegroup=True,truncdefn=False,renew=False): log.info("Using stored value for ‘{}’ group: ‘{}’".format( value, self.exs[value])) senseid=self.exs[value] - framed=self.getframeddata(senseid, - notonegroup=notonegroup, - truncdefn=truncdefn) - if (framed[self.glosslang] is not None): - framed['senseid']=self.exs[value] + framed=self.datadict.getframeddata(senseid) + if framed.glosses() is not None: + output['senseid']=senseid output['framed']=framed #this includes [n], above return output else: @@ -3573,12 +3408,11 @@ def getex(self,value,notonegroup=True,truncdefn=False,renew=False): return output for i in range(len(senseids)): #just keep trying until you succeed senseid=senseids[randint(0, len(senseids))-1] - framed=self.getframeddata(senseid,notonegroup=notonegroup, - truncdefn=truncdefn) - if (framed[self.glosslang] is not None): + framed=self.datadict.getframeddata(senseid) + if framed.glosses() is not None: """As soon as you find one with form and gloss, quit.""" self.exs[value]=senseid - framed['senseid']=senseid + output['senseid']=senseid output['framed']=framed #this includes [n], above return output else: @@ -4023,22 +3857,13 @@ def testsorting(self): delattr(self,'groupselected') #reset this for each word! 
senseid=self.senseidsunsorted[0] progress=(str(self.senseidstosort.index(senseid)+1)+'/'+str(todo)) - framed=self.getframeddata(senseid,truncdefn=True) + framed=self.datadict.getframeddata(senseid) + framed.setframe(self.name) """After the first entry, sort by groups.""" log.debug('self.tonegroups: {}'.format(status['groups'])) Label(titles, text=progress, font=self.fonts['report'], anchor='w' ).grid(column=1, row=0, sticky="ew") - if 'formatted' in framed: - text=(framed['formatted']) - else: - text=_("Sorry; I can't find {}".format(framed)) - l=Label(self.runwindow.frame, text=text,font=self.fonts['readbig']) - l.grid(column=1,row=1, sticky="w",pady=50) - l.wrap() - scroll.destroy() - self.runwindow.waitdone() - self.runwindow.wait_window(window=l) - return 1 + text=framed.formatted(noframe=False) entryview=Frame(self.runwindow.frame) self.sorting=Label(entryview, text=text,font=self.fonts['readbig']) entryview.grid(column=1, row=1, sticky="new") @@ -4075,6 +3900,21 @@ def testsorting(self): return 1 # this should only happen on Exit self.marksortedsenseid(senseid) self.runwindow.resetframe() + def reverify(self): + log.info("Reverifying a framed tone group, at user request: {}-{}" + "".format(self.name,self.subcheck)) + checkswframes=self.status[self.type][self.ps][self.profile] + if self.name is None or self.name not in checkswframes: + self.getcheck() #guess=True + done=self.status[self.type][self.ps][self.profile][self.name]['done'] + if self.subcheck is None or self.subcheck not in done: + self.getsubcheck()#guess=True + if self.subcheck == None: + log.info("I asked for a framed tone group, but didn't get one.") + return + if self.subcheck in done: + done.remove(self.subcheck) + self.maybesort() def verifyT(self,menu=False): log.info("Running verifyT!") """Show entries each in a row, users mark those that are different, and we @@ -4122,6 +3962,7 @@ def verifyT(self,menu=False): senseids=self.getexsall(self.subcheck) if len(senseids) <2: 
self.updatestatus(verified=True) + self.updatestatuslift(self.name,self.subcheck,verified=True) # self.checkcheck() #now after verifyT is done log.info("Group ‘{}’ only has {} example; marking verified and " "continuing.".format(self.subcheck,len(senseids))) @@ -4199,6 +4040,7 @@ def verifyT(self,menu=False): elif self.groupselected == "ALLOK": log.debug("User selected ‘{}’, moving on.".format(oktext)) self.updatestatus(verified=True) + self.updatestatuslift(self.name,self.subcheck,verified=True) # self.checkcheck() #now after verifyT is done else: log.debug("User did NOT select ‘{}’, assuming we'll come " @@ -4217,14 +4059,9 @@ def verifybutton(self,parent,senseid,row,column=0,label=False,**kwargs): if 'anchor' not in kwargs: kwargs['anchor']='w' #This should be pulling from the example, as it is there already - framed=FramedData(senseid,db=self.db, - frame=self.toneframes[self.ps][self.name], - location=self.name, analangs=[self.analang], - glosslangs=[self.glosslang,self.glosslang2], - notonegroup=True,truncdefn=True) - # framed=self.getframeddata(senseid,notonegroup=True,truncdefn=True) - text=framed.formatted - # text=(framed['formatted']) + framed=self.datadict.getframeddata(senseid) + framed.setframe(self.name) + text=framed.formatted(notonegroup=True) if label==True: b=Label(parent, text=text, **kwargs @@ -4243,12 +4080,6 @@ def verifybutton(self,parent,senseid,row,column=0,label=False,**kwargs): ipady=15 #Inside the buttons... ) def joinT(self): - def verify(): - groups=self.status[self.type][self.ps][self.profile][self.name][ - 'groups'] - for group in groups: - self.updatestatuslift(self.name,group,verified=True) - self.db.write() #after iterating log.info("Running joinT!") """This window shows up after sorting, or maybe after verification, to allow the user to join groups that look the same. 
I think before @@ -4321,7 +4152,6 @@ def verify(): if self.groupselected == "ALLOK": print(f"User selected ‘{oktext}’, moving on.") delattr(self,'groupselected') - verify() return 0 else: group1=self.groupselected @@ -4352,7 +4182,6 @@ def verify(): if self.groupselected == "ALLOK": print(f"User selected ‘{oktext}’, moving on.") delattr(self,'groupselected') - verify() return 0 else: msg=_("Now we're going to move group ‘{0}’ into " @@ -4365,22 +4194,22 @@ def verify(): self.status[self.type][self.ps][self.profile][ self.name]['groups'].remove(group1) self.subcheck=group1 + self.updatestatuslift(refresh=False) #done above self.updatestatus(refresh=False) #not verified=True --since joined. - # self.updatestatuslift(refresh=False) #done above self.subcheck=self.groupselected + self.updatestatuslift() #done above self.updatestatus() #not verified=True --since joined. - # self.updatestatuslift() #done above self.maybesort() #go back to verify, etc. """'These are all different' doesn't need to be saved anywhere, as this can happen at any time. Just move on to verification, where each group's sameness will be verified and recorded.""" def updatebysubchecksenseid(self,oldtonevalue,newtonevalue,verified=False): # This function updates the field value and verification status (which - # containst the field value) in the lift file. + # contains the field value) in the lift file. 
# This is all the words in the database with the given # location:value correspondence (any ps/profile) - lst2=self.db.get('senseidbyexfieldvalue',fieldtype='tone', - location=self.name,fieldvalue=oldtonevalue) + lst2=self.db.get('sense',location=self.name,tonevalue=oldtonevalue + ).get('senseid') # We are agnostic of verification status of any given entry, so don't # use this to change names, not to mark verification status (do that # with self.updatestatuslift()) @@ -4393,11 +4222,11 @@ def updatebysubchecksenseid(self,oldtonevalue,newtonevalue,verified=False): for senseid in senseids: """This updates the fieldvalue from 'fieldvalue' to 'newfieldvalue'.""" - self.db.updateexfieldvalue(senseid=senseid,fieldtype='tone', - location=self.name,fieldvalue=oldtonevalue, - newfieldvalue=newtonevalue) + self.db.addmodexamplefields(senseid=senseid,fieldtype='tone', + location=self.name,#fieldvalue=oldtonevalue, + fieldvalue=newtonevalue) self.db.modverificationnode(senseid=senseid,vtype=self.profile, - add=add,rm=rm,addifrmd=True) + add=add,rms=[rm],addifrmd=True) self.db.write() #once done iterating over senseids def addtonegroup(self): log.info("Adding a tone group!") @@ -4425,7 +4254,7 @@ def addtonefieldex(self,senseid,framed): self.name, senseid, guid)) - self.db.addexamplefields( #This should only mod if already there + self.db.addmodexamplefields( #This should only mod if already there guid=guid,senseid=senseid, analang=self.analang, glosslang=self.glosslang, @@ -4438,8 +4267,8 @@ def addtonefieldex(self,senseid,framed): fieldvalue=self.groupselected #, # ps=None #,showurl=True ) - tonegroup=firstoflist(self.db.get('exfieldvalue', senseid=senseid, - fieldtype='tone', location=self.name)) + tonegroup=unlist(self.db.get("example/tonefield/form/text", + senseid=senseid, location=self.name).get('text')) if tonegroup != self.groupselected: log.error("Field addition failed! 
LIFT says {}, not {}.".format( tonegroup,self.groupselected)) @@ -4469,14 +4298,13 @@ def getsenseidsbytoneUFgroups(self): """Still working on one ps-profile combo at a time.""" self.getidstosort() #just in case this changed for senseid in self.senseidstosort: #I should be able to make this a regex... - toneUFgroup=firstoflist(self.db.get('toneUFfieldvalue', senseid=senseid, - fieldtype='tone' # Including any lang at this point. - # ,showurl=True - )) - if toneUFgroup not in sorted: - sorted[toneUFgroup]=[senseid] - else: - sorted[toneUFgroup]+=[senseid] + toneUFgroup=firstoflist(self.db.get('sense/tonefield/form/text', + senseid=senseid).get('text')) + if toneUFgroup is not None: + if toneUFgroup not in sorted: + sorted[toneUFgroup]=[senseid] + else: + sorted[toneUFgroup]+=[senseid] self.toneUFgroups=list(dict.fromkeys(sorted)) log.debug("UFtonegroups (getsenseidsbytoneUFgroups): {}".format( self.toneUFgroups)) @@ -4487,10 +4315,8 @@ def gettoneUFgroups(self): #obsolete? toneUFgroups=[] """Still working on one ps-profile combo at a time.""" for senseid in self.senseidstosort: #I should be able to make this a regex... - toneUFgroups+=self.db.get('toneUFfieldvalue', senseid=senseid, - fieldtype='tone' # Including any lang at this point. 
- # ,showurl=True - ) + toneUFgroups+=self.db.get('sense/tonefield/form/text', + senseid=senseid).get('text') self.toneUFgroups=list(dict.fromkeys(toneUFgroups)) def gettonegroups(self): # This depends on self.ps, self.profile, and self.name @@ -4500,8 +4326,8 @@ def gettonegroups(self): log.log(3,"Looking for tone groups for {} frame".format(self.name)) tonegroups=[] for senseid in self.senseidstosort: #This is a ps-profile slice - tonegroup=self.db.get('exfieldvalue', senseid=senseid, - fieldtype='tone', location=self.name)#, showurl=True) + tonegroup=self.db.get("example/tonefield/form/text", + senseid=senseid, location=self.name).get('text') if unlist(tonegroup) in ['NA','','ALLOK', None]: log.error("tonegroup {} found in sense {} under location {}!" "".format(tonegroup,senseid,self.name)) @@ -4568,10 +4394,10 @@ def sortingstatus(self): self.senseidssorted=[] self.senseidsunsorted=[] for senseid in self.senseidstosort: - v=self.db.get('exfieldvalue',senseid=senseid,fieldtype='tone', - location=self.name) #because it's relevant to this + v=unlist(self.db.get("example/tonefield/form/text", senseid=senseid, + location=self.name).get('text')) log.info("Found tone value: {}".format(v)) - if unlist(v) in ['',None]: + if v in ['',None]: self.senseidsunsorted+=[senseid] else: self.senseidssorted+=[senseid] @@ -4601,10 +4427,10 @@ def tryNAgain(self): Label(self.runwindow.frame, text=text).grid(row=0,column=0) return for senseid in self.senseidstosort: #this is a ps-profile slice - self.db.rmexfields(senseid=senseid,fieldtype='tone', - location=self.name,fieldvalue='NA', - showurl=True - ) + self.db.addmodexamplefields(senseid=senseid,fieldtype='tone', + location=self.name,fieldvalue='', #just clear this + showurl=True + ) self.checkcheck() #redraw the table self.maybesort() #Because we want to go right into sorting... 
def getanotherskip(self,parent): @@ -4657,45 +4483,28 @@ def unsort(): row=row,column=column,label=label, alwaysrefreshable=alwaysrefreshable, font=font, playable=playable,renew=True,refreshcount=refreshcount,**kwargs) - if 'font' not in kwargs: - font=self.fonts['read'] - else: - font=kwargs['font'] - del kwargs['font'] - if 'anchor' not in kwargs: - kwargs['anchor']='w' - if 'notonegroup' not in kwargs: - notonegroup=True - else: - notonegroup=kwargs['notonegroup'] - del kwargs['notonegroup'] - if 'refreshcount' not in kwargs: - refreshcount=0 - else: - refreshcount=kwargs['refreshcount']+1 - del kwargs['refreshcount'] - if 'sticky' not in kwargs: - sticky="ew" - else: - sticky=kwargs['sticky'] - del kwargs['sticky'] - example=self.getex(group,notonegroup=notonegroup,truncdefn=True,renew=renew) + font=kwargs.pop('font',self.fonts['read']) + kwargs['anchor']=kwargs.get('anchor','w') + notonegroup=kwargs.pop('notonegroup',True) + refreshcount=kwargs.pop('refreshcount',-1)+1 + sticky=kwargs.pop('sticky',"ew") + example=self.getex(group,notonegroup=notonegroup,renew=renew) if example is None: log.error("Apparently the example for tone group {} in frame {} " "came back {}".format(group,self.name,example)) return - if 'renew' in kwargs: - if kwargs['renew'] == True: - log.info("Resetting tone group example ({}): {} of {} examples" - "".format(group,self.exs[group],example['n'])) - del self.exs[group] - del kwargs['renew'] + renew=kwargs.pop('renew',False) + if renew is True: + log.info("Resetting tone group example ({}): {} of {} examples" + "".format(group,self.exs[group],example['n'])) + del self.exs[group] framed=example['framed'] + framed.setframe(self.name) if framed is None: log.error("Apparently the framed example for tone group {} in " "frame {} came back {}".format(group,self.name,example)) return - text=(framed['formatted']) + text=framed.formatted() """This should maybe be brought up a level in frames?""" bf=Frame(parent) bf.grid(column=column, row=row, 
sticky=sticky) @@ -4704,7 +4513,7 @@ def unsort(): b.grid(column=1, row=0, sticky="ew", ipady=15) #Inside the buttons elif playable == True: url=RecordButtonFrame.makefilenames(None,self, #not Classy... - framed['senseid']) + example['senseid']) diredurl=str(file.getdiredurl(self.audiodir,url)) thefileisthere=file.exists(diredurl) log.info("fileisthere: {} ({})".format(diredurl,url)) @@ -4770,11 +4579,11 @@ def unsort(): def printentryinfo(self,guid): outputs=[ nn(self.db.citationorlexeme(guid=guid)), - nn(self.db.glossordefn(guid=guid,lang=self.glosslang)) + nn(self.db.glossordefn(guid=guid,glosslang=self.glosslang)) ] if self.glosslang2 is not None: #only give this if the user wants it. outputs.append(nn(self.db.glossordefn(guid=guid, - lang=self.glosslang2))) + glosslang=self.glosslang2))) outputs.append(nn(self.db.get('pronunciationfieldvalue', fieldtype='tone', location=self.subcheck,guid=guid))) @@ -4866,8 +4675,8 @@ def record(self): else: self.showentryformstorecord() def makelabelsnrecordingbuttons(self,parent,sense): - t=self.getframeddata(sense['nodetoshow'],noframe=True)[ - self.analang]#+'\t'+sense['gloss'] + framed=self.datadict.getframeddata(sense['nodetoshow']) + t=framed.formatted(noframe=True) for g in ['gloss','gloss2']: if (g in sense) and (sense[g] is not None): t+='\t‘'+sense[g] @@ -4877,9 +4686,9 @@ def makelabelsnrecordingbuttons(self,parent,sense): t+="!" t+='’' lxl=Label(parent, text=t) - lcb=RecordButtonFrame(parent,self,id=sense['guid'], - node=sense['nodetoshow'], - gloss=sense['gloss']) + lcb=RecordButtonFrame(parent,self,id=sense['guid'], #reconfigure! 
+ framed=framed,node=sense['nodetoshow'], + gloss=sense['gloss']) lcb.grid(row=sense['row'],column=sense['column'],sticky='w') lxl.grid(row=sense['row'],column=sense['column']+1,sticky='w') def showentryformstorecordpage(self): @@ -4908,8 +4717,9 @@ def showentryformstorecordpage(self): sense={} sense['column']=0 sense['row']=row - sense['guid']=firstoflist(self.db.get('guidbysense', - senseid=senseid)) + sense['senseid']=senseid + sense['guid']=firstoflist(self.db.get('entry', + senseid=senseid).get('guid')) if sense['guid'] in done: #only the first of multiple senses continue else: @@ -4917,35 +4727,35 @@ def showentryformstorecordpage(self): """These following two have been shifted down a level, and will now return a list of form elements, each. Something will need to be adjusted here...""" - sense['lxnode']=firstoflist(self.db.get('lexemenode', + sense['lxnode']=firstoflist(self.db.get('lexeme', guid=sense['guid'], - lang=self.analang)) - sense['lcnode']=firstoflist(self.db.get('citationnode', + lang=self.analang).get()) + sense['lcnode']=firstoflist(self.db.get('citation', guid=sense['guid'], - lang=self.analang)) + lang=self.analang).get()) sense['gloss']=firstoflist(self.db.glossordefn( guid=sense['guid'], - lang=self.glosslang + glosslang=self.glosslang ),othersOK=True) if ((hasattr(self,'glosslang2')) and (self.glosslang2 is not None)): sense['gloss2']=firstoflist(self.db.glossordefn( guid=sense['guid'], - lang=self.glosslang2 + glosslang=self.glosslang2 ),othersOK=True) if ((sense['gloss'] is None) and (('gloss2' in sense) and (sense['gloss2'] is None))): continue #We can't save the file well anyway; don't bother if self.db.pluralname is not None: - sense['plnode']=firstoflist(self.db.get('fieldnode', + sense['plnode']=firstoflist(self.db.get('field', guid=sense['guid'], lang=self.analang, - fieldtype=self.db.pluralname)) + fieldtype=self.db.pluralname).get()) if self.db.imperativename is not None: - sense['impnode']=firstoflist(self.db.get('fieldnode', + 
sense['impnode']=firstoflist(self.db.get('field', guid=sense['guid'], lang=self.analang, - fieldtype=self.db.imperativename)) + fieldtype=self.db.imperativename).get()) if sense['lcnode'] is not None: sense['nodetoshow']=sense['lcnode'] else: @@ -5013,7 +4823,7 @@ def setskip(event): for senseid in senses: log.debug("Working on {} with skip: {}".format(senseid, self.runwindow.frame.skip)) - examples=self.db.get('example',senseid=senseid) + examples=self.db.get('example',senseid=senseid).get() if examples == []: log.debug(_("No examples! Add some, then come back.")) continue @@ -5034,12 +4844,12 @@ def setskip(event): ) progressl.grid(row=0,column=2,sticky='ne') """This is the title for each page: isolation form and glosses.""" - framed=self.getframeddata(senseid,noframe=True,notonegroup=True, - truncdefn=True) - if framed[self.analang]=='noform': - entryframe.destroy() + titleframed=self.datadict.getframeddata(senseid) + titleframed.setframe(self.name) + if titleframed.analang is None: + entryframe.destroy() #is this ever needed? continue - text=framed['formatted'] + text=titleframed.formatted(noframe=True,notonegroup=True) Label(entryframe, anchor='w', font=self.fonts['read'], text=text).grid(row=row, column=0,sticky='w') @@ -5054,13 +4864,13 @@ def setskip(event): lift.examplehaslangform(example,self.audiolang) == True): continue """These should already be framed!""" - framed=self.getframeddata(example,noframe=True,truncdefn=True) - if framed[self.analang] is None: + framed=self.datadict.getframeddata(example) + if framed.analang is None: # when? 
continue row+=1 """If I end up pulling from example nodes elsewhere, I should probably make this a function, like getframeddata""" - text=framed['formatted'] + text=framed.formatted(noframe=True) rb=RecordButtonFrame(examplesframe,self,id=senseid,node=example, form=nn(framed[self.analang]), gloss=nn(framed[self.glosslang]) @@ -5126,6 +4936,14 @@ def next(): text=_("Continue to next syllable profile"), command=next).grid(row=1,column=0) self.donewpyaudio() + def senseidformsbyregex(self,regex,): + """This function takes in a compiled regex, + and outputs a list/dictionary of senseid/{senseid:form} form.""" + output=[] #This is just a list of senseids now: (Do we need the dict?) + for form in self.formstosearch[self.ps]: + if regex.search(form): + output+=self.formstosearch[self.ps][form] + return output def getresults(self): self.getrunwindow() self.makeresultsframe() @@ -5159,25 +4977,15 @@ def getresults(self): for self.subcheck in self.s[self.analang][self.type]: log.debug('self.subcheck: {}'.format(self.subcheck)) self.buildregex() #It would be nice fo this to iterate through... 
- # for senseid in self.profilesbysense[self.ps][self.profile]: - # print(self.profilesbysense[self.ps][self.profile][0]) - # print(self.db.citationorlexeme(self.profilesbysense[self.ps][self.profile][0])) - # print(firstoflist(self.db.citationorlexeme(self.profilesbysense[self.ps][self.profile][0]))) - senseidstocheck=self.db.senseidformsbyregex(self.regex, - self.analang, - ps=self.ps) - # senseidstocheck= filter(lambda x: self.regex.search( - # firstoflist(self.db.citationorlexeme(x))), - # self.profilesbysense[self.ps][self.profile]) + senseidstocheck=self.senseidformsbyregex(self.regex) if len(senseidstocheck)>0: id=rx.id('x'+self.ps+self.profile+self.name+self.subcheck) ex=xlp.Example(si,id) for senseid in senseidstocheck: #self.senseidformstosearch[lang][ps] # where self.regex(self.senseidformstosearch[lang][ps][senseid]): """This regex is compiled!""" - framed=self.getframeddata(senseid,noframe=True) - # if self.regex(framed[self.analang]): - o=framed['formatted'] + framed=self.datadict.getframeddata(senseid) #not framed! 
+ o=framed.formatted(noframe=True) self.framedtoXLP(framed,parent=ex,listword=True) if self.debug ==True: o=entry.lexeme,entry.citation,nn(entry.gloss), @@ -5205,11 +5013,11 @@ def makeimg(): # window=self.runwindow.frame, # row=i, column=0, font=font, command=self.picked) col=0 - for lang in [self.analang, self.glosslang, self.glosslang2]: + for lang in [self.analang]+self.glosslangs: col+=1 - if lang in framed: + if lang in framed.forms: Label(self.results.scroll.content, - text=framed[lang], font=font, + text=framed.forms[lang], font=font, anchor='w',padx=10).grid(row=i, column=col, sticky='w') if self.su==True: @@ -5459,9 +5267,10 @@ def tonegroupsbyUFlocation(self,senseidsbygroup): for location in locations: #just make them all, delete empty later values[group][location]=list() for senseid in senseidsbygroup[group]: - groupvalue=self.db.get('exfieldvalue',senseid=senseid, - location=location,fieldtype='tone') - if groupvalue != [None]: + groupvalue=self.db.get("example/tonefield/form/text", + senseid=senseid, location=location, + ).get('text') + if groupvalue != []: if unlist(groupvalue) not in values[group][location]: values[group][location]+=groupvalue log.log(3,"values[{}][{}]: {}".format(group,location, @@ -5482,9 +5291,9 @@ def tonegroupsbysenseidlocation(self): for senseid in self.senseidstosort: output[senseid]={} for location in locations: - group=self.db.get('exfieldvalue',senseid=senseid, - location=location,fieldtype='tone') - if group != [None]: + group=self.db.get("example/tonefield/form/text", + senseid=senseid,location=location,showurl=True).get('text') + if group != []: output[senseid][location]=group #Save this info by senseid log.info("Done collecting groups by location for each senseid.") return output @@ -5551,9 +5360,9 @@ def examplestoXLP(examples,parent,groups=True): if not default: groups=True #show groups on all non-default reports for example in examples: - framed=self.getframeddata(example,noframe=True) - if not (framed[self.analang] 
is None and - framed[self.glosslang] is None):#glosslang2? + framed=self.datadict.getframeddata(example) + if not (framed.forms[self.analang] is None and + framed.forms[self.glosslang] is None):#glosslang2? self.framedtoXLP(framed,parent=parent,listword=True, groups=groups) log.info("Starting report...") @@ -5600,6 +5409,7 @@ def examplestoXLP(examples,parent,groups=True): valuesbylocation=dictofchilddicts(groupvalues,remove=['NA',None]) log.debug("groups (tonegroupreport): {}".format(grouplist)) log.debug("locations (tonegroupreport): {}".format(locations)) + log.debug("valuesbylocation: {}".format(valuesbylocation)) r = open(self.tonereportfile, "w", encoding='utf-8') title=_("Tone Report") self.runwindow.title(title) @@ -5703,13 +5513,12 @@ def output(window,r,text): e1=xlp.Example(s1,id,heading=headtext) for senseid in toreport[group]: #This is for window/text output only, not in XLP file - framed=self.getframeddata(senseid,noframe=True, - notonegroup=True) - text=framed['formatted'] + framed=self.datadict.getframeddata(senseid) + # framed.setframe(self.name) #not needed here, I think + text=framed.formatted(noframe=True,notonegroup=True) #This is put in XLP file: - examples=self.db.get('examplebylocation', - location=location, - senseid=senseid) + examples=self.db.get('example',location=location, + senseid=senseid).get() examplestoXLP(examples,e1,groups=False) if text not in textout: output(window,r,text) @@ -5719,11 +5528,11 @@ def output(window,r,text): else: for senseid in toreport[group]: #groups[group]['senseids']: #This is for window/text output only, not in XLP file - framed=self.getframeddata(senseid,noframe=True, - notonegroup=True) - text=framed['formatted'] + framed=self.datadict.getframeddata(senseid) + # framed.setframe(self.name) #not needed here, I think + text=framed.formatted(noframe=True, notonegroup=True) #This is put in XLP file: - examples=self.db.get('example',senseid=senseid) + examples=self.db.get('example',senseid=senseid).get() 
log.log(2,"{} examples found: {}".format(len(examples), examples)) if examples != []: @@ -6166,6 +5975,26 @@ def appendformsbylang(self,forms,langs,quote=False): def __init__(self, *args): super(DataList, self).__init__() self.extend(args) +class Glosslangs(DataList): + """docstring for Glosslangs.""" + def lang1(self,lang=None): + if lang is None: + return self[0] + if len(self) >1 and self[1] == lang: + self.pop(1) + self[0]=lang + def lang2(self,lang=None): + if lang is None and len(self) >1: + return self[1] + if len(self) >0 and self[0] != lang: + self[0]=lang + def rm(self,lang): + """This could be either position, and if lang1 will promote lang2""" + self.remove(lang) + def __init__(self, *args): + super(Glosslangs, self).__init__() + self.extend(args) + class DictbyLang(dict): """docstring for DictbyLang.""" def getformfromnode(self,node,truncate=False): @@ -6185,10 +6014,28 @@ def getformfromnode(self,node,truncate=False): def frame(self,framedict,langs): #langs can/should be ordered """the frame only applies if there is a language value; I hope that's what we want...""" - for l in [i for i in langs if i in framedict if i in self]: - self[l]=rx.framerx.sub(self[l],framedict[l]) + for l in [i for i in langs if i in framedict if i in self and self[i] != []]: + self.framed[l]=rx.framerx.sub(self[l],framedict[l]) def __init__(self): super(DictbyLang, self).__init__() + self.framed={} +class FramedDataDict(dict): + def updatelangs(self): + self.analang=self.check.analang + self.glosslangs=self.check.glosslangs + log.debug("analang: {}; glosslangs: {}".format(self.analang,self.glosslangs)) + def getframeddata(self, source, **kwargs): + self.updatelangs() + if source not in self: + self[source]=FramedData(self,source,**kwargs) + else: + self[source].updatelangs() + return self[source] + def __init__(self, check, **kwargs): + super(FramedDataDict, self).__init__() + self.frames=check.toneframes #[ps][name] + self.db=check.db + self.check=check class 
FramedData(object): """This populates an object with attributes to format data for display, by senseid""" @@ -6196,27 +6043,53 @@ class FramedData(object): times to display it. If source is a senseid, it pulls form/gloss/etc information from the entry. If source is an example, it pulls that info from the example. The info is formatted uniformly in either case.""" - def parsesense(self,db,senseid,truncdefn=False): - self.senseid=senseid - lexs=db.get('lexemenode',senseid=senseid) - cits=db.get('citationnode',senseid=senseid) - log.info("lex: {}, cit: {}".format(lexs,cits)) - defns=db.get('definitionnode',senseid=senseid) - glss=db.get('glossnode',senseid=senseid) - log.info("defns: {}, glss: {}".format(defns,glss)) - for i in lexs+cits: # (later) citation nodes will overwrite lex nodes - self.forms.getformfromnode(i) - for i in defns: #(later) gloss nodes will overwrite these defn nodes - self.glosses.getformfromnode(i,truncate=truncdefn) #only trunc defns - for i in glss: - self.glosses.getformfromnode(i) - if self.location is not None: #otherwise will return all examples - self.tonegroups=self.db.get('exfieldvalue', senseid=senseid, - fieldtype='tone', location=self.location) + def formatted(self,notonegroup=True,noframe=False): + if notonegroup: + toformat=DataList() + else: + toformat=DataList(self.tonegroup) + if noframe: + toformat.appendformsbylang(self.forms,self.analang,quote=False) + toformat.appendformsbylang(self.forms,self.glosslangs,quote=True) else: - tonegroups=None - log.error("Location isn't set; I can't tell which example you want") - def parseexample(self,example): + if not hasattr(self,'framed'): + self.noframe() #Assume no frame if not excplicitly applied + toformat.appendformsbylang(self.framed,self.analang,quote=False) + toformat.appendformsbylang(self.framed,self.glosslangs,quote=True) + return ' '.join(toformat) #put it all together + def setframe(self,frame): + self.frame=self.frames[self.ps][frame] + self.applyframe() + def noframe(self): 
+ self.framed=self.forms + def applyframe(self): + if not self.noframe: + self.forms.frame(self.frame,[self.analang]+self.glosslangs) + self.framed=self.forms.framed + else: + self.noframe() + def gettonegroup(self): + if self.location is not None: + self.tonegroups=self.db.get('example/tonefield/form/text', + senseid=senseid, location=self.location).get('text') + def tonegroup(self): + if self.tonegroups is not None: # wanted&found + tonegroup=unlist(self.tonegroups) + if tonegroup is not None: + try: + int(tonegroup) + except: + self.tonegroup=tonegroup #only for named groups + def parsesense(self,db,senseid): + self.senseid=senseid + self.ps=unlist(db.ps(senseid=senseid)) #there should be just one + self.forms[self.analang]=db.citationorlexeme(senseid=senseid, + analang=self.analang) + self.forms.update(db.glossesordefns(senseid=senseid)) + for f in self.forms: + self.forms[f]=unlist(self.forms[f]) + self.gettonegroup() + def parseexample(self,example): self.senseid=None #We don't have access to this here for i in example: if i.tag == 'form': #language forms, not glosses, etc, below. 
@@ -6226,28 +6099,45 @@ def parseexample(self,example): ((i.tag == 'gloss'))): for ii in i: if (ii.tag == 'form'): - self.glosses.getformfromnode(ii) - elif ((i.tag == 'field') and (i.get('type') == 'tone')): #should - self.tonegroups=node.findall('form/text') #always be list of one - def __init__(self, source, **kwargs): + self.forms.getformfromnode(ii) #glosses + elif ((i.tag == 'field') and (i.get('type') == 'tone')): + self.tonegroups=i.findall('form/text') #always be list of one + def glosses(self): + g=DictbyLang() + l=0 + for lang in self.glosslangs: + if lang in self.forms: + g[lang]=self.forms[lang] + l+=len(g[lang]) + if l >0: + return g + else: + return None + def updatelangs(self): + self.analang=self.parent.analang + self.glosslangs=self.parent.glosslangs + log.debug("analang: {}; glosslangs: {}".format(self.analang,self.glosslangs)) + def __init__(self, parent, source, **kwargs): super(FramedData, self).__init__() - noframe=kwargs.pop('noframe',False) - notonegroup=kwargs.pop('notonegroup',False) - truncdefn=kwargs.pop('truncdefn',False) - self.location=kwargs.pop('location',None) - self.db=kwargs.pop('db',None) - #These really must be there... - self.analangs=kwargs.pop('analangs') - self.glosslangs=kwargs.pop('glosslangs') - self.frame=kwargs.pop('frame') + self.parent=parent + self.frames=parent.frames + self.updatelangs() + self.db=parent.db #kwargs.pop('db',None) #not needed for examples + self.location=kwargs.pop('location',None) #not needed for noframe + self.noframe=kwargs.pop('noframe',False) + """Generalize these, and manage with methods:""" + # self.notonegroup=kwargs.pop('notonegroup',False) + # truncdefn=kwargs.pop('truncdefn',False) + # self.frame=kwargs.pop('frame',None) #not needed for noframe + #These really must be there, and ordered with first first + #to put data: self.forms=DictbyLang() - self.glosses=DictbyLang() + #defaults to set upfront + self.tonegroups=None self.tonegroup=None """Build dual logic here. 
We use this to frame senses & examples""" if isinstance(source,lift.ET.Element): - noframe=True #Examples should already be framed - if self.db is not None: - log.info("FYI: You specified database unnecessarily!") + self.noframe=True #Examples should already be framed self.parseexample(source) #example element, not sense or entry: """This is what we're pulling from: @@ -6264,34 +6154,20 @@ def __init__(self, source, **kwargs): """ elif type(source) is str and len(source) >= 36:#senseid can be guid+form - if self.db is not None: #pull from lift by senseid - self.parsesense(self.db,source,truncdefn=truncdefn) - else: - log.error("Can't pull entry ({}) w/o database!".format(source)) - return + self.parsesense(self.db,source) else: log.error('Neither Element nor senseid was found!' '\nThis is almost certainly not what you want!' '\nFYI, I was looking for {}'.format(source)) return source """The following is the same for senses or examples""" - if not notonegroup and self.tonegroups is not None: # wanted&found - tonegroup=unlist(self.tonegroups) - if tonegroup is not None: - try: - int(tonegroup) - except: - self.tonegroup=tonegroup #only for named groups - if not noframe: #Forms and glosses have to be strings, or the rx fails - self.forms.frame(self.frame,self.analangs) - self.glosses.frame(self.frame,self.glosslangs) - if self.tonegroup is None: #i.e., no named group was found above - toformat=DataList() - else: - toformat=DataList(self.tonegroup) - toformat.appendformsbylang(self.forms,self.analangs,quote=False) - toformat.appendformsbylang(self.glosses,self.glosslangs,quote=True) - self.formatted=' '.join(toformat) #put it all together + # # just for convenience: + # self.analang=self.forms[self.analangs[0]] + # self.glosslang=self.forms[self.glosslangs[0]] + # if len(self.glosslangs) >1 and self.glosslangs[1] in self.forms: + # self.glosslang2=self.forms[self.glosslangs[1]] + # else: + # self.glosslang2=None class ExitFlag(object): def istrue(self): # 
log.debug("Returning {} exitflag".format(self.value)) @@ -6778,6 +6654,9 @@ def _setmenus(self,event=None): command=lambda x=check:Check.tryNAgain(x)) advancedmenu.add_cascade(label=_("Redo"), menu=redomenu) advancedmenu.add_cascade(label=_("Add other"), menu=filemenu) + redomenu.add_command( + label=_("Verification of current framed group"), + command=lambda x=check:Check.reverify(x)) redomenu.add_command( label=_("Digraph and Trigraph settings (Restart)"), command=lambda x=check:Check.askaboutpolygraphs(x)) @@ -6831,12 +6710,12 @@ def helpabout(self): "allows the user to record a word in each of the frames where " "it has been sorted, storing the recorded audio file in a " "directory, with links to each file in the dictionary database." - " Recordings can be made up to 192khz/32float.\nFor help with " - "this tool, please check out the documentation at " - "{url} or write me at " - "{Email}.".format(name=self.program['name'], - url=self.program['url'], - Email=self.program['Email'])) + " Recordings can be made up to 192khz/32float, according to " + "your recording equipment's capacity.").format( + name=self.program['name']) + webtext=_("For help with this tool, please check out the documentation " + "at {url} ").format(url=self.program['url']) + mailtext=_("or write me at {}.").format(self.program['Email']) Label(window.frame, text=title, font=self.fonts['title'],anchor='c',padx=50 ).grid(row=0,column=0,sticky='we') @@ -6845,9 +6724,20 @@ def helpabout(self): Label(f.content, image=self.photo['small'],text='', bg=self.theme['background'] ).grid(row=0,column=0,sticky='we') - l=Label(f.content, text=text, pady=50, padx=50, + l=Label(f.content, text=text, padx=50, + wraplength=int(self.winfo_screenwidth()/2) + ).grid(row=1,column=0,pady=(50,0),sticky='we') + webl=Label(f.content, text=webtext, padx=50,#pady=50, wraplength=int(self.winfo_screenwidth()/2) - ).grid(row=1,column=0,sticky='we') + ) + webl.grid(row=2,column=0,sticky='we') + maill=Label(f.content, 
text=mailtext, padx=50,#pady=50, + wraplength=int(self.winfo_screenwidth()/2) + ) + maill.grid(row=3,column=0,sticky='we') + webl.bind("", lambda e: openweburl(self.program['url'])) + murl='mailto:{}?subject= A→Z+T question'.format(self.program['Email']) + maill.bind("", lambda e: openweburl(murl)) def maketitle(self): title=_("{name} Dictionary and Orthography Checker").format( name=self.program['name']) @@ -7533,7 +7423,7 @@ def addlink(self): self.db.addmediafields(self.node,self.filename,self.audiolang) def function(self): pass - def makefilenames(self,check=None,senseid=None): + def makefilenames(self=None,check=None,senseid=None): if self is not None: #i.e., this is called by class if self.test==True: return "test_{}_{}.wav".format(self.settings.fs, @@ -7547,12 +7437,13 @@ def makefilenames(self,check=None,senseid=None): check=self.check id=self.id gloss=self.gloss + # audio=None else: #self is None, i.e., this method called on something else. if None in [check, senseid]: return id=senseid - node=firstoflist(check.db.get('examplebylocation',senseid=senseid, - location=check.name)) + node=firstoflist(check.db.get('example',senseid=senseid, + location=check.name).get()) if node is None: # This should never be! 
log.error("Looks like a node came back 'None'; this may be " @@ -7575,22 +7466,27 @@ def makefilenames(self,check=None,senseid=None): log.error("Node{}cg: {}; tag:{}; attrib:{}; text:{}".format( nodes.index(node),ggchild,ggchild.tag, ggchild.attrib,ggchild.text)) - gloss=node.find(check.db.geturlnattr('glossofexample')['url']).text - form=node.find(check.db.geturlnattr('formofexample', - lang=check.analang)['url']).text - audio=node.find(check.db.geturlnattr('formofexample', - lang=check.audiolang)['url']) + gloss=unlist(check.db.get('translation/form/text',node=node, + glosslang=check.glosslang,showurl=True).get('text')) + form=unlist(check.db.get('form/text',node=node,showurl=True, + analang=check.analang).get('text')) + log.log(4,"gloss: {}".format(gloss)) + log.log(4,"form: {}".format(form)) + audio=check.db.get('form/text',node=node,showurl=True, + analang=check.audiolang).get('text') + log.log(4,"audio: {}".format(audio)) + audio=unlist(audio) if gloss is None: gloss=t(check.db.get('gloss',senseid=senseid, - glosslang=check.glosslang)) + glosslang=check.glosslang).get('text')) if form is None and node is not None: form=t(node.find(f"form[@lang='{check.analang}']/text")) if audio is not None: - filenameURL=str(file.getdiredurl(check.audiodir,audio.text)) + filenameURL=str(file.getdiredurl(check.audiodir,audio)) if file.exists(filenameURL): log.debug("Audio file found! using name: {}; diredname: {}" - "".format(audio.text, filenameURL)) - return audio.text + "".format(audio, filenameURL)) + return audio else: log.debug("Audio link found, but no file found! Making options." 
"\n{}; diredname: {}".format(audio, filenameURL)) @@ -7640,8 +7536,10 @@ def makefilenames(self,check=None,senseid=None): #test if any of the generated filenames are there for filename in filenames: filenameURL=str(file.getdiredurl(check.audiodir,filename)) + log.debug("Looking for Audio file: {}; filename possibilities: {}; " + "url:{}".format(filename, filenames, filenameURL)) if file.exists(filenameURL): - log.debug("Audio file found! using name: {}; diredname: {}; " + log.debug("Audiofile found! using name: {}; possibilities: {}; " "url:{}".format(filename, filenames, filenameURL)) return filename #if you don't find any, take the *last* values @@ -7654,7 +7552,15 @@ def __init__(self,parent,check,id=None,node=None,form=None,gloss=None,test=False sound-python/""" self.db=check.db self.node=node #This should never be more than one node... - self.form=form + framed=kwargs.pop('framed',None) #Either this or the next two... + if framed is not None: + formdefault=framed.forms[check.analang] + glossdefault=framed.forms[check.glosslang] + else: + formdefault=None + glossdefault=None + self.form=kwargs.pop('form',formdefault) + self.gloss=kwargs.pop('gloss',glossdefault) self.id=id self.gloss=gloss self.check=check @@ -8071,7 +7977,7 @@ def firstoflist(l,othersOK=False,all=False,ignore=[None]): if all == True: #don't worry about othersOK yet if len(l) > 1: ox=[t(v) for v in l[:len(l)-2]] #Should probably always give text - l=ox+[' and '.join([t(v) for v in l[len(l)-2:]])] + l=ox+[' and '.join([t(v) for v in l[len(l)-2:] if v not in ignore])] # for i in range(int(len(output)/2))] else: l[0]=t(l[0]) #for lists of a single element @@ -8086,7 +7992,7 @@ def t(element): if type(element) is str: return element elif element is None: - return + return str(None) else: try: return element.text @@ -8355,34 +8261,37 @@ def returndictndestroy(self,parent,values): #Spoiler: the parent dies! 
def removesenseidfromsubcheck(self,parent,senseid,name=None,subcheck=None): #?This is the action of a verification button, so needs to be self contained. #merge with addtonefieldex - framed=self.getframeddata(senseid,truncdefn=True) + framed=self.datadict.getframeddata(senseid) + framed.setframe(self.name) + text=framed.formatted(noframe=False) if name is None: name=self.name if subcheck is None: subcheck=self.subcheck log.info(_("Removing senseid {} from subcheck {}".format(senseid,subcheck))) #This should only *mod* if already there - self.db.addexamplefields(senseid=senseid, + self.db.addmodexamplefields(senseid=senseid, analang=self.analang, glosslang=self.glosslang, glosslang2=self.glosslang2, #OK if None forms=framed, fieldtype='tone',location=self.name, - fieldvalue='') #this value should be the only change - tgroups=self.db.get('exfieldvalue', senseid=senseid, - fieldtype='tone', location=self.name) - if type(tgroups) is list: - if len(tgroups) > 1: - log.error(_("Found {} tone values: {}".format(len(tgroups),tgroups))) - return - else: - tgroup=tgroups[0] - if tgroup == '' : - log.info("Field removal succeeded! LIFT says '{}', = ''.".format(tgroup)) - else: - log.error("Field removal failed! LIFT says '{}', != ''.".format(tgroup)) + fieldvalue='',showurl=True) #this value should be the only change + log.info("Checking that removal worked") + tgroups=self.db.get("example/tonefield/form/text", senseid=senseid, + location=self.name).get('text') + if tgroups in [[],'']: + log.info("Field removal succeeded! LIFT says '{}', = []." + "".format(tgroups)) + elif len(tgroups) == 1: + tgroup=tgroups[0] + log.error("Field removal failed! 
LIFT says '{}', != [].".format(tgroup)) + elif len(tgroups) > 1: + log.error(_("Found {} tone values: {}; Fix this!".format(len(tgroups), + tgroups))) + return rm=self.verifictioncode(name,subcheck) - self.db.modverificationnode(senseid,vtype=self.profile,rm=rm) + self.db.modverificationnode(senseid,vtype=self.profile,rms=[rm]) self.db.write() #This is not iterated over self.markunsortedsenseid(senseid) #This is just for self.status['sorted'] parent.destroy() #.runwindow.resetframe() diff --git a/rx.py b/rx.py index 64d4ad56..9eca4fd7 100644 --- a/rx.py +++ b/rx.py @@ -18,7 +18,7 @@ def urlok(x): def id(x): x=x.replace('˥','4').replace('˦','3').replace('˧','2' ).replace('˨','1').replace('˩','0') - return re.sub('[][  .!=\(\),\'/?ꞌ\n:+]','_',x) #remove charcters that are invalid for ids + return re.sub('[][  .!=\(\),\'/?ꞌ\n:;+]','_',x) #remove charcters that are invalid for ids def tonerxs(): return (re.compile('[˥˦˧˨˩]+', re.UNICODE), re.compile(' ', re.UNICODE), @@ -86,7 +86,7 @@ def segmentin(forms, glyph): def inxyz(db, lang, segmentlist): #This calls the above script for each character. start_time=time.time() #this enables boot time evaluation actuals=list() - forms=db.citationforms[lang] + db.lexemes[lang] + forms=db.lcs[lang] + db.lxs[lang] for i in segmentlist: s=segmentin(forms,i) #log.info(s) #to see the following run per segment