diff --git a/.gitignore b/.gitignore
index 04a41a2..72bb081 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,9 @@
+# compiled python files
**.pyc
+
+# these are auto-generated in index.py
+popupPage.html
+popupPageCorpus.html
+
+# usually don't want to commit this
+gitdox.db
diff --git a/Dockerfile b/Dockerfile
index b52ab15..b33c260 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:18.04
+FROM ubuntu:16.04
EXPOSE 80
# install deps
@@ -13,8 +13,7 @@ RUN chown -R www-data:www-data /var/www/html
RUN chmod +x /var/www/html/*.py
RUN chmod +x /var/www/html/modules/*.py
-# keep these in sync with requirements.txt
-RUN pip install lxml requests github3.py==0.9.3 passlib
+RUN pip install -r /var/www/html/requirements.txt
# install ethercalc and run as a service
RUN npm install -g ethercalc
@@ -47,6 +46,7 @@ RUN echo " \n\
# service and (2) not to use supervisord to manage the execution of these
# processes. But (1) is too heavy a solution, and (2) seems unnecessary unless
# one of our services leaks memory/is unstable
+RUN echo "ln -s /usr/bin/nodejs /usr/bin/node" >> /etc/startup.sh
RUN echo "/usr/bin/redis-server &" >> /etc/startup.sh
RUN echo "/usr/local/bin/ethercalc &" >> /etc/startup.sh
RUN echo "/usr/sbin/apache2ctl -D FOREGROUND" >> /etc/startup.sh
diff --git a/README.md b/README.md
index 0ffc05b..8fcf3be 100644
--- a/README.md
+++ b/README.md
@@ -21,12 +21,14 @@ hope to provide a stable release soon.
First, [install Docker](https://docs.docker.com/install/). You may be able to
install it using your platform's package manager.
+(**Note: if your machine has Apache running, you should stop it first by running `sudo service apache2 stop`.**)
+
```bash
-docker run -dit --restart unless-stopped --name gitdox-dev -p 5000:80 gucorpling/gitdox:dev
+docker run -dit --restart unless-stopped --name gitdox-dev -p 80:80 gucorpling/gitdox:dev
```
GitDox should now be running the docker container you've set up, and you may
-visit `http://localhost:5000` on your machine to verify that it works. GitDox should
+visit `http://localhost` on your machine to verify that it works. GitDox should
now always be running on your machine, even if you reboot it. If for some reason
you need to stop it manually, you may do so:
@@ -54,7 +56,7 @@ to have your GitDox folders live in your host machine's filesystem:
```bash
sudo git clone https://github.com/gucorpling/gitdox /opt/gitdox
sudo chown -R www-data:www-data /opt/gitdox
-docker run -dit --restart unless-stopped --name gitdox -v /opt/gitdox:/var/www/html -p 5000:80 gucorpling/gitdox:dev gitdox
+docker run -dit --restart unless-stopped --name gitdox -v /opt/gitdox:/var/www/html -p 80:80 gucorpling/gitdox:dev gitdox
```
These commands install GitDox under `/opt` in your host machine and allows you to modify them just as you would modify any other file on your machine. But in the Docker command, with the `-v` flag we tell it to mount this folder as `/var/www/html` in the container's filesystem. The files are shared bidirectionally: changes made in the container will flow to the host, and vice versa.
diff --git a/admin.py b/admin.py
index e2ac495..9225226 100755
--- a/admin.py
+++ b/admin.py
@@ -13,7 +13,8 @@
from modules.dataenc import pass_dec, pass_enc
from paths import get_menu
from editor import harvest_meta
-from modules.ether import make_spreadsheet, get_ether_stylesheet_select, get_corpus_select
+from modules.ether import make_spreadsheet, get_ether_stylesheets
+from modules.renderer import render
from passlib.apps import custom_app_context as pwd_context
import github3
import time
@@ -29,23 +30,11 @@
userdir = scriptpath + "users" + os.sep
templatedir = scriptpath + "templates" + os.sep
config = ConfigObj(userdir + 'config.ini')
-skin = config["skin"]
project = config["project"]
-def get_status_select():
-
- status_list = open(prefix+"status.tab").read().replace("\r","").split("\n")
-
- select = """\n"
- return select
+def get_statuses():
+ return open(prefix+"status.tab").read().replace("\r","").split("\n")
def write_user_file(username,password,admin,email,realname,git_username,git_password,git_2fa=False):
@@ -102,7 +91,7 @@ def update_git_info(user,new_git_username,new_git_password,new_git_2fa=False):
o['git_username'] = new_git_username
o['git_2fa'] = str(new_git_2fa).lower()
- try:
+ try:
note = project + ", " + time.ctime()
auth = github3.authorize(new_git_username, new_git_password, ['repo'], note, "")
o['git_token'] = auth.token
@@ -111,169 +100,58 @@ def update_git_info(user,new_git_username,new_git_password,new_git_2fa=False):
del o['git_password']
o.write()
except:
- # fail silently--would want to display an error ideally, but
+ # fail silently--would want to display an error ideally, but
# users will know to try again if the credentials are wrong
pass
-def load_admin(user,admin,theform):
- warn=""
+def load_admin(user, admin, theform):
+ render_data = {}
+
+ # handle user deletion
if theform.getvalue('user_delete'):
- userdir=prefix+'users' + os.sep
- user_del_file=theform.getvalue('user_delete')
- user_del=user_del_file.split('.ini')[0]
+ userdir = prefix + 'users' + os.sep
+ user_del_file = theform.getvalue('user_delete')
+ user_del = user_del_file.split('.ini')[0]
#delete_user(user_del)
#need to also delete the user.ini file
- os.remove(userdir+user_del_file)
+ os.remove(userdir + user_del_file)
+ # handle user creation
if theform.getvalue('create_user'):
- username=theform.getvalue('username')
- password=theform.getvalue('password')
- realname=theform.getvalue('realname') if theform.getvalue('realname') is not None else "anonymous"
- email=theform.getvalue('email') if theform.getvalue('email') is not None else "a@b.com"
- admin=theform.getvalue('admin')
- git_username=theform.getvalue('git_username') if theform.getvalue('git_username') is not None else "none"
- git_password=theform.getvalue('git_password') if theform.getvalue('git_password') is not None else "none"
- git_2fa=theform.getvalue('git_2fa') if theform.getvalue('git_2fa') is not None else "false"
-
- if username!=None and password!=None:
- write_user_file(username,password,admin,email,realname,git_username,git_password,git_2fa)
+ username = theform.getvalue('username')
+ password = theform.getvalue('password')
+ realname = theform.getvalue('realname') if theform.getvalue('realname') is not None else "anonymous"
+ email = theform.getvalue('email') if theform.getvalue('email') is not None else "a@b.com"
+ admin = theform.getvalue('admin')
+ git_username = theform.getvalue('git_username') if theform.getvalue('git_username') is not None else "none"
+ git_password = theform.getvalue('git_password') if theform.getvalue('git_password') is not None else "none"
+ git_2fa = theform.getvalue('git_2fa') if theform.getvalue('git_2fa') is not None else "false"
+
+ if username != None and password != None:
+ write_user_file(username, password, admin, email, realname, git_username, git_password, git_2fa)
else:
- warn="ERROR: username or password missing; user cannot be created."
+ render_data["user_creation_warning"] = "ERROR: username or password missing; user cannot be created."
+ # handle db wipe
if theform.getvalue('init_db'):
setup_db()
- page= "Content-type:text/html\r\n\r\n"
- page+="""
-
-
-
-
Dockerfile syntax highlighting for CodeMirror. Depends on
+ the simplemode addon.
+
+
MIME types defined:text/x-conllu
+
diff --git a/css/gitdox.css b/css/gitdox.css
index 2242871..8d0602b 100644
--- a/css/gitdox.css
+++ b/css/gitdox.css
@@ -1,5 +1,14 @@
-#doctable{ border: 2px solid black;
- border-radius: 4px;font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;background-color:hsla(40,53%,100%,0.30)}
+#doctable {
+ border: 2px solid black;
+ border-radius: 4px;
+ font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;
+ background-color: hsla(40,53%,100%,0.30);
+ width: 100%;
+}
+
+#doctable input[type=text] {
+ width: 95%;
+}
h1, h2{font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;-webkit-font-smoothing: subpixel-antialiased;}
@@ -125,7 +134,7 @@ padding: 10px;
position:relative;
}
-.tooltip span
+.tooltip span.msg
{
display:none;
-moz-border-radius:6px;
@@ -135,7 +144,7 @@ padding: 10px;
background:white;
}
-.tooltip:hover span
+.tooltip:hover span.msg
{
display:block;
position:absolute;
@@ -155,17 +164,63 @@ padding: 10px;
height: 12.8px;
}
-#ValidationTableContainer{
-width: 100%}
+/* for validation_rules */
+ul.tabs{
+ margin: 0px;
+ padding: 0px;
+ list-style: none;
+}
+
+ul.tabs li{
+ background: none;
+ display: inline-block;
+ padding: 10px 15px;
+ cursor: pointer;
+ min-width: 100px;
+ font-size: 18px;
+ font-weight: 600;
+ color: #222;
+ text-align: center;
+}
+
+ul.tabs li.current{
+ background: #ededed;
+ color: #222;
+}
+
+.tab-content{
+ display: none;
+ background: #ededed;
+ padding: 15px;
+}
+
+.tab-content.current{
+ display: inherit;
+}
+
+/* override jtable styles for consistency with rest of gitdox */
+.jtable-title {
+ font-size: 16px !important;
+ border: none !important;
+ border-radius: 0 !important;
+ background: #eeeeee !important;
+}
+table.jtable {
+ border: none !important;
+}
+
+.ui-widget-overlay {
+ opacity: 0.5 !important;
+}
+
.jtable td{
word-break: break-all;
}
#validation_report{
- max-height: 300px;
- overflow-y: scroll;
- overflow-x: scroll;
+ height: 200px;
+ overflow-y: auto;
}
tfoot {
@@ -173,4 +228,14 @@ tfoot {
}
#filter_id{width:30px}
-#filter_mode{width:60px}
\ No newline at end of file
+#filter_mode{width:60px}
+
+
+table.admin {
+ font-family: arial, sans-serif;
+ border-collapse: collapse;
+}
+table.admin td, table.admin th {
+ text-align: left;
+ padding: 8px;
+}
diff --git a/css/gum.css b/css/gum.css
index 00da827..248d4d8 100644
--- a/css/gum.css
+++ b/css/gum.css
@@ -14,7 +14,7 @@ body {
margin-right: 0px; /*auto*/
margin-left: 0px; /*auto*/
margin-bottom: 60px;
- width: 100%; /*832px*/
+
min-height: 100%;
height: 100%;
z-index: 2;
@@ -720,4 +720,4 @@ a.tooltip2 span
#doctable{background-color:#dddddd !important; font-family: arial, sans-serif !important;}
#doctable th, #doctable td {border: 1px solid darkgray;}
-.CodeMirror-wrap{border: 1px solid black !important}
\ No newline at end of file
+.CodeMirror-wrap{border: 1px solid black !important}
diff --git a/editor.py b/editor.py
index 66cba82..154f89b 100755
--- a/editor.py
+++ b/editor.py
@@ -16,7 +16,8 @@
import platform, re
from paths import ether_url, get_menu, get_nlp_credentials
from modules.ether import make_spreadsheet, delete_spreadsheet, sheet_exists, get_socialcalc, ether_to_sgml, \
- build_meta_tag, get_ether_stylesheet_select, get_file_list
+ build_meta_tag, get_ether_stylesheets, get_file_list
+from modules.renderer import render
# Support IIS site prefix on Windows
if platform.system() == "Windows":
@@ -29,7 +30,6 @@
userdir = scriptpath + "users" + os.sep
templatedir = scriptpath + "templates" + os.sep
config = ConfigObj(userdir + 'config.ini')
-skin = config["skin"]
project = config["project"]
editor_help_link = config["editor_help_link"]
# Captions and API URLs for NLP buttons
@@ -66,11 +66,16 @@ def serialize_file(text_content,file_name):
f.write(text_content)#.encode("utf8"))
f.close()
+def get_user_list():
+ user_list=[]
+ scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep
+ userdir = scriptpath + "users" + os.sep
+ return get_file_list(userdir,"ini",forbidden=["admin","default","config"],hide_extension=True)
def load_page(user,admin,theform):
- print("Content-type:text/html\r\n\r\n")
global ether_url
global code_2fa
+
if theform.getvalue("2fa"):
code_2fa = theform.getvalue("2fa")
else:
@@ -87,7 +92,6 @@ def load_page(user,admin,theform):
schema = ""
doc_id = "" # Should only remain so if someone navigated directly to editor.py
docname = ""
- mymsg = ""
old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = ["", "", "", "", "", "", ""]
if int(admin) > 0:
@@ -95,6 +99,9 @@ def load_page(user,admin,theform):
else:
git_username, git_token, git_2fa = (None, None, None)
+ # dict of variables we'll need to render the html
+ render_data = {}
+
if theform.getvalue('id'):
doc_id = theform.getvalue('id')
if int(doc_id) > int(max_id):
@@ -106,7 +113,7 @@ def load_page(user,admin,theform):
corpus = "default_corpus"
schema = ""
text_content = ""
- # If one of the four forms is edited, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc)
+ # If one of the four forms is edited or we're cloning a doc, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc)
if theform.getvalue('edit_docname') and user != "demo":
if docname != 'new_document':
if doc_id > max_id:
@@ -151,14 +158,15 @@ def load_page(user,admin,theform):
else:
update_assignee(doc_id, assignee)
- if theform.getvalue('edit_schema') and user != "demo":
- schema = theform.getvalue('edit_schema')
- if schema != "--none--":
- if doc_id > max_id:
- create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
- max_id = doc_id
- else:
- update_schema(doc_id, schema)
+ # cloning metadata from an existing doc into a new doc
+ if theform.getvalue('source_doc'):
+ source_meta = get_doc_meta(theform.getvalue('source_doc'))
+ if doc_id > max_id:
+ create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
+ max_id = doc_id
+ for meta in source_meta:
+ m_key, m_val = meta[2:4]
+ save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8"))
else:
# Get previous values from DB
@@ -169,15 +177,28 @@ def load_page(user,admin,theform):
# Handle switch to spreadsheet mode if NLP spreadsheet service is called
if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet" and mode == "xml" and user != "demo":
- api_call = spreadsheet_nlp_api
- nlp_user, nlp_password = get_nlp_credentials()
data_to_process = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0]
- data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"}
- resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password))
- sgml=resp.text.encode("utf8")
+ api_call = spreadsheet_nlp_api
+ if api_call != "":
+ nlp_user, nlp_password = get_nlp_credentials()
+ data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"}
+ resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password))
+ sgml = resp.text.encode("utf8")
+ else:
+ sgml = data_to_process.encode("utf8")
out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml")
mode = "ether"
+ # handle copying metadata
+ if theform.getvalue('source_doc'):
+ source_meta = get_doc_meta(theform.getvalue('source_doc'))
+ existing_meta_keys = [x[2] for x in get_doc_meta(doc_id)]
+ # don't overwrite existing keys
+ meta_to_write = [x for x in source_meta if x[2] not in existing_meta_keys]
+ for meta in meta_to_write:
+ m_key, m_val = meta[2], meta[3]
+ save_meta(int(doc_id), m_key, m_val)
+
if theform.getvalue('edit_docname'):
docname = theform.getvalue('edit_docname')
@@ -215,10 +236,6 @@ def load_page(user,admin,theform):
mode = theform.getvalue('edit_mode')
if mode != old_mode and user != "demo":
update_mode(doc_id,mode)
- if theform.getvalue('edit_schema'):
- schema = theform.getvalue('edit_schema')
- if schema != old_schema and user != "demo":
- update_schema(doc_id, schema)
if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet": # mode has been changed to spreadsheet via NLP
update_mode(doc_id, "ether")
mode = "ether"
@@ -228,9 +245,7 @@ def load_page(user,admin,theform):
old_socialcalc = get_socialcalc(ether_url, old_sheet_name)
out, err = make_spreadsheet(old_socialcalc, ether_url + "_/gd_" + corpus + "_" + docname, "socialcalc")
if out == "OK":
- out, err = delete_spreadsheet(ether_url,old_sheet_name)
- else:
- mymsg += "out was: " + out + " err was" + err
+ delete_spreadsheet(ether_url,old_sheet_name)
text_content = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0]
@@ -239,6 +254,7 @@ def load_page(user,admin,theform):
if theform.getvalue('code'):
text_content = theform.getvalue('code')
text_content = text_content.replace("\r","")
+ text_content = re.sub(r'&(?!amp;)',r'&amp;',text_content) # Escape unescaped XML &
text_content = unicode(text_content.decode("utf8"))
if user != "demo":
if int(doc_id)>int(max_id):
@@ -295,123 +311,41 @@ def load_page(user,admin,theform):
shutil.rmtree(prefix+subdir)
if theform.getvalue('nlp_xml') == "do_nlp_xml" and mode == "xml":
- api_call=xml_nlp_api
- nlp_user, nlp_password = get_nlp_credentials()
- data = {"data":text_content, "lb":"line", "format":"pipes"}
- resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password))
- text_content=resp.text
+ api_call = xml_nlp_api
+ if api_call != "":
+ nlp_user, nlp_password = get_nlp_credentials()
+ data = {"data":text_content, "lb":"line", "format":"pipes"}
+ resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password))
+ text_content=resp.text
# Editing options
# Docname
# Filename
- push_git = """
- """
- if git_2fa == "true":
- push_git += """"""
- push_git += """
Commit
- """
-
+ status_list = open(prefix+"status.tab").read().replace("\r","").split("\n")
+ render_data['status_options'] = [{'text': x, 'selected': x == status} for x in status_list]
+ render_data['assignee_options'] = [{'text': x, 'selected': x == assignee} for x in get_user_list()]
+ render_data['mode_options'] = [{'text': x, 'selected': x == mode} for x in ["xml", "ether"]]
+ render_data['nlp_service'] = {'xml_button_html': xml_nlp_button.decode("utf8"),
+ 'spreadsheet_button_html': spreadsheet_nlp_button.decode("utf8"),
+ 'disabled': user == "demo" or mode == "ether"}
+ render_data['git_2fa'] = git_2fa == "true"
if git_status:
# Remove some html keyword symbols in the commit message returned by github3
- push_msg=git_status.replace('<','')
- push_msg=push_msg.replace('>','')
- push_git+="""
""" + push_msg + ' successful' + """
"""
-
- status_list = open(prefix+"status.tab").read().replace("\r","").split("\n")
-
- options = ""
- for stat in status_list:
- options +='\n'
- options = options.replace('">'+status +'<', '" selected="selected">'+status+'<')
-
- edit_status=""""
- # Get XML schema list
- schema_list = ['--none--']
- scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep
- schemadir = scriptpath + "schemas" + os.sep
-
- schema_list += get_file_list(schemadir,"xsd",hide_extension=True)
-
- edit_schema = """"
- # edit_schema = edit_schema.replace(schema+'"', schema+'" selected="selected"')
-
- # Get user_list from the logintools
- user_list=[]
- scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep
- userdir = scriptpath + "users" + os.sep
-
- user_list = get_file_list(userdir,"ini",forbidden=["admin","default","config"],hide_extension=True)
-
- edit_assignee=""""
-
- edit_mode = ''''''
- edit_mode = edit_mode.replace(mode+'"', mode+'" selected="selected"')
-
- # Metadata
- if theform.getvalue('metakey'):
- metakey = theform.getvalue('metakey')
- metavalue = theform.getvalue('metavalue').replace("\t","").replace("\n","").replace("\r","")
- if user != "demo":
- save_meta(int(doc_id),metakey.decode("utf8"),metavalue.decode("utf8"))
- if theform.getvalue('metaid'):
- metaid = theform.getvalue('metaid')
- if user != "demo":
- delete_meta(metaid, doc_id)
- if theform.getvalue('corpus_metakey'):
- metakey = theform.getvalue('corpus_metakey')
- metavalue = theform.getvalue('corpus_metavalue').replace("\t","").replace("\n","").replace("\r","")
- if user != "demo":
- save_meta(int(doc_id),metakey.decode("utf8"),metavalue.decode("utf8"),corpus=True)
- if theform.getvalue('corpus_metaid'):
- metaid = theform.getvalue('corpus_metaid')
- if user != "demo":
- delete_meta(metaid, doc_id, corpus=True)
-
- nlp_service = """
';
+ }
+ $("#validate_"+key).before("" + sort1 + sort2 + sort3 + "");
+ $("#validate_"+key).html(output1 + output2 + output3);
});
$("#validate_landing").removeClass("disabledbutton");
$("#validate_landing").html(' re-validate');
diff --git a/js/validation_rules.js b/js/validation_rules.js
new file mode 100644
index 0000000..fb13e11
--- /dev/null
+++ b/js/validation_rules.js
@@ -0,0 +1,242 @@
+$(document).ready(function () {
+ $('#xml-table-container').jtable({
+ title: 'XML Validation Rules',
+ sorting: true,
+ actions: {
+ listAction: function (postData, jtParams) {
+ jtParams.domain = 'xml';
+ return $.Deferred(function ($dfd) {
+ $.ajax({
+ url: 'validation_rules_service.py?action=list',
+ type: 'POST',
+ dataType: 'json',
+ data: jtParams,
+ success: function (data) {
+ $dfd.resolve(data);
+ },
+ error: function() {
+ $dfd.reject();
+ }
+ });
+ });
+ },
+ createAction: 'validation_rules_service.py?action=create',
+ updateAction: 'validation_rules_service.py?action=update',
+ deleteAction: 'validation_rules_service.py?action=delete'
+ },
+ fields: {
+ id: {
+ title: 'ID',
+ key: true,
+ visibility:'hidden'
+ },
+ domain: {
+ defaultValue: 'xml',
+ type: 'hidden'
+ },
+ doc: {
+ title: 'Document'
+ },
+ corpus: {
+ title: 'Corpus'
+ },
+ name: {
+ title: 'XSD Schema',
+ options: 'validation_rules_service.py?action=listschemas&extension=xsd'
+ }
+ }
+ });
+ $('#xml-table-container').jtable('load');
+});
+
+$(document).ready(function () {
+ $('#meta-table-container').jtable({
+ title: 'Metadata Validation Rules',
+ sorting: true,
+ actions: {
+ listAction: function (postData, jtParams) {
+ jtParams.domain = 'meta';
+ return $.Deferred(function ($dfd) {
+ $.ajax({
+ url: 'validation_rules_service.py?action=list',
+ type: 'POST',
+ dataType: 'json',
+ data: jtParams,
+ success: function (data) {
+ $dfd.resolve(data);
+ },
+ error: function() {
+ $dfd.reject();
+ }
+ });
+ });
+ },
+ createAction: 'validation_rules_service.py?action=create',
+ updateAction: 'validation_rules_service.py?action=update',
+ deleteAction: 'validation_rules_service.py?action=delete'
+ },
+ fields: {
+ id: {
+ title: 'ID',
+ key: true,
+ visibility:'hidden'
+ },
+ domain: {
+ defaultValue: 'meta',
+ type: 'hidden'
+ },
+ doc: {
+ title: 'Document'
+ },
+ corpus: {
+ title: 'Corpus'
+ },
+ name: {
+ title: 'Name'
+ },
+ operator: {
+ title: 'Operator',
+ options: ['~', 'exists']
+ },
+ argument: {
+ title: 'Argument'
+ }
+ }
+ });
+ $('#meta-table-container').jtable('load');
+});
+
+$(document).ready(function () {
+ $('#ether-table-container').jtable({
+ title: 'EtherCalc Validation Rules',
+ sorting: true,
+ actions: {
+ listAction: function (postData, jtParams) {
+ jtParams.domain = 'ether';
+ return $.Deferred(function ($dfd) {
+ $.ajax({
+ url: 'validation_rules_service.py?action=list',
+ type: 'POST',
+ dataType: 'json',
+ data: jtParams,
+ success: function (data) {
+ $dfd.resolve(data);
+ },
+ error: function() {
+ $dfd.reject();
+ }
+ });
+ });
+ },
+ createAction: 'validation_rules_service.py?action=create',
+ updateAction: 'validation_rules_service.py?action=update',
+ deleteAction: 'validation_rules_service.py?action=delete'
+ },
+ fields: {
+ id: {
+ title: 'ID',
+ key: true,
+ visibility:'hidden'
+ },
+ domain: {
+ defaultValue: 'ether',
+ type: 'hidden'
+ },
+ doc: {
+ title: 'Document'
+ },
+ corpus: {
+ title: 'Corpus'
+ },
+ name: {
+ title: 'Name'
+ },
+ operator: {
+ title: 'Operator',
+ options: ['~', '|', '=', '==', '>', 'exists', 'doesntexist']
+ },
+ argument: {
+ title: 'Argument'
+ }
+ }
+ });
+ $('#ether-table-container').jtable('load');
+});
+
+$(document).ready(function () {
+ $('#export-table-container').jtable({
+ title: 'Export Validation Rules',
+ sorting: true,
+ actions: {
+ listAction: function (postData, jtParams) {
+ jtParams.domain = 'export';
+ return $.Deferred(function ($dfd) {
+ $.ajax({
+ url: 'validation_rules_service.py?action=list',
+ type: 'POST',
+ dataType: 'json',
+ data: jtParams,
+ success: function (data) {
+ $dfd.resolve(data);
+ },
+ error: function() {
+ $dfd.reject();
+ }
+ });
+ });
+ },
+ createAction: 'validation_rules_service.py?action=create',
+ updateAction: 'validation_rules_service.py?action=update',
+ deleteAction: 'validation_rules_service.py?action=delete'
+ },
+ fields: {
+ id: {
+ title: 'ID',
+ key: true,
+ visibility:'hidden'
+ },
+ domain: {
+ defaultValue: 'export',
+ type: 'hidden'
+ },
+ doc: {
+ title: 'Document'
+ },
+ corpus: {
+ title: 'Corpus'
+ },
+ name: {
+ title: 'Export Spec',
+ options: 'validation_rules_service.py?action=listschemas&extension=ini'
+ },
+ argument: {
+ title: 'XSD Schema',
+ options: 'validation_rules_service.py?action=listschemas&extension=xsd'
+ }
+ }
+ });
+ $('#export-table-container').jtable('load');
+});
+
+$(document).ready(function(){
+ function activateTab(liId, divId) {
+ $('ul.tabs li').removeClass('current');
+ $('.tab-content').removeClass('current');
+ $("#"+liId).addClass('current');
+ $("#"+divId).addClass('current');
+ }
+
+ var liId = localStorage.getItem(location.pathname + "activeLiId");
+ var divId = localStorage.getItem(location.pathname + "activeDivId");
+ if (liId && divId) {
+ activateTab(liId, divId);
+ }
+
+ $('ul.tabs li').click(function() {
+ var liId = $(this).attr('id');
+ var divId = $(this).attr('data-tab');
+ activateTab(liId, divId);
+ localStorage.setItem(location.pathname + "activeLiId", liId);
+ localStorage.setItem(location.pathname + "activeDivId", divId);
+ });
+});
diff --git a/modules/ether.py b/modules/ether.py
index 185a8fc..96a5135 100755
--- a/modules/ether.py
+++ b/modules/ether.py
@@ -18,12 +18,13 @@
from configobj import ConfigObj
from ast import literal_eval
import json
+from copy import copy
import cgi
+import requests
from xml.sax.saxutils import escape
__version__ = "2.0.0"
-
class ExportConfig:
def __init__(self, **kwargs):
@@ -95,6 +96,67 @@ def read_config(self,config_file):
else:
self.template = "\n%%body%%\n\n"
+
+def parse_ether(ether, doc_id=None):
+ """Take in raw socialcalc data and turn it into a dict of Cells. Used in validation."""
+
+ class Cell:
+ def __init__(self, col, row, content, span):
+ self.col = col
+ self.row = row
+ self.header = ""
+ self.content = content
+ self.span = span
+
+ def __repr__(self):
+ return ""
+
+ ether_lines = ether.splitlines()
+
+ # find col letter corresponding to col name
+ parsed = defaultdict(list)
+ colmap = defaultdict(list)
+ rev_colmap = {}
+ all_cells = []
+ for line in ether_lines:
+ if line.startswith("cell:"): # Cell row
+ # A maximal row looks like this incl. span: cell:F2:t:LIRC2014_chw0oir:f:1:rowspan:289
+ # A minimal row without formatting: cell:C2:t:JJ:f:1
+ parts = line.split(":")
+ if len(parts) > 3: # Otherwise invalid row
+ cell_id = parts[1]
+ cell_row = cell_id[1:]
+ cell_col = cell_id[0]
+ # We'd need something like this to support more than 26 cols, i.e. columns AA, AB...
+ # for c in cell_id:
+ # if c in ["0","1","2","3","4","5","6","7","8","9"]:
+ # cell_row += c
+ # else:
+ # cell_col += c
+ cell_content = parts[3].replace("\\c", ":")
+ cell_span = parts[-1] if "rowspan:" in line else "1"
+
+ # record col name
+ if cell_row == "1":
+ colmap[cell_content].append(cell_col)
+ rev_colmap[cell_col] = cell_content
+
+ cell = Cell(cell_col, cell_row, cell_content, cell_span)
+ parsed[cell_col].append(cell)
+ all_cells.append(cell)
+
+ for cell in all_cells:
+ if cell.col in rev_colmap:
+ cell.header = rev_colmap[cell.col]
+ else:
+ if doc_id is None:
+ doc_id = "unknown"
+ raise IOError("Undocumented column: " + cell.col + " in '" + str(cell) + " from doc: " + str(doc_id))
+
+ parsed["__colmap__"] = colmap # Save colmap for apply_rule
+ return parsed
+
+
def unescape_xml(text):
# Fix various common compounded XML escapes
text = text.replace("&lt;","<").replace("&gt;",">")
@@ -181,31 +243,11 @@ def get_file_list(path,extension,hide_extension=False,forbidden=None):
return outfiles
-def get_ether_stylesheet_select():
-
+def get_ether_stylesheets():
scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep
stylesheet_dir = scriptpath + os.sep + ".." + os.sep + "schemas" + os.sep
-
stylesheet_list = get_file_list(stylesheet_dir,"ini",hide_extension=True)
- select = """\n"
- return select
-
-
-def get_corpus_select():
-
- corpora = get_corpora()
- select = """\n"
- return select
+ return stylesheet_list
def flush_open(annos, row_num, colmap):
@@ -218,32 +260,57 @@ def flush_open(annos, row_num, colmap):
def flush_close(closing_element, last_value, last_start, row_num, colmap, aliases):
flushed = ""
- for alias in aliases[closing_element]:
- if last_start[alias] < row_num - 1:
- span_string = ":rowspan:" + str(row_num - last_start[alias])
+
+ for alias in aliases[closing_element][-1]:
+ stack_len = len(last_start[alias])
+
+ if stack_len > 0 and last_start[alias][-1] < row_num - 1:
+ span_string = ":rowspan:" + str(row_num - last_start[alias][-1])
else:
span_string = ""
- flushed += "cell:" + colmap[alias] + str(last_start[alias]) + ":t:" + last_value[alias]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on
+
+ # Use t for tvf to leave links on
+ flushed += ("cell:"
+ + colmap[alias][stack_len - 1]
+ + str(last_start[alias][-1])
+ + ":t:" + str(last_value[alias][-1])
+ + ":f:1:tvf:1" + span_string + "\n")
+
+ # pop the stack since we've closed a tag
+ last_value[alias].pop()
+ last_start[alias].pop()
+
+ aliases[closing_element].pop()
return flushed
-def number_to_letter(number):
- # Currently support up to 26 columns; no support for multiletter column headers beyond letter Z
+def number_to_letters(number):
if number < 27:
- return chr(number + ord('a')-1).upper()
+ return chr(number + ord('a') - 1).upper()
else:
- return None
+ char1 = chr((number // 26) + ord('a')-1).upper()
+ char2 = chr((number % 26) + ord('a')-1).upper()
+ return char1 + char2
def sgml_to_ether(sgml, ignore_elements=False):
- sgml = sgml.replace("\r","")
- current_row = 2
open_annos = defaultdict(list)
+
+ # a mapping from a tag name to a list of values. the list is a stack
+ # where the most recently encountered opening tag's value/start row
+ # is kept on the right side of the list. whenever we see a closing tag
+ # we pop from the stack, and whenever we see an opening tag we push
+ # (append) to the stack
+ last_value = defaultdict(list)
+ last_start = defaultdict(list)
+
+ # maps from tags to a similar stack data structure where the top of the stack
+ # (i.e. the right side of the list) contains all the annotations that were
+ # present on the most recently opened nested element
aliases = defaultdict(list)
- last_value = {}
- last_start = {}
+
+ # values in this dict are also lists which follow the pattern described above
colmap = OrderedDict()
- maxcol = 1
preamble = """socialcalc:version:1.0
MIME-Version: 1.0
@@ -263,45 +330,54 @@ def sgml_to_ether(sgml, ignore_elements=False):
"""
+ sgml = sgml.replace("\r","")
+
output = ""
+ maxcol = 1
+ current_row = 2
for line in sgml.strip().split("\n"):
line = line.strip()
+ # SocialCalc uses colons internally, \\c used to repr colon in data
line = line.replace(":","\\c")
+
if line.startswith("<?") or line.endswith("/>"): # Skip unary tags and XML instructions
- pass
+ continue
elif line.startswith("</"): # Closing tag
my_match = re.match("</([^ >]+)>",line)
element = my_match.groups(0)[0]
- output+=flush_close(element, last_value, last_start, current_row, colmap, aliases)
+ output += flush_close(element, last_value, last_start, current_row, colmap, aliases)
elif line.startswith("<"): # Opening tag
my_match = re.match("<([^ >]+)[ >]",line)
element = my_match.groups(0)[0]
- aliases[element] = [] # Reset element aliases to see which attributes this instance has
+ aliases[element].append([]) # Add new set of aliases to see which attributes this instance has
if "=" not in line:
line = "<" + element + " " + element + '="' + element + '">'
- my_match = re.findall('([^" =]+)="([^"]+)"',line)
+ attrs = re.findall('([^" =]+)="([^"]+)"',line)
anno_name = ""
anno_value = ""
- for match in my_match:
- if element != match[0] and ignore_elements is False:
- anno_name = element + "_" + match[0]
+ for attr in attrs:
+ if element != attr[0] and ignore_elements is False:
+ anno_name = element + "_" + attr[0]
else:
- anno_name = match[0]
- anno_value = match[1]
+ anno_name = attr[0]
+ anno_value = attr[1]
open_annos[current_row].append((anno_name,anno_value))
- last_value[anno_name] = anno_value
- last_start[anno_name] = current_row
- if element not in aliases:
- aliases[element] = [anno_name]
- elif anno_name not in aliases[element]:
- aliases[element].append(anno_name)
+ last_value[anno_name].append(anno_value)
+ last_start[anno_name].append(current_row)
+ if anno_name not in aliases[element][-1]:
+ aliases[element][-1].append(anno_name)
+
if anno_name not in colmap:
- maxcol +=1
- colmap[anno_name] = number_to_letter(maxcol)
+ maxcol += 1
+ colmap[anno_name] = [number_to_letters(maxcol)]
+ elif anno_name in colmap and \
+ len(last_start[anno_name]) > len(colmap[anno_name]):
+ maxcol += 1
+ colmap[anno_name].append(number_to_letters(maxcol))
elif len(line) > 0: # Token
token = line.strip()
@@ -313,7 +389,8 @@ def sgml_to_ether(sgml, ignore_elements=False):
preamble += "cell:A1:t:tok:f:2\n" # f <> tvf for links
output = preamble + output
for header in colmap:
- output += "cell:"+colmap[header]+"1:t:"+header+":f:2\n" # NO f <> tvf for links
+ for entry in colmap[header]:
+ output += "cell:"+entry+"1:t:"+header+":f:2\n" # NO f <> tvf for links
output += "\nsheet:c:" + str(maxcol) + ":r:" + str(current_row-1) + ":tvf:1\n"
@@ -340,17 +417,110 @@ def sgml_to_ether(sgml, ignore_elements=False):
def ether_to_csv(ether_path, name):
- command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/"
- proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
- (stdout, stderr) = proc.communicate()
- return stdout.decode("utf8")
+ try:
+ r = requests.get(ether_path + "_/" + name + "/csv/")
+ except:
+ return ""
+
+ return r.text
+
+
+def strip_unique_identifier(tag):
+ """Given an SGML closing or opening tag, replace anything that looks
+ like __\d+__ on the end of the tag name, assuming that we were the
+ ones who added it."""
+
+ try:
+ tag_name = re.match("^?([^ >]+)", tag).groups(0)[0]
+ except AttributeError:
+ return tag
+
+ orig_tag_name = re.sub("__\d+__$", "", tag_name)
+ tag = tag.replace("<" + tag_name, "<" + orig_tag_name)
+ tag = tag.replace("" + tag_name, "" + orig_tag_name)
+ tag = tag.replace(tag_name + "=" + '"' + orig_tag_name + '"',
+ orig_tag_name + "=" + '"' + orig_tag_name + '"')
+ return tag
+
+def deunique_should_skip_line(line):
+ return (not line.startswith("<") # tokens
+ or line.startswith("") # xml instrs
+ or line.endswith("/>") # unary tags
+ or line.startswith("', ''] and replaces them with
+ ['', '']"""
+ def swap_run(l, start, end):
+ l[start:end] = l[start:end][::-1]
+
+ run_start = None
+ for i, line in enumerate(lines):
+ if line.startswith(""):
+ if run_start is not None:
+ deuniqued_tag = strip_unique_identifier(line)
+ if deuniqued_tag != lines[run_start]:
+ swap_run(lines, run_start, i)
+ run_start = None
+ else:
+ run_start = i
+ elif run_start is not None:
+ swap_run(lines, run_start, i)
+ run_start = None
+ else:
+ run_start = None
+
+ if run_start is not None:
+ swap_run(lines, run_start, i+1)
+
+ return lines
+
+def deunique_properly_nested_tags(sgml):
+ """Use a silly n^2 algorithm to detect properly nested tags and strip
+ them of their unique identifiers. Probably an n algorithm to do this."""
+ lines = sgml.split("\n")
+ lines = reverse_adjacent_closing_tags(lines)
+
+ output = copy(lines)
+
+ for i, line in enumerate(lines):
+ if deunique_should_skip_line(line) or line.startswith(""):
+ continue
+
+ # if we've gotten this far, we have an opening tag--store the tag name
+ open_element = re.match("<([^ >]+)[ >]", line).groups(0)[0]
+ open_counts = defaultdict(int)
+
+ for j, line2 in enumerate(lines[i:]):
+ if deunique_should_skip_line(line2):
+ continue
+
+ if line2.startswith(""):
+ element = re.match("([^>]+)>", line2).groups(0)[0]
+ open_counts[element] -= 1
+ if element == open_element:
+ break
+ else:
+ element = re.match("<([^ >]+)[ >]", line2).groups(0)[0]
+ open_counts[element] += 1
+
+ # element is properly nested if no element was opened in the block that
+ # was not also closed in the block or vice versa
+ if sum(open_counts.values()) == 0:
+ output[i] = strip_unique_identifier(output[i])
+ output[i+j] = strip_unique_identifier(output[i+j])
+
+ output = reverse_adjacent_closing_tags(output)
+
+ return "\n".join(output)
def ether_to_sgml(ether, doc_id,config=None):
"""
-
:param ether: String in SocialCalc format
:param doc_id: GitDox database internal document ID number as string
+ :param config: Name of an export config (.ini file) under schemas/
:return:
"""
@@ -359,13 +529,15 @@ def ether_to_sgml(ether, doc_id,config=None):
else:
config = ExportConfig(config=config)
+ # mapping from col header (meaningful string) to the col letter
colmap = {}
+ # list of 3-tuples of parsed cells: (col, row, contents)
cells = []
if isinstance(ether,unicode):
ether = ether.encode("utf8")
-
+ # parse cell contents into cells
for line in ether.splitlines():
parsed_cell = re.match(r'cell:([A-Z]+)(\d+):(.*)$', line)
if parsed_cell is not None:
@@ -390,22 +562,37 @@ def ether_to_sgml(ether, doc_id,config=None):
sec_element_checklist = []
row = 1
+ # added to support duplicate columns
+ namecount = defaultdict(int)
+
close_tags = defaultdict(list)
for cell in cells:
- if cell[1] == 1: # Header row
+ # Header row
+ if cell[1] == 1:
colname = cell[2]['t'].replace("\\c",":")
if colname in config.aliases:
- colmap[cell[0]] = config.aliases[colname]
+ colname = config.aliases[colname]
+
+ # if we've already seen a tag of this name, prepare to make it unique
+ namecount[colname] += 1
+ if namecount[colname] > 1:
+ dupe_suffix = "__" + str(namecount[colname]) + "__"
+ else:
+ dupe_suffix = ""
+
+ if "@" in colname:
+ unique_colname = colname.replace("@", dupe_suffix + "@")
else:
- colmap[cell[0]] = colname
+ unique_colname = colname + dupe_suffix
+
+ colmap[cell[0]] = unique_colname
+
# Make sure that everything that should be exported has some priority
- if colname not in config.priorities and config.export_all:
- if not colname.lower().startswith("ignore:"): # Never export columns prefixed with "ignore:"
- if "@" in colname:
- elem = colname.split("@",1)[0]
- else:
- elem = colname
+ if unique_colname.split("@",1)[0] not in config.priorities and config.export_all:
+ if not unique_colname.lower().startswith("ignore:"):
+ elem = unique_colname.split("@",1)[0]
config.priorities.append(elem)
+ # All other rows
else:
col = cell[0]
row = cell[1]
@@ -413,56 +600,70 @@ def ether_to_sgml(ether, doc_id,config=None):
col_name = colmap[col]
else:
raise IOError("Column " + col + " not found in doc_id " + str(doc_id))
+
+ # If the column specifies an attribute name, use it, otherwise use the element's name again
if "@" in col_name:
element, attrib = col_name.split("@",1)
else:
element = col_name
attrib = element
+ # Check whether attrib contains a constant value instruction
+ const_val = ""
+ if "=" in attrib:
+ attrib, const_val = attrib.split("=",1)
+
+ # Check to see if the cell has been merged with other cells
if 'rowspan' in cell[2]:
rowspan = int(cell[2]['rowspan'])
else:
rowspan = 1
- if "|" in element: # Check for flexible element, e.g. m|w@x means 'prefer to attach x to m, else to w'
+ # Check for flexible element, e.g. m|w@x means 'prefer to attach x to m, else to w'
+ if "|" in element:
element, sec_element = element.split("|",1)
else:
sec_element = ""
+ # Move on to next cell if this is not a desired column
if element not in config.priorities or (element.startswith("ignore:") and config.no_ignore): # Guaranteed to be in priorities if it should be included
- continue # Move on to next cell if this is not a desired column
- if row != last_row: # New row starting, sort previous lists for opening and closing orders
- #close_tags[row].sort(key=lambda x: (-last_open_index[x],x))
- close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True)
+ continue
+
+ # New row starting from this cell, sort previous lists for opening and closing orders
+ if row != last_row:
for element in open_tags[last_row]:
open_tag_order[last_row].append(element)
- #open_tag_order[last_row].sort(key=lambda x: (open_tag_length[x],x), reverse=True)
+
open_tag_order[last_row].sort(key=lambda x: (-open_tag_length[x],config.priorities.index(x)))
for sec_tuple in sec_element_checklist:
prim_found = False
- e_prim, e_sec, attr, val, span = sec_tuple
- if e_prim in open_tags[last_row] and e_prim in open_tag_length:
- if span == open_tag_length[e_prim]:
- open_tags[last_row][e_prim].append((attr, val))
- if e_prim not in close_tags[last_row + span]:
- close_tags[last_row+span-1].append(e_prim)
+ prim_elt, sec_elt, attr, val, span = sec_tuple
+ if prim_elt in open_tags[last_row] and prim_elt in open_tag_length:
+ if span == open_tag_length[prim_elt]:
+ open_tags[last_row][prim_elt].append((attr, val))
+ close_tags[last_row + span].append(prim_elt)
prim_found = True
if not prim_found:
- if e_sec in open_tags[last_row] and e_sec in open_tag_length:
- if span == open_tag_length[e_sec]:
- open_tags[last_row][e_sec].append((attr, val))
- if e_sec not in close_tags[last_row + span]:
- close_tags[last_row + span - 1].append(e_sec)
+ if sec_elt in open_tags[last_row] and sec_elt in open_tag_length:
+ if span == open_tag_length[sec_elt]:
+ open_tags[last_row][sec_elt].append((attr, val))
+ close_tags[last_row + span].append(sec_elt)
sec_element_checklist = [] # Purge sec_elements
+ close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True)
+
last_row = row
- if 't' in cell[2]: # cell contains text
- content = cell[2]['t']
- elif 'v' in cell[2]: # cell contains numerical value
- content = cell[2]['v']
- elif col_name != 'tok':
- continue # cell does not contain a value and this is not a token entry
+
+ if const_val != "":
+ content = const_val
+ else:
+ if 't' in cell[2]: # cell contains text
+ content = cell[2]['t']
+ elif 'v' in cell[2]: # cell contains numerical value
+ content = cell[2]['v']
+ elif col_name != 'tok':
+ continue # cell does not contain a value and this is not a token entry
if col_name == 'tok':
if "<" in content or "&" in content or ">" in content:
@@ -484,6 +685,10 @@ def ether_to_sgml(ether, doc_id,config=None):
# Priorities have been supplied, but this column is not in them
continue
+ # content may not contain straight double quotes in span annotations in SGML export
+ # Note that " is allowed in tokens and in tab-delimited token annotations!
+ content = content.replace('"', """)
+
if sec_element != "":
#open_tags[row][sec_element].append((attrib, content))
sec_element_checklist.append((element,sec_element,attrib,content,rowspan))
@@ -496,10 +701,13 @@ def ether_to_sgml(ether, doc_id,config=None):
close_row = row + rowspan
else:
close_row = row + 1
- if element not in close_tags[close_row]:
- close_tags[close_row].append(element)
+
+ # this introduces too many close tags for elts that have more than one attr.
+ # We take care of this later with close_tag_debt
+ close_tags[close_row].append(element)
open_tag_length[element] = int(close_row) - int(last_open_index[element])
+
# Sort last row tags
#close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True)
if row + 1 in close_tags:
@@ -511,22 +719,25 @@ def ether_to_sgml(ether, doc_id,config=None):
#output = build_meta_tag(doc_id)
template = fill_meta_template(doc_id,config.template)
output = ""
+ close_tag_debt = defaultdict(int)
- for r in xrange(2,len(toks)+3):
- if r == 1970:
- a=4
+ for r in xrange(2,len(toks)+5):
for element in close_tags[r]:
- if element not in config.milestones:
- output += '' + element + '>\n'
-
- if r == len(toks)+2:
- break
+ if element != "" and element not in config.milestones:
+ if close_tag_debt[element] > 0:
+ close_tag_debt[element] -= 1
+ else:
+ output += '' + element + '>\n'
for element in open_tag_order[r]:
tag = '<' + element
+ attr_count = 0
for attrib, value in open_tags[r][element]:
if attrib != "":
tag += ' ' + attrib + '="' + value + '"'
+ attr_count += 1
+ close_tag_debt[element] = len(open_tags[r][element]) - 1
+
if element in config.milestones:
tag += '/>\n'
else:
@@ -544,6 +755,9 @@ def ether_to_sgml(ether, doc_id,config=None):
output = re.sub("%%[^%]+%%", "none", output)
+ # fix tags that look like elt__2__ if it still gives correct sgml
+ output = deunique_properly_nested_tags(output)
+
return output
@@ -605,34 +819,46 @@ def delete_spreadsheet(ether_url, name):
:param name: name of the spreadsheet (last part of URL)
:return: void
"""
-
- ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name
- del_proc = subprocess.Popen(ether_command,shell=True)
-
- (stdout, stderr) = del_proc.communicate()
-
- return stdout, stderr
+ try:
+ r = requests.delete(ether_url + "_/" + name)
+ except:
+ pass
def sheet_exists(ether_path, name):
return len(get_socialcalc(ether_path,name)) > 0
-def get_socialcalc(ether_path, name):
+def get_socialcalc(ether_path, name, doc_id=None, dirty=True):
+ """
+ Get SocialCalc format serialization for an EtherCalc spreadsheet, or a cached serialization from the sqlite
+ DB is available for a specified doc_id
+
+ :param ether_path: The EtherCalc server base URL, e.g. http://server.com/ethercalc/
+ :param name: spreadsheet name, e.g. gd_corpname_docname
+ :param doc_id: optional doc_id in docs table to fetch/set SocialCalc from cache
+ :return: SocialCalc string
+ """
+
+ if doc_id is not None and not dirty:
+ cache = get_cache(doc_id)[0][0]
+ if cache is not None:
+ return cache
command = "curl --netrc -X GET " + ether_path + "_/" + name
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
(stdout, stderr) = proc.communicate()
- return stdout.decode("utf8")
+ socialcalc = stdout.decode("utf8")
+ if doc_id is not None:
+ set_cache(doc_id, socialcalc)
+ return socialcalc
def get_timestamps(ether_path):
- command = "curl --netrc -X GET " + ether_path + "_roomtimes"
- proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
- (stdout, stderr) = proc.communicate()
- times = json.loads(stdout)
+ r = requests.get(ether_path + "_roomtimes")
+ times = r.json()
output = {}
for room in times:
- output[room.replace("timestamp-","")] = times[room]
+ output[room.replace("timestamp-", "")] = times[room]
return output
diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py
index 014b400..b0eb2ac 100755
--- a/modules/gitdox_sql.py
+++ b/modules/gitdox_sql.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
"""
Data access functions to read from and write to the SQLite backend.
@@ -31,7 +31,7 @@ def setup_db():
#docs table
cur.execute('''CREATE TABLE IF NOT EXISTS docs
- (id INTEGER PRIMARY KEY AUTOINCREMENT, name text, corpus text, status text,assignee_username text ,filename text, content text, mode text, schema text, validation text, timestamp text)''')
+ (id INTEGER PRIMARY KEY AUTOINCREMENT, name text, corpus text, status text,assignee_username text ,filename text, content text, mode text, schema text, validation text, timestamp text, cache text)''')
#metadata table
cur.execute('''CREATE TABLE IF NOT EXISTS metadata
(docid INTEGER, metaid INTEGER PRIMARY KEY AUTOINCREMENT, key text, value text, corpus_meta text, UNIQUE (docid, metaid) ON CONFLICT REPLACE, UNIQUE (docid, key) ON CONFLICT REPLACE)''')
@@ -49,7 +49,24 @@ def create_document(doc_id, name, corpus, status, assigned_username, filename, c
(int(doc_id), name, corpus, status, assigned_username, filename, content, schema))
-def generic_query(sql, params):
+def get_cache(doc_id):
+ try:
+ cache = generic_query("SELECT cache FROM docs WHERE id = ?;",(doc_id,))
+ except sqlite3.Error as err: # Old schema without cache column
+ generic_query("ALTER TABLE docs ADD COLUMN cache TEXT default null;",None)
+ cache = generic_query("SELECT cache FROM docs WHERE id = ?;",(doc_id,))
+ return cache
+
+
+def set_cache(doc_id, cache_contents):
+ try:
+ generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id))
+ except sqlite3.Error as err: # Old schema without cache column
+ generic_query("ALTER TABLE docs ADD COLUMN cache TEXT default null;",None)
+ generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id))
+
+
+def generic_query(sql, params, return_new_id=False):
# generic_query("DELETE FROM rst_nodes WHERE doc=? and project=?",(doc,project))
dbpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "gitdox.db"
@@ -62,8 +79,11 @@ def generic_query(sql, params):
else:
cur.execute(sql)
- rows = cur.fetchall()
- return rows
+ if return_new_id:
+ return cur.lastrowid
+ else:
+ rows = cur.fetchall()
+ return rows
def invalidate_doc_by_name(doc,corpus):
@@ -116,56 +136,16 @@ def cell(text):
text = str(text)
return "\n
" + text + "
"
-def print_meta(doc_id, corpus=False):
- meta = get_doc_meta(doc_id, corpus=corpus)
- if meta is None:
- meta = []
- # docid,metaid,key,value - four cols
- metaid_id = "metaid" if not corpus else "corpus_metaid"
- table_id = "meta_table" if not corpus else "meta_table_corpus"
- table='''
-
-
-
-
-
-
-
- """
- for item in meta:
- # Each item appears in one row of the table
- row = "\n
"
- metaid = str(item[1])
- ('metaid:'+str(metaid))
- id = str(doc_id)
- for i in item[2:-1]:
- cell_contents = cell(i)
- cell_contents = re.sub(r'(
The format of each document will depend on its active mode:
+
+
Metadata is added to XML files in a wrapping tag <meta key="value">
+
Documents in XML mode are downloaded as .xml, as they appear in the editor
+
Documents in spreadsheet mode are downloaded as .sgml to preserve potential span hierarchy conflicts
+
+
+
You can choose custom configurations for exporting spreadsheet data if .ini files are available in the schemas/ directory
+
+
Corpora to export:
+
+
+
+
Filter by status:
+
+ {{{ status_select_html }}}
+
+
Extension for spreadsheet files:
+
+
+
Export configuration for spreadsheets:
+
+
+
+ download
+
+
+
Batch upload
+
Import multiple spreadsheets data by uploading a zip archive with SGML files
+
+
Document names are generated from file names inside the zip, without their extension (e.g. .sgml, .tt)
+
Metadata is taken from the <meta> element surrounding the document
+
Corpus name is taken from a metadatum corpus inside meta, else 'default_corpus'
+
Select XML mode to import into XML editor, or Spreadsheet to convert SGML spans into a new spreadsheet
+
+
+ {{#file_uploaded}}
+
+ The file {{.}} was uploaded successfully
+
+ {{/file_uploaded}}
+ {{#files_imported}}
+ Imported {{.}} files from archive
+
+ {{/files_imported}}
+
+
Batch update DB
+
Execute multiple SQL updates, e.g. to assign documents to users from a list
+
+
The uploaded file should be a tab delimited, two column text file
+
The first rwo contains the headers:
+
in column 1, the criterion, one of 'corpus' or 'name' (=document name)
+
in column 2, the docs table column to update, e.g. 'assignee_username'
+
Subsequent rows give pairs of criterion-value, e.g. 'doc001 user1'
+
+
+ {{#sql_file_uploaded}}
+
+ The file {{.}} was uploaded successfully
+
+ {{/sql_file_uploaded}}
+ {{#sql_statements}}
+ Executed {{.}} DB updates
+
+ {{/sql_statements}}
+
+
+
+
+
Database management
+
+
+
+
-"""
\ No newline at end of file
+"""
diff --git a/templates/admin.mustache b/templates/admin.mustache
new file mode 100644
index 0000000..9f2f5d2
--- /dev/null
+++ b/templates/admin.mustache
@@ -0,0 +1,228 @@
+
+
+