diff --git a/.gitignore b/.gitignore index 04a41a2..72bb081 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,9 @@ +# compiled python files **.pyc + +# these are auto-generated in index.py +popupPage.html +popupPageCorpus.html + +# usually don't want to commit this +gitdox.db diff --git a/Dockerfile b/Dockerfile index b52ab15..b33c260 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:16.04 EXPOSE 80 # install deps @@ -13,8 +13,7 @@ RUN chown -R www-data:www-data /var/www/html RUN chmod +x /var/www/html/*.py RUN chmod +x /var/www/html/modules/*.py -# keep these in sync with requirements.txt -RUN pip install lxml requests github3.py==0.9.3 passlib +RUN pip install -r /var/www/html/requirements.txt # install ethercalc and run as a service RUN npm install -g ethercalc @@ -47,6 +46,7 @@ RUN echo " \n\ # service and (2) not to use supervisord to manage the execution of these # processes. But (1) is too heavy a solution, and (2) seems unnecessary unless # one of our services leaks memory/is unstable +RUN echo "ln -s /usr/bin/nodejs /usr/bin/node" >> /etc/startup.sh RUN echo "/usr/bin/redis-server &" >> /etc/startup.sh RUN echo "/usr/local/bin/ethercalc &" >> /etc/startup.sh RUN echo "/usr/sbin/apache2ctl -D FOREGROUND" >> /etc/startup.sh diff --git a/README.md b/README.md index 0ffc05b..8fcf3be 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,14 @@ hope to provide a stable release soon. First, [install Docker](https://docs.docker.com/install/). You may be able to install it using your platform's package manager. 
+(**Note: if your machine has Apache running, you should stop it first by running `sudo service apache2 stop`.**) + ```bash -docker run -dit --restart unless-stopped --name gitdox-dev -p 5000:80 gucorpling/gitdox:dev +docker run -dit --restart unless-stopped --name gitdox-dev -p 80:80 gucorpling/gitdox:dev ``` GitDox should now be running in the Docker container you've set up, and you may -visit `http://localhost:5000` on your machine to verify that it works. GitDox should +visit `http://localhost` on your machine to verify that it works. GitDox should now always be running on your machine, even if you reboot it. If for some reason you need to stop it manually, you may do so: @@ -54,7 +56,7 @@ to have your GitDox folders live in your host machine's filesystem: ```bash sudo git clone https://github.com/gucorpling/gitdox /opt/gitdox sudo chown -R www-data:www-data /opt/gitdox -docker run -dit --restart unless-stopped --name gitdox -v /opt/gitdox:/var/www/html -p 5000:80 gucorpling/gitdox:dev gitdox +docker run -dit --restart unless-stopped --name gitdox -v /opt/gitdox:/var/www/html -p 80:80 gucorpling/gitdox:dev gitdox ``` These commands install GitDox under `/opt` on your host machine and allow you to modify its files just as you would modify any other file on your machine. But in the Docker command, with the `-v` flag we tell it to mount this folder as `/var/www/html` in the container's filesystem. The files are shared bidirectionally: changes made in the container will flow to the host, and vice versa. 
diff --git a/admin.py b/admin.py index e2ac495..9225226 100755 --- a/admin.py +++ b/admin.py @@ -13,7 +13,8 @@ from modules.dataenc import pass_dec, pass_enc from paths import get_menu from editor import harvest_meta -from modules.ether import make_spreadsheet, get_ether_stylesheet_select, get_corpus_select +from modules.ether import make_spreadsheet, get_ether_stylesheets +from modules.renderer import render from passlib.apps import custom_app_context as pwd_context import github3 import time @@ -29,23 +30,11 @@ userdir = scriptpath + "users" + os.sep templatedir = scriptpath + "templates" + os.sep config = ConfigObj(userdir + 'config.ini') -skin = config["skin"] project = config["project"] -def get_status_select(): - - status_list = open(prefix+"status.tab").read().replace("\r","").split("\n") - - select = """\n" - return select +def get_statuses(): + return open(prefix+"status.tab").read().replace("\r","").split("\n") def write_user_file(username,password,admin,email,realname,git_username,git_password,git_2fa=False): @@ -102,7 +91,7 @@ def update_git_info(user,new_git_username,new_git_password,new_git_2fa=False): o['git_username'] = new_git_username o['git_2fa'] = str(new_git_2fa).lower() - try: + try: note = project + ", " + time.ctime() auth = github3.authorize(new_git_username, new_git_password, ['repo'], note, "") o['git_token'] = auth.token @@ -111,169 +100,58 @@ def update_git_info(user,new_git_username,new_git_password,new_git_2fa=False): del o['git_password'] o.write() except: - # fail silently--would want to display an error ideally, but + # fail silently--would want to display an error ideally, but # users will know to try again if the credentials are wrong pass -def load_admin(user,admin,theform): - warn="" +def load_admin(user, admin, theform): + render_data = {} + + # handle user deletion if theform.getvalue('user_delete'): - userdir=prefix+'users' + os.sep - user_del_file=theform.getvalue('user_delete') - user_del=user_del_file.split('.ini')[0] + 
userdir = prefix + 'users' + os.sep + user_del_file = theform.getvalue('user_delete') + user_del = user_del_file.split('.ini')[0] #delete_user(user_del) #need to also delete the user.ini file - os.remove(userdir+user_del_file) + os.remove(userdir + user_del_file) + # handle user creation if theform.getvalue('create_user'): - username=theform.getvalue('username') - password=theform.getvalue('password') - realname=theform.getvalue('realname') if theform.getvalue('realname') is not None else "anonymous" - email=theform.getvalue('email') if theform.getvalue('email') is not None else "a@b.com" - admin=theform.getvalue('admin') - git_username=theform.getvalue('git_username') if theform.getvalue('git_username') is not None else "none" - git_password=theform.getvalue('git_password') if theform.getvalue('git_password') is not None else "none" - git_2fa=theform.getvalue('git_2fa') if theform.getvalue('git_2fa') is not None else "false" - - if username!=None and password!=None: - write_user_file(username,password,admin,email,realname,git_username,git_password,git_2fa) + username = theform.getvalue('username') + password = theform.getvalue('password') + realname = theform.getvalue('realname') if theform.getvalue('realname') is not None else "anonymous" + email = theform.getvalue('email') if theform.getvalue('email') is not None else "a@b.com" + admin = theform.getvalue('admin') + git_username = theform.getvalue('git_username') if theform.getvalue('git_username') is not None else "none" + git_password = theform.getvalue('git_password') if theform.getvalue('git_password') is not None else "none" + git_2fa = theform.getvalue('git_2fa') if theform.getvalue('git_2fa') is not None else "false" + + if username != None and password != None: + write_user_file(username, password, admin, email, realname, git_username, git_password, git_2fa) else: - warn="
ERROR: username or password missing; user cannot be created.
" + render_data["user_creation_warning"] = "ERROR: username or password missing; user cannot be created." + # handle db wipe if theform.getvalue('init_db'): setup_db() - page= "Content-type:text/html\r\n\r\n" - page+=""" - - - - - - - - - - - - - - - - - - **navbar** -
- **header** -
-

GitDox - Administration

-

administration and user management | back to document list

- """ - page+="""
""" - - page += '''

User Management

- - -

Select users to delete:

- " - - - page+="""

-
delete
-
""" - - #add user - - page+="""

Enter user info to create new user:


- - - - - - - - - - - - - - -
username
password
realname
email
admin
git username
git password
use two-factor auth
- - - - -

-
save
-
""" - if warn!="": - page+=warn - - - page += """ -

Batch download

-

Download all documents

- -
Corpora to export:
- **corpus_select** -

-
Filter by status:
- **status_select** -

-
Extension for spreadsheet files:
- -

-
Export configuration for spreadsheets:
- **stylesheet_select** -

-
download
- """ - - page = page.replace("**corpus_select**",get_corpus_select()) - page = page.replace("**status_select**",get_status_select()) - page = page.replace("**stylesheet_select**",get_ether_stylesheet_select()) - - - msg = "" + render_data['userfiles'].append(userfile) + + # get html for dropdown selections + render_data['corpora'] = [x[0] for x in get_corpora()] + render_data['statuses'] = get_statuses() + render_data['ether_stylesheets'] = get_ether_stylesheets() + + # handle upload imported = 0 if "file" in theform and "mode" in theform: fileitem = theform["file"] @@ -281,7 +159,7 @@ def load_admin(user,admin,theform): if len(fileitem.filename) > 0: # strip leading path from file name to avoid directory traversal attacks fn = os.path.basename(fileitem.filename) - msg = '
The file "' + fn + '" was uploaded successfully
' + render_data['file_uploaded'] = fn from zipfile import ZipFile zip = ZipFile(fileitem.file) file_list = [f for f in zip.namelist() if not os.path.isdir(f)] @@ -293,14 +171,14 @@ def load_admin(user,admin,theform): corpus = meta_key_val["corpus"] else: corpus = "default_corpus" - docname = filename.replace(" ","_") # No spaces in document names - docname = re.sub(r'(.+)\.[^\.]+$',r'\1',docname) # Strip extension + docname = filename.replace(" ","_") # No spaces in document names + docname = re.sub(r'(.+)\.[^\.]+$',r'\1',docname) # Strip extension if not doc_exists(docname, corpus): max_id = generic_query("SELECT MAX(id) AS max_id FROM docs", "")[0][0] if not max_id: # This is for the initial case after init db max_id = 0 - doc_id = int(max_id) + 1 - create_document(doc_id, docname, corpus, "init", "default_user", "gucorpling/gitdox", "", mode) + doc_id = int(max_id) + 1 + create_document(doc_id, docname, corpus, "init", "default_user", "gucorpling/gitdox", "", mode) else: # Document already exists, just overwrite spreadsheet/xml and metadata and set mode doc_id = generic_query("SELECT id FROM docs where corpus=? and name=?", (corpus,docname))[0][0] @@ -329,47 +207,16 @@ def load_admin(user,admin,theform): continue save_meta(doc_id, key.decode("utf8"), value.decode("utf8")) if imported > 0: - msg += 'Imported '+str(imported)+' files from archive
' - - - page+=""" -

Batch upload

-

Import multiple spreadsheets data by uploading a zip archive with SGML files

- -
- - - - - - - - -
Mode: - -
-
-
-""" - - page+=msg - - msg = "" + render_data['files_imported'] = str(imported) + + # handle sql execution sql_statements = 0 if "sqltab" in theform: fileitem = theform["sqltab"] if len(fileitem.filename) > 0: # strip leading path from file name to avoid directory traversal attacks fn = os.path.basename(fileitem.filename) - msg = '
The file "' + fn + '" was uploaded successfully
' + render_data['sql_file_imported'] = fn rows = fileitem.file.read().replace("\r","").split("\n") c1, c2 = ["",""] for i, row in enumerate(rows): @@ -382,134 +229,29 @@ def load_admin(user,admin,theform): sql = "update docs set " + c2 + " = ? where " + c1 + " = ? ;" generic_query(sql, (f2, f1)) sql_statements += 1 - if sql_statements > 0: - msg += 'Executed ' + str(sql_statements) + ' DB updates
' + render_data["sql_statements"] = sql_statements - page += """ -

Batch update DB

-

Execute multiple SQL updates, e.g. to assign documents to users from a list

- -
- - -
- """ + return render("admin", render_data) - page += msg - page+="

Database management

" - #init database, setup_db, wipe all documents +def load_user_config(user, admin, theform): + render_data = {} - page+="""
- warning: this will wipe the database! -
-
init DB
-
""" - - - - page+="
" - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) - page = page.replace("**project**",project) - page = page.replace("**skin**",skin) - - return page - - -def load_user_config(user,admin,theform): if theform.getvalue('new_pass') and user != "demo": new_pass=theform.getvalue('new_pass') update_password(user,new_pass) - if theform.getvalue('new_git_password') and user != "demo": + if theform.getvalue('new_git_password') and user != "demo": new_git_password=theform.getvalue('new_git_password') new_git_username=theform.getvalue('new_git_username') new_git_2fa=theform.getvalue('new_git_2fa') - update_git_info(user,new_git_username,new_git_password,new_git_2fa) + render_data['user'] = user + render_data['admin_eq_one'] = admin == "1" - page= "Content-type:text/html\r\n\r\n" - page+=""" - - - - - - - - - - - - - - - - **navbar** -
- -
-

Coptic XML transcription editor

-

edit user info | back to document list

- -

Edit your account information

- """ - #edit user password - username_info=""""""%user - username_info+=""" - -
username%s
new password
- """ - - - page+=username_info - page+=" \n" - page+="

note: after you changed your password you'll be logged out and you need to log in using your new password again

\n" - - #edit git info - if admin=="1": - page+="""
- - -
new git username
new git password
use two-factor auth
\n""" - - - page+="
\n" - - page += "\t\t\t
\t\t\n
\t\n\n\n" - - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) - page = page.replace("**project**",project) - page = page.replace("**skin**",skin) - - return page + return render("user_admin", render_data) def open_main_server(): @@ -521,10 +263,12 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) user = userconfig["username"] admin = userconfig["admin"] + + print("Content-type:text/html\n\n") if admin == "3": - print(load_admin(user,admin,theform)) + print(load_admin(user, admin, theform)) elif admin == "0" or admin=="1": - print(load_user_config(user,admin,theform)) + print(load_user_config(user, admin, theform)) open_main_server() diff --git a/codemirror-5.15.2/mode/conllu/conllu.js b/codemirror-5.15.2/mode/conllu/conllu.js new file mode 100644 index 0000000..087d982 --- /dev/null +++ b/codemirror-5.15.2/mode/conllu/conllu.js @@ -0,0 +1,80 @@ +// CodeMirror, copyright (c) by Marijn Haverbeke and others +// Distributed under an MIT license: http://codemirror.net/LICENSE + +(function(mod) { + if (typeof exports == "object" && typeof module == "object") // CommonJS + mod(require("../../lib/codemirror"), require("../../addon/mode/simple")); + else if (typeof define == "function" && define.amd) // AMD + define(["../../lib/codemirror", "../../addon/mode/simple"], mod); + else // Plain browser env + mod(CodeMirror); +})(function(CodeMirror) { + "use strict"; + + // Collect all conllu directives + var instructions = ["newdoc"], + instructionRegex = "(" + instructions.join('|') + ")", + instructionOnlyLine = new RegExp(instructionRegex + "\\s*$", "i"), + instructionWithArguments = new RegExp(instructionRegex + "(\\s+)", "i"); + + CodeMirror.defineSimpleMode("conllu", { + start: [ + // Comment line: This is a line starting with a comment + { + regex: /#.*$/, + token: "comment" + }, + { + regex: /^[0-9]+\t/, + token: "def" + }, + { + regex: 
/(?<=(^[^\t\n]+\t){6})[0-9]+/, + token: "def" + }, + // Highlight an instruction followed by arguments + { + regex: instructionWithArguments, + token: ["variable-2", null], + next: "arguments" + }, + { + regex: /./, + token: null + } + ], + arguments: [ + { + // Line comment without instruction arguments is an error + regex: /#.*$/, + token: "error", + next: "start" + }, + { + regex: /[^#]+\\$/, + token: null + }, + { + // Match everything except for the inline comment + regex: /[^#]+/, + token: null, + next: "start" + }, + { + regex: /$/, + token: null, + next: "start" + }, + // Fail safe return to start + { + token: null, + next: "start" + } + ], + meta: { + lineComment: "#" + } + }); + + CodeMirror.defineMIME("text/x-conllu", "conllu"); +}); diff --git a/codemirror-5.15.2/mode/conllu/index.html b/codemirror-5.15.2/mode/conllu/index.html new file mode 100644 index 0000000..adc0574 --- /dev/null +++ b/codemirror-5.15.2/mode/conllu/index.html @@ -0,0 +1,53 @@ + + +CodeMirror: Dockerfile mode + + + + + + + + + + +
+

CoNLL-U mode

+
+ + + +

CoNLL-U syntax highlighting for CodeMirror. Depends on + the simplemode addon.

+ +

MIME types defined: text/x-conllu

+
diff --git a/css/gitdox.css b/css/gitdox.css index 2242871..8d0602b 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -1,5 +1,14 @@ -#doctable{ border: 2px solid black; - border-radius: 4px;font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;background-color:hsla(40,53%,100%,0.30)} +#doctable { + border: 2px solid black; + border-radius: 4px; + font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif; + background-color: hsla(40,53%,100%,0.30); + width: 100%; +} + +#doctable input[type=text] { + width: 95%; +} h1, h2{font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;-webkit-font-smoothing: subpixel-antialiased;} @@ -125,7 +134,7 @@ padding: 10px; position:relative; } -.tooltip span +.tooltip span.msg { display:none; -moz-border-radius:6px; @@ -135,7 +144,7 @@ padding: 10px; background:white; } -.tooltip:hover span +.tooltip:hover span.msg { display:block; position:absolute; @@ -155,17 +164,63 @@ padding: 10px; height: 12.8px; } -#ValidationTableContainer{ -width: 100%} +/* for validation_rules */ +ul.tabs{ + margin: 0px; + padding: 0px; + list-style: none; +} + +ul.tabs li{ + background: none; + display: inline-block; + padding: 10px 15px; + cursor: pointer; + min-width: 100px; + font-size: 18px; + font-weight: 600; + color: #222; + text-align: center; +} + +ul.tabs li.current{ + background: #ededed; + color: #222; +} + +.tab-content{ + display: none; + background: #ededed; + padding: 15px; +} + +.tab-content.current{ + display: inherit; +} + +/* override jtable styles for consistency with rest of gitdox */ +.jtable-title { + font-size: 16px !important; + border: none !important; + border-radius: 0 !important; + background: #eeeeee !important; +} +table.jtable { + border: none !important; +} + +.ui-widget-overlay { + opacity: 0.5 !important; +} + .jtable td{ word-break: break-all; } 
#validation_report{ - max-height: 300px; - overflow-y: scroll; - overflow-x: scroll; + height: 200px; + overflow-y: auto; } tfoot { @@ -173,4 +228,14 @@ tfoot { } #filter_id{width:30px} -#filter_mode{width:60px} \ No newline at end of file +#filter_mode{width:60px} + + +table.admin { + font-family: arial, sans-serif; + border-collapse: collapse; +} +table.admin td, table.admin th { + text-align: left; + padding: 8px; +} diff --git a/css/gum.css b/css/gum.css index 00da827..248d4d8 100644 --- a/css/gum.css +++ b/css/gum.css @@ -14,7 +14,7 @@ body { margin-right: 0px; /*auto*/ margin-left: 0px; /*auto*/ margin-bottom: 60px; - width: 100%; /*832px*/ + min-height: 100%; height: 100%; z-index: 2; @@ -720,4 +720,4 @@ a.tooltip2 span #doctable{background-color:#dddddd !important; font-family: arial, sans-serif !important;} #doctable th, #doctable td {border: 1px solid darkgray;} -.CodeMirror-wrap{border: 1px solid black !important} \ No newline at end of file +.CodeMirror-wrap{border: 1px solid black !important} diff --git a/editor.py b/editor.py index 66cba82..154f89b 100755 --- a/editor.py +++ b/editor.py @@ -16,7 +16,8 @@ import platform, re from paths import ether_url, get_menu, get_nlp_credentials from modules.ether import make_spreadsheet, delete_spreadsheet, sheet_exists, get_socialcalc, ether_to_sgml, \ - build_meta_tag, get_ether_stylesheet_select, get_file_list + build_meta_tag, get_ether_stylesheets, get_file_list +from modules.renderer import render # Support IIS site prefix on Windows if platform.system() == "Windows": @@ -29,7 +30,6 @@ userdir = scriptpath + "users" + os.sep templatedir = scriptpath + "templates" + os.sep config = ConfigObj(userdir + 'config.ini') -skin = config["skin"] project = config["project"] editor_help_link = config["editor_help_link"] # Captions and API URLs for NLP buttons @@ -66,11 +66,16 @@ def serialize_file(text_content,file_name): f.write(text_content)#.encode("utf8")) f.close() +def get_user_list(): + user_list=[] + scriptpath 
= os.path.dirname(os.path.realpath(__file__)) + os.sep + userdir = scriptpath + "users" + os.sep + return get_file_list(userdir,"ini",forbidden=["admin","default","config"],hide_extension=True) def load_page(user,admin,theform): - print("Content-type:text/html\r\n\r\n") global ether_url global code_2fa + if theform.getvalue("2fa"): code_2fa = theform.getvalue("2fa") else: @@ -87,7 +92,6 @@ def load_page(user,admin,theform): schema = "" doc_id = "" # Should only remain so if someone navigated directly to editor.py docname = "" - mymsg = "" old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = ["", "", "", "", "", "", ""] if int(admin) > 0: @@ -95,6 +99,9 @@ def load_page(user,admin,theform): else: git_username, git_token, git_2fa = (None, None, None) + # dict of variables we'll need to render the html + render_data = {} + if theform.getvalue('id'): doc_id = theform.getvalue('id') if int(doc_id) > int(max_id): @@ -106,7 +113,7 @@ def load_page(user,admin,theform): corpus = "default_corpus" schema = "" text_content = "" - # If one of the four forms is edited, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc) + # If one of the four forms is edited or we're cloning a doc, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc) if theform.getvalue('edit_docname') and user != "demo": if docname != 'new_document': if doc_id > max_id: @@ -151,14 +158,15 @@ def load_page(user,admin,theform): else: update_assignee(doc_id, assignee) - if theform.getvalue('edit_schema') and user != "demo": - schema = theform.getvalue('edit_schema') - if schema != "--none--": - if doc_id > max_id: - create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) - max_id = doc_id - else: - update_schema(doc_id, schema) + # cloning metadata from an existing doc into a new doc + if theform.getvalue('source_doc'): + source_meta = 
get_doc_meta(theform.getvalue('source_doc')) + if doc_id > max_id: + create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) + max_id = doc_id + for meta in source_meta: + m_key, m_val = meta[2:4] + save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) else: # Get previous values from DB @@ -169,15 +177,28 @@ def load_page(user,admin,theform): # Handle switch to spreadsheet mode if NLP spreadsheet service is called if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet" and mode == "xml" and user != "demo": - api_call = spreadsheet_nlp_api - nlp_user, nlp_password = get_nlp_credentials() data_to_process = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0] - data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} - resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) - sgml=resp.text.encode("utf8") + api_call = spreadsheet_nlp_api + if api_call != "": + nlp_user, nlp_password = get_nlp_credentials() + data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} + resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) + sgml = resp.text.encode("utf8") + else: + sgml = data_to_process.encode("utf8") out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") mode = "ether" + # handle copying metadata + if theform.getvalue('source_doc'): + source_meta = get_doc_meta(theform.getvalue('source_doc')) + existing_meta_keys = [x[2] for x in get_doc_meta(doc_id)] + # don't overwrite existing keys + meta_to_write = [x for x in source_meta if x[2] not in existing_meta_keys] + for meta in meta_to_write: + m_key, m_val = meta[2], meta[3] + save_meta(int(doc_id), m_key, m_val) + if theform.getvalue('edit_docname'): docname = theform.getvalue('edit_docname') @@ -215,10 +236,6 @@ def load_page(user,admin,theform): mode = theform.getvalue('edit_mode') if mode != old_mode and user != "demo": update_mode(doc_id,mode) 
- if theform.getvalue('edit_schema'): - schema = theform.getvalue('edit_schema') - if schema != old_schema and user != "demo": - update_schema(doc_id, schema) if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet": # mode has been changed to spreadsheet via NLP update_mode(doc_id, "ether") mode = "ether" @@ -228,9 +245,7 @@ def load_page(user,admin,theform): old_socialcalc = get_socialcalc(ether_url, old_sheet_name) out, err = make_spreadsheet(old_socialcalc, ether_url + "_/gd_" + corpus + "_" + docname, "socialcalc") if out == "OK": - out, err = delete_spreadsheet(ether_url,old_sheet_name) - else: - mymsg += "out was: " + out + " err was" + err + delete_spreadsheet(ether_url,old_sheet_name) text_content = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0] @@ -239,6 +254,7 @@ def load_page(user,admin,theform): if theform.getvalue('code'): text_content = theform.getvalue('code') text_content = text_content.replace("\r","") + text_content = re.sub(r'&(?!amp;)',r'&',text_content) # Escape unescaped XML & text_content = unicode(text_content.decode("utf8")) if user != "demo": if int(doc_id)>int(max_id): @@ -295,123 +311,41 @@ def load_page(user,admin,theform): shutil.rmtree(prefix+subdir) if theform.getvalue('nlp_xml') == "do_nlp_xml" and mode == "xml": - api_call=xml_nlp_api - nlp_user, nlp_password = get_nlp_credentials() - data = {"data":text_content, "lb":"line", "format":"pipes"} - resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) - text_content=resp.text + api_call = xml_nlp_api + if api_call != "": + nlp_user, nlp_password = get_nlp_credentials() + data = {"data":text_content, "lb":"line", "format":"pipes"} + resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) + text_content=resp.text # Editing options # Docname # Filename - push_git = """ - """ - if git_2fa == "true": - push_git += """""" - push_git += """
Commit
- """ - + status_list = open(prefix+"status.tab").read().replace("\r","").split("\n") + render_data['status_options'] = [{'text': x, 'selected': x == status} for x in status_list] + render_data['assignee_options'] = [{'text': x, 'selected': x == assignee} for x in get_user_list()] + render_data['mode_options'] = [{'text': x, 'selected': x == mode} for x in ["xml", "ether"]] + render_data['nlp_service'] = {'xml_button_html': xml_nlp_button.decode("utf8"), + 'spreadsheet_button_html': spreadsheet_nlp_button.decode("utf8"), + 'disabled': user == "demo" or mode == "ether"} + render_data['git_2fa'] = git_2fa == "true" if git_status: # Remove some html keyword symbols in the commit message returned by github3 - push_msg=git_status.replace('<','') - push_msg=push_msg.replace('>','') - push_git+="""

""" + push_msg + ' successful' + """

""" - - status_list = open(prefix+"status.tab").read().replace("\r","").split("\n") - - options = "" - for stat in status_list: - options +='\n' - options = options.replace('">'+status +'<', '" selected="selected">'+status+'<') - - edit_status="""" - # Get XML schema list - schema_list = ['--none--'] - scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep - schemadir = scriptpath + "schemas" + os.sep - - schema_list += get_file_list(schemadir,"xsd",hide_extension=True) - - edit_schema = """" - # edit_schema = edit_schema.replace(schema+'"', schema+'" selected="selected"') - - # Get user_list from the logintools - user_list=[] - scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep - userdir = scriptpath + "users" + os.sep - - user_list = get_file_list(userdir,"ini",forbidden=["admin","default","config"],hide_extension=True) - - edit_assignee="""" - - edit_mode = '''''' - edit_mode = edit_mode.replace(mode+'"', mode+'" selected="selected"') - - # Metadata - if theform.getvalue('metakey'): - metakey = theform.getvalue('metakey') - metavalue = theform.getvalue('metavalue').replace("\t","").replace("\n","").replace("\r","") - if user != "demo": - save_meta(int(doc_id),metakey.decode("utf8"),metavalue.decode("utf8")) - if theform.getvalue('metaid'): - metaid = theform.getvalue('metaid') - if user != "demo": - delete_meta(metaid, doc_id) - if theform.getvalue('corpus_metakey'): - metakey = theform.getvalue('corpus_metakey') - metavalue = theform.getvalue('corpus_metavalue').replace("\t","").replace("\n","").replace("\r","") - if user != "demo": - save_meta(int(doc_id),metakey.decode("utf8"),metavalue.decode("utf8"),corpus=True) - if theform.getvalue('corpus_metaid'): - metaid = theform.getvalue('corpus_metaid') - if user != "demo": - delete_meta(metaid, doc_id, corpus=True) - - nlp_service = """
""" + xml_nlp_button + """
""" + \ - """
"""+ spreadsheet_nlp_button + """
""" - nlp_service = nlp_service.decode("utf8") - - disabled_nlp_service = """
"""+xml_nlp_button+"""
""" + \ - """
""" +spreadsheet_nlp_button + """
""" - disabled_nlp_service = disabled_nlp_service.decode("utf8") - - # Disable NLP services in demo - if user == "demo": - nlp_service = disabled_nlp_service - - page= ""#"Content-type:text/html\r\n\r\n" + # prepare embedded editor html if mode == "ether": - embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "ether.html").read() + render_data['ether_mode'] = True ether_url += "gd_" + corpus + "_" + docname - - stylesheet_select = get_ether_stylesheet_select() - embedded_editor = embedded_editor.replace("**stylesheet_select**",stylesheet_select) + render_data['ether_url'] = ether_url + render_data['ether_stylesheets'] = get_ether_stylesheets() if "file" in theform and user != "demo": fileitem = theform["file"] if len(fileitem.filename) > 0: # strip leading path from file name to avoid directory traversal attacks fn = os.path.basename(fileitem.filename) - msg = 'The file "' + fn + '" was uploaded successfully' if fn.endswith(".xls") or fn.endswith(".xlsx"): make_spreadsheet(fileitem.file.read(),"https://etheruser:etherpass@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname,"excel") else: @@ -421,58 +355,43 @@ def load_page(user,admin,theform): for (key, value) in iteritems(meta_key_val): key = key.replace("@","_") save_meta(int(doc_id),key.decode("utf8"),value.decode("utf8")) - else: - msg = "no file was uploaded" + else: + render_data['ether_mode'] = False - embedded_editor = embedded_editor.replace("**source**",ether_url) + # stop here if no doc selected + if doc_id: + render_data['doc_is_selected'] = len(doc_id) != 0 else: - embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "codemirror.html").read() + return render("editor", render_data) - page += urllib.urlopen(prefix + "templates" + os.sep + "editor.html").read() - page += mymsg - page = page.replace("**embedded_editor**",embedded_editor) + render_data['id'] = doc_id + render_data['mode'] = mode + render_data['schema'] = schema + render_data['docname'] = docname + 
render_data['corpusname'] = corpus - if len(doc_id) == 0: - exp = re.compile(r"
.*
",re.DOTALL) - page = exp.sub("""

No document selected | back to document list

""",page) - else: - metadata = print_meta(doc_id) - corpus_metadata = print_meta(doc_id,corpus=True) - #corpus_metadata = "" - page=page.replace("**content**",text_content) - page=page.replace("**docname**",docname) - page=page.replace("**corpusname**",corpus) - page=page.replace("**edit_status**",edit_status) - page=page.replace("**repo**",repo_name) - page=page.replace("**edit_schema**",edit_schema) - page=page.replace("**edit_assignee**",edit_assignee) - page=page.replace("**edit_mode**",edit_mode) - page=page.replace("**metadata**",metadata) - page=page.replace("**corpus_metadata**",corpus_metadata) - page=page.replace("**disabled_NLP**",disabled_nlp_service) - page=page.replace("**NLP**",nlp_service) - page=page.replace("**id**",doc_id) - page=page.replace("**mode**",mode) - page=page.replace("**schema**",schema) - if int(admin)>0: - page=page.replace("**github**",push_git) - else: - page = page.replace("**github**", '') + render_data['text_content'] = text_content + render_data['repo'] = repo_name - if int(admin) < 3: - page = page.replace('onblur="validate_docname();"','onblur="validate_docname();" disabled="disabled" class="disabled"') - page = page.replace('onblur="validate_corpusname();"','onblur="validate_corpusname();" disabled="disabled" class="disabled"') - page = page.replace('onblur="validate_repo();"','onblur="validate_repo();" disabled="disabled" class="disabled"') - page = page.replace('''
''','''
''') + render_data["admin_gt_zero"] = int(admin) > 0 + render_data["admin_eq_three"] = admin == "3" - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**", get_menu()) - page = page.replace("**header**", header) - page = page.replace("**project**", project) - page = page.replace("**skin**", skin) - page = page.replace("**editor_help_link**",editor_help_link) + # handle clone meta button, and allow github pushing + if int(admin) > 0: + doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",()) + render_data["docs"] = [] + for doc in doc_list: + doc_vars = {} + doc_vars["id"] = str(doc[0]) + doc_vars["corpus"] = doc[1] + doc_vars["name"] = doc[2] + render_data['docs'].append(doc_vars) - return page + render_data["can_save"] = not (int(admin) < 3) + render_data["editor_help_link_html"] = editor_help_link + render_data["first_load"] = len(theform.keys()) == 1 + + return render("editor", render_data) def open_main_server(): @@ -485,7 +404,9 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) user = userconfig["username"] admin = userconfig["admin"] - print(load_page(user,admin,theform).encode("utf8")) + + print("Content-type:text/html\n\n") + print(load_page(user, admin, theform).encode("utf8")) if __name__ == "__main__": diff --git a/editor_metadata_service.py b/editor_metadata_service.py new file mode 100755 index 0000000..f01a720 --- /dev/null +++ b/editor_metadata_service.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import json +import cgi +import os +import platform +from modules.gitdox_sql import * +from modules.logintools import login + +parameter = cgi.FieldStorage() +action = parameter.getvalue("action") +id = parameter.getvalue("id") +docid = parameter.getvalue("docid") +key = parameter.getvalue("key") +value = parameter.getvalue("value") +corpus = parameter.getvalue("corpus") + +if 
platform.system() == "Windows": + prefix = "transc\\" +else: + prefix = "" + +def read_options(**kwargs): + if "file" in kwargs: + kwargs["file"] = prefix + kwargs["file"] + names = open(kwargs["file"],'r').read().replace("\r","").split("\n") + names = list(name[:name.find("\t")] for name in names) + elif "names" in kwargs: + names = kwargs[names] + selected = kwargs["selected"] if "selected" in kwargs else None + return names + +def row_to_dict(row): + return {'id': row[1], + 'docid': row[0], + 'key': row[2], + 'value': row[3]} + +def get_metadata(): + resp = {} + try: + resp['Result'] = 'OK' + resp['Records'] = [row_to_dict(r) for r in get_doc_meta(docid, corpus=corpus)] + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not fetch metadata' + print json.dumps(resp) + +def get_default_key_options(): + resp = {} + try: + resp['Result'] = 'OK' + if not corpus: + resp['Options'] = read_options(file='..' + os.sep + 'metadata_fields.tab') + else: + resp['Options'] = read_options(file='..' 
+ os.sep + 'corpus_metadata_fields.tab') + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not fetch metadata key options' + print json.dumps(resp) + +def create_metadata(): + resp = {} + try: + id = save_meta(int(docid), key.decode("utf8"), value.decode("utf8"), corpus=corpus) + resp['Result'] = 'OK' + resp['Record'] = {'id': id, + 'docid': docid, + 'key': key, + 'value': value} + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not create metadata' + print json.dumps(resp) + +def delete_metadata(): + resp = {} + try: + delete_meta(int(id), int(docid), corpus=corpus) + resp['Result'] = 'OK' + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not delete metadata' + print json.dumps(resp) + + + +def open_main_server(): + thisscript = os.environ.get('SCRIPT_NAME', '') + loginaction = None + theform = cgi.FieldStorage() + scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + userdir = scriptpath + "users" + os.sep + loginaction, userconfig = login(theform, userdir, thisscript, loginaction) + user = userconfig["username"] + admin = userconfig["admin"] + + print "Content-type:application/json\r\n\r\n" + if action == "list": + get_metadata() + elif action == "keys": + get_default_key_options() + elif user == "demo": + print json.dumps({'Result': 'Error', 'Message': 'Demo user may not make changes.'}) + elif action == "create": + create_metadata() + elif action == "delete": + delete_metadata() + else: + print json.dumps({'Result': 'Error', + 'Message': 'Unknown action: "' + str(action) + '"'}) + +if __name__ == '__main__': + open_main_server() diff --git a/export.py b/export.py index 594cd21..e828546 100755 --- a/export.py +++ b/export.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- import cgi from modules.gitdox_sql import * @@ -104,7 +104,7 @@ def export_doc(doc_id, stylesheet=None): 
#print("Content-type:text/html\r\n\r\n") import cgitb - cgitb.enable() + #cgitb.enable() from paths import ether_url thisscript = os.environ.get('SCRIPT_NAME', '') diff --git a/img/login_logo.png b/img/login_logo.png index b763ecf..2c560da 100644 Binary files a/img/login_logo.png and b/img/login_logo.png differ diff --git a/index.py b/index.py index 55c6ddd..e81fbf4 100755 --- a/index.py +++ b/index.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- # Import modules for CGI handling import cgi, cgitb @@ -11,6 +11,7 @@ import urllib from modules.gitdox_sql import * from modules.ether import delete_spreadsheet +from modules.renderer import render from paths import ether_url, get_menu from os.path import isfile, join import platform @@ -23,27 +24,6 @@ project = "Scriptorium" - -def make_options(**kwargs): - if "file" in kwargs: - kwargs["file"] = prefix + kwargs["file"] - names = open(kwargs["file"],'r').read().replace("\r","").split("\n") - names = list(name[:name.find("\t")] for name in names) - elif "names" in kwargs: - names = kwargs[names] - selected = kwargs["selected"] if "selected" in kwargs else None - options="" - for name in names: - if name!='': - options+='\n' - for corpus in corpora: - corpus_list += '\n' if "sel_corpus" in theform: selected_corpus = theform.getvalue("sel_corpus") - corpus_list = corpus_list.replace('="'+selected_corpus+'"','="'+selected_corpus+'" selected="selected"') + render_data["sel_corpus"] = selected_corpus + # provide list of corpora for corpus selection dropdown + corpora = get_corpora() + render_data['corpora'] = [] + for corpus in corpora: + render_data['corpora'].append({"name": corpus[0], + "selected": selected_corpus == corpus[0]}) + + # find the documents we need to display if selected_corpus != "" and selected_corpus != "all": doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs where corpus=? 
ORDER BY corpus, name COLLATE NOCASE", (selected_corpus,)) if len(doc_list) == 0: # Restricted query produced no documents, switch back to all document display @@ -149,105 +68,32 @@ def load_landing(user, admin, theform): if not max_id: # This is for the initial case after init db max_id = 0 - table = """""" - table += """""" - table += """ - - - - - - - """ - table += """""" - + render_data['docs'] = [] for doc in doc_list: - row="" - for item in doc: - if item == "xml": - item = ' ' - mode = "xml" - elif item == "ether": - item = ' ' - mode = "ether" - elif "-" in str(item): - item = item.replace("-","‑") # Use non-breaking hyphens - row += cell(item) - id = str(doc[0]) - - # validation icons - icons = """
""" - if mode == "xml": - icons += """ """ - elif mode == "ether": - icons += """ """ - icons += """ """ - icons += """
""" - - # edit document - button_edit = """""" - id_code = """" - button_edit += id_code - button_edit += """
edit
""" - - #delete document - button_delete="""""" - button_delete+=id_code - if int(admin) > 0: - button_delete+="""
delete
- """ - else: - button_delete += """
delete
- """ - - row += cell(icons) - row += cell(button_edit) - row += cell(button_delete) - row += "" - table += row - - table+="
idcorpusdocumentstatusassignedmodevalidateactions
" - - if admin == "3": - validation_rules = """
-
- - validation rules
""" - else: - validation_rules = "" + doc_vars = {} + doc_vars["xml"] = "xml" in doc + doc_vars["ether"] = "ether" in doc + doc_vars["other_mode"] = not (doc_vars["xml"] or doc_vars["ether"]) - page = "" - - menu = get_menu() - menu = menu.encode("utf8") - - landing = open(prefix + "templates" + os.sep + "landing.html").read() - header = open(prefix + "templates" + os.sep + "header.html").read() + id = str(doc[0]) + doc_vars["id"] = id + doc_vars["corpus"] = doc[1] + doc_vars["name"] = doc[2] + doc_vars["status"] = doc[3] + doc_vars["assignee"] = doc[4] + render_data['docs'].append(doc_vars) + + render_data["admin_gt_zero"] = int(admin) > 0 + render_data["admin_eq_three"] = admin == "3" + render_data["max_id_plus1"] = str(max_id + 1) + render_data["user"] = user scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep userdir = scriptpath + "users" + os.sep config = ConfigObj(userdir + 'config.ini') - skin = config["skin"] - project = config["project"] - - landing = landing.replace("**max_id_plus1**", str(max_id + 1)) - landing = landing.replace("**user**", user) - landing = landing.replace("**project**", project) - landing = landing.replace("**header**", header) - landing = landing.replace("**skin**", skin) - landing = landing.replace("**validation_rules**", validation_rules) - landing = landing.replace("**corpora**", corpus_list) - landing = landing.replace("**sel_corpus**", selected_corpus) - landing = landing.replace("**table**", table) - landing = landing.replace("**navbar**", menu) - if int(admin) > 0: - landing = landing.replace("**create_doc**",'''onclick="document.getElementById('form_new').submit();" class="button"''') - else: - landing = landing.replace("**create_doc**",'''class="button disabled"''') - page += landing - print("Content-type:text/html\n\n") + render_data["project"] = config["project"] - return page + return render("index", render_data) def open_main_server(): @@ -260,7 +106,10 @@ def open_main_server(): action, userconfig = 
login(theform, userdir, thisscript, action) user = userconfig["username"] admin = userconfig["admin"] - print(load_landing(user,admin,theform)) + + print("Content-type:text/html\n\n") + print(load_landing(user, admin, theform)) -open_main_server() +if __name__ == '__main__': + open_main_server() diff --git a/js/editor.js b/js/editor.js index d0f6e8e..a0550b8 100755 --- a/js/editor.js +++ b/js/editor.js @@ -1,11 +1,5 @@ myPopup = ''; -function openPopup(url) { - myPopup = window.open(url,'popupWindow','width=640,height=480'); - if (!myPopup.opener) - myPopup.opener = self; -} - function validate_doc() { $("#validate_editor").addClass("disabledbutton"); $("#validation_report").html("Validating..."); @@ -13,7 +7,7 @@ function validate_doc() { var mode = $("#mode").val(); var schema = $("#schema").val(); $.ajax({ - url: 'modules/validate_spreadsheet.py', + url: 'validate.py', type: 'post', data: {doc_id: docId, mode: mode, schema: schema}, dataType: "html", @@ -31,9 +25,9 @@ function validate_doc() { function do_save(){ if (document.getElementById('code')!=null){ - val = document.getElementById('code').value.replace('&','&'); - document.getElementById('code').value = val; + val = document.getElementById('code').value.replace(/&(?!amp;)/g,'&'); editor.getDoc().setValue(val); + document.getElementById('code').value = val; } document.getElementById('editor_form').submit(); } @@ -43,4 +37,159 @@ function export_ether(){ stylesheet = document.getElementById('ether_stylesheet').value; window.open('export.py?docs=' + doc_id + '&stylesheet=' + stylesheet, '_new'); -} \ No newline at end of file +} + +$(document).ready(function () { + // get id from hidden form element. 
Watch out, might break in the future + var docid = $("#id").val(); + $('#metadata-table-container').jtable({ + title: ' ', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'meta'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'editor_metadata_service.py?action=list&docid=' + docid, + type: 'POST', + dataType: 'json', + data: jtParams, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'editor_metadata_service.py?action=create', + deleteAction: 'editor_metadata_service.py?action=delete&docid=' + docid + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + docid: { + title: 'Document ID', + defaultValue: docid, + type: 'hidden' + }, + key: { + title: 'Key', + sorting: false + }, + value: { + title: 'Value', + sorting: false + } + }, + // for autocomplete support https://github.com/volosoft/jtable/issues/115 + formCreated: function(event, formData) { + $.ajax({ + url: 'editor_metadata_service.py?action=keys', + type: 'POST', + dataType: 'json', + data: {}, + success: function(data) { + formData.form.find('[name=key]').autocomplete({ + source: data['Options'] + }); + } + }); + } + }); + + $('#metadata-table-container').jtable('load'); +}); + +$(document).ready(function () { + // get id from hidden form element. 
Watch out, might break in the future + var docid = $("#id").val(); + $('#corpus-metadata-table-container').jtable({ + title: ' ', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'meta'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'editor_metadata_service.py?corpus=true&action=list&docid=' + docid, + type: 'POST', + dataType: 'json', + data: jtParams, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'editor_metadata_service.py?corpus=true&action=create', + deleteAction: 'editor_metadata_service.py?corpus=true&action=delete&docid=' + docid + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + docid: { + defaultValue: docid, + type: 'hidden' + }, + key: { + title: 'Key', + sorting: false + }, + value: { + title: 'Value', + sorting: false + } + }, + // for autocomplete support https://github.com/volosoft/jtable/issues/115 + formCreated: function(event, formData) { + $.ajax({ + url: 'editor_metadata_service.py?corpus=true&action=keys', + type: 'POST', + dataType: 'json', + data: {}, + success: function(data) { + formData.form.find('[name=key]').autocomplete({ + source: data['Options'] + }); + } + }); + } + }); + + $('#corpus-metadata-table-container').jtable('load'); +}); + + +$(document).ready(function(){ + function activateTab(liId, divId) { + $('ul.tabs li').removeClass('current'); + $('.tab-content').removeClass('current'); + $("#"+liId).addClass('current'); + $("#"+divId).addClass('current'); + } + + var liId = localStorage.getItem(location.pathname + "activeLiId"); + var divId = localStorage.getItem(location.pathname + "activeDivId"); + if (liId && divId) { + activateTab(liId, divId); + } + + $('ul.tabs li').click(function() { + var liId = $(this).attr('id'); + var divId = $(this).attr('data-tab'); + activateTab(liId, divId); + localStorage.setItem(location.pathname + "activeLiId", liId); + 
localStorage.setItem(location.pathname + "activeDivId", divId); + }); +}); diff --git a/js/index.js b/js/index.js index e50a54a..3373a1d 100644 --- a/js/index.js +++ b/js/index.js @@ -2,48 +2,58 @@ function validate_all() { $("#validate_landing").addClass("disabledbutton"); $("#validate_landing").html(' validating...'); $.ajax({ - url: 'modules/validate_spreadsheet.py', + url: 'validate.py', type: 'post', data: {doc_id: 'all'}, dataType: "json", success: function(response) { console.log(response); $.each(response, function(key, value) { - // 1 vs 2 is for ordering ether/xml before metadata + // 1 vs 2 is for ordering ether/xml before metadata, 3 is used for export // sort is hidden text at beginning of cell for sorting purposes var output1 = ''; var output2 = ''; + var output3 = ''; var sort1 = ''; var sort2 = ''; + var sort3 = ''; $.each(value, function(k,v) { if (k == "ether") { - if (v == "spreadsheet is valid") { + if (v.indexOf("Spreadsheet is valid") > -1) { color = 'green'; sort1 = 'v'; } + else if (v.indexOf("No applicable") > -1) { + color = 'gray'; + sort1 = 'n'; + } else { color = 'red'; sort1 = 'i'; } - output1 += '
 ' + v + '
'; + output1 += '
 ' + v + '
'; } else if (k == "meta") { - if (v == "metadata is valid") { + if (v.indexOf("Metadata is valid") > -1) { color = 'green'; sort2 = 'v'; } + else if (v.indexOf("No applicable") > -1) { + color = 'gray'; + sort2 = 'n'; + } else { color = 'red'; sort2 = 'i'; } - output2 += '
 ' + v + '
'; + output2 += '
 ' + v + '
'; } else if (k == "xml") { - if (v.indexOf("validates") !== -1) { + if (v.indexOf("XML is valid") > -1) { color = 'green'; sort1 = 'v'; } - else if (v == "No schema
") { + else if (v.indexOf("No applicable") > -1) { color = 'gray'; sort1 = 'n'; } @@ -51,11 +61,32 @@ function validate_all() { color = 'red'; sort1 = 'i'; } - output1 += '
 ' + v + '
'; + output1 += '
 ' + v + '
'; + } + else if (k == "export") { + if (v.indexOf("Export is valid") > -1) { + color = 'green'; + disp = 'inline-block'; + sort3 = 'v'; + } + else if (v.indexOf("No applicable") > -1) { + color = 'gray'; + disp = 'none'; + sort3 = 'n'; + } + else { + color = 'red'; + disp = 'inline-block'; + sort3 = 'i'; + } + output3 += '
 ' + v + '
'; } }); - $("#validate_"+key).before(""); - $("#validate_"+key).html(output1 + output2); + if (!output3) { + output3 = '
'; + } + $("#validate_"+key).before(""); + $("#validate_"+key).html(output1 + output2 + output3); }); $("#validate_landing").removeClass("disabledbutton"); $("#validate_landing").html(' re-validate'); diff --git a/js/validation_rules.js b/js/validation_rules.js new file mode 100644 index 0000000..fb13e11 --- /dev/null +++ b/js/validation_rules.js @@ -0,0 +1,242 @@ +$(document).ready(function () { + $('#xml-table-container').jtable({ + title: 'XML Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'xml'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'validation_rules_service.py?action=list', + type: 'POST', + dataType: 'json', + data: jtParams, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + domain: { + defaultValue: 'xml', + type: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + name: { + title: 'XSD Schema', + options: 'validation_rules_service.py?action=listschemas&extension=xsd' + } + } + }); + $('#xml-table-container').jtable('load'); +}); + +$(document).ready(function () { + $('#meta-table-container').jtable({ + title: 'Metadata Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'meta'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'validation_rules_service.py?action=list', + type: 'POST', + dataType: 'json', + data: jtParams, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'validation_rules_service.py?action=create', + updateAction: 
'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + domain: { + defaultValue: 'meta', + type: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + name: { + title: 'Name' + }, + operator: { + title: 'Operator', + options: ['~', 'exists'] + }, + argument: { + title: 'Argument' + } + } + }); + $('#meta-table-container').jtable('load'); +}); + +$(document).ready(function () { + $('#ether-table-container').jtable({ + title: 'EtherCalc Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'ether'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'validation_rules_service.py?action=list', + type: 'POST', + dataType: 'json', + data: jtParams, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + domain: { + defaultValue: 'ether', + type: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + name: { + title: 'Name' + }, + operator: { + title: 'Operator', + options: ['~', '|', '=', '==', '>', 'exists', 'doesntexist'] + }, + argument: { + title: 'Argument' + } + } + }); + $('#ether-table-container').jtable('load'); +}); + +$(document).ready(function () { + $('#export-table-container').jtable({ + title: 'Export Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'export'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'validation_rules_service.py?action=list', + type: 'POST', + dataType: 'json', + data: jtParams, + success: 
function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + domain: { + defaultValue: 'export', + type: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + name: { + title: 'Export Spec', + options: 'validation_rules_service.py?action=listschemas&extension=ini' + }, + argument: { + title: 'XSD Schema', + options: 'validation_rules_service.py?action=listschemas&extension=xsd' + } + } + }); + $('#export-table-container').jtable('load'); +}); + +$(document).ready(function(){ + function activateTab(liId, divId) { + $('ul.tabs li').removeClass('current'); + $('.tab-content').removeClass('current'); + $("#"+liId).addClass('current'); + $("#"+divId).addClass('current'); + } + + var liId = localStorage.getItem(location.pathname + "activeLiId"); + var divId = localStorage.getItem(location.pathname + "activeDivId"); + if (liId && divId) { + activateTab(liId, divId); + } + + $('ul.tabs li').click(function() { + var liId = $(this).attr('id'); + var divId = $(this).attr('data-tab'); + activateTab(liId, divId); + localStorage.setItem(location.pathname + "activeLiId", liId); + localStorage.setItem(location.pathname + "activeDivId", divId); + }); +}); diff --git a/modules/ether.py b/modules/ether.py index 185a8fc..96a5135 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -18,12 +18,13 @@ from configobj import ConfigObj from ast import literal_eval import json +from copy import copy import cgi +import requests from xml.sax.saxutils import escape __version__ = "2.0.0" - class ExportConfig: def __init__(self, **kwargs): @@ -95,6 +96,67 @@ def read_config(self,config_file): else: self.template = "\n%%body%%\n\n" + +def parse_ether(ether, 
doc_id=None): + """Take in raw socialcalc data and turn it into a dict of Cells. Used in validation.""" + + class Cell: + def __init__(self, col, row, content, span): + self.col = col + self.row = row + self.header = "" + self.content = content + self.span = span + + def __repr__(self): + return "" + + ether_lines = ether.splitlines() + + # find col letter corresponding to col name + parsed = defaultdict(list) + colmap = defaultdict(list) + rev_colmap = {} + all_cells = [] + for line in ether_lines: + if line.startswith("cell:"): # Cell row + # A maximal row looks like this incl. span: cell:F2:t:LIRC2014_chw0oir:f:1:rowspan:289 + # A minimal row without formatting: cell:C2:t:JJ:f:1 + parts = line.split(":") + if len(parts) > 3: # Otherwise invalid row + cell_id = parts[1] + cell_row = cell_id[1:] + cell_col = cell_id[0] + # We'd need something like this to support more than 26 cols, i.e. columns AA, AB... + # for c in cell_id: + # if c in ["0","1","2","3","4","5","6","7","8","9"]: + # cell_row += c + # else: + # cell_col += c + cell_content = parts[3].replace("\\c", ":") + cell_span = parts[-1] if "rowspan:" in line else "1" + + # record col name + if cell_row == "1": + colmap[cell_content].append(cell_col) + rev_colmap[cell_col] = cell_content + + cell = Cell(cell_col, cell_row, cell_content, cell_span) + parsed[cell_col].append(cell) + all_cells.append(cell) + + for cell in all_cells: + if cell.col in rev_colmap: + cell.header = rev_colmap[cell.col] + else: + if doc_id is None: + doc_id = "unknown" + raise IOError("Undocumented column: " + cell.col + " in '" + str(cell) + " from doc: " + str(doc_id)) + + parsed["__colmap__"] = colmap # Save colmap for apply_rule + return parsed + + def unescape_xml(text): # Fix various common compounded XML escapes text = text.replace("&lt;","<").replace("&gt;",">") @@ -181,31 +243,11 @@ def get_file_list(path,extension,hide_extension=False,forbidden=None): return outfiles -def get_ether_stylesheet_select(): - +def 
get_ether_stylesheets(): scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep stylesheet_dir = scriptpath + os.sep + ".." + os.sep + "schemas" + os.sep - stylesheet_list = get_file_list(stylesheet_dir,"ini",hide_extension=True) - select = """\n" - return select - - -def get_corpus_select(): - - corpora = get_corpora() - select = """\n" - return select + return stylesheet_list def flush_open(annos, row_num, colmap): @@ -218,32 +260,57 @@ def flush_open(annos, row_num, colmap): def flush_close(closing_element, last_value, last_start, row_num, colmap, aliases): flushed = "" - for alias in aliases[closing_element]: - if last_start[alias] < row_num - 1: - span_string = ":rowspan:" + str(row_num - last_start[alias]) + + for alias in aliases[closing_element][-1]: + stack_len = len(last_start[alias]) + + if stack_len > 0 and last_start[alias][-1] < row_num - 1: + span_string = ":rowspan:" + str(row_num - last_start[alias][-1]) else: span_string = "" - flushed += "cell:" + colmap[alias] + str(last_start[alias]) + ":t:" + last_value[alias]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + + # Use t for tvf to leave links on + flushed += ("cell:" + + colmap[alias][stack_len - 1] + + str(last_start[alias][-1]) + + ":t:" + str(last_value[alias][-1]) + + ":f:1:tvf:1" + span_string + "\n") + + # pop the stack since we've closed a tag + last_value[alias].pop() + last_start[alias].pop() + + aliases[closing_element].pop() return flushed -def number_to_letter(number): - # Currently support up to 26 columns; no support for multiletter column headers beyond letter Z +def number_to_letters(number): if number < 27: - return chr(number + ord('a')-1).upper() + return chr(number + ord('a') - 1).upper() else: - return None + char1 = chr((number // 26) + ord('a')-1).upper() + char2 = chr((number % 26) + ord('a')-1).upper() + return char1 + char2 def sgml_to_ether(sgml, ignore_elements=False): - sgml = sgml.replace("\r","") - current_row = 2 open_annos = 
defaultdict(list) + + # a mapping from a tag name to a list of values. the list is a stack + # where the most recently encountered opening tag's value/start row + # is kept on the right side of the list. whenever we see a closing tag + # we pop from the stack, and whenever we see an opening tag we push + # (append) to the stack + last_value = defaultdict(list) + last_start = defaultdict(list) + + # maps from tags to a similar stack data structure where the top of the stack + # (i.e. the right side of the list) contains all the annotations that were + # present on the most recently opened nested element aliases = defaultdict(list) - last_value = {} - last_start = {} + + # values in this dict are also lists which follow the pattern described above colmap = OrderedDict() - maxcol = 1 preamble = """socialcalc:version:1.0 MIME-Version: 1.0 @@ -263,45 +330,54 @@ def sgml_to_ether(sgml, ignore_elements=False): """ + sgml = sgml.replace("\r","") + output = "" + maxcol = 1 + current_row = 2 for line in sgml.strip().split("\n"): line = line.strip() + # SocialCalc uses colons internally, \\c used to repr colon in data line = line.replace(":","\\c") + if line.startswith(""): # Skip unary tags and XML instructions - pass + continue elif line.startswith("]+)>",line) element = my_match.groups(0)[0] - output+=flush_close(element, last_value, last_start, current_row, colmap, aliases) + output += flush_close(element, last_value, last_start, current_row, colmap, aliases) elif line.startswith("<"): # Opening tag my_match = re.match("<([^ >]+)[ >]",line) element = my_match.groups(0)[0] - aliases[element] = [] # Reset element aliases to see which attributes this instance has + aliases[element].append([]) # Add new set of aliases to see which attributes this instance has if "=" not in line: line = "<" + element + " " + element + '="' + element + '">' - my_match = re.findall('([^" =]+)="([^"]+)"',line) + attrs = re.findall('([^" =]+)="([^"]+)"',line) anno_name = "" anno_value = "" - for 
match in my_match: - if element != match[0] and ignore_elements is False: - anno_name = element + "_" + match[0] + for attr in attrs: + if element != attr[0] and ignore_elements is False: + anno_name = element + "_" + attr[0] else: - anno_name = match[0] - anno_value = match[1] + anno_name = attr[0] + anno_value = attr[1] open_annos[current_row].append((anno_name,anno_value)) - last_value[anno_name] = anno_value - last_start[anno_name] = current_row - if element not in aliases: - aliases[element] = [anno_name] - elif anno_name not in aliases[element]: - aliases[element].append(anno_name) + last_value[anno_name].append(anno_value) + last_start[anno_name].append(current_row) + if anno_name not in aliases[element][-1]: + aliases[element][-1].append(anno_name) + if anno_name not in colmap: - maxcol +=1 - colmap[anno_name] = number_to_letter(maxcol) + maxcol += 1 + colmap[anno_name] = [number_to_letters(maxcol)] + elif anno_name in colmap and \ + len(last_start[anno_name]) > len(colmap[anno_name]): + maxcol += 1 + colmap[anno_name].append(number_to_letters(maxcol)) elif len(line) > 0: # Token token = line.strip() @@ -313,7 +389,8 @@ def sgml_to_ether(sgml, ignore_elements=False): preamble += "cell:A1:t:tok:f:2\n" # f <> tvf for links output = preamble + output for header in colmap: - output += "cell:"+colmap[header]+"1:t:"+header+":f:2\n" # NO f <> tvf for links + for entry in colmap[header]: + output += "cell:"+entry+"1:t:"+header+":f:2\n" # NO f <> tvf for links output += "\nsheet:c:" + str(maxcol) + ":r:" + str(current_row-1) + ":tvf:1\n" @@ -340,17 +417,110 @@ def sgml_to_ether(sgml, ignore_elements=False): def ether_to_csv(ether_path, name): - command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") + try: + r = requests.get(ether_path + "_/" + name + "/csv/") + 
except: + return "" + + return r.text + + +def strip_unique_identifier(tag): + """Given an SGML closing or opening tag, replace anything that looks + like __\d+__ on the end of the tag name, assuming that we were the + ones who added it.""" + + try: + tag_name = re.match("^]+)", tag).groups(0)[0] + except AttributeError: + return tag + + orig_tag_name = re.sub("__\d+__$", "", tag_name) + tag = tag.replace("<" + tag_name, "<" + orig_tag_name) + tag = tag.replace("") # unary tags + or line.startswith("', ''] and replaces them with + ['', '']""" + def swap_run(l, start, end): + l[start:end] = l[start:end][::-1] + + run_start = None + for i, line in enumerate(lines): + if line.startswith("]+)[ >]", line).groups(0)[0] + open_counts = defaultdict(int) + + for j, line2 in enumerate(lines[i:]): + if deunique_should_skip_line(line2): + continue + + if line2.startswith("]+)>", line2).groups(0)[0] + open_counts[element] -= 1 + if element == open_element: + break + else: + element = re.match("<([^ >]+)[ >]", line2).groups(0)[0] + open_counts[element] += 1 + + # element is properly nested if no element was opened in the block that + # was not also closed in the block or vice versa + if sum(open_counts.values()) == 0: + output[i] = strip_unique_identifier(output[i]) + output[i+j] = strip_unique_identifier(output[i+j]) + + output = reverse_adjacent_closing_tags(output) + + return "\n".join(output) def ether_to_sgml(ether, doc_id,config=None): """ - :param ether: String in SocialCalc format :param doc_id: GitDox database internal document ID number as string + :param config: Name of an export config (.ini file) under schemas/ :return: """ @@ -359,13 +529,15 @@ def ether_to_sgml(ether, doc_id,config=None): else: config = ExportConfig(config=config) + # mapping from col header (meaningful string) to the col letter colmap = {} + # list of 3-tuples of parsed cells: (col, row, contents) cells = [] if isinstance(ether,unicode): ether = ether.encode("utf8") - + # parse cell contents into 
cells for line in ether.splitlines(): parsed_cell = re.match(r'cell:([A-Z]+)(\d+):(.*)$', line) if parsed_cell is not None: @@ -390,22 +562,37 @@ def ether_to_sgml(ether, doc_id,config=None): sec_element_checklist = [] row = 1 + # added to support duplicate columns + namecount = defaultdict(int) + close_tags = defaultdict(list) for cell in cells: - if cell[1] == 1: # Header row + # Header row + if cell[1] == 1: colname = cell[2]['t'].replace("\\c",":") if colname in config.aliases: - colmap[cell[0]] = config.aliases[colname] + colname = config.aliases[colname] + + # if we've already seen a tag of this name, prepare to make it unique + namecount[colname] += 1 + if namecount[colname] > 1: + dupe_suffix = "__" + str(namecount[colname]) + "__" + else: + dupe_suffix = "" + + if "@" in colname: + unique_colname = colname.replace("@", dupe_suffix + "@") else: - colmap[cell[0]] = colname + unique_colname = colname + dupe_suffix + + colmap[cell[0]] = unique_colname + # Make sure that everything that should be exported has some priority - if colname not in config.priorities and config.export_all: - if not colname.lower().startswith("ignore:"): # Never export columns prefixed with "ignore:" - if "@" in colname: - elem = colname.split("@",1)[0] - else: - elem = colname + if unique_colname.split("@",1)[0] not in config.priorities and config.export_all: + if not unique_colname.lower().startswith("ignore:"): + elem = unique_colname.split("@",1)[0] config.priorities.append(elem) + # All other rows else: col = cell[0] row = cell[1] @@ -413,56 +600,70 @@ def ether_to_sgml(ether, doc_id,config=None): col_name = colmap[col] else: raise IOError("Column " + col + " not found in doc_id " + str(doc_id)) + + # If the column specifies an attribute name, use it, otherwise use the element's name again if "@" in col_name: element, attrib = col_name.split("@",1) else: element = col_name attrib = element + # Check whether attrib contains a constant value instruction + const_val = "" + if "=" in 
attrib: + attrib, const_val = attrib.split("=",1) + + # Check to see if the cell has been merged with other cells if 'rowspan' in cell[2]: rowspan = int(cell[2]['rowspan']) else: rowspan = 1 - if "|" in element: # Check for flexible element, e.g. m|w@x means 'prefer to attach x to m, else to w' + # Check for flexible element, e.g. m|w@x means 'prefer to attach x to m, else to w' + if "|" in element: element, sec_element = element.split("|",1) else: sec_element = "" + # Move on to next cell if this is not a desired column if element not in config.priorities or (element.startswith("ignore:") and config.no_ignore): # Guaranteed to be in priorities if it should be included - continue # Move on to next cell if this is not a desired column - if row != last_row: # New row starting, sort previous lists for opening and closing orders - #close_tags[row].sort(key=lambda x: (-last_open_index[x],x)) - close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) + continue + + # New row starting from this cell, sort previous lists for opening and closing orders + if row != last_row: for element in open_tags[last_row]: open_tag_order[last_row].append(element) - #open_tag_order[last_row].sort(key=lambda x: (open_tag_length[x],x), reverse=True) + open_tag_order[last_row].sort(key=lambda x: (-open_tag_length[x],config.priorities.index(x))) for sec_tuple in sec_element_checklist: prim_found = False - e_prim, e_sec, attr, val, span = sec_tuple - if e_prim in open_tags[last_row] and e_prim in open_tag_length: - if span == open_tag_length[e_prim]: - open_tags[last_row][e_prim].append((attr, val)) - if e_prim not in close_tags[last_row + span]: - close_tags[last_row+span-1].append(e_prim) + prim_elt, sec_elt, attr, val, span = sec_tuple + if prim_elt in open_tags[last_row] and prim_elt in open_tag_length: + if span == open_tag_length[prim_elt]: + open_tags[last_row][prim_elt].append((attr, val)) + close_tags[last_row + span].append(prim_elt) 
prim_found = True if not prim_found: - if e_sec in open_tags[last_row] and e_sec in open_tag_length: - if span == open_tag_length[e_sec]: - open_tags[last_row][e_sec].append((attr, val)) - if e_sec not in close_tags[last_row + span]: - close_tags[last_row + span - 1].append(e_sec) + if sec_elt in open_tags[last_row] and sec_elt in open_tag_length: + if span == open_tag_length[sec_elt]: + open_tags[last_row][sec_elt].append((attr, val)) + close_tags[last_row + span].append(sec_elt) sec_element_checklist = [] # Purge sec_elements + close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) + last_row = row - if 't' in cell[2]: # cell contains text - content = cell[2]['t'] - elif 'v' in cell[2]: # cell contains numerical value - content = cell[2]['v'] - elif col_name != 'tok': - continue # cell does not contain a value and this is not a token entry + + if const_val != "": + content = const_val + else: + if 't' in cell[2]: # cell contains text + content = cell[2]['t'] + elif 'v' in cell[2]: # cell contains numerical value + content = cell[2]['v'] + elif col_name != 'tok': + continue # cell does not contain a value and this is not a token entry if col_name == 'tok': if "<" in content or "&" in content or ">" in content: @@ -484,6 +685,10 @@ def ether_to_sgml(ether, doc_id,config=None): # Priorities have been supplied, but this column is not in them continue + # content may not contain straight double quotes in span annotations in SGML export + # Note that " is allowed in tokens and in tab-delimited token annotations! 
+ content = content.replace('"', """) + if sec_element != "": #open_tags[row][sec_element].append((attrib, content)) sec_element_checklist.append((element,sec_element,attrib,content,rowspan)) @@ -496,10 +701,13 @@ def ether_to_sgml(ether, doc_id,config=None): close_row = row + rowspan else: close_row = row + 1 - if element not in close_tags[close_row]: - close_tags[close_row].append(element) + + # this introduces too many close tags for elts that have more than one attr. + # We take care of this later with close_tag_debt + close_tags[close_row].append(element) open_tag_length[element] = int(close_row) - int(last_open_index[element]) + # Sort last row tags #close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) if row + 1 in close_tags: @@ -511,22 +719,25 @@ def ether_to_sgml(ether, doc_id,config=None): #output = build_meta_tag(doc_id) template = fill_meta_template(doc_id,config.template) output = "" + close_tag_debt = defaultdict(int) - for r in xrange(2,len(toks)+3): - if r == 1970: - a=4 + for r in xrange(2,len(toks)+5): for element in close_tags[r]: - if element not in config.milestones: - output += '\n' - - if r == len(toks)+2: - break + if element != "" and element not in config.milestones: + if close_tag_debt[element] > 0: + close_tag_debt[element] -= 1 + else: + output += '\n' for element in open_tag_order[r]: tag = '<' + element + attr_count = 0 for attrib, value in open_tags[r][element]: if attrib != "": tag += ' ' + attrib + '="' + value + '"' + attr_count += 1 + close_tag_debt[element] = len(open_tags[r][element]) - 1 + if element in config.milestones: tag += '/>\n' else: @@ -544,6 +755,9 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) + # fix tags that look like elt__2__ if it still gives correct sgml + output = deunique_properly_nested_tags(output) + return output @@ -605,34 +819,46 @@ def delete_spreadsheet(ether_url, name): :param name: name of the spreadsheet 
(last part of URL) :return: void """ - - ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name - del_proc = subprocess.Popen(ether_command,shell=True) - - (stdout, stderr) = del_proc.communicate() - - return stdout, stderr + try: + r = requests.delete(ether_url + "_/" + name) + except: + pass def sheet_exists(ether_path, name): return len(get_socialcalc(ether_path,name)) > 0 -def get_socialcalc(ether_path, name): +def get_socialcalc(ether_path, name, doc_id=None, dirty=True): + """ + Get SocialCalc format serialization for an EtherCalc spreadsheet, or a cached serialization from the sqlite + DB is available for a specified doc_id + + :param ether_path: The EtherCalc server base URL, e.g. http://server.com/ethercalc/ + :param name: spreadsheet name, e.g. gd_corpname_docname + :param doc_id: optional doc_id in docs table to fetch/set SocialCalc from cache + :return: SocialCalc string + """ + + if doc_id is not None and not dirty: + cache = get_cache(doc_id)[0][0] + if cache is not None: + return cache command = "curl --netrc -X GET " + ether_path + "_/" + name proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") + socialcalc = stdout.decode("utf8") + if doc_id is not None: + set_cache(doc_id, socialcalc) + return socialcalc def get_timestamps(ether_path): - command = "curl --netrc -X GET " + ether_path + "_roomtimes" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - times = json.loads(stdout) + r = requests.get(ether_path + "_roomtimes") + times = r.json() output = {} for room in times: - output[room.replace("timestamp-","")] = times[room] + output[room.replace("timestamp-", "")] = times[room] return output diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py index 014b400..b0eb2ac 100755 --- 
a/modules/gitdox_sql.py +++ b/modules/gitdox_sql.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- """ Data access functions to read from and write to the SQLite backend. @@ -31,7 +31,7 @@ def setup_db(): #docs table cur.execute('''CREATE TABLE IF NOT EXISTS docs - (id INTEGER PRIMARY KEY AUTOINCREMENT, name text, corpus text, status text,assignee_username text ,filename text, content text, mode text, schema text, validation text, timestamp text)''') + (id INTEGER PRIMARY KEY AUTOINCREMENT, name text, corpus text, status text,assignee_username text ,filename text, content text, mode text, schema text, validation text, timestamp text, cache text)''') #metadata table cur.execute('''CREATE TABLE IF NOT EXISTS metadata (docid INTEGER, metaid INTEGER PRIMARY KEY AUTOINCREMENT, key text, value text, corpus_meta text, UNIQUE (docid, metaid) ON CONFLICT REPLACE, UNIQUE (docid, key) ON CONFLICT REPLACE)''') @@ -49,7 +49,24 @@ def create_document(doc_id, name, corpus, status, assigned_username, filename, c (int(doc_id), name, corpus, status, assigned_username, filename, content, schema)) -def generic_query(sql, params): +def get_cache(doc_id): + try: + cache = generic_query("SELECT cache FROM docs WHERE id = ?;",(doc_id,)) + except sqlite3.Error as err: # Old schema without cache column + generic_query("ALTER TABLE docs ADD COLUMN cache TEXT default null;",None) + cache = generic_query("SELECT cache FROM docs WHERE id = ?;",(doc_id,)) + return cache + + +def set_cache(doc_id, cache_contents): + try: + generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id)) + except sqlite3.Error as err: # Old schema without cache column + generic_query("ALTER TABLE docs ADD COLUMN cache TEXT default null;",None) + generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id)) + + +def generic_query(sql, params, return_new_id=False): # generic_query("DELETE FROM rst_nodes WHERE doc=? 
and project=?",(doc,project)) dbpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "gitdox.db" @@ -62,8 +79,11 @@ def generic_query(sql, params): else: cur.execute(sql) - rows = cur.fetchall() - return rows + if return_new_id: + return cur.lastrowid + else: + rows = cur.fetchall() + return rows def invalidate_doc_by_name(doc,corpus): @@ -116,56 +136,16 @@ def cell(text): text = str(text) return "\n " + text + "" -def print_meta(doc_id, corpus=False): - meta = get_doc_meta(doc_id, corpus=corpus) - if meta is None: - meta = [] - # docid,metaid,key,value - four cols - metaid_id = "metaid" if not corpus else "corpus_metaid" - table_id = "meta_table" if not corpus else "meta_table_corpus" - table=''' - - - - - - - - """ - for item in meta: - # Each item appears in one row of the table - row = "\n " - metaid = str(item[1]) - ('metaid:'+str(metaid)) - id = str(doc_id) - for i in item[2:-1]: - cell_contents = cell(i) - cell_contents = re.sub(r'(" - table += row - table += "\n\n
)(https?://[^ <>]+)',r'\1\2',cell_contents) - row += cell_contents - - # delete meta - metaid_code="""
""" - - button_delete="" - button_delete+=metaid_code - row += cell(button_delete) - row += "\n
\n" - return table - - def save_meta(doc_id,key,value,corpus=False): if corpus: _, corpus_name, _, _, _, _, _ = get_doc_info(doc_id) - generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)", (None,key, value,corpus_name)) + new_id = generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)", (None,key, value,corpus_name), return_new_id = True) else: - generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)",(doc_id,key,value,None)) + new_id = generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)",(doc_id,key,value,None), return_new_id = True) invalidate_doc_by_id(doc_id) + return new_id + def delete_meta(metaid, doc_id, corpus=False): generic_query("DELETE FROM metadata WHERE metaid=?", (metaid,)) if not corpus: @@ -178,10 +158,14 @@ def get_doc_info(doc_id): else: return res +def get_doc_content(doc_id): + res = generic_query("SELECT content FROM docs WHERE id=?", (int(doc_id),)) + return res[0][0] + def get_all_docs(corpus=None, status=None): if corpus is None: if status is None: - return generic_query("SELECT id, name, corpus, mode, content FROM docs", None) + return generic_query("SELECT id, name, corpus, mode, content FROM docs", None) else: return generic_query("SELECT id, name, corpus, mode, content FROM docs where status=?", (status,)) else: @@ -197,25 +181,42 @@ def get_doc_meta(doc_id, corpus=False): _, corpus_name, _, _, _, _, _ = fields return generic_query("SELECT * FROM metadata WHERE corpus_meta=? ORDER BY key COLLATE NOCASE",(corpus_name,)) else: - return None + return [] else: return generic_query("SELECT * FROM metadata WHERE docid=? 
ORDER BY key COLLATE NOCASE", (int(doc_id),)) def get_corpora(): return generic_query("SELECT DISTINCT corpus FROM docs ORDER BY corpus COLLATE NOCASE", None) -def get_validate_rules(): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate", None) +def get_validate_rules(sort=None, domain=None): + query = "SELECT corpus, doc, domain, name, operator, argument, id FROM validate" + args = [] + if domain: + query += " WHERE domain=? " + args.append(domain) + if sort: + query += " ORDER BY " + sort + return generic_query(query, args) + +def get_xml_rules(): + return get_validate_rules(domain='xml') + +def get_meta_rules(): + return get_validate_rules(domain='meta') + +def get_ether_rules(): + return get_validate_rules(domain='ether') -def get_sorted_rules(sort): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate ORDER BY " + sort, None) # parameterization doesn't work for order by +def get_export_rules(): + return get_validate_rules(domain='export') def create_validate_rule(doc, corpus, domain, name, operator, argument): - generic_query("INSERT INTO validate(doc,corpus,domain,name,operator,argument) VALUES(?,?,?,?,?,?)", (doc, corpus, domain, name, operator, argument)) + new_id = generic_query("INSERT INTO validate(doc,corpus,domain,name,operator,argument) VALUES(?,?,?,?,?,?)", (doc, corpus, domain, name, operator, argument), return_new_id = True) if domain == "meta": invalidate_doc_by_name("%","%") else: invalidate_ether_docs("%","%") + return new_id def delete_validate_rule(id): generic_query("DELETE FROM validate WHERE id=?", (int(id),)) diff --git a/modules/jtable_create_rule.py b/modules/jtable_create_rule.py deleted file mode 100755 index 8c1f048..0000000 --- a/modules/jtable_create_rule.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def create_rule(): - jtable_result = {} - try: - 
parameter = cgi.FieldStorage() - doc = parameter.getvalue("doc") - corpus = parameter.getvalue("corpus") - domain = parameter.getvalue("domain") - name = parameter.getvalue("name") - operator = parameter.getvalue("operator") - argument = parameter.getvalue("argument") - - create_validate_rule(doc,corpus,domain,name,operator,argument) - - new_json_rule = {} - new_json_rule['doc'] = doc - new_json_rule['corpus'] = corpus - new_json_rule['domain'] = domain - new_json_rule['name'] = name - new_json_rule['operator'] = operator - new_json_rule['argument'] = argument - - jtable_result['Result'] = 'OK' - jtable_result['Record'] = new_json_rule - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_create_rule.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print create_rule() \ No newline at end of file diff --git a/modules/jtable_delete_rule.py b/modules/jtable_delete_rule.py deleted file mode 100755 index 0131d74..0000000 --- a/modules/jtable_delete_rule.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def delete_rule(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - id = parameter.getvalue("id") - - delete_validate_rule(id) - - jtable_result['Result'] = 'OK' - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_delete_rule.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print delete_rule() \ No newline at end of file diff --git a/modules/jtable_rule_list.py b/modules/jtable_rule_list.py deleted file mode 100755 index 8ed48ec..0000000 --- a/modules/jtable_rule_list.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def 
list_rules(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - sort = parameter.getvalue("jtSorting") - if sort is not None: - rules = get_sorted_rules(sort) - else: - rules = get_validate_rules() - json_rules = [] - for rule in rules: - new_json_rule = {} - new_json_rule['corpus'] = rule[0] - new_json_rule['doc'] = rule[1] - new_json_rule['domain'] = rule[2] - new_json_rule['name'] = rule[3] - new_json_rule['operator'] = rule[4] - new_json_rule['argument'] = rule[5] - new_json_rule['id'] = rule[6] - json_rules.append(new_json_rule) - jtable_result['Result'] = 'OK' - jtable_result['Records'] = json_rules - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_rule_list.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print list_rules() \ No newline at end of file diff --git a/modules/jtable_update_rule.py b/modules/jtable_update_rule.py deleted file mode 100755 index 6cf91f1..0000000 --- a/modules/jtable_update_rule.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def update_rules(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - doc = parameter.getvalue("doc") - corpus = parameter.getvalue("corpus") - domain = parameter.getvalue("domain") - name = parameter.getvalue("name") - operator = parameter.getvalue("operator") - argument = parameter.getvalue("argument") - id = parameter.getvalue("id") - - update_validate_rule(doc,corpus,domain,name,operator,argument,id) - - jtable_result['Result'] = 'OK' - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_update_rule.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print update_rules() \ No newline at end of file diff --git a/modules/renderer.py 
b/modules/renderer.py new file mode 100644 index 0000000..ad892d1 --- /dev/null +++ b/modules/renderer.py @@ -0,0 +1,39 @@ +import platform +import os +from modules.configobj import ConfigObj +from paths import get_menu +from pystache.renderer import Renderer + +if platform.system() == "Windows": + prefix = "transc\\" +else: + prefix = "" +rootpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + os.sep +userdir = rootpath + "users" + os.sep +config = ConfigObj(userdir + 'config.ini') + +def render(template_name, variables, template_dir='templates', file_ext=".mustache"): + """ + Render a mustache template given a dict representing its variables. + + Args: + template_name (str): the name of the template to be rendered + variables (dict): a string -> any dict holding values of variables used in the template + template_dir (str): the template directory, relative to the GitDox root directory. + Defaults to 'templates' + file_ext (str): the file extension of templates. Defaults to '.mustache' + + Returns: + str: rendered HTML. 
+ """ + template_dir = prefix + template_dir + + # load Mustache templates so we can reference them in our large templates + templates = dict([(filename[:-len(file_ext)], open(template_dir + os.sep + filename, 'r').read()) + for filename in os.listdir(template_dir) + if filename.endswith(file_ext)]) + renderer = Renderer(partials=templates) + + variables['skin_stylesheet'] = config['skin'] + variables['navbar_html'] = get_menu() + return renderer.render_path(template_dir + os.sep + template_name + file_ext, variables) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py deleted file mode 100755 index a0eb829..0000000 --- a/modules/validate_spreadsheet.py +++ /dev/null @@ -1,438 +0,0 @@ -#!/usr/bin/python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -from ether import get_socialcalc, make_spreadsheet, exec_via_temp, get_timestamps -from collections import defaultdict -import re -import cgi -import json - - -class Cell: - def __init__(self, col, row, content, span): - self.col = col - self.row = row - self.header = "" - self.content = content - self.span = span - - -def highlight_cells(cells, ether_url, ether_doc_name): - old_ether = get_socialcalc(ether_url, ether_doc_name) - old_ether_lines = old_ether.splitlines() - new_ether_lines = [] - - old_color_numbers = [] - new_color_number = '1' - for line in old_ether_lines: - color_line = re.match(r'color:(\d+):(rgb.*$)', line) - if color_line is not None: - if color_line.group(2) == 'rgb(242, 242, 142)': - old_color_numbers.append(color_line.group(1)) - else: - new_color_number = str(1 + int(color_line.group(1))) - if len(old_color_numbers) > 0: - new_color_number = old_color_numbers[0] - - for line in old_ether_lines: - - parts = line.split(":") - # Check for pure formatting cells, e.g. 
cell:K15:f:1 - if len(parts) == 4: - if parts[2] == "f": # Pure formatting cell, no content - continue - - parsed_cell = re.match(r'cell:([A-Z]+\d+)(:.*)$', line) - if parsed_cell is not None: - col_row = parsed_cell.group(1) - other = parsed_cell.group(2) - bg = re.search(r':bg:(\d+)($|:)', other) - if bg is not None: - bg = bg.group(1) - - if col_row in cells: - if bg is not None: - if bg != new_color_number: - new_line = re.sub(r':bg:' + bg, r':bg:' + new_color_number, line) - else: - new_line = line - else: - new_line = line + ':bg:' + new_color_number - else: - if bg is not None: - if bg in old_color_numbers: - new_line = re.sub(r':bg:' + bg, r'', line) - else: - new_line = line - else: - new_line = line - new_ether_lines.append(new_line) - elif re.match(r'sheet:', line) is not None: - new_ether_lines.append(line) - if new_color_number not in old_color_numbers: - new_ether_lines.append('color:' + new_color_number + ':rgb(242, 242, 142)') - else: - new_ether_lines.append(line) - - new_ether = '\n'.join(new_ether_lines) - make_spreadsheet(new_ether, ether_url + "_/" + ether_doc_name, "socialcalc") - - -def validate_all_docs(): - docs = generic_query("SELECT id, name, corpus, mode, schema, validation, timestamp FROM docs", None) - doc_timestamps = get_timestamps(ether_url) - reports = {} - - for doc in docs: - doc_id, doc_name, corpus, doc_mode, doc_schema, validation, timestamp = doc - if doc_mode == "ether": - ether_name = "_".join(["gd",corpus,doc_name]) - if ether_name in doc_timestamps and validation is not None and len(validation) > 0: - if timestamp == doc_timestamps[ether_name]: - reports[doc_id] = json.loads(validation) - else: - reports[doc_id] = validate_doc(doc_id) - update_validation(doc_id, json.dumps(reports[doc_id])) - update_timestamp(doc_id, doc_timestamps[ether_name]) - else: - reports[doc_id] = validate_doc(doc_id) - #reports[doc_id] = {"ether":"sample_ether","meta":"sample_meta"} - update_validation(doc_id, json.dumps(reports[doc_id])) - if 
ether_name in doc_timestamps: - update_timestamp(doc_id, doc_timestamps[ether_name]) - elif doc_mode == "xml": - if validation is None: - reports[doc_id] = validate_doc_xml(doc_id, doc_schema) - try: - validation_report = json.dumps(reports[doc_id]) - except UnicodeDecodeError: - reports[doc_id]["xml"] = "UnicodeDecodeError; unable to print XML validation report for " + doc_name - validation_report = json.dumps(reports[doc_id]) - update_validation(doc_id,validation_report) - else: - reports[doc_id] = json.loads(validation) - - return json.dumps(reports) - - -def validate_doc(doc_id, editor=False): - doc_info = get_doc_info(doc_id) - doc_name = doc_info[0] - doc_corpus = doc_info[1] - - ether_doc_name = "gd_" + doc_corpus + "_" + doc_name - ether = get_socialcalc(ether_url, ether_doc_name) - parsed_ether = parse_ether(ether, doc_name, doc_corpus) - meta = get_doc_meta(doc_id) - - ether_report = '' - meta_report = '' - cells = [] - - rules = get_validate_rules() - for rule in rules: - rule_applies = True - rule_corpus = rule[0] - rule_doc = rule[1] - rule_domain = rule[2] - if rule_corpus is not None: - if re.search(rule_corpus, doc_corpus) is None: - rule_applies = False - if rule_doc is not None: - if re.search(rule_doc, doc_name) is None: - rule_applies = False - - if rule_applies is True: - rule_report, rule_extra, rule_cells = apply_rule(rule, parsed_ether, meta) - cells += rule_cells - if editor is True and len(rule_extra) > 0: - new_report = """
""" + rule_report[:-5] + """ """ + "" + rule_extra + "" + "
" - else: - new_report = rule_report - - if rule_domain == "ether": - ether_report += new_report - elif rule_domain == "meta": - meta_report += new_report - - if editor == True: - highlight_cells(cells, ether_url, ether_doc_name) - - if editor is True: - full_report = ether_report + meta_report - if len(full_report) == 0: - full_report = "Document is valid!" - return full_report - else: - json_report = {} - if len(ether_report) == 0: - ether_report = "spreadsheet is valid" - if len(meta_report) == 0: - meta_report = "metadata is valid" - json_report['ether'] = ether_report - json_report['meta'] = meta_report - return json_report - - -def parse_ether(ether, doc, corpus): - ether_lines = ether.splitlines() - - # find col letter corresponding to col name - parsed = defaultdict(list) - rev_colmap = {} - colmap = {} - all_cells = [] - for line in ether_lines: - if line.startswith("cell:"): # Cell row - # A maximal row looks like this incl. span: cell:F2:t:LIRC2014_chw0oir:f:1:rowspan:289 - # A minimal row without formatting: cell:C2:t:JJ:f:1 - parts = line.split(":") - if len(parts) > 3: # Otherwise invalid row - cell_id = parts[1] - cell_row = cell_id[1:] - cell_col = cell_id[0] - # We'd need something like this to support more than 26 cols, i.e. columns AA, AB... 
- #for c in cell_id: - # if c in ["0","1","2","3","4","5","6","7","8","9"]: - # cell_row += c - # else: - # cell_col += c - cell_content = parts[3].replace("\\c",":") - if "rowspan:" in line: - cell_span = parts[-1] - else: - cell_span = "1" - if cell_row == "1": # Header row - colmap[cell_content] = cell_col - rev_colmap[cell_col] = cell_content - all_cells.append(Cell(cell_col,cell_row,cell_content,cell_span)) - - for cell in all_cells: - try: - cell.header = rev_colmap[cell.col] - except KeyError: - raise KeyError("KeyError: " + cell.col + "; Document: " + corpus + " :: " + doc + "") - - parsed[cell.header].append(cell) - - parsed["__colmap__"] = colmap # Save colmap for apply_rule - return parsed - - -def apply_rule(rule, parsed_ether, meta): - domain = rule[2] - name = rule[3] - operator = rule[4] - argument = rule[5] - - report = '' - extra = '' - cells = [] - - if name is None: - return report, extra, cells - - if domain == "ether": - - if operator in ["~", "|", "exists"]: - - # find col letter corresponding to col name - if name in parsed_ether: - col = parsed_ether[name] - else: - if operator in ["|","exists"]: - report += "Column named " + name + " not found
" - return report, extra, cells - - for cell in col: - if cell.row != "1": - if operator == "|": # rowspan - if argument == "1": - if cell.span != "1": - report += "Cell " + cell.col + cell.row + ": row span is not 1
" - cells.append(cell.col + cell.row) - else: - if cell.span != "" and cell.span is not None: - report += "Cell " + cell.col + cell.row + ": row span is not " + argument + "
" - cells.append(cell.col + cell.row) - - elif operator == "~": # regex - match = re.search(argument, cell.content) - if match is None: - report += "Cell " + cell.col + cell.row + ": content does not match pattern
" - extra += "Cell " + cell.col + cell.row + ":
" + "Content: " + cell.content + "
" + "Pattern: " + argument + "
" - cells.append(cell.col + cell.row) - - elif operator in ["=", ">","=="]: # care about two cols: name and argument - - # find col letters corresponding to col names - name_letter = parsed_ether["__colmap__"][name] if name in parsed_ether["__colmap__"] else None - arg_letter = parsed_ether["__colmap__"][argument] if argument in parsed_ether["__colmap__"] else None - if name_letter is None: - if operator != "==": - report += "Column named " + name + " not found
" - return report, extra, cells - if arg_letter is None: - if operator != "==": - report += "Column named " + argument + " not found
" - return report, extra, cells - - name_boundaries = [] - arg_boundaries = [] - name_content = {} - arg_content = {} - name_filled = [] - arg_filled = [] - - # find boundary rows - for cell in parsed_ether[name]: - name_boundaries.append(cell.row) - name_content[cell.row] = cell.content - for i in range(int(cell.row), int(cell.row) + int(cell.span)): - name_filled.append(str(i)) - for cell in parsed_ether[argument]: - arg_boundaries.append(cell.row) - arg_content[cell.row] = cell.content - for i in range(int(cell.row), int(cell.row) + int(cell.span)): - arg_filled.append(str(i)) - - if operator == "==": - for row in name_content: - if row in arg_content: - if arg_content[row] != name_content[row]: - cells.append(arg_letter + row) - for boundary in arg_boundaries: - if boundary not in name_boundaries: - cells.append(arg_letter + boundary) - else: - for boundary in name_boundaries: - if boundary not in arg_boundaries: - if boundary in arg_filled: - report += "Span break on line " + boundary + " in column " + name + " but not " \ - + argument + "
" - cells.append(name_letter + boundary) - if operator == "=": - for boundary in arg_boundaries: - if boundary not in name_boundaries: - if boundary in name_filled: - cells.append(arg_letter + boundary) - - elif domain == "meta": - meta_report, meta_extra = apply_meta_rule(rule, meta) - report += meta_report - extra += meta_extra - - return report, extra, cells - - -def apply_meta_rule(rule, meta): - name = rule[3] - operator = rule[4] - argument = rule[5] - report = '' - extra = '' - if operator == "~": - for metadatum in meta: - if metadatum[2] == name: - value = metadatum[3] - match = re.search(argument, value) - if match is None: - report += "Metadata for " + name + " does not match pattern" + "
" - extra += "Metadata: " + value + "
" + "Pattern: " + argument + "
" - elif operator == "exists": - exists = False - for metadatum in meta: - if metadatum[2] == name: - exists = True - break - if exists is False: - report += "No metadata for " + name + '
' - return report, extra - - -def validate_doc_xml(doc_id, schema, editor=False): - xml_report = '' - # xml validation - if schema == "--none--": - xml_report += "No schema
" - else: - command = "xmllint --htmlout --schema " + "../schemas/" + schema + ".xsd" + " tempfilename" - xml = generic_query("SELECT content FROM docs WHERE id=?", (doc_id,))[0][0] - out, err = exec_via_temp(xml.encode("utf8"), command) - err = err.strip() - err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") - err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) - err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) - err = re.sub(r'\n','
',err) - xml_report += err + "
" - - # metadata validation - meta_report = '' - meta_rules = generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'meta'", None) - meta = get_doc_meta(doc_id) - doc_info = get_doc_info(doc_id) - doc_name = doc_info[0] - doc_corpus = doc_info[1] - for rule in meta_rules: - rule_applies = True - rule_corpus = rule[0] - rule_doc = rule[1] - if rule_corpus is not None: - if re.search(rule_corpus, doc_corpus) is None: - rule_applies = False - if rule_doc is not None: - if re.search(rule_doc, doc_name) is None: - rule_applies = False - if rule_applies is True: - rule_report, rule_extra = apply_meta_rule(rule, meta) - if editor is True and len(rule_extra) > 0: - meta_report += """
""" + rule_report[ - :-5] + """ """ + "" + rule_extra + "" + "
" - else: - meta_report += rule_report - - # report - if editor is True: - try: - #full_report = xml_report.decode("utf8") + meta_report.decode("utf8") - full_report = xml_report + meta_report - except Exception as e: - full_report = "[Encoding error: " + str(e) + "]" - if len(full_report) == 0: - full_report = "Document is valid!" - return full_report - else: - json_report = {} - if len(xml_report) == 0: - ether_report = "xml is valid" - if len(meta_report) == 0: - meta_report = "metadata is valid" - json_report['xml'] = xml_report - json_report['meta'] = meta_report - return json_report - - -if __name__ == "__main__": - if __name__ == '__main__' and __package__ is None: - from os import sys, path - - sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - from paths import ether_url - else: - from ..paths import ether_url - - parameter = cgi.FieldStorage() - doc_id = parameter.getvalue("doc_id") - mode = parameter.getvalue("mode") - schema = parameter.getvalue("schema") - - if doc_id == "all": - print "Content-type:application/json\n\n" - print validate_all_docs().encode("utf8") - else: - print "Content-type:text/html\n\n" - if mode == "ether": - print validate_doc(doc_id, editor=True).encode("utf8") - elif mode == "xml": - print validate_doc_xml(doc_id, schema, editor=True).encode("utf8") \ No newline at end of file diff --git a/modules/validation/__init__.py b/modules/validation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modules/validation/ether_validator.py b/modules/validation/ether_validator.py new file mode 100644 index 0000000..2801d0b --- /dev/null +++ b/modules/validation/ether_validator.py @@ -0,0 +1,331 @@ +from validator import Validator +from collections import defaultdict +import re + + +class EtherValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.domain = rule[2] + self.name = rule[3] + self.operator = rule[4] + self.argument = rule[5] + + + def 
_apply_exists(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + if len(col_letters) == 0: + report += "Column named '" + self.name + "' not found
" + return report, tooltip, cells + + def _apply_doesntexist(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + if len(col_letters) > 0: + report += "Columns named '" + self.name + "' are not allowed
" + cells += [letter + "1" for letter in col_letters] + return report, tooltip, cells + + def _apply_span_equals_number(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + if len(col_letters) == 0: + report += "Column named " + self.name + " not found
" + return report, tooltip, cells + + for letter in col_letters: + for cell in parsed_ether[letter]: + if cell.row == "1": + continue + + if self.argument == "1": + if cell.span != "1": + report += "Cell " + cell.col + cell.row + ": span is not 1
" + cells.append(cell.col + cell.row) + else: + if cell.span != "" and cell.span != self.argument: + report += "Cell " + cell.col + cell.row + ": span is not " + self.argument + "
" + cells.append(cell.col + cell.row) + return report, tooltip, cells + + def _apply_regex(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + for letter in col_letters: + for cell in parsed_ether[letter]: + if cell.row == "1": + continue + match = re.search(self.argument, cell.content) + if match is None: + report += ("Cell " + cell.col + cell.row + + ": content does not match pattern " + self.argument + "
") + tooltip += ("Cell " + cell.col + cell.row + ":
" + + "Content: " + cell.content + "
" + + "Pattern: " + self.argument + "
") + cells.append(cell.col + cell.row) + return report, tooltip, cells + + def _binary_op_check_cols_exist(self, colmap): + name_letters = colmap[self.name] + arg_letters = colmap[self.argument] + + if len(name_letters) == 0: + if self.operator != "==": + return "Column named " + self.name + " not found
" + if len(arg_letters) == 0: + if self.operator != "==": + return "Column named " + self.argument + " not found
" + + return "" + + def _binary_op_setup(self, parsed_ether): + colmap = parsed_ether['__colmap__'] # name -> list of col letters + name_letters = colmap[self.name] + arg_letters = colmap[self.argument] + + name_tuples = defaultdict(list) + arg_tuples = defaultdict(list) + start_rows = defaultdict(list) + all_rows = [] + + for letter in name_letters: + for cell in parsed_ether[letter]: + start_rows[letter].append(cell.row) + # "de-merge" cell so we have an entry for every row in its span with its letter and content + for i in range(int(cell.span) or 1): + row = str(int(cell.row) + i) + name_tuples[row].append((letter, cell.content)) + all_rows.append(row) + + # same as above with arg_letters + for letter in arg_letters: + for cell in parsed_ether[letter]: + start_rows[letter].append(cell.row) + for i in range(int(cell.span) or 1): + row = str(int(cell.row) + i) + arg_tuples[row].append((letter, cell.content)) + if row not in all_rows: + all_rows.append(row) + + name_start_cells = [] + name_start_rows = set() # for O(1) lookup + for letter in name_letters: + name_start_cells += [(letter, row) for row in start_rows[letter]] + name_start_rows = name_start_rows.union(set(row for row in start_rows[letter])) + + arg_start_cells = [] + arg_start_rows = set() + for letter in arg_letters: + arg_start_cells += [(letter, row) for row in start_rows[letter]] + arg_start_rows = arg_start_rows.union(set(row for row in start_rows[letter])) + + return name_letters, arg_letters, name_tuples, arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, arg_start_cells, arg_start_rows + + def _apply_subspan(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + err = self._binary_op_check_cols_exist(colmap) + if err: + report += err + return report, tooltip, cells + + name_letters, arg_letters, name_tuples, \ + arg_tuples, 
start_rows, all_rows, \ + name_start_cells, name_start_rows, \ + arg_start_cells, arg_start_rows = self._binary_op_setup(parsed_ether) + + for row in all_rows: + # check to see if all cells in rhs are contained within cells on lhs + if row in arg_tuples and row not in name_tuples: + for letter, _ in arg_tuples[row]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " must appear in the span of a cell in one of these columns: " + + ", ".join(name_letters) + "
") + + return report, tooltip, cells + + def _apply_equal_span_length(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + err = self._binary_op_check_cols_exist(colmap) + if err: + report += err + return report, tooltip, cells + + name_letters, arg_letters, name_tuples, \ + arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, \ + arg_start_cells, arg_start_rows = self._binary_op_setup(parsed_ether) + + for row in all_rows: + if row == "1": + continue + name_len = len(name_tuples[row]) + arg_len = len(arg_tuples[row]) + + if name_len > arg_len: + for letter, _ in name_tuples[row][arg_len:]: + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") + elif arg_len > name_len: + for letter, _ in arg_tuples[row][name_len:]: + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + for letter, row in name_start_cells: + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(arg_letters) + "
") + + for letter, row in arg_start_cells: + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(name_letters) + "
") + + return report, tooltip, cells + + def _apply_equal_span_length_and_content(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + err = self._binary_op_check_cols_exist(colmap) + if err: + report += err + return report, tooltip, cells + + + name_letters, arg_letters, name_tuples, \ + arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, \ + arg_start_cells, arg_start_rows = self._binary_op_setup(parsed_ether) + + for row in all_rows: + if row == "1": + continue + + name_len = len(name_tuples[row]) + arg_len = len(arg_tuples[row]) + + if name_len > arg_len: + for letter, _ in name_tuples[row][arg_len:]: + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") + elif arg_len > name_len: + for letter, _ in arg_tuples[row][name_len:]: + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): + name_letter, name_content = name_tuples[row][i] + arg_letter, arg_content = arg_tuples[row][i] + + if arg_content != name_content and (row in start_rows[arg_letter] or row in start_rows[name_letter]): + cells.append(name_letter + row) + cells.append(arg_letter + row) + report += ("Cells " + name_letter + row + + " and " + arg_letter + row + + " must have equivalent content.
") + + for letter, row in name_start_cells: + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(arg_letters) + "
") + + for letter, row in arg_start_cells: + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(name_letters) + "
") + + return report, tooltip, cells + + def _apply_rule(self, parsed_ether): + if self.name is None: + return "", "", [] + + if self.operator == "exists": + return self._apply_exists(parsed_ether) + if self.operator == "doesntexist": + return self._apply_doesntexist(parsed_ether) + elif self.operator == "|": + return self._apply_span_equals_number(parsed_ether) + elif self.operator == "~": + return self._apply_regex(parsed_ether) + elif self.operator == ">": + return self._apply_subspan(parsed_ether) + elif self.operator == "=": + return self._apply_equal_span_length(parsed_ether) + elif self.operator == "==": + return self._apply_equal_span_length_and_content(parsed_ether) + else: + raise Exception("Unknown EtherCalc validation operator: '" + str(self.operator) + "'") + + + def validate(self, parsed_ether, doc_name, doc_corpus): + res = {"report": "", + "tooltip": "", + "cells": []} + + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return res, False + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return res, False + + report, tooltip, cells = self._apply_rule(parsed_ether) + + res['report'] += report + res['tooltip'] += tooltip + res['cells'] += cells + return res, True diff --git a/modules/validation/export_validator.py b/modules/validation/export_validator.py new file mode 100644 index 0000000..cb33425 --- /dev/null +++ b/modules/validation/export_validator.py @@ -0,0 +1,40 @@ +from validator import Validator +from ..ether import exec_via_temp, ExportConfig, ether_to_sgml +import re + +# TODO: would have been ideal to write this without any filesystem operations +class ExportValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.config = rule[3] + self.schema = rule[5] + + def validate(self, socialcalc, doc_id, doc_name, doc_corpus): + report = "" + + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return report, False + if 
self.doc is not None: + if re.search(self.doc, doc_name) is None: + return report, False + + export_data = ether_to_sgml(socialcalc, doc_id, config=self.config) + + schema = self.schema + command = "xmllint --schema schemas/" + schema + " tempfilename" + out, err = exec_via_temp(export_data, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = err.replace("<","<").replace(">",">") + err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) + err = re.sub(r'\n','
',err) + if err.strip() == "XML schema validates": + report = "" + else: + report = "Problems with exporting with " + self.config \ + + " and validating with " + self.schema + ":
" + err.decode("utf8") + "
" + + return report, True diff --git a/modules/validation/legacy_xml_validator.py b/modules/validation/legacy_xml_validator.py new file mode 100644 index 0000000..f3cda1e --- /dev/null +++ b/modules/validation/legacy_xml_validator.py @@ -0,0 +1,26 @@ +from validator import Validator +from ..ether import exec_via_temp +import re + +# TODO: would have been ideal to write this without any filesystem operations +class LegacyXmlValidator(Validator): + def __init__(self, schema): + self.schema = schema + + def validate(self, doc): + report = "" + + if self.schema == '--none--': + return report + else: + schema = self.schema + command = "xmllint --htmlout --schema schemas/" + schema + ".xsd tempfilename" + out, err = exec_via_temp(doc, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) + err = re.sub(r'\n','
',err) + report += err + "
" + + return report diff --git a/modules/validation/meta_validator.py b/modules/validation/meta_validator.py new file mode 100644 index 0000000..22203ce --- /dev/null +++ b/modules/validation/meta_validator.py @@ -0,0 +1,55 @@ +from validator import Validator +import re + +class MetaValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.name = rule[3] + self.operator = rule[4] + self.argument = rule[5] + + def _apply_match(self, metadata): + report, tooltip = "", "" + for d in metadata: + if d[2] == self.name: + value = d[3] + match = re.search(self.argument, value) + if match is None: + report += "Metadata for " + self.name + " does not match pattern" + "
" + tooltip += "Metadata: " + value + "
" + "Pattern: " + self.argument + "
" + + return report, tooltip + + def _apply_exists(self, metadata): + report, tooltip = "", "" + exists = False + for d in metadata: + if d[2] == self.name: + exists = True + break + if exists is False: + report += "No metadata for " + self.name + '
' + + return report, tooltip + + def _apply_rule(self, metadata): + if self.operator == "~": + return self._apply_match(metadata) + elif self.operator == "exists": + return self._apply_exists(metadata) + else: + raise Exception("Unknown metadata validation operator: '" + str(self.operator) + "'") + + def validate(self, metadata, doc_name, doc_corpus): + out_dict = {"report":"", "tooltip":""} + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return {"report": "", "tooltip": ""}, False + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return {"report": "", "tooltip": ""}, False + + + report, tooltip = self._apply_rule(metadata) + return {"report": report, "tooltip": tooltip}, True diff --git a/modules/validation/validator.py b/modules/validation/validator.py new file mode 100644 index 0000000..87d472a --- /dev/null +++ b/modules/validation/validator.py @@ -0,0 +1,13 @@ +class Validator(object): + """ + Abstract class that all GitDox validations should inherit from. + When at all possible, all Validation classes should not produce any + side-effects: there should be no SQL queries, filesystem operations, + etc. caused by a validation. + + Conceptually, an instance of this class represents a single validation + "rule" against which a document will be checked. 
+ """ + + def validate(self, doc, *args, **kwargs): + raise NotImplementedError diff --git a/modules/validation/xml_validator.py b/modules/validation/xml_validator.py new file mode 100644 index 0000000..ab6cd0a --- /dev/null +++ b/modules/validation/xml_validator.py @@ -0,0 +1,34 @@ +from validator import Validator +from ..ether import exec_via_temp +import re + +class XmlValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.schema = rule[3] + + def validate(self, doc, doc_name, doc_corpus): + report = "" + + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return report, False + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return report, False + + schema = self.schema + command = "xmllint --schema schemas/" + schema + " tempfilename" + out, err = exec_via_temp(doc, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) + err = re.sub(r'\n','
',err) + if err == "XML schema validates": + report = "" + else: + report = "Problems validating with " + self.schema + ":
" + err + "
" + + return report, True diff --git a/paths.py b/paths.py index aa5524c..de29905 100755 --- a/paths.py +++ b/paths.py @@ -7,14 +7,23 @@ else: prefix = "" +gitdox_root = os.path.dirname(os.path.realpath(__file__)) + # to use password authentication, use a netrc file called .netrc in the project root -ether_url = ConfigObj(prefix + "users" + os.sep + "config.ini")["ether_url"] -if not ether_url.endswith(os.sep): - ether_url += os.sep +try: + ether_url = ConfigObj(gitdox_root + os.sep + "users" + os.sep + "config.ini")["ether_url"] + if not ether_url.endswith(os.sep): + ether_url += os.sep +except KeyError: + ether_url = "" def get_menu(): config = ConfigObj(prefix + "users" + os.sep + "config.ini") + + if "banner" not in config: + return "" banner = config["banner"] + if banner.startswith("http"): # Web resource resp = requests.get(banner) return resp.text diff --git a/popupPage.html b/popupPage.html deleted file mode 100644 index 24dd039..0000000 --- a/popupPage.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - -
-field name (e.g., author):
- - -***options*** - -
-field value (e.g., Besa):
-
- -
- - - diff --git a/popupPageCorpus.html b/popupPageCorpus.html deleted file mode 100644 index 63ca339..0000000 --- a/popupPageCorpus.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - -
-field name (e.g., author):
- - -***options*** - -
-field value (e.g., Besa):
-
- -
- - - diff --git a/requirements.txt b/requirements.txt index 1f74461..e49aa9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ +six lxml requests github3.py==0.9.3 passlib +pystache diff --git a/schemas/gum_export.ini b/schemas/gum_export.ini index c80f13b..a2564a6 100644 --- a/schemas/gum_export.ini +++ b/schemas/gum_export.ini @@ -1,7 +1,7 @@ -aliases = """{"text_id":"text@id","s_type":"s@type","hi_rend":"hi@rend","p_n":"p","p@n":"p","sp_who":"sp@who","list_type":"list@type","item_n":"item@n","figure_rend":"figure@rend","date_when":"date@when","date_from":"date@from","date_to":"date@to","date_notBefore":"date@notBefore","date_notAfter":"date@notAfter","head_rend":"head@rend","incident_who":"incident@who","p_rend":"p@rend","ref_target":"ref@target","sp_who":"sp@who"}""" +aliases = """{"text_id":"text@id","s_type":"s@type","hi_rend":"hi@rend","p_n":"p","p@n":"p","sp_who":"sp@who","sp_whom":"sp@whom","list_type":"list@type","item_n":"item@n","figure_rend":"figure@rend","date_when":"date@when","date_from":"date@from","date_to":"date@to","date_notBefore":"date@notBefore","date_notAfter":"date@notAfter","head_rend":"head@rend","incident_who":"incident@who","p_rend":"p@rend","ref_target":"ref@target","table_rend":"table@rend","table_rows":"table@rows","table_cols":"table@cols","cell_rend":"cell@rend","note_place":"note@place","note_n":"note@n"}""" tok_annos = """["pos","lemma"]""" -no_content = """["head","sic","q","item","list","p","s","figure","caption","date","incident","sp","ref","w","hi","quote"]""" -priorities = """["sp","head","p","figure","caption","list","item","quote","s","q","hi","sic","date","incident","w","pos","lemma","tok"]""" +no_content = """["head","sic","q","item","list","p","note","s","figure","caption","date","incident","sp","ref","w","hi","quote"]""" +priorities = """["sp","table","row","cell","head","p","figure","caption","list","item","quote","s","q","hi","sic","date","incident","w","pos","lemma","tok"]""" export_all = False 
template = """ %%body%% diff --git a/schemas/scriptorium_tei.ini b/schemas/scriptorium_tei.ini index 90b6a5e..870ec67 100644 --- a/schemas/scriptorium_tei.ini +++ b/schemas/scriptorium_tei.ini @@ -1,11 +1,9 @@ -aliases = """{"norm_group":"phr","chapter_n":"div1@n","verse_n":"p@n","orig":"w","pos":"w@type","lemma":"w@lemma","coptic_sent":"s","orig_group":"phr","pb@xml_id":"pb@xml:id","pb_xml_id":"pb@xml:id","morph":"m","verse":"div@n","cb_n":"cb@n","lb_n":"lb@n","hi_rend":"hi@rend","translation":"s@style","p_n":"p","p@n":"p","lang":"m|w@xml:lang"}""" -priorities = """["div","ab","div1","div2","p","pb","cb","lb","s","term","phr","w","hi","m","tok"]""" +aliases = """{"norm_group":"phr","chapter_n":"div1@n","verse_n":"p@n","orig":"w","pos":"w@type","lemma":"w@lemma","coptic_sent":"s","orig_group":"phr","pb@xml_id":"pb@xml:id","pb_xml_id":"pb@xml:id","morph":"m","verse":"ab@n","cb_n":"cb@n","lb_n":"lb@n","vid_n":"div@type=textpart","verse":"ab","hi_rend":"hi@rend","translation":"s@style","p_n":"p","p@n":"p","lang":"m|w@xml:lang"}""" +priorities = """["div","ab","div1","div2","div","p","pb","cb","lb","s","term","phr","w","hi","m","tok"]""" milestones = """["pb","lb","cb"]""" no_content = """["w","m","s","phr","div","div1","ab","div2","p","cb","pb","lb","term","hi"]""" export_all= False -template = """ - - +template = """ @@ -85,4 +83,4 @@ template = """ -""" \ No newline at end of file +""" diff --git a/templates/admin.mustache b/templates/admin.mustache new file mode 100644 index 0000000..9f2f5d2 --- /dev/null +++ b/templates/admin.mustache @@ -0,0 +1,228 @@ + + + + GitDox - Admin + {{> shared_head_content }} + + + + + {{{ navbar_html }}} +
+ {{> header }} +
+

GitDox - Administration

+

+ administration and user management + | + back to document list +

+ + +
+

User Management

+

Select users to delete:

+ +

+ +
+ delete +
+
+ + +
+

Enter user info to create new user:

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
username
password
realname
email
admin +
git username
git password
use two-factor auth
+

+ +
+ save +
+
+ {{#user_creation_warning}} +
{{.}}
+ {{/user_creation_warning}} + +

Batch download

+

Download all documents

+
    +
  • Documents will be downloaded in a zip file
  • +
  • The format of each document will depend on its active mode: +
      +
    • Metadata is added to XML files in a wrapping tag <meta key="value">
    • +
    • Documents in XML mode are downloaded as .xml, as they appear in the editor
    • +
    • Documents in spreadsheet mode are downloaded as .sgml to preserve potential span hierarchy conflicts
    • +
    +
  • +
  • You can choose custom configurations for exporting spreadsheet data if .ini files are available in the schemas/ directory
  • +
+
Corpora to export:
+ + +

+
Filter by status:
+ + {{{ status_select_html }}} +

+
Extension for spreadsheet files:
+ +

+
Export configuration for spreadsheets:
+ +

+
+ download +
+ +

Batch upload

+

Import data for multiple spreadsheets by uploading a zip archive of SGML files

+
    +
  • Document names are generated from file names inside the zip, without their extension (e.g. .sgml, .tt)
  • +
  • Metadata is taken from the <meta> element surrounding the document
  • +
  • Corpus name is taken from a metadatum corpus inside meta, else 'default_corpus'
  • +
  • Select XML mode to import into XML editor, or Spreadsheet to convert SGML spans into a new spreadsheet
  • +
+
+ + + + + + + + + + + + +
Mode: + +
+ +
+ +
+
+ {{#file_uploaded}} +
+ The file {{.}} was uploaded successfully +
+ {{/file_uploaded}} + {{#files_imported}} + Imported {{.}} files from archive +
+ {{/files_imported}} + +

Batch update DB

+

Execute multiple SQL updates, e.g. to assign documents to users from a list

+
    +
  • The uploaded file should be a tab delimited, two column text file
  • +
  • The first row contains the headers: +
    • in column 1, the criterion, one of 'corpus' or 'name' (=document name)
    • +
    • in column 2, the docs table column to update, e.g. 'assignee_username'
  • +
  • Subsequent rows give pairs of criterion-value, e.g. 'doc001 user1'
  • +
+
+ +
+ +
+ {{#sql_file_uploaded}} +
+ The file {{.}} was uploaded successfully +
+ {{/sql_file_uploaded}} + {{#sql_statements}} + Executed {{.}} DB updates +
+ {{/sql_statements}} + +
+
+ +

Database management

+
+ warning: this will wipe the database! +
+ +
+ init DB +
+
+
+
+ + diff --git a/templates/codemirror.html b/templates/codemirror.html deleted file mode 100644 index d626e9f..0000000 --- a/templates/codemirror.html +++ /dev/null @@ -1,329 +0,0 @@ - -
Save
**NLP** **github** - - diff --git a/templates/codemirror.mustache b/templates/codemirror.mustache new file mode 100644 index 0000000..e9bf87f --- /dev/null +++ b/templates/codemirror.mustache @@ -0,0 +1,480 @@ + + + + + + + + + + + + + +
+ +
+
+
+ Save +
+{{#nlp_service}} + {{#disabled}} +
{{{xml_button_html}}}
+
{{{spreadsheet_button_html}}}
+ {{/disabled}} + {{^disabled}} +
+ {{{xml_button_html}}} +
+
+ {{{spreadsheet_button_html}}} +
+ {{/disabled}} +{{/nlp_service}} + + + +{{#git_2fa}}{{/git_2fa}} +
Commit
+{{#git_commit_response}} +

{{.}} successful

+{{/git_commit_response}} + + + + + + + + + + + + + diff --git a/templates/editor.html b/templates/editor.html deleted file mode 100644 index b8e68ac..0000000 --- a/templates/editor.html +++ /dev/null @@ -1,139 +0,0 @@ - -**docname** - GitDox: Edit - - - - - - - - - - - - - - - - - - - - - - - - - -**navbar** -
- **header** -
-

GitDox: Edit

- **editor_help_link** -
- -
- - - - - - - - - - - - - -
- -
- - - - - -

Editor | back to document list

- - - - - - - - - - - - - - - - - - - - - - - -
Document Name: - -
-
Validate
Corpus Name: - -
-
-
-
-
Git Repo: -
-
XML Schema:**edit_schema**
Assigned to:**edit_assignee**
Status:**edit_status**
Mode:**edit_mode**
- - -**embedded_editor** - -

meta data

-**metadata** - -
Add document meta
- - - -

-**corpus_metadata** - -
Add corpus meta
- - - - - -
- - -
- - -
- -
-
- \ No newline at end of file diff --git a/templates/editor.mustache b/templates/editor.mustache new file mode 100644 index 0000000..9047876 --- /dev/null +++ b/templates/editor.mustache @@ -0,0 +1,205 @@ + + + {{docname}} - GitDox: Edit + {{> shared_head_content }} + + + + {{^first_load}} + + {{/first_load}} + + + {{{navbar_html}}} +
+ {{> header}} +
+

GitDox: Edit

+ {{{ editor_help_link_html }}} +
+ +
+ {{#doc_is_selected}} +
+
+ + + + + +

Editor | back to document list

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Document Name: + + {{#can_save}} +
+ +
+ {{/can_save}} +
+
Validate
+
Corpus Name: + + {{#can_save}} +
+ +
+ {{/can_save}} +
+
+
+
Git Repo: + + {{#can_save}} +
+ +
+ {{/can_save}} +
Assigned to: + +
Status: + +
Mode: + +
+ + {{#ether_mode}} + {{> ethercalc}} + {{/ether_mode}} + + {{^ether_mode}} + {{> codemirror}} + {{/ether_mode}} + +
    + + +
+
+
+
+ +
+
+ + {{#admin_gt_zero}} + + {{/admin_gt_zero}} + {{^admin_gt_zero}} + + {{/admin_gt_zero}} +
+ +
+ +
+ +
+ {{/doc_is_selected}} + {{^doc_is_selected}} +

No document selected | back to document list

+ {{/doc_is_selected}} +
+
+
+ + diff --git a/templates/ether.html b/templates/ether.html deleted file mode 100644 index 1d9b3ba..0000000 --- a/templates/ether.html +++ /dev/null @@ -1,19 +0,0 @@ - -
Save
**disabled_NLP** **github** - -

Export

- -

Generate XML from this spreadsheet using a stylesheet

-
- **stylesheet_select** - - -
- -

Upload

- -

Replace existing spreadsheet data by uploading an SGML or Excel file

-
- - -
\ No newline at end of file diff --git a/templates/ethercalc.mustache b/templates/ethercalc.mustache new file mode 100644 index 0000000..ca6a50c --- /dev/null +++ b/templates/ethercalc.mustache @@ -0,0 +1,63 @@ + + + + +
+ Save +
+ +{{#nlp_service}} + {{#disabled}} +
{{{xml_button_html}}}
+
{{{spreadsheet_button_html}}}
+ {{/disabled}} + {{^disabled}} +
+ {{{xml_button_html}}} +
+
+ {{{spreadsheet_button_html}}} +
+ {{/disabled}} +{{/nlp_service}} + + + + +{{#git_2fa}}{{/git_2fa}} +
Commit
+{{#git_commit_response}} +

{{.}} successful

+{{/git_commit_response}} + +

Export

+ +

Generate XML from this spreadsheet using a stylesheet

+
+ + + +
+ +

Upload

+ +

Replace existing spreadsheet data by uploading an SGML or Excel file

+
+ + +
diff --git a/templates/header.html b/templates/header.html deleted file mode 100644 index de32031..0000000 --- a/templates/header.html +++ /dev/null @@ -1,14 +0,0 @@ - diff --git a/templates/header.mustache b/templates/header.mustache new file mode 100644 index 0000000..1ab9b72 --- /dev/null +++ b/templates/header.mustache @@ -0,0 +1,18 @@ + diff --git a/templates/index.mustache b/templates/index.mustache new file mode 100644 index 0000000..d260043 --- /dev/null +++ b/templates/index.mustache @@ -0,0 +1,227 @@ + + + + GitDox: {{project}} + {{> shared_head_content}} + + + + + + {{{ navbar_html }}} +
+ + {{> header }} +
+

GitDox: Project {{project}}

+
+ {{user}} +
+ + +
+ + + admin + + + {{#admin_eq_three}} + + validation rules + + {{/admin_eq_three}} + +
+ validate +
+
+

For help getting started see the wiki

+
+
+ + +
Choose a corpus: + +
+
+
+
+
+ + {{#admin_gt_zero}} +
+ + New Document +
+ {{/admin_gt_zero}} + {{^admin_gt_zero}} +
+ + New Document +
+ {{/admin_gt_zero}} +
+
+ + {{#admin_gt_zero}} + + {{/admin_gt_zero}} + {{^admin_gt_zero}} + + {{/admin_gt_zero}} +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + {{#docs}} + + + + + + + + + + + + + + + + + + + + {{/docs}} + +
idcorpusdocumentstatusassignedmodevalidateactions
{{id}}{{corpus}}{{name}}{{status}}{{assignee}} + {{#xml}} +   + {{/xml}} + {{#ether}} +   + {{/ether}} + {{#other_mode}} + ‑ + {{/other_mode}} + +
+ {{#xml}} +   + {{/xml}} + {{#ether}} +   + {{/ether}} +   + {{#ether}} +   + {{/ether}} +
+
+ +
+ edit +
+
+
+
+ + {{#admin_gt_zero}} + +
+ delete +
+ + {{/admin_gt_zero}} + {{^admin_gt_zero}} + +
+ delete +
+ + {{/admin_gt_zero}} +
+
+
+
+ + + diff --git a/templates/landing.html b/templates/landing.html deleted file mode 100644 index d20e5bd..0000000 --- a/templates/landing.html +++ /dev/null @@ -1,71 +0,0 @@ - - - - GitDox: **project** - - - - - - - - - - - - - - - - - **navbar** -
- **header** -
-

GitDox: Project **project**

-
- **user** -
- - -
- -
- - admin
-
- - **validation_rules** - -
- - validate
-
-

For help getting started see the wiki

-

-

- - -
Choose a corpus: - -
-
-

- -

-

- -
- - New Document -
-
-

- **table** -
-
- - - diff --git a/templates/shared_head_content.mustache b/templates/shared_head_content.mustache new file mode 100644 index 0000000..5af015e --- /dev/null +++ b/templates/shared_head_content.mustache @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/templates/user_admin.mustache b/templates/user_admin.mustache new file mode 100644 index 0000000..4037fbf --- /dev/null +++ b/templates/user_admin.mustache @@ -0,0 +1,55 @@ + + + + GitDox - Admin + {{> shared_head_content }} + + + + {{{ navbar_html }}} +
+ {{> header }} +
+

Coptic XML transcription editor

+

edit user info | back to document list

+ +
+

Change password

+ + + + + + + + + +
username{{user}}
new password
+ +
+
+

Note: after you've changed your password you'll need to log in using your new password.

+ + {{#admin_eq_one}} +
+ + + + + + + + + + + + + +
new git username
new git password
use two-factor auth
+ +
+ {{/admin_eq_one}} +
+
+ + diff --git a/templates/validation_rules.mustache b/templates/validation_rules.mustache new file mode 100644 index 0000000..ba94323 --- /dev/null +++ b/templates/validation_rules.mustache @@ -0,0 +1,33 @@ + + + + GitDox - Validation Rules + {{> shared_head_content}} + + + + {{{navbar_html}}} +
+ {{> header}} +
+

GitDox - Validation Rules

+

+ return to index +

+ +
    + + + + +
+
+
+
+
+
+
+
+
+ + diff --git a/users/config.ini b/users/config.ini index 73f6e69..c3bcb2c 100644 --- a/users/config.ini +++ b/users/config.ini @@ -3,15 +3,15 @@ templatedir = templates/login/ # path to the login template directory (absolute controltemplates = templates/control/ # path to the control template directory skin = css/gum.css project = GitDox # Specify your project name here -banner = header.html #note you can also use web addresses to retrieve a banner from your project page, e.g. http://myproject.org/nav.html +# banner = header.html #note you can also use web addresses to retrieve a banner from your project page, e.g. http://myproject.org/nav.html cookiepath = "" # the 'super-url' of the scripts - for the cookie. Can be '' if no other script in your domain uses cookies adminuser = admin # the login name who is the *main* administrator account. This one cannot be deleted. editor_help_link =

For help getting started see the GitDox website

xml_nlp_button = """I|'m Tokenize""" spreadsheet_nlp_button = """ NLP""" -xml_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/tt_tokenize -spreadsheet_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/api -ether_url = /ethercalc/ +xml_nlp_api = "" # URL to call for service modifying the XML mode data, e.g. a tokenizer: https://corpling.uis.georgetown.edu/coptic-nlp/tt_tokenize +spreadsheet_nlp_api = "" # URL to call for a service manipulating the data before conversion to spreadsheet mode, e.g. an NLP pipeline: https://corpling.uis.georgetown.edu/coptic-nlp/api +ether_url = http://localhost/ethercalc/ # nlp service credentials nlp_user = user diff --git a/validate.py b/validate.py new file mode 100755 index 0000000..97172df --- /dev/null +++ b/validate.py @@ -0,0 +1,307 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +from collections import defaultdict +import re, sys +import cgi, cgitb +import json + +from paths import ether_url +from modules.gitdox_sql import * +from modules.ether import get_socialcalc, make_spreadsheet, exec_via_temp, get_timestamps, parse_ether +from modules.validation.legacy_xml_validator import LegacyXmlValidator +from modules.validation.xml_validator import XmlValidator +from modules.validation.meta_validator import MetaValidator +from modules.validation.ether_validator import EtherValidator +from modules.validation.export_validator import ExportValidator + + +def highlight_cells(cells, ether_url, ether_doc_name, doc_id=None, dirty=True): + old_ether = get_socialcalc(ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) + old_ether_lines = old_ether.splitlines() + new_ether_lines = [] + + old_color_numbers = [] + new_color_number = '1' + for line in old_ether_lines: + color_line = re.match(r'color:(\d+):(rgb.*$)', line) + if color_line is not None: + if color_line.group(2) == 'rgb(242, 242, 142)': + old_color_numbers.append(color_line.group(1)) + else: + new_color_number = str(1 + int(color_line.group(1))) + if 
len(old_color_numbers) > 0: + new_color_number = old_color_numbers[0] + + for line in old_ether_lines: + + parts = line.split(":") + # Check for pure formatting cells, e.g. cell:K15:f:1 + if len(parts) == 4: + if parts[2] == "f": # Pure formatting cell, no content + continue + + parsed_cell = re.match(r'cell:([A-Z]+)(\d+)(:.*)$', line) + if parsed_cell is not None: + col = parsed_cell.group(1) + row = parsed_cell.group(2) + col_row = col + row + other = parsed_cell.group(3) + bg = re.search(r':bg:(\d+)($|:)', other) + if bg is not None: + bg = bg.group(1) + span = parts[-1] if "rowspan:" in line else "1" + + spanned_rows = [col + str(int(row) + x) for x in range(int(span))] + highlighted_spanned_rows = [x for x in spanned_rows if x in cells] + if len(highlighted_spanned_rows) > 0: + if bg is not None: + if bg != new_color_number: + new_line = re.sub(r':bg:' + bg, r':bg:' + new_color_number, line) + else: + new_line = line + else: + new_line = line + ':bg:' + new_color_number + else: + if bg is not None: + if bg in old_color_numbers: + new_line = re.sub(r':bg:' + bg, r'', line) + else: + new_line = line + else: + new_line = line + new_ether_lines.append(new_line) + elif re.match(r'sheet:', line) is not None: + new_ether_lines.append(line) + if new_color_number not in old_color_numbers: + new_ether_lines.append('color:' + new_color_number + ':rgb(242, 242, 142)') + else: + new_ether_lines.append(line) + + new_ether = '\n'.join(new_ether_lines) + make_spreadsheet(new_ether, ether_url + "_/" + ether_doc_name, "socialcalc") + + +def validate_doc_meta(doc_id, editor): + # metadata validation + report = {"report":"","tooltip":""} + rules = [MetaValidator(x) for x in get_meta_rules()] + + meta = get_doc_meta(doc_id) + doc_info = get_doc_info(doc_id) + doc_name = doc_info[0] + doc_corpus = doc_info[1] + meta_rule_fired = False + for rule in rules: + res, fired = rule.validate(meta, doc_name, doc_corpus) + meta_rule_fired = meta_rule_fired or fired + if editor and 
len(res['tooltip']) > 0: + report["tooltip"] += ("""
""" + + res['report'][:-5] + + """ """ + + "" + res['tooltip'] + "" + + "
") + else: + report["report"] += res['report'] + + if not meta_rule_fired: + report["report"] = "No applicable metadata rules
" + elif len(report["report"]) == 0: + report["report"] = "Metadata is valid
" + else: + report["report"] = "Metadata Problems:
" + report["report"] + + return report + +#@profile +def validate_doc_ether(doc_id, editor=False, dirty=True): + """ + Validate a document in spreadsheet mode + + :param doc_id: doc ID in the sqlite DB docs table + :param editor: boolean - is this being run by user from editor.py? + :param dirty: boolean - if spreadsheet already cached, has its SocialCalc changed since last recorded timestamp? + :return: dictionary with validation report + """ + ether_rules = [EtherValidator(x) for x in get_ether_rules()] + export_rules = [ExportValidator(x) for x in get_export_rules()] + + doc_info = get_doc_info(doc_id) + doc_name = doc_info[0] + doc_corpus = doc_info[1] + + ether_doc_name = "gd_" + doc_corpus + "_" + doc_name + socialcalc = get_socialcalc(ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) + parsed_ether = parse_ether(socialcalc,doc_id=doc_id) + + report = '' + cells = [] + + # check metadata + meta_validation = validate_doc_meta(doc_id, editor) + + ether_rule_fired = False + for rule in ether_rules: + res, fired = rule.validate(parsed_ether, doc_name, doc_corpus) + ether_rule_fired = ether_rule_fired or fired + if editor and len(res['tooltip']) > 0: + report += ("""
""" + + res['report'][:-5] + + """ """ + + "" + res['tooltip'] + "" + + "
") + else: + report += res['report'] + cells += res['cells'] + if not ether_rule_fired: + report = "No applicable spreadsheet validation rules
" + elif report: + report = "Spreadsheet Problems:
" + report + else: + report = "Spreadsheet is valid
" + + export_report = "" + export_rule_fired = False + for rule in export_rules: + res, fired = rule.validate(socialcalc, doc_id, doc_name, doc_corpus) + export_rule_fired = export_rule_fired or fired + export_report += res + if not export_rule_fired: + export_report = "No applicable export validation rules
" + elif export_report: + export_report = "Export Problems:
" + export_report + else: + export_report = "Export is valid
" + + if editor: + highlight_cells(cells, ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) + full_report = report + meta_validation["report"] + export_report + if len(full_report) == 0: + full_report = "Document is valid!" + return full_report + else: + json_report = {} + json_report['ether'] = report + json_report['meta'] = meta_validation["report"] + json_report['export'] = export_report + return json_report + +#@profile +def validate_doc_xml(doc_id, schema, editor=False): + rules = [XmlValidator(x) for x in get_xml_rules()] + + doc_info = get_doc_info(doc_id) + doc_name = doc_info[0] + doc_corpus = doc_info[1] + doc_content = get_doc_content(doc_id) + + # Schemas used to be assigned per document--do not support this anymore + #xml_report = LegacyXmlValidator(schema).validate(doc_content) + + xml_report = "" + xml_rule_fired = False + for rule in rules: + res, fired = rule.validate(doc_content, doc_name, doc_corpus) + xml_report += res + xml_rule_fired = xml_rule_fired or fired + if not xml_rule_fired: + xml_report = "No applicable XML schemas
" + elif xml_report: + xml_report = "XML problems:
" + xml_report + else: + xml_report = "XML is valid
" + + meta_validation = validate_doc_meta(doc_id, editor) + meta_report = meta_validation["report"] + + # report + if editor is True: + try: + #full_report = xml_report.decode("utf8") + meta_report.decode("utf8") + full_report = xml_report + meta_report + except Exception as e: + full_report = "[Encoding error: " + str(e) + "]" + + return full_report + else: + json_report = {} + + json_report['xml'] = xml_report + json_report['meta'] = meta_report + return json_report + +#@profile +def validate_all_docs(): + docs = generic_query("SELECT id, name, corpus, mode, schema, validation, timestamp FROM docs", None) + doc_timestamps = get_timestamps(ether_url) + reports = {} + + for doc in docs: + doc_id, doc_name, corpus, doc_mode, doc_schema, validation, timestamp = doc + if doc_mode == "ether": + ether_name = "_".join(["gd", corpus, doc_name]) + if ether_name in doc_timestamps and validation is not None and len(validation) > 0: + if timestamp == doc_timestamps[ether_name]: + reports[doc_id] = json.loads(validation) + else: + reports[doc_id] = validate_doc_ether(doc_id, dirty=True) + update_validation(doc_id, json.dumps(reports[doc_id])) + update_timestamp(doc_id, doc_timestamps[ether_name]) + else: + if ether_name in doc_timestamps: + new_time = doc_timestamps[ether_name] + else: + new_time = None + if new_time == timestamp: + reports[doc_id] = validate_doc_ether(doc_id, dirty=False) + else: + reports[doc_id] = validate_doc_ether(doc_id, dirty=True) + #reports[doc_id] = {"ether":"sample_ether","meta":"sample_meta"} + update_validation(doc_id, json.dumps(reports[doc_id])) + if ether_name in doc_timestamps: + update_timestamp(doc_id, doc_timestamps[ether_name]) + elif doc_mode == "xml": + if validation is None: + reports[doc_id] = validate_doc_xml(doc_id, doc_schema) + try: + validation_report = json.dumps(reports[doc_id]) + except UnicodeDecodeError: + reports[doc_id]["xml"] = "UnicodeDecodeError; unable to print XML validation report for " + doc_name + validation_report 
= json.dumps(reports[doc_id]) + update_validation(doc_id,validation_report) + else: + reports[doc_id] = json.loads(validation) + + return json.dumps(reports) + +if __name__ == "__main__": + + mode = "" + schema = "" + if len(sys.argv) > 1: + from argparse import ArgumentParser + p = ArgumentParser() + p.add_argument("-d","--doc",help="doc ID in gitdox.db or 'all'", default="all") + p.add_argument("-i","--invalidate",action="store_true",help="invalidate all documents before running validation") + + opts = p.parse_args() + doc_id = opts.doc + if opts.invalidate: + invalidate_doc_by_name("%","%") + if doc_id != "all": + _, _, _, _, _, mode, schema = get_doc_info(doc_id) + else: + parameter = cgi.FieldStorage() + doc_id = parameter.getvalue("doc_id") + mode = parameter.getvalue("mode") + schema = parameter.getvalue("schema") + + if doc_id == "all": + print "Content-type:application/json\n\n" + print validate_all_docs().encode("utf8") + else: + print "Content-type:text/html\n\n" + if mode == "ether": + print validate_doc_ether(doc_id, editor=True).encode("utf8") + elif mode == "xml": + print validate_doc_xml(doc_id, schema, editor=True).encode("utf8") diff --git a/validation_rules.py b/validation_rules.py index c21a203..6288666 100755 --- a/validation_rules.py +++ b/validation_rules.py @@ -1,125 +1,17 @@ #!/usr/bin/env python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- -import cgi, cgitb +import cgi, cgitb import os, platform from modules.logintools import login from modules.gitdox_sql import * from modules.configobj import ConfigObj +from modules.renderer import render from paths import get_menu -# Support IIS site prefix on Windows -if platform.system() == "Windows": - prefix = "transc\\" -else: - prefix = "" - -scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep -userdir = scriptpath + "users" + os.sep -templatedir = scriptpath + "templates" + os.sep -config = ConfigObj(userdir + 'config.ini') -skin = config["skin"] -project = config["project"] - 
def load_validation_rules(): - - page= "Content-type:text/html\r\n\r\n" - page+=""" - - - - - - - - - - - - - - - - - **navbar** -
- **header** -
-

GitDox - Validation

-

validation rule management | back to document list

- - - - - """ - - page+="""""" - - page+="""
""" - - - page+="
" - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) - page = page.replace("**project**",project) - page = page.replace("**skin**",skin) - - return page - + render_data = {} + return render("validation_rules", render_data) def open_main_server(): thisscript = os.environ.get('SCRIPT_NAME', '') @@ -130,6 +22,7 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) admin = userconfig["admin"] if admin == "3": + print "Content-type:text/html\r\n\r\n" print load_validation_rules() diff --git a/validation_rules_service.py b/validation_rules_service.py new file mode 100755 index 0000000..07d058f --- /dev/null +++ b/validation_rules_service.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import json +import cgi +import os +import platform +from modules.gitdox_sql import * +from modules.logintools import login + +parameter = cgi.FieldStorage() +action = parameter.getvalue("action") + +# for rules +doc = parameter.getvalue("doc") +corpus = parameter.getvalue("corpus") +domain = parameter.getvalue("domain") +name = parameter.getvalue("name") +operator = parameter.getvalue("operator") +argument = parameter.getvalue("argument") +id = parameter.getvalue("id") + +# for schemas +schema_dir = os.path.dirname(os.path.realpath(__file__)) + os.sep + 'schemas' +extension = parameter.getvalue("extension") + +# for sorting +sort = parameter.getvalue("jtSorting") + +def row_to_dict(row): + return {'corpus': row[0], + 'doc': row[1], + 'domain': row[2], + 'name': row[3], + 'operator': row[4], + 'argument': row[5], + 'id': row[6]} + +def list_rules(): + resp = {} + try: + parameter = cgi.FieldStorage() + rules = get_validate_rules(sort=sort, domain=domain) + + json_rules = [row_to_dict(row) for row in rules] + resp['Result'] = 'OK' + resp['Records'] = json_rules + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 
'Something went wrong while attempting to retrieve the list of rules.' + print json.dumps(resp) + +def create_rule(): + resp = {} + try: + id = create_validate_rule(doc, corpus, domain, name, operator, argument) + resp['Result'] = 'OK' + resp['Record'] = {'doc': doc, + 'corpus': corpus, + 'domain': domain, + 'name': name, + 'operator': operator, + 'argument': argument, + 'id': id} + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while attempting to create a new rule.' + print json.dumps(resp) + +def update_rule(): + resp = {} + try: + update_validate_rule(doc, corpus, domain, name, operator, argument, id) + resp['Result'] = 'OK' + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while attempting to update a rule.' + print json.dumps(resp) + +def delete_rule(): + resp = {} + try: + delete_validate_rule(id) + resp['Result'] = 'OK' + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while trying to delete a rule.' + print json.dumps(resp) + + +def list_schemas(): + resp = {} + try: + resp['Result'] = 'OK' + resp['Options'] = [{"DisplayText": x, "Value": x} + for x in os.listdir(schema_dir) + if x.endswith(extension)] + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while trying to list schemas.' 
+ print json.dumps(resp) + +def open_main_server(): + thisscript = os.environ.get('SCRIPT_NAME', '') + loginaction = None + theform = cgi.FieldStorage() + scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + userdir = scriptpath + "users" + os.sep + loginaction, userconfig = login(theform, userdir, thisscript, loginaction) + user = userconfig["username"] + admin = userconfig["admin"] + + print "Content-type:application/json\r\n\r\n" + if action == "list": + list_rules() + elif action == "listschemas": + list_schemas() + elif user == "demo": + print json.dumps({'Result': 'Error', 'Message': 'Demo user may not make changes.'}) + elif action == "create": + create_rule() + elif action == "update": + update_rule() + elif action == "delete": + delete_rule() + else: + print json.dumps({'Result': 'Error', + 'Message': 'Unknown action: "' + str(action) + '"'}) + +if __name__ == '__main__': + open_main_server()