From 376c5f08cdc24ab67ed371ab49fab4f2c8522cac Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 18 Sep 2018 23:36:05 -0400 Subject: [PATCH 001/135] replace login logo with gitdox logo --- img/login_logo.png | Bin 4472 -> 2651 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/img/login_logo.png b/img/login_logo.png index b763ecfd2b5bc27de5ed7581692f8328feb38d60..2c560da589f5fc050f8ccb1c0056e15f3ac84d66 100644 GIT binary patch literal 2651 zcmchZc|4SB8^Mo-Jj2WKToc^o0GJpilmT` zkhC)z=?TShXw*xHL(iM-`5+X8<2{|u2|XD-@&!uvUA4#B3kkIp9^m7|pnQKU+BaTE zNIpR@gg=I9BtwkjjI{So4q{i%JdzGPGO#BaJvyVm@JAfAIAIs&i|k_zq5jEySXFZCV58Jr)A`BqJL?4Q?&fz+E_y}rgaSBqMte&a5Zj^>yLXg}~QOaSo5V0tgYi(Sdh+MtX6hE0t38fTXub`08*0 zXoVE=kfK%}rgs~9VADh2^VwG^t^gr)!B!x*KYO+%?vW~FsJirtS0Eqjka(fQ5<_;ly>m{}Qe6Jq(q zvjI4>hv10;8IS#an8zf?RX56}79=qYVHD?bPr4ezbu{lVVx&85cLXfbeKTB!_#pdG z?I)?;RpR4iw?meC&qbG$M=3dpR7I`LmM23C#M*Rkt1Z2Bbn1iR&cFdbS5@Pi# zEFwxTq4KT_mzJ}qpHr;k7jN|xiVTO^HY4`B#o9Ekqn+NVx4InHIaW=m+!CBdX=zSCrXBRPR$qrmPUZv1)4P2Jm%rtkHj_hT(VtV<0w)eskeGh6#4 zIhD)9EQEUzz(Ym>;dW^|ij0u?b~`Hh?#Y-c`iuyrx-y*V{fA{qsgzG{+or09EfEiO zpj$d8d5>b7%sJXTJ9sVSLI43wVoxbCj@m7aRw@j~N-tkgZS^_sK*6|>8ucURUbA8Q z$A>K|&lk=+Hoy(eA>^D65<@XzFbu=Cc*JzeGuN9G;f|PgH?4`A(k8F)x49v(C-z;D z4&copLeYG0&p!^z66g8Zf+V#+$0_xceHx^1KSgbB#XF0Ku1ckM1sxu~Cbk@Elc@Ad zsG4)>e49LLr=!^|c5K@6@5l3Kn)b=TgDXvNmr`h}rI*R`vNul-syT}Tr^p;QIzdq(?DCRUIXB1d=z1-&!j@DGG7X zZ+31XgXL*@gb1XP>y_5EllPsXQYko_!-yc)@vm)ho8JN(bGC+D#5w%V^}e8W<-1xu z=tPrkHs=Nk>AN?L3wyki{Ekkn&JDaNLtK7!LMQL*(ylOk*I~Tuh6z=SC?s)H&Embe zxH9!zVN29>+vjJ9_eqiT1Tkf+H>^Gdeq4<51RzYP5LpjuKTbK7_-pKT4{>8T_**)~ z;ZP}E)luhsm0>hh0lH{9t&gCx#}r%pegQcX>L0vt`Go#_SlHgq2EDT|QYVaVQ^eA7 z`+{F(_0emrAH}{ZY1=vq`wi*g6Mc7af6g1PMmMqZX1!@yozv9nz!wSnhQ|V}61|;L z$&vN;ZUIVevt>&%{vjQTrPt~g(U>a}3oFPhqfcQR!@#cX$)Ozfrn7azt?@jt77 zhT6}n647Nm=IZhDO!Qr2bGTLy`BsrS?RhR=IYuSlMKC&Y7A7XdCp=wP?5G&XLC8Fl z+sB&eh#zphXSGT-jq-gOY;SD%KFZ4E8Zxy=3OVy4cNy*n*grAnmNk$^a{Pnjiw#BY 
zy+w9?6OVUq4(RX-ZJjfB)v(A;2hk5bL}ku0Bx2Z@wUn~YrMB;F}I~yR+(oe*^lAJqr&hp+$V_*I4M|u;uKX?c>VT_)_f@C0r0tW7kyX z{l|HC+CD?SR=v37I@=U9iCAst3G228Ifg=U^2Y+F4I+Qr5d=dGocqgqw!mC zDflpuq*ce&Nh1mv)OZaZsrLU42yInmWjO<Y_^gC`nELB;C~1f z)DZCKLoWsh$=bWUN6{%qlguetUKm_-S7Fy8%CsmlIhnb>xp`wx^LU6XPm8C^idd-?KxrUXlud6^l*%*m&IM=w7je@6YybcN literal 4472 zcmV-;5r^)HP)x zNsnCDb^gw9->d3@Y_i!cijzcChoc}BA6Kf044_D;Y8%-L_`F0$Ku7z5s`Ng zmOwB|gu6QcVIcw$tE#)3SwwhvL};x8a+yR-91%>!#Lgb>A`%f1K}1B9ZvZGXm&HN> zAi^U8fdGMWM?~X|^UDCjLRD3C+_;-L>}%QM5Ln3+^m^dAc(}RH8Wur)$qu$2`#n<2ROnJ0R}UVAi!ZQSs zAw+MCR_%RUU_01bQ2LeDwI`<8ti0}v^2nzsr^N25|n~1{0 z$TD#hL`3eMcgspR+5j_qI0Z$7GIM4svoJmnARxj;3vm|McsnLGcOpcDh-C1XCF5~a zqY*(Y5&@uD6A@v-xPbsDLrBa-meDkB1sO+#5x*-Y6APDE5xi6rAE z?q)_zW=0@l8aGKGf|+^N`C=);l4X6gfegP#(4s%`#l*S9eABFj2MC(iTWgewi*Pp) z9rG9ngm4B0*`bkNwT!h=P4ty z)*`~)BD|CifQp(~R=PLO>=P3)Wz`CacIxi6Rc1E#1lln(5g@2Yi$xDH5wo{C3_})< zW|c4z0wR)Kl96z8DKhCMiIf1sor%EQm11tfk`1S#KD0KQ&vy>%{(SYqdbc_apw;Tt zhJ*RcgN3B13bzOXhB|y+GvO9iN)gqBl3AU(TQ(UpDKlrNL`7KKot%Q5m=Td}bkus; z5E+23bcnEK!;^!(dr!CCzrSmmfmz%w>ySSFr@!LG3q)>aZmqS1-#}!5wG<|5 z&6>6B(yr@h4 zaJTNyooVHTlc&FOe*Klx{o1s7i-@9SGMTj2YOAfa2&e4y*67y1emmEP5#5mpmylIT z=i!K$Or{Z0Yn{($S)+TmwjcfE(}(ZgsRsiqAG>h!%v+aEed&x$%IDCVt+t?G zuRnV5^Sj$0Jur)ITGp;?o_*`$>V>snq%f#$Xl7Y>s+ujRs+tXdgpp=ew4@a1N|$8l zZmqR^UY*s)KfUw#N4E~|Y*!FZ%f{=c&wTyz>bVnAbW|mvkw4rQWP#~(tJBSum5V1n z{r>dM4{jbF4EyihefH$w((k{ze)$v>tu-GxKb6I}Bvn;2OT~~~!V9$1QkYqSnP|bI zsQbh1x34|=>7Cd)6hL}8^~Txr-*|ax^H>IN1l=$O#~1RG8VA72>Ema<_Ttk!+fQz8 zTZ_Y6Pwc-wy1HDBojdMfDUOolm^iUanI#q+nxRUN&+06t_(FO`#KG3?&F_5h=-qoZ zR0=?4<4b4G{mzx?`f9>fE;Le{g1aj0)fXjAK&}g{pF37 zGSO5y$|@>JD*&{$nqo;+X%RDX8-^heUF$@Ina%9r@$RkfeE8_SyHPEGAX>hB>cSsf z?N%pvp}vk}n!J(?o-rQkFxxr&{nR8~g{ zD5=@Q>%nmUe{Mbe-#gKa00Nd*`Y->*S0^Wyh&UFj9MROAh`O#L0&^KF6_N6kMch4W z>BTo+xcI9tn%R-Qj)+qFh!9ccaTb`(X4v@ZA#?^ zOg2}}eeEUEQLF~S!WN*2h!zoj-)9E%b773y79fx?OVdVyZ4Xgf*{&_<6Jq`CGwdcb*|?=TN^~QwU%R=g^e)e;qcZP z6PHqmsF`uFxn;%)Bq}Lq5s@9pOr>2}^FYeEt%@%8n>51vd z%crKV`=kMLPEmbm57+hnl)=g 
zltVRx%`N8k?7^-_A{`tkt9{NOMb*Q@90+%daqbQ?zyHpi-Fwg2gNY7zXOG_hY>!#~^A`(u|Bt)dv zphZ)(dDj2G!H1PU0z?m zypfys z7{WmT96sK!2ZM^LXj1&qCKE52W#i?IxBl|0zx%(1JCA?^1U&e~!wYY|$ihs8g){uBy1?U59hrg{0BRkMhGr%v z=J06VBAl2r56#lE;1p~*-6TQ@Gv%mJv>XwC%}z>LUaL%X{pC}hI2Hi#^powUx3|-H z8R<(^Rf`m6PF0|)sS!PVLDYmV%%4~|gX8cHS~y~w&UD>#_ar(8>4mctZfq1sH|IXk4co>E;8bFH0 zTu^HMT5DSo{q7YHITEgPnu<-h8yEK&@G=&CD_eA|l`V(_c#&8sW^s z%wv--Y%~DV)hQ`&-Q22&6_F!M>^|BLi!-l1k95M)5@YWBNmd3k1JSIdh3W2roT!M1 z0#GgC@#w?H@BH15rIYn@CrL&xS7g-iS(lpr!O^gs-QW=f65(b|Sj=tn%EpDSoWK6| z^1%c|U?LXOn^2w81Ke_$*`a=Ny@#V|Eb?Mw2 zXQ+(hM?{c}XF3+6-j#3v`PXB-3$px6DNHQP$p$$=<@rFr+#i4b_`$Qoy~q2>xCB_W zr+1$|z5CRwF$otYL7K6NHa6Jd)5EP#pWJ@$?)C58xc>Hyy{&x`UOT&X`5Tun|K>~m za-W+?xE;}GVUW8!-TwE#!2;IZokmvSfFnSnNd`wx71iCx&p!U)%^N?svAeZD<|loU zB5P;XPMlj?T3;?p+D$t5nC;CEo*wLN9qioO+1=U;4+6@xod5Ej_yl{ZvcQ<}shuHngL?i1L}MXD+^ZVe|6F$e!E^VxyS~aVm4?qYa9*>TQWZ_tudAe|#J_O(av{v)*#LWn! z^NIiw13?_wsH`L+AU?LaHa$Mwe0lTW>A}|RCy#D!?c95|`)GH*KP<9_ezjjeck;|D zr_Wx!uySfidSWJb6Om>ug=R!ZFtMaU4G)rZfpXG!y7@1EJ!Td_1hcSkhCQt|5+Neb z!x4al$w9&h5R>JBq(=mK^IDgxqEwAt-xrokRAY)`o#V3MHae$C+PMOE(qO!-GAm zLjWtMmdrfD39PC_VZ#h=W;M?nVsQ|xrDXT0mWfWSRuRde1qjYT9g8!^q1J?DVe#-` zHWwBa(Ncu9wIPimVQH-n)oB!tNVA~M4-XGo%8{;gW{rpdRxL!Nr6XduiU|048jHnw zYcpfw*6M6N(@~Ye-DsT6SZk|E^O^Lt0E7h!VF8gh3v)9AfnvNaCxLKh7Gkc$z`{gi z=B4jhI3>EUxHT5BRtXq!n-(HLBmfr9(=eD9)s(7%U=fv))%gFP0I9&ZFfj%I0000< KMNUMnLSTX^sHLy~ From e41ae2f9045ede3943ce20d92db278d5ca8a23cc Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 18 Sep 2018 23:39:31 -0400 Subject: [PATCH 002/135] fix bug where header is getting added twice --- index.py | 3 +-- templates/landing.html | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/index.py b/index.py index 55c6ddd..9311dd0 100755 --- a/index.py +++ b/index.py @@ -22,6 +22,7 @@ prefix = "" project = "Scriptorium" +cgitb.enable() def make_options(**kwargs): @@ -222,7 +223,6 @@ def load_landing(user, admin, theform): menu = menu.encode("utf8") landing = open(prefix + "templates" + 
os.sep + "landing.html").read() - header = open(prefix + "templates" + os.sep + "header.html").read() scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep userdir = scriptpath + "users" + os.sep @@ -233,7 +233,6 @@ def load_landing(user, admin, theform): landing = landing.replace("**max_id_plus1**", str(max_id + 1)) landing = landing.replace("**user**", user) landing = landing.replace("**project**", project) - landing = landing.replace("**header**", header) landing = landing.replace("**skin**", skin) landing = landing.replace("**validation_rules**", validation_rules) landing = landing.replace("**corpora**", corpus_list) diff --git a/templates/landing.html b/templates/landing.html index d20e5bd..d1aa59d 100644 --- a/templates/landing.html +++ b/templates/landing.html @@ -20,7 +20,6 @@ **navbar**
- **header**

GitDox: Project **project**

From a446c984d12cf21886f326498c1320c14a49e2a3 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 18 Sep 2018 23:39:42 -0400 Subject: [PATCH 003/135] clean up header html --- templates/header.html | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/templates/header.html b/templates/header.html index de32031..14495f7 100644 --- a/templates/header.html +++ b/templates/header.html @@ -1,14 +1,16 @@ - " - header = open(templatedir + "header.html").read() page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) page = page.replace("**project**",project) page = page.replace("**skin**",skin) @@ -470,9 +467,6 @@ def load_user_config(user,admin,theform): **navbar**
-

Coptic XML transcription editor

edit user info | back to document list

@@ -503,9 +497,7 @@ def load_user_config(user,admin,theform): page += "\t\t\t
\t\t\n
\t\n
\n\n" - header = open(templatedir + "header.html").read() page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) page = page.replace("**project**",project) page = page.replace("**skin**",skin) diff --git a/editor.py b/editor.py index 66cba82..1428a33 100755 --- a/editor.py +++ b/editor.py @@ -465,9 +465,7 @@ def load_page(user,admin,theform): page = page.replace('onblur="validate_repo();"','onblur="validate_repo();" disabled="disabled" class="disabled"') page = page.replace('''
''','''
''') - header = open(templatedir + "header.html").read() page = page.replace("**navbar**", get_menu()) - page = page.replace("**header**", header) page = page.replace("**project**", project) page = page.replace("**skin**", skin) page = page.replace("**editor_help_link**",editor_help_link) diff --git a/index.py b/index.py index 9311dd0..1fcc81c 100755 --- a/index.py +++ b/index.py @@ -150,7 +150,7 @@ def load_landing(user, admin, theform): if not max_id: # This is for the initial case after init db max_id = 0 - table = """""" + table = """
""" table += """""" table += """ diff --git a/templates/editor.html b/templates/editor.html index b8e68ac..31c2d63 100644 --- a/templates/editor.html +++ b/templates/editor.html @@ -40,10 +40,9 @@ **navbar**
- **header** -
+

GitDox: Edit

- **editor_help_link** +**editor_help_link**
@@ -136,4 +135,4 @@

meta data

- \ No newline at end of file + diff --git a/validation_rules.py b/validation_rules.py index c21a203..a7bcaa8 100755 --- a/validation_rules.py +++ b/validation_rules.py @@ -56,7 +56,6 @@ def load_validation_rules(): **navbar**
- **header**

GitDox - Validation

validation rule management | back to document list

@@ -112,9 +111,7 @@ def load_validation_rules(): page+="
" - header = open(templatedir + "header.html").read() page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) page = page.replace("**project**",project) page = page.replace("**skin**",skin) From 305b3f77150ace0c36bad68aa9b9126e36a6661a Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 18 Sep 2018 23:51:22 -0400 Subject: [PATCH 006/135] allow no banner on config --- paths.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paths.py b/paths.py index aa5524c..7a7cb88 100755 --- a/paths.py +++ b/paths.py @@ -14,7 +14,11 @@ def get_menu(): config = ConfigObj(prefix + "users" + os.sep + "config.ini") + + if "banner" not in config: + return "" banner = config["banner"] + if banner.startswith("http"): # Web resource resp = requests.get(banner) return resp.text From 732c0cfe83796c5c6345a97f016ecd5589e56f89 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 19 Sep 2018 00:05:27 -0400 Subject: [PATCH 007/135] add codemirror search and replace support --- templates/codemirror.html | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/templates/codemirror.html b/templates/codemirror.html index d626e9f..2646200 100644 --- a/templates/codemirror.html +++ b/templates/codemirror.html @@ -1,4 +1,10 @@ - + + + + + + +
Save
**NLP** **github** - - - + + + + - +
+ +
+
Save
**NLP** **github** + + + + + + + + + + + From c816bcf3fef8ea57bc76a4fba8dfd5af25f95177 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 19 Sep 2018 00:51:34 -0400 Subject: [PATCH 009/135] remove unsafe cgitb.enable calls --- export.py | 2 +- index.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/export.py b/export.py index 594cd21..6ad9294 100755 --- a/export.py +++ b/export.py @@ -104,7 +104,7 @@ def export_doc(doc_id, stylesheet=None): #print("Content-type:text/html\r\n\r\n") import cgitb - cgitb.enable() + #cgitb.enable() from paths import ether_url thisscript = os.environ.get('SCRIPT_NAME', '') diff --git a/index.py b/index.py index 1fcc81c..de80d1d 100755 --- a/index.py +++ b/index.py @@ -22,7 +22,6 @@ prefix = "" project = "Scriptorium" -cgitb.enable() def make_options(**kwargs): From fdf0a863c953b61530e802a5a536f64252b5689a Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Wed, 19 Sep 2018 22:26:19 -0400 Subject: [PATCH 010/135] Ensure ampersand escaping works correctly in XML mode --- editor.py | 1 + js/editor.js | 4 ++-- templates/codemirror.html | 5 +++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/editor.py b/editor.py index 66cba82..0e29081 100755 --- a/editor.py +++ b/editor.py @@ -239,6 +239,7 @@ def load_page(user,admin,theform): if theform.getvalue('code'): text_content = theform.getvalue('code') text_content = text_content.replace("\r","") + text_content = re.sub(r'&(?!amp;)',r'&',text_content) # Escape unescaped XML & text_content = unicode(text_content.decode("utf8")) if user != "demo": if int(doc_id)>int(max_id): diff --git a/js/editor.js b/js/editor.js index d0f6e8e..1d1bb2c 100755 --- a/js/editor.js +++ b/js/editor.js @@ -31,9 +31,9 @@ function validate_doc() { function do_save(){ if (document.getElementById('code')!=null){ - val = document.getElementById('code').value.replace('&','&'); - document.getElementById('code').value = val; + val = 
document.getElementById('code').value.replace(/&(?!amp;)/g,'&'); editor.getDoc().setValue(val); + document.getElementById('code').value = val; } document.getElementById('editor_form').submit(); } diff --git a/templates/codemirror.html b/templates/codemirror.html index d626e9f..e2a5b51 100644 --- a/templates/codemirror.html +++ b/templates/codemirror.html @@ -325,5 +325,10 @@ delete CodeMirror.keyMap.emacsy["Alt-F"]; delete CodeMirror.keyMap.emacsy["Alt-D"]; + val = document.getElementById('code').value.replace(/&(?!amp;)/g,'&'); + $('#code').val(val); + editor.getDoc().setValue(val); + + From 01ef626db4bac02524a195b7e565cab29e9c4c51 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Thu, 20 Sep 2018 09:14:10 -0400 Subject: [PATCH 011/135] Add TEI table to default codemirror auto-complete --- templates/codemirror.html | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/templates/codemirror.html b/templates/codemirror.html index e2a5b51..362337b 100644 --- a/templates/codemirror.html +++ b/templates/codemirror.html @@ -48,6 +48,7 @@ "figure", "list", "p", + "table", "quote", "s", "head", @@ -200,6 +201,34 @@ "s", "sp" ] + }, + "table": { + "attrs": { + "rend": null, + "rows": null, + "cols": null + }, + "children": [ + "row", + "head" + ] + }, + "row": { + "attrs": { + "n": null + }, + "children": [ + "cell" + ] + }, + "cell": { + "attrs": { + "n": null, + "role": ["label"] + }, + "children": [ + "s" + ] }, "list": { "attrs": { From 4b351b4f5c7e0182fd8df0ee40e763795bb24436 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 20 Sep 2018 21:42:37 -0400 Subject: [PATCH 012/135] modify ether_url and install instructions to sidestep ethercalc path mismatch issue --- README.md | 8 +++++--- users/config.ini | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0ffc05b..8fcf3be 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,14 @@ hope to provide a stable release soon. 
First, [install Docker](https://docs.docker.com/install/). You may be able to install it using your platform's package manager. +(**Note: if your machine has Apache running, you should stop it first by running `sudo service apache2 stop`.**) + ```bash -docker run -dit --restart unless-stopped --name gitdox-dev -p 5000:80 gucorpling/gitdox:dev +docker run -dit --restart unless-stopped --name gitdox-dev -p 80:80 gucorpling/gitdox:dev ``` GitDox should now be running the docker container you've set up, and you may -visit `http://localhost:5000` on your machine to verify that it works. GitDox should +visit `http://localhost` on your machine to verify that it works. GitDox should now always be running on your machine, even if you reboot it. If for some reason you need to stop it manually, you may do so: @@ -54,7 +56,7 @@ to have your GitDox folders live in your host machine's filesystem: ```bash sudo git clone https://github.com/gucorpling/gitdox /opt/gitdox sudo chown -R www-data:www-data /opt/gitdox -docker run -dit --restart unless-stopped --name gitdox -v /opt/gitdox:/var/www/html -p 5000:80 gucorpling/gitdox:dev gitdox +docker run -dit --restart unless-stopped --name gitdox -v /opt/gitdox:/var/www/html -p 80:80 gucorpling/gitdox:dev gitdox ``` These commands install GitDox under `/opt` in your host machine and allows you to modify them just as you would modify any other file on your machine. But in the Docker command, with the `-v` flag we tell it to mount this folder as `/var/www/html` in the container's filesystem. The files are shared bidirectionally: changes made in the container will flow to the host, and vice versa. 
diff --git a/users/config.ini b/users/config.ini index 73f6e69..e93efd3 100644 --- a/users/config.ini +++ b/users/config.ini @@ -11,7 +11,7 @@ xml_nlp_button = """I|'m Tokenize""" spreadsheet_nlp_button = """ NLP""" xml_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/tt_tokenize spreadsheet_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/api -ether_url = /ethercalc/ +ether_url = http://localhost/ethercalc/ # nlp service credentials nlp_user = user From 3d63d3d64bb694c3ca38a11891b3d2dc47824ba3 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 20 Sep 2018 23:12:14 -0400 Subject: [PATCH 013/135] handle case where ether_url is absent --- paths.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paths.py b/paths.py index 7a7cb88..0fdf6cd 100755 --- a/paths.py +++ b/paths.py @@ -8,7 +8,10 @@ prefix = "" # to use password authentication, use a netrc file called .netrc in the project root -ether_url = ConfigObj(prefix + "users" + os.sep + "config.ini")["ether_url"] +try: + ether_url = ConfigObj(prefix + "users" + os.sep + "config.ini")["ether_url"] +except KeyError: + ether_url = "" if not ether_url.endswith(os.sep): ether_url += os.sep From c6768dcd1a992e24b1756b0ebdfda1082ca87024 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 20 Sep 2018 23:16:20 -0400 Subject: [PATCH 014/135] utf-8 header lowercase --- export.py | 2 +- index.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/export.py b/export.py index 6ad9294..e828546 100755 --- a/export.py +++ b/export.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- import cgi from modules.gitdox_sql import * diff --git a/index.py b/index.py index de80d1d..bb5ef7e 100755 --- a/index.py +++ b/index.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- # Import modules for CGI handling import cgi, cgitb From 198a0ff5d2c859f9889bbb6c49715787d4ff4f05 Mon Sep 17 00:00:00 2001 From: Luke Gessler 
Date: Thu, 20 Sep 2018 23:16:46 -0400 Subject: [PATCH 015/135] use requests instead of curl for requests with user input --- editor.py | 2 +- modules/ether.py | 44 ++++++++++++++++++++------------------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/editor.py b/editor.py index 1428a33..07a06c5 100755 --- a/editor.py +++ b/editor.py @@ -228,7 +228,7 @@ def load_page(user,admin,theform): old_socialcalc = get_socialcalc(ether_url, old_sheet_name) out, err = make_spreadsheet(old_socialcalc, ether_url + "_/gd_" + corpus + "_" + docname, "socialcalc") if out == "OK": - out, err = delete_spreadsheet(ether_url,old_sheet_name) + delete_spreadsheet(ether_url,old_sheet_name) else: mymsg += "out was: " + out + " err was" + err diff --git a/modules/ether.py b/modules/ether.py index 185a8fc..0d8cbc0 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -19,8 +19,10 @@ from ast import literal_eval import json import cgi +import requests from xml.sax.saxutils import escape + __version__ = "2.0.0" @@ -340,10 +342,12 @@ def sgml_to_ether(sgml, ignore_elements=False): def ether_to_csv(ether_path, name): - command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") + try: + r = requests.get(ether_path + "_/" + name + "/csv/") + except: + return "" + + return r.text def ether_to_sgml(ether, doc_id,config=None): @@ -605,35 +609,27 @@ def delete_spreadsheet(ether_url, name): :param name: name of the spreadsheet (last part of URL) :return: void """ - - ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name - del_proc = subprocess.Popen(ether_command,shell=True) - - (stdout, stderr) = del_proc.communicate() - - return stdout, stderr - + try: + r = requests.delete(ether_url + "_/" + name) + except: + pass def sheet_exists(ether_path, name): return 
len(get_socialcalc(ether_path,name)) > 0 def get_socialcalc(ether_path, name): - command = "curl --netrc -X GET " + ether_path + "_/" + name - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") + try: + r = requests.get(ether_path + '_/' + name) + except: + return "" + return r.text def get_timestamps(ether_path): - command = "curl --netrc -X GET " + ether_path + "_roomtimes" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - times = json.loads(stdout) - output = {} - for room in times: - output[room.replace("timestamp-","")] = times[room] - return output + r = requests.get(ether_path + "_roomtimes") + times = r.json() + return {room.replace("timestamp-", ""): times[room] for room in times} if __name__ == "__main__": From af3f6ae7e0dbac7a39cf8417b4482ac9c9f05248 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Tue, 25 Sep 2018 15:03:15 -0400 Subject: [PATCH 016/135] Fix bug on default API call without credentials * Spreadsheet and XML mode NLP APIs now call no URL by default * If URL setting in config can be empty string if no tokenizer/NLP etc. 
should be used --- editor.py | 24 ++++++++++++++---------- users/config.ini | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/editor.py b/editor.py index 0e29081..5ac5f57 100755 --- a/editor.py +++ b/editor.py @@ -169,12 +169,15 @@ def load_page(user,admin,theform): # Handle switch to spreadsheet mode if NLP spreadsheet service is called if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet" and mode == "xml" and user != "demo": - api_call = spreadsheet_nlp_api - nlp_user, nlp_password = get_nlp_credentials() data_to_process = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0] - data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} - resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) - sgml=resp.text.encode("utf8") + api_call = spreadsheet_nlp_api + if api_call != "": + nlp_user, nlp_password = get_nlp_credentials() + data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} + resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) + sgml = resp.text.encode("utf8") + else: + sgml = data_to_process out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") mode = "ether" @@ -296,11 +299,12 @@ def load_page(user,admin,theform): shutil.rmtree(prefix+subdir) if theform.getvalue('nlp_xml') == "do_nlp_xml" and mode == "xml": - api_call=xml_nlp_api - nlp_user, nlp_password = get_nlp_credentials() - data = {"data":text_content, "lb":"line", "format":"pipes"} - resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) - text_content=resp.text + api_call = xml_nlp_api + if api_call != "": + nlp_user, nlp_password = get_nlp_credentials() + data = {"data":text_content, "lb":"line", "format":"pipes"} + resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) + text_content=resp.text # Editing options # Docname diff --git a/users/config.ini b/users/config.ini index 
73f6e69..fe5504e 100644 --- a/users/config.ini +++ b/users/config.ini @@ -9,8 +9,8 @@ adminuser = admin # the login name who is the *main* administrator account. This editor_help_link =

For help getting started see the GitDox website

xml_nlp_button = """I|'m Tokenize""" spreadsheet_nlp_button = """ NLP""" -xml_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/tt_tokenize -spreadsheet_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/api +xml_nlp_api = "" # URL to call for service modifying the XML mode data, e.g. a tokenizer: https://corpling.uis.georgetown.edu/coptic-nlp/tt_tokenize +spreadsheet_nlp_api = "" # URL to call for a service manipulating the data before conversion to spreadsheet mode, e.g. an NLP pipeline: https://corpling.uis.georgetown.edu/coptic-nlp/api ether_url = /ethercalc/ # nlp service credentials From 3e03f46cdec27d77c4f34d14ca81ea29ae6929d6 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Fri, 28 Sep 2018 10:01:31 -0400 Subject: [PATCH 017/135] Added utf-8 encoding when no XML-transform API called * Setting no intermediate API between XML and tabulation to spreadsheet crashed on unicode input * Else case of no API call now encodes data in utf-8 as well --- editor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/editor.py b/editor.py index 5ac5f57..71853bc 100755 --- a/editor.py +++ b/editor.py @@ -177,7 +177,7 @@ def load_page(user,admin,theform): resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) sgml = resp.text.encode("utf8") else: - sgml = data_to_process + sgml = data_to_process.encode("utf8") out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") mode = "ether" From b249da9b9345999756d8f1530290ca6c36ff8622 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 13:21:36 -0400 Subject: [PATCH 018/135] Revert "fix bug where header is getting added twice" This reverts commit e41ae2f9045ede3943ce20d92db278d5ca8a23cc. 
--- index.py | 2 ++ templates/landing.html | 1 + 2 files changed, 3 insertions(+) diff --git a/index.py b/index.py index bb5ef7e..e9a274f 100755 --- a/index.py +++ b/index.py @@ -222,6 +222,7 @@ def load_landing(user, admin, theform): menu = menu.encode("utf8") landing = open(prefix + "templates" + os.sep + "landing.html").read() + header = open(prefix + "templates" + os.sep + "header.html").read() scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep userdir = scriptpath + "users" + os.sep @@ -232,6 +233,7 @@ def load_landing(user, admin, theform): landing = landing.replace("**max_id_plus1**", str(max_id + 1)) landing = landing.replace("**user**", user) landing = landing.replace("**project**", project) + landing = landing.replace("**header**", header) landing = landing.replace("**skin**", skin) landing = landing.replace("**validation_rules**", validation_rules) landing = landing.replace("**corpora**", corpus_list) diff --git a/templates/landing.html b/templates/landing.html index d1aa59d..d20e5bd 100644 --- a/templates/landing.html +++ b/templates/landing.html @@ -20,6 +20,7 @@ **navbar**
+ **header**

GitDox: Project **project**

From ceda0a99bdac1e59bc98dba1722dba16c2c01716 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 13:23:23 -0400 Subject: [PATCH 019/135] Revert "fix double banner bug for other pages" This reverts commit ed21a8ea17bfd875def8e4cec9458a90f2c06ce3. --- admin.py | 8 ++++++++ editor.py | 2 ++ index.py | 2 +- templates/editor.html | 7 ++++--- validation_rules.py | 3 +++ 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/admin.py b/admin.py index 46b669f..e2ac495 100755 --- a/admin.py +++ b/admin.py @@ -176,6 +176,7 @@ def load_admin(user,admin,theform): **navbar**
+ **header**

GitDox - Administration

administration and user management | back to document list

@@ -415,7 +416,9 @@ def load_admin(user,admin,theform): page+="
" + header = open(templatedir + "header.html").read() page = page.replace("**navbar**",get_menu()) + page = page.replace("**header**",header) page = page.replace("**project**",project) page = page.replace("**skin**",skin) @@ -467,6 +470,9 @@ def load_user_config(user,admin,theform): **navbar**
+

Coptic XML transcription editor

edit user info | back to document list

@@ -497,7 +503,9 @@ def load_user_config(user,admin,theform): page += "\t\t\t
\t\t\n
\t\n
\n\n" + header = open(templatedir + "header.html").read() page = page.replace("**navbar**",get_menu()) + page = page.replace("**header**",header) page = page.replace("**project**",project) page = page.replace("**skin**",skin) diff --git a/editor.py b/editor.py index 07a06c5..fe34203 100755 --- a/editor.py +++ b/editor.py @@ -465,7 +465,9 @@ def load_page(user,admin,theform): page = page.replace('onblur="validate_repo();"','onblur="validate_repo();" disabled="disabled" class="disabled"') page = page.replace('''
''','''
''') + header = open(templatedir + "header.html").read() page = page.replace("**navbar**", get_menu()) + page = page.replace("**header**", header) page = page.replace("**project**", project) page = page.replace("**skin**", skin) page = page.replace("**editor_help_link**",editor_help_link) diff --git a/index.py b/index.py index e9a274f..49cd3bb 100755 --- a/index.py +++ b/index.py @@ -149,7 +149,7 @@ def load_landing(user, admin, theform): if not max_id: # This is for the initial case after init db max_id = 0 - table = """
idcorpusdocumentstatusassignedmodevalidateactions
""" + table = """
""" table += """""" table += """ diff --git a/templates/editor.html b/templates/editor.html index 31c2d63..b8e68ac 100644 --- a/templates/editor.html +++ b/templates/editor.html @@ -40,9 +40,10 @@ **navbar**
-
+ **header** +

GitDox: Edit

-**editor_help_link** + **editor_help_link**
@@ -135,4 +136,4 @@

meta data

- + \ No newline at end of file diff --git a/validation_rules.py b/validation_rules.py index a7bcaa8..c21a203 100755 --- a/validation_rules.py +++ b/validation_rules.py @@ -56,6 +56,7 @@ def load_validation_rules(): **navbar**
+ **header**

GitDox - Validation

validation rule management | back to document list

@@ -111,7 +112,9 @@ def load_validation_rules(): page+="
" + header = open(templatedir + "header.html").read() page = page.replace("**navbar**",get_menu()) + page = page.replace("**header**",header) page = page.replace("**project**",project) page = page.replace("**skin**",skin) From 84bdc813ba88a6535653d227af8c5770729659e1 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 13:32:21 -0400 Subject: [PATCH 020/135] comment out banner to avoid double banner --- users/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/users/config.ini b/users/config.ini index e93efd3..a76865a 100644 --- a/users/config.ini +++ b/users/config.ini @@ -3,7 +3,7 @@ templatedir = templates/login/ # path to the login template directory (absolute controltemplates = templates/control/ # path to the control template directory skin = css/gum.css project = GitDox # Specify your project name here -banner = header.html #note you can also use web addresses to retrieve a banner from your project page, e.g. http://myproject.org/nav.html +# banner = header.html #note you can also use web addresses to retrieve a banner from your project page, e.g. http://myproject.org/nav.html cookiepath = "" # the 'super-url' of the scripts - for the cookie. Can be '' if no other script in your domain uses cookies adminuser = admin # the login name who is the *main* administrator account. This one cannot be deleted. editor_help_link =

For help getting started see the GitDox website

From 98e769899b4f6bf6f20cbde1bc3e56b11652ee16 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 13:33:15 -0400 Subject: [PATCH 021/135] make nlp urls blank --- users/config.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/users/config.ini b/users/config.ini index a76865a..25d774e 100644 --- a/users/config.ini +++ b/users/config.ini @@ -9,8 +9,8 @@ adminuser = admin # the login name who is the *main* administrator account. This editor_help_link =

For help getting started see the GitDox website

xml_nlp_button = """I|'m Tokenize""" spreadsheet_nlp_button = """ NLP""" -xml_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/tt_tokenize -spreadsheet_nlp_api = https://corpling.uis.georgetown.edu/coptic-nlp/api +xml_nlp_api = "" +spreadsheet_nlp_api = "" ether_url = http://localhost/ethercalc/ # nlp service credentials From bfe13a6744a2890a901113157c451b78194c7af6 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 13:54:08 -0400 Subject: [PATCH 022/135] don't attempt to call nlp service if not defined --- editor.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/editor.py b/editor.py index fe34203..b5247aa 100755 --- a/editor.py +++ b/editor.py @@ -170,12 +170,17 @@ def load_page(user,admin,theform): # Handle switch to spreadsheet mode if NLP spreadsheet service is called if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet" and mode == "xml" and user != "demo": api_call = spreadsheet_nlp_api - nlp_user, nlp_password = get_nlp_credentials() data_to_process = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0] - data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} - resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) - sgml=resp.text.encode("utf8") - out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") + + if api_call is None or api_call == "": + out, err = make_spreadsheet(data_to_process, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") + else: + nlp_user, nlp_password = get_nlp_credentials() + data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} + resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) + sgml=resp.text.encode("utf8") + out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") + mode = "ether" From c4403ad8bfdc1adfeab75f752a2321d6371e8f98 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 
13:57:48 -0400 Subject: [PATCH 023/135] Revert "don't attempt to call nlp service if not defined" This reverts commit bfe13a6744a2890a901113157c451b78194c7af6. --- editor.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/editor.py b/editor.py index b5247aa..fe34203 100755 --- a/editor.py +++ b/editor.py @@ -170,17 +170,12 @@ def load_page(user,admin,theform): # Handle switch to spreadsheet mode if NLP spreadsheet service is called if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet" and mode == "xml" and user != "demo": api_call = spreadsheet_nlp_api + nlp_user, nlp_password = get_nlp_credentials() data_to_process = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0] - - if api_call is None or api_call == "": - out, err = make_spreadsheet(data_to_process, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") - else: - nlp_user, nlp_password = get_nlp_credentials() - data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} - resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) - sgml=resp.text.encode("utf8") - out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") - + data = {"data":data_to_process, "lb":"line", "format":"sgml_no_parse"} + resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user,nlp_password)) + sgml=resp.text.encode("utf8") + out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") mode = "ether" From baef5c6891662d7f13a871f74a4bc512df20e0a2 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 14:11:50 -0400 Subject: [PATCH 024/135] lowercase uppercase html tags (was making my IDE wig out) --- popupPage.html | 88 ++++++++++++++++++++++++++++++++------------ popupPageCorpus.html | 54 +++++++++++++++------------ 2 files changed, 94 insertions(+), 48 deletions(-) diff --git a/popupPage.html b/popupPage.html index 24dd039..5440955 100644 --- a/popupPage.html +++ 
b/popupPage.html @@ -1,28 +1,68 @@ - - - - - -
-field name (e.g., author):
- - -***options*** - -
-field value (e.g., Besa):
-
- - - - + + + +
+ field name (e.g., author):
+ + + +
+ field value (e.g., Besa):
+
+ + + + diff --git a/popupPageCorpus.html b/popupPageCorpus.html index 63ca339..9fab3c4 100644 --- a/popupPageCorpus.html +++ b/popupPageCorpus.html @@ -1,28 +1,34 @@ - - - - - -
-field name (e.g., author):
- - -***options*** - -
-field value (e.g., Besa):
-
- - - - + + + +
+ field name (e.g., author):
+ + + +
+ field value (e.g., Besa):
+
+ + + + From 18cf9d871138aa30c53b410c71dfd8338afd215b Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 30 Sep 2018 11:03:40 -0400 Subject: [PATCH 025/135] Add basic conllu mode to codemirror --- codemirror-5.15.2/mode/conllu/conllu.js | 80 ++++++++++++++++++++++++ codemirror-5.15.2/mode/conllu/index.html | 53 ++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 codemirror-5.15.2/mode/conllu/conllu.js create mode 100644 codemirror-5.15.2/mode/conllu/index.html diff --git a/codemirror-5.15.2/mode/conllu/conllu.js b/codemirror-5.15.2/mode/conllu/conllu.js new file mode 100644 index 0000000..087d982 --- /dev/null +++ b/codemirror-5.15.2/mode/conllu/conllu.js @@ -0,0 +1,80 @@ +// CodeMirror, copyright (c) by Marijn Haverbeke and others +// Distributed under an MIT license: http://codemirror.net/LICENSE + +(function(mod) { + if (typeof exports == "object" && typeof module == "object") // CommonJS + mod(require("../../lib/codemirror"), require("../../addon/mode/simple")); + else if (typeof define == "function" && define.amd) // AMD + define(["../../lib/codemirror", "../../addon/mode/simple"], mod); + else // Plain browser env + mod(CodeMirror); +})(function(CodeMirror) { + "use strict"; + + // Collect all conllu directives + var instructions = ["newdoc"], + instructionRegex = "(" + instructions.join('|') + ")", + instructionOnlyLine = new RegExp(instructionRegex + "\\s*$", "i"), + instructionWithArguments = new RegExp(instructionRegex + "(\\s+)", "i"); + + CodeMirror.defineSimpleMode("conllu", { + start: [ + // Comment line: This is a line starting with a comment + { + regex: /#.*$/, + token: "comment" + }, + { + regex: /^[0-9]+\t/, + token: "def" + }, + { + regex: /(?<=(^[^\t\n]+\t){6})[0-9]+/, + token: "def" + }, + // Highlight an instruction followed by arguments + { + regex: instructionWithArguments, + token: ["variable-2", null], + next: "arguments" + }, + { + regex: /./, + token: null + } + ], + arguments: [ + { + // Line comment without 
instruction arguments is an error + regex: /#.*$/, + token: "error", + next: "start" + }, + { + regex: /[^#]+\\$/, + token: null + }, + { + // Match everything except for the inline comment + regex: /[^#]+/, + token: null, + next: "start" + }, + { + regex: /$/, + token: null, + next: "start" + }, + // Fail safe return to start + { + token: null, + next: "start" + } + ], + meta: { + lineComment: "#" + } + }); + + CodeMirror.defineMIME("text/x-conllu", "conllu"); +}); diff --git a/codemirror-5.15.2/mode/conllu/index.html b/codemirror-5.15.2/mode/conllu/index.html new file mode 100644 index 0000000..adc0574 --- /dev/null +++ b/codemirror-5.15.2/mode/conllu/index.html @@ -0,0 +1,53 @@ + + +CodeMirror: Dockerfile mode + + + + + + + + + + +
+

CoNLL-U mode

+
+ + + +

CoNLL-U syntax highlighting for CodeMirror. Depends on + the simplemode addon.

+ +

MIME types defined: text/x-conllu

+
From ded7e3138331e9414d94e47d8e3def5bc35b48b3 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 18:39:47 -0400 Subject: [PATCH 026/135] modify sgml_to_ether to allow nested spans --- modules/ether.py | 135 ++++++++++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 0d8cbc0..fbfb684 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -19,13 +19,10 @@ from ast import literal_eval import json import cgi -import requests from xml.sax.saxutils import escape - __version__ = "2.0.0" - class ExportConfig: def __init__(self, **kwargs): @@ -220,12 +217,27 @@ def flush_open(annos, row_num, colmap): def flush_close(closing_element, last_value, last_start, row_num, colmap, aliases): flushed = "" - for alias in aliases[closing_element]: - if last_start[alias] < row_num - 1: - span_string = ":rowspan:" + str(row_num - last_start[alias]) + for alias in aliases[closing_element][-1]: + stack_len = len(last_start[alias]) + + with open('tmp','a') as f: + f.write("\nFound element " + closing_element + " at " + str(row_num) + "\n") + f.write(repr(last_value) + "\n") + f.write(repr(last_start) + "\n") + f.write(alias + "\n") + f.write(repr(aliases[closing_element]) + "\n") + f.write(repr(last_start[alias]) + "\n") + f.flush() + + if stack_len > 0 and last_start[alias][-1] < row_num - 1: + span_string = ":rowspan:" + str(row_num - last_start[alias][-1]) else: span_string = "" - flushed += "cell:" + colmap[alias] + str(last_start[alias]) + ":t:" + last_value[alias]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + + flushed += "cell:" + colmap[alias][stack_len - 1] + str(last_start[alias][-1]) + ":t:" + last_value[alias][-1]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + last_value[alias].pop() + last_start[alias].pop() + aliases[closing_element].pop() return flushed @@ -238,14 +250,23 @@ def number_to_letter(number): def 
sgml_to_ether(sgml, ignore_elements=False): - sgml = sgml.replace("\r","") - current_row = 2 open_annos = defaultdict(list) + + # a mapping from a tag name to a list of values. the list is a stack + # where the most recently encountered opening tag's value/start row + # is kept on the right side of the list. whenever we see a closing tag + # we pop from the stack, and whenever we see an opening tag we push + # (append) to the stack + last_value = defaultdict(list) + last_start = defaultdict(list) + + # maps from tags to a similar stack data structure where the top of the stack + # (i.e. the right side of the list) contains all the annotations that were + # present on the most recently opened nested element aliases = defaultdict(list) - last_value = {} - last_start = {} + + # values in this dict are also lists which follow the pattern described above colmap = OrderedDict() - maxcol = 1 preamble = """socialcalc:version:1.0 MIME-Version: 1.0 @@ -265,45 +286,54 @@ def sgml_to_ether(sgml, ignore_elements=False): """ + sgml = sgml.replace("\r","") + output = "" + maxcol = 1 + current_row = 2 for line in sgml.strip().split("\n"): line = line.strip() + # SocialCalc uses colons internally, \\c used to repr colon in data line = line.replace(":","\\c") + if line.startswith(""): # Skip unary tags and XML instructions - pass + continue elif line.startswith("]+)>",line) element = my_match.groups(0)[0] - output+=flush_close(element, last_value, last_start, current_row, colmap, aliases) + output += flush_close(element, last_value, last_start, current_row, colmap, aliases) elif line.startswith("<"): # Opening tag my_match = re.match("<([^ >]+)[ >]",line) element = my_match.groups(0)[0] - aliases[element] = [] # Reset element aliases to see which attributes this instance has + aliases[element].append([]) # Add new set of aliases to see which attributes this instance has if "=" not in line: line = "<" + element + " " + element + '="' + element + '">' - my_match = re.findall('([^" 
=]+)="([^"]+)"',line) + attrs = re.findall('([^" =]+)="([^"]+)"',line) anno_name = "" anno_value = "" - for match in my_match: - if element != match[0] and ignore_elements is False: - anno_name = element + "_" + match[0] + for attr in attrs: + if element != attr[0] and ignore_elements is False: + anno_name = element + "_" + attr[0] else: - anno_name = match[0] - anno_value = match[1] + anno_name = attr[0] + anno_value = attr[1] open_annos[current_row].append((anno_name,anno_value)) - last_value[anno_name] = anno_value - last_start[anno_name] = current_row - if element not in aliases: - aliases[element] = [anno_name] - elif anno_name not in aliases[element]: - aliases[element].append(anno_name) + last_value[anno_name].append(anno_value) + last_start[anno_name].append(current_row) + if anno_name not in aliases[element][-1]: + aliases[element][-1].append(anno_name) + if anno_name not in colmap: - maxcol +=1 - colmap[anno_name] = number_to_letter(maxcol) + maxcol += 1 + colmap[anno_name] = [number_to_letter(maxcol)] + elif anno_name in colmap and \ + len(last_start[anno_name]) > len(colmap[anno_name]): + maxcol += 1 + colmap[anno_name].append(number_to_letter(maxcol)) elif len(line) > 0: # Token token = line.strip() @@ -315,7 +345,8 @@ def sgml_to_ether(sgml, ignore_elements=False): preamble += "cell:A1:t:tok:f:2\n" # f <> tvf for links output = preamble + output for header in colmap: - output += "cell:"+colmap[header]+"1:t:"+header+":f:2\n" # NO f <> tvf for links + for entry in colmap[header]: + output += "cell:"+entry+"1:t:"+header+":f:2\n" # NO f <> tvf for links output += "\nsheet:c:" + str(maxcol) + ":r:" + str(current_row-1) + ":tvf:1\n" @@ -342,12 +373,10 @@ def sgml_to_ether(sgml, ignore_elements=False): def ether_to_csv(ether_path, name): - try: - r = requests.get(ether_path + "_/" + name + "/csv/") - except: - return "" - - return r.text + command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/" + proc = subprocess.Popen(command, 
stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + (stdout, stderr) = proc.communicate() + return stdout.decode("utf8") def ether_to_sgml(ether, doc_id,config=None): @@ -609,27 +638,35 @@ def delete_spreadsheet(ether_url, name): :param name: name of the spreadsheet (last part of URL) :return: void """ - try: - r = requests.delete(ether_url + "_/" + name) - except: - pass + + ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name + del_proc = subprocess.Popen(ether_command,shell=True) + + (stdout, stderr) = del_proc.communicate() + + return stdout, stderr + def sheet_exists(ether_path, name): return len(get_socialcalc(ether_path,name)) > 0 def get_socialcalc(ether_path, name): - try: - r = requests.get(ether_path + '_/' + name) - except: - return "" - return r.text + command = "curl --netrc -X GET " + ether_path + "_/" + name + proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + (stdout, stderr) = proc.communicate() + return stdout.decode("utf8") def get_timestamps(ether_path): - r = requests.get(ether_path + "_roomtimes") - times = r.json() - return {room.replace("timestamp-", ""): times[room] for room in times} + command = "curl --netrc -X GET " + ether_path + "_roomtimes" + proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + (stdout, stderr) = proc.communicate() + times = json.loads(stdout) + output = {} + for room in times: + output[room.replace("timestamp-","")] = times[room] + return output if __name__ == "__main__": From ddda643474f6c2f7c91c128444c63ec389df91b6 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 18:44:23 -0400 Subject: [PATCH 027/135] remove debug code --- modules/ether.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index fbfb684..ac06dac 100755 --- a/modules/ether.py +++ 
b/modules/ether.py @@ -220,21 +220,14 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase for alias in aliases[closing_element][-1]: stack_len = len(last_start[alias]) - with open('tmp','a') as f: - f.write("\nFound element " + closing_element + " at " + str(row_num) + "\n") - f.write(repr(last_value) + "\n") - f.write(repr(last_start) + "\n") - f.write(alias + "\n") - f.write(repr(aliases[closing_element]) + "\n") - f.write(repr(last_start[alias]) + "\n") - f.flush() - if stack_len > 0 and last_start[alias][-1] < row_num - 1: span_string = ":rowspan:" + str(row_num - last_start[alias][-1]) else: span_string = "" flushed += "cell:" + colmap[alias][stack_len - 1] + str(last_start[alias][-1]) + ":t:" + last_value[alias][-1]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + + # pop the stack since we've closed a tag last_value[alias].pop() last_start[alias].pop() aliases[closing_element].pop() From 470520d1defadf10e48d68d702b0228c6137a50c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 19:47:26 -0400 Subject: [PATCH 028/135] split flushed line into multiple lines --- modules/ether.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/ether.py b/modules/ether.py index ac06dac..7e35af5 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -225,7 +225,12 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase else: span_string = "" - flushed += "cell:" + colmap[alias][stack_len - 1] + str(last_start[alias][-1]) + ":t:" + last_value[alias][-1]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + # Use t for tvf to leave links on + flushed += "cell:" \ + + colmap[alias][stack_len - 1] \ + + str(last_start[alias][-1]) \ + + ":t:" + last_value[alias][-1] \ + + ":f:1:tvf:1" + span_string + "\n" # pop the stack since we've closed a tag last_value[alias].pop() From 8a7c4dc79c07744f95fa3a9b4d99cfcb1f952883 Mon Sep 17 00:00:00 2001 From: 
Luke Gessler Date: Sat, 29 Sep 2018 19:48:43 -0400 Subject: [PATCH 029/135] Replace line continuations with parens) --- modules/ether.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 7e35af5..c55ef4c 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -226,11 +226,11 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase span_string = "" # Use t for tvf to leave links on - flushed += "cell:" \ - + colmap[alias][stack_len - 1] \ - + str(last_start[alias][-1]) \ - + ":t:" + last_value[alias][-1] \ - + ":f:1:tvf:1" + span_string + "\n" + flushed += ("cell:" + + colmap[alias][stack_len - 1] + + str(last_start[alias][-1]) + + ":t:" + last_value[alias][-1] + + ":f:1:tvf:1" + span_string + "\n") # pop the stack since we've closed a tag last_value[alias].pop() From f6cbf9993a6355f3a9e777b629bb3828f540bb7a Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 20:33:56 -0400 Subject: [PATCH 030/135] fix premature popping of alias stack --- modules/ether.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/ether.py b/modules/ether.py index c55ef4c..59147a7 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -217,6 +217,7 @@ def flush_open(annos, row_num, colmap): def flush_close(closing_element, last_value, last_start, row_num, colmap, aliases): flushed = "" + for alias in aliases[closing_element][-1]: stack_len = len(last_start[alias]) @@ -235,7 +236,8 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase # pop the stack since we've closed a tag last_value[alias].pop() last_start[alias].pop() - aliases[closing_element].pop() + + aliases[closing_element].pop() return flushed From f1ce7dea87dfeddfe9d529eb4fa01e144e62935f Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 30 Sep 2018 11:03:40 -0400 Subject: [PATCH 031/135] Add basic conllu mode to codemirror --- 
codemirror-5.15.2/mode/conllu/conllu.js | 80 ++++++++++++++++++++++++ codemirror-5.15.2/mode/conllu/index.html | 53 ++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 codemirror-5.15.2/mode/conllu/conllu.js create mode 100644 codemirror-5.15.2/mode/conllu/index.html diff --git a/codemirror-5.15.2/mode/conllu/conllu.js b/codemirror-5.15.2/mode/conllu/conllu.js new file mode 100644 index 0000000..087d982 --- /dev/null +++ b/codemirror-5.15.2/mode/conllu/conllu.js @@ -0,0 +1,80 @@ +// CodeMirror, copyright (c) by Marijn Haverbeke and others +// Distributed under an MIT license: http://codemirror.net/LICENSE + +(function(mod) { + if (typeof exports == "object" && typeof module == "object") // CommonJS + mod(require("../../lib/codemirror"), require("../../addon/mode/simple")); + else if (typeof define == "function" && define.amd) // AMD + define(["../../lib/codemirror", "../../addon/mode/simple"], mod); + else // Plain browser env + mod(CodeMirror); +})(function(CodeMirror) { + "use strict"; + + // Collect all conllu directives + var instructions = ["newdoc"], + instructionRegex = "(" + instructions.join('|') + ")", + instructionOnlyLine = new RegExp(instructionRegex + "\\s*$", "i"), + instructionWithArguments = new RegExp(instructionRegex + "(\\s+)", "i"); + + CodeMirror.defineSimpleMode("conllu", { + start: [ + // Comment line: This is a line starting with a comment + { + regex: /#.*$/, + token: "comment" + }, + { + regex: /^[0-9]+\t/, + token: "def" + }, + { + regex: /(?<=(^[^\t\n]+\t){6})[0-9]+/, + token: "def" + }, + // Highlight an instruction followed by arguments + { + regex: instructionWithArguments, + token: ["variable-2", null], + next: "arguments" + }, + { + regex: /./, + token: null + } + ], + arguments: [ + { + // Line comment without instruction arguments is an error + regex: /#.*$/, + token: "error", + next: "start" + }, + { + regex: /[^#]+\\$/, + token: null + }, + { + // Match everything except for the inline comment + regex: 
/[^#]+/, + token: null, + next: "start" + }, + { + regex: /$/, + token: null, + next: "start" + }, + // Fail safe return to start + { + token: null, + next: "start" + } + ], + meta: { + lineComment: "#" + } + }); + + CodeMirror.defineMIME("text/x-conllu", "conllu"); +}); diff --git a/codemirror-5.15.2/mode/conllu/index.html b/codemirror-5.15.2/mode/conllu/index.html new file mode 100644 index 0000000..adc0574 --- /dev/null +++ b/codemirror-5.15.2/mode/conllu/index.html @@ -0,0 +1,53 @@ + + +CodeMirror: Dockerfile mode + + + + + + + + + + +
+

CoNLL-U mode

+
+ + + +

CoNLL-U syntax highlighting for CodeMirror. Depends on + the simplemode addon.

+ +

MIME types defined: text/x-conllu

+
From 72576d863a012c2ea9b4abe87a322583444ec53c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 16:21:10 -0400 Subject: [PATCH 032/135] make exported sgml tags unique, begin writing de-uniqueing function --- modules/ether.py | 60 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 59147a7..8a10258 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -378,6 +378,29 @@ def ether_to_csv(ether_path, name): (stdout, stderr) = proc.communicate() return stdout.decode("utf8") +def dedupe_properly_nested_tags(sgml): + is_open = {} + + output = sgml.split("\n") + + for i, line in enumerate(sgml.split("\n")): + if (not line.startswith("<") + or line.startswith("") + or line.startswith("]+)>", line).groups(0)[0] + # store line at which we began + is_open[element] = i + else: + element = re.match("<([^ >]+)[ >]", line).groups(0)[0] + + + + return sgml def ether_to_sgml(ether, doc_id,config=None): """ @@ -392,13 +415,15 @@ def ether_to_sgml(ether, doc_id,config=None): else: config = ExportConfig(config=config) + # mapping from col header (meaningful string) to the col letter colmap = {} + # list of 3-tuples of parsed cells: (col, row, contents) cells = [] if isinstance(ether,unicode): ether = ether.encode("utf8") - + # parse cell contents into cells for line in ether.splitlines(): parsed_cell = re.match(r'cell:([A-Z]+)(\d+):(.*)$', line) if parsed_cell is not None: @@ -423,22 +448,37 @@ def ether_to_sgml(ether, doc_id,config=None): sec_element_checklist = [] row = 1 + # added to support duplicate columns + namecount = defaultdict(int) + close_tags = defaultdict(list) for cell in cells: if cell[1] == 1: # Header row colname = cell[2]['t'].replace("\\c",":") + + # if we've already seen a tag of this name, prepare to make it unique + namecount[colname] += 1 + if namecount[colname] > 1: + dupe_suffix = "__" + str(namecount[colname]) + "__" + else: + dupe_suffix = 
"" + + # if an attr is present, be sure to place the dupe suffix before the attr if colname in config.aliases: - colmap[cell[0]] = config.aliases[colname] + alias = config.aliases[colname] + if "@" in alias: + unique_colname = alias.replace("@", dupe_suffix + "@") else: - colmap[cell[0]] = colname + unique_colname = colname + dupe_suffix + colmap[cell[0]] = unique_colname + # Make sure that everything that should be exported has some priority - if colname not in config.priorities and config.export_all: - if not colname.lower().startswith("ignore:"): # Never export columns prefixed with "ignore:" - if "@" in colname: - elem = colname.split("@",1)[0] - else: - elem = colname + if unique_colname not in config.priorities and config.export_all: + if not unique_colname.lower().startswith("ignore:"): + elem = unique_colname.split("@",1)[0] config.priorities.append(elem) + with open('tmp','a') as f: + f.write(repr(colmap) + "\n" + repr(config.priorities) + "\n\n") else: col = cell[0] row = cell[1] @@ -577,6 +617,8 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) + output = dedupe_properly_nested_tags(output) + return output From 9505250b447e49bbb4e88c6180e88427f72deca6 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 16:51:16 -0400 Subject: [PATCH 033/135] redo curl->requests changes --- modules/ether.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 8a10258..a800de7 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -19,6 +19,7 @@ from ast import literal_eval import json import cgi +import requests from xml.sax.saxutils import escape __version__ = "2.0.0" @@ -373,10 +374,13 @@ def sgml_to_ether(sgml, ignore_elements=False): def ether_to_csv(ether_path, name): - command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, 
stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") + try: + r = requests.get(ether_path + "_/" + name + "/csv/") + except: + return "" + + return r.text + def dedupe_properly_nested_tags(sgml): is_open = {} @@ -680,13 +684,10 @@ def delete_spreadsheet(ether_url, name): :param name: name of the spreadsheet (last part of URL) :return: void """ - - ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name - del_proc = subprocess.Popen(ether_command,shell=True) - - (stdout, stderr) = del_proc.communicate() - - return stdout, stderr + try: + r = requests.delete(ether_url + "_/" + name) + except: + pass def sheet_exists(ether_path, name): @@ -700,15 +701,11 @@ def get_socialcalc(ether_path, name): return stdout.decode("utf8") + def get_timestamps(ether_path): - command = "curl --netrc -X GET " + ether_path + "_roomtimes" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - times = json.loads(stdout) - output = {} - for room in times: - output[room.replace("timestamp-","")] = times[room] - return output + r = requests.get(ether_path + "_roomtimes") + times = r.json() + return {room.replace("timestamp-", ""): times[room] for room in times} if __name__ == "__main__": From 857f6506b7ae1220646df7af621e944610deec37 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 17:05:56 -0400 Subject: [PATCH 034/135] make doc table in index.py occupy all horizontal space --- css/gitdox.css | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/css/gitdox.css b/css/gitdox.css index 2242871..ff401a1 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -1,5 +1,10 @@ -#doctable{ border: 2px solid black; - border-radius: 4px;font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, 
sans-serif;background-color:hsla(40,53%,100%,0.30)} +#doctable { + border: 2px solid black; + border-radius: 4px; + font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif; + background-color: hsla(40,53%,100%,0.30); + width: 100%; +} h1, h2{font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;-webkit-font-smoothing: subpixel-antialiased;} @@ -173,4 +178,4 @@ tfoot { } #filter_id{width:30px} -#filter_mode{width:60px} \ No newline at end of file +#filter_mode{width:60px} From ba09218d9d799bd9ff8364022de8cdab2c0957d5 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 17:09:13 -0400 Subject: [PATCH 035/135] make doctable inputs in index.py occupy all available space --- css/gitdox.css | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/css/gitdox.css b/css/gitdox.css index ff401a1..95ab679 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -3,7 +3,11 @@ border-radius: 4px; font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif; background-color: hsla(40,53%,100%,0.30); - width: 100%; + width: 100%; +} + +#doctable input[type=text] { + width: 95%; } h1, h2{font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;-webkit-font-smoothing: subpixel-antialiased;} From 9ecafc27f7eb9bf61e93b3b2f0f2ab844854eb85 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 1 Oct 2018 01:43:38 -0400 Subject: [PATCH 036/135] implement multiple-column export done, todo: merge adjacent elts with same name and mutually exclusive attrs --- modules/ether.py | 136 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 108 insertions(+), 28 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index a800de7..429dc3a 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -18,6 +18,7 @@ from configobj import ConfigObj from ast 
import literal_eval import json +import copy import cgi import requests from xml.sax.saxutils import escape @@ -382,29 +383,96 @@ def ether_to_csv(ether_path, name): return r.text -def dedupe_properly_nested_tags(sgml): - is_open = {} +def strip_unique_identifier(tag): + """Given an SGML closing or opening tag, replace anything that looks + like __\d+__ on the end of the tag name, assuming that we were the + ones who added it.""" - output = sgml.split("\n") + try: + tag_name = re.match("^]+)", tag).groups(0)[0] + except AttributeError: + return tag + + orig_tag_name = re.sub("__\d+__$", "", tag_name) + tag = tag.replace("<" + tag_name, "<" + orig_tag_name) + tag = tag.replace("") # unary tags + or line.startswith("', ''] and replaces them with + ['', '']""" + def swap_run(l, start, end): + l[start:end] = l[start:end][::-1] + + run_start = None + for i, line in enumerate(lines): + if line.startswith("") - or line.startswith("]+)>", line).groups(0)[0] - # store line at which we began - is_open[element] = i - else: - element = re.match("<([^ >]+)[ >]", line).groups(0)[0] + # if we've gotten this far, we have an opening tag--store the tag name + open_element = re.match("<([^ >]+)[ >]", line).groups(0)[0] + open_counts = defaultdict(int) + + for j, line2 in enumerate(lines[i:]): + if deunique_should_skip_line(line2): + continue + + if line2.startswith("]+)>", line2).groups(0)[0] + open_counts[element] -= 1 + if element == open_element: + break + else: + element = re.match("<([^ >]+)[ >]", line2).groups(0)[0] + open_counts[element] += 1 + # element is properly nested if no element was opened in the block that + # was not also closed in the block or vice versa + if sum(open_counts.values()) == 0: + output[i] = strip_unique_identifier(output[i]) + output[i+j] = strip_unique_identifier(output[i+j]) + return "\n".join(output) - return sgml def ether_to_sgml(ether, doc_id,config=None): """ @@ -460,29 +528,41 @@ def ether_to_sgml(ether, doc_id,config=None): if cell[1] == 1: # 
Header row colname = cell[2]['t'].replace("\\c",":") + if colname in config.aliases: + colname = config.aliases[colname] + # if we've already seen a tag of this name, prepare to make it unique - namecount[colname] += 1 - if namecount[colname] > 1: - dupe_suffix = "__" + str(namecount[colname]) + "__" + bare_colname = colname.split("@",1)[0] + namecount[bare_colname] += 1 + if namecount[bare_colname] > 1: + dupe_suffix = "__" + str(namecount[bare_colname]) + "__" else: dupe_suffix = "" - # if an attr is present, be sure to place the dupe suffix before the attr - if colname in config.aliases: - alias = config.aliases[colname] - if "@" in alias: - unique_colname = alias.replace("@", dupe_suffix + "@") + if "@" in colname: + unique_colname = colname.replace("@", dupe_suffix + "@") else: unique_colname = colname + dupe_suffix + colmap[cell[0]] = unique_colname # Make sure that everything that should be exported has some priority - if unique_colname not in config.priorities and config.export_all: + if unique_colname.split("@",1)[0] not in config.priorities and config.export_all: if not unique_colname.lower().startswith("ignore:"): elem = unique_colname.split("@",1)[0] config.priorities.append(elem) - with open('tmp','a') as f: - f.write(repr(colmap) + "\n" + repr(config.priorities) + "\n\n") +# if unique_colname.lower().startswith("ignore:"): +# pass +# elif dupe_suffix == "": +# elem = unique_colname.split("@",1)[0] +# config.priorities.append(elem) +# else: +# # need to put dupe__3__ ahead of dupe__2__, etc. 
for proper nesting +# elem = unique_colname.split("@",1)[0] +# i = config.priorities.index(bare_colname) +# while i < len(config.priorities) and config.priorities[i].startswith(bare_colname): +# i += 1 +# config.priorities.insert(i, elem) else: col = cell[0] row = cell[1] @@ -621,7 +701,7 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) - output = dedupe_properly_nested_tags(output) + output = deunique_properly_nested_tags(output) return output From 545da3467d910e1f855330ba4af46963b294f22c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 4 Oct 2018 01:34:36 -0400 Subject: [PATCH 037/135] finish multiple col export --- modules/ether.py | 98 ++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 429dc3a..656ee53 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -18,7 +18,7 @@ from configobj import ConfigObj from ast import literal_eval import json -import copy +from copy import copy import cgi import requests from xml.sax.saxutils import escape @@ -433,16 +433,13 @@ def swap_run(l, start, end): return lines -def merge_adjacent_tags(lines): - return lines - def deunique_properly_nested_tags(sgml): """Use a silly n^2 algorithm to detect properly nested tags and strip them of their unique identifiers. 
Probably an n algorithm to do this.""" lines = sgml.split("\n") lines = reverse_adjacent_closing_tags(lines) - output = copy.copy(lines) + output = copy(lines) for i, line in enumerate(lines): if deunique_should_skip_line(line) or line.startswith(" 1: - dupe_suffix = "__" + str(namecount[bare_colname]) + "__" + namecount[colname] += 1 + if namecount[colname] > 1: + dupe_suffix = "__" + str(namecount[colname]) + "__" else: dupe_suffix = "" @@ -551,18 +549,7 @@ def ether_to_sgml(ether, doc_id,config=None): if not unique_colname.lower().startswith("ignore:"): elem = unique_colname.split("@",1)[0] config.priorities.append(elem) -# if unique_colname.lower().startswith("ignore:"): -# pass -# elif dupe_suffix == "": -# elem = unique_colname.split("@",1)[0] -# config.priorities.append(elem) -# else: -# # need to put dupe__3__ ahead of dupe__2__, etc. for proper nesting -# elem = unique_colname.split("@",1)[0] -# i = config.priorities.index(bare_colname) -# while i < len(config.priorities) and config.priorities[i].startswith(bare_colname): -# i += 1 -# config.priorities.insert(i, elem) + # All other rows else: col = cell[0] row = cell[1] @@ -570,47 +557,54 @@ def ether_to_sgml(ether, doc_id,config=None): col_name = colmap[col] else: raise IOError("Column " + col + " not found in doc_id " + str(doc_id)) + + # If the column specifies an attribute name, use it, otherwise use the element's name again if "@" in col_name: element, attrib = col_name.split("@",1) else: element = col_name attrib = element + # Check to see if the cell has been merged with other cells if 'rowspan' in cell[2]: rowspan = int(cell[2]['rowspan']) else: rowspan = 1 - if "|" in element: # Check for flexible element, e.g. m|w@x means 'prefer to attach x to m, else to w' + # Check for flexible element, e.g. 
m|w@x means 'prefer to attach x to m, else to w' + if "|" in element: element, sec_element = element.split("|",1) else: sec_element = "" + # Move on to next cell if this is not a desired column if element not in config.priorities or (element.startswith("ignore:") and config.no_ignore): # Guaranteed to be in priorities if it should be included - continue # Move on to next cell if this is not a desired column - if row != last_row: # New row starting, sort previous lists for opening and closing orders - #close_tags[row].sort(key=lambda x: (-last_open_index[x],x)) + continue + + # New row starting from this cell, sort previous lists for opening and closing orders + if row != last_row: close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) + for element in open_tags[last_row]: open_tag_order[last_row].append(element) - #open_tag_order[last_row].sort(key=lambda x: (open_tag_length[x],x), reverse=True) + open_tag_order[last_row].sort(key=lambda x: (-open_tag_length[x],config.priorities.index(x))) for sec_tuple in sec_element_checklist: prim_found = False - e_prim, e_sec, attr, val, span = sec_tuple - if e_prim in open_tags[last_row] and e_prim in open_tag_length: - if span == open_tag_length[e_prim]: - open_tags[last_row][e_prim].append((attr, val)) - if e_prim not in close_tags[last_row + span]: - close_tags[last_row+span-1].append(e_prim) + prim_elt, sec_elt, attr, val, span = sec_tuple + if prim_elt in open_tags[last_row] and prim_elt in open_tag_length: + if span == open_tag_length[prim_elt]: + open_tags[last_row][prim_elt].append((attr, val)) + if prim_elt not in close_tags[last_row + span]: + close_tags[last_row+span-1].append(prim_elt) prim_found = True if not prim_found: - if e_sec in open_tags[last_row] and e_sec in open_tag_length: - if span == open_tag_length[e_sec]: - open_tags[last_row][e_sec].append((attr, val)) - if e_sec not in close_tags[last_row + span]: - close_tags[last_row + span - 1].append(e_sec) + if 
sec_elt in open_tags[last_row] and sec_elt in open_tag_length: + if span == open_tag_length[sec_elt]: + open_tags[last_row][sec_elt].append((attr, val)) + if sec_elt not in close_tags[last_row + span]: + close_tags[last_row + span - 1].append(sec_elt) sec_element_checklist = [] # Purge sec_elements last_row = row @@ -653,10 +647,12 @@ def ether_to_sgml(ether, doc_id,config=None): close_row = row + rowspan else: close_row = row + 1 - if element not in close_tags[close_row]: - close_tags[close_row].append(element) + # this introduces too many close tags for elts that have more than one attr. + # We take care of this later with close_tag_debt + close_tags[close_row].append(element) open_tag_length[element] = int(close_row) - int(last_open_index[element]) + # Sort last row tags #close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) if row + 1 in close_tags: @@ -668,22 +664,25 @@ def ether_to_sgml(ether, doc_id,config=None): #output = build_meta_tag(doc_id) template = fill_meta_template(doc_id,config.template) output = "" + close_tag_debt = defaultdict(int) - for r in xrange(2,len(toks)+3): - if r == 1970: - a=4 + for r in xrange(2,len(toks)+5): for element in close_tags[r]: - if element not in config.milestones: - output += '\n' - - if r == len(toks)+2: - break + if element != "" and element not in config.milestones: + if close_tag_debt[element] > 0: + close_tag_debt[element] -= 1 + else: + output += '\n' for element in open_tag_order[r]: tag = '<' + element + attr_count = 0 for attrib, value in open_tags[r][element]: if attrib != "": tag += ' ' + attrib + '="' + value + '"' + attr_count += 1 + if attr_count > 1: + close_tag_debt[element] += 1 if element in config.milestones: tag += '/>\n' else: @@ -701,6 +700,7 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) + # fix tags that look like elt__2__ if it still gives correct sgml output = deunique_properly_nested_tags(output) return 
output From 736c15c9ddb06aac2345a5eab4387b58323fda9c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 4 Oct 2018 12:02:53 -0400 Subject: [PATCH 038/135] remove redundant 'is True's, etc. --- modules/validate_spreadsheet.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index a0eb829..1d62845 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- from gitdox_sql import * from ether import get_socialcalc, make_spreadsheet, exec_via_temp, get_timestamps @@ -143,10 +143,10 @@ def validate_doc(doc_id, editor=False): if re.search(rule_doc, doc_name) is None: rule_applies = False - if rule_applies is True: + if rule_applies: rule_report, rule_extra, rule_cells = apply_rule(rule, parsed_ether, meta) cells += rule_cells - if editor is True and len(rule_extra) > 0: + if editor and len(rule_extra) > 0: new_report = """
""" + rule_report[:-5] + """ """ + "" + rule_extra + "" + "
" else: new_report = rule_report @@ -156,10 +156,10 @@ def validate_doc(doc_id, editor=False): elif rule_domain == "meta": meta_report += new_report - if editor == True: + if editor: highlight_cells(cells, ether_url, ether_doc_name) - if editor is True: + if editor: full_report = ether_report + meta_report if len(full_report) == 0: full_report = "Document is valid!" @@ -435,4 +435,4 @@ def validate_doc_xml(doc_id, schema, editor=False): if mode == "ether": print validate_doc(doc_id, editor=True).encode("utf8") elif mode == "xml": - print validate_doc_xml(doc_id, schema, editor=True).encode("utf8") \ No newline at end of file + print validate_doc_xml(doc_id, schema, editor=True).encode("utf8") From a00edd6afdb22ccd367fec4309ff6a12b3a8136d Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 4 Oct 2018 14:35:29 -0400 Subject: [PATCH 039/135] simplify a couple more bools, add repr to cell --- modules/validate_spreadsheet.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index 1d62845..0886e14 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -17,6 +17,9 @@ def __init__(self, col, row, content, span): self.content = content self.span = span + def __repr__(self): + return "" + def highlight_cells(cells, ether_url, ether_doc_name): old_ether = get_socialcalc(ether_url, ether_doc_name) @@ -146,7 +149,7 @@ def validate_doc(doc_id, editor=False): if rule_applies: rule_report, rule_extra, rule_cells = apply_rule(rule, parsed_ether, meta) cells += rule_cells - if editor and len(rule_extra) > 0: + if editor is True and len(rule_extra) > 0: new_report = """
""" + rule_report[:-5] + """ """ + "" + rule_extra + "" + "
" else: new_report = rule_report @@ -234,9 +237,7 @@ def apply_rule(rule, parsed_ether, meta): return report, extra, cells if domain == "ether": - if operator in ["~", "|", "exists"]: - # find col letter corresponding to col name if name in parsed_ether: col = parsed_ether[name] @@ -386,7 +387,7 @@ def validate_doc_xml(doc_id, schema, editor=False): rule_applies = False if rule_applies is True: rule_report, rule_extra = apply_meta_rule(rule, meta) - if editor is True and len(rule_extra) > 0: + if editor and len(rule_extra) > 0: meta_report += """
""" + rule_report[ :-5] + """ """ + "" + rule_extra + "" + "
" else: From 202b42ad2bf407c2b308af0adc3a1fe2c2ff887c Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Thu, 4 Oct 2018 22:26:33 -0400 Subject: [PATCH 040/135] Revert Python 2.6 incompatible dictionary comprehension --- modules/ether.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/ether.py b/modules/ether.py index 0d8cbc0..134a662 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -629,7 +629,10 @@ def get_socialcalc(ether_path, name): def get_timestamps(ether_path): r = requests.get(ether_path + "_roomtimes") times = r.json() - return {room.replace("timestamp-", ""): times[room] for room in times} + output = {} + for room in times: + output[room.replace("timestamp-", "")] = times[room] + return output if __name__ == "__main__": From 743d574a9d4ada7ab760dc8572edb12a8bfbf4a1 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Thu, 4 Oct 2018 22:27:31 -0400 Subject: [PATCH 041/135] Get absolute path for config.ini and tabs not spaces --- paths.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/paths.py b/paths.py index 0fdf6cd..5967897 100755 --- a/paths.py +++ b/paths.py @@ -7,18 +7,20 @@ else: prefix = "" +gitdox_root = os.path.dirname(os.path.realpath(__file__)) + # to use password authentication, use a netrc file called .netrc in the project root try: - ether_url = ConfigObj(prefix + "users" + os.sep + "config.ini")["ether_url"] + ether_url = ConfigObj(gitdox_root + os.sep + "users" + os.sep + "config.ini")["ether_url"] except KeyError: - ether_url = "" + ether_url = "" if not ether_url.endswith(os.sep): - ether_url += os.sep + ether_url += os.sep def get_menu(): config = ConfigObj(prefix + "users" + os.sep + "config.ini") - if "banner" not in config: + if "banner" not in config: return "" banner = config["banner"] From f9841370759ff72fcf7b70d74f2e0bc756c1b945 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 7 Oct 2018 18:51:44 -0400 Subject: [PATCH 042/135] use requests in get_timestamps --- 
modules/ether.py | 124 +++++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 46 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 134a662..73f2a1f 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -19,13 +19,10 @@ from ast import literal_eval import json import cgi -import requests from xml.sax.saxutils import escape - __version__ = "2.0.0" - class ExportConfig: def __init__(self, **kwargs): @@ -220,12 +217,27 @@ def flush_open(annos, row_num, colmap): def flush_close(closing_element, last_value, last_start, row_num, colmap, aliases): flushed = "" - for alias in aliases[closing_element]: - if last_start[alias] < row_num - 1: - span_string = ":rowspan:" + str(row_num - last_start[alias]) + for alias in aliases[closing_element][-1]: + stack_len = len(last_start[alias]) + + with open('tmp','a') as f: + f.write("\nFound element " + closing_element + " at " + str(row_num) + "\n") + f.write(repr(last_value) + "\n") + f.write(repr(last_start) + "\n") + f.write(alias + "\n") + f.write(repr(aliases[closing_element]) + "\n") + f.write(repr(last_start[alias]) + "\n") + f.flush() + + if stack_len > 0 and last_start[alias][-1] < row_num - 1: + span_string = ":rowspan:" + str(row_num - last_start[alias][-1]) else: span_string = "" - flushed += "cell:" + colmap[alias] + str(last_start[alias]) + ":t:" + last_value[alias]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + + flushed += "cell:" + colmap[alias][stack_len - 1] + str(last_start[alias][-1]) + ":t:" + last_value[alias][-1]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + last_value[alias].pop() + last_start[alias].pop() + aliases[closing_element].pop() return flushed @@ -238,14 +250,23 @@ def number_to_letter(number): def sgml_to_ether(sgml, ignore_elements=False): - sgml = sgml.replace("\r","") - current_row = 2 open_annos = defaultdict(list) + + # a mapping from a tag name to a list of values. 
the list is a stack + # where the most recently encountered opening tag's value/start row + # is kept on the right side of the list. whenever we see a closing tag + # we pop from the stack, and whenever we see an opening tag we push + # (append) to the stack + last_value = defaultdict(list) + last_start = defaultdict(list) + + # maps from tags to a similar stack data structure where the top of the stack + # (i.e. the right side of the list) contains all the annotations that were + # present on the most recently opened nested element aliases = defaultdict(list) - last_value = {} - last_start = {} + + # values in this dict are also lists which follow the pattern described above colmap = OrderedDict() - maxcol = 1 preamble = """socialcalc:version:1.0 MIME-Version: 1.0 @@ -265,45 +286,54 @@ def sgml_to_ether(sgml, ignore_elements=False): """ + sgml = sgml.replace("\r","") + output = "" + maxcol = 1 + current_row = 2 for line in sgml.strip().split("\n"): line = line.strip() + # SocialCalc uses colons internally, \\c used to repr colon in data line = line.replace(":","\\c") + if line.startswith(""): # Skip unary tags and XML instructions - pass + continue elif line.startswith("]+)>",line) element = my_match.groups(0)[0] - output+=flush_close(element, last_value, last_start, current_row, colmap, aliases) + output += flush_close(element, last_value, last_start, current_row, colmap, aliases) elif line.startswith("<"): # Opening tag my_match = re.match("<([^ >]+)[ >]",line) element = my_match.groups(0)[0] - aliases[element] = [] # Reset element aliases to see which attributes this instance has + aliases[element].append([]) # Add new set of aliases to see which attributes this instance has if "=" not in line: line = "<" + element + " " + element + '="' + element + '">' - my_match = re.findall('([^" =]+)="([^"]+)"',line) + attrs = re.findall('([^" =]+)="([^"]+)"',line) anno_name = "" anno_value = "" - for match in my_match: - if element != match[0] and ignore_elements is 
False: - anno_name = element + "_" + match[0] + for attr in attrs: + if element != attr[0] and ignore_elements is False: + anno_name = element + "_" + attr[0] else: - anno_name = match[0] - anno_value = match[1] + anno_name = attr[0] + anno_value = attr[1] open_annos[current_row].append((anno_name,anno_value)) - last_value[anno_name] = anno_value - last_start[anno_name] = current_row - if element not in aliases: - aliases[element] = [anno_name] - elif anno_name not in aliases[element]: - aliases[element].append(anno_name) + last_value[anno_name].append(anno_value) + last_start[anno_name].append(current_row) + if anno_name not in aliases[element][-1]: + aliases[element][-1].append(anno_name) + if anno_name not in colmap: - maxcol +=1 - colmap[anno_name] = number_to_letter(maxcol) + maxcol += 1 + colmap[anno_name] = [number_to_letter(maxcol)] + elif anno_name in colmap and \ + len(last_start[anno_name]) > len(colmap[anno_name]): + maxcol += 1 + colmap[anno_name].append(number_to_letter(maxcol)) elif len(line) > 0: # Token token = line.strip() @@ -315,7 +345,8 @@ def sgml_to_ether(sgml, ignore_elements=False): preamble += "cell:A1:t:tok:f:2\n" # f <> tvf for links output = preamble + output for header in colmap: - output += "cell:"+colmap[header]+"1:t:"+header+":f:2\n" # NO f <> tvf for links + for entry in colmap[header]: + output += "cell:"+entry+"1:t:"+header+":f:2\n" # NO f <> tvf for links output += "\nsheet:c:" + str(maxcol) + ":r:" + str(current_row-1) + ":tvf:1\n" @@ -342,12 +373,10 @@ def sgml_to_ether(sgml, ignore_elements=False): def ether_to_csv(ether_path, name): - try: - r = requests.get(ether_path + "_/" + name + "/csv/") - except: - return "" - - return r.text + command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/" + proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + (stdout, stderr) = proc.communicate() + return stdout.decode("utf8") def ether_to_sgml(ether, 
doc_id,config=None): @@ -609,21 +638,24 @@ def delete_spreadsheet(ether_url, name): :param name: name of the spreadsheet (last part of URL) :return: void """ - try: - r = requests.delete(ether_url + "_/" + name) - except: - pass + + ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name + del_proc = subprocess.Popen(ether_command,shell=True) + + (stdout, stderr) = del_proc.communicate() + + return stdout, stderr + def sheet_exists(ether_path, name): return len(get_socialcalc(ether_path,name)) > 0 def get_socialcalc(ether_path, name): - try: - r = requests.get(ether_path + '_/' + name) - except: - return "" - return r.text + command = "curl --netrc -X GET " + ether_path + "_/" + name + proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + (stdout, stderr) = proc.communicate() + return stdout.decode("utf8") def get_timestamps(ether_path): From 29656a9fdae8322fce3eaac230fbaa14d2dcc9c0 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 18:44:23 -0400 Subject: [PATCH 043/135] remove debug code --- modules/ether.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 73f2a1f..9c95d47 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -220,21 +220,14 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase for alias in aliases[closing_element][-1]: stack_len = len(last_start[alias]) - with open('tmp','a') as f: - f.write("\nFound element " + closing_element + " at " + str(row_num) + "\n") - f.write(repr(last_value) + "\n") - f.write(repr(last_start) + "\n") - f.write(alias + "\n") - f.write(repr(aliases[closing_element]) + "\n") - f.write(repr(last_start[alias]) + "\n") - f.flush() - if stack_len > 0 and last_start[alias][-1] < row_num - 1: span_string = ":rowspan:" + str(row_num - last_start[alias][-1]) else: span_string = "" flushed += "cell:" + colmap[alias][stack_len - 1] + 
str(last_start[alias][-1]) + ":t:" + last_value[alias][-1]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + + # pop the stack since we've closed a tag last_value[alias].pop() last_start[alias].pop() aliases[closing_element].pop() From 32060d9f08e47d649923f2b00be19b50b9670795 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 19:47:26 -0400 Subject: [PATCH 044/135] split flushed line into multiple lines --- modules/ether.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/ether.py b/modules/ether.py index 9c95d47..cbaae13 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -225,7 +225,12 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase else: span_string = "" - flushed += "cell:" + colmap[alias][stack_len - 1] + str(last_start[alias][-1]) + ":t:" + last_value[alias][-1]+":f:1:tvf:1"+span_string + "\n" # Use t for tvf to leave links on + # Use t for tvf to leave links on + flushed += "cell:" \ + + colmap[alias][stack_len - 1] \ + + str(last_start[alias][-1]) \ + + ":t:" + last_value[alias][-1] \ + + ":f:1:tvf:1" + span_string + "\n" # pop the stack since we've closed a tag last_value[alias].pop() From 97b8d6ce884c673e420418e8c28371d7cc50f3f1 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 19:48:43 -0400 Subject: [PATCH 045/135] Replace line continuations with parens) --- modules/ether.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index cbaae13..4846aa5 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -226,11 +226,11 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase span_string = "" # Use t for tvf to leave links on - flushed += "cell:" \ - + colmap[alias][stack_len - 1] \ - + str(last_start[alias][-1]) \ - + ":t:" + last_value[alias][-1] \ - + ":f:1:tvf:1" + span_string + "\n" + flushed += ("cell:" + + colmap[alias][stack_len - 1] + + 
str(last_start[alias][-1]) + + ":t:" + last_value[alias][-1] + + ":f:1:tvf:1" + span_string + "\n") # pop the stack since we've closed a tag last_value[alias].pop() From 656ccdcd6b8cf71ee8f2ce0148df32688e558c0e Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 29 Sep 2018 20:33:56 -0400 Subject: [PATCH 046/135] fix premature popping of alias stack --- modules/ether.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/ether.py b/modules/ether.py index 4846aa5..53afce1 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -217,6 +217,7 @@ def flush_open(annos, row_num, colmap): def flush_close(closing_element, last_value, last_start, row_num, colmap, aliases): flushed = "" + for alias in aliases[closing_element][-1]: stack_len = len(last_start[alias]) @@ -235,7 +236,8 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase # pop the stack since we've closed a tag last_value[alias].pop() last_start[alias].pop() - aliases[closing_element].pop() + + aliases[closing_element].pop() return flushed From 68d37f2fa4263e3f7026daf8fee904dc1ffda4d5 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 16:21:10 -0400 Subject: [PATCH 047/135] make exported sgml tags unique, begin writing de-uniqueing function --- modules/ether.py | 60 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 53afce1..18349e2 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -378,6 +378,29 @@ def ether_to_csv(ether_path, name): (stdout, stderr) = proc.communicate() return stdout.decode("utf8") +def dedupe_properly_nested_tags(sgml): + is_open = {} + + output = sgml.split("\n") + + for i, line in enumerate(sgml.split("\n")): + if (not line.startswith("<") + or line.startswith("") + or line.startswith("]+)>", line).groups(0)[0] + # store line at which we began + is_open[element] = i + else: + element = re.match("<([^ >]+)[ >]", 
line).groups(0)[0] + + + + return sgml def ether_to_sgml(ether, doc_id,config=None): """ @@ -392,13 +415,15 @@ def ether_to_sgml(ether, doc_id,config=None): else: config = ExportConfig(config=config) + # mapping from col header (meaningful string) to the col letter colmap = {} + # list of 3-tuples of parsed cells: (col, row, contents) cells = [] if isinstance(ether,unicode): ether = ether.encode("utf8") - + # parse cell contents into cells for line in ether.splitlines(): parsed_cell = re.match(r'cell:([A-Z]+)(\d+):(.*)$', line) if parsed_cell is not None: @@ -423,22 +448,37 @@ def ether_to_sgml(ether, doc_id,config=None): sec_element_checklist = [] row = 1 + # added to support duplicate columns + namecount = defaultdict(int) + close_tags = defaultdict(list) for cell in cells: if cell[1] == 1: # Header row colname = cell[2]['t'].replace("\\c",":") + + # if we've already seen a tag of this name, prepare to make it unique + namecount[colname] += 1 + if namecount[colname] > 1: + dupe_suffix = "__" + str(namecount[colname]) + "__" + else: + dupe_suffix = "" + + # if an attr is present, be sure to place the dupe suffix before the attr if colname in config.aliases: - colmap[cell[0]] = config.aliases[colname] + alias = config.aliases[colname] + if "@" in alias: + unique_colname = alias.replace("@", dupe_suffix + "@") else: - colmap[cell[0]] = colname + unique_colname = colname + dupe_suffix + colmap[cell[0]] = unique_colname + # Make sure that everything that should be exported has some priority - if colname not in config.priorities and config.export_all: - if not colname.lower().startswith("ignore:"): # Never export columns prefixed with "ignore:" - if "@" in colname: - elem = colname.split("@",1)[0] - else: - elem = colname + if unique_colname not in config.priorities and config.export_all: + if not unique_colname.lower().startswith("ignore:"): + elem = unique_colname.split("@",1)[0] config.priorities.append(elem) + with open('tmp','a') as f: + f.write(repr(colmap) + 
"\n" + repr(config.priorities) + "\n\n") else: col = cell[0] row = cell[1] @@ -577,6 +617,8 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) + output = dedupe_properly_nested_tags(output) + return output From 368cd0db12ed7ff27262ac543db6dbf98f6413a8 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 16:51:16 -0400 Subject: [PATCH 048/135] redo curl->requests changes --- modules/ether.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 18349e2..aecf889 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -19,6 +19,7 @@ from ast import literal_eval import json import cgi +import requests from xml.sax.saxutils import escape __version__ = "2.0.0" @@ -373,10 +374,13 @@ def sgml_to_ether(sgml, ignore_elements=False): def ether_to_csv(ether_path, name): - command = "curl --netrc -X GET " + ether_path + "_/" + name + "/csv/" - proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") + try: + r = requests.get(ether_path + "_/" + name + "/csv/") + except: + return "" + + return r.text + def dedupe_properly_nested_tags(sgml): is_open = {} @@ -680,13 +684,10 @@ def delete_spreadsheet(ether_url, name): :param name: name of the spreadsheet (last part of URL) :return: void """ - - ether_command = "curl --netrc -X DELETE " + ether_url + "_/" + name - del_proc = subprocess.Popen(ether_command,shell=True) - - (stdout, stderr) = del_proc.communicate() - - return stdout, stderr + try: + r = requests.delete(ether_url + "_/" + name) + except: + pass def sheet_exists(ether_path, name): @@ -700,6 +701,7 @@ def get_socialcalc(ether_path, name): return stdout.decode("utf8") + def get_timestamps(ether_path): r = requests.get(ether_path + "_roomtimes") times = r.json() From 5ddc7778e389541b1e49e297f115d3a57f379ef5 Mon 
Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 17:05:56 -0400 Subject: [PATCH 049/135] make doc table in index.py occupy all horizontal space --- css/gitdox.css | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/css/gitdox.css b/css/gitdox.css index 2242871..ff401a1 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -1,5 +1,10 @@ -#doctable{ border: 2px solid black; - border-radius: 4px;font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;background-color:hsla(40,53%,100%,0.30)} +#doctable { + border: 2px solid black; + border-radius: 4px; + font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif; + background-color: hsla(40,53%,100%,0.30); + width: 100%; +} h1, h2{font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;-webkit-font-smoothing: subpixel-antialiased;} @@ -173,4 +178,4 @@ tfoot { } #filter_id{width:30px} -#filter_mode{width:60px} \ No newline at end of file +#filter_mode{width:60px} From aef70290a848564907fdc5a0b9e42c911f14e342 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 30 Sep 2018 17:09:13 -0400 Subject: [PATCH 050/135] make doctable inputs in index.py occupy all available space --- css/gitdox.css | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/css/gitdox.css b/css/gitdox.css index ff401a1..95ab679 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -3,7 +3,11 @@ border-radius: 4px; font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif; background-color: hsla(40,53%,100%,0.30); - width: 100%; + width: 100%; +} + +#doctable input[type=text] { + width: 95%; } h1, h2{font-family: asul, "Lucida Grande", "Lucida Sans Unicode", "Lucida Sans", "DejaVu Sans", Verdana, sans-serif;-webkit-font-smoothing: subpixel-antialiased;} From 
a35f63aca9b036e1fcf6cb10242aabaadd2fd98d Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 1 Oct 2018 01:43:38 -0400 Subject: [PATCH 051/135] implement multiple-column export done, todo: merge adjacent elts with same name and mutually exclusive attrs --- modules/ether.py | 136 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 108 insertions(+), 28 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index aecf889..b504aa0 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -18,6 +18,7 @@ from configobj import ConfigObj from ast import literal_eval import json +import copy import cgi import requests from xml.sax.saxutils import escape @@ -382,29 +383,96 @@ def ether_to_csv(ether_path, name): return r.text -def dedupe_properly_nested_tags(sgml): - is_open = {} +def strip_unique_identifier(tag): + """Given an SGML closing or opening tag, replace anything that looks + like __\d+__ on the end of the tag name, assuming that we were the + ones who added it.""" - output = sgml.split("\n") + try: + tag_name = re.match("^]+)", tag).groups(0)[0] + except AttributeError: + return tag + + orig_tag_name = re.sub("__\d+__$", "", tag_name) + tag = tag.replace("<" + tag_name, "<" + orig_tag_name) + tag = tag.replace("") # unary tags + or line.startswith("', ''] and replaces them with + ['', '']""" + def swap_run(l, start, end): + l[start:end] = l[start:end][::-1] + + run_start = None + for i, line in enumerate(lines): + if line.startswith("") - or line.startswith("]+)>", line).groups(0)[0] - # store line at which we began - is_open[element] = i - else: - element = re.match("<([^ >]+)[ >]", line).groups(0)[0] + # if we've gotten this far, we have an opening tag--store the tag name + open_element = re.match("<([^ >]+)[ >]", line).groups(0)[0] + open_counts = defaultdict(int) + + for j, line2 in enumerate(lines[i:]): + if deunique_should_skip_line(line2): + continue + + if line2.startswith("]+)>", line2).groups(0)[0] + open_counts[element] -= 1 + if 
element == open_element: + break + else: + element = re.match("<([^ >]+)[ >]", line2).groups(0)[0] + open_counts[element] += 1 + # element is properly nested if no element was opened in the block that + # was not also closed in the block or vice versa + if sum(open_counts.values()) == 0: + output[i] = strip_unique_identifier(output[i]) + output[i+j] = strip_unique_identifier(output[i+j]) + return "\n".join(output) - return sgml def ether_to_sgml(ether, doc_id,config=None): """ @@ -460,29 +528,41 @@ def ether_to_sgml(ether, doc_id,config=None): if cell[1] == 1: # Header row colname = cell[2]['t'].replace("\\c",":") + if colname in config.aliases: + colname = config.aliases[colname] + # if we've already seen a tag of this name, prepare to make it unique - namecount[colname] += 1 - if namecount[colname] > 1: - dupe_suffix = "__" + str(namecount[colname]) + "__" + bare_colname = colname.split("@",1)[0] + namecount[bare_colname] += 1 + if namecount[bare_colname] > 1: + dupe_suffix = "__" + str(namecount[bare_colname]) + "__" else: dupe_suffix = "" - # if an attr is present, be sure to place the dupe suffix before the attr - if colname in config.aliases: - alias = config.aliases[colname] - if "@" in alias: - unique_colname = alias.replace("@", dupe_suffix + "@") + if "@" in colname: + unique_colname = colname.replace("@", dupe_suffix + "@") else: unique_colname = colname + dupe_suffix + colmap[cell[0]] = unique_colname # Make sure that everything that should be exported has some priority - if unique_colname not in config.priorities and config.export_all: + if unique_colname.split("@",1)[0] not in config.priorities and config.export_all: if not unique_colname.lower().startswith("ignore:"): elem = unique_colname.split("@",1)[0] config.priorities.append(elem) - with open('tmp','a') as f: - f.write(repr(colmap) + "\n" + repr(config.priorities) + "\n\n") +# if unique_colname.lower().startswith("ignore:"): +# pass +# elif dupe_suffix == "": +# elem = 
unique_colname.split("@",1)[0] +# config.priorities.append(elem) +# else: +# # need to put dupe__3__ ahead of dupe__2__, etc. for proper nesting +# elem = unique_colname.split("@",1)[0] +# i = config.priorities.index(bare_colname) +# while i < len(config.priorities) and config.priorities[i].startswith(bare_colname): +# i += 1 +# config.priorities.insert(i, elem) else: col = cell[0] row = cell[1] @@ -621,7 +701,7 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) - output = dedupe_properly_nested_tags(output) + output = deunique_properly_nested_tags(output) return output From 2602b7cf823f64e1b329df5443c86be513b4190b Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 4 Oct 2018 01:34:36 -0400 Subject: [PATCH 052/135] finish multiple col export --- modules/ether.py | 98 ++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index b504aa0..a0d1edb 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -18,7 +18,7 @@ from configobj import ConfigObj from ast import literal_eval import json -import copy +from copy import copy import cgi import requests from xml.sax.saxutils import escape @@ -433,16 +433,13 @@ def swap_run(l, start, end): return lines -def merge_adjacent_tags(lines): - return lines - def deunique_properly_nested_tags(sgml): """Use a silly n^2 algorithm to detect properly nested tags and strip them of their unique identifiers. 
Probably an n algorithm to do this.""" lines = sgml.split("\n") lines = reverse_adjacent_closing_tags(lines) - output = copy.copy(lines) + output = copy(lines) for i, line in enumerate(lines): if deunique_should_skip_line(line) or line.startswith(" 1: - dupe_suffix = "__" + str(namecount[bare_colname]) + "__" + namecount[colname] += 1 + if namecount[colname] > 1: + dupe_suffix = "__" + str(namecount[colname]) + "__" else: dupe_suffix = "" @@ -551,18 +549,7 @@ def ether_to_sgml(ether, doc_id,config=None): if not unique_colname.lower().startswith("ignore:"): elem = unique_colname.split("@",1)[0] config.priorities.append(elem) -# if unique_colname.lower().startswith("ignore:"): -# pass -# elif dupe_suffix == "": -# elem = unique_colname.split("@",1)[0] -# config.priorities.append(elem) -# else: -# # need to put dupe__3__ ahead of dupe__2__, etc. for proper nesting -# elem = unique_colname.split("@",1)[0] -# i = config.priorities.index(bare_colname) -# while i < len(config.priorities) and config.priorities[i].startswith(bare_colname): -# i += 1 -# config.priorities.insert(i, elem) + # All other rows else: col = cell[0] row = cell[1] @@ -570,47 +557,54 @@ def ether_to_sgml(ether, doc_id,config=None): col_name = colmap[col] else: raise IOError("Column " + col + " not found in doc_id " + str(doc_id)) + + # If the column specifies an attribute name, use it, otherwise use the element's name again if "@" in col_name: element, attrib = col_name.split("@",1) else: element = col_name attrib = element + # Check to see if the cell has been merged with other cells if 'rowspan' in cell[2]: rowspan = int(cell[2]['rowspan']) else: rowspan = 1 - if "|" in element: # Check for flexible element, e.g. m|w@x means 'prefer to attach x to m, else to w' + # Check for flexible element, e.g. 
m|w@x means 'prefer to attach x to m, else to w' + if "|" in element: element, sec_element = element.split("|",1) else: sec_element = "" + # Move on to next cell if this is not a desired column if element not in config.priorities or (element.startswith("ignore:") and config.no_ignore): # Guaranteed to be in priorities if it should be included - continue # Move on to next cell if this is not a desired column - if row != last_row: # New row starting, sort previous lists for opening and closing orders - #close_tags[row].sort(key=lambda x: (-last_open_index[x],x)) + continue + + # New row starting from this cell, sort previous lists for opening and closing orders + if row != last_row: close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) + for element in open_tags[last_row]: open_tag_order[last_row].append(element) - #open_tag_order[last_row].sort(key=lambda x: (open_tag_length[x],x), reverse=True) + open_tag_order[last_row].sort(key=lambda x: (-open_tag_length[x],config.priorities.index(x))) for sec_tuple in sec_element_checklist: prim_found = False - e_prim, e_sec, attr, val, span = sec_tuple - if e_prim in open_tags[last_row] and e_prim in open_tag_length: - if span == open_tag_length[e_prim]: - open_tags[last_row][e_prim].append((attr, val)) - if e_prim not in close_tags[last_row + span]: - close_tags[last_row+span-1].append(e_prim) + prim_elt, sec_elt, attr, val, span = sec_tuple + if prim_elt in open_tags[last_row] and prim_elt in open_tag_length: + if span == open_tag_length[prim_elt]: + open_tags[last_row][prim_elt].append((attr, val)) + if prim_elt not in close_tags[last_row + span]: + close_tags[last_row+span-1].append(prim_elt) prim_found = True if not prim_found: - if e_sec in open_tags[last_row] and e_sec in open_tag_length: - if span == open_tag_length[e_sec]: - open_tags[last_row][e_sec].append((attr, val)) - if e_sec not in close_tags[last_row + span]: - close_tags[last_row + span - 1].append(e_sec) + if 
sec_elt in open_tags[last_row] and sec_elt in open_tag_length: + if span == open_tag_length[sec_elt]: + open_tags[last_row][sec_elt].append((attr, val)) + if sec_elt not in close_tags[last_row + span]: + close_tags[last_row + span - 1].append(sec_elt) sec_element_checklist = [] # Purge sec_elements last_row = row @@ -653,10 +647,12 @@ def ether_to_sgml(ether, doc_id,config=None): close_row = row + rowspan else: close_row = row + 1 - if element not in close_tags[close_row]: - close_tags[close_row].append(element) + # this introduces too many close tags for elts that have more than one attr. + # We take care of this later with close_tag_debt + close_tags[close_row].append(element) open_tag_length[element] = int(close_row) - int(last_open_index[element]) + # Sort last row tags #close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) if row + 1 in close_tags: @@ -668,22 +664,25 @@ def ether_to_sgml(ether, doc_id,config=None): #output = build_meta_tag(doc_id) template = fill_meta_template(doc_id,config.template) output = "" + close_tag_debt = defaultdict(int) - for r in xrange(2,len(toks)+3): - if r == 1970: - a=4 + for r in xrange(2,len(toks)+5): for element in close_tags[r]: - if element not in config.milestones: - output += '\n' - - if r == len(toks)+2: - break + if element != "" and element not in config.milestones: + if close_tag_debt[element] > 0: + close_tag_debt[element] -= 1 + else: + output += '\n' for element in open_tag_order[r]: tag = '<' + element + attr_count = 0 for attrib, value in open_tags[r][element]: if attrib != "": tag += ' ' + attrib + '="' + value + '"' + attr_count += 1 + if attr_count > 1: + close_tag_debt[element] += 1 if element in config.milestones: tag += '/>\n' else: @@ -701,6 +700,7 @@ def ether_to_sgml(ether, doc_id,config=None): output = re.sub("%%[^%]+%%", "none", output) + # fix tags that look like elt__2__ if it still gives correct sgml output = deunique_properly_nested_tags(output) return 
output From 1c4133d07e72b80124fb8ad44026967825146c44 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 7 Oct 2018 20:48:24 -0400 Subject: [PATCH 053/135] fix tag swapping bug --- modules/ether.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ether.py b/modules/ether.py index a0d1edb..675a486 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -420,6 +420,7 @@ def swap_run(l, start, end): deuniqued_tag = strip_unique_identifier(line) if deuniqued_tag != lines[run_start]: swap_run(lines, run_start, i) + run_start = None else: run_start = i elif run_start is not None: From 0a0463f55621c584d685b6564830de37d56ec709 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 7 Oct 2018 21:09:07 -0400 Subject: [PATCH 054/135] don't add 'missing' os.sep to empty url --- paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paths.py b/paths.py index 5967897..de29905 100755 --- a/paths.py +++ b/paths.py @@ -12,10 +12,10 @@ # to use password authentication, use a netrc file called .netrc in the project root try: ether_url = ConfigObj(gitdox_root + os.sep + "users" + os.sep + "config.ini")["ether_url"] + if not ether_url.endswith(os.sep): + ether_url += os.sep except KeyError: ether_url = "" -if not ether_url.endswith(os.sep): - ether_url += os.sep def get_menu(): config = ConfigObj(prefix + "users" + os.sep + "config.ini") From d919aaf6e8d5c8c0af9945510419baaf7dc0797d Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 02:11:59 -0400 Subject: [PATCH 055/135] rewrite apply_rule to support duplicate columns --- modules/validate_spreadsheet.py | 212 +++++++++++++++++++------------- 1 file changed, 127 insertions(+), 85 deletions(-) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index 0886e14..89602be 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -7,7 +7,11 @@ import re import cgi import json +from pprint import pformat +def logln(s,fname='tmp'): + with 
open(fname,'a') as f: + f.write(s + "\n") class Cell: def __init__(self, col, row, content, span): @@ -18,7 +22,7 @@ def __init__(self, col, row, content, span): self.span = span def __repr__(self): - return "" + return "" def highlight_cells(cells, ether_url, ether_doc_name): @@ -146,6 +150,8 @@ def validate_doc(doc_id, editor=False): if re.search(rule_doc, doc_name) is None: rule_applies = False + logln(repr(rule)) + logln("Rule applies: " + str(rule_applies)) if rule_applies: rule_report, rule_extra, rule_cells = apply_rule(rule, parsed_ether, meta) cells += rule_cells @@ -183,8 +189,8 @@ def parse_ether(ether, doc, corpus): # find col letter corresponding to col name parsed = defaultdict(list) + colmap = defaultdict(list) rev_colmap = {} - colmap = {} all_cells = [] for line in ether_lines: if line.startswith("cell:"): # Cell row @@ -202,22 +208,19 @@ def parse_ether(ether, doc, corpus): # else: # cell_col += c cell_content = parts[3].replace("\\c",":") - if "rowspan:" in line: - cell_span = parts[-1] - else: - cell_span = "1" - if cell_row == "1": # Header row - colmap[cell_content] = cell_col + cell_span = parts[-1] if "rowspan:" in line else "1" + + # record col name + if cell_row == "1": + colmap[cell_content].append(cell_col) rev_colmap[cell_col] = cell_content - all_cells.append(Cell(cell_col,cell_row,cell_content,cell_span)) - for cell in all_cells: - try: - cell.header = rev_colmap[cell.col] - except KeyError: - raise KeyError("KeyError: " + cell.col + "; Document: " + corpus + " :: " + doc + "") + cell = Cell(cell_col, cell_row, cell_content, cell_span) + parsed[cell_col].append(cell) + all_cells.append(cell) - parsed[cell.header].append(cell) + for cell in all_cells: + cell.header = rev_colmap[cell.col] parsed["__colmap__"] = colmap # Save colmap for apply_rule return parsed @@ -233,91 +236,130 @@ def apply_rule(rule, parsed_ether, meta): extra = '' cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + if name is None: 
return report, extra, cells + logln(pformat(dict(parsed_ether))) + + # list of letters with col name + col_letters = colmap[name] + if domain == "ether": - if operator in ["~", "|", "exists"]: - # find col letter corresponding to col name - if name in parsed_ether: - col = parsed_ether[name] - else: - if operator in ["|","exists"]: - report += "Column named " + name + " not found
" + # check to see if column exists + if operator == "exists": + if len(col_letters) == 0: + report += "Column named " + name + " not found
" return report, extra, cells - for cell in col: - if cell.row != "1": - if operator == "|": # rowspan - if argument == "1": - if cell.span != "1": - report += "Cell " + cell.col + cell.row + ": row span is not 1
" - cells.append(cell.col + cell.row) - else: - if cell.span != "" and cell.span is not None: - report += "Cell " + cell.col + cell.row + ": row span is not " + argument + "
" - cells.append(cell.col + cell.row) - - elif operator == "~": # regex - match = re.search(argument, cell.content) - if match is None: - report += "Cell " + cell.col + cell.row + ": content does not match pattern
" - extra += "Cell " + cell.col + cell.row + ":
" + "Content: " + cell.content + "
" + "Pattern: " + argument + "
" - cells.append(cell.col + cell.row) + # check to see that all cells are of a certain row span + elif operator == "|": + # do any exist? + if len(col_letters) == 0: + report += "Column named " + name + " not found
" + return report, extra, cells - elif operator in ["=", ">","=="]: # care about two cols: name and argument + for letter in col_letters: + for cell in parsed_ether[letter]: + if cell.row == "1": + continue - # find col letters corresponding to col names - name_letter = parsed_ether["__colmap__"][name] if name in parsed_ether["__colmap__"] else None - arg_letter = parsed_ether["__colmap__"][argument] if argument in parsed_ether["__colmap__"] else None - if name_letter is None: + if argument == "1": + if cell.span != "1": + report += "Cell " + cell.col + cell.row + ": row span is not 1
" + cells.append(cell.col + cell.row) + else: + if cell.span != "" and cell.span is not None: + report += "Cell " + cell.col + cell.row + ": row span is not " + argument + "
" + cells.append(cell.col + cell.row) + + elif operator == "~": + for letter in col_letters: + for cell in parsed_ether[letter]: + if cell.row == "1": + continue + match = re.search(argument, cell.content) + if match is None: + report += "Cell " + cell.col + cell.row + ": content does not match pattern
" + extra += "Cell " + cell.col + cell.row + ":
" + "Content: " + cell.content + "
" + "Pattern: " + argument + "
" + cells.append(cell.col + cell.row) + + elif operator in ["=", ">", "=="]: + name_letters = colmap[name] + arg_letters = colmap[argument] + + if len(name_letters) == 0: if operator != "==": report += "Column named " + name + " not found
" return report, extra, cells - if arg_letter is None: + if len(arg_letters) == 0: if operator != "==": report += "Column named " + argument + " not found
" return report, extra, cells - name_boundaries = [] - arg_boundaries = [] - name_content = {} - arg_content = {} - name_filled = [] - arg_filled = [] - - # find boundary rows - for cell in parsed_ether[name]: - name_boundaries.append(cell.row) - name_content[cell.row] = cell.content - for i in range(int(cell.row), int(cell.row) + int(cell.span)): - name_filled.append(str(i)) - for cell in parsed_ether[argument]: - arg_boundaries.append(cell.row) - arg_content[cell.row] = cell.content - for i in range(int(cell.row), int(cell.row) + int(cell.span)): - arg_filled.append(str(i)) - - if operator == "==": - for row in name_content: - if row in arg_content: - if arg_content[row] != name_content[row]: - cells.append(arg_letter + row) - for boundary in arg_boundaries: - if boundary not in name_boundaries: - cells.append(arg_letter + boundary) - else: - for boundary in name_boundaries: - if boundary not in arg_boundaries: - if boundary in arg_filled: - report += "Span break on line " + boundary + " in column " + name + " but not " \ - + argument + "
" - cells.append(name_letter + boundary) - if operator == "=": - for boundary in arg_boundaries: - if boundary not in name_boundaries: - if boundary in name_filled: - cells.append(arg_letter + boundary) + name_tuples = defaultdict(list) + arg_tuples = defaultdict(list) + start_rows = defaultdict(list) + all_rows = [] + + for letter in name_letters: + for cell in parsed_ether[letter]: + start_rows[letter].append(cell.row) + for i in range(int(cell.span) or 1): + row = str(int(cell.row) + i) + name_tuples[row].append((letter, cell.content)) + all_rows.append(row) + + for letter in arg_letters: + for cell in parsed_ether[letter]: + start_rows[letter].append(cell.row) + for i in range(int(cell.span) or 1): + row = str(int(cell.row) + i) + arg_tuples[row].append((letter, cell.content)) + if row not in all_rows: + all_rows.append(row) + + import cgitb; cgitb.enable() + for row in all_rows: + # check to see if all cells in rhs are contained within cells on lhs + if operator == ">": + if row in arg_tuples and row not in name_tuples: + for letter, _ in arg_tuples[row]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " must appear in the span of a cell in one of these columns: " + + ", ".join(name_letters) + "
") + + # operator in ["=", "=="], i.e. span equivalence and span and content equivalence + else: + name_len = len(name_tuples[row]) + arg_len = len(arg_tuples[row]) + + if name_len > arg_len: + for letter, _ in name_tuples[row][arg_len:]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " lacks a corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") + elif arg_len < name_len: + for letter, _ in arg_tuples[row][name_len:]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " lacks a corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + if operator == "==": + for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): + name_letter, name_content = name_tuples[row][i] + arg_letter, arg_content = arg_tuples[row][i] + + if arg_content != name_content and (row in start_rows[arg_letter] or row in start_rows[name_letter]): + cells.append(name_letter + row) + cells.append(arg_letter + row) + report += ("Cells " + name_letter + row + + " and " + arg_letter + row + + " must have equivalent content.
") elif domain == "meta": meta_report, meta_extra = apply_meta_rule(rule, meta) From 75d88e8251be1d91bba74639f3cc9a0c37a8bb76 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 02:24:47 -0400 Subject: [PATCH 056/135] remove debug info --- modules/validate_spreadsheet.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index 89602be..9e60d49 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -9,10 +9,6 @@ import json from pprint import pformat -def logln(s,fname='tmp'): - with open(fname,'a') as f: - f.write(s + "\n") - class Cell: def __init__(self, col, row, content, span): self.col = col @@ -150,8 +146,6 @@ def validate_doc(doc_id, editor=False): if re.search(rule_doc, doc_name) is None: rule_applies = False - logln(repr(rule)) - logln("Rule applies: " + str(rule_applies)) if rule_applies: rule_report, rule_extra, rule_cells = apply_rule(rule, parsed_ether, meta) cells += rule_cells @@ -241,8 +235,6 @@ def apply_rule(rule, parsed_ether, meta): if name is None: return report, extra, cells - logln(pformat(dict(parsed_ether))) - # list of letters with col name col_letters = colmap[name] @@ -320,7 +312,6 @@ def apply_rule(rule, parsed_ether, meta): if row not in all_rows: all_rows.append(row) - import cgitb; cgitb.enable() for row in all_rows: # check to see if all cells in rhs are contained within cells on lhs if operator == ">": From 6e35194a013c9f54cbdb0b5975385dbc265a1d26 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 02:26:14 -0400 Subject: [PATCH 057/135] add a couple comments --- modules/validate_spreadsheet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index 9e60d49..122339b 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -298,11 +298,13 @@ def apply_rule(rule, parsed_ether, meta): for letter in 
name_letters: for cell in parsed_ether[letter]: start_rows[letter].append(cell.row) + # "de-merge" cell so we have an entry for every row in its span with its letter and content for i in range(int(cell.span) or 1): row = str(int(cell.row) + i) name_tuples[row].append((letter, cell.content)) all_rows.append(row) + # same as above with arg_letters for letter in arg_letters: for cell in parsed_ether[letter]: start_rows[letter].append(cell.row) From 02bcea7fc8b7892602dfffca387f821cf56929ed Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 02:28:14 -0400 Subject: [PATCH 058/135] don't check to see if title rows are equiv --- modules/validate_spreadsheet.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index 122339b..67ee930 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -343,6 +343,9 @@ def apply_rule(rule, parsed_ether, meta): + ", ".join(name_letters) + "
") if operator == "==": + if row == "1": + continue + for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): name_letter, name_content = name_tuples[row][i] arg_letter, arg_content = arg_tuples[row][i] From 1b046f348f75910803d491e0003a8dc3036f4d73 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 02:29:36 -0400 Subject: [PATCH 059/135] simplify if statements --- modules/validate_spreadsheet.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py index 67ee930..5447c57 100755 --- a/modules/validate_spreadsheet.py +++ b/modules/validate_spreadsheet.py @@ -342,10 +342,8 @@ def apply_rule(rule, parsed_ether, meta): + " lacks a corresponding value in one of these columns: " + ", ".join(name_letters) + "
") - if operator == "==": - if row == "1": - continue - + # check for content equivalence + if operator == "==" and row != "1": for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): name_letter, name_content = name_tuples[row][i] arg_letter, arg_content = arg_tuples[row][i] From 886211f1100c9418b4d01880bccc72a311895215 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 12:02:05 -0400 Subject: [PATCH 060/135] implement clone from metadata --- editor.py | 11 ++++++++- index.py | 9 +++++-- templates/landing.html | 54 +++++++++++++++++++++++++++++++++++------- 3 files changed, 63 insertions(+), 11 deletions(-) diff --git a/editor.py b/editor.py index 38e2392..5d00224 100755 --- a/editor.py +++ b/editor.py @@ -106,7 +106,7 @@ def load_page(user,admin,theform): corpus = "default_corpus" schema = "" text_content = "" - # If one of the four forms is edited, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc) + # If one of the four forms is edited or we're cloning a doc, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc) if theform.getvalue('edit_docname') and user != "demo": if docname != 'new_document': if doc_id > max_id: @@ -160,6 +160,15 @@ def load_page(user,admin,theform): else: update_schema(doc_id, schema) + # cloning metadata from an existing doc into a new doc + if theform.getvalue('source_doc'): + existing_meta = get_doc_meta(theform.getvalue('source_doc')) + create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) + for meta in existing_meta: + m_key, m_val = meta[2:4] + save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) + max_id = doc_id + else: # Get previous values from DB old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = get_doc_info(doc_id) diff --git a/index.py b/index.py index 49cd3bb..f9ea69a 100755 --- a/index.py +++ b/index.py @@ -241,9 +241,14 @@ def 
load_landing(user, admin, theform): landing = landing.replace("**table**", table) landing = landing.replace("**navbar**", menu) if int(admin) > 0: - landing = landing.replace("**create_doc**",'''onclick="document.getElementById('form_new').submit();" class="button"''') + landing = landing.replace("**create_doc**", + '''onclick="document.getElementById('form_new').submit();" class="button"''') + landing = landing.replace("**source_doc_attrs**", '''''') + opts = "\n".join(['' for x in doc_list]) + landing = landing.replace("**existing_documents**", opts) else: - landing = landing.replace("**create_doc**",'''class="button disabled"''') + landing = landing.replace("**create_doc**", '''class="button disabled"''') + landing = landing.replace("**source_doc_attrs**", '''disabled="disabled"''') page += landing print("Content-type:text/html\n\n") diff --git a/templates/landing.html b/templates/landing.html index d20e5bd..14995e4 100644 --- a/templates/landing.html +++ b/templates/landing.html @@ -42,7 +42,7 @@

GitDox: Project **project**

validate

For help getting started see the wiki

-

+

idcorpusdocumentstatusassignedmodevalidateactions
Choose a corpus: @@ -52,20 +52,58 @@

GitDox: Project **project**

-

- -

+

+
-
+
New Document
-

- **table** +
+ + +
+ + +
+
+ **table**
- + From 53cde37d341db9cfa399d86c1471c5aa74c911d9 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 12:37:00 -0400 Subject: [PATCH 061/135] format html --- templates/editor.html | 252 +++++++++++++++++++++--------------------- 1 file changed, 126 insertions(+), 126 deletions(-) diff --git a/templates/editor.html b/templates/editor.html index b8e68ac..a4c7c3b 100644 --- a/templates/editor.html +++ b/templates/editor.html @@ -4,136 +4,136 @@ - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + - -
- -
- - - - - -

Editor | back to document list

- - - - - - - - - - - - - - - - - - - - - - - -
Document Name: - -
-
Validate
Corpus Name: - -
-
-
-
-
Git Repo: -
-
XML Schema:**edit_schema**
Assigned to:**edit_assignee**
Status:**edit_status**
Mode:**edit_mode**
- - -**embedded_editor** - -

meta data

-**metadata** - -
Add document meta
- - - -

-**corpus_metadata** - -
Add corpus meta
- - - - - -
- - -
- - -
+ + + **navbar** +
+ **header** +
+

GitDox: Edit

+ **editor_help_link** +
+ +
+ + + + + + + + + + + + + +
+ +
+ + + + + +

Editor | back to document list

+ + + + + + + + + + + + + + + + + + + + + + + +
Document Name: + +
+
Validate
Corpus Name: + +
+
+
+
+
Git Repo: +
+
XML Schema:**edit_schema**
Assigned to:**edit_assignee**
Status:**edit_status**
Mode:**edit_mode**
+ + + **embedded_editor** + +

Metadata

+ **metadata** + +
Add document meta
+ + + +

+ **corpus_metadata** + +
Add corpus meta
+ + + + + +
+ + +
+ + +
- \ No newline at end of file + From bbcd55aef7c90ee614ac45d80e69e6aafa7140b3 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 13:06:18 -0400 Subject: [PATCH 062/135] Allow copying metadata into existing document --- editor.py | 22 ++++++++++++-- templates/editor.html | 69 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/editor.py b/editor.py index 5d00224..ff44552 100755 --- a/editor.py +++ b/editor.py @@ -163,11 +163,12 @@ def load_page(user,admin,theform): # cloning metadata from an existing doc into a new doc if theform.getvalue('source_doc'): existing_meta = get_doc_meta(theform.getvalue('source_doc')) - create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) + if doc_id > max_id: + create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) + max_id = doc_id for meta in existing_meta: m_key, m_val = meta[2:4] save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) - max_id = doc_id else: # Get previous values from DB @@ -190,6 +191,13 @@ def load_page(user,admin,theform): out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") mode = "ether" + # handle copying metadata + if theform.getvalue('source_doc'): + existing_meta = get_doc_meta(theform.getvalue('source_doc')) + for meta in existing_meta: + m_key, m_val = meta[2:4] + save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) + if theform.getvalue('edit_docname'): docname = theform.getvalue('edit_docname') @@ -468,6 +476,16 @@ def load_page(user,admin,theform): page=page.replace("**id**",doc_id) page=page.replace("**mode**",mode) page=page.replace("**schema**",schema) + + # handle clone meta button + if int(admin) > 0: + doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",()) + page = page.replace("**source_doc_attrs**", '''''') + opts = "\n".join(['' 
for x in doc_list]) + page = page.replace("**existing_documents**", opts) + else: + page = page.replace("**source_doc_attrs**", '''disabled="disabled"''') + if int(admin)>0: page=page.replace("**github**",push_git) else: diff --git a/templates/editor.html b/templates/editor.html index a4c7c3b..158dd37 100644 --- a/templates/editor.html +++ b/templates/editor.html @@ -92,13 +92,13 @@

Editor | back to document list

- + - Git Repo: - -
- + Git Repo: + +
+ XML Schema:**edit_schema** Assigned to:**edit_assignee** @@ -113,27 +113,70 @@

Editor | back to document list

Metadata

**metadata** -
Add document meta
- - +

**corpus_metadata** -
Add corpus meta
+
+ + Add Document Metadata +
- + +

+
Add Corpus Metadata
+
+
+ + +
+ +
+ +
- -
- From 290efc48887d9683685b3e2a9e5dda4ca502bf7c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 13:11:40 -0400 Subject: [PATCH 063/135] don't overwrite existing keys in meta copy --- editor.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/editor.py b/editor.py index ff44552..94bca52 100755 --- a/editor.py +++ b/editor.py @@ -162,11 +162,11 @@ def load_page(user,admin,theform): # cloning metadata from an existing doc into a new doc if theform.getvalue('source_doc'): - existing_meta = get_doc_meta(theform.getvalue('source_doc')) + source_meta = get_doc_meta(theform.getvalue('source_doc')) if doc_id > max_id: create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) max_id = doc_id - for meta in existing_meta: + for meta in source_meta: m_key, m_val = meta[2:4] save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) @@ -191,11 +191,15 @@ def load_page(user,admin,theform): out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml") mode = "ether" + cgitb.enable() # handle copying metadata if theform.getvalue('source_doc'): - existing_meta = get_doc_meta(theform.getvalue('source_doc')) - for meta in existing_meta: - m_key, m_val = meta[2:4] + source_meta = get_doc_meta(theform.getvalue('source_doc')) + existing_meta = get_doc_meta(doc_id) + # don't overwrite existing keys + meta_to_write = [x for x in source_meta for y in existing_meta if x[2] != y[2]] + for meta in meta_to_add: + m_key, m_val = meta[2], meta[3] save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) From 6c413dc103a0718948f07214570ad8940a68a397 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 12 Oct 2018 13:11:40 -0400 Subject: [PATCH 064/135] don't overwrite existing keys in meta copy --- editor.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/editor.py b/editor.py index ff44552..2aa3f5b 100755 --- a/editor.py +++ b/editor.py @@ 
-162,11 +162,11 @@ def load_page(user,admin,theform): # cloning metadata from an existing doc into a new doc if theform.getvalue('source_doc'): - existing_meta = get_doc_meta(theform.getvalue('source_doc')) + source_meta = get_doc_meta(theform.getvalue('source_doc')) if doc_id > max_id: create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) max_id = doc_id - for meta in existing_meta: + for meta in source_meta: m_key, m_val = meta[2:4] save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) @@ -193,9 +193,12 @@ def load_page(user,admin,theform): # handle copying metadata if theform.getvalue('source_doc'): - existing_meta = get_doc_meta(theform.getvalue('source_doc')) - for meta in existing_meta: - m_key, m_val = meta[2:4] + source_meta = get_doc_meta(theform.getvalue('source_doc')) + existing_meta = get_doc_meta(doc_id) + # don't overwrite existing keys + meta_to_write = [x for x in source_meta for y in existing_meta if x[2] != y[2]] + for meta in meta_to_write: + m_key, m_val = meta[2], meta[3] save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) From 3de1c14627383954111511ff040dadbf299700b5 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 13 Oct 2018 13:02:29 -0400 Subject: [PATCH 065/135] fix bug: dont copy existing keys --- editor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/editor.py b/editor.py index 2aa3f5b..2d10198 100755 --- a/editor.py +++ b/editor.py @@ -194,9 +194,9 @@ def load_page(user,admin,theform): # handle copying metadata if theform.getvalue('source_doc'): source_meta = get_doc_meta(theform.getvalue('source_doc')) - existing_meta = get_doc_meta(doc_id) + existing_meta_keys = [x[2] for x in get_doc_meta(doc_id)] # don't overwrite existing keys - meta_to_write = [x for x in source_meta for y in existing_meta if x[2] != y[2]] + meta_to_write = [x for x in source_meta if x[2] not in existing_meta_keys] for meta in meta_to_write: m_key, m_val = meta[2], meta[3] 
save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8")) From e749c1ce42c75777dadff5ec68b69d24c80cb2bb Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 13 Oct 2018 13:06:33 -0400 Subject: [PATCH 066/135] show corpus name w/ doc in clone meta options --- index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.py b/index.py index f9ea69a..67eedd4 100755 --- a/index.py +++ b/index.py @@ -244,7 +244,7 @@ def load_landing(user, admin, theform): landing = landing.replace("**create_doc**", '''onclick="document.getElementById('form_new').submit();" class="button"''') landing = landing.replace("**source_doc_attrs**", '''''') - opts = "\n".join(['' for x in doc_list]) + opts = "\n".join(['' for x in doc_list]) landing = landing.replace("**existing_documents**", opts) else: landing = landing.replace("**create_doc**", '''class="button disabled"''') From 61a6798c577877c89e0aab3d86921f94a7db30f8 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 13 Oct 2018 13:13:23 -0400 Subject: [PATCH 067/135] add corpus name to clone meta dropdown --- editor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/editor.py b/editor.py index 2d10198..9b7d2f0 100755 --- a/editor.py +++ b/editor.py @@ -484,7 +484,7 @@ def load_page(user,admin,theform): if int(admin) > 0: doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",()) page = page.replace("**source_doc_attrs**", '''''') - opts = "\n".join(['' for x in doc_list]) + opts = "\n".join(['' for x in doc_list]) page = page.replace("**existing_documents**", opts) else: page = page.replace("**source_doc_attrs**", '''disabled="disabled"''') From 4a5b69eabab3bcf1ecee7ae904d11163839b4423 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 17 Oct 2018 21:14:03 -0400 Subject: [PATCH 068/135] fix validation box css --- css/gitdox.css | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/css/gitdox.css 
b/css/gitdox.css index 95ab679..143ea52 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -172,9 +172,7 @@ width: 100%} } #validation_report{ - max-height: 300px; - overflow-y: scroll; - overflow-x: scroll; + height: 300px; } tfoot { From 1aacb40d3b8dab143b1bdf35b18364e8a20efb67 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 18 Oct 2018 00:24:31 -0400 Subject: [PATCH 069/135] add overflow-y:auto to #validation_report --- css/gitdox.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/gitdox.css b/css/gitdox.css index 143ea52..af1170d 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -173,6 +173,7 @@ width: 100%} #validation_report{ height: 300px; + overflow-y: auto; } tfoot { From 150131682a4314177a6ff5e7a05de82f90a81a2a Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 18 Oct 2018 00:25:42 -0400 Subject: [PATCH 070/135] refactor validation code for modularity and sep of concerns --- js/editor.js | 4 +- js/index.js | 2 +- modules/ether.py | 53 +++ modules/gitdox_sql.py | 12 +- modules/validate_spreadsheet.py | 475 -------------------------- modules/validation/__init__.py | 0 modules/validation/ether_validator.py | 260 ++++++++++++++ modules/validation/meta_validator.py | 54 +++ modules/validation/validator.py | 13 + modules/validation/xml_validator.py | 26 ++ validate.py | 219 ++++++++++++ 11 files changed, 639 insertions(+), 479 deletions(-) delete mode 100755 modules/validate_spreadsheet.py create mode 100644 modules/validation/__init__.py create mode 100644 modules/validation/ether_validator.py create mode 100644 modules/validation/meta_validator.py create mode 100644 modules/validation/validator.py create mode 100644 modules/validation/xml_validator.py create mode 100755 validate.py diff --git a/js/editor.js b/js/editor.js index 1d1bb2c..c6f6ab7 100755 --- a/js/editor.js +++ b/js/editor.js @@ -13,7 +13,7 @@ function validate_doc() { var mode = $("#mode").val(); var schema = $("#schema").val(); $.ajax({ - url: 
'modules/validate_spreadsheet.py', + url: 'validate.py', type: 'post', data: {doc_id: docId, mode: mode, schema: schema}, dataType: "html", @@ -43,4 +43,4 @@ function export_ether(){ stylesheet = document.getElementById('ether_stylesheet').value; window.open('export.py?docs=' + doc_id + '&stylesheet=' + stylesheet, '_new'); -} \ No newline at end of file +} diff --git a/js/index.js b/js/index.js index e50a54a..ebb50c3 100644 --- a/js/index.js +++ b/js/index.js @@ -2,7 +2,7 @@ function validate_all() { $("#validate_landing").addClass("disabledbutton"); $("#validate_landing").html(' validating...'); $.ajax({ - url: 'modules/validate_spreadsheet.py', + url: 'validate.py', type: 'post', data: {doc_id: 'all'}, dataType: "json", diff --git a/modules/ether.py b/modules/ether.py index 675a486..212cc22 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -96,6 +96,58 @@ def read_config(self,config_file): else: self.template = "\n%%body%%\n\n" +def parse_ether(ether): + """Take in raw socialcalc data and turn it into a dict of Cells. Used in validation.""" + class Cell: + def __init__(self, col, row, content, span): + self.col = col + self.row = row + self.header = "" + self.content = content + self.span = span + def __repr__(self): + return "" + + ether_lines = ether.splitlines() + + # find col letter corresponding to col name + parsed = defaultdict(list) + colmap = defaultdict(list) + rev_colmap = {} + all_cells = [] + for line in ether_lines: + if line.startswith("cell:"): # Cell row + # A maximal row looks like this incl. span: cell:F2:t:LIRC2014_chw0oir:f:1:rowspan:289 + # A minimal row without formatting: cell:C2:t:JJ:f:1 + parts = line.split(":") + if len(parts) > 3: # Otherwise invalid row + cell_id = parts[1] + cell_row = cell_id[1:] + cell_col = cell_id[0] + # We'd need something like this to support more than 26 cols, i.e. columns AA, AB... 
+ #for c in cell_id: + # if c in ["0","1","2","3","4","5","6","7","8","9"]: + # cell_row += c + # else: + # cell_col += c + cell_content = parts[3].replace("\\c",":") + cell_span = parts[-1] if "rowspan:" in line else "1" + + # record col name + if cell_row == "1": + colmap[cell_content].append(cell_col) + rev_colmap[cell_col] = cell_content + + cell = Cell(cell_col, cell_row, cell_content, cell_span) + parsed[cell_col].append(cell) + all_cells.append(cell) + + for cell in all_cells: + cell.header = rev_colmap[cell.col] + + parsed["__colmap__"] = colmap # Save colmap for apply_rule + return parsed + def unescape_xml(text): # Fix various common compounded XML escapes text = text.replace("&lt;","<").replace("&gt;",">") @@ -792,6 +844,7 @@ def get_timestamps(ether_path): return output + if __name__ == "__main__": data = "" storage = cgi.FieldStorage() diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py index 014b400..c3e69a5 100755 --- a/modules/gitdox_sql.py +++ b/modules/gitdox_sql.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- """ Data access functions to read from and write to the SQLite backend. 
@@ -178,6 +178,10 @@ def get_doc_info(doc_id): else: return res +def get_doc_content(doc_id): + res = generic_query("SELECT content FROM docs WHERE id=?", (int(doc_id),)) + return res[0][0] + def get_all_docs(corpus=None, status=None): if corpus is None: if status is None: @@ -207,6 +211,12 @@ def get_corpora(): def get_validate_rules(): return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate", None) +def get_meta_rules(): + return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'meta'", None) + +def get_ether_rules(): + return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'ether'", None) + def get_sorted_rules(sort): return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate ORDER BY " + sort, None) # parameterization doesn't work for order by diff --git a/modules/validate_spreadsheet.py b/modules/validate_spreadsheet.py deleted file mode 100755 index 5447c57..0000000 --- a/modules/validate_spreadsheet.py +++ /dev/null @@ -1,475 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -from gitdox_sql import * -from ether import get_socialcalc, make_spreadsheet, exec_via_temp, get_timestamps -from collections import defaultdict -import re -import cgi -import json -from pprint import pformat - -class Cell: - def __init__(self, col, row, content, span): - self.col = col - self.row = row - self.header = "" - self.content = content - self.span = span - - def __repr__(self): - return "" - - -def highlight_cells(cells, ether_url, ether_doc_name): - old_ether = get_socialcalc(ether_url, ether_doc_name) - old_ether_lines = old_ether.splitlines() - new_ether_lines = [] - - old_color_numbers = [] - new_color_number = '1' - for line in old_ether_lines: - color_line = re.match(r'color:(\d+):(rgb.*$)', line) - if color_line is not None: - if color_line.group(2) == 'rgb(242, 242, 142)': - 
old_color_numbers.append(color_line.group(1)) - else: - new_color_number = str(1 + int(color_line.group(1))) - if len(old_color_numbers) > 0: - new_color_number = old_color_numbers[0] - - for line in old_ether_lines: - - parts = line.split(":") - # Check for pure formatting cells, e.g. cell:K15:f:1 - if len(parts) == 4: - if parts[2] == "f": # Pure formatting cell, no content - continue - - parsed_cell = re.match(r'cell:([A-Z]+\d+)(:.*)$', line) - if parsed_cell is not None: - col_row = parsed_cell.group(1) - other = parsed_cell.group(2) - bg = re.search(r':bg:(\d+)($|:)', other) - if bg is not None: - bg = bg.group(1) - - if col_row in cells: - if bg is not None: - if bg != new_color_number: - new_line = re.sub(r':bg:' + bg, r':bg:' + new_color_number, line) - else: - new_line = line - else: - new_line = line + ':bg:' + new_color_number - else: - if bg is not None: - if bg in old_color_numbers: - new_line = re.sub(r':bg:' + bg, r'', line) - else: - new_line = line - else: - new_line = line - new_ether_lines.append(new_line) - elif re.match(r'sheet:', line) is not None: - new_ether_lines.append(line) - if new_color_number not in old_color_numbers: - new_ether_lines.append('color:' + new_color_number + ':rgb(242, 242, 142)') - else: - new_ether_lines.append(line) - - new_ether = '\n'.join(new_ether_lines) - make_spreadsheet(new_ether, ether_url + "_/" + ether_doc_name, "socialcalc") - - -def validate_all_docs(): - docs = generic_query("SELECT id, name, corpus, mode, schema, validation, timestamp FROM docs", None) - doc_timestamps = get_timestamps(ether_url) - reports = {} - - for doc in docs: - doc_id, doc_name, corpus, doc_mode, doc_schema, validation, timestamp = doc - if doc_mode == "ether": - ether_name = "_".join(["gd",corpus,doc_name]) - if ether_name in doc_timestamps and validation is not None and len(validation) > 0: - if timestamp == doc_timestamps[ether_name]: - reports[doc_id] = json.loads(validation) - else: - reports[doc_id] = validate_doc(doc_id) - 
update_validation(doc_id, json.dumps(reports[doc_id])) - update_timestamp(doc_id, doc_timestamps[ether_name]) - else: - reports[doc_id] = validate_doc(doc_id) - #reports[doc_id] = {"ether":"sample_ether","meta":"sample_meta"} - update_validation(doc_id, json.dumps(reports[doc_id])) - if ether_name in doc_timestamps: - update_timestamp(doc_id, doc_timestamps[ether_name]) - elif doc_mode == "xml": - if validation is None: - reports[doc_id] = validate_doc_xml(doc_id, doc_schema) - try: - validation_report = json.dumps(reports[doc_id]) - except UnicodeDecodeError: - reports[doc_id]["xml"] = "UnicodeDecodeError; unable to print XML validation report for " + doc_name - validation_report = json.dumps(reports[doc_id]) - update_validation(doc_id,validation_report) - else: - reports[doc_id] = json.loads(validation) - - return json.dumps(reports) - - -def validate_doc(doc_id, editor=False): - doc_info = get_doc_info(doc_id) - doc_name = doc_info[0] - doc_corpus = doc_info[1] - - ether_doc_name = "gd_" + doc_corpus + "_" + doc_name - ether = get_socialcalc(ether_url, ether_doc_name) - parsed_ether = parse_ether(ether, doc_name, doc_corpus) - meta = get_doc_meta(doc_id) - - ether_report = '' - meta_report = '' - cells = [] - - rules = get_validate_rules() - for rule in rules: - rule_applies = True - rule_corpus = rule[0] - rule_doc = rule[1] - rule_domain = rule[2] - if rule_corpus is not None: - if re.search(rule_corpus, doc_corpus) is None: - rule_applies = False - if rule_doc is not None: - if re.search(rule_doc, doc_name) is None: - rule_applies = False - - if rule_applies: - rule_report, rule_extra, rule_cells = apply_rule(rule, parsed_ether, meta) - cells += rule_cells - if editor is True and len(rule_extra) > 0: - new_report = """
""" + rule_report[:-5] + """ """ + "" + rule_extra + "" + "
" - else: - new_report = rule_report - - if rule_domain == "ether": - ether_report += new_report - elif rule_domain == "meta": - meta_report += new_report - - if editor: - highlight_cells(cells, ether_url, ether_doc_name) - - if editor: - full_report = ether_report + meta_report - if len(full_report) == 0: - full_report = "Document is valid!" - return full_report - else: - json_report = {} - if len(ether_report) == 0: - ether_report = "spreadsheet is valid" - if len(meta_report) == 0: - meta_report = "metadata is valid" - json_report['ether'] = ether_report - json_report['meta'] = meta_report - return json_report - - -def parse_ether(ether, doc, corpus): - ether_lines = ether.splitlines() - - # find col letter corresponding to col name - parsed = defaultdict(list) - colmap = defaultdict(list) - rev_colmap = {} - all_cells = [] - for line in ether_lines: - if line.startswith("cell:"): # Cell row - # A maximal row looks like this incl. span: cell:F2:t:LIRC2014_chw0oir:f:1:rowspan:289 - # A minimal row without formatting: cell:C2:t:JJ:f:1 - parts = line.split(":") - if len(parts) > 3: # Otherwise invalid row - cell_id = parts[1] - cell_row = cell_id[1:] - cell_col = cell_id[0] - # We'd need something like this to support more than 26 cols, i.e. columns AA, AB... 
- #for c in cell_id: - # if c in ["0","1","2","3","4","5","6","7","8","9"]: - # cell_row += c - # else: - # cell_col += c - cell_content = parts[3].replace("\\c",":") - cell_span = parts[-1] if "rowspan:" in line else "1" - - # record col name - if cell_row == "1": - colmap[cell_content].append(cell_col) - rev_colmap[cell_col] = cell_content - - cell = Cell(cell_col, cell_row, cell_content, cell_span) - parsed[cell_col].append(cell) - all_cells.append(cell) - - for cell in all_cells: - cell.header = rev_colmap[cell.col] - - parsed["__colmap__"] = colmap # Save colmap for apply_rule - return parsed - - -def apply_rule(rule, parsed_ether, meta): - domain = rule[2] - name = rule[3] - operator = rule[4] - argument = rule[5] - - report = '' - extra = '' - cells = [] - - colmap = parsed_ether['__colmap__'] # name -> list of col letters - - if name is None: - return report, extra, cells - - # list of letters with col name - col_letters = colmap[name] - - if domain == "ether": - # check to see if column exists - if operator == "exists": - if len(col_letters) == 0: - report += "Column named " + name + " not found
" - return report, extra, cells - - # check to see that all cells are of a certain row span - elif operator == "|": - # do any exist? - if len(col_letters) == 0: - report += "Column named " + name + " not found
" - return report, extra, cells - - for letter in col_letters: - for cell in parsed_ether[letter]: - if cell.row == "1": - continue - - if argument == "1": - if cell.span != "1": - report += "Cell " + cell.col + cell.row + ": row span is not 1
" - cells.append(cell.col + cell.row) - else: - if cell.span != "" and cell.span is not None: - report += "Cell " + cell.col + cell.row + ": row span is not " + argument + "
" - cells.append(cell.col + cell.row) - - elif operator == "~": - for letter in col_letters: - for cell in parsed_ether[letter]: - if cell.row == "1": - continue - match = re.search(argument, cell.content) - if match is None: - report += "Cell " + cell.col + cell.row + ": content does not match pattern
" - extra += "Cell " + cell.col + cell.row + ":
" + "Content: " + cell.content + "
" + "Pattern: " + argument + "
" - cells.append(cell.col + cell.row) - - elif operator in ["=", ">", "=="]: - name_letters = colmap[name] - arg_letters = colmap[argument] - - if len(name_letters) == 0: - if operator != "==": - report += "Column named " + name + " not found
" - return report, extra, cells - if len(arg_letters) == 0: - if operator != "==": - report += "Column named " + argument + " not found
" - return report, extra, cells - - name_tuples = defaultdict(list) - arg_tuples = defaultdict(list) - start_rows = defaultdict(list) - all_rows = [] - - for letter in name_letters: - for cell in parsed_ether[letter]: - start_rows[letter].append(cell.row) - # "de-merge" cell so we have an entry for every row in its span with its letter and content - for i in range(int(cell.span) or 1): - row = str(int(cell.row) + i) - name_tuples[row].append((letter, cell.content)) - all_rows.append(row) - - # same as above with arg_letters - for letter in arg_letters: - for cell in parsed_ether[letter]: - start_rows[letter].append(cell.row) - for i in range(int(cell.span) or 1): - row = str(int(cell.row) + i) - arg_tuples[row].append((letter, cell.content)) - if row not in all_rows: - all_rows.append(row) - - for row in all_rows: - # check to see if all cells in rhs are contained within cells on lhs - if operator == ">": - if row in arg_tuples and row not in name_tuples: - for letter, _ in arg_tuples[row]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " must appear in the span of a cell in one of these columns: " - + ", ".join(name_letters) + "
") - - # operator in ["=", "=="], i.e. span equivalence and span and content equivalence - else: - name_len = len(name_tuples[row]) - arg_len = len(arg_tuples[row]) - - if name_len > arg_len: - for letter, _ in name_tuples[row][arg_len:]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " lacks a corresponding value in one of these columns: " - + ", ".join(arg_letters) + "
") - elif arg_len < name_len: - for letter, _ in arg_tuples[row][name_len:]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " lacks a corresponding value in one of these columns: " - + ", ".join(name_letters) + "
") - - # check for content equivalence - if operator == "==" and row != "1": - for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): - name_letter, name_content = name_tuples[row][i] - arg_letter, arg_content = arg_tuples[row][i] - - if arg_content != name_content and (row in start_rows[arg_letter] or row in start_rows[name_letter]): - cells.append(name_letter + row) - cells.append(arg_letter + row) - report += ("Cells " + name_letter + row - + " and " + arg_letter + row - + " must have equivalent content.
") - - elif domain == "meta": - meta_report, meta_extra = apply_meta_rule(rule, meta) - report += meta_report - extra += meta_extra - - return report, extra, cells - - -def apply_meta_rule(rule, meta): - name = rule[3] - operator = rule[4] - argument = rule[5] - report = '' - extra = '' - if operator == "~": - for metadatum in meta: - if metadatum[2] == name: - value = metadatum[3] - match = re.search(argument, value) - if match is None: - report += "Metadata for " + name + " does not match pattern" + "
" - extra += "Metadata: " + value + "
" + "Pattern: " + argument + "
" - elif operator == "exists": - exists = False - for metadatum in meta: - if metadatum[2] == name: - exists = True - break - if exists is False: - report += "No metadata for " + name + '
' - return report, extra - - -def validate_doc_xml(doc_id, schema, editor=False): - xml_report = '' - # xml validation - if schema == "--none--": - xml_report += "No schema
" - else: - command = "xmllint --htmlout --schema " + "../schemas/" + schema + ".xsd" + " tempfilename" - xml = generic_query("SELECT content FROM docs WHERE id=?", (doc_id,))[0][0] - out, err = exec_via_temp(xml.encode("utf8"), command) - err = err.strip() - err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") - err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) - err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) - err = re.sub(r'\n','
',err) - xml_report += err + "
" - - # metadata validation - meta_report = '' - meta_rules = generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'meta'", None) - meta = get_doc_meta(doc_id) - doc_info = get_doc_info(doc_id) - doc_name = doc_info[0] - doc_corpus = doc_info[1] - for rule in meta_rules: - rule_applies = True - rule_corpus = rule[0] - rule_doc = rule[1] - if rule_corpus is not None: - if re.search(rule_corpus, doc_corpus) is None: - rule_applies = False - if rule_doc is not None: - if re.search(rule_doc, doc_name) is None: - rule_applies = False - if rule_applies is True: - rule_report, rule_extra = apply_meta_rule(rule, meta) - if editor and len(rule_extra) > 0: - meta_report += """
""" + rule_report[ - :-5] + """ """ + "" + rule_extra + "" + "
" - else: - meta_report += rule_report - - # report - if editor is True: - try: - #full_report = xml_report.decode("utf8") + meta_report.decode("utf8") - full_report = xml_report + meta_report - except Exception as e: - full_report = "[Encoding error: " + str(e) + "]" - if len(full_report) == 0: - full_report = "Document is valid!" - return full_report - else: - json_report = {} - if len(xml_report) == 0: - ether_report = "xml is valid" - if len(meta_report) == 0: - meta_report = "metadata is valid" - json_report['xml'] = xml_report - json_report['meta'] = meta_report - return json_report - - -if __name__ == "__main__": - if __name__ == '__main__' and __package__ is None: - from os import sys, path - - sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - from paths import ether_url - else: - from ..paths import ether_url - - parameter = cgi.FieldStorage() - doc_id = parameter.getvalue("doc_id") - mode = parameter.getvalue("mode") - schema = parameter.getvalue("schema") - - if doc_id == "all": - print "Content-type:application/json\n\n" - print validate_all_docs().encode("utf8") - else: - print "Content-type:text/html\n\n" - if mode == "ether": - print validate_doc(doc_id, editor=True).encode("utf8") - elif mode == "xml": - print validate_doc_xml(doc_id, schema, editor=True).encode("utf8") diff --git a/modules/validation/__init__.py b/modules/validation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modules/validation/ether_validator.py b/modules/validation/ether_validator.py new file mode 100644 index 0000000..5712d2e --- /dev/null +++ b/modules/validation/ether_validator.py @@ -0,0 +1,260 @@ +from validator import Validator +from collections import defaultdict +import re + + +class EtherValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.domain = rule[2] + self.name = rule[3] + self.operator = rule[4] + self.argument = rule[5] + + + def _apply_exists(self, parsed_ether): + 
report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + if len(col_letters) == 0: + report += "Column named " + self.name + " not found
" + return report, tooltip, cells + + def _apply_span_equals_number(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + if len(col_letters) == 0: + report += "Column named " + self.name + " not found
" + return report, tooltip, cells + + for letter in col_letters: + for cell in parsed_ether[letter]: + if cell.row == "1": + continue + + if self.argument == "1": + if cell.span != "1": + report += "Cell " + cell.col + cell.row + ": span is not 1
" + cells.append(cell.col + cell.row) + else: + if cell.span != "" and cell.span != self.argument: + report += "Cell " + cell.col + cell.row + ": span is not " + self.argument + "
" + cells.append(cell.col + cell.row) + return report, tooltip, cells + + def _apply_regex(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + for letter in col_letters: + for cell in parsed_ether[letter]: + if cell.row == "1": + continue + match = re.search(self.argument, cell.content) + if match is None: + report += ("Cell " + cell.col + cell.row + + ": content does not match pattern " + self.argument + "
") + tooltip += ("Cell " + cell.col + cell.row + ":
" + + "Content: " + cell.content + "
" + + "Pattern: " + self.argument + "
") + cells.append(cell.col + cell.row) + return report, tooltip, cells + + def _binary_op_check_cols_exist(self, colmap): + name_letters = colmap[self.name] + arg_letters = colmap[self.argument] + + if len(name_letters) == 0: + if self.operator != "==": + return "Column named " + self.name + " not found
" + if len(arg_letters) == 0: + if self.operator != "==": + return "Column named " + self.argument + " not found
" + + return "" + + def _binary_op_setup(self, parsed_ether): + colmap = parsed_ether['__colmap__'] # name -> list of col letters + name_letters = colmap[self.name] + arg_letters = colmap[self.argument] + + name_tuples = defaultdict(list) + arg_tuples = defaultdict(list) + start_rows = defaultdict(list) + all_rows = [] + + for letter in name_letters: + for cell in parsed_ether[letter]: + start_rows[letter].append(cell.row) + # "de-merge" cell so we have an entry for every row in its span with its letter and content + for i in range(int(cell.span) or 1): + row = str(int(cell.row) + i) + name_tuples[row].append((letter, cell.content)) + all_rows.append(row) + + # same as above with arg_letters + for letter in arg_letters: + for cell in parsed_ether[letter]: + start_rows[letter].append(cell.row) + for i in range(int(cell.span) or 1): + row = str(int(cell.row) + i) + arg_tuples[row].append((letter, cell.content)) + if row not in all_rows: + all_rows.append(row) + + return name_letters, arg_letters, name_tuples, arg_tuples, start_rows, all_rows + + def _apply_subspan(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + err = self._binary_op_check_cols_exist(colmap) + if err: + report += err + return report, tooltip, cells + + name_letters, arg_letters, name_tuples, \ + arg_tuples, start_rows, all_rows = self._binary_op_setup(parsed_ether) + + for row in all_rows: + # check to see if all cells in rhs are contained within cells on lhs + if row in arg_tuples and row not in name_tuples: + for letter, _ in arg_tuples[row]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " must appear in the span of a cell in one of these columns: " + + ", ".join(name_letters) + "
") + + return report, tooltip, cells + + def _apply_equal_span_length(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + err = self._binary_op_check_cols_exist(colmap) + if err: + report += err + return report, tooltip, cells + + name_letters, arg_letters, name_tuples, \ + arg_tuples, start_rows, all_rows = self._binary_op_setup(parsed_ether) + + for row in all_rows: + name_len = len(name_tuples[row]) + arg_len = len(arg_tuples[row]) + + if name_len > arg_len: + for letter, _ in name_tuples[row][arg_len:]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " lacks a corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") + elif arg_len > name_len: + for letter, _ in arg_tuples[row][name_len:]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " lacks a corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + return report, tooltip, cells + + def _apply_equal_span_length_and_content(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + err = self._binary_op_check_cols_exist(colmap) + if err: + report += err + return report, tooltip, cells + + name_letters, arg_letters, name_tuples, \ + arg_tuples, start_rows, all_rows = self._binary_op_setup(parsed_ether) + + for row in all_rows: + name_len = len(name_tuples[row]) + arg_len = len(arg_tuples[row]) + + if name_len > arg_len: + for letter, _ in name_tuples[row][arg_len:]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " lacks a corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") + elif arg_len > name_len: + for letter, _ in arg_tuples[row][name_len:]: + cells.append(letter + row) + report += ("Cell " + letter + row + + " lacks a corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + if row != "1": + for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): + name_letter, name_content = name_tuples[row][i] + arg_letter, arg_content = arg_tuples[row][i] + + if arg_content != name_content and (row in start_rows[arg_letter] or row in start_rows[name_letter]): + cells.append(name_letter + row) + cells.append(arg_letter + row) + report += ("Cells " + name_letter + row + + " and " + arg_letter + row + + " must have equivalent content.
") + + return report, tooltip, cells + + def _apply_rule(self, parsed_ether): + if self.name is None: + return "", "", [] + + if self.operator == "exists": + return self._apply_exists(parsed_ether) + elif self.operator == "|": + return self._apply_span_equals_number(parsed_ether) + elif self.operator == "~": + return self._apply_regex(parsed_ether) + elif self.operator == ">": + return self._apply_subspan(parsed_ether) + elif self.operator == "=": + return self._apply_equal_span_length(parsed_ether) + elif self.operator == "==": + return self._apply_equal_span_length_and_content(parsed_ether) + else: + raise Exception("Unknown EtherCalc validation operator: '" + str(self.operator) + "'") + + + def validate(self, parsed_ether, doc_name, doc_corpus): + res = {"report": "", + "tooltip": "", + "cells": []} + + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return res + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return res + + report, tooltip, cells = self._apply_rule(parsed_ether) + res['report'] += report + res['tooltip'] += tooltip + res['cells'] += cells + return res diff --git a/modules/validation/meta_validator.py b/modules/validation/meta_validator.py new file mode 100644 index 0000000..3db9fe7 --- /dev/null +++ b/modules/validation/meta_validator.py @@ -0,0 +1,54 @@ +from validator import Validator +import re + +class MetaValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.name = rule[3] + self.operator = rule[4] + self.argument = rule[5] + + def _apply_match(self, metadata): + report, tooltip = "", "" + for d in metadata: + if d[2] == self.name: + value = d[3] + match = re.search(self.argument, value) + if match is None: + report += "Metadata for " + self.name + " does not match pattern" + "
" + tooltip += "Metadata: " + value + "
" + "Pattern: " + self.argument + "
" + + return report, tooltip + + def _apply_exists(self, metadata): + report, tooltip = "", "" + exists = False + for d in metadata: + if d[2] == self.name: + exists = True + break + if exists is False: + report += "No metadata for " + self.name + '
' + + return report, tooltip + + def _apply_rule(self, metadata): + if self.operator == "~": + return self._apply_match(metadata) + elif self.operator == "exists": + return self._apply_exists(metadata) + else: + raise Exception("Unknown metadata validation operator: '" + str(self.operator) + "'") + + def validate(self, metadata, doc_name, doc_corpus): + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return "" + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return "" + + report, tooltip = self._apply_rule(metadata) + return {"report": report, + "tooltip": tooltip} diff --git a/modules/validation/validator.py b/modules/validation/validator.py new file mode 100644 index 0000000..87d472a --- /dev/null +++ b/modules/validation/validator.py @@ -0,0 +1,13 @@ +class Validator(object): + """ + Abstract class that all GitDox validations should inherit from. + When at all possible, all Validation classes should not produce any + side-effects: there should be no SQL queries, filesystem operations, + etc. caused by a validation. + + Conceptually, an instance of this class represents a single validation + "rule" against which a document will be checked. + """ + + def validate(self, doc, *args, **kwargs): + raise NotImplementedError diff --git a/modules/validation/xml_validator.py b/modules/validation/xml_validator.py new file mode 100644 index 0000000..7ff6a01 --- /dev/null +++ b/modules/validation/xml_validator.py @@ -0,0 +1,26 @@ +from validator import Validator +from ..ether import exec_via_temp +import re + +# TODO: would have been ideal to write this without any filesystem operations +class XmlValidator(Validator): + def __init__(self, schema): + self.schema = schema + + def validate(self, doc): + report = "" + + if self.schema == '--none--': + report += "No schema
" + else: + schema = self.schema + command = "xmllint --htmlout --schema schemas/" + schema + ".xsd tempfilename" + out, err = exec_via_temp(doc, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) + err = re.sub(r'\n','
',err) + report += err + "
" + + return report diff --git a/validate.py b/validate.py new file mode 100755 index 0000000..6a35c5c --- /dev/null +++ b/validate.py @@ -0,0 +1,219 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +from collections import defaultdict +import re +import cgi +import json + +from paths import ether_url +from modules.gitdox_sql import * +from modules.ether import get_socialcalc, make_spreadsheet, exec_via_temp, get_timestamps, parse_ether +from modules.validation.xml_validator import XmlValidator +from modules.validation.meta_validator import MetaValidator +from modules.validation.ether_validator import EtherValidator + +def highlight_cells(cells, ether_url, ether_doc_name): + old_ether = get_socialcalc(ether_url, ether_doc_name) + old_ether_lines = old_ether.splitlines() + new_ether_lines = [] + + old_color_numbers = [] + new_color_number = '1' + for line in old_ether_lines: + color_line = re.match(r'color:(\d+):(rgb.*$)', line) + if color_line is not None: + if color_line.group(2) == 'rgb(242, 242, 142)': + old_color_numbers.append(color_line.group(1)) + else: + new_color_number = str(1 + int(color_line.group(1))) + if len(old_color_numbers) > 0: + new_color_number = old_color_numbers[0] + + for line in old_ether_lines: + + parts = line.split(":") + # Check for pure formatting cells, e.g. 
cell:K15:f:1 + if len(parts) == 4: + if parts[2] == "f": # Pure formatting cell, no content + continue + + parsed_cell = re.match(r'cell:([A-Z]+\d+)(:.*)$', line) + if parsed_cell is not None: + col_row = parsed_cell.group(1) + other = parsed_cell.group(2) + bg = re.search(r':bg:(\d+)($|:)', other) + if bg is not None: + bg = bg.group(1) + + if col_row in cells: + if bg is not None: + if bg != new_color_number: + new_line = re.sub(r':bg:' + bg, r':bg:' + new_color_number, line) + else: + new_line = line + else: + new_line = line + ':bg:' + new_color_number + else: + if bg is not None: + if bg in old_color_numbers: + new_line = re.sub(r':bg:' + bg, r'', line) + else: + new_line = line + else: + new_line = line + new_ether_lines.append(new_line) + elif re.match(r'sheet:', line) is not None: + new_ether_lines.append(line) + if new_color_number not in old_color_numbers: + new_ether_lines.append('color:' + new_color_number + ':rgb(242, 242, 142)') + else: + new_ether_lines.append(line) + + new_ether = '\n'.join(new_ether_lines) + make_spreadsheet(new_ether, ether_url + "_/" + ether_doc_name, "socialcalc") + +def validate_doc_meta(doc_id, editor): + # metadata validation + report = '' + rules = [MetaValidator(x) for x in get_meta_rules()] + + meta = get_doc_meta(doc_id) + doc_info = get_doc_info(doc_id) + doc_name = doc_info[0] + doc_corpus = doc_info[1] + for rule in rules: + res = rule.validate(meta, doc_name, doc_corpus) + if editor and len(res['tooltip']) > 0: + report += ("""
""" + + res['report'][:-5] + + """ """ + + "" + res['tooltip'] + "" + + "
") + else: + report += res['report'] + return report + +def validate_doc_ether(doc_id, editor=False): + rules = [EtherValidator(x) for x in get_ether_rules()] + + doc_info = get_doc_info(doc_id) + doc_name = doc_info[0] + doc_corpus = doc_info[1] + + ether_doc_name = "gd_" + doc_corpus + "_" + doc_name + parsed_ether = parse_ether(get_socialcalc(ether_url, ether_doc_name)) + + report = '' + cells = [] + + # check metadata + meta_report = validate_doc_meta(doc_id, editor) + + # check ethercalc rules + for rule in rules: + res = rule.validate(parsed_ether, doc_name, doc_corpus) + if editor and len(res['tooltip']) > 0: + report += ("""
""" + + res['report'][:-5] + + """ """ + + "" + res['tooltip'] + "" + + "
") + else: + report += res['report'] + cells += res['cells'] + + if editor: + highlight_cells(cells, ether_url, ether_doc_name) + full_report = report + meta_report + if len(full_report) == 0: + full_report = "Document is valid!" + return full_report + else: + json_report = {} + if len(report) == 0: + report = "spreadsheet is valid" + if len(meta_report) == 0: + meta_report = "metadata is valid" + json_report['ether'] = report + json_report['meta'] = meta_report + return json_report + +def validate_doc_xml(doc_id, schema, editor=False): + xml_report = "" + + doc_content = get_doc_content(doc_id) + xml_report = XmlValidator(schema).validate(doc_content) + meta_report = validate_doc_meta(doc_id, editor) + + # report + if editor is True: + try: + #full_report = xml_report.decode("utf8") + meta_report.decode("utf8") + full_report = xml_report + meta_report + except Exception as e: + full_report = "[Encoding error: " + str(e) + "]" + if len(full_report) == 0: + full_report = "Document is valid!" 
+ return full_report + else: + json_report = {} + if len(xml_report) == 0: + xml_report = "xml is valid" + if len(meta_report) == 0: + meta_report = "metadata is valid" + json_report['xml'] = xml_report + json_report['meta'] = meta_report + return json_report + +def validate_all_docs(): + docs = generic_query("SELECT id, name, corpus, mode, schema, validation, timestamp FROM docs", None) + doc_timestamps = get_timestamps(ether_url) + reports = {} + + for doc in docs: + doc_id, doc_name, corpus, doc_mode, doc_schema, validation, timestamp = doc + if doc_mode == "ether": + ether_name = "_".join(["gd", corpus, doc_name]) + if ether_name in doc_timestamps and validation is not None and len(validation) > 0: + if timestamp == doc_timestamps[ether_name]: + reports[doc_id] = json.loads(validation) + else: + reports[doc_id] = validate_doc_ether(doc_id) + update_validation(doc_id, json.dumps(reports[doc_id])) + update_timestamp(doc_id, doc_timestamps[ether_name]) + else: + reports[doc_id] = validate_doc_ether(doc_id) + #reports[doc_id] = {"ether":"sample_ether","meta":"sample_meta"} + update_validation(doc_id, json.dumps(reports[doc_id])) + if ether_name in doc_timestamps: + update_timestamp(doc_id, doc_timestamps[ether_name]) + elif doc_mode == "xml": + if validation is None: + reports[doc_id] = validate_doc_xml(doc_id, doc_schema) + try: + validation_report = json.dumps(reports[doc_id]) + except UnicodeDecodeError: + reports[doc_id]["xml"] = "UnicodeDecodeError; unable to print XML validation report for " + doc_name + validation_report = json.dumps(reports[doc_id]) + update_validation(doc_id,validation_report) + else: + reports[doc_id] = json.loads(validation) + + return json.dumps(reports) + +if __name__ == "__main__": + parameter = cgi.FieldStorage() + doc_id = parameter.getvalue("doc_id") + mode = parameter.getvalue("mode") + schema = parameter.getvalue("schema") + + if doc_id == "all": + print "Content-type:application/json\n\n" + print 
validate_all_docs().encode("utf8") + else: + print "Content-type:text/html\n\n" + if mode == "ether": + print validate_doc_ether(doc_id, editor=True).encode("utf8") + elif mode == "xml": + print validate_doc_xml(doc_id, schema, editor=True).encode("utf8") From 08599a8633c4b23d541a0bce6fd3ba99d19f5c16 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Tue, 23 Oct 2018 15:18:41 -0400 Subject: [PATCH 071/135] Ensure that meta_validator always returns a dict * Case where corpus/doc does not match was returning empty string --- modules/validation/meta_validator.py | 5 +++-- validate.py | 14 +++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/modules/validation/meta_validator.py b/modules/validation/meta_validator.py index 3db9fe7..7372434 100644 --- a/modules/validation/meta_validator.py +++ b/modules/validation/meta_validator.py @@ -42,12 +42,13 @@ def _apply_rule(self, metadata): raise Exception("Unknown metadata validation operator: '" + str(self.operator) + "'") def validate(self, metadata, doc_name, doc_corpus): + out_dict = {"report":"", "tooltip":""} if self.corpus is not None: if re.search(self.corpus, doc_corpus) is None: - return "" + return out_dict if self.doc is not None: if re.search(self.doc, doc_name) is None: - return "" + return out_dict report, tooltip = self._apply_rule(metadata) return {"report": report, diff --git a/validate.py b/validate.py index 6a35c5c..b6866dc 100755 --- a/validate.py +++ b/validate.py @@ -75,7 +75,7 @@ def highlight_cells(cells, ether_url, ether_doc_name): def validate_doc_meta(doc_id, editor): # metadata validation - report = '' + report = {"report":"","tooltip":""} rules = [MetaValidator(x) for x in get_meta_rules()] meta = get_doc_meta(doc_id) @@ -85,13 +85,13 @@ def validate_doc_meta(doc_id, editor): for rule in rules: res = rule.validate(meta, doc_name, doc_corpus) if editor and len(res['tooltip']) > 0: - report += ("""
""" + report["tooltip"] += ("""
""" + res['report'][:-5] + """ """ + "" + res['tooltip'] + "" + "
") else: - report += res['report'] + report["report"] += res['report'] return report def validate_doc_ether(doc_id, editor=False): @@ -108,7 +108,7 @@ def validate_doc_ether(doc_id, editor=False): cells = [] # check metadata - meta_report = validate_doc_meta(doc_id, editor) + meta_validation = validate_doc_meta(doc_id, editor) # check ethercalc rules for rule in rules: @@ -125,7 +125,7 @@ def validate_doc_ether(doc_id, editor=False): if editor: highlight_cells(cells, ether_url, ether_doc_name) - full_report = report + meta_report + full_report = report + meta_validation["report"] if len(full_report) == 0: full_report = "Document is valid!" return full_report @@ -133,10 +133,10 @@ def validate_doc_ether(doc_id, editor=False): json_report = {} if len(report) == 0: report = "spreadsheet is valid" - if len(meta_report) == 0: + if len(meta_validation["report"]) == 0: meta_report = "metadata is valid" json_report['ether'] = report - json_report['meta'] = meta_report + json_report['meta'] = meta_validation["report"] return json_report def validate_doc_xml(doc_id, schema, editor=False): From cdd1674785b74e3be96441a1f555ef1694049df6 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Tue, 23 Oct 2018 15:40:24 -0400 Subject: [PATCH 072/135] Convenience interface for CLI validation + bugfix * validate.py can now be run from CLI for individual documents to debug issues --- validate.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/validate.py b/validate.py index b6866dc..6941053 100755 --- a/validate.py +++ b/validate.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from collections import defaultdict -import re +import re, sys import cgi import json @@ -134,7 +134,7 @@ def validate_doc_ether(doc_id, editor=False): if len(report) == 0: report = "spreadsheet is valid" if len(meta_validation["report"]) == 0: - meta_report = "metadata is valid" + meta_validation["report"] = "metadata is valid" json_report['ether'] = report json_report['meta'] 
= meta_validation["report"] return json_report @@ -203,10 +203,23 @@ def validate_all_docs(): return json.dumps(reports) if __name__ == "__main__": - parameter = cgi.FieldStorage() - doc_id = parameter.getvalue("doc_id") - mode = parameter.getvalue("mode") - schema = parameter.getvalue("schema") + + mode = "" + schema = "" + if len(sys.argv) > 1: + from argparse import ArgumentParser + p = ArgumentParser() + p.add_argument("-d","--doc",help="doc ID in gitdox.db or 'all'", default="all") + + opts = p.parse_args() + doc_id = opts.doc + if doc_id != "all": + _, _, _, _, _, mode, schema = get_doc_info(doc_id) + else: + parameter = cgi.FieldStorage() + doc_id = parameter.getvalue("doc_id") + mode = parameter.getvalue("mode") + schema = parameter.getvalue("schema") if doc_id == "all": print "Content-type:application/json\n\n" From 7d3ce7e843635ff86f6bb10239f9e799925d156c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 23 Oct 2018 22:28:13 -0400 Subject: [PATCH 073/135] use mustache for html templating --- admin.py | 363 +++----------- editor.py | 115 ++--- index.py | 222 ++------- js/validation_rules.js | 39 ++ modules/renderer.py | 31 ++ requirements.txt | 1 + templates/admin.mustache | 231 +++++++++ templates/codemirror.html | 445 ----------------- templates/{editor.html => editor.mustache} | 129 +++-- templates/ether.html | 19 - templates/index.mustache | 242 ++++++++++ templates/landing.html | 109 ----- templates/partials/codemirror.mustache | 449 ++++++++++++++++++ templates/partials/ethercalc.mustache | 24 + .../{header.html => partials/header.mustache} | 2 +- templates/popup_meta.mustache | 29 ++ templates/user_admin.mustache | 75 +++ templates/validation_rules.mustache | 42 ++ validation_rules.py | 108 +---- 19 files changed, 1429 insertions(+), 1246 deletions(-) create mode 100644 js/validation_rules.js create mode 100644 modules/renderer.py create mode 100644 templates/admin.mustache delete mode 100644 templates/codemirror.html rename 
templates/{editor.html => editor.mustache} (58%) delete mode 100644 templates/ether.html create mode 100644 templates/index.mustache delete mode 100644 templates/landing.html create mode 100644 templates/partials/codemirror.mustache create mode 100644 templates/partials/ethercalc.mustache rename templates/{header.html => partials/header.mustache} (98%) create mode 100644 templates/popup_meta.mustache create mode 100644 templates/user_admin.mustache create mode 100644 templates/validation_rules.mustache diff --git a/admin.py b/admin.py index e2ac495..9e3bff1 100755 --- a/admin.py +++ b/admin.py @@ -14,6 +14,7 @@ from paths import get_menu from editor import harvest_meta from modules.ether import make_spreadsheet, get_ether_stylesheet_select, get_corpus_select +from modules.renderer import render from passlib.apps import custom_app_context as pwd_context import github3 import time @@ -102,7 +103,7 @@ def update_git_info(user,new_git_username,new_git_password,new_git_2fa=False): o['git_username'] = new_git_username o['git_2fa'] = str(new_git_2fa).lower() - try: + try: note = project + ", " + time.ctime() auth = github3.authorize(new_git_username, new_git_password, ['repo'], note, "") o['git_token'] = auth.token @@ -111,169 +112,58 @@ def update_git_info(user,new_git_username,new_git_password,new_git_2fa=False): del o['git_password'] o.write() except: - # fail silently--would want to display an error ideally, but + # fail silently--would want to display an error ideally, but # users will know to try again if the credentials are wrong pass -def load_admin(user,admin,theform): - warn="" +def load_admin(user, admin, theform): + render_data = {} + + # handle user deletion if theform.getvalue('user_delete'): - userdir=prefix+'users' + os.sep - user_del_file=theform.getvalue('user_delete') - user_del=user_del_file.split('.ini')[0] + userdir = prefix + 'users' + os.sep + user_del_file = theform.getvalue('user_delete') + user_del = user_del_file.split('.ini')[0] 
#delete_user(user_del) #need to also delete the user.ini file - os.remove(userdir+user_del_file) + os.remove(userdir + user_del_file) + # handle user creation if theform.getvalue('create_user'): - username=theform.getvalue('username') - password=theform.getvalue('password') - realname=theform.getvalue('realname') if theform.getvalue('realname') is not None else "anonymous" - email=theform.getvalue('email') if theform.getvalue('email') is not None else "a@b.com" - admin=theform.getvalue('admin') - git_username=theform.getvalue('git_username') if theform.getvalue('git_username') is not None else "none" - git_password=theform.getvalue('git_password') if theform.getvalue('git_password') is not None else "none" - git_2fa=theform.getvalue('git_2fa') if theform.getvalue('git_2fa') is not None else "false" - - if username!=None and password!=None: - write_user_file(username,password,admin,email,realname,git_username,git_password,git_2fa) + username = theform.getvalue('username') + password = theform.getvalue('password') + realname = theform.getvalue('realname') if theform.getvalue('realname') is not None else "anonymous" + email = theform.getvalue('email') if theform.getvalue('email') is not None else "a@b.com" + admin = theform.getvalue('admin') + git_username = theform.getvalue('git_username') if theform.getvalue('git_username') is not None else "none" + git_password = theform.getvalue('git_password') if theform.getvalue('git_password') is not None else "none" + git_2fa = theform.getvalue('git_2fa') if theform.getvalue('git_2fa') is not None else "false" + + if username != None and password != None: + write_user_file(username, password, admin, email, realname, git_username, git_password, git_2fa) else: - warn="
ERROR: username or password missing; user cannot be created.
" + render_data["user_creation_warning"] = "ERROR: username or password missing; user cannot be created." + # handle db wipe if theform.getvalue('init_db'): setup_db() - page= "Content-type:text/html\r\n\r\n" - page+=""" - - - - - - - - - - - - - - - - - - **navbar** -
- **header** -
-

GitDox - Administration

-

administration and user management | back to document list

- """ - page+="""
""" - - page += '''

User Management

- - -

Select users to delete:

- " - - - page+="""

-
delete
-
""" - - #add user - - page+="""

Enter user info to create new user:


- - - - - - - - - - - - - - -
username
password
realname
email
admin
git username
git password
use two-factor auth
- - - - -

-
save
-
""" - if warn!="": - page+=warn - - - page += """ -

Batch download

-

Download all documents

-
    -
  • Documents will be downloaded in a zip file
  • -
  • The format of each document will depend on its active mode: -
      -
    • Metadata is added to XML files in a wrapping tag <meta key="value">
    • -
    • Documents in XML mode are downloaded as .xml, as they appear in the editor
    • -
    • Documents in spreadsheet mode are downloaded as .sgml to preserve potential span hierarchy conflicts
  • -
  • You can choose custom configurations for exporting spreadsheet data if .ini files are available in the schemas/ directory
  • -
-
Corpora to export:
- **corpus_select** -

-
Filter by status:
- **status_select** -

-
Extension for spreadsheet files:
- -

-
Export configuration for spreadsheets:
- **stylesheet_select** -

-
download
- """ - - page = page.replace("**corpus_select**",get_corpus_select()) - page = page.replace("**status_select**",get_status_select()) - page = page.replace("**stylesheet_select**",get_ether_stylesheet_select()) - - - msg = "" + render_data['userfiles'].append(userfile) + + # get html for dropdown selections + render_data['corpus_select_html'] = get_corpus_select() + render_data['status_select_html'] = get_status_select() + render_data['stylesheet_select_html'] = get_ether_stylesheet_select() + + # handle upload imported = 0 if "file" in theform and "mode" in theform: fileitem = theform["file"] @@ -281,7 +171,7 @@ def load_admin(user,admin,theform): if len(fileitem.filename) > 0: # strip leading path from file name to avoid directory traversal attacks fn = os.path.basename(fileitem.filename) - msg = '
The file "' + fn + '" was uploaded successfully
' + render_data['file_uploaded'] = fn from zipfile import ZipFile zip = ZipFile(fileitem.file) file_list = [f for f in zip.namelist() if not os.path.isdir(f)] @@ -293,14 +183,14 @@ def load_admin(user,admin,theform): corpus = meta_key_val["corpus"] else: corpus = "default_corpus" - docname = filename.replace(" ","_") # No spaces in document names - docname = re.sub(r'(.+)\.[^\.]+$',r'\1',docname) # Strip extension + docname = filename.replace(" ","_") # No spaces in document names + docname = re.sub(r'(.+)\.[^\.]+$',r'\1',docname) # Strip extension if not doc_exists(docname, corpus): max_id = generic_query("SELECT MAX(id) AS max_id FROM docs", "")[0][0] if not max_id: # This is for the initial case after init db max_id = 0 - doc_id = int(max_id) + 1 - create_document(doc_id, docname, corpus, "init", "default_user", "gucorpling/gitdox", "", mode) + doc_id = int(max_id) + 1 + create_document(doc_id, docname, corpus, "init", "default_user", "gucorpling/gitdox", "", mode) else: # Document already exists, just overwrite spreadsheet/xml and metadata and set mode doc_id = generic_query("SELECT id FROM docs where corpus=? and name=?", (corpus,docname))[0][0] @@ -329,47 +219,16 @@ def load_admin(user,admin,theform): continue save_meta(doc_id, key.decode("utf8"), value.decode("utf8")) if imported > 0: - msg += 'Imported '+str(imported)+' files from archive
' - - - page+=""" -

Batch upload

-

Import multiple spreadsheets data by uploading a zip archive with SGML files

-
    -
  • Document names are generated from file names inside the zip, without their extension (e.g. .sgml, .tt)
  • -
  • Metadata is taken from the <meta> element surrounding the document
  • -
  • Corpus name is taken from a metadatum corpus inside meta, else 'default_corpus'
  • -
  • Select XML mode to import into XML editor, or Spreadsheet to convert SGML spans into a new spreadsheet
  • -
-
- - - - - - - - -
Mode: - -
-
-
-""" - - page+=msg - - msg = "" + render_data['files_imported'] = str(imported) + + # handle sql execution sql_statements = 0 if "sqltab" in theform: fileitem = theform["sqltab"] if len(fileitem.filename) > 0: # strip leading path from file name to avoid directory traversal attacks fn = os.path.basename(fileitem.filename) - msg = '
The file "' + fn + '" was uploaded successfully
' + render_data['sql_file_imported'] = fn rows = fileitem.file.read().replace("\r","").split("\n") c1, c2 = ["",""] for i, row in enumerate(rows): @@ -382,134 +241,34 @@ def load_admin(user,admin,theform): sql = "update docs set " + c2 + " = ? where " + c1 + " = ? ;" generic_query(sql, (f2, f1)) sql_statements += 1 - if sql_statements > 0: - msg += 'Executed ' + str(sql_statements) + ' DB updates
' - - page += """ -

Batch update DB

-

Execute multiple SQL updates, e.g. to assign documents to users from a list

-
    -
  • The uploaded file should be a tab delimited, two column text file
  • -
  • The first rwo contains the headers: -
    • in column 1, the criterion, one of 'corpus' or 'name' (=document name)
    • -
    • in column 2, the docs table column to update, e.g. 'assignee_username'
  • -
  • Subsequent rows give pairs of criterion-value, e.g. 'doc001 user1'
  • -
-
- - -
- """ + render_data["sql_statements"] = sql_statements - page += msg + render_data["navbar_html"] = get_menu() + render_data["skin_stylesheet"] = skin - page+="

Database management

" - #init database, setup_db, wipe all documents + return render("admin", render_data) - page+="""
- warning: this will wipe the database! -
-
init DB
-
""" +def load_user_config(user, admin, theform): + render_data = {} - - page+="
" - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) - page = page.replace("**project**",project) - page = page.replace("**skin**",skin) - - return page - - -def load_user_config(user,admin,theform): if theform.getvalue('new_pass') and user != "demo": new_pass=theform.getvalue('new_pass') update_password(user,new_pass) - if theform.getvalue('new_git_password') and user != "demo": + if theform.getvalue('new_git_password') and user != "demo": new_git_password=theform.getvalue('new_git_password') new_git_username=theform.getvalue('new_git_username') new_git_2fa=theform.getvalue('new_git_2fa') - update_git_info(user,new_git_username,new_git_password,new_git_2fa) + render_data['user'] = user + render_data['admin_eq_one'] = admin == "1" + render_data["navbar_html"] = get_menu() + render_data["skin_stylesheet"] = skin - page= "Content-type:text/html\r\n\r\n" - page+=""" - - - - - - - - - - - - - - - - **navbar** -
- -
-

Coptic XML transcription editor

-

edit user info | back to document list

- -

Edit your account information

- """ - #edit user password - username_info=""""""%user - username_info+=""" - -
username%s
new password
- """ - - - page+=username_info - page+=" \n" - page+="

note: after you changed your password you'll be logged out and you need to log in using your new password again

\n" - - #edit git info - if admin=="1": - page+="""
- - -
new git username
new git password
use two-factor auth
\n""" - - - page+="
\n" - - page += "\t\t\t
\t\t\n
\t\n\n\n" - - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) - page = page.replace("**project**",project) - page = page.replace("**skin**",skin) - - return page + return render("user_admin", render_data) def open_main_server(): @@ -521,10 +280,12 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) user = userconfig["username"] admin = userconfig["admin"] + + print("Content-type:text/html\n\n") if admin == "3": - print(load_admin(user,admin,theform)) + print(load_admin(user, admin, theform)) elif admin == "0" or admin=="1": - print(load_user_config(user,admin,theform)) + print(load_user_config(user, admin, theform)) open_main_server() diff --git a/editor.py b/editor.py index 9b7d2f0..fd179eb 100755 --- a/editor.py +++ b/editor.py @@ -17,6 +17,7 @@ from paths import ether_url, get_menu, get_nlp_credentials from modules.ether import make_spreadsheet, delete_spreadsheet, sheet_exists, get_socialcalc, ether_to_sgml, \ build_meta_tag, get_ether_stylesheet_select, get_file_list +from modules.renderer import render # Support IIS site prefix on Windows if platform.system() == "Windows": @@ -68,9 +69,9 @@ def serialize_file(text_content,file_name): def load_page(user,admin,theform): - print("Content-type:text/html\r\n\r\n") global ether_url global code_2fa + if theform.getvalue("2fa"): code_2fa = theform.getvalue("2fa") else: @@ -87,7 +88,6 @@ def load_page(user,admin,theform): schema = "" doc_id = "" # Should only remain so if someone navigated directly to editor.py docname = "" - mymsg = "" old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = ["", "", "", "", "", "", ""] if int(admin) > 0: @@ -252,8 +252,6 @@ def load_page(user,admin,theform): out, err = make_spreadsheet(old_socialcalc, ether_url + "_/gd_" + corpus + "_" + docname, "socialcalc") if out == "OK": delete_spreadsheet(ether_url,old_sheet_name) - 
else: - mymsg += "out was: " + out + " err was" + err text_content = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0] @@ -423,20 +421,24 @@ def load_page(user,admin,theform): if user == "demo": nlp_service = disabled_nlp_service - page= ""#"Content-type:text/html\r\n\r\n" + # dict of variables we'll need to render the html + render_data = {} + + # prepare embedded editor html if mode == "ether": - embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "ether.html").read() + render_data['ether_mode'] = True + ether_url += "gd_" + corpus + "_" + docname + render_data['ether_url'] = ether_url stylesheet_select = get_ether_stylesheet_select() - embedded_editor = embedded_editor.replace("**stylesheet_select**",stylesheet_select) + render_data['ether_stylesheet_select_html'] = stylesheet_select if "file" in theform and user != "demo": fileitem = theform["file"] if len(fileitem.filename) > 0: # strip leading path from file name to avoid directory traversal attacks fn = os.path.basename(fileitem.filename) - msg = 'The file "' + fn + '" was uploaded successfully' if fn.endswith(".xls") or fn.endswith(".xlsx"): make_spreadsheet(fileitem.file.read(),"https://etheruser:etherpass@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname,"excel") else: @@ -446,68 +448,51 @@ def load_page(user,admin,theform): for (key, value) in iteritems(meta_key_val): key = key.replace("@","_") save_meta(int(doc_id),key.decode("utf8"),value.decode("utf8")) - else: - msg = "no file was uploaded" - - embedded_editor = embedded_editor.replace("**source**",ether_url) else: - embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "codemirror.html").read() + render_data['ether_mode'] = False - page += urllib.urlopen(prefix + "templates" + os.sep + "editor.html").read() - page += mymsg - page = page.replace("**embedded_editor**",embedded_editor) + render_data['doc_is_selected'] = len(doc_id) != 0 + render_data['id'] = doc_id + render_data['mode'] = 
mode + render_data['schema'] = schema + render_data['docname'] = docname + render_data['corpusname'] = corpus - if len(doc_id) == 0: - exp = re.compile(r"
.*
",re.DOTALL) - page = exp.sub("""

No document selected | back to document list

""",page) - else: - metadata = print_meta(doc_id) - corpus_metadata = print_meta(doc_id,corpus=True) - #corpus_metadata = "" - page=page.replace("**content**",text_content) - page=page.replace("**docname**",docname) - page=page.replace("**corpusname**",corpus) - page=page.replace("**edit_status**",edit_status) - page=page.replace("**repo**",repo_name) - page=page.replace("**edit_schema**",edit_schema) - page=page.replace("**edit_assignee**",edit_assignee) - page=page.replace("**edit_mode**",edit_mode) - page=page.replace("**metadata**",metadata) - page=page.replace("**corpus_metadata**",corpus_metadata) - page=page.replace("**disabled_NLP**",disabled_nlp_service) - page=page.replace("**NLP**",nlp_service) - page=page.replace("**id**",doc_id) - page=page.replace("**mode**",mode) - page=page.replace("**schema**",schema) - - # handle clone meta button - if int(admin) > 0: - doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",()) - page = page.replace("**source_doc_attrs**", '''''') - opts = "\n".join(['' for x in doc_list]) - page = page.replace("**existing_documents**", opts) - else: - page = page.replace("**source_doc_attrs**", '''disabled="disabled"''') + render_data['text_content'] = text_content + render_data['repo'] = repo_name - if int(admin)>0: - page=page.replace("**github**",push_git) - else: - page = page.replace("**github**", '') + render_data['edit_status_html'] = edit_status + render_data['edit_schema_html'] = edit_schema + render_data['edit_assignee_html'] = edit_assignee + render_data['edit_mode_html'] = edit_mode + render_data['metadata_html'] = print_meta(doc_id) + render_data['corpus_metadata_html'] = print_meta(doc_id,corpus=True) - if int(admin) < 3: - page = page.replace('onblur="validate_docname();"','onblur="validate_docname();" disabled="disabled" class="disabled"') - page = page.replace('onblur="validate_corpusname();"','onblur="validate_corpusname();" 
disabled="disabled" class="disabled"') - page = page.replace('onblur="validate_repo();"','onblur="validate_repo();" disabled="disabled" class="disabled"') - page = page.replace('''
''','''
''') + render_data['disabled_nlp_html'] = disabled_nlp_service + render_data['nlp_html'] = nlp_service - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**", get_menu()) - page = page.replace("**header**", header) - page = page.replace("**project**", project) - page = page.replace("**skin**", skin) - page = page.replace("**editor_help_link**",editor_help_link) + render_data["admin_gt_zero"] = int(admin) > 0 + render_data["admin_eq_three"] = admin == "3" + + # handle clone meta button, and allow github pushing + if int(admin) > 0: + doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",()) + render_data["docs"] = [] + for doc in doc_list: + doc_vars = {} + doc_vars["id"] = str(doc[0]) + doc_vars["corpus"] = doc[1] + doc_vars["name"] = doc[2] + render_data['docs'].append(doc_vars) - return page + render_data["github_push_html"] = push_git + + render_data["can_save"] = not (int(admin) < 3) + render_data["navbar_html"] = get_menu() + render_data["editor_help_link_html"] = editor_help_link + render_data["skin_stylesheet"] = skin + + return render("editor", render_data) def open_main_server(): @@ -520,7 +505,9 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) user = userconfig["username"] admin = userconfig["admin"] - print(load_page(user,admin,theform).encode("utf8")) + + print("Content-type:text/html\n\n") + print(load_page(user, admin, theform).encode("utf8")) if __name__ == "__main__": diff --git a/index.py b/index.py index 67eedd4..87e1b43 100755 --- a/index.py +++ b/index.py @@ -11,6 +11,7 @@ import urllib from modules.gitdox_sql import * from modules.ether import delete_spreadsheet +from modules.renderer import render from paths import ether_url, get_menu from os.path import isfile, join import platform @@ -24,7 +25,7 @@ project = "Scriptorium" -def make_options(**kwargs): +def read_options(**kwargs): if "file" in 
kwargs: kwargs["file"] = prefix + kwargs["file"] names = open(kwargs["file"],'r').read().replace("\r","").split("\n") @@ -32,17 +33,7 @@ def make_options(**kwargs): elif "names" in kwargs: names = kwargs[names] selected = kwargs["selected"] if "selected" in kwargs else None - options="" - for name in names: - if name!='': - options+='\n' - for corpus in corpora: - corpus_list += '\n' if "sel_corpus" in theform: selected_corpus = theform.getvalue("sel_corpus") - corpus_list = corpus_list.replace('="'+selected_corpus+'"','="'+selected_corpus+'" selected="selected"') + render_data["sel_corpus"] = selected_corpus + + # provide list of corpora for corpus selection dropdown + corpora = get_corpora() + render_data['corpora'] = [] + for corpus in corpora: + render_data['corpora'].append({"name": corpus[0], + "selected": selected_corpus == corpus[0]}) + # find the documents we need to display if selected_corpus != "" and selected_corpus != "all": doc_list = generic_query("SELECT id,corpus,name,status,assignee_username,mode FROM docs where corpus=? ORDER BY corpus, name COLLATE NOCASE", (selected_corpus,)) if len(doc_list) == 0: # Restricted query produced no documents, switch back to all document display @@ -149,110 +90,40 @@ def load_landing(user, admin, theform): if not max_id: # This is for the initial case after init db max_id = 0 - table = """""" - table += """""" - table += """ - - - - - - - """ - table += """""" - + render_data['docs'] = [] for doc in doc_list: - row="" + doc_vars = {} for item in doc: if item == "xml": - item = ' ' + doc_vars["xml"] = True mode = "xml" elif item == "ether": - item = ' ' + doc_vars["ether"] = True mode = "ether" elif "-" in str(item): - item = item.replace("-","‑") # Use non-breaking hyphens - row += cell(item) - id = str(doc[0]) - - # validation icons - icons = """
""" - if mode == "xml": - icons += """ """ - elif mode == "ether": - icons += """ """ - icons += """ """ - icons += """
""" + doc_vars["other_mode"] = True - # edit document - button_edit = """""" - id_code = """" - button_edit += id_code - button_edit += """
edit
""" - - #delete document - button_delete="""""" - button_delete+=id_code - if int(admin) > 0: - button_delete+="""
delete
- """ - else: - button_delete += """
delete
- """ - - row += cell(icons) - row += cell(button_edit) - row += cell(button_delete) - row += "" - table += row - - table+="
idcorpusdocumentstatusassignedmodevalidateactions
" - - if admin == "3": - validation_rules = """
-
- - validation rules
""" - else: - validation_rules = "" - - page = "" - - menu = get_menu() - menu = menu.encode("utf8") - - landing = open(prefix + "templates" + os.sep + "landing.html").read() - header = open(prefix + "templates" + os.sep + "header.html").read() + id = str(doc[0]) + doc_vars["id"] = id + doc_vars["corpus"] = doc[1] + doc_vars["name"] = doc[2] + doc_vars["status"] = doc[3] + doc_vars["assignee"] = doc[4] + render_data['docs'].append(doc_vars) + + render_data["admin_gt_zero"] = int(admin) > 0 + render_data["admin_eq_three"] = admin == "3" + render_data["max_id_plus1"] = str(max_id + 1) + render_data["navbar_html"] = get_menu().encode("utf8") + render_data["user"] = user scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep userdir = scriptpath + "users" + os.sep config = ConfigObj(userdir + 'config.ini') - skin = config["skin"] - project = config["project"] + render_data["skin_stylesheet"] = config["skin"] + render_data["project"] = config["project"] - landing = landing.replace("**max_id_plus1**", str(max_id + 1)) - landing = landing.replace("**user**", user) - landing = landing.replace("**project**", project) - landing = landing.replace("**header**", header) - landing = landing.replace("**skin**", skin) - landing = landing.replace("**validation_rules**", validation_rules) - landing = landing.replace("**corpora**", corpus_list) - landing = landing.replace("**sel_corpus**", selected_corpus) - landing = landing.replace("**table**", table) - landing = landing.replace("**navbar**", menu) - if int(admin) > 0: - landing = landing.replace("**create_doc**", - '''onclick="document.getElementById('form_new').submit();" class="button"''') - landing = landing.replace("**source_doc_attrs**", '''''') - opts = "\n".join(['' for x in doc_list]) - landing = landing.replace("**existing_documents**", opts) - else: - landing = landing.replace("**create_doc**", '''class="button disabled"''') - landing = landing.replace("**source_doc_attrs**", '''disabled="disabled"''') - page 
+= landing - print("Content-type:text/html\n\n") - - return page + return render("index", render_data) def open_main_server(): @@ -265,7 +136,10 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) user = userconfig["username"] admin = userconfig["admin"] - print(load_landing(user,admin,theform)) + + print("Content-type:text/html\n\n") + print(load_landing(user, admin, theform)) -open_main_server() +if __name__ == '__main__': + open_main_server() diff --git a/js/validation_rules.js b/js/validation_rules.js new file mode 100644 index 0000000..4ad3588 --- /dev/null +++ b/js/validation_rules.js @@ -0,0 +1,39 @@ +$(document).ready(function () { + $('#ValidationTableContainer').jtable({ + title: 'Validation Rules', + sorting: true, + actions: { + listAction: 'modules/jtable_rule_list.py', + createAction: 'modules/jtable_create_rule.py', + updateAction: 'modules/jtable_update_rule.py', + deleteAction: 'modules/jtable_delete_rule.py' + }, + fields: { + id: { + title: 'ID', + key: true + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + domain: { + title: 'Domain', + options: ['ether', 'meta'] + }, + name: { + title: 'Name' + }, + operator: { + title: 'Operator', + options: ['~', '|', '=', '==', '>', 'exists'] + }, + argument: { + title: 'Argument' + } + } + }); + $('#ValidationTableContainer').jtable('load'); +}); diff --git a/modules/renderer.py b/modules/renderer.py new file mode 100644 index 0000000..a6fec04 --- /dev/null +++ b/modules/renderer.py @@ -0,0 +1,31 @@ +import platform +import os +from pystache.renderer import Renderer + +if platform.system() == "Windows": + prefix = "transc\\" +else: + prefix = "" + +def render(template_name, variables, template_dir='templates' + os.sep, file_ext=".mustache"): + """ + Render a mustache template given a dict representing its variables. 
+ + Args: + template_name (str): the name of the template to be rendered + variables (dict): a string -> any dict holding values of variables used in the template + template_dir (str): the template directory, relative to the GitDox root directory. + Defaults to 'templates' + os.sep + file_ext (str): the file extension of templates. Defaults to '.mustache' + + Returns: + str: rendered HTML. + """ + # load shared Mustache templates so we can reference them in our large templates + partials_dir = prefix + template_dir + os.sep + 'partials' + os.sep + partials = dict([(filename[:-len(file_ext)], open(partials_dir + filename, 'r').read()) + for filename in os.listdir(prefix + template_dir + 'partials') + if filename.endswith(".mustache")]) + renderer = Renderer(partials=partials) + + return renderer.render_path(prefix + template_dir + template_name + file_ext, variables) diff --git a/requirements.txt b/requirements.txt index 1f74461..234b42b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ lxml requests github3.py==0.9.3 passlib +pystache diff --git a/templates/admin.mustache b/templates/admin.mustache new file mode 100644 index 0000000..24879b5 --- /dev/null +++ b/templates/admin.mustache @@ -0,0 +1,231 @@ + + + + + + + + + + + + + + + + + {{{ navbar_html }}} +
+ +
+

GitDox - Administration

+

+ administration and user management + | + back to document list +

+ + +
+

User Management

+

Select users to delete:

+ +

+ +
+ delete +
+
+ + +
+

Enter user info to create new user:

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
username
password
realname
email
admin +
git username
git password
use two-factor auth
+

+ +
+ save +
+
+ {{#user_creation_warning}} +
{{.}}
+ {{/user_creation_warning}} + +

Batch download

+

Download all documents

+
    +
  • Documents will be downloaded in a zip file
  • +
  • The format of each document will depend on its active mode: +
      +
    • Metadata is added to XML files in a wrapping tag <meta key="value">
    • +
    • Documents in XML mode are downloaded as .xml, as they appear in the editor
    • +
    • Documents in spreadsheet mode are downloaded as .sgml to preserve potential span hierarchy conflicts
    • +
    +
  • +
  • You can choose custom configurations for exporting spreadsheet data if .ini files are available in the schemas/ directory
  • +
+
Corpora to export:
+ {{{ corpus_select_html }}} +

+
Filter by status:
+ {{{ status_select_html }}} +

+
Extension for spreadsheet files:
+ +

+
Export configuration for spreadsheets:
+ {{{ stylesheet_select_html }}} +

+
+ download +
+ +

Batch upload

+

Import multiple spreadsheets data by uploading a zip archive with SGML files

+
    +
  • Document names are generated from file names inside the zip, without their extension (e.g. .sgml, .tt)
  • +
  • Metadata is taken from the <meta> element surrounding the document
  • +
  • Corpus name is taken from a metadatum corpus inside meta, else 'default_corpus'
  • +
  • Select XML mode to import into XML editor, or Spreadsheet to convert SGML spans into a new spreadsheet
  • +
+
+ + + + + + + + + + + + +
Mode: + +
+ +
+ +
+
+ {{#file_uploaded}} +
+ The file {{.}} was uploaded successfully +
+ {{/file_uploaded}} + {{#files_imported}} + Imported {{.}} files from archive +
+ {{/files_imported}} + +

Batch update DB

+

Execute multiple SQL updates, e.g. to assign documents to users from a list

+
    +
  • The uploaded file should be a tab delimited, two column text file
  • +
  • The first row contains the headers: +
    • in column 1, the criterion, one of 'corpus' or 'name' (=document name)
    • +
    • in column 2, the docs table column to update, e.g. 'assignee_username'
  • +
  • Subsequent rows give pairs of criterion-value, e.g. 'doc001 user1'
  • +
+
+ +
+ +
+ {{#sql_file_uploaded}} +
+ The file {{.}} was uploaded successfully +
+ {{/sql_file_uploaded}} + {{#sql_statements}} + Executed {{.}} DB updates +
+ {{/sql_statements}} + +
+
+ +

Database management

+
+ warning: this will wipe the database! +
+ +
+ init DB +
+
+
+
+ + diff --git a/templates/codemirror.html b/templates/codemirror.html deleted file mode 100644 index 9c0c484..0000000 --- a/templates/codemirror.html +++ /dev/null @@ -1,445 +0,0 @@ - - - - - - -
- -
-
-
Save
**NLP** **github** - - - - - - - - - - - - - diff --git a/templates/editor.html b/templates/editor.mustache similarity index 58% rename from templates/editor.html rename to templates/editor.mustache index 158dd37..04a7d62 100644 --- a/templates/editor.html +++ b/templates/editor.mustache @@ -1,10 +1,10 @@ -**docname** - GitDox: Edit +{{docname}} - GitDox: Edit - + @@ -38,18 +38,17 @@ - **navbar** + {{{navbar_html}}}
- **header** +

GitDox: Edit

- **editor_help_link** + {{{ editor_help_link_html }}}
- - - @@ -62,12 +61,12 @@

GitDox: Edit

+ {{#doc_is_selected}}
-
- - - + + +

Editor | back to document list

@@ -77,17 +76,35 @@

Editor | back to document list

- Document Name: + + Document Name: + + + {{#can_save}} +
+ +
+ {{/can_save}} + - -
+
Validate
-
Validate
- Corpus Name: + + Corpus Name: - -
+ + {{#can_save}} +
+ +
+ {{/can_save}}
@@ -96,27 +113,59 @@

Editor | back to document list

Git Repo: - -
+ + + {{#can_save}} +
+ +
+ {{/can_save}} + + + + XML Schema: + + {{{edit_schema_html}}} + + + + Assigned to: + + {{{edit_assignee_html}}} + + + + Status: + + {{{edit_status_html}}} + + + + Mode: + + {{{edit_mode_html}}} - XML Schema:**edit_schema** - Assigned to:**edit_assignee** - Status:**edit_status** - Mode:**edit_mode** - - **embedded_editor** + {{#ether_mode}} + {{> ethercalc}} + {{/ether_mode}} + {{^ether_mode}} + {{> codemirror}} + {{/ether_mode}}

Metadata

- **metadata** + {{{metadata}}}

- **corpus_metadata** + {{{corpus_metadata_html}}}
@@ -133,11 +182,20 @@

Metadata

- - + {{#admin_gt_zero}} + + {{/admin_gt_zero}} + {{^admin_gt_zero}} + + {{/admin_gt_zero}}
+ {{/doc_is_selected}} + {{^doc_is_selected}} +

No document selected | back to document list

+ {{/doc_is_selected}}
+ diff --git a/templates/ether.html b/templates/ether.html deleted file mode 100644 index 1d9b3ba..0000000 --- a/templates/ether.html +++ /dev/null @@ -1,19 +0,0 @@ - -
Save
**disabled_NLP** **github** - -

Export

- -

Generate XML from this spreadsheet using a stylesheet

-
- **stylesheet_select** - - -
- -

Upload

- -

Replace existing spreadsheet data by uploading an SGML or Excel file

-
- - -
\ No newline at end of file diff --git a/templates/index.mustache b/templates/index.mustache new file mode 100644 index 0000000..0fd4639 --- /dev/null +++ b/templates/index.mustache @@ -0,0 +1,242 @@ + + + + GitDox: {{project}} + + + + + + + + + + + + + + + + + + {{{ navbar_html }}} +
+ + +
+

GitDox: Project {{project}}

+
+ {{user}} + + + + + +
+ + admin
+
+ + {{#admin_eq_three}} +
+
+ validation rules +
+
+ {{/admin_eq_three}} + +
+ + validate
+
+

For help getting started see the wiki

+
+
+ + +
Choose a corpus: + +
+
+
+
+
+ + {{#admin_gt_zero}} +
+ + New Document +
+ {{/admin_gt_zero}} + {{^admin_gt_zero}} +
+ + New Document +
+ {{/admin_gt_zero}} +
+
+ + {{#admin_gt_zero}} + + {{/admin_gt_zero}} + {{^admin_gt_zero}} + + {{/admin_gt_zero}} +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + {{#docs}} + + + + + + + + + + + + + + + + + + + + {{/docs}} + +
idcorpusdocumentstatusassignedmodevalidateactions
{{id}}{{corpus}}{{name}}{{status}}{{assignee}} + {{#xml}} +   + {{/xml}} + {{#ether}} +   + {{/ether}} + {{#other_mode}} + ‑ + {{/other_mode}} + +
+ {{#xml}} +   + {{/xml}} + {{#xml}} +   + {{/xml}} +   +
+
+
+ +
+ edit +
+
+
+
+ + {{#admin_gt_zero}} + +
+ delete +
+ + {{/admin_gt_zero}} + {{^admin_gt_zero}} + +
+ delete +
+ + {{/admin_gt_zero}} +
+
+
+
+ + + diff --git a/templates/landing.html b/templates/landing.html deleted file mode 100644 index 14995e4..0000000 --- a/templates/landing.html +++ /dev/null @@ -1,109 +0,0 @@ - - - - GitDox: **project** - - - - - - - - - - - - - - - - - **navbar** -
- **header** -
-

GitDox: Project **project**

-
- **user** -
- - -
- -
- - admin
-
- - **validation_rules** - -
- - validate
-
-

For help getting started see the wiki

-
-
- - -
Choose a corpus: - -
-
-
-
-
- -
- - New Document -
-
-
- - -
- - -
-
- **table** -
-
- - - diff --git a/templates/partials/codemirror.mustache b/templates/partials/codemirror.mustache new file mode 100644 index 0000000..07509d4 --- /dev/null +++ b/templates/partials/codemirror.mustache @@ -0,0 +1,449 @@ + + + + + + +
+ +
+
+
+ Save +
+{{{nlp_html}}} +{{{github_push_html}}} + + + + + + + + + + + + + diff --git a/templates/partials/ethercalc.mustache b/templates/partials/ethercalc.mustache new file mode 100644 index 0000000..6ab0719 --- /dev/null +++ b/templates/partials/ethercalc.mustache @@ -0,0 +1,24 @@ + +
+ Save +
+{{{disabled_nlp_html}}} +{{{github_push_html}}} + +

Export

+ +

Generate XML from this spreadsheet using a stylesheet

+
+ {{{ether_stylesheet_select_html}}} + + +
+ +

Upload

+ +

Replace existing spreadsheet data by uploading an SGML or Excel file

+
+ + +
diff --git a/templates/header.html b/templates/partials/header.mustache similarity index 98% rename from templates/header.html rename to templates/partials/header.mustache index 14495f7..2c359aa 100644 --- a/templates/header.html +++ b/templates/partials/header.mustache @@ -10,7 +10,7 @@
-   +  
diff --git a/templates/popup_meta.mustache b/templates/popup_meta.mustache new file mode 100644 index 0000000..e95ad5e --- /dev/null +++ b/templates/popup_meta.mustache @@ -0,0 +1,29 @@ + + + + + +
+ field name (e.g., author):
+ + + {{#options}} + +
+ field value (e.g., Besa):
+
+ +
+ + diff --git a/templates/user_admin.mustache b/templates/user_admin.mustache new file mode 100644 index 0000000..eaf5b02 --- /dev/null +++ b/templates/user_admin.mustache @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + {{{ navbar_html }}} +
+ +
+

Coptic XML transcription editor

+

edit user info | back to document list

+ +
+

Change password

+ + + + + + + + + +
username{{user}}
new password
+ +
+
+

Note: after you've changed your password you'll need to log in using your new password.

+ + {{#admin_eq_one}} +
+ + + + + + + + + + + + + +
new git username
new git password
use two-factor auth
+ +
+ {{/admin_eq_one}} +
+
+ + diff --git a/templates/validation_rules.mustache b/templates/validation_rules.mustache new file mode 100644 index 0000000..293d129 --- /dev/null +++ b/templates/validation_rules.mustache @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + {{{navbar_html}}} +
+ +
+

GitDox - Validation

+

validation rule management | back to document list

+ +
+
+
+ + diff --git a/validation_rules.py b/validation_rules.py index c21a203..0a99808 100755 --- a/validation_rules.py +++ b/validation_rules.py @@ -1,11 +1,12 @@ #!/usr/bin/env python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- -import cgi, cgitb +import cgi, cgitb import os, platform from modules.logintools import login from modules.gitdox_sql import * from modules.configobj import ConfigObj +from modules.renderer import render from paths import get_menu # Support IIS site prefix on Windows @@ -22,104 +23,10 @@ project = config["project"] def load_validation_rules(): - - page= "Content-type:text/html\r\n\r\n" - page+=""" - - - - - - - - - - - - - - - - - **navbar** -
- **header** -
-

GitDox - Validation

-

validation rule management | back to document list

- - - - - """ - - page+="""""" - - page+="""
""" - - - page+="
" - header = open(templatedir + "header.html").read() - page = page.replace("**navbar**",get_menu()) - page = page.replace("**header**",header) - page = page.replace("**project**",project) - page = page.replace("**skin**",skin) - - return page - + render_data = {} + render_data['navbar_html'] = get_menu() + render_data['skin_stylesheet'] = skin + return render("validation_rules", render_data) def open_main_server(): thisscript = os.environ.get('SCRIPT_NAME', '') @@ -130,6 +37,7 @@ def open_main_server(): action, userconfig = login(theform, userdir, thisscript, action) admin = userconfig["admin"] if admin == "3": + print "Content-type:text/html\r\n\r\n" print load_validation_rules() From 93364997be23852fa255d3a30d3bd571686eb510 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 23 Oct 2018 22:37:28 -0400 Subject: [PATCH 074/135] add popupPage*, gitdox.db to gitignore --- .gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index 04a41a2..72bb081 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,9 @@ +# compiled python files **.pyc + +# these are auto-generated in index.py +popupPage.html +popupPageCorpus.html + +# usually don't want to commit this +gitdox.db From fc51d4ada620f6c4bc6f1adac1aceeb85fec0ea9 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 24 Oct 2018 12:08:43 -0400 Subject: [PATCH 075/135] fix hardcoded file ext in partial loading --- modules/renderer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/renderer.py b/modules/renderer.py index a6fec04..2e01ade 100644 --- a/modules/renderer.py +++ b/modules/renderer.py @@ -25,7 +25,7 @@ def render(template_name, variables, template_dir='templates' + os.sep, file_ext partials_dir = prefix + template_dir + os.sep + 'partials' + os.sep partials = dict([(filename[:-len(file_ext)], open(partials_dir + filename, 'r').read()) for filename in os.listdir(prefix + template_dir + 'partials') - if filename.endswith(".mustache")]) + if 
filename.endswith(file_ext)]) renderer = Renderer(partials=partials) return renderer.render_path(prefix + template_dir + template_name + file_ext, variables) From e595c7ebc5f406f66d172f5e396e41b06c166dcb Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 24 Oct 2018 21:14:06 -0400 Subject: [PATCH 076/135] remove os.sep from template_dir --- modules/renderer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/renderer.py b/modules/renderer.py index 2e01ade..b01529c 100644 --- a/modules/renderer.py +++ b/modules/renderer.py @@ -7,7 +7,7 @@ else: prefix = "" -def render(template_name, variables, template_dir='templates' + os.sep, file_ext=".mustache"): +def render(template_name, variables, template_dir='templates', file_ext=".mustache"): """ Render a mustache template given a dict representing its variables. @@ -15,7 +15,7 @@ def render(template_name, variables, template_dir='templates' + os.sep, file_ext template_name (str): the name of the template to be rendered variables (dict): a string -> any dict holding values of variables used in the template template_dir (str): the template directory, relative to the GitDox root directory. - Defaults to 'templates' + os.sep + Defaults to 'templates' file_ext (str): the file extension of templates. 
Defaults to '.mustache' Returns: @@ -24,8 +24,8 @@ def render(template_name, variables, template_dir='templates' + os.sep, file_ext # load shared Mustache templates so we can reference them in our large templates partials_dir = prefix + template_dir + os.sep + 'partials' + os.sep partials = dict([(filename[:-len(file_ext)], open(partials_dir + filename, 'r').read()) - for filename in os.listdir(prefix + template_dir + 'partials') + for filename in os.listdir(prefix + template_dir + os.sep + 'partials') if filename.endswith(file_ext)]) renderer = Renderer(partials=partials) - return renderer.render_path(prefix + template_dir + template_name + file_ext, variables) + return renderer.render_path(prefix + template_dir + os.sep + template_name + file_ext, variables) From b8bf1ef7d8be2149cdc258323ceea962d929e0e7 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 24 Oct 2018 23:03:12 -0400 Subject: [PATCH 077/135] make renderer provide navbar_html and skin --- admin.py | 6 ------ editor.py | 3 --- index.py | 2 -- validation_rules.py | 15 --------------- 4 files changed, 26 deletions(-) diff --git a/admin.py b/admin.py index 9e3bff1..4e92869 100755 --- a/admin.py +++ b/admin.py @@ -30,7 +30,6 @@ userdir = scriptpath + "users" + os.sep templatedir = scriptpath + "templates" + os.sep config = ConfigObj(userdir + 'config.ini') -skin = config["skin"] project = config["project"] @@ -244,9 +243,6 @@ def load_admin(user, admin, theform): if sql_statements > 0: render_data["sql_statements"] = sql_statements - render_data["navbar_html"] = get_menu() - render_data["skin_stylesheet"] = skin - return render("admin", render_data) @@ -265,8 +261,6 @@ def load_user_config(user, admin, theform): render_data['user'] = user render_data['admin_eq_one'] = admin == "1" - render_data["navbar_html"] = get_menu() - render_data["skin_stylesheet"] = skin return render("user_admin", render_data) diff --git a/editor.py b/editor.py index fd179eb..7dbd9d0 100755 --- a/editor.py +++ b/editor.py @@ 
-30,7 +30,6 @@ userdir = scriptpath + "users" + os.sep templatedir = scriptpath + "templates" + os.sep config = ConfigObj(userdir + 'config.ini') -skin = config["skin"] project = config["project"] editor_help_link = config["editor_help_link"] # Captions and API URLs for NLP buttons @@ -488,9 +487,7 @@ def load_page(user,admin,theform): render_data["github_push_html"] = push_git render_data["can_save"] = not (int(admin) < 3) - render_data["navbar_html"] = get_menu() render_data["editor_help_link_html"] = editor_help_link - render_data["skin_stylesheet"] = skin return render("editor", render_data) diff --git a/index.py b/index.py index 87e1b43..36306c6 100755 --- a/index.py +++ b/index.py @@ -114,13 +114,11 @@ def load_landing(user, admin, theform): render_data["admin_gt_zero"] = int(admin) > 0 render_data["admin_eq_three"] = admin == "3" render_data["max_id_plus1"] = str(max_id + 1) - render_data["navbar_html"] = get_menu().encode("utf8") render_data["user"] = user scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep userdir = scriptpath + "users" + os.sep config = ConfigObj(userdir + 'config.ini') - render_data["skin_stylesheet"] = config["skin"] render_data["project"] = config["project"] return render("index", render_data) diff --git a/validation_rules.py b/validation_rules.py index 0a99808..6288666 100755 --- a/validation_rules.py +++ b/validation_rules.py @@ -9,23 +9,8 @@ from modules.renderer import render from paths import get_menu -# Support IIS site prefix on Windows -if platform.system() == "Windows": - prefix = "transc\\" -else: - prefix = "" - -scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep -userdir = scriptpath + "users" + os.sep -templatedir = scriptpath + "templates" + os.sep -config = ConfigObj(userdir + 'config.ini') -skin = config["skin"] -project = config["project"] - def load_validation_rules(): render_data = {} - render_data['navbar_html'] = get_menu() - render_data['skin_stylesheet'] = skin return 
render("validation_rules", render_data) def open_main_server(): From a2a85515dc6357b4e05d40296242cf0d740cb9f8 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 24 Oct 2018 23:18:46 -0400 Subject: [PATCH 078/135] fix merge error, supply navbar_html and skin_stylesheet again --- modules/renderer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/renderer.py b/modules/renderer.py index b01529c..a7e8349 100644 --- a/modules/renderer.py +++ b/modules/renderer.py @@ -1,11 +1,17 @@ import platform import os +from modules.configobj import ConfigObj +from paths import get_menu from pystache.renderer import Renderer +import cgitb; cgitb.enable() if platform.system() == "Windows": prefix = "transc\\" else: prefix = "" +rootpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + os.sep +userdir = rootpath + "users" + os.sep +config = ConfigObj(userdir + 'config.ini') def render(template_name, variables, template_dir='templates', file_ext=".mustache"): """ @@ -28,4 +34,6 @@ def render(template_name, variables, template_dir='templates', file_ext=".mustac if filename.endswith(file_ext)]) renderer = Renderer(partials=partials) + variables['skin_stylesheet'] = config['skin'] + variables['navbar_html'] = get_menu() return renderer.render_path(prefix + template_dir + os.sep + template_name + file_ext, variables) From 82e813ece917a19fbf33103f7cf2e39bd1a281b9 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 01:42:02 -0400 Subject: [PATCH 079/135] extend validation rules creation page for xml and export validation --- js/validation_rules.js | 192 +++++++++++++++++++++++++++- modules/jtable_rule_list.py | 9 +- modules/jtable_schema_list.py | 32 +++++ templates/validation_rules.mustache | 33 ++++- 4 files changed, 256 insertions(+), 10 deletions(-) create mode 100755 modules/jtable_schema_list.py diff --git a/js/validation_rules.js b/js/validation_rules.js index 4ad3588..6769675 100644 --- a/js/validation_rules.js +++ 
b/js/validation_rules.js @@ -1,9 +1,75 @@ $(document).ready(function () { - $('#ValidationTableContainer').jtable({ - title: 'Validation Rules', + $('#xml-table-container').jtable({ + title: 'XML Validation Rules', sorting: true, actions: { - listAction: 'modules/jtable_rule_list.py', + listAction: function (postData, jtParams) { + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'modules/jtable_rule_list.py', + type: 'POST', + dataType: 'json', + data: {domain: "xml"}, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'modules/jtable_create_rule.py', + updateAction: 'modules/jtable_update_rule.py', + deleteAction: 'modules/jtable_delete_rule.py' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + domain: { + title: 'Domain', + options: ['xml'], + visibility: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + name: { + title: 'XSD Schema', + options: 'modules/jtable_schema_list.py?extension=xsd' + } + } + }); + $('#xml-table-container').jtable('load'); +}); + +$(document).ready(function () { + $('#meta-table-container').jtable({ + title: 'Metadata Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'modules/jtable_rule_list.py', + type: 'POST', + dataType: 'json', + data: {domain: "meta"}, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, createAction: 'modules/jtable_create_rule.py', updateAction: 'modules/jtable_update_rule.py', deleteAction: 'modules/jtable_delete_rule.py' @@ -11,7 +77,13 @@ $(document).ready(function () { fields: { id: { title: 'ID', - key: true + key: true, + visibility:'hidden' + }, + domain: { + title: 'Domain', + options: ['meta'], + visibility: 'hidden' }, doc: { title: 'Document' @@ -19,9 +91,62 @@ $(document).ready(function () 
{ corpus: { title: 'Corpus' }, + name: { + title: 'Name' + }, + operator: { + title: 'Operator', + options: ['~', 'exists'] + }, + argument: { + title: 'Argument' + } + } + }); + $('#meta-table-container').jtable('load'); +}); + +$(document).ready(function () { + $('#ether-table-container').jtable({ + title: 'EtherCalc Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'modules/jtable_rule_list.py', + type: 'POST', + dataType: 'json', + data: {domain: "ether"}, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'modules/jtable_create_rule.py', + updateAction: 'modules/jtable_update_rule.py', + deleteAction: 'modules/jtable_delete_rule.py' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, domain: { title: 'Domain', - options: ['ether', 'meta'] + options: ['ether'], + visibility: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' }, name: { title: 'Name' @@ -35,5 +160,60 @@ $(document).ready(function () { } } }); - $('#ValidationTableContainer').jtable('load'); + $('#ether-table-container').jtable('load'); +}); + +$(document).ready(function () { + $('#export-table-container').jtable({ + title: 'Export Validation Rules', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'modules/jtable_rule_list.py', + type: 'POST', + dataType: 'json', + data: {domain: "export"}, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'modules/jtable_create_rule.py', + updateAction: 'modules/jtable_update_rule.py', + deleteAction: 'modules/jtable_delete_rule.py' + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + domain: { + title: 'Domain', + options: ['export'], + 
visibility: 'hidden' + }, + doc: { + title: 'Document' + }, + corpus: { + title: 'Corpus' + }, + name: { + title: 'Export Spec', + options: 'modules/jtable_schema_list.py?extension=ini' + }, + argument: { + title: 'XSD Schema', + options: 'modules/jtable_schema_list.py?extension=xsd' + } + } + }); + $('#export-table-container').jtable('load'); }); diff --git a/modules/jtable_rule_list.py b/modules/jtable_rule_list.py index 8ed48ec..bfee072 100755 --- a/modules/jtable_rule_list.py +++ b/modules/jtable_rule_list.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- from gitdox_sql import * import json @@ -11,12 +11,17 @@ def list_rules(): try: parameter = cgi.FieldStorage() sort = parameter.getvalue("jtSorting") + domain_filter = parameter.getvalue("domain") if sort is not None: rules = get_sorted_rules(sort) else: rules = get_validate_rules() + json_rules = [] for rule in rules: + if domain_filter and rule[2] != domain_filter: + continue + new_json_rule = {} new_json_rule['corpus'] = rule[0] new_json_rule['doc'] = rule[1] @@ -36,4 +41,4 @@ def list_rules(): print "Content-type:application/json\r\n\r\n" -print list_rules() \ No newline at end of file +print list_rules() diff --git a/modules/jtable_schema_list.py b/modules/jtable_schema_list.py new file mode 100755 index 0000000..30d0be9 --- /dev/null +++ b/modules/jtable_schema_list.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from gitdox_sql import * +import json +import cgi +import os + +schema_dir = (os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + + os.sep + + 'schemas') + +def list_files(): + jtable_result = {} + ext = cgi.FieldStorage().getvalue("extension") + + try: + options = [{"DisplayText": x, + "Value": x} + for x in os.listdir(schema_dir) + if x.endswith(ext)] + jtable_result['Result'] = 'OK' + jtable_result['Options'] = options + return json.dumps(jtable_result) + except: + jtable_result['Result'] = 'Error' + 
jtable_result['Message'] = 'Something went wrong in jtable_xsd_list.py' + return json.dumps(jtable_result) + + +print "Content-type:application/json\r\n\r\n" +print list_files() diff --git a/templates/validation_rules.mustache b/templates/validation_rules.mustache index 293d129..74032da 100644 --- a/templates/validation_rules.mustache +++ b/templates/validation_rules.mustache @@ -33,9 +33,38 @@

GitDox - Validation

-

validation rule management | back to document list

+

+ validation rule management | back to document list +

-
+ + +
+
+
+
+
+
+
+
+
+
+
+
From b5bbaa392dd05eee4dc9cb2ce26290401f0b5c34 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 02:04:35 -0400 Subject: [PATCH 080/135] fix broken table sorting --- js/validation_rules.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/js/validation_rules.js b/js/validation_rules.js index 6769675..48315b6 100644 --- a/js/validation_rules.js +++ b/js/validation_rules.js @@ -4,12 +4,13 @@ $(document).ready(function () { sorting: true, actions: { listAction: function (postData, jtParams) { + jtParams.domain = 'xml'; return $.Deferred(function ($dfd) { $.ajax({ url: 'modules/jtable_rule_list.py', type: 'POST', dataType: 'json', - data: {domain: "xml"}, + data: jtParams, success: function (data) { $dfd.resolve(data); }, @@ -55,12 +56,13 @@ $(document).ready(function () { sorting: true, actions: { listAction: function (postData, jtParams) { + jtParams.domain = 'meta'; return $.Deferred(function ($dfd) { $.ajax({ url: 'modules/jtable_rule_list.py', type: 'POST', dataType: 'json', - data: {domain: "meta"}, + data: jtParams, success: function (data) { $dfd.resolve(data); }, @@ -112,12 +114,13 @@ $(document).ready(function () { sorting: true, actions: { listAction: function (postData, jtParams) { + jtParams.domain = 'ether'; return $.Deferred(function ($dfd) { $.ajax({ url: 'modules/jtable_rule_list.py', type: 'POST', dataType: 'json', - data: {domain: "ether"}, + data: jtParams, success: function (data) { $dfd.resolve(data); }, @@ -169,12 +172,13 @@ $(document).ready(function () { sorting: true, actions: { listAction: function (postData, jtParams) { + jtParams.domain = 'export'; return $.Deferred(function ($dfd) { $.ajax({ url: 'modules/jtable_rule_list.py', type: 'POST', dataType: 'json', - data: {domain: "export"}, + data: jtParams, success: function (data) { $dfd.resolve(data); }, From ee5e10b79ae8017cfe1da9a55735da25bc6c0489 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 02:12:12 -0400 Subject: [PATCH 
081/135] remove schema field from editor --- css/gitdox.css | 2 +- editor.py | 32 -------------------------------- templates/editor.mustache | 6 ------ 3 files changed, 1 insertion(+), 39 deletions(-) diff --git a/css/gitdox.css b/css/gitdox.css index af1170d..2ad11f2 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -172,7 +172,7 @@ width: 100%} } #validation_report{ - height: 300px; + height: 200px; overflow-y: auto; } diff --git a/editor.py b/editor.py index 7dbd9d0..95684f2 100755 --- a/editor.py +++ b/editor.py @@ -150,15 +150,6 @@ def load_page(user,admin,theform): else: update_assignee(doc_id, assignee) - if theform.getvalue('edit_schema') and user != "demo": - schema = theform.getvalue('edit_schema') - if schema != "--none--": - if doc_id > max_id: - create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content) - max_id = doc_id - else: - update_schema(doc_id, schema) - # cloning metadata from an existing doc into a new doc if theform.getvalue('source_doc'): source_meta = get_doc_meta(theform.getvalue('source_doc')) @@ -237,10 +228,6 @@ def load_page(user,admin,theform): mode = theform.getvalue('edit_mode') if mode != old_mode and user != "demo": update_mode(doc_id,mode) - if theform.getvalue('edit_schema'): - schema = theform.getvalue('edit_schema') - if schema != old_schema and user != "demo": - update_schema(doc_id, schema) if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet": # mode has been changed to spreadsheet via NLP update_mode(doc_id, "ether") mode = "ether" @@ -350,24 +337,6 @@ def load_page(user,admin,theform): edit_status += options+"" - # Get XML schema list - schema_list = ['--none--'] - scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep - schemadir = scriptpath + "schemas" + os.sep - - schema_list += get_file_list(schemadir,"xsd",hide_extension=True) - - edit_schema = """" - # edit_schema = edit_schema.replace(schema+'"', schema+'" selected="selected"') - # Get user_list from the logintools 
user_list=[] scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep @@ -461,7 +430,6 @@ def load_page(user,admin,theform): render_data['repo'] = repo_name render_data['edit_status_html'] = edit_status - render_data['edit_schema_html'] = edit_schema render_data['edit_assignee_html'] = edit_assignee render_data['edit_mode_html'] = edit_mode render_data['metadata_html'] = print_meta(doc_id) diff --git a/templates/editor.mustache b/templates/editor.mustache index 04a7d62..501beb2 100644 --- a/templates/editor.mustache +++ b/templates/editor.mustache @@ -125,12 +125,6 @@ {{/can_save}} - - XML Schema: - - {{{edit_schema_html}}} - - Assigned to: From e0fb5e7312b991c89ed8616437d5ad7a4174aacb Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 02:45:01 -0400 Subject: [PATCH 082/135] fix visual bug with mode display in index --- index.py | 12 +++--------- templates/index.mustache | 4 ++-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/index.py b/index.py index 36306c6..dee98e0 100755 --- a/index.py +++ b/index.py @@ -93,15 +93,9 @@ def load_landing(user, admin, theform): render_data['docs'] = [] for doc in doc_list: doc_vars = {} - for item in doc: - if item == "xml": - doc_vars["xml"] = True - mode = "xml" - elif item == "ether": - doc_vars["ether"] = True - mode = "ether" - elif "-" in str(item): - doc_vars["other_mode"] = True + doc_vars["xml"] = "xml" in doc + doc_vars["ether"] = "ether" in doc + doc_vars["other_mode"] = not (doc_vars["xml"] or doc_vars["ether"]) id = str(doc[0]) doc_vars["id"] = id diff --git a/templates/index.mustache b/templates/index.mustache index 0fd4639..a7f509f 100644 --- a/templates/index.mustache +++ b/templates/index.mustache @@ -193,9 +193,9 @@ {{#xml}}   {{/xml}} - {{#xml}} + {{#ether}}   - {{/xml}} + {{/ether}}   From 8f4cc621b5968ba7b14211c8c6f6bebee5ff6664 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 02:51:04 -0400 Subject: [PATCH 083/135] transition xml validation to rule 
framework --- modules/gitdox_sql.py | 6 ++++ modules/validation/legacy_xml_validator.py | 26 +++++++++++++++ modules/validation/xml_validator.py | 37 ++++++++++++---------- validate.py | 17 ++++++++-- 4 files changed, 67 insertions(+), 19 deletions(-) create mode 100644 modules/validation/legacy_xml_validator.py diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py index c3e69a5..6de2a10 100755 --- a/modules/gitdox_sql.py +++ b/modules/gitdox_sql.py @@ -211,12 +211,18 @@ def get_corpora(): def get_validate_rules(): return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate", None) +def get_xml_rules(): + return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'xml'", None) + def get_meta_rules(): return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'meta'", None) def get_ether_rules(): return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'ether'", None) +def get_export_rules(): + return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'export'", None) + def get_sorted_rules(sort): return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate ORDER BY " + sort, None) # parameterization doesn't work for order by diff --git a/modules/validation/legacy_xml_validator.py b/modules/validation/legacy_xml_validator.py new file mode 100644 index 0000000..f3cda1e --- /dev/null +++ b/modules/validation/legacy_xml_validator.py @@ -0,0 +1,26 @@ +from validator import Validator +from ..ether import exec_via_temp +import re + +# TODO: would have been ideal to write this without any filesystem operations +class LegacyXmlValidator(Validator): + def __init__(self, schema): + self.schema = schema + + def validate(self, doc): + report = "" + + if self.schema == '--none--': + return report + else: + schema = 
self.schema + command = "xmllint --htmlout --schema schemas/" + schema + ".xsd tempfilename" + out, err = exec_via_temp(doc, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) + err = re.sub(r'\n','
',err) + report += err + "
" + + return report diff --git a/modules/validation/xml_validator.py b/modules/validation/xml_validator.py index 7ff6a01..d44e883 100644 --- a/modules/validation/xml_validator.py +++ b/modules/validation/xml_validator.py @@ -2,25 +2,30 @@ from ..ether import exec_via_temp import re -# TODO: would have been ideal to write this without any filesystem operations class XmlValidator(Validator): - def __init__(self, schema): - self.schema = schema + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.schema = rule[3] - def validate(self, doc): + def validate(self, doc, doc_name, doc_corpus): report = "" - if self.schema == '--none--': - report += "No schema
" - else: - schema = self.schema - command = "xmllint --htmlout --schema schemas/" + schema + ".xsd tempfilename" - out, err = exec_via_temp(doc, command) - err = err.strip() - err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") - err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) - err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) - err = re.sub(r'\n','
',err) - report += err + "
" + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return report + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return report + + schema = self.schema + command = "xmllint --htmlout --schema schemas/" + schema + " tempfilename" + out, err = exec_via_temp(doc, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) + err = re.sub(r'\n','
',err) + report += err + "
" return report diff --git a/validate.py b/validate.py index 6a35c5c..94d2a2f 100755 --- a/validate.py +++ b/validate.py @@ -3,12 +3,13 @@ from collections import defaultdict import re -import cgi +import cgi, cgitb import json from paths import ether_url from modules.gitdox_sql import * from modules.ether import get_socialcalc, make_spreadsheet, exec_via_temp, get_timestamps, parse_ether +from modules.validation.legacy_xml_validator import LegacyXmlValidator from modules.validation.xml_validator import XmlValidator from modules.validation.meta_validator import MetaValidator from modules.validation.ether_validator import EtherValidator @@ -140,10 +141,20 @@ def validate_doc_ether(doc_id, editor=False): return json_report def validate_doc_xml(doc_id, schema, editor=False): - xml_report = "" + rules = [XmlValidator(x) for x in get_xml_rules()] + doc_info = get_doc_info(doc_id) + doc_name = doc_info[0] + doc_corpus = doc_info[1] doc_content = get_doc_content(doc_id) - xml_report = XmlValidator(schema).validate(doc_content) + + # Schemas used to be assigned per document--do not support this anymore + #xml_report = LegacyXmlValidator(schema).validate(doc_content) + + xml_report = "" + for rule in rules: + xml_report += rule.validate(doc_content, doc_name, doc_corpus) + meta_report = validate_doc_meta(doc_id, editor) # report From 6e4f46c92bb315d09efa21093cb41b8b13caa3d2 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 22:03:43 -0400 Subject: [PATCH 084/135] edit button: turn into link instead of using js --- templates/index.mustache | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/templates/index.mustache b/templates/index.mustache index a7f509f..711f140 100644 --- a/templates/index.mustache +++ b/templates/index.mustache @@ -202,13 +202,11 @@ -
- -
+ +
edit
- +
From 44a2253b5b498274a477ce072a87c8de13fe1e11 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Thu, 25 Oct 2018 22:09:14 -0400 Subject: [PATCH 085/135] fix crash when no id supplied --- editor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/editor.py b/editor.py index 95684f2..a635bfb 100755 --- a/editor.py +++ b/editor.py @@ -419,7 +419,12 @@ def load_page(user,admin,theform): else: render_data['ether_mode'] = False - render_data['doc_is_selected'] = len(doc_id) != 0 + # stop here if no doc selected + if doc_id: + render_data['doc_is_selected'] = len(doc_id) != 0 + else: + return render("editor", render_data) + render_data['id'] = doc_id render_data['mode'] = mode render_data['schema'] = schema From b14a1131e6ae71861b7ffc5f003ead5b10ef16d0 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 26 Oct 2018 02:03:46 -0400 Subject: [PATCH 086/135] fix varname error --- templates/editor.mustache | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/editor.mustache b/templates/editor.mustache index 501beb2..f331cc9 100644 --- a/templates/editor.mustache +++ b/templates/editor.mustache @@ -154,7 +154,7 @@ {{/ether_mode}}

Metadata

- {{{metadata}}} + {{{metadata_html}}} From 07f2068eded8412267fc331856dc5e4373d24e36 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Fri, 26 Oct 2018 02:05:20 -0400 Subject: [PATCH 087/135] implement export validation --- js/index.js | 42 +++++++++++++--- modules/validation/ether_validator.py | 7 +-- modules/validation/export_validator.py | 39 ++++++++++++++ modules/validation/meta_validator.py | 7 ++- modules/validation/xml_validator.py | 17 ++++--- templates/index.mustache | 8 ++- validate.py | 70 +++++++++++++++++++------- 7 files changed, 150 insertions(+), 40 deletions(-) create mode 100644 modules/validation/export_validator.py diff --git a/js/index.js b/js/index.js index ebb50c3..77dd6a4 100644 --- a/js/index.js +++ b/js/index.js @@ -9,18 +9,24 @@ function validate_all() { success: function(response) { console.log(response); $.each(response, function(key, value) { - // 1 vs 2 is for ordering ether/xml before metadata + // 1 vs 2 is for ordering ether/xml before metadata, 3 is used for export // sort is hidden text at beginning of cell for sorting purposes var output1 = ''; var output2 = ''; + var output3 = ''; var sort1 = ''; var sort2 = ''; + var sort3 = ''; $.each(value, function(k,v) { if (k == "ether") { - if (v == "spreadsheet is valid") { + if (v.indexOf("EtherCalc is valid") > -1) { color = 'green'; sort1 = 'v'; } + else if (v.indexOf("no applicable") > -1) { + color = 'gray'; + sort1 = 'n'; + } else { color = 'red'; sort1 = 'i'; @@ -28,10 +34,14 @@ function validate_all() { output1 += '
 ' + v + '
'; } else if (k == "meta") { - if (v == "metadata is valid") { + if (v.indexOf("metadata is valid") > -1) { color = 'green'; sort2 = 'v'; } + else if (v.indexOf("no applicable") > -1) { + color = 'gray'; + sort2 = 'n'; + } else { color = 'red'; sort2 = 'i'; @@ -39,11 +49,11 @@ function validate_all() { output2 += '
 ' + v + '
'; } else if (k == "xml") { - if (v.indexOf("validates") !== -1) { + if (v.indexOf("xml is valid") > -1) { color = 'green'; sort1 = 'v'; } - else if (v == "No schema
") { + else if (v.indexOf("no applicable") > -1) { color = 'gray'; sort1 = 'n'; } @@ -53,9 +63,27 @@ function validate_all() { } output1 += '
 ' + v + '
'; } + else if (k == "export") { + if (v.indexOf("exports are valid") > -1) { + color = 'green'; + sort3 = 'v'; + } + else if (v.indexOf("no applicable") > -1) { + color = 'gray'; + sort3 = 'n'; + } + else { + color = 'red'; + sort3 = 'i'; + } + output3 += '
 ' + v + '
'; + } }); - $("#validate_"+key).before(""); - $("#validate_"+key).html(output1 + output2); + if (!output3) { + output3 = '
'; + } + $("#validate_"+key).before(""); + $("#validate_"+key).html(output1 + output2 + output3); }); $("#validate_landing").removeClass("disabledbutton"); $("#validate_landing").html(' re-validate'); diff --git a/modules/validation/ether_validator.py b/modules/validation/ether_validator.py index 5712d2e..3f67e95 100644 --- a/modules/validation/ether_validator.py +++ b/modules/validation/ether_validator.py @@ -248,13 +248,14 @@ def validate(self, parsed_ether, doc_name, doc_corpus): if self.corpus is not None: if re.search(self.corpus, doc_corpus) is None: - return res + return res, False if self.doc is not None: if re.search(self.doc, doc_name) is None: - return res + return res, False report, tooltip, cells = self._apply_rule(parsed_ether) + res['report'] += report res['tooltip'] += tooltip res['cells'] += cells - return res + return res, True diff --git a/modules/validation/export_validator.py b/modules/validation/export_validator.py new file mode 100644 index 0000000..43cf439 --- /dev/null +++ b/modules/validation/export_validator.py @@ -0,0 +1,39 @@ +from validator import Validator +from ..ether import exec_via_temp, ExportConfig, ether_to_sgml +import re + +# TODO: would have been ideal to write this without any filesystem operations +class ExportValidator(Validator): + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.config = rule[3] + self.schema = rule[5] + + def validate(self, socialcalc, doc_id, doc_name, doc_corpus): + report = "" + + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return report, False + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return report, False + + export_data = ether_to_sgml(socialcalc, doc_id, config=self.config) + + schema = self.schema + command = "xmllint --schema schemas/" + schema + " tempfilename" + out, err = exec_via_temp(export_data, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) + err = re.sub(r'\n','
',err) + if err == "XML schema validates": + report = "" + else: + report = "Problems with exporting with " + self.config \ + + " and validating with " + self.schema + ":
" + err + "
" + + return report, True diff --git a/modules/validation/meta_validator.py b/modules/validation/meta_validator.py index 3db9fe7..d967ff2 100644 --- a/modules/validation/meta_validator.py +++ b/modules/validation/meta_validator.py @@ -44,11 +44,10 @@ def _apply_rule(self, metadata): def validate(self, metadata, doc_name, doc_corpus): if self.corpus is not None: if re.search(self.corpus, doc_corpus) is None: - return "" + return {"report": "", "tooltip": ""}, False if self.doc is not None: if re.search(self.doc, doc_name) is None: - return "" + return {"report": "", "tooltip": ""}, False report, tooltip = self._apply_rule(metadata) - return {"report": report, - "tooltip": tooltip} + return {"report": report, "tooltip": tooltip}, True diff --git a/modules/validation/xml_validator.py b/modules/validation/xml_validator.py index d44e883..ab6cd0a 100644 --- a/modules/validation/xml_validator.py +++ b/modules/validation/xml_validator.py @@ -13,19 +13,22 @@ def validate(self, doc, doc_name, doc_corpus): if self.corpus is not None: if re.search(self.corpus, doc_corpus) is None: - return report + return report, False if self.doc is not None: if re.search(self.doc, doc_name) is None: - return report + return report, False schema = self.schema - command = "xmllint --htmlout --schema schemas/" + schema + " tempfilename" + command = "xmllint --schema schemas/" + schema + " tempfilename" out, err = exec_via_temp(doc, command) err = err.strip() err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") - err = re.sub(r'/tmp/[A-Za-z0-9]+:','XML schema:
',err) - err = re.sub(r'/tmp/[A-Za-z0-9]+','XML schema ',err) + err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) err = re.sub(r'\n','
',err) - report += err + "
" + if err == "XML schema validates": + report = "" + else: + report = "Problems validating with " + self.schema + ":
" + err + "
" - return report + return report, True diff --git a/templates/index.mustache b/templates/index.mustache index 711f140..8ed496d 100644 --- a/templates/index.mustache +++ b/templates/index.mustache @@ -194,9 +194,15 @@   {{/xml}} {{#ether}} -   +   {{/ether}}   + {{#ether}} +   + {{/ether}} + {{^ether}} + + {{/ether}}
diff --git a/validate.py b/validate.py index 94d2a2f..bdb27ba 100755 --- a/validate.py +++ b/validate.py @@ -13,6 +13,7 @@ from modules.validation.xml_validator import XmlValidator from modules.validation.meta_validator import MetaValidator from modules.validation.ether_validator import EtherValidator +from modules.validation.export_validator import ExportValidator def highlight_cells(cells, ether_url, ether_doc_name): old_ether = get_socialcalc(ether_url, ether_doc_name) @@ -83,8 +84,10 @@ def validate_doc_meta(doc_id, editor): doc_info = get_doc_info(doc_id) doc_name = doc_info[0] doc_corpus = doc_info[1] + meta_rule_fired = False for rule in rules: - res = rule.validate(meta, doc_name, doc_corpus) + res, fired = rule.validate(meta, doc_name, doc_corpus) + meta_rule_fired = meta_rule_fired or fired if editor and len(res['tooltip']) > 0: report += ("""
""" + res['report'][:-5] @@ -93,17 +96,26 @@ def validate_doc_meta(doc_id, editor): + "
") else: report += res['report'] + + if not meta_rule_fired: + report = "no applicable metadata rules
" + elif len(report) == 0: + report = "metadata is valid
" + else: + report = "Metadata Problems:
" + report return report def validate_doc_ether(doc_id, editor=False): - rules = [EtherValidator(x) for x in get_ether_rules()] + ether_rules = [EtherValidator(x) for x in get_ether_rules()] + export_rules = [ExportValidator(x) for x in get_export_rules()] doc_info = get_doc_info(doc_id) doc_name = doc_info[0] doc_corpus = doc_info[1] ether_doc_name = "gd_" + doc_corpus + "_" + doc_name - parsed_ether = parse_ether(get_socialcalc(ether_url, ether_doc_name)) + socialcalc = get_socialcalc(ether_url, ether_doc_name) + parsed_ether = parse_ether(socialcalc) report = '' cells = [] @@ -111,9 +123,10 @@ def validate_doc_ether(doc_id, editor=False): # check metadata meta_report = validate_doc_meta(doc_id, editor) - # check ethercalc rules - for rule in rules: - res = rule.validate(parsed_ether, doc_name, doc_corpus) + ether_rule_fired = False + for rule in ether_rules: + res, fired = rule.validate(parsed_ether, doc_name, doc_corpus) + ether_rule_fired = ether_rule_fired or fired if editor and len(res['tooltip']) > 0: report += ("""
""" + res['report'][:-5] @@ -123,21 +136,37 @@ def validate_doc_ether(doc_id, editor=False): else: report += res['report'] cells += res['cells'] + if not ether_rule_fired: + report = "no applicable EtherCalc validation rules
" + elif report: + report = "Ether Problems:
" + report + else: + report = "EtherCalc is valid
" + + export_report = "" + export_rule_fired = False + for rule in export_rules: + res, fired = rule.validate(socialcalc, doc_id, doc_name, doc_corpus) + export_rule_fired = export_rule_fired or fired + export_report += res + if not export_rule_fired: + export_report = "no applicable export validation rules
" + elif export_report: + export_report = "Export Problems:
" + export_report + else: + export_report = "exports are valid
" if editor: highlight_cells(cells, ether_url, ether_doc_name) - full_report = report + meta_report + full_report = report + meta_report + export_report if len(full_report) == 0: full_report = "Document is valid!" return full_report else: json_report = {} - if len(report) == 0: - report = "spreadsheet is valid" - if len(meta_report) == 0: - meta_report = "metadata is valid" json_report['ether'] = report json_report['meta'] = meta_report + json_report['export'] = export_report return json_report def validate_doc_xml(doc_id, schema, editor=False): @@ -152,8 +181,17 @@ def validate_doc_xml(doc_id, schema, editor=False): #xml_report = LegacyXmlValidator(schema).validate(doc_content) xml_report = "" + xml_rule_fired = False for rule in rules: - xml_report += rule.validate(doc_content, doc_name, doc_corpus) + res, fired = rule.validate(doc_content, doc_name, doc_corpus) + xml_report += res + xml_rule_fired = xml_rule_fired or fired + if not xml_rule_fired: + xml_report = "no applicable XML schemas
" + elif xml_report: + xml_report = "Export Problems:
" + xml_report + else: + xml_report = "xml is valid
" meta_report = validate_doc_meta(doc_id, editor) @@ -164,15 +202,11 @@ def validate_doc_xml(doc_id, schema, editor=False): full_report = xml_report + meta_report except Exception as e: full_report = "[Encoding error: " + str(e) + "]" - if len(full_report) == 0: - full_report = "Document is valid!" + return full_report else: json_report = {} - if len(xml_report) == 0: - xml_report = "xml is valid" - if len(meta_report) == 0: - meta_report = "metadata is valid" + json_report['xml'] = xml_report json_report['meta'] = meta_report return json_report From 4451f394fc4d02e21cd08e2b94d3a1ac40452088 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Fri, 26 Oct 2018 10:18:34 -0400 Subject: [PATCH 088/135] Update default GUM export configuration example --- schemas/gum_export.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schemas/gum_export.ini b/schemas/gum_export.ini index c80f13b..a2564a6 100644 --- a/schemas/gum_export.ini +++ b/schemas/gum_export.ini @@ -1,7 +1,7 @@ -aliases = """{"text_id":"text@id","s_type":"s@type","hi_rend":"hi@rend","p_n":"p","p@n":"p","sp_who":"sp@who","list_type":"list@type","item_n":"item@n","figure_rend":"figure@rend","date_when":"date@when","date_from":"date@from","date_to":"date@to","date_notBefore":"date@notBefore","date_notAfter":"date@notAfter","head_rend":"head@rend","incident_who":"incident@who","p_rend":"p@rend","ref_target":"ref@target","sp_who":"sp@who"}""" +aliases = 
"""{"text_id":"text@id","s_type":"s@type","hi_rend":"hi@rend","p_n":"p","p@n":"p","sp_who":"sp@who","sp_whom":"sp@whom","list_type":"list@type","item_n":"item@n","figure_rend":"figure@rend","date_when":"date@when","date_from":"date@from","date_to":"date@to","date_notBefore":"date@notBefore","date_notAfter":"date@notAfter","head_rend":"head@rend","incident_who":"incident@who","p_rend":"p@rend","ref_target":"ref@target","table_rend":"table@rend","table_rows":"table@rows","table_cols":"table@cols","cell_rend":"cell@rend","note_place":"note@place","note_n":"note@n"}""" tok_annos = """["pos","lemma"]""" -no_content = """["head","sic","q","item","list","p","s","figure","caption","date","incident","sp","ref","w","hi","quote"]""" -priorities = """["sp","head","p","figure","caption","list","item","quote","s","q","hi","sic","date","incident","w","pos","lemma","tok"]""" +no_content = """["head","sic","q","item","list","p","note","s","figure","caption","date","incident","sp","ref","w","hi","quote"]""" +priorities = """["sp","table","row","cell","head","p","figure","caption","list","item","quote","s","q","hi","sic","date","incident","w","pos","lemma","tok"]""" export_all = False template = """ %%body%% From dd457b9159051c6fd1571e70d7438d14ac681e82 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Fri, 26 Oct 2018 17:00:20 -0400 Subject: [PATCH 089/135] "(Show all)" option must be first * Otherwise we get the alphabetically first corpus selected in the dropdown on login, but actually all corpora are showing --- templates/index.mustache | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/index.mustache b/templates/index.mustache index 8ed496d..ef40c88 100644 --- a/templates/index.mustache +++ b/templates/index.mustache @@ -57,6 +57,7 @@
Choose a corpus:
From 5ec1b234f2b4132209a2d1505f2ebf1b932f9551 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Fri, 26 Oct 2018 17:24:37 -0400 Subject: [PATCH 090/135] tabs not spaces --- validate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validate.py b/validate.py index 28bc3bc..b16cae7 100755 --- a/validate.py +++ b/validate.py @@ -104,7 +104,8 @@ def validate_doc_meta(doc_id, editor): else: report["report"] = "Metadata Problems:
" + report["report"] - return report + return report + def validate_doc_ether(doc_id, editor=False): ether_rules = [EtherValidator(x) for x in get_ether_rules()] From 988f2cfa9d7dfb643a1efe26e6fd9dc8985c5483 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Fri, 26 Oct 2018 21:09:25 -0400 Subject: [PATCH 091/135] decode utf8 coming from subprocess --- modules/validation/export_validator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/validation/export_validator.py b/modules/validation/export_validator.py index 43cf439..0559fb7 100644 --- a/modules/validation/export_validator.py +++ b/modules/validation/export_validator.py @@ -30,10 +30,10 @@ def validate(self, socialcalc, doc_id, doc_name, doc_corpus): err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) err = re.sub(r'\n','
',err) - if err == "XML schema validates": + if err == "XML schema validates": report = "" else: report = "Problems with exporting with " + self.config \ - + " and validating with " + self.schema + ":
" + err + "
" + + " and validating with " + self.schema + ":
" + err.decode("utf8") + "
" return report, True From 9a3484b946e1da415a718120a5b347b5165f32dd Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Fri, 26 Oct 2018 21:09:42 -0400 Subject: [PATCH 092/135] bugfix --- validate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validate.py b/validate.py index b16cae7..fee910e 100755 --- a/validate.py +++ b/validate.py @@ -98,9 +98,9 @@ def validate_doc_meta(doc_id, editor): report["report"] += res['report'] if not meta_rule_fired: - report["report"] = "no applicable metadata rules
" - elif len(report) == 0: - report["report"] = "metadata is valid
" + report["report"] = "No applicable metadata rules
" + elif len(report["report"]) == 0: + report["report"] = "Metadata is valid
" else: report["report"] = "Metadata Problems:
" + report["report"] From ac7856e31e5d45d86dfcf3fca0238e033d3554ae Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 27 Oct 2018 14:39:32 -0400 Subject: [PATCH 093/135] fix export bugs --- Dockerfile | 2 +- modules/ether.py | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index b52ab15..96e2f1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ RUN chmod +x /var/www/html/*.py RUN chmod +x /var/www/html/modules/*.py # keep these in sync with requirements.txt -RUN pip install lxml requests github3.py==0.9.3 passlib +RUN pip install lxml requests github3.py==0.9.3 passlib pystache # install ethercalc and run as a service RUN npm install -g ethercalc diff --git a/modules/ether.py b/modules/ether.py index 212cc22..1276287 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -528,9 +528,9 @@ def deunique_properly_nested_tags(sgml): def ether_to_sgml(ether, doc_id,config=None): """ - :param ether: String in SocialCalc format :param doc_id: GitDox database internal document ID number as string + :param config: Name of an export config (.ini file) under schemas/ :return: """ @@ -636,8 +636,6 @@ def ether_to_sgml(ether, doc_id,config=None): # New row starting from this cell, sort previous lists for opening and closing orders if row != last_row: - close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) - for element in open_tags[last_row]: open_tag_order[last_row].append(element) @@ -649,17 +647,17 @@ def ether_to_sgml(ether, doc_id,config=None): if prim_elt in open_tags[last_row] and prim_elt in open_tag_length: if span == open_tag_length[prim_elt]: open_tags[last_row][prim_elt].append((attr, val)) - if prim_elt not in close_tags[last_row + span]: - close_tags[last_row+span-1].append(prim_elt) + close_tags[last_row + span].append(prim_elt) prim_found = True if not prim_found: if sec_elt in open_tags[last_row] and sec_elt in open_tag_length: if span == 
open_tag_length[sec_elt]: open_tags[last_row][sec_elt].append((attr, val)) - if sec_elt not in close_tags[last_row + span]: - close_tags[last_row + span - 1].append(sec_elt) + close_tags[last_row + span].append(sec_elt) sec_element_checklist = [] # Purge sec_elements + close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) + last_row = row if 't' in cell[2]: # cell contains text content = cell[2]['t'] @@ -700,6 +698,7 @@ def ether_to_sgml(ether, doc_id,config=None): close_row = row + rowspan else: close_row = row + 1 + # this introduces too many close tags for elts that have more than one attr. # We take care of this later with close_tag_debt close_tags[close_row].append(element) @@ -734,8 +733,8 @@ def ether_to_sgml(ether, doc_id,config=None): if attrib != "": tag += ' ' + attrib + '="' + value + '"' attr_count += 1 - if attr_count > 1: - close_tag_debt[element] += 1 + close_tag_debt[element] = len(open_tags[r][element]) - 1 + if element in config.milestones: tag += '/>\n' else: From 6ce6381850474831dca18d777e36d35c383fa68d Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sat, 27 Oct 2018 15:38:47 -0400 Subject: [PATCH 094/135] Validation messages --- validate.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/validate.py b/validate.py index fee910e..0d739a5 100755 --- a/validate.py +++ b/validate.py @@ -139,11 +139,11 @@ def validate_doc_ether(doc_id, editor=False): report += res['report'] cells += res['cells'] if not ether_rule_fired: - report = "no applicable EtherCalc validation rules
" + report = "No applicable spreadsheet validation rules
" elif report: - report = "Ether Problems:
" + report + report = "Spreadsheet Problems:
" + report else: - report = "EtherCalc is valid
" + report = "Spreadsheet is valid
" export_report = "" export_rule_fired = False @@ -152,11 +152,11 @@ def validate_doc_ether(doc_id, editor=False): export_rule_fired = export_rule_fired or fired export_report += res if not export_rule_fired: - export_report = "no applicable export validation rules
" + export_report = "No applicable export validation rules
" elif export_report: export_report = "Export Problems:
" + export_report else: - export_report = "exports are valid
" + export_report = "Export is valid
" if editor: highlight_cells(cells, ether_url, ether_doc_name) @@ -189,11 +189,11 @@ def validate_doc_xml(doc_id, schema, editor=False): xml_report += res xml_rule_fired = xml_rule_fired or fired if not xml_rule_fired: - xml_report = "no applicable XML schemas
" + xml_report = "Mo applicable XML schemas
" elif xml_report: - xml_report = "Export Problems:
" + xml_report + xml_report = "XML problems:
" + xml_report else: - xml_report = "xml is valid
" + xml_report = "XML is valid
" meta_report = validate_doc_meta(doc_id, editor) From 66b84d5958806a8b5f36c08bc4c0cf3d28572fcc Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sat, 27 Oct 2018 16:10:14 -0400 Subject: [PATCH 095/135] use ubuntu 16.04 since 18.04 has bad dependencies, get reqs from requirements.txt --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 96e2f1f..a4d533f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:16.04 EXPOSE 80 # install deps @@ -14,7 +14,7 @@ RUN chmod +x /var/www/html/*.py RUN chmod +x /var/www/html/modules/*.py # keep these in sync with requirements.txt -RUN pip install lxml requests github3.py==0.9.3 passlib pystache +RUN pip install -r /var/www/html/requirements.txt # install ethercalc and run as a service RUN npm install -g ethercalc From 2317a1efa7c6dfc9c74f1147c52d4ea9835b23fe Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sat, 27 Oct 2018 16:25:57 -0400 Subject: [PATCH 096/135] Consolidate messages and use class * Message spans should have separate class in case we want to use spans for stacked fa icons --- css/gitdox.css | 4 ++-- js/index.js | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/css/gitdox.css b/css/gitdox.css index 2ad11f2..8205a60 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -134,7 +134,7 @@ padding: 10px; position:relative; } -.tooltip span +.tooltip span.msg { display:none; -moz-border-radius:6px; @@ -144,7 +144,7 @@ padding: 10px; background:white; } -.tooltip:hover span +.tooltip:hover span.msg { display:block; position:absolute; diff --git a/js/index.js b/js/index.js index 77dd6a4..9d5fbf9 100644 --- a/js/index.js +++ b/js/index.js @@ -19,11 +19,11 @@ function validate_all() { var sort3 = ''; $.each(value, function(k,v) { if (k == "ether") { - if (v.indexOf("EtherCalc is valid") > -1) { + if (v.indexOf("Spreadsheet is valid") > -1) { color = 'green'; sort1 = 'v'; } - else if 
(v.indexOf("no applicable") > -1) { + else if (v.indexOf("No applicable") > -1) { color = 'gray'; sort1 = 'n'; } @@ -31,14 +31,14 @@ function validate_all() { color = 'red'; sort1 = 'i'; } - output1 += '
 ' + v + '
'; + output1 += '
 ' + v + '
'; } else if (k == "meta") { - if (v.indexOf("metadata is valid") > -1) { + if (v.indexOf("Metadata is valid") > -1) { color = 'green'; sort2 = 'v'; } - else if (v.indexOf("no applicable") > -1) { + else if (v.indexOf("No applicable") > -1) { color = 'gray'; sort2 = 'n'; } @@ -46,14 +46,14 @@ function validate_all() { color = 'red'; sort2 = 'i'; } - output2 += '
 ' + v + '
'; + output2 += '
 ' + v + '
'; } else if (k == "xml") { - if (v.indexOf("xml is valid") > -1) { + if (v.indexOf("XML is valid") > -1) { color = 'green'; sort1 = 'v'; } - else if (v.indexOf("no applicable") > -1) { + else if (v.indexOf("No applicable") > -1) { color = 'gray'; sort1 = 'n'; } @@ -61,14 +61,14 @@ function validate_all() { color = 'red'; sort1 = 'i'; } - output1 += '
 ' + v + '
'; + output1 += '
 ' + v + '
'; } else if (k == "export") { - if (v.indexOf("exports are valid") > -1) { + if (v.indexOf("Export is valid") > -1) { color = 'green'; sort3 = 'v'; } - else if (v.indexOf("no applicable") > -1) { + else if (v.indexOf("No applicable") > -1) { color = 'gray'; sort3 = 'n'; } From 0f47e90c1d5f35b7e6340276d8aa152bcd6312f2 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 28 Oct 2018 19:51:34 -0400 Subject: [PATCH 097/135] HTML escaping and whitespace * Some XML validation messages could mess up HTML rendering of report --- modules/validation/export_validator.py | 57 +++++++++++++------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/modules/validation/export_validator.py b/modules/validation/export_validator.py index 0559fb7..cb33425 100644 --- a/modules/validation/export_validator.py +++ b/modules/validation/export_validator.py @@ -4,36 +4,37 @@ # TODO: would have been ideal to write this without any filesystem operations class ExportValidator(Validator): - def __init__(self, rule): - self.corpus = rule[0] - self.doc = rule[1] - self.config = rule[3] - self.schema = rule[5] + def __init__(self, rule): + self.corpus = rule[0] + self.doc = rule[1] + self.config = rule[3] + self.schema = rule[5] - def validate(self, socialcalc, doc_id, doc_name, doc_corpus): - report = "" + def validate(self, socialcalc, doc_id, doc_name, doc_corpus): + report = "" - if self.corpus is not None: - if re.search(self.corpus, doc_corpus) is None: - return report, False - if self.doc is not None: - if re.search(self.doc, doc_name) is None: - return report, False + if self.corpus is not None: + if re.search(self.corpus, doc_corpus) is None: + return report, False + if self.doc is not None: + if re.search(self.doc, doc_name) is None: + return report, False - export_data = ether_to_sgml(socialcalc, doc_id, config=self.config) + export_data = ether_to_sgml(socialcalc, doc_id, config=self.config) - schema = self.schema - command = "xmllint --schema schemas/" + schema + " 
tempfilename" - out, err = exec_via_temp(export_data, command) - err = err.strip() - err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") - err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) - err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) - err = re.sub(r'\n','
',err) - if err == "XML schema validates": - report = "" - else: - report = "Problems with exporting with " + self.config \ - + " and validating with " + self.schema + ":
" + err.decode("utf8") + "
" + schema = self.schema + command = "xmllint --schema schemas/" + schema + " tempfilename" + out, err = exec_via_temp(export_data, command) + err = err.strip() + err = err.replace("
","").replace("\n","").replace('

xmllint output

',"") + err = err.replace("<","<").replace(">",">") + err = re.sub(r'/tmp/[A-Za-z0-9_]+:','XML schema:
',err) + err = re.sub(r'/tmp/[A-Za-z0-9_]+','XML schema ',err) + err = re.sub(r'\n','
',err) + if err.strip() == "XML schema validates": + report = "" + else: + report = "Problems with exporting with " + self.config \ + + " and validating with " + self.schema + ":
" + err.decode("utf8") + "
" - return report, True + return report, True From ec25448660be3c7f101f06f1d8fdb6de14446b31 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 28 Oct 2018 19:53:38 -0400 Subject: [PATCH 098/135] Bug fix meta report * Make sure to always return value of 'report' key when expecting a string in meta report --- validate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validate.py b/validate.py index 0d739a5..702c5c2 100755 --- a/validate.py +++ b/validate.py @@ -195,7 +195,8 @@ def validate_doc_xml(doc_id, schema, editor=False): else: xml_report = "XML is valid
" - meta_report = validate_doc_meta(doc_id, editor) + meta_validation = validate_doc_meta(doc_id, editor) + meta_report = meta_validation["report"] # report if editor is True: From 71800f0d0f7bd1cfcbcf41a104664c4a8cecab9c Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 28 Oct 2018 19:54:32 -0400 Subject: [PATCH 099/135] Add flush cache option to CLI validate.py * Use -i to invalidate all documents so they are freshly validated --- validate.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/validate.py b/validate.py index 702c5c2..cfc19ac 100755 --- a/validate.py +++ b/validate.py @@ -258,9 +258,12 @@ def validate_all_docs(): from argparse import ArgumentParser p = ArgumentParser() p.add_argument("-d","--doc",help="doc ID in gitdox.db or 'all'", default="all") + p.add_argument("-i","--invalidate",action="store_true",help="invalidate all documents before running validation") opts = p.parse_args() doc_id = opts.doc + if opts.invalidate: + invalidate_doc_by_name("%","%") if doc_id != "all": _, _, _, _, _, mode, schema = get_doc_info(doc_id) else: From bb5a31452d4a6542f5df652a7dac1e958cfa3951 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 28 Oct 2018 19:55:28 -0400 Subject: [PATCH 100/135] Add SocialCalc caching * Closes #108 --- modules/ether.py | 23 +++++++++++++++++++---- modules/gitdox_sql.py | 19 ++++++++++++++++++- validate.py | 33 ++++++++++++++++++++++++++------- 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 1276287..d833514 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -826,12 +826,28 @@ def sheet_exists(ether_path, name): return len(get_socialcalc(ether_path,name)) > 0 -def get_socialcalc(ether_path, name): +def get_socialcalc(ether_path, name, doc_id=None, dirty=True): + """ + Get SocialCalc format serialization for an EtherCalc spreadsheet, or a cached serialization from the sqlite + DB is available for a specified doc_id + + :param ether_path: The EtherCalc server base 
URL, e.g. http://server.com/ethercalc/ + :param name: spreadsheet name, e.g. gd_corpname_docname + :param doc_id: optional doc_id in docs table to fetch/set SocialCalc from cache + :return: SocialCalc string + """ + + if doc_id is not None and not dirty: + cache = get_cache(doc_id)[0][0] + if cache is not None: + return cache command = "curl --netrc -X GET " + ether_path + "_/" + name proc = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) (stdout, stderr) = proc.communicate() - return stdout.decode("utf8") - + socialcalc = stdout.decode("utf8") + if doc_id is not None: + set_cache(doc_id, socialcalc) + return socialcalc def get_timestamps(ether_path): @@ -843,7 +859,6 @@ def get_timestamps(ether_path): return output - if __name__ == "__main__": data = "" storage = cgi.FieldStorage() diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py index 6de2a10..990a5e4 100755 --- a/modules/gitdox_sql.py +++ b/modules/gitdox_sql.py @@ -31,7 +31,7 @@ def setup_db(): #docs table cur.execute('''CREATE TABLE IF NOT EXISTS docs - (id INTEGER PRIMARY KEY AUTOINCREMENT, name text, corpus text, status text,assignee_username text ,filename text, content text, mode text, schema text, validation text, timestamp text)''') + (id INTEGER PRIMARY KEY AUTOINCREMENT, name text, corpus text, status text,assignee_username text ,filename text, content text, mode text, schema text, validation text, timestamp text, cache text)''') #metadata table cur.execute('''CREATE TABLE IF NOT EXISTS metadata (docid INTEGER, metaid INTEGER PRIMARY KEY AUTOINCREMENT, key text, value text, corpus_meta text, UNIQUE (docid, metaid) ON CONFLICT REPLACE, UNIQUE (docid, key) ON CONFLICT REPLACE)''') @@ -49,6 +49,23 @@ def create_document(doc_id, name, corpus, status, assigned_username, filename, c (int(doc_id), name, corpus, status, assigned_username, filename, content, schema)) +def get_cache(doc_id): + try: + cache = generic_query("SELECT cache FROM 
docs WHERE id = ?;",(doc_id,)) + except sqlite3.Error as err: # Old schema without cache column + generic_query("ALTER TABLE docs ADD COLUMN cache TEXT default null;",None) + cache = generic_query("SELECT cache FROM docs WHERE id = ?;",(doc_id,)) + return cache + + +def set_cache(doc_id, cache_contents): + try: + generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id)) + except sqlite3.Error as err: # Old schema without cache column + generic_query("ALTER TABLE docs ADD COLUMN cache TEXT default null;",None) + generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id)) + + def generic_query(sql, params): # generic_query("DELETE FROM rst_nodes WHERE doc=? and project=?",(doc,project)) diff --git a/validate.py b/validate.py index cfc19ac..cf00e18 100755 --- a/validate.py +++ b/validate.py @@ -15,8 +15,9 @@ from modules.validation.ether_validator import EtherValidator from modules.validation.export_validator import ExportValidator -def highlight_cells(cells, ether_url, ether_doc_name): - old_ether = get_socialcalc(ether_url, ether_doc_name) + +def highlight_cells(cells, ether_url, ether_doc_name, doc_id=None, dirty=True): + old_ether = get_socialcalc(ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) old_ether_lines = old_ether.splitlines() new_ether_lines = [] @@ -75,6 +76,7 @@ def highlight_cells(cells, ether_url, ether_doc_name): new_ether = '\n'.join(new_ether_lines) make_spreadsheet(new_ether, ether_url + "_/" + ether_doc_name, "socialcalc") + def validate_doc_meta(doc_id, editor): # metadata validation report = {"report":"","tooltip":""} @@ -106,8 +108,16 @@ def validate_doc_meta(doc_id, editor): return report +#@profile +def validate_doc_ether(doc_id, editor=False, dirty=True): + """ + Validate a document in spreadsheet mode -def validate_doc_ether(doc_id, editor=False): + :param doc_id: doc ID in the sqlite DB docs table + :param editor: boolean - is this being run by user from editor.py? 
+ :param dirty: boolean - if spreadsheet already cached, has its SocialCalc changed since last recorded timestamp? + :return: dictionary with validation report + """ ether_rules = [EtherValidator(x) for x in get_ether_rules()] export_rules = [ExportValidator(x) for x in get_export_rules()] @@ -116,7 +126,7 @@ def validate_doc_ether(doc_id, editor=False): doc_corpus = doc_info[1] ether_doc_name = "gd_" + doc_corpus + "_" + doc_name - socialcalc = get_socialcalc(ether_url, ether_doc_name) + socialcalc = get_socialcalc(ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) parsed_ether = parse_ether(socialcalc) report = '' @@ -159,7 +169,7 @@ def validate_doc_ether(doc_id, editor=False): export_report = "Export is valid
" if editor: - highlight_cells(cells, ether_url, ether_doc_name) + highlight_cells(cells, ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) full_report = report + meta_validation["report"] + export_report if len(full_report) == 0: full_report = "Document is valid!" @@ -171,6 +181,7 @@ def validate_doc_ether(doc_id, editor=False): json_report['export'] = export_report return json_report +#@profile def validate_doc_xml(doc_id, schema, editor=False): rules = [XmlValidator(x) for x in get_xml_rules()] @@ -214,6 +225,7 @@ def validate_doc_xml(doc_id, schema, editor=False): json_report['meta'] = meta_report return json_report +#@profile def validate_all_docs(): docs = generic_query("SELECT id, name, corpus, mode, schema, validation, timestamp FROM docs", None) doc_timestamps = get_timestamps(ether_url) @@ -227,11 +239,18 @@ def validate_all_docs(): if timestamp == doc_timestamps[ether_name]: reports[doc_id] = json.loads(validation) else: - reports[doc_id] = validate_doc_ether(doc_id) + reports[doc_id] = validate_doc_ether(doc_id, dirty=True) update_validation(doc_id, json.dumps(reports[doc_id])) update_timestamp(doc_id, doc_timestamps[ether_name]) else: - reports[doc_id] = validate_doc_ether(doc_id) + if ether_name in doc_timestamps: + new_time = doc_timestamps[ether_name] + else: + new_time = None + if new_time == timestamp: + reports[doc_id] = validate_doc_ether(doc_id, dirty=False) + else: + reports[doc_id] = validate_doc_ether(doc_id, dirty=True) #reports[doc_id] = {"ether":"sample_ether","meta":"sample_meta"} update_validation(doc_id, json.dumps(reports[doc_id])) if ether_name in doc_timestamps: From 61371f30a8494bf572fbee6d8eb39ed8c28ec95f Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 28 Oct 2018 19:59:02 -0400 Subject: [PATCH 101/135] Hide export validation badge if not set * Reproduces old look & feel if exports are not used * Cleaner dashboard if exports are not used --- js/index.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/js/index.js b/js/index.js index 9d5fbf9..3373a1d 100644 --- a/js/index.js +++ b/js/index.js @@ -66,21 +66,24 @@ function validate_all() { else if (k == "export") { if (v.indexOf("Export is valid") > -1) { color = 'green'; + disp = 'inline-block'; sort3 = 'v'; } else if (v.indexOf("No applicable") > -1) { color = 'gray'; + disp = 'none'; sort3 = 'n'; } else { color = 'red'; + disp = 'inline-block'; sort3 = 'i'; } output3 += '
 ' + v + '
'; } }); if (!output3) { - output3 = '
'; + output3 = '
'; } $("#validate_"+key).before(""); $("#validate_"+key).html(output1 + output2 + output3); From dc055dd794d0a10ba2f6615f9ac89098b2a1f404 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 28 Oct 2018 20:32:02 -0400 Subject: [PATCH 102/135] Typo --- validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validate.py b/validate.py index cf00e18..eeade11 100755 --- a/validate.py +++ b/validate.py @@ -200,7 +200,7 @@ def validate_doc_xml(doc_id, schema, editor=False): xml_report += res xml_rule_fired = xml_rule_fired or fired if not xml_rule_fired: - xml_report = "Mo applicable XML schemas
" + xml_report = "No applicable XML schemas
" elif xml_report: xml_report = "XML problems:
" + xml_report else: From ce5ae77c91902ce6ed5257487994b7557a537b2f Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 28 Oct 2018 23:11:03 -0400 Subject: [PATCH 103/135] remove hidden icon since export icon no longer shares row --- templates/index.mustache | 3 --- 1 file changed, 3 deletions(-) diff --git a/templates/index.mustache b/templates/index.mustache index ef40c88..4b4bb13 100644 --- a/templates/index.mustache +++ b/templates/index.mustache @@ -200,9 +200,6 @@ {{#ether}}   {{/ether}} - {{^ether}} - - {{/ether}}
From e54841b65103be8272b8f328f5721bc15674874a Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Sun, 28 Oct 2018 23:48:16 -0400 Subject: [PATCH 104/135] fix add int to str bug, support >26 cols in import --- modules/ether.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index d833514..dea1f08 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -284,7 +284,7 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase flushed += ("cell:" + colmap[alias][stack_len - 1] + str(last_start[alias][-1]) - + ":t:" + last_value[alias][-1] + + ":t:" + str(last_value[alias][-1]) + ":f:1:tvf:1" + span_string + "\n") # pop the stack since we've closed a tag @@ -295,12 +295,10 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase return flushed -def number_to_letter(number): - # Currently support up to 26 columns; no support for multiletter column headers beyond letter Z - if number < 27: - return chr(number + ord('a')-1).upper() - else: - return None +def number_to_letters(number): + char1 = chr((number // 26) + ord('a')-1).upper() + char2 = chr((number % 26) + ord('a')-1).upper() + return char1 + char2 def sgml_to_ether(sgml, ignore_elements=False): @@ -383,11 +381,11 @@ def sgml_to_ether(sgml, ignore_elements=False): if anno_name not in colmap: maxcol += 1 - colmap[anno_name] = [number_to_letter(maxcol)] + colmap[anno_name] = [number_to_letters(maxcol)] elif anno_name in colmap and \ len(last_start[anno_name]) > len(colmap[anno_name]): maxcol += 1 - colmap[anno_name].append(number_to_letter(maxcol)) + colmap[anno_name].append(number_to_letters(maxcol)) elif len(line) > 0: # Token token = line.strip() From d9cbe5575008b22a607ba9e7d732ebb913e72b24 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 29 Oct 2018 00:33:41 -0400 Subject: [PATCH 105/135] add missing dep on six --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git 
a/requirements.txt b/requirements.txt index 234b42b..e49aa9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +six lxml requests github3.py==0.9.3 From c55cb73d706119db8ac8ec0ff643c8b60055a9cb Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 29 Oct 2018 00:34:23 -0400 Subject: [PATCH 106/135] extend highlight_cell to highlight a cell if it gets a cell that is contained in a big cell's span --- validate.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/validate.py b/validate.py index eeade11..40816e0 100755 --- a/validate.py +++ b/validate.py @@ -41,15 +41,20 @@ def highlight_cells(cells, ether_url, ether_doc_name, doc_id=None, dirty=True): if parts[2] == "f": # Pure formatting cell, no content continue - parsed_cell = re.match(r'cell:([A-Z]+\d+)(:.*)$', line) + parsed_cell = re.match(r'cell:([A-Z]+)(\d+)(:.*)$', line) if parsed_cell is not None: - col_row = parsed_cell.group(1) - other = parsed_cell.group(2) + col = parsed_cell.group(1) + row = parsed_cell.group(2) + col_row = col + row + other = parsed_cell.group(3) bg = re.search(r':bg:(\d+)($|:)', other) if bg is not None: bg = bg.group(1) + span = parts[-1] if "rowspan:" in line else "1" - if col_row in cells: + spanned_rows = [col + str(int(row) + x) for x in range(int(span))] + highlighted_spanned_rows = [x for x in spanned_rows if x in cells] + if len(highlighted_spanned_rows) > 0: if bg is not None: if bg != new_color_number: new_line = re.sub(r':bg:' + bg, r':bg:' + new_color_number, line) From 4379bb1027d7809ce6a8c44b53b18996db39d157 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 29 Oct 2018 00:40:47 -0400 Subject: [PATCH 107/135] Remove old comment --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a4d533f..7c2a642 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,6 @@ RUN chown -R www-data:www-data /var/www/html RUN chmod +x /var/www/html/*.py RUN chmod +x /var/www/html/modules/*.py -# keep 
these in sync with requirements.txt RUN pip install -r /var/www/html/requirements.txt # install ethercalc and run as a service From 63a4f85254213707cccc7b9b97c93d2adf5bce89 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 29 Oct 2018 13:17:39 -0400 Subject: [PATCH 108/135] link node to nodejs --- Dockerfile | 1 + templates/{partials => }/codemirror.mustache | 0 templates/{partials => }/ethercalc.mustache | 0 templates/{partials => }/header.mustache | 0 4 files changed, 1 insertion(+) rename templates/{partials => }/codemirror.mustache (100%) rename templates/{partials => }/ethercalc.mustache (100%) rename templates/{partials => }/header.mustache (100%) diff --git a/Dockerfile b/Dockerfile index 7c2a642..b33c260 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,6 +46,7 @@ RUN echo " \n\ # service and (2) not to use supervisord to manage the execution of these # processes. But (1) is too heavy a solution, and (2) seems unnecessary unless # one of our services leaks memory/is unstable +RUN echo "ln -s /usr/bin/nodejs /usr/bin/node" >> /etc/startup.sh RUN echo "/usr/bin/redis-server &" >> /etc/startup.sh RUN echo "/usr/local/bin/ethercalc &" >> /etc/startup.sh RUN echo "/usr/sbin/apache2ctl -D FOREGROUND" >> /etc/startup.sh diff --git a/templates/partials/codemirror.mustache b/templates/codemirror.mustache similarity index 100% rename from templates/partials/codemirror.mustache rename to templates/codemirror.mustache diff --git a/templates/partials/ethercalc.mustache b/templates/ethercalc.mustache similarity index 100% rename from templates/partials/ethercalc.mustache rename to templates/ethercalc.mustache diff --git a/templates/partials/header.mustache b/templates/header.mustache similarity index 100% rename from templates/partials/header.mustache rename to templates/header.mustache From 6a6ceea72b4101a1b001d44560a32bcb5c916f1c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Mon, 29 Oct 2018 13:18:05 -0400 Subject: [PATCH 109/135] remove partials/ dir in 
templates --- modules/renderer.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/renderer.py b/modules/renderer.py index a7e8349..c29dc7f 100644 --- a/modules/renderer.py +++ b/modules/renderer.py @@ -27,13 +27,14 @@ def render(template_name, variables, template_dir='templates', file_ext=".mustac Returns: str: rendered HTML. """ - # load shared Mustache templates so we can reference them in our large templates - partials_dir = prefix + template_dir + os.sep + 'partials' + os.sep - partials = dict([(filename[:-len(file_ext)], open(partials_dir + filename, 'r').read()) - for filename in os.listdir(prefix + template_dir + os.sep + 'partials') - if filename.endswith(file_ext)]) - renderer = Renderer(partials=partials) + template_dir = prefix + template_dir + + # load Mustache templates so we can reference them in our large templates + templates = dict([(filename[:-len(file_ext)], open(template_dir + os.sep + filename, 'r').read()) + for filename in os.listdir(template_dir) + if filename.endswith(file_ext)]) + renderer = Renderer(partials=templates) variables['skin_stylesheet'] = config['skin'] variables['navbar_html'] = get_menu() - return renderer.render_path(prefix + template_dir + os.sep + template_name + file_ext, variables) + return renderer.render_path(template_dir + os.sep + template_name + file_ext, variables) From e20ee9cef3c1672fd8f314bd90017c28d9804c38 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 30 Oct 2018 02:12:55 -0400 Subject: [PATCH 110/135] fix span eq case where same cells are covered by spans of diff length --- modules/validation/ether_validator.py | 120 +++++++++++++++++++------- 1 file changed, 88 insertions(+), 32 deletions(-) diff --git a/modules/validation/ether_validator.py b/modules/validation/ether_validator.py index 3f67e95..fab8156 100644 --- a/modules/validation/ether_validator.py +++ b/modules/validation/ether_validator.py @@ -113,7 +113,20 @@ def _binary_op_setup(self, parsed_ether): 
if row not in all_rows: all_rows.append(row) - return name_letters, arg_letters, name_tuples, arg_tuples, start_rows, all_rows + name_start_cells = [] + name_start_rows = set() # for O(1) lookup + for letter in name_letters: + name_start_cells += [(letter, row) for row in start_rows[letter]] + name_start_rows = name_start_rows.union(set(row for row in start_rows[letter])) + + arg_start_cells = [] + arg_start_rows = set() + for letter in arg_letters: + arg_start_cells += [(letter, row) for row in start_rows[letter]] + arg_start_rows = arg_start_rows.union(set(row for row in start_rows[letter])) + + return name_letters, arg_letters, name_tuples, arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, arg_start_cells, arg_start_rows def _apply_subspan(self, parsed_ether): report = '' @@ -128,7 +141,9 @@ def _apply_subspan(self, parsed_ether): return report, tooltip, cells name_letters, arg_letters, name_tuples, \ - arg_tuples, start_rows, all_rows = self._binary_op_setup(parsed_ether) + arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, \ + arg_start_cells, arg_start_rows = self._binary_op_setup(parsed_ether) for row in all_rows: # check to see if all cells in rhs are contained within cells on lhs @@ -154,24 +169,44 @@ def _apply_equal_span_length(self, parsed_ether): return report, tooltip, cells name_letters, arg_letters, name_tuples, \ - arg_tuples, start_rows, all_rows = self._binary_op_setup(parsed_ether) + arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, \ + arg_start_cells, arg_start_rows = self._binary_op_setup(parsed_ether) for row in all_rows: + if row == "1": + continue name_len = len(name_tuples[row]) arg_len = len(arg_tuples[row]) if name_len > arg_len: for letter, _ in name_tuples[row][arg_len:]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " lacks a corresponding value in one of these columns: " - + ", ".join(arg_letters) + "
") + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") elif arg_len > name_len: for letter, _ in arg_tuples[row][name_len:]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " lacks a corresponding value in one of these columns: " - + ", ".join(name_letters) + "
") + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + for letter, row in name_start_cells: + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(arg_letters) + "
") + + for letter, row in arg_start_cells: + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(name_letters) + "
") return report, tooltip, cells @@ -187,37 +222,58 @@ def _apply_equal_span_length_and_content(self, parsed_ether): report += err return report, tooltip, cells + name_letters, arg_letters, name_tuples, \ - arg_tuples, start_rows, all_rows = self._binary_op_setup(parsed_ether) + arg_tuples, start_rows, all_rows, \ + name_start_cells, name_start_rows, \ + arg_start_cells, arg_start_rows = self._binary_op_setup(parsed_ether) for row in all_rows: + if row == "1": + continue + name_len = len(name_tuples[row]) arg_len = len(arg_tuples[row]) if name_len > arg_len: for letter, _ in name_tuples[row][arg_len:]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " lacks a corresponding value in one of these columns: " - + ", ".join(arg_letters) + "
") + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(arg_letters) + "
") elif arg_len > name_len: for letter, _ in arg_tuples[row][name_len:]: - cells.append(letter + row) - report += ("Cell " + letter + row - + " lacks a corresponding value in one of these columns: " - + ", ".join(name_letters) + "
") - - if row != "1": - for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): - name_letter, name_content = name_tuples[row][i] - arg_letter, arg_content = arg_tuples[row][i] - - if arg_content != name_content and (row in start_rows[arg_letter] or row in start_rows[name_letter]): - cells.append(name_letter + row) - cells.append(arg_letter + row) - report += ("Cells " + name_letter + row - + " and " + arg_letter + row - + " must have equivalent content.
") + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " has no corresponding value in one of these columns: " + + ", ".join(name_letters) + "
") + + for i in range(min(len(name_tuples[row]), len(arg_tuples[row]))): + name_letter, name_content = name_tuples[row][i] + arg_letter, arg_content = arg_tuples[row][i] + + if arg_content != name_content and (row in start_rows[arg_letter] or row in start_rows[name_letter]): + cells.append(name_letter + row) + cells.append(arg_letter + row) + report += ("Cells " + name_letter + row + + " and " + arg_letter + row + + " must have equivalent content.
") + + for letter, row in name_start_cells: + if row not in arg_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(arg_letters) + "
") + + for letter, row in arg_start_cells: + if row not in name_start_rows: + cells.append(letter + row) + report += ("Cell " + letter + row + + " needs a span of equal length beginning in one of these columns: " + + ", ".join(name_letters) + "
") return report, tooltip, cells From c6875fc78dc8866ca7c48a2d9fab6592e984826e Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Tue, 30 Oct 2018 13:07:54 -0400 Subject: [PATCH 111/135] Add support for fixed values in template export * Values can be fixed using '=' in a stylesheet .ini file in schemas/: * `"verse":"div@type=textpart"` * This causes all verse annotations to generate `
`, regardless of the original annotation value * Fixes 1. of outstanding issues in #54 - generating fixed attribute values from GitDox --- modules/ether.py | 21 +++++++++++++++------ schemas/scriptorium_tei.ini | 10 ++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index dea1f08..175aeeb 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -616,6 +616,11 @@ def ether_to_sgml(ether, doc_id,config=None): element = col_name attrib = element + # Check whether attrib contains a constant value instruction + const_val = "" + if "=" in attrib: + attrib, const_val = attrib.split("=",1) + # Check to see if the cell has been merged with other cells if 'rowspan' in cell[2]: rowspan = int(cell[2]['rowspan']) @@ -657,12 +662,16 @@ def ether_to_sgml(ether, doc_id,config=None): close_tags[row].sort(key=lambda x: (last_open_index[x],config.priorities.index(x)), reverse=True) last_row = row - if 't' in cell[2]: # cell contains text - content = cell[2]['t'] - elif 'v' in cell[2]: # cell contains numerical value - content = cell[2]['v'] - elif col_name != 'tok': - continue # cell does not contain a value and this is not a token entry + + if const_val != "": + content = const_val + else: + if 't' in cell[2]: # cell contains text + content = cell[2]['t'] + elif 'v' in cell[2]: # cell contains numerical value + content = cell[2]['v'] + elif col_name != 'tok': + continue # cell does not contain a value and this is not a token entry if col_name == 'tok': if "<" in content or "&" in content or ">" in content: diff --git a/schemas/scriptorium_tei.ini b/schemas/scriptorium_tei.ini index 90b6a5e..870ec67 100644 --- a/schemas/scriptorium_tei.ini +++ b/schemas/scriptorium_tei.ini @@ -1,11 +1,9 @@ -aliases = 
"""{"norm_group":"phr","chapter_n":"div1@n","verse_n":"p@n","orig":"w","pos":"w@type","lemma":"w@lemma","coptic_sent":"s","orig_group":"phr","pb@xml_id":"pb@xml:id","pb_xml_id":"pb@xml:id","morph":"m","verse":"div@n","cb_n":"cb@n","lb_n":"lb@n","hi_rend":"hi@rend","translation":"s@style","p_n":"p","p@n":"p","lang":"m|w@xml:lang"}""" -priorities = """["div","ab","div1","div2","p","pb","cb","lb","s","term","phr","w","hi","m","tok"]""" +aliases = """{"norm_group":"phr","chapter_n":"div1@n","verse_n":"p@n","orig":"w","pos":"w@type","lemma":"w@lemma","coptic_sent":"s","orig_group":"phr","pb@xml_id":"pb@xml:id","pb_xml_id":"pb@xml:id","morph":"m","verse":"ab@n","cb_n":"cb@n","lb_n":"lb@n","vid_n":"div@type=textpart","verse":"ab","hi_rend":"hi@rend","translation":"s@style","p_n":"p","p@n":"p","lang":"m|w@xml:lang"}""" +priorities = """["div","ab","div1","div2","div","p","pb","cb","lb","s","term","phr","w","hi","m","tok"]""" milestones = """["pb","lb","cb"]""" no_content = """["w","m","s","phr","div","div1","ab","div2","p","cb","pb","lb","term","hi"]""" export_all= False -template = """ - - +template = """ @@ -85,4 +83,4 @@ template = """ -""" \ No newline at end of file +""" From a6598b4b2339ceb16a99bb88b5c7f4aa9d431ee1 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Tue, 30 Oct 2018 16:05:07 -0400 Subject: [PATCH 112/135] bugfix - only return two letter columns if col > 26 * @lgessler - if we add unit tests this could be one, since if number>letter conversion fails, it breaks transformation into spreadsheet --- modules/ether.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/ether.py b/modules/ether.py index 175aeeb..a40cfb4 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -296,9 +296,12 @@ def flush_close(closing_element, last_value, last_start, row_num, colmap, aliase def number_to_letters(number): - char1 = chr((number // 26) + ord('a')-1).upper() - char2 = chr((number % 26) + ord('a')-1).upper() - return char1 + char2 + if 
number < 27: + return chr(number + ord('a') - 1).upper() + else: + char1 = chr((number // 26) + ord('a')-1).upper() + char2 = chr((number % 26) + ord('a')-1).upper() + return char1 + char2 def sgml_to_ether(sgml, ignore_elements=False): From b5e8d6575210795bc9b80cc9466f4c701b262054 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 30 Oct 2018 17:16:35 -0400 Subject: [PATCH 113/135] put validation rules tables into tabs --- css/gitdox.css | 35 +++++++++++++++++++++++++++-- js/validation_rules.js | 12 ++++++++++ templates/validation_rules.mustache | 27 +++++++++++----------- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/css/gitdox.css b/css/gitdox.css index 8205a60..e56913b 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -164,8 +164,39 @@ padding: 10px; height: 12.8px; } -#ValidationTableContainer{ -width: 100%} +/* for validation_rules */ +ul.tabs{ + margin: 0px; + padding: 0px; + list-style: none; +} + +ul.tabs li{ + background: none; + display: inline-block; + padding: 10px 15px; + cursor: pointer; + min-width: 100px; + font-size: 18px; + font-weight: 600; + color: #222; + text-align: center; +} + +ul.tabs li.current{ + background: #ededed; + color: #222; +} + +.tab-content{ + display: none; + background: #ededed; + padding: 15px; +} + +.tab-content.current{ + display: inherit; +} .jtable td{ word-break: break-all; diff --git a/js/validation_rules.js b/js/validation_rules.js index 48315b6..2d7c89a 100644 --- a/js/validation_rules.js +++ b/js/validation_rules.js @@ -221,3 +221,15 @@ $(document).ready(function () { }); $('#export-table-container').jtable('load'); }); + +$(document).ready(function(){ + $('ul.tabs li').click(function(){ + var tab_id = $(this).attr('data-tab'); + + $('ul.tabs li').removeClass('current'); + $('.tab-content').removeClass('current'); + + $(this).addClass('current'); + $("#"+tab_id).addClass('current'); + }); +}); diff --git a/templates/validation_rules.mustache b/templates/validation_rules.mustache index 
74032da..48e2553 100644 --- a/templates/validation_rules.mustache +++ b/templates/validation_rules.mustache @@ -32,11 +32,12 @@ {{> header}}
-

GitDox - Validation

-

- validation rule management | back to document list +

GitDox - Validation Rules

+

+ return to index

+ +
    + + + + +
-
-
-
-
-
-
-
-
-
-
+
+
+
+
From 241d2eee92bebf229d05ddd50da533a16bf10844 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Tue, 30 Oct 2018 20:30:22 -0400 Subject: [PATCH 114/135] make metadata editing async in editor.py --- css/gitdox.css | 16 +++++ js/editor.js | 51 +++++++++++++++ modules/editor_metadata.py | 96 +++++++++++++++++++++++++++++ templates/editor.mustache | 29 +++------ templates/validation_rules.mustache | 17 ----- 5 files changed, 172 insertions(+), 37 deletions(-) create mode 100755 modules/editor_metadata.py diff --git a/css/gitdox.css b/css/gitdox.css index e56913b..876727b 100644 --- a/css/gitdox.css +++ b/css/gitdox.css @@ -198,6 +198,22 @@ ul.tabs li.current{ display: inherit; } +/* override jtable styles for consistency with rest of gitdox */ +.jtable-title { + font-size: 16px !important; + border: none !important; + border-radius: 0 !important; + background: #eeeeee !important; +} +table.jtable { + border: none !important; +} + +.ui-widget-overlay { + opacity: 0.5 !important; +} + + .jtable td{ word-break: break-all; } diff --git a/js/editor.js b/js/editor.js index c6f6ab7..5ecea1d 100755 --- a/js/editor.js +++ b/js/editor.js @@ -44,3 +44,54 @@ function export_ether(){ window.open('export.py?docs=' + doc_id + '&stylesheet=' + stylesheet, '_new'); } + +$(document).ready(function () { + // get id from hidden form element. 
Watch out, might break in the future + var docid = $("#id").val(); + $('#metadata-table-container').jtable({ + title: 'Metadata', + sorting: true, + actions: { + listAction: function (postData, jtParams) { + jtParams.domain = 'meta'; + return $.Deferred(function ($dfd) { + $.ajax({ + url: 'modules/editor_metadata.py?action=list&docid=' + docid, + type: 'POST', + dataType: 'json', + data: jtParams, + success: function (data) { + $dfd.resolve(data); + }, + error: function() { + $dfd.reject(); + } + }); + }); + }, + createAction: 'modules/editor_metadata.py?action=create', + deleteAction: 'modules/editor_metadata.py?action=delete&docid=' + docid + }, + fields: { + id: { + title: 'ID', + key: true, + visibility:'hidden' + }, + docid: { + title: 'Document ID', + defaultValue: docid, + type: 'hidden' + }, + key: { + title: 'Key', + options: 'modules/editor_metadata.py?action=keys' + }, + value: { + title: 'Value' + } + } + }); + $('#metadata-table-container').jtable('load'); +}); + diff --git a/modules/editor_metadata.py b/modules/editor_metadata.py new file mode 100755 index 0000000..fc2c0c4 --- /dev/null +++ b/modules/editor_metadata.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from gitdox_sql import * +import json +import cgi +import os +import platform + +parameter = cgi.FieldStorage() +action = parameter.getvalue("action") +id = parameter.getvalue("id") +docid = parameter.getvalue("docid") +key = parameter.getvalue("key") +value = parameter.getvalue("value") + +if platform.system() == "Windows": + prefix = "transc\\" +else: + prefix = "" + +def read_options(**kwargs): + if "file" in kwargs: + kwargs["file"] = prefix + kwargs["file"] + names = open(kwargs["file"],'r').read().replace("\r","").split("\n") + names = list(name[:name.find("\t")] for name in names) + elif "names" in kwargs: + names = kwargs[names] + selected = kwargs["selected"] if "selected" in kwargs else None + return names + +def row_to_dict(row): + return {'id': row[1], + 
'docid': row[0], + 'key': row[2], + 'value': row[3]} + +def get_metadata(): + resp = {} + try: + resp['Result'] = 'OK' + resp['Records'] = [row_to_dict(r) for r in get_doc_meta(docid)] + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not fetch metadata' + print json.dumps(resp) + +def get_default_key_options(): + resp = {} + try: + resp['Result'] = 'OK' + resp['Options'] = read_options(file='..' + os.sep + 'metadata_fields.tab') + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not fetch metadata key options' + print json.dumps(resp) + +def create_metadata(): + resp = {} + try: + save_meta(int(docid), key.decode("utf8"), value.decode("utf8")) + resp['Result'] = 'OK' + resp['Record'] = {'docid': docid, + 'key': key, + 'value': value} + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not create metadata' + print json.dumps(resp) + +def delete_metadata(): + resp = {} + try: + delete_meta(int(id), int(docid)) + resp['Result'] = 'OK' + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Could not delete metadata' + print json.dumps(resp) + +print "Content-type:application/json\r\n\r\n" +if action == "list": + get_metadata() +elif action == "create": + create_metadata() +elif action == "delete": + delete_metadata() +elif action == "keys": + get_default_key_options() +else: + print json.dumps({'Result': 'Error', + 'Message': 'Unknown action: "' + str(action) + '"'}) diff --git a/templates/editor.mustache b/templates/editor.mustache index f331cc9..cc72b94 100644 --- a/templates/editor.mustache +++ b/templates/editor.mustache @@ -14,28 +14,18 @@ + + + + + + - - - - + + - {{{navbar_html}}} @@ -153,8 +143,7 @@ {{> codemirror}} {{/ether_mode}} -

Metadata

- {{{metadata_html}}} +
diff --git a/templates/validation_rules.mustache b/templates/validation_rules.mustache index 48e2553..f954c83 100644 --- a/templates/validation_rules.mustache +++ b/templates/validation_rules.mustache @@ -37,23 +37,6 @@ return to index

- - -
    From 75eb60d83fae73d0e500dbe7b9c9e52d8fd08010 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 01:28:19 -0400 Subject: [PATCH 115/135] add autocomplete to metadata table --- index.py | 3 ++- js/editor.js | 18 ++++++++++++++++-- modules/editor_metadata.py | 5 +++-- modules/gitdox_sql.py | 15 ++++++++++----- templates/editor.mustache | 5 ----- templates/popup_meta.mustache | 4 ++-- 6 files changed, 33 insertions(+), 17 deletions(-) diff --git a/index.py b/index.py index dee98e0..c8b1c91 100755 --- a/index.py +++ b/index.py @@ -49,7 +49,8 @@ def gen_meta_popup(): options = read_options(file='corpus_metadata_fields.tab') with open(prefix + 'popupPageCorpus.html', 'w') as f: - f.write(render("popup_meta", {"options": options})) + f.write(render("popup_meta", {"options": options, + "prefix": "corpus_"})) def load_landing(user, admin, theform): gen_meta_popup() diff --git a/js/editor.js b/js/editor.js index 5ecea1d..ac2c34b 100755 --- a/js/editor.js +++ b/js/editor.js @@ -84,14 +84,28 @@ $(document).ready(function () { type: 'hidden' }, key: { - title: 'Key', - options: 'modules/editor_metadata.py?action=keys' + title: 'Key' }, value: { title: 'Value' } + }, + // for autocomplete support https://github.com/volosoft/jtable/issues/115 + formCreated: function(event, formData) { + $.ajax({ + url: 'modules/editor_metadata.py?action=keys', + type: 'POST', + dataType: 'json', + data: {}, + success: function(data) { + formData.form.find('[name=key]').autocomplete({ + source: data['Options'] + }); + } + }); } }); + $('#metadata-table-container').jtable('load'); }); diff --git a/modules/editor_metadata.py b/modules/editor_metadata.py index fc2c0c4..0bb595c 100755 --- a/modules/editor_metadata.py +++ b/modules/editor_metadata.py @@ -60,9 +60,10 @@ def get_default_key_options(): def create_metadata(): resp = {} try: - save_meta(int(docid), key.decode("utf8"), value.decode("utf8")) + id = save_meta(int(docid), key.decode("utf8"), value.decode("utf8")) 
resp['Result'] = 'OK' - resp['Record'] = {'docid': docid, + resp['Record'] = {'id': id, + 'docid': docid, 'key': key, 'value': value} print json.dumps(resp) diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py index 990a5e4..c977732 100755 --- a/modules/gitdox_sql.py +++ b/modules/gitdox_sql.py @@ -66,7 +66,7 @@ def set_cache(doc_id, cache_contents): generic_query("UPDATE docs SET cache = ? WHERE id = ?",(cache_contents,doc_id)) -def generic_query(sql, params): +def generic_query(sql, params, return_new_id=False): # generic_query("DELETE FROM rst_nodes WHERE doc=? and project=?",(doc,project)) dbpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "gitdox.db" @@ -79,8 +79,11 @@ def generic_query(sql, params): else: cur.execute(sql) - rows = cur.fetchall() - return rows + if return_new_id: + return cur.lastrowid + else: + rows = cur.fetchall() + return rows def invalidate_doc_by_name(doc,corpus): @@ -178,11 +181,13 @@ def print_meta(doc_id, corpus=False): def save_meta(doc_id,key,value,corpus=False): if corpus: _, corpus_name, _, _, _, _, _ = get_doc_info(doc_id) - generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)", (None,key, value,corpus_name)) + new_id = generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)", (None,key, value,corpus_name), return_new_id = True) else: - generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)",(doc_id,key,value,None)) + new_id = generic_query("INSERT OR REPLACE INTO metadata(docid,key,value,corpus_meta) VALUES(?,?,?,?)",(doc_id,key,value,None), return_new_id = True) invalidate_doc_by_id(doc_id) + return new_id + def delete_meta(metaid, doc_id, corpus=False): generic_query("DELETE FROM metadata WHERE metaid=?", (metaid,)) if not corpus: diff --git a/templates/editor.mustache b/templates/editor.mustache index cc72b94..e0a6ef7 100644 --- a/templates/editor.mustache +++ 
b/templates/editor.mustache @@ -150,11 +150,6 @@

    {{{corpus_metadata_html}}} -
    - - Add Document Metadata -
    -

    diff --git a/templates/popup_meta.mustache b/templates/popup_meta.mustache index e95ad5e..de0c744 100644 --- a/templates/popup_meta.mustache +++ b/templates/popup_meta.mustache @@ -2,8 +2,8 @@ - - -
    - field name (e.g., author):
    - - - -
    - field value (e.g., Besa):
    -
    - -
    - - diff --git a/popupPageCorpus.html b/popupPageCorpus.html deleted file mode 100644 index 9fab3c4..0000000 --- a/popupPageCorpus.html +++ /dev/null @@ -1,34 +0,0 @@ - - - - - -
    - field name (e.g., author):
    - - - -
    - field value (e.g., Besa):
    -
    - -
    - - diff --git a/templates/editor.mustache b/templates/editor.mustache index c84e964..d6c99c7 100644 --- a/templates/editor.mustache +++ b/templates/editor.mustache @@ -149,18 +149,6 @@
- - - -

- {{{corpus_metadata_html}}} - - - -

- -
Add Corpus Metadata
-
diff --git a/templates/popup_meta.mustache b/templates/popup_meta.mustache deleted file mode 100644 index de0c744..0000000 --- a/templates/popup_meta.mustache +++ /dev/null @@ -1,29 +0,0 @@ - - - - - -
- field name (e.g., author):
- - - {{#options}} - -
- field value (e.g., Besa):
-
- -
- - From f5cead90966a7f7ad37501fd624f8aa64411a38e Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 13:29:57 -0400 Subject: [PATCH 120/135] add doesntexist operator for ethercalc validation --- js/validation_rules.js | 2 +- modules/validation/ether_validator.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/js/validation_rules.js b/js/validation_rules.js index 2d7c89a..b69caf8 100644 --- a/js/validation_rules.js +++ b/js/validation_rules.js @@ -156,7 +156,7 @@ $(document).ready(function () { }, operator: { title: 'Operator', - options: ['~', '|', '=', '==', '>', 'exists'] + options: ['~', '|', '=', '==', '>', 'exists', 'doesntexist'] }, argument: { title: 'Argument' diff --git a/modules/validation/ether_validator.py b/modules/validation/ether_validator.py index fab8156..2801d0b 100644 --- a/modules/validation/ether_validator.py +++ b/modules/validation/ether_validator.py @@ -21,7 +21,19 @@ def _apply_exists(self, parsed_ether): col_letters = colmap[self.name] # list of letters with col name if len(col_letters) == 0: - report += "Column named " + self.name + " not found
" + report += "Column named '" + self.name + "' not found
" + return report, tooltip, cells + + def _apply_doesntexist(self, parsed_ether): + report = '' + tooltip = '' + cells = [] + colmap = parsed_ether['__colmap__'] # name -> list of col letters + col_letters = colmap[self.name] # list of letters with col name + + if len(col_letters) > 0: + report += "Columns named '" + self.name + "' are not allowed
" + cells += [letter + "1" for letter in col_letters] return report, tooltip, cells def _apply_span_equals_number(self, parsed_ether): @@ -283,6 +295,8 @@ def _apply_rule(self, parsed_ether): if self.operator == "exists": return self._apply_exists(parsed_ether) + if self.operator == "doesntexist": + return self._apply_doesntexist(parsed_ether) elif self.operator == "|": return self._apply_span_equals_number(parsed_ether) elif self.operator == "~": From d544d33a4457c10ff49c74e44d9c9ed97e36237c Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 17:53:42 -0400 Subject: [PATCH 121/135] require login for using editor's metadata service --- ..._metadata.py => editor_metadata_service.py | 58 ++++++++++++------- js/editor.js | 17 +++--- modules/gitdox_sql.py | 22 ++++--- 3 files changed, 58 insertions(+), 39 deletions(-) rename modules/editor_metadata.py => editor_metadata_service.py (62%) diff --git a/modules/editor_metadata.py b/editor_metadata_service.py similarity index 62% rename from modules/editor_metadata.py rename to editor_metadata_service.py index 9f77a2a..cb3cc88 100755 --- a/modules/editor_metadata.py +++ b/editor_metadata_service.py @@ -1,11 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from gitdox_sql import * import json import cgi import os import platform +from modules.gitdox_sql import * +from modules.logintools import login parameter = cgi.FieldStorage() action = parameter.getvalue("action") @@ -21,14 +22,14 @@ prefix = "" def read_options(**kwargs): - if "file" in kwargs: - kwargs["file"] = prefix + kwargs["file"] - names = open(kwargs["file"],'r').read().replace("\r","").split("\n") - names = list(name[:name.find("\t")] for name in names) - elif "names" in kwargs: - names = kwargs[names] - selected = kwargs["selected"] if "selected" in kwargs else None - return names + if "file" in kwargs: + kwargs["file"] = prefix + kwargs["file"] + names = open(kwargs["file"],'r').read().replace("\r","").split("\n") + names = 
list(name[:name.find("\t")] for name in names) + elif "names" in kwargs: + names = kwargs[names] + selected = kwargs["selected"] if "selected" in kwargs else None + return names def row_to_dict(row): return {'id': row[1], @@ -87,15 +88,30 @@ def delete_metadata(): resp['Message'] = 'Could not delete metadata' print json.dumps(resp) -print "Content-type:application/json\r\n\r\n" -if action == "list": - get_metadata() -elif action == "create": - create_metadata() -elif action == "delete": - delete_metadata() -elif action == "keys": - get_default_key_options() -else: - print json.dumps({'Result': 'Error', - 'Message': 'Unknown action: "' + str(action) + '"'}) + + +def open_main_server(): + thisscript = os.environ.get('SCRIPT_NAME', '') + loginaction = None + theform = cgi.FieldStorage() + scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + userdir = scriptpath + "users" + os.sep + loginaction, userconfig = login(theform, userdir, thisscript, loginaction) + user = userconfig["username"] + admin = userconfig["admin"] + + print "Content-type:application/json\r\n\r\n" + if action == "list": + get_metadata() + elif action == "create": + create_metadata() + elif action == "delete": + delete_metadata() + elif action == "keys": + get_default_key_options() + else: + print json.dumps({'Result': 'Error', + 'Message': 'Unknown action: "' + str(action) + '"'}) + +if __name__ == '__main__': + open_main_server() diff --git a/js/editor.js b/js/editor.js index fa1eb2d..dfbb688 100755 --- a/js/editor.js +++ b/js/editor.js @@ -56,7 +56,7 @@ $(document).ready(function () { jtParams.domain = 'meta'; return $.Deferred(function ($dfd) { $.ajax({ - url: 'modules/editor_metadata.py?action=list&docid=' + docid, + url: 'editor_metadata_service.py?action=list&docid=' + docid, type: 'POST', dataType: 'json', data: jtParams, @@ -69,8 +69,8 @@ $(document).ready(function () { }); }); }, - createAction: 'modules/editor_metadata.py?action=create', - deleteAction: 
'modules/editor_metadata.py?action=delete&docid=' + docid + createAction: 'editor_metadata_service.py?action=create', + deleteAction: 'editor_metadata_service.py?action=delete&docid=' + docid }, fields: { id: { @@ -95,7 +95,7 @@ $(document).ready(function () { // for autocomplete support https://github.com/volosoft/jtable/issues/115 formCreated: function(event, formData) { $.ajax({ - url: 'modules/editor_metadata.py?action=keys', + url: 'editor_metadata_service.py?action=keys', type: 'POST', dataType: 'json', data: {}, @@ -122,7 +122,7 @@ $(document).ready(function () { jtParams.domain = 'meta'; return $.Deferred(function ($dfd) { $.ajax({ - url: 'modules/editor_metadata.py?corpus=true&action=list&docid=' + docid, + url: 'editor_metadata_service.py?corpus=true&action=list&docid=' + docid, type: 'POST', dataType: 'json', data: jtParams, @@ -135,8 +135,8 @@ $(document).ready(function () { }); }); }, - createAction: 'modules/editor_metadata.py?corpus=true&action=create', - deleteAction: 'modules/editor_metadata.py?corpus=true&action=delete&docid=' + docid + createAction: 'editor_metadata_service.py?corpus=true&action=create', + deleteAction: 'editor_metadata_service.py?corpus=true&action=delete&docid=' + docid }, fields: { id: { @@ -145,7 +145,6 @@ $(document).ready(function () { visibility:'hidden' }, docid: { - title: 'Document ID', defaultValue: docid, type: 'hidden' }, @@ -161,7 +160,7 @@ $(document).ready(function () { // for autocomplete support https://github.com/volosoft/jtable/issues/115 formCreated: function(event, formData) { $.ajax({ - url: 'modules/editor_metadata.py?corpus=true&action=keys', + url: 'editor_metadata_service.py?corpus=true&action=keys', type: 'POST', dataType: 'json', data: {}, diff --git a/modules/gitdox_sql.py b/modules/gitdox_sql.py index a9237bb..5977d3c 100755 --- a/modules/gitdox_sql.py +++ b/modules/gitdox_sql.py @@ -188,23 +188,27 @@ def get_doc_meta(doc_id, corpus=False): def get_corpora(): return generic_query("SELECT DISTINCT 
corpus FROM docs ORDER BY corpus COLLATE NOCASE", None) -def get_validate_rules(): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate", None) +def get_validate_rules(sort=None, domain=None): + query = "SELECT corpus, doc, domain, name, operator, argument, id FROM validate" + args = [] + if domain: + query += " WHERE domain=? " + args.append(domain) + if sort: + query += " ORDER BY " + sort + return generic_query(query, args) def get_xml_rules(): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'xml'", None) + return get_validate_rules(domain='xml') def get_meta_rules(): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'meta'", None) + return get_validate_rules(domain='meta') def get_ether_rules(): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'ether'", None) + return get_validate_rules(domain='ether') def get_export_rules(): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate WHERE domain = 'export'", None) - -def get_sorted_rules(sort): - return generic_query("SELECT corpus, doc, domain, name, operator, argument, id FROM validate ORDER BY " + sort, None) # parameterization doesn't work for order by + return get_validate_rules(domain='export') def create_validate_rule(doc, corpus, domain, name, operator, argument): new_id = generic_query("INSERT INTO validate(doc,corpus,domain,name,operator,argument) VALUES(?,?,?,?,?,?)", (doc, corpus, domain, name, operator, argument), return_new_id = True) From e4bcaec991b6e18f86babc231c00e583fb079101 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 17:54:52 -0400 Subject: [PATCH 122/135] consolidate modules/jtable* in validation_rules_service.py --- js/validation_rules.js | 58 +++++++-------- modules/jtable_create_rule.py | 41 ----------- 
modules/jtable_delete_rule.py | 26 ------- modules/jtable_rule_list.py | 44 ----------- modules/jtable_schema_list.py | 32 -------- modules/jtable_update_rule.py | 32 -------- validation_rules_service.py | 134 ++++++++++++++++++++++++++++++++++ 7 files changed, 161 insertions(+), 206 deletions(-) delete mode 100755 modules/jtable_create_rule.py delete mode 100755 modules/jtable_delete_rule.py delete mode 100755 modules/jtable_rule_list.py delete mode 100755 modules/jtable_schema_list.py delete mode 100755 modules/jtable_update_rule.py create mode 100755 validation_rules_service.py diff --git a/js/validation_rules.js b/js/validation_rules.js index b69caf8..ee31ba5 100644 --- a/js/validation_rules.js +++ b/js/validation_rules.js @@ -7,7 +7,7 @@ $(document).ready(function () { jtParams.domain = 'xml'; return $.Deferred(function ($dfd) { $.ajax({ - url: 'modules/jtable_rule_list.py', + url: 'validation_rules_service.py?action=list', type: 'POST', dataType: 'json', data: jtParams, @@ -20,9 +20,9 @@ $(document).ready(function () { }); }); }, - createAction: 'modules/jtable_create_rule.py', - updateAction: 'modules/jtable_update_rule.py', - deleteAction: 'modules/jtable_delete_rule.py' + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' }, fields: { id: { @@ -31,9 +31,8 @@ $(document).ready(function () { visibility:'hidden' }, domain: { - title: 'Domain', - options: ['xml'], - visibility: 'hidden' + defaultValue: 'xml', + type: 'hidden' }, doc: { title: 'Document' @@ -43,7 +42,7 @@ $(document).ready(function () { }, name: { title: 'XSD Schema', - options: 'modules/jtable_schema_list.py?extension=xsd' + options: 'validation_rules_service.py?action=listschemas&extension=xsd' } } }); @@ -59,7 +58,7 @@ $(document).ready(function () { jtParams.domain = 'meta'; return $.Deferred(function ($dfd) { $.ajax({ - url: 'modules/jtable_rule_list.py', + url: 
'validation_rules_service.py?action=list', type: 'POST', dataType: 'json', data: jtParams, @@ -72,9 +71,9 @@ $(document).ready(function () { }); }); }, - createAction: 'modules/jtable_create_rule.py', - updateAction: 'modules/jtable_update_rule.py', - deleteAction: 'modules/jtable_delete_rule.py' + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' }, fields: { id: { @@ -83,9 +82,8 @@ $(document).ready(function () { visibility:'hidden' }, domain: { - title: 'Domain', - options: ['meta'], - visibility: 'hidden' + defaultValue: 'meta', + type: 'hidden' }, doc: { title: 'Document' @@ -117,7 +115,7 @@ $(document).ready(function () { jtParams.domain = 'ether'; return $.Deferred(function ($dfd) { $.ajax({ - url: 'modules/jtable_rule_list.py', + url: 'validation_rules_service.py?action=list', type: 'POST', dataType: 'json', data: jtParams, @@ -130,9 +128,9 @@ $(document).ready(function () { }); }); }, - createAction: 'modules/jtable_create_rule.py', - updateAction: 'modules/jtable_update_rule.py', - deleteAction: 'modules/jtable_delete_rule.py' + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' }, fields: { id: { @@ -141,9 +139,8 @@ $(document).ready(function () { visibility:'hidden' }, domain: { - title: 'Domain', - options: ['ether'], - visibility: 'hidden' + defaultValue: 'ether', + type: 'hidden' }, doc: { title: 'Document' @@ -175,7 +172,7 @@ $(document).ready(function () { jtParams.domain = 'export'; return $.Deferred(function ($dfd) { $.ajax({ - url: 'modules/jtable_rule_list.py', + url: 'validation_rules_service.py?action=list', type: 'POST', dataType: 'json', data: jtParams, @@ -188,9 +185,9 @@ $(document).ready(function () { }); }); }, - createAction: 'modules/jtable_create_rule.py', - updateAction: 
'modules/jtable_update_rule.py', - deleteAction: 'modules/jtable_delete_rule.py' + createAction: 'validation_rules_service.py?action=create', + updateAction: 'validation_rules_service.py?action=update', + deleteAction: 'validation_rules_service.py?action=delete' }, fields: { id: { @@ -199,9 +196,8 @@ $(document).ready(function () { visibility:'hidden' }, domain: { - title: 'Domain', - options: ['export'], - visibility: 'hidden' + defaultValue: 'export', + type: 'hidden' }, doc: { title: 'Document' @@ -211,11 +207,11 @@ $(document).ready(function () { }, name: { title: 'Export Spec', - options: 'modules/jtable_schema_list.py?extension=ini' + options: 'validation_rules_service.py?action=listschemas&extension=ini' }, argument: { title: 'XSD Schema', - options: 'modules/jtable_schema_list.py?extension=xsd' + options: 'validation_rules_service.py?action=listschemas&extension=xsd' } } }); diff --git a/modules/jtable_create_rule.py b/modules/jtable_create_rule.py deleted file mode 100755 index f795eb7..0000000 --- a/modules/jtable_create_rule.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def create_rule(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - doc = parameter.getvalue("doc") - corpus = parameter.getvalue("corpus") - domain = parameter.getvalue("domain") - name = parameter.getvalue("name") - operator = parameter.getvalue("operator") - argument = parameter.getvalue("argument") - - id = create_validate_rule(doc,corpus,domain,name,operator,argument) - - new_json_rule = {} - new_json_rule['id'] = id - new_json_rule['doc'] = doc - new_json_rule['corpus'] = corpus - new_json_rule['domain'] = domain - new_json_rule['name'] = name - new_json_rule['operator'] = operator - new_json_rule['argument'] = argument - - jtable_result['Result'] = 'OK' - jtable_result['Record'] = new_json_rule - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - 
jtable_result['Message'] = 'Something went wrong in jtable_create_rule.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print create_rule() diff --git a/modules/jtable_delete_rule.py b/modules/jtable_delete_rule.py deleted file mode 100755 index 0131d74..0000000 --- a/modules/jtable_delete_rule.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def delete_rule(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - id = parameter.getvalue("id") - - delete_validate_rule(id) - - jtable_result['Result'] = 'OK' - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_delete_rule.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print delete_rule() \ No newline at end of file diff --git a/modules/jtable_rule_list.py b/modules/jtable_rule_list.py deleted file mode 100755 index bfee072..0000000 --- a/modules/jtable_rule_list.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def list_rules(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - sort = parameter.getvalue("jtSorting") - domain_filter = parameter.getvalue("domain") - if sort is not None: - rules = get_sorted_rules(sort) - else: - rules = get_validate_rules() - - json_rules = [] - for rule in rules: - if domain_filter and rule[2] != domain_filter: - continue - - new_json_rule = {} - new_json_rule['corpus'] = rule[0] - new_json_rule['doc'] = rule[1] - new_json_rule['domain'] = rule[2] - new_json_rule['name'] = rule[3] - new_json_rule['operator'] = rule[4] - new_json_rule['argument'] = rule[5] - new_json_rule['id'] = rule[6] - json_rules.append(new_json_rule) - jtable_result['Result'] = 'OK' - jtable_result['Records'] = json_rules - return 
json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_rule_list.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print list_rules() diff --git a/modules/jtable_schema_list.py b/modules/jtable_schema_list.py deleted file mode 100755 index 30d0be9..0000000 --- a/modules/jtable_schema_list.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from gitdox_sql import * -import json -import cgi -import os - -schema_dir = (os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - + os.sep - + 'schemas') - -def list_files(): - jtable_result = {} - ext = cgi.FieldStorage().getvalue("extension") - - try: - options = [{"DisplayText": x, - "Value": x} - for x in os.listdir(schema_dir) - if x.endswith(ext)] - jtable_result['Result'] = 'OK' - jtable_result['Options'] = options - return json.dumps(jtable_result) - except: - jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_xsd_list.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print list_files() diff --git a/modules/jtable_update_rule.py b/modules/jtable_update_rule.py deleted file mode 100755 index 6cf91f1..0000000 --- a/modules/jtable_update_rule.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -from gitdox_sql import * -import json -import cgi - - -def update_rules(): - jtable_result = {} - try: - parameter = cgi.FieldStorage() - doc = parameter.getvalue("doc") - corpus = parameter.getvalue("corpus") - domain = parameter.getvalue("domain") - name = parameter.getvalue("name") - operator = parameter.getvalue("operator") - argument = parameter.getvalue("argument") - id = parameter.getvalue("id") - - update_validate_rule(doc,corpus,domain,name,operator,argument,id) - - jtable_result['Result'] = 'OK' - return json.dumps(jtable_result) - except: - 
jtable_result['Result'] = 'Error' - jtable_result['Message'] = 'Something went wrong in jtable_update_rule.py' - return json.dumps(jtable_result) - - -print "Content-type:application/json\r\n\r\n" -print update_rules() \ No newline at end of file diff --git a/validation_rules_service.py b/validation_rules_service.py new file mode 100755 index 0000000..d435b91 --- /dev/null +++ b/validation_rules_service.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import json +import cgi +import os +import platform +from modules.gitdox_sql import * +from modules.logintools import login + +parameter = cgi.FieldStorage() +action = parameter.getvalue("action") + +# for rules +doc = parameter.getvalue("doc") +corpus = parameter.getvalue("corpus") +domain = parameter.getvalue("domain") +name = parameter.getvalue("name") +operator = parameter.getvalue("operator") +argument = parameter.getvalue("argument") +id = parameter.getvalue("id") + +# for schemas +schema_dir = os.path.dirname(os.path.realpath(__file__)) + os.sep + 'schemas' +extension = parameter.getvalue("extension") + +# for sorting +sort = parameter.getvalue("jtSorting") + +def row_to_dict(row): + return {'corpus': row[0], + 'doc': row[1], + 'domain': row[2], + 'name': row[3], + 'operator': row[4], + 'argument': row[5], + 'id': row[6]} + +def list_rules(): + resp = {} + try: + parameter = cgi.FieldStorage() + rules = get_validate_rules(sort=sort, domain=domain) + + json_rules = [row_to_dict(row) for row in rules] + resp['Result'] = 'OK' + resp['Records'] = json_rules + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while attempting to retrieve the list of rules.' 
+ print json.dumps(resp) + +def create_rule(): + resp = {} + try: + id = create_validate_rule(doc, corpus, domain, name, operator, argument) + resp['Result'] = 'OK' + resp['Record'] = {'doc': doc, + 'corpus': corpus, + 'domain': domain, + 'name': name, + 'operator': operator, + 'argument': argument, + 'id': id} + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while attempting to create a new rule.' + print json.dumps(resp) + +def update_rule(): + resp = {} + try: + update_validate_rule(doc, corpus, domain, name, operator, argument, id) + resp['Result'] = 'OK' + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while attempting to update a rule.' + print json.dumps(resp) + +def delete_rule(): + resp = {} + try: + delete_validate_rule(id) + resp['Result'] = 'OK' + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while trying to delete a rule.' + print json.dumps(resp) + + +def list_schemas(): + resp = {} + try: + resp['Result'] = 'OK' + resp['Options'] = [{"DisplayText": x, "Value": x} + for x in os.listdir(schema_dir) + if x.endswith(extension)] + print json.dumps(resp) + except: + resp['Result'] = 'Error' + resp['Message'] = 'Something went wrong while trying to list schemas.' 
+ print json.dumps(resp) + +def open_main_server(): + thisscript = os.environ.get('SCRIPT_NAME', '') + loginaction = None + theform = cgi.FieldStorage() + scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep + userdir = scriptpath + "users" + os.sep + loginaction, userconfig = login(theform, userdir, thisscript, loginaction) + user = userconfig["username"] + admin = userconfig["admin"] + + print "Content-type:application/json\r\n\r\n" + if action == "list": + list_rules() + elif action == "create": + create_rule() + elif action == "update": + update_rule() + elif action == "delete": + delete_rule() + elif action == "listschemas": + list_schemas() + else: + print json.dumps({'Result': 'Error', + 'Message': 'Unknown action: "' + str(action) + '"'}) + +if __name__ == '__main__': + open_main_server() From 15207c3fd72d33d2c8521b922302b97e289b6981 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 17:57:59 -0400 Subject: [PATCH 123/135] forbid demo user from making changes using jtables --- editor_metadata_service.py | 6 ++++-- validation_rules_service.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/editor_metadata_service.py b/editor_metadata_service.py index cb3cc88..f01a720 100755 --- a/editor_metadata_service.py +++ b/editor_metadata_service.py @@ -103,12 +103,14 @@ def open_main_server(): print "Content-type:application/json\r\n\r\n" if action == "list": get_metadata() + elif action == "keys": + get_default_key_options() + elif user == "demo": + print json.dumps({'Result': 'Error', 'Message': 'Demo user may not make changes.'}) elif action == "create": create_metadata() elif action == "delete": delete_metadata() - elif action == "keys": - get_default_key_options() else: print json.dumps({'Result': 'Error', 'Message': 'Unknown action: "' + str(action) + '"'}) diff --git a/validation_rules_service.py b/validation_rules_service.py index d435b91..07d058f 100755 --- a/validation_rules_service.py +++ 
b/validation_rules_service.py @@ -118,14 +118,16 @@ def open_main_server(): print "Content-type:application/json\r\n\r\n" if action == "list": list_rules() + elif action == "listschemas": + list_schemas() + elif user == "demo": + print json.dumps({'Result': 'Error', 'Message': 'Demo user may not make changes.'}) elif action == "create": create_rule() elif action == "update": update_rule() elif action == "delete": delete_rule() - elif action == "listschemas": - list_schemas() else: print json.dumps({'Result': 'Error', 'Message': 'Unknown action: "' + str(action) + '"'}) From 903492cb3d83283e1d2e50aa4add2644575bcb82 Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 18:25:03 -0400 Subject: [PATCH 124/135] remember last opened tab --- js/editor.js | 21 ++++++++++++++++----- js/validation_rules.js | 21 ++++++++++++++++----- templates/editor.mustache | 4 ++-- templates/validation_rules.mustache | 8 ++++---- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/js/editor.js b/js/editor.js index dfbb688..cb2a241 100755 --- a/js/editor.js +++ b/js/editor.js @@ -178,13 +178,24 @@ $(document).ready(function () { $(document).ready(function(){ - $('ul.tabs li').click(function(){ - var tab_id = $(this).attr('data-tab'); - + function activateTab(liId, divId) { $('ul.tabs li').removeClass('current'); $('.tab-content').removeClass('current'); + $("#"+liId).addClass('current'); + $("#"+divId).addClass('current'); + } + + var liId = localStorage.getItem(location.pathname + "activeLiId"); + var divId = localStorage.getItem(location.pathname + "activeDivId"); + if (liId && divId) { + activateTab(liId, divId); + } - $(this).addClass('current'); - $("#"+tab_id).addClass('current'); + $('ul.tabs li').click(function() { + var liId = $(this).attr('id'); + var divId = $(this).attr('data-tab'); + activateTab(liId, divId); + localStorage.setItem(location.pathname + "activeLiId", liId); + localStorage.setItem(location.pathname + "activeDivId", divId); }); }); diff 
--git a/js/validation_rules.js b/js/validation_rules.js index ee31ba5..fb13e11 100644 --- a/js/validation_rules.js +++ b/js/validation_rules.js @@ -219,13 +219,24 @@ $(document).ready(function () { }); $(document).ready(function(){ - $('ul.tabs li').click(function(){ - var tab_id = $(this).attr('data-tab'); - + function activateTab(liId, divId) { $('ul.tabs li').removeClass('current'); $('.tab-content').removeClass('current'); + $("#"+liId).addClass('current'); + $("#"+divId).addClass('current'); + } + + var liId = localStorage.getItem(location.pathname + "activeLiId"); + var divId = localStorage.getItem(location.pathname + "activeDivId"); + if (liId && divId) { + activateTab(liId, divId); + } - $(this).addClass('current'); - $("#"+tab_id).addClass('current'); + $('ul.tabs li').click(function() { + var liId = $(this).attr('id'); + var divId = $(this).attr('data-tab'); + activateTab(liId, divId); + localStorage.setItem(location.pathname + "activeLiId", liId); + localStorage.setItem(location.pathname + "activeDivId", divId); }); }); diff --git a/templates/editor.mustache b/templates/editor.mustache index d6c99c7..312bb6f 100644 --- a/templates/editor.mustache +++ b/templates/editor.mustache @@ -144,8 +144,8 @@ {{/ether_mode}}
    - - + +
diff --git a/templates/validation_rules.mustache b/templates/validation_rules.mustache index f954c83..9df0223 100644 --- a/templates/validation_rules.mustache +++ b/templates/validation_rules.mustache @@ -38,10 +38,10 @@

    - - - - + + + +
From 52f74e14374243b809a8ab02da7d05bc6cd3fc3f Mon Sep 17 00:00:00 2001 From: Luke Gessler Date: Wed, 31 Oct 2018 18:45:40 -0400 Subject: [PATCH 125/135] move div id=header into header template --- templates/admin.mustache | 4 +--- templates/editor.mustache | 4 +--- templates/header.mustache | 30 +++++++++++++++-------------- templates/index.mustache | 4 +--- templates/user_admin.mustache | 4 +--- templates/validation_rules.mustache | 4 +--- 6 files changed, 21 insertions(+), 29 deletions(-) diff --git a/templates/admin.mustache b/templates/admin.mustache index 24879b5..ba7fb60 100644 --- a/templates/admin.mustache +++ b/templates/admin.mustache @@ -27,9 +27,7 @@ {{{ navbar_html }}}
- + {{> header }}

GitDox - Administration

diff --git a/templates/editor.mustache b/templates/editor.mustache index 312bb6f..e224502 100644 --- a/templates/editor.mustache +++ b/templates/editor.mustache @@ -30,9 +30,7 @@ {{{navbar_html}}}

- + {{> header}}

GitDox: Edit

{{{ editor_help_link_html }}} diff --git a/templates/header.mustache b/templates/header.mustache index 2c359aa..1ab9b72 100644 --- a/templates/header.mustache +++ b/templates/header.mustache @@ -1,16 +1,18 @@ -