diff --git a/modules/ether.py b/modules/ether.py
index 1cf7e8c..96a5135 100755
--- a/modules/ether.py
+++ b/modules/ether.py
@@ -96,8 +96,10 @@ def read_config(self,config_file):
else:
self.template = "\n%%body%%\n\n"
-def parse_ether(ether):
+
+def parse_ether(ether, doc_id=None):
"""Take in raw socialcalc data and turn it into a dict of Cells. Used in validation."""
+
class Cell:
def __init__(self, col, row, content, span):
self.col = col
@@ -105,6 +107,7 @@ def __init__(self, col, row, content, span):
self.header = ""
self.content = content
self.span = span
+
def __repr__(self):
return ""
@@ -125,12 +128,12 @@ def __repr__(self):
cell_row = cell_id[1:]
cell_col = cell_id[0]
# We'd need something like this to support more than 26 cols, i.e. columns AA, AB...
- #for c in cell_id:
+ # for c in cell_id:
# if c in ["0","1","2","3","4","5","6","7","8","9"]:
# cell_row += c
# else:
# cell_col += c
- cell_content = parts[3].replace("\\c",":")
+ cell_content = parts[3].replace("\\c", ":")
cell_span = parts[-1] if "rowspan:" in line else "1"
# record col name
@@ -143,11 +146,17 @@ def __repr__(self):
all_cells.append(cell)
for cell in all_cells:
- cell.header = rev_colmap[cell.col]
+ if cell.col in rev_colmap:
+ cell.header = rev_colmap[cell.col]
+ else:
+ if doc_id is None:
+ doc_id = "unknown"
+ raise IOError("Undocumented column: " + cell.col + " in '" + str(cell) + " from doc: " + str(doc_id))
parsed["__colmap__"] = colmap # Save colmap for apply_rule
return parsed
+
def unescape_xml(text):
# Fix various common compounded XML escapes
text = text.replace("<","<").replace(">",">")
@@ -676,6 +685,10 @@ def ether_to_sgml(ether, doc_id,config=None):
# Priorities have been supplied, but this column is not in them
continue
+ # content may not contain straight double quotes in span annotations in SGML export
+ # Note that " is allowed in tokens and in tab-delimited token annotations!
+ content = content.replace('"', "&quot;")
+
if sec_element != "":
#open_tags[row][sec_element].append((attrib, content))
sec_element_checklist.append((element,sec_element,attrib,content,rowspan))
diff --git a/validate.py b/validate.py
index 40816e0..97172df 100755
--- a/validate.py
+++ b/validate.py
@@ -132,7 +132,7 @@ def validate_doc_ether(doc_id, editor=False, dirty=True):
ether_doc_name = "gd_" + doc_corpus + "_" + doc_name
socialcalc = get_socialcalc(ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty)
- parsed_ether = parse_ether(socialcalc)
+ parsed_ether = parse_ether(socialcalc,doc_id=doc_id)
report = ''
cells = []
|