diff --git a/modules/ether.py b/modules/ether.py index 1cf7e8c..96a5135 100755 --- a/modules/ether.py +++ b/modules/ether.py @@ -96,8 +96,10 @@ def read_config(self,config_file): else: self.template = "\n%%body%%\n\n" -def parse_ether(ether): + +def parse_ether(ether, doc_id=None): """Take in raw socialcalc data and turn it into a dict of Cells. Used in validation.""" + class Cell: def __init__(self, col, row, content, span): self.col = col @@ -105,6 +107,7 @@ def __init__(self, col, row, content, span): self.header = "" self.content = content self.span = span + def __repr__(self): return "" @@ -125,12 +128,12 @@ def __repr__(self): cell_row = cell_id[1:] cell_col = cell_id[0] # We'd need something like this to support more than 26 cols, i.e. columns AA, AB... - #for c in cell_id: + # for c in cell_id: # if c in ["0","1","2","3","4","5","6","7","8","9"]: # cell_row += c # else: # cell_col += c - cell_content = parts[3].replace("\\c",":") + cell_content = parts[3].replace("\\c", ":") cell_span = parts[-1] if "rowspan:" in line else "1" # record col name @@ -143,11 +146,17 @@ def __repr__(self): all_cells.append(cell) for cell in all_cells: - cell.header = rev_colmap[cell.col] + if cell.col in rev_colmap: + cell.header = rev_colmap[cell.col] + else: + if doc_id is None: + doc_id = "unknown" + raise IOError("Undocumented column: " + cell.col + " in '" + str(cell) + " from doc: " + str(doc_id)) parsed["__colmap__"] = colmap # Save colmap for apply_rule return parsed + def unescape_xml(text): # Fix various common compounded XML escapes text = text.replace("&lt;","<").replace("&gt;",">") @@ -676,6 +685,10 @@ def ether_to_sgml(ether, doc_id,config=None): # Priorities have been supplied, but this column is not in them continue + # content may not contain straight double quotes in span annotations in SGML export + # Note that " is allowed in tokens and in tab-delimited token annotations! 
+ content = content.replace('"', "&quot;") + if sec_element != "": #open_tags[row][sec_element].append((attrib, content)) sec_element_checklist.append((element,sec_element,attrib,content,rowspan)) diff --git a/validate.py b/validate.py index 40816e0..97172df 100755 --- a/validate.py +++ b/validate.py @@ -132,7 +132,7 @@ def validate_doc_ether(doc_id, editor=False, dirty=True): ether_doc_name = "gd_" + doc_corpus + "_" + doc_name socialcalc = get_socialcalc(ether_url, ether_doc_name, doc_id=doc_id, dirty=dirty) - parsed_ether = parse_ether(socialcalc) + parsed_ether = parse_ether(socialcalc,doc_id=doc_id) report = '' cells = []