Merge pull request #121 from fecgov/release/sprint-15
Release/sprint 15
mjtravers authored Oct 31, 2022
2 parents d27b6bd + f1be69c commit 949dc97
Showing 340 changed files with 2,844 additions and 14,913 deletions.
143 changes: 86 additions & 57 deletions bin/generate-starter-schema.py
@@ -21,22 +21,36 @@
import os
import re

parser = argparse.ArgumentParser(description='Convert the FEC validation Excel'
' spreadsheet into JSON schema documents.')
parser.add_argument('excel_filename', help='an excel filename that will be'
' parsed to generate JSON schema docs')
parser.add_argument('--sheets-to-generate', help='a json file containing an'
' array of sheet names to be parsed from the excel file')
parser.add_argument('--version')
JSON_EXT = ".json"

parser = argparse.ArgumentParser(
description="Convert the FEC validation Excel"
" spreadsheet into JSON schema documents."
)
parser.add_argument(
"excel_filename",
help="an excel filename that will be parsed to generate JSON schema docs",
)
parser.add_argument(
"--sheets-to-generate",
help="a json file containing an"
" array of sheet names to be parsed from the excel file",
)
parser.add_argument("--version")
args = parser.parse_args()
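For reference, a hypothetical invocation of the generator (the sheet-list filename and the version value are assumptions, not taken from this commit):
# python bin/generate-starter-schema.py Form_3X_Receipts_Vendor_10.20.2020.xlsx \
#     --sheets-to-generate sheets.json --version v0.0.0.4
# where sheets.json holds a JSON array of worksheet names, e.g. ["SchA", "SchB"]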
EXCEL_FILENAME = args.excel_filename or \
"Form_3X_Receipts_Vendor_10.20.2020.xlsx"
SCHEMA_ID_PREFIX = ("https://github.com/fecgov/fecfile-validate/blob/"
"main/schema")
EXCEL_FILENAME = args.excel_filename or "Form_3X_Receipts_Vendor_10.20.2020.xlsx"
SCHEMA_ID_PREFIX = "https://github.com/fecgov/fecfile-validate/blob/main/schema"
VERSION = args.version or "v0.0.0.0"
SHEETS_TO_SKIP = ['All receipts', 'Version 8.3', 'SUMMARY OF CHANGES',
"All Schedule A Transactions", "ScheduleC", "Schedule C1",
"Scedule C2"]
SHEETS_TO_SKIP = [
"All receipts",
"Version 8.3",
"SUMMARY OF CHANGES",
"All Schedule A Transactions",
"ScheduleC",
"Schedule C1",
"Scedule C2",
"All disbursements",
]

# Column positions of fields in the spreadsheet row array

@@ -53,12 +67,12 @@ class Columns(Enum):
FIELD_FORM_ASSOCIATION = 8

def get(self, row, has_autopopulate):
index = self.value if has_autopopulate or \
self.value <= 3 else self.value - 1
index = self.value if has_autopopulate or self.value <= 3 else self.value - 1
value = row[index] if index < len(row) else None
return value.strip() if isinstance(value, str) else value
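The conditional index compensates for sheets that lack the "Auto populate" column; a brief sketch of the behaviour (the member value is read from the enum above, the rows themselves are hypothetical):
# With has_autopopulate=True, FIELD_FORM_ASSOCIATION (value 8) reads row[8];
# with has_autopopulate=False it reads row[7], because every member with a value
# above 3 shifts one column to the left. Members with value <= 3 are unaffected.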

def convert_row_to_property(row, sheet_has_autopopulate):# noqa

def convert_row_to_property(row, sheet_has_autopopulate): # noqa
"""Take a row from the spreadsheet and convert it into a schema object.
Args:
@@ -75,33 +89,40 @@ def convert_row_to_property(row, sheet_has_autopopulate):# noqa
if col != Columns.AUTO_POPULATE or sheet_has_autopopulate:
spec[col.name] = col.get(row, sheet_has_autopopulate)

title = spec.get(Columns.FIELD_DESCRIPTION.name)
title = str(spec.get(Columns.FIELD_DESCRIPTION.name))
field_type = spec.get(Columns.TYPE.name)
required = spec.get(Columns.REQUIRED.name)
sample_data = spec.get(Columns.SAMPLE_DATA.name)
rule_ref = spec.get(Columns.RULE_REFERENCE.name)

token = title.replace("\n", "_").replace(" ", "_").replace(".", "")\
.replace("(", "").replace(")", "").replace("/", "_")\
.replace("__", "_").lower()
token = (
title.replace("\n", "_")
.replace(" ", "_")
.replace(".", "")
.replace("(", "")
.replace(")", "")
.replace("/", "_")
.replace("__", "_")
.lower()
)
# Prepend tokens that start with a number (presumed to be a line number)
# with capital letter "L".
if token[0].isdigit():
token = 'L' + token
token = "L" + token
prop["title"] = title
prop["description"] = ""

if field_type.startswith("AMT-"):
prop["type"] = "number"
prop["minimum"] = 0
prop["maximum"] = int('9' * int(field_type.split('-')[1]))
prop["maximum"] = int("9" * int(field_type.split("-")[1]))

if field_type.startswith("NUM-") or field_type.startswith("N-"):
length = field_type.split('-')[1].strip()
length = field_type.split("-")[1].strip()
prop["type"] = "string"
prop["minLength"] = 0
prop["maxLength"] = int(length)
prop["pattern"] = rf'^\d{{0,{length}}}$'
prop["pattern"] = rf"^\d{{0,{length}}}$"

if field_type == "Dropdown":
prop["type"] = "string"
@@ -112,11 +133,11 @@ def convert_row_to_property(row, sheet_has_autopopulate):# noqa
if field_type == "A-1" and rule_ref == "Check-box":
prop["type"] = "boolean"
else:
length = field_type.split('-')[1].strip()
length = field_type.split("-")[1].strip()
prop["type"] = "string"
prop["minLength"] = 0
prop["maxLength"] = int(length)
prop["pattern"] = f'^[ A-Za-z0-9]{{0,{length}}}$'
prop["pattern"] = f"^[ -~]{0,9}$"

if sample_data:
prop["examples"] = [sample_data]
@@ -131,51 +152,59 @@ def convert_row_to_property(row, sheet_has_autopopulate):# noqa
wb = openpyxl.load_workbook(EXCEL_FILENAME)
sheets_to_generate = None
if args.sheets_to_generate is not None:
with open(os.path.join(os.getcwd(), args.sheets_to_generate), 'r') as f:
with open(os.path.join(os.getcwd(), args.sheets_to_generate), "r") as f:
sheets_to_generate = json.load(f)

print(sheets_to_generate)
trans_type_hits = {}
for ws in wb.worksheets:
if ((sheets_to_generate is not None and ws.title not in sheets_to_generate)
or ws.title in SHEETS_TO_SKIP):
if (
sheets_to_generate is not None and ws.title not in sheets_to_generate
) or ws.title in SHEETS_TO_SKIP:
continue

print(ws.title)
title = ws.title.replace(' ', '')
output_file = title + ".json"

print(f'Parsing {output_file}...')
title = ws.title.replace(" ", "")
output_file = title + JSON_EXT

print(f"Parsing {output_file}...")

sheet_has_autopopulate = ws.cell(3, 5).value is not None and \
ws.cell(3, 5).value.strip() == 'Auto populate'
sheet_has_autopopulate = (
ws.cell(3, 5).value is not None
and ws.cell(3, 5).value.strip() == "Auto populate"
)
schema_properties = {}
required_rows = []
recommended_rows = []
for row in ws.iter_rows(min_row=5, max_col=8, values_only=True):
if (not Columns.COL_SEQ.get(row, sheet_has_autopopulate)
or Columns.COL_SEQ.get(row, sheet_has_autopopulate) == "--"
or not Columns.FIELD_DESCRIPTION.get(row,
sheet_has_autopopulate)
or not Columns.TYPE.get(row, sheet_has_autopopulate)
or len(row) > 10):
if (
not Columns.COL_SEQ.get(row, sheet_has_autopopulate)
or Columns.COL_SEQ.get(row, sheet_has_autopopulate) == "--"
or not Columns.FIELD_DESCRIPTION.get(row, sheet_has_autopopulate)
or not Columns.TYPE.get(row, sheet_has_autopopulate)
or len(row) > 10
):
continue
token, prop, is_required, is_recommended = \
convert_row_to_property(row, sheet_has_autopopulate)
token, prop, is_required, is_recommended = convert_row_to_property(
row, sheet_has_autopopulate
)
if token == "transaction_type_identifier":
trans_type_id = \
prop.get('fec_spec', {}).get(Columns.SAMPLE_DATA.name,
"") or ""
trans_type_hits[trans_type_id] = \
(trans_type_hits.get(trans_type_id) or 0) + 1
if (trans_type_hits[trans_type_id] > 1 or trans_type_id == ''):
output_file = trans_type_id + '-' + \
str(trans_type_hits[trans_type_id]) + '.json'
trans_type_id = (
prop.get("fec_spec", {}).get(Columns.SAMPLE_DATA.name, "") or ""
)
trans_type_hits[trans_type_id] = (
trans_type_hits.get(trans_type_id) or 0
) + 1
if trans_type_hits[trans_type_id] > 1 or trans_type_id == "":
output_file = (
trans_type_id + "-" + str(trans_type_hits[trans_type_id]) + JSON_EXT
)
else:
output_file = trans_type_id + '.json'
output_file = trans_type_id + JSON_EXT
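A short illustration of the resulting file names (the transaction type code is hypothetical):
# sheet whose transaction_type_identifier sample data is "INDV_REC"  ->  INDV_REC.json
# a later sheet repeating the same identifier                        ->  INDV_REC-2.json
# a sheet whose identifier row has no sample data (empty string)     ->  -1.json, then -2.json, ...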
# Catch and mark token (i.e. spec property) clashes for manual fixing.
if token in schema_properties:
token = token + '-DUPLICATE'
token = token + "-DUPLICATE"

if is_required:
required_rows.append(token)
@@ -185,17 +214,17 @@ def convert_row_to_property(row, sheet_has_autopopulate):# noqa

schema = {
"$schema": "https://json-schema.org/draft-07/schema#",
"$id": f'{SCHEMA_ID_PREFIX}/{output_file}',
"$id": f"{SCHEMA_ID_PREFIX}/{output_file}",
"version": VERSION,
"title": f'FEC {ws.title}',
"title": f"FEC {ws.title}",
"description": ws.cell(1, 1).value,
"type": "object",
"required": required_rows,
"fec_recommended": recommended_rows,
"properties": schema_properties,
"additionalProperties": False
"additionalProperties": False,
}
f = open(output_file, "w")
f.write(json.dumps(schema, indent=4))
f.close()
print('Done')
print("Done")