Skip to content

Commit

Permalink
Merge pull request #32 from Open-Telecoms-Data/2022-11-09-validation-…
Browse files Browse the repository at this point in the history
…table

jsonschema_validation: Group errors by type
  • Loading branch information
James (ODSC) authored Nov 10, 2022
2 parents 84b7677 + 5ef73af commit b249d47
Show file tree
Hide file tree
Showing 6 changed files with 292 additions and 69 deletions.
76 changes: 76 additions & 0 deletions cove_ofds/jsonschema_validation_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# This function may be a candidate to move to libcoveofds?
# It could do with some testing, wherever it ends up
# Validators whose error group depends on the validator keyword alone.
# The values are the keys the "jsonschema_validation_panel.html" template
# looks up, so they must match the template byte-for-byte.
_COVE_TYPE_BY_VALIDATOR = {
    "prefixItems": "prefixItems",
    "const": "Valuedoesnotmatchconstant",
    "minItems": "Emptyarray",
    "uniqueItems": "Nonuniqueitems",
    "minLength": "Emptystring",
    "enum": "Valuedoesnotmatchanycodes",
    "required": "Missingrequiredfields",
    "minProperties": "Emptyobject",
}

# Validators whose error group also depends on the validator's value.
_COVE_TYPE_BY_VALIDATOR_AND_VALUE = {
    ("type", "boolean"): "Valueisnotaboolean",
    ("type", "integer"): "Valueisnotaninteger",
    ("type", "number"): "Valueisnotanumber",
    ("type", "string"): "Valueisnotastring",
    ("type", "object"): "Valueisnotanobject",
    ("type", "array"): "Valueisnotanarray",
    ("format", "date"): "Incorrectlyformatteddate",
    ("format", "iri"): "Incorrectlyformattediri",
    ("format", "uri"): "Incorrectlyformatteduri",
    ("format", "uuid"): "Incorrectlyformatteduuid",
}


def add_type_to_json_schema_validation_error(data: dict) -> dict:
    """Tag a JSON Schema validation error with a display group.

    Two keys are added to ``data`` in place:

    * ``cove_type`` -- the group the error belongs to, derived from the
      ``validator`` keyword and, for some validators, from
      ``validator_value`` or ``instance``. Errors that match no known
      combination get ``"unknown"``.
    * ``path_no_num`` -- a tuple of the string elements of ``path``, i.e.
      the path with array indexes removed.

    Args:
        data: One validation error as a dict. Must contain ``validator``
            and ``path``; ``validator_value`` and ``instance`` are read
            only for the validators that need them.

    Returns:
        The same ``data`` dict, after it has been updated.

    Raises:
        KeyError: If ``validator`` or ``path`` is missing from ``data``.
    """
    validator = data["validator"]
    cove_type = "unknown"

    # Non-string values are never looked up in the tables, so unhashable
    # values (e.g. a list of allowed types) cannot raise TypeError.
    if isinstance(validator, str):
        if validator in _COVE_TYPE_BY_VALIDATOR:
            cove_type = _COVE_TYPE_BY_VALIDATOR[validator]
        elif validator == "pattern":
            # These 2 pattern checks are brittle: using instance is not a
            # great choice as that may easily change if the schema changes.
            # TODO
            instance = data.get("instance")
            if instance in ("properties", "features"):
                cove_type = "Fieldnamedoesnotmatchpattern"
            elif instance == "describedby":
                cove_type = "Valuedoesnotmatchpattern"
        else:
            validator_value = data.get("validator_value")
            if isinstance(validator_value, str):
                cove_type = _COVE_TYPE_BY_VALIDATOR_AND_VALUE.get(
                    (validator, validator_value), "unknown"
                )

    data["cove_type"] = cove_type

    # TODO this should be in libcoveofds
    data["path_no_num"] = tuple(key for key in data["path"] if isinstance(key, str))

    return data
31 changes: 25 additions & 6 deletions cove_ofds/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from libcoveofds.python_validate import PythonValidate
from libcoveofds.schema import OFDSSchema

import cove_ofds.jsonschema_validation_errors
from libcoveweb2.models import SuppliedDataFile
from libcoveweb2.process import ProcessDataTask

Expand Down Expand Up @@ -278,6 +279,9 @@ def get_context(self):
context["download_geojson_spans_url"] = os.path.join(
self.supplied_data.data_url(), "spans.geo.json"
)
context["download_geojson_meta_url"] = os.path.join(
self.supplied_data.data_url(), "geojson.meta.json"
)
context["download_geojson_nodes_size"] = os.stat(
self.nodes_file_name
).st_size
Expand Down Expand Up @@ -541,14 +545,29 @@ def process(self, process_data: dict) -> dict:
schema = OFDSSchema()
worker = JSONSchemaValidator(schema)

context = {"validation_errors": worker.validate(data)}
context["validation_errors"] = [i.json() for i in context["validation_errors"]]
context["validation_errors_count"] = len(context["validation_errors"])
context["validation_errors"] = group_data_list_by(
context["validation_errors"],
lambda i: str(i["path"]) + i["validator"] + i["message"],
# Get list of validation errors
validation_errors = worker.validate(data)
validation_errors = [i.json() for i in validation_errors]

# Add type to each
validation_errors = [
cove_ofds.jsonschema_validation_errors.add_type_to_json_schema_validation_error(
i
)
for i in validation_errors
]

# Add count
context = {"validation_errors_count": len(validation_errors)}

# group by type
validation_errors = group_data_list_by(
validation_errors, lambda i: str(i["cove_type"])
)

# and we are done
context["validation_errors"] = validation_errors

with open(self.data_filename, "w") as fp:
json.dump(context, fp, indent=4)

Expand Down
2 changes: 1 addition & 1 deletion cove_ofds/templates/cove_ofds/explore.html
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ <h4 class="panel-title">
</div>
<div id="validationTable" class="collapse in panel-body">
<p>{% trans 'The structure and format of your data does not conform to the OFDS schema. You should check your mapping and data pipeline for errors. For more information, see the ' %}<a target="_blank" href="https://open-fibre-data-standard.readthedocs.io/en/0.1-dev/reference">{% trans 'reference documentation' %}</a>.</p>
{% include "cove_ofds/validation_table.html" %}
{% include "cove_ofds/jsonschema_validation_panel.html" %}
</div>
</div>
{% else %}
Expand Down
151 changes: 151 additions & 0 deletions cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
{% load i18n %}

{% if 'prefixItems' in validation_errors %}
<h4>{% trans 'prefixItems' %}</h4>
<p>DESCRIPTION</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.prefixItems %}
{% endif %}


{% if 'Valuedoesnotmatchconstant' in validation_errors %}
<h4>{% trans 'Value does not match constant' %}</h4>
<p>You must update each value to match the constant specified in the schema.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valuedoesnotmatchconstant %}
{% endif %}


{# Shown only when errors were grouped under "Emptyarray"; the guard must test the same key that the include below reads. #}
{% if 'Emptyarray' in validation_errors %}
<h4>{% trans 'Empty array' %}</h4>
<p>You must omit empty arrays from your data in their entirety (key and value).</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Emptyarray %}
{% endif %}


{% if 'Nonuniqueitems' in validation_errors %}
<h4>{% trans 'Non-unique items' %}</h4>
<p>You must ensure that the items in each array are unique.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Nonuniqueitems %}
{% endif %}


{% if 'Fieldnamedoesnotmatchpattern' in validation_errors %}
<h4>{% trans 'Field name does not match pattern' %}</h4>
<p>You must ensure that fields in `Node.location` and `Span.route` are not named 'properties' or 'nodes'.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Fieldnamedoesnotmatchpattern %}
{% endif %}

{# The table must list the "Valuedoesnotmatchpattern" group, the same key the guard tests. #}
{% if 'Valuedoesnotmatchpattern' in validation_errors %}
<h4>{% trans 'Value does not match pattern' %}</h4>
<p>You must ensure that only the first item in the `links` array has `.rel` set to 'describedBy'.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valuedoesnotmatchpattern %}
{% endif %}


{% if 'Emptystring' in validation_errors %}
<h4>{% trans 'Empty string' %}</h4>
<p>You must omit empty strings from your data in their entirety (key and value).</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Emptystring %}
{% endif %}


{% if 'Valuedoesnotmatchanycodes' in validation_errors %}
<h4>{% trans 'Value does not match any codes.' %}</h4>
<p>You must update each value to match a code from the codelist specified in the schema.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valuedoesnotmatchanycodes %}
{% endif %}


{% if 'Valueisnotaboolean' in validation_errors %}
<h4>{% trans 'Value is not a boolean' %}</h4>
<p>You must ensure that each value is either `true` or `false`. You should check that values are not enclosed in quote characters (`"`).</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotaboolean %}
{% endif %}


{% if 'Valueisnotaninteger' in validation_errors %}
<h4>{% trans 'Value is not an integer' %}</h4>
<p>
You must ensure that each value contains only digits (`0-9`) and, optionally, the dot character (`.`). Integer values must have either no fractional part (e.g. `1`) or a zero fractional part (e.g. `1.0`).
You should check that values are not enclosed in quote characters, e.g. `1` is an integer, but `"1"` is a string.
</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotaninteger %}
{% endif %}


{% if 'Valueisnotanumber' in validation_errors %}
<h4>{% trans 'Value is not a number' %}</h4>
<p>You must ensure that each value contains only digits (`0-9`) and, optionally, the dot character (`.`). You should check that values are not enclosed in quote characters, e.g. `1` is an integer, but `"1"` is a string.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotanumber %}
{% endif %}


{% if 'Valueisnotastring' in validation_errors %}
<h4>{% trans 'Value is not a string' %}</h4>
<p>You must ensure that each value begins and ends with the quote character (`"`) and that any quotes within the value are escaped with a backslash (`\`).</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotastring %}
{% endif %}


{% if 'Valueisnotanobject' in validation_errors %}
<h4>{% trans 'Value is not an object' %}</h4>
<p>You must ensure that each value is enclosed in curly braces (`{` and `}`) and contains only key/value pairs.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotanobject %}
{% endif %}


{% if 'Valueisnotanarray' in validation_errors %}
<h4>{% trans 'Value is not an array' %}</h4>
<p>You must ensure that each value is enclosed in square brackets (`[` and `]`).</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotanarray %}
{% endif %}


{% if 'Missingrequiredfields' in validation_errors %}
<h4>{% trans 'Missing required fields' %}</h4>
<p>You must ensure that your data contains the required fields specified in the schema.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Missingrequiredfields %}
{% endif %}


{% if 'Emptyobject' in validation_errors %}
<h4>{% trans 'Empty object' %}</h4>
<p>You must omit empty objects from your data in their entirety (key and value).</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Emptyobject %}
{% endif %}


{% if 'Incorrectlyformatteddate' in validation_errors %}
<h4>{% trans 'Incorrectly formatted date' %}</h4>
<p>You must ensure that each date is in `"YYYY-MM-DD"` format.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteddate %}
{% endif %}


{% if 'Incorrectlyformattediri' in validation_errors %}
<h4>{% trans 'Incorrectly formatted iri' %}</h4>
<p>You must ensure that each iri is formatted according to <a href="https://www.rfc-editor.org/rfc/rfc3987" target="_blank">RFC3987</a>.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformattediri %}
{% endif %}


{% if 'Incorrectlyformatteduri' in validation_errors %}
<h4>{% trans 'Incorrectly formatted uri' %}</h4>
<p>You must ensure that each uri is formatted according to <a href="https://www.rfc-editor.org/rfc/rfc3986" target="_blank">RFC3986</a>.</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteduri %}
{% endif %}


{% if 'Incorrectlyformatteduuid' in validation_errors %}
<h4>{% trans 'Incorrectly formatted uuid' %}</h4>
<p>
You must ensure that each uuid is formatted according to <a href="https://datatracker.ietf.org/doc/html/rfc4122" target="_blank">RFC4122</a>.
For more information, see <a href="https://open-fibre-data-standard.readthedocs.io/en/0.1-dev/guidance/publication.html#how-to-generate-universally-unique-identifiers" target="_blank">how to generate universally unique identifiers</a>.
</p>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteduuid %}
{% endif %}


{% if 'unknown' in validation_errors %}
<h4>{% trans 'unknown' %}</h4>
{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.unknown %}
{% endif %}
39 changes: 39 additions & 0 deletions cove_ofds/templates/cove_ofds/jsonschema_validation_table.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{% load i18n %}
{% load cove_tags %}

<table class="table table-condensed">
<thead>
<tr>
<th>{% trans 'Identifiers' %}</th>
<th>{% trans 'Path' %}</th>
<th>{% trans 'Value' %}</th>
</tr>
</thead>
<tbody>
{% for error in validation_errors_for_table %}
<tr>
<td>
{% if error.data_ids.network_id or error.data_ids.span_id or error.data_ids.node_id %}
{% if error.data_ids.network_id %}
<div>{% trans 'Network' %}: {{ error.data_ids.network_id }}</div>
{% endif %}
{% if error.data_ids.span_id %}
<div>{% trans 'Span' %}: {{ error.data_ids.span_id }}</div>
{% endif %}
{% if error.data_ids.node_id %}
<div>{% trans 'Node' %}: {{ error.data_ids.node_id }}</div>
{% endif %}
{% else %}
N/A
{% endif %}
</td>
<td>
/{{ error.path|join:"/" }}
</td>
<td>
{{ error.instance }}
</td>
</tr>
{% endfor %}
</tbody>
</table>
62 changes: 0 additions & 62 deletions cove_ofds/templates/cove_ofds/validation_table.html

This file was deleted.

0 comments on commit b249d47

Please sign in to comment.