Skip to content

Commit

Permalink
Merge pull request ckan#46 from DataShades/CHDEXCHAE-57
Browse files: browse the repository at this point in the history
CHDEXCHAE-57 / check if there is a schema before validation
  • Loading branch information
duttonw authored Jul 20, 2023
2 parents dafb9a5 + 81044aa commit 05f7d83
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 30 deletions.
18 changes: 18 additions & 0 deletions ckanext/validation/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from six import string_types
from ckantoolkit import url_for, _, config, asbool, literal, h

from ckanext.validation.utils import get_default_schema


def _get_helpers():
validators = (
Expand Down Expand Up @@ -34,6 +36,9 @@ def get_validation_badge(resource, in_listing=False):
if not resource.get('validation_status'):
return ''

if not _get_schema_or_default_schema(resource):
return ''

statuses = {
'success': _('valid'),
'failure': _('failure'),
Expand Down Expand Up @@ -67,6 +72,19 @@ def get_validation_badge(resource, in_listing=False):
title=resource.get('validation_timestamp', ''))


def _get_schema_or_default_schema(resource):
    """Return the schema that applies to ``resource``, or a falsy value.

    If the resource's ``align_default_schema`` flag is truthy, the schema is
    obtained from ``get_default_schema`` for the resource's ``package_id``;
    otherwise the resource's own ``schema`` field is used.

    A non-empty string schema is returned unchanged when ``is_url_valid``
    accepts it, and is decoded with ``json.loads`` otherwise. Any other
    value (empty, or not a string) is returned as-is.
    """
    use_default = asbool(resource.get('align_default_schema'))
    candidate = (get_default_schema(resource['package_id'])
                 if use_default else resource.get('schema'))

    # Empty values and non-string schemas need no further processing.
    if not candidate or not isinstance(candidate, string_types):
        return candidate

    # A string is either a URL pointing at the schema or inline JSON.
    if is_url_valid(candidate):
        return candidate
    return json.loads(candidate)


def validation_extract_report_from_errors(errors):

report = None
Expand Down
7 changes: 7 additions & 0 deletions ckanext/validation/logic/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ def resource_validation_run(context, data_dict):

resource = tk.get_action(u'resource_show')(context, {u'id': resource_id})

if not resource.get('schema'):
try:
tk.get_action(u'resource_validation_delete')(context, data_dict)
except tk.ObjectNotFound:
pass
return

# TODO: limit to sysadmins
async_job = data_dict.get(u'async', True)

Expand Down
19 changes: 16 additions & 3 deletions ckanext/validation/tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ckan.tests import factories

import ckanext.validation.helpers as h
from ckanext.validation.tests.helpers import SCHEMA


def _assert_validation_badge_status(resource, status):
Expand All @@ -31,34 +32,46 @@ def test_get_validation_badge_no_validation(self, mock_is_validatable):

assert h.get_validation_badge(resource) == ''

def test_get_validation_badge_success(self, mock_is_validatable):
def test_hide_validation_badge_no_schema(self, mock_is_validatable):
resource = factories.Resource(
format='CSV',
validation_status='success',
validation_timestamp=datetime.datetime.utcnow().isoformat())

assert h.get_validation_badge(resource) == ''

def test_get_validation_badge_success(self, mock_is_validatable):
resource = factories.Resource(
format='CSV',
validation_status='success',
validation_timestamp=datetime.datetime.utcnow().isoformat(),
schema=SCHEMA)

_assert_validation_badge_status(resource, 'success')

def test_get_validation_badge_failure(self, mock_is_validatable):
resource = factories.Resource(
format='CSV',
validation_status='failure',
validation_timestamp=datetime.datetime.utcnow().isoformat())
validation_timestamp=datetime.datetime.utcnow().isoformat(),
schema=SCHEMA)

_assert_validation_badge_status(resource, 'invalid')

def test_get_validation_badge_error(self, mock_is_validatable):
resource = factories.Resource(
format='CSV',
validation_status='error',
validation_timestamp=datetime.datetime.utcnow().isoformat())
validation_timestamp=datetime.datetime.utcnow().isoformat(),
schema=SCHEMA)

_assert_validation_badge_status(resource, 'error')

def test_get_validation_badge_other(self, mock_is_validatable):
resource = factories.Resource(
format='CSV',
validation_status='not-sure',
schema=SCHEMA,
)

_assert_validation_badge_status(resource, 'unknown')
Expand Down
76 changes: 56 additions & 20 deletions ckanext/validation/tests/test_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ def test_resource_validation_run_not_exists(self):
call_action('resource_validation_run', resource_id='not_exists')

def test_resource_validation_wrong_format(self):
resource = factories.Resource(format='pdf')
resource = factories.Resource(format='pdf', schema=SCHEMA)

with pytest.raises(tk.ValidationError) as err:
call_action('resource_validation_run', resource_id=resource['id'])

assert 'Unsupported resource format' in err.value.error_dict['format']

def test_resource_validation_no_url_or_upload(self):
resource = factories.Resource(url='', format='csv')
resource = factories.Resource(url='', format='csv', schema=SCHEMA)

with pytest.raises(tk.ValidationError) as err:
call_action('resource_validation_run', resource_id=resource['id'])
Expand All @@ -59,7 +59,7 @@ def test_resource_validation_no_url_or_upload(self):
def test_resource_validation_with_url(self, mocked_responses):
url = 'http://example.com'
mocked_responses.add(responses.GET, url, body=VALID_CSV, stream=True)
resource = factories.Resource(url=url, format='csv')
resource = factories.Resource(url=url, format='csv', schema=SCHEMA)

call_action('resource_validation_run', resource_id=resource['id'])

Expand Down Expand Up @@ -90,7 +90,7 @@ def test_resource_validation_resets_existing_validation_object(
url = 'https://some.url'
mocked_responses.add(responses.GET, url, body=VALID_CSV, stream=True)

resource = {'format': 'csv', 'url': url}
resource = {'format': 'csv', 'url': url, 'schema': SCHEMA}

dataset = factories.Dataset(resources=[resource])

Expand Down Expand Up @@ -252,6 +252,41 @@ def test_validation_fails_no_validation_object_stored(self):

assert not Session.query(Validation).count()

def test_validation_skips_no_schema_provided(self):
    """Creating a resource without a schema stores no Validation record."""
    upload = MockFileStorage(io.BytesIO(VALID_CSV), 'valid.csv')
    dataset = factories.Dataset()

    # Note: no ``schema`` key is passed to resource_create.
    resource_data = {
        'package_id': dataset['id'],
        'format': 'csv',
        'upload': upload,
        'url_type': 'upload',
    }
    call_action('resource_create', **resource_data)

    assert not Session.query(Validation).count()

def test_validation_report_delete_when_schema_removed(self):
    """Clearing a resource's schema leaves no Validation record behind."""
    dataset = factories.Dataset()
    upload = MockFileStorage(io.BytesIO(VALID_CSV), 'valid.csv')

    created = call_action(
        'resource_create',
        package_id=dataset['id'],
        format='csv',
        upload=upload,
        url_type='upload',
        schema=SCHEMA
    )

    # With a schema supplied, a Validation record is expected to exist.
    assert Session.query(Validation).count()

    # Patch the schema to an empty string; the record should then be gone.
    call_action('resource_patch', id=created['id'], schema='')

    assert not Session.query(Validation).count()

def test_validation_passes_on_upload(self):
dataset = factories.Dataset()

Expand Down Expand Up @@ -522,14 +557,14 @@ def test_run_sysadmin(self):
def test_run_non_auth_user(self):
user = factories.User()
org = factories.Organization()
dataset = factories.Dataset(owner_org=org['id'],
resources=[factories.Resource()])
dataset = factories.Dataset(owner_org=org['id'])
resource = factories.Resource(package_id=dataset["id"])
context = {'user': user['name'], 'model': model}

with pytest.raises(tk.NotAuthorized):
call_auth('resource_validation_run',
context=context,
resource_id=dataset['resources'][0]['id'])
resource_id=resource['id'])

@pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False)
@pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False)
Expand All @@ -539,13 +574,13 @@ def test_run_auth_user(self):
'name': user['name'],
'capacity': 'editor'
}])
dataset = factories.Dataset(owner_org=org['id'],
resources=[factories.Resource()])
dataset = factories.Dataset(owner_org=org['id'])
resource = factories.Resource(package_id=dataset["id"])
context = {'user': user['name'], 'model': model}

assert call_auth('resource_validation_run',
context=context,
resource_id=dataset['resources'][0]['id'])
resource_id=resource['id'])

@pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False)
@pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False)
Expand Down Expand Up @@ -574,14 +609,15 @@ def test_delete_sysadmin(self):
def test_delete_non_auth_user(self):
user = factories.User()
org = factories.Organization()
dataset = factories.Dataset(owner_org=org['id'],
resources=[factories.Resource()])
dataset = factories.Dataset(owner_org=org['id'])
resource = factories.Resource(package_id=dataset["id"])

context = {'user': user['name'], 'model': model}

with pytest.raises(tk.NotAuthorized):
call_auth('resource_validation_delete',
context=context,
resource_id=dataset['resources'][0]['id'])
resource_id=resource['id'])

@pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False)
@pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False)
Expand All @@ -591,13 +627,13 @@ def test_delete_auth_user(self):
'name': user['name'],
'capacity': 'editor'
}])
dataset = factories.Dataset(owner_org=org['id'],
resources=[factories.Resource()])
dataset = factories.Dataset(owner_org=org['id'])
resource = factories.Resource(package_id=dataset["id"])
context = {'user': user['name'], 'model': model}

assert call_auth('resource_validation_delete',
context=context,
resource_id=dataset['resources'][0]['id'])
resource_id=resource['id'])

@pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False)
@pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False)
Expand All @@ -615,25 +651,25 @@ def test_show_anon_public_dataset(self):
user = factories.User()
org = factories.Organization()
dataset = factories.Dataset(owner_org=org['id'],
resources=[factories.Resource()],
private=False)
resource = factories.Resource(package_id=dataset["id"])
context = {'user': user['name'], 'model': model}

assert call_auth('resource_validation_show',
context=context,
resource_id=dataset['resources'][0]['id'])
resource_id=resource['id'])

@pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False)
@pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False)
def test_show_anon_private_dataset(self):
user = factories.User()
org = factories.Organization()
dataset = factories.Dataset(owner_org=org['id'],
resources=[factories.Resource()],
private=True)
resource = factories.Resource(package_id=dataset["id"])
context = {'user': user['name'], 'model': model}

with pytest.raises(tk.NotAuthorized):
call_auth('resource_validation_run',
context=context,
resource_id=dataset['resources'][0]['id'])
resource_id=resource['id'])
25 changes: 19 additions & 6 deletions ckanext/validation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import ckanext.validation.settings as s
from ckanext.validation.interfaces import IDataValidation
from ckanext.validation.validation_status_helper import ValidationStatusHelper, StatusTypes
from ckanext.validation.helpers import is_url_valid
from ckanext.validation.validators import resource_schema_validator

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -50,7 +49,7 @@ def process_schema_fields(data_dict):
uploader._get_underlying_file(schema_upload).read())

elif schema_url:
if not is_url_valid(schema_url):
if not tk.h.is_url_valid(schema_url):
raise tk.ValidationError({u'schema_url': ['Must be a valid URL']})

try:
Expand Down Expand Up @@ -99,10 +98,24 @@ def run_sync_validation(resource_data):
schema = resource_data.get('schema')

if tk.asbool(resource_data.get('align_default_schema')):
schema = _get_default_schema(resource_data["package_id"])
schema = get_default_schema(resource_data["package_id"])

if schema and isinstance(schema, string_types):
schema = schema if is_url_valid(schema) else json.loads(schema)
schema = schema if tk.h.is_url_valid(schema) else json.loads(schema)

if not schema:
resource_id = resource_data.get('id')
if not resource_id:
return

context = {u'ignore_auth': True}
data_dict = {u'resource_id': resource_id}

try:
tk.get_action(u'resource_validation_delete')(context, data_dict)
except tk.ObjectNotFound:
pass
return

_format = resource_data.get('format', '').lower()
options = get_resource_validation_options(resource_data)
Expand All @@ -112,7 +125,7 @@ def run_sync_validation(resource_data):
if is_uploaded_file(new_file):
source = _get_new_file_stream(new_file)
else:
if is_url_valid(resource_data['url']):
if tk.h.is_url_valid(resource_data['url']):
source = resource_data['url']
else:
source = _get_uploaded_resource_path(resource_data)
Expand Down Expand Up @@ -226,7 +239,7 @@ def is_resource_could_be_validated(context, data_dict):
return False


def _get_default_schema(package_id):
def get_default_schema(package_id):
"""Dataset could have a default_schema, that could be used
to validate resource"""

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# http://packaging.python.org/en/latest/tutorial.html#version
version='v0.0.8-qgov.10',
version='0.0.8',

description='Data description and validation for CKAN with Frictionless Data tools.',

Expand Down

0 comments on commit 05f7d83

Please sign in to comment.