diff --git a/ckanext/validation/helpers.py b/ckanext/validation/helpers.py index ddb83c13..5252a4ef 100644 --- a/ckanext/validation/helpers.py +++ b/ckanext/validation/helpers.py @@ -5,6 +5,8 @@ from six import string_types from ckantoolkit import url_for, _, config, asbool, literal, h +from ckanext.validation.utils import get_default_schema + def _get_helpers(): validators = ( @@ -34,6 +36,9 @@ def get_validation_badge(resource, in_listing=False): if not resource.get('validation_status'): return '' + if not _get_schema_or_default_schema(resource): + return '' + statuses = { 'success': _('valid'), 'failure': _('failure'), @@ -67,6 +72,19 @@ def get_validation_badge(resource, in_listing=False): title=resource.get('validation_timestamp', '')) +def _get_schema_or_default_schema(resource): + + if asbool(resource.get('align_default_schema')): + schema = get_default_schema(resource['package_id']) + else: + schema = resource.get('schema') + + if schema and isinstance(schema, string_types): + schema = schema if is_url_valid(schema) else json.loads(schema) + + return schema + + def validation_extract_report_from_errors(errors): report = None diff --git a/ckanext/validation/logic/action.py b/ckanext/validation/logic/action.py index 84e3b598..818132e5 100644 --- a/ckanext/validation/logic/action.py +++ b/ckanext/validation/logic/action.py @@ -50,6 +50,13 @@ def resource_validation_run(context, data_dict): resource = tk.get_action(u'resource_show')(context, {u'id': resource_id}) + if not resource.get('schema'): + try: + tk.get_action(u'resource_validation_delete')(context, data_dict) + except tk.ObjectNotFound: + pass + return + # TODO: limit to sysadmins async_job = data_dict.get(u'async', True) diff --git a/ckanext/validation/tests/test_helpers.py b/ckanext/validation/tests/test_helpers.py index 5e92c0d6..2c50a453 100644 --- a/ckanext/validation/tests/test_helpers.py +++ b/ckanext/validation/tests/test_helpers.py @@ -6,6 +6,7 @@ from ckan.tests import factories import ckanext.validation.helpers as h +from ckanext.validation.tests.helpers import SCHEMA def _assert_validation_badge_status(resource, status): @@ -31,19 +32,29 @@ def test_get_validation_badge_no_validation(self, mock_is_validatable): assert h.get_validation_badge(resource) == '' - def test_get_validation_badge_success(self, mock_is_validatable): + def test_hide_validation_badge_no_schema(self, mock_is_validatable): resource = factories.Resource( format='CSV', validation_status='success', validation_timestamp=datetime.datetime.utcnow().isoformat()) + assert h.get_validation_badge(resource) == '' + + def test_get_validation_badge_success(self, mock_is_validatable): + resource = factories.Resource( + format='CSV', + validation_status='success', + validation_timestamp=datetime.datetime.utcnow().isoformat(), + schema=SCHEMA) + _assert_validation_badge_status(resource, 'success') def test_get_validation_badge_failure(self, mock_is_validatable): resource = factories.Resource( format='CSV', validation_status='failure', - validation_timestamp=datetime.datetime.utcnow().isoformat()) + validation_timestamp=datetime.datetime.utcnow().isoformat(), + schema=SCHEMA) _assert_validation_badge_status(resource, 'invalid') @@ -51,7 +62,8 @@ def test_get_validation_badge_error(self, mock_is_validatable): resource = factories.Resource( format='CSV', validation_status='error', - validation_timestamp=datetime.datetime.utcnow().isoformat()) + validation_timestamp=datetime.datetime.utcnow().isoformat(), + schema=SCHEMA) _assert_validation_badge_status(resource, 'error') @@ -59,6 +71,7 @@ def test_get_validation_badge_other(self, mock_is_validatable): resource = factories.Resource( format='CSV', validation_status='not-sure', + schema=SCHEMA, ) _assert_validation_badge_status(resource, 'unknown') diff --git a/ckanext/validation/tests/test_logic.py b/ckanext/validation/tests/test_logic.py index 1688c39d..44423af4 100644 --- a/ckanext/validation/tests/test_logic.py +++ b/ckanext/validation/tests/test_logic.py @@ -39,7 +39,7 @@ def test_resource_validation_run_not_exists(self): call_action('resource_validation_run', resource_id='not_exists') def test_resource_validation_wrong_format(self): - resource = factories.Resource(format='pdf') + resource = factories.Resource(format='pdf', schema=SCHEMA) with pytest.raises(tk.ValidationError) as err: call_action('resource_validation_run', resource_id=resource['id']) @@ -47,7 +47,7 @@ def test_resource_validation_wrong_format(self): assert 'Unsupported resource format' in err.value.error_dict['format'] def test_resource_validation_no_url_or_upload(self): - resource = factories.Resource(url='', format='csv') + resource = factories.Resource(url='', format='csv', schema=SCHEMA) with pytest.raises(tk.ValidationError) as err: call_action('resource_validation_run', resource_id=resource['id']) @@ -59,7 +59,7 @@ def test_resource_validation_no_url_or_upload(self): def test_resource_validation_with_url(self, mocked_responses): url = 'http://example.com' mocked_responses.add(responses.GET, url, body=VALID_CSV, stream=True) - resource = factories.Resource(url=url, format='csv') + resource = factories.Resource(url=url, format='csv', schema=SCHEMA) call_action('resource_validation_run', resource_id=resource['id']) @@ -90,7 +90,7 @@ def test_resource_validation_resets_existing_validation_object( url = 'https://some.url' mocked_responses.add(responses.GET, url, body=VALID_CSV, stream=True) - resource = {'format': 'csv', 'url': url} + resource = {'format': 'csv', 'url': url, 'schema': SCHEMA} dataset = factories.Dataset(resources=[resource]) @@ -252,6 +252,41 @@ def test_validation_fails_no_validation_object_stored(self): assert not Session.query(Validation).count() + def test_validation_skips_no_schema_provided(self): + """If the schema is missed - no validation entity should be saved in database""" + dataset = factories.Dataset() + + mock_upload = MockFileStorage(io.BytesIO(VALID_CSV), 'valid.csv') + + call_action('resource_create', + package_id=dataset['id'], + format='csv', + upload=mock_upload, + url_type='upload') + + assert not Session.query(Validation).count() + + def test_validation_report_delete_when_schema_removed(self): + """If the schema is deleted - no validation entity should be saved in database""" + dataset = factories.Dataset() + + mock_upload = MockFileStorage(io.BytesIO(VALID_CSV), 'valid.csv') + + resource_1 = call_action('resource_create', + package_id=dataset['id'], + format='csv', + upload=mock_upload, + url_type='upload', + schema=SCHEMA) + + assert Session.query(Validation).count() + + call_action('resource_patch', + id=resource_1['id'], + schema='') + + assert not Session.query(Validation).count() + def test_validation_passes_on_upload(self): dataset = factories.Dataset() @@ -522,14 +557,14 @@ def test_run_sysadmin(self): def test_run_non_auth_user(self): user = factories.User() org = factories.Organization() - dataset = factories.Dataset(owner_org=org['id'], - resources=[factories.Resource()]) + dataset = factories.Dataset(owner_org=org['id']) + resource = factories.Resource(package_id=dataset["id"]) context = {'user': user['name'], 'model': model} with pytest.raises(tk.NotAuthorized): call_auth('resource_validation_run', context=context, - resource_id=dataset['resources'][0]['id']) + resource_id=resource['id']) @pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False) @@ -539,13 +574,13 @@ def test_run_auth_user(self): 'name': user['name'], 'capacity': 'editor' }]) - dataset = factories.Dataset(owner_org=org['id'], - resources=[factories.Resource()]) + dataset = factories.Dataset(owner_org=org['id']) + resource = factories.Resource(package_id=dataset["id"]) context = {'user': user['name'], 'model': model} assert call_auth('resource_validation_run', context=context, - resource_id=dataset['resources'][0]['id']) + resource_id=resource['id']) @pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False) @@ -574,14 +609,15 @@ def test_delete_sysadmin(self): def test_delete_non_auth_user(self): user = factories.User() org = factories.Organization() - dataset = factories.Dataset(owner_org=org['id'], - resources=[factories.Resource()]) + dataset = factories.Dataset(owner_org=org['id']) + resource = factories.Resource(package_id=dataset["id"]) + context = {'user': user['name'], 'model': model} with pytest.raises(tk.NotAuthorized): call_auth('resource_validation_delete', context=context, - resource_id=dataset['resources'][0]['id']) + resource_id=resource['id']) @pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False) @@ -591,13 +627,13 @@ def test_delete_auth_user(self): 'name': user['name'], 'capacity': 'editor' }]) - dataset = factories.Dataset(owner_org=org['id'], - resources=[factories.Resource()]) + dataset = factories.Dataset(owner_org=org['id']) + resource = factories.Resource(package_id=dataset["id"]) context = {'user': user['name'], 'model': model} assert call_auth('resource_validation_delete', context=context, - resource_id=dataset['resources'][0]['id']) + resource_id=resource['id']) @pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False) @@ -615,13 +651,13 @@ def test_show_anon_public_dataset(self): user = factories.User() org = factories.Organization() dataset = factories.Dataset(owner_org=org['id'], - resources=[factories.Resource()], private=False) + resource = factories.Resource(package_id=dataset["id"]) context = {'user': user['name'], 'model': model} assert call_auth('resource_validation_show', context=context, - resource_id=dataset['resources'][0]['id']) + resource_id=resource['id']) @pytest.mark.ckan_config("ckanext.validation.run_on_create_sync", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_sync", False) @@ -629,11 +665,11 @@ def test_show_anon_private_dataset(self): user = factories.User() org = factories.Organization() dataset = factories.Dataset(owner_org=org['id'], - resources=[factories.Resource()], private=True) + resource = factories.Resource(package_id=dataset["id"]) context = {'user': user['name'], 'model': model} with pytest.raises(tk.NotAuthorized): call_auth('resource_validation_run', context=context, - resource_id=dataset['resources'][0]['id']) + resource_id=resource['id']) diff --git a/ckanext/validation/utils.py b/ckanext/validation/utils.py index 97c43c65..0d75c8d9 100644 --- a/ckanext/validation/utils.py +++ b/ckanext/validation/utils.py @@ -21,7 +21,6 @@ import ckanext.validation.settings as s from ckanext.validation.interfaces import IDataValidation from ckanext.validation.validation_status_helper import ValidationStatusHelper, StatusTypes -from ckanext.validation.helpers import is_url_valid from ckanext.validation.validators import resource_schema_validator log = logging.getLogger(__name__) @@ -50,7 +49,7 @@ def process_schema_fields(data_dict): uploader._get_underlying_file(schema_upload).read()) elif schema_url: - if not is_url_valid(schema_url): + if not tk.h.is_url_valid(schema_url): raise tk.ValidationError({u'schema_url': ['Must be a valid URL']}) try: @@ -99,10 +98,24 @@ def run_sync_validation(resource_data): schema = resource_data.get('schema') if tk.asbool(resource_data.get('align_default_schema')): - schema = _get_default_schema(resource_data["package_id"]) + schema = get_default_schema(resource_data["package_id"]) if schema and isinstance(schema, string_types): - schema = schema if is_url_valid(schema) else json.loads(schema) + schema = schema if tk.h.is_url_valid(schema) else json.loads(schema) + + if not schema: + resource_id = resource_data.get('id') + if not resource_id: + return + + context = {u'ignore_auth': True} + data_dict = {u'resource_id': resource_id} + + try: + tk.get_action(u'resource_validation_delete')(context, data_dict) + except tk.ObjectNotFound: + pass + return _format = resource_data.get('format', '').lower() options = get_resource_validation_options(resource_data) @@ -112,7 +125,7 @@ def run_sync_validation(resource_data): if is_uploaded_file(new_file): source = _get_new_file_stream(new_file) else: - if is_url_valid(resource_data['url']): + if tk.h.is_url_valid(resource_data['url']): source = resource_data['url'] else: source = _get_uploaded_resource_path(resource_data) @@ -226,7 +239,7 @@ def is_resource_could_be_validated(context, data_dict): return False -def _get_default_schema(package_id): +def get_default_schema(package_id): """Dataset could have a default_schema, that could be used to validate resource""" diff --git a/setup.py b/setup.py index 3029939e..aa28a505 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # http://packaging.python.org/en/latest/tutorial.html#version - version='v0.0.8-qgov.10', + version='0.0.8', description='Data description and validation for CKAN with Frictionless Data tools.',