Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resource Format Auto Detect #1369

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion ckanext/canada/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from ckan.controllers.package import PackageController
from ckan.logic import parse_params, NotFound

from ckanext.canada.helpers import normalize_strip_accents, canada_date_str_to_datetime
from ckanext.canada.helpers import normalize_strip_accents, canada_date_str_to_datetime, canada_guess_mimetype
from ckanext.canada.urlsafe import url_part_escape, url_part_unescape
from pylons.i18n import _
from pylons import config, session
Expand Down Expand Up @@ -738,6 +738,31 @@ def action(self, logic_function, ver=None):
return self._finish_ok(return_dict)


def guess_resource_format(self, ver=None):
    """
    API endpoint that guesses a mimetype from the ``url`` key of the
    JSON request body.

    Responds with a bad-request result when the body cannot be read,
    is not a dictionary, or has no (truthy) ``url`` key. Otherwise
    responds with ``{'success': True, 'result': <mimetype>}`` where the
    mimetype is whatever ``canada_guess_mimetype`` returned for the url.
    """
    try:
        payload = self._get_request_data(try_url_params=False)
    except ValueError as err:
        log.info('Bad API request data: %s', err)
        return self._finish_bad_request(_('JSON Error: %s') % err)

    if not isinstance(payload, dict):
        # this occurs if the request body is blank
        log.info('Bad API request data - not dict: %r', payload)
        detail = ('Request data JSON decoded to %r but '
                  'it needs to be a dictionary.' % payload)
        return self._finish_bad_request(_('Bad request data: %s') % detail)

    url = payload.get('url', None)
    if not url:
        log.info('Bad API request data - missing url key: %r', payload)
        detail = 'Request data %r missing url key.' % payload
        return self._finish_bad_request(_('Bad request data: %s') % detail)

    #TODO: convert mimetype to scheming value?
    return self._finish_ok({
        'success': True,
        'result': canada_guess_mimetype(url),
    })


def _log_api_access(context, data_dict):
if 'package' not in context:
if 'resource_id' not in data_dict:
Expand Down
22 changes: 22 additions & 0 deletions ckanext/canada/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import geomet.wkt as wkt
import json as json
from markupsafe import Markup, escape
from six.moves.urllib.parse import urlparse
import mimetypes

ORG_MAY_PUBLISH_OPTION = 'canada.publish_datasets_organization_name'
ORG_MAY_PUBLISH_DEFAULT_NAME = 'tb-ct'
Expand Down Expand Up @@ -545,3 +547,23 @@ def get_user_email(user_id):

except NotFound as e:
return ""


def canada_guess_mimetype(url):
    """
    Returns mimetype based on url.

    :param url: a web address, or a bare filename (uploaded files have
        only the filename as their url)
    :returns: the guessed mimetype string; ``'text/html'`` when nothing
        could be guessed but the value has a URL scheme; ``None`` when
        the value is empty or nothing could be guessed for a bare filename
    """
    if not url:
        # nothing to inspect; guess_type/urlparse are not safe on None
        return None

    mimetype, _encoding = mimetypes.guess_type(url)
    if mimetype:
        return mimetype

    parsed = urlparse(url)

    # Retry on the path component alone so that a query string or
    # fragment after the filename does not hide the file extension.
    if parsed.path and parsed.path != url:
        mimetype, _encoding = mimetypes.guess_type(parsed.path)
        if mimetype:
            return mimetype

    # if we cannot guess the mimetype, check if it is an actual web
    # address, in which case we can set the mimetype to text/html.
    # Uploaded files have only the filename as url, so the scheme tells
    # us whether it is an actual web address.
    if parsed.scheme:
        return 'text/html'
    return None
21 changes: 21 additions & 0 deletions ckanext/canada/internal/static/registry_resource_edit.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Enhances the resource edit form: turns the format <select> into a
// Select2 widget once the page (and therefore jQuery) has finished loading.
window.addEventListener('load', function(){
  $(document).ready(function() {

    let formatField = $('#field-format');
    if ( formatField.length > 0 ){

      // formatField is already a jQuery object; no need to re-wrap it
      formatField.select2({});

      // Restyle the container element that Select2 inserts next to the
      // original field so it lays out like the other medium-width controls.
      formatField.parent()
        .children('#s2id_field-format')
        .addClass('control-medium')
        .removeClass('form-control')
        .css({'display': 'block'});

    }

    let urlField = $('#field-image-url');
    let uploadField = $('#field-image-upload');
    // TODO: onchange above fields to clear the select2 field and call guess_resource_format endpoint

  });
});
74 changes: 73 additions & 1 deletion ckanext/canada/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@
import os.path
import logging
from pylons.i18n import _
from six import string_types
import ckan.plugins as p
from ckan.lib.plugins import DefaultDatasetForm, DefaultTranslation
import ckan.lib.helpers as hlp
from ckan.logic import validators as logic_validators
from ckan.logic import (
validators as logic_validators,
check_access
)
from routes.mapper import SubMapper
from paste.reloader import watch_file

Expand All @@ -21,6 +25,7 @@
from ckanext.canada import helpers
from ckanext.canada import activity as act
from ckanext.extendedactivity.plugins import IActivity
from ckanext.scheming.helpers import scheming_get_preset

import json

Expand Down Expand Up @@ -153,6 +158,13 @@ def before_map(self, map):
controller='ckanext.canada.controller:CanadaDatastoreController',
action='delete_datastore_table',
)
map.connect(
'/api{ver:/3|}/util/guess_resource_format',
ver='/3',
action='guess_resource_format',
controller='ckanext.canada.controller:CanadaApiController',
conditions={'method':['POST']},
)

return map

Expand Down Expand Up @@ -236,6 +248,7 @@ def get_actions(self):
},
resource_view_update=resource_view_update_bilingual,
resource_view_create=resource_view_create_bilingual,
format_autocomplete=schema_format_autocomplete,
)

# IDataValidation
Expand Down Expand Up @@ -298,6 +311,51 @@ def resource_view_update_bilingual(up_func, context, data_dict):
)


@chained_action
def schema_format_autocomplete(func, context, data_dict):
    '''Return a list of resource format values matching a string.

    The candidates are the choices of the ``canada_resource_format``
    scheming preset rather than formats already stored on resources; the
    chained original action (``func``) is intentionally not called.

    A choice matches when the search string is contained in its label
    (plain string, or the current-language entry of a language dict),
    is equal (case-insensitively) to one of its ``replaces`` entries,
    or is contained in its value.

    :param q: the string to search for
    :type q: string
    :param limit: the maximum number of resource formats to return (optional,
        default: ``8``)
    :type limit: int

    :rtype: list of strings

    '''
    default_limit = 8

    check_access('format_autocomplete', context, data_dict)

    # lower-case the query once instead of on every comparison
    q = data_dict['q'].lower()

    # the limit may arrive as a string from request parameters; compare
    # as an integer and fall back to the default when it is not numeric
    try:
        limit = int(data_dict.get('limit', default_limit))
    except (TypeError, ValueError):
        limit = default_limit

    fmt_choices = scheming_get_preset('canada_resource_format')['choices']

    #TODO: move this to JS method with localized choices??

    choices = []
    for f in fmt_choices:
        if len(choices) >= limit:
            break

        matched = False

        # check labels if they exist. Can be str, or dict of {'en':str,'fr':str}
        label = f.get('label')
        if isinstance(label, string_types):
            matched = q in label.lower()
        elif isinstance(label, dict):
            # NOTE(review): relies on a helpers object named `h` being in
            # scope in this module -- confirm
            lang = h.lang()
            matched = lang in label and q in label[lang].lower()

        # check replacements list (whole-entry match, case-insensitive)
        if not matched:
            replaced = f.get('replaces') or []
            matched = q in [e.lower() for e in replaced]

        # check the actual value
        if not matched:
            matched = q in f['value'].lower()

        if matched:
            choices.append(f['value'])

    return choices


class DataGCCAPublic(p.SingletonPlugin, DefaultTranslation):
"""
Plugin for public-facing version of Open Government site, aka the "portal"
Expand Down Expand Up @@ -505,6 +563,7 @@ class DataGCCAForms(p.SingletonPlugin, DefaultDatasetForm):
"""
p.implements(p.IActions)
p.implements(p.IValidators, inherit=True)
p.implements(p.IResourceController, inherit=True)

# IActions

Expand Down Expand Up @@ -577,10 +636,23 @@ def get_validators(self):
validators.json_string,
'json_string_has_en_fr_keys':
validators.json_string_has_en_fr_keys,
'canada_resource_format_replacements':
validators.canada_resource_format_replacements,
'canada_guess_resource_format':
validators.canada_guess_resource_format,
'resource_schema_validator':
validators.canada_resource_schema_validator,
}

# IResourceController

def before_update(self, context, resource, data_dict):
    """
    Remember the resource's current url in the context, under the
    ``old_resource_url`` key, for use in the validators.

    Nothing is stored unless the object has both a url and a
    package_id, which is how an actual resource is recognised here.
    """
    previous_url = resource.get('url', None)
    if not previous_url or not resource.get('package_id', None):
        return
    context['old_resource_url'] = previous_url


class LogExtraMiddleware(object):
def __init__(self, app, config):
Expand Down
16 changes: 7 additions & 9 deletions ckanext/canada/schemas/presets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3220,12 +3220,13 @@ presets:
label:
en: "Format"
fr: "Format"
form_snippet: select.html
display_snippet: select.html
validators: canada_non_related_required scheming_choices
form_placeholder:
en: "eg. CSV, XML or JSON"
fr: "par exemple CSV, XML ou JSON"
validators: canada_guess_resource_format scheming_required clean_format unicode_safe canada_non_related_required canada_resource_format_replacements scheming_choices
required: true
form_include_blank_choice: true
form_panel: resource
form_snippet: select.html
choices:
- value: AAC
mimetype: audio/mp4
Expand Down Expand Up @@ -3299,12 +3300,9 @@ presets:
mimetype: application/postscript
replaces: [eps]
- value: ESRI REST
- value: EXE
mimetype: application/vnd.microsoft.portable-executable
replaces: [exe]
- value: FGDB/GDB
openness_score: 3
replaces: ["FGDB / GDB", "fgdb / gdb"]
replaces: ["FGDB / GDB", "fgdb / gdb"]
mimetype: application/x-filegdb
- value: FITS
openness_score: 3
Expand Down Expand Up @@ -3368,7 +3366,7 @@ presets:
replaces: [jpeg 2000]
- value: JPG
mimetype: image/jpeg
replaces: [jpg]
replaces: ["jpg", "jpeg"]
- value: JSON
mimetype: application/json
openness_score: 3
Expand Down
11 changes: 11 additions & 0 deletions ckanext/canada/templates/internal/package/resource_edit_base.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,14 @@
{{ h.build_nav_icon('resource_data', _('DataStore'), id=pkg.name, resource_id=res.id) }}
{% endif %}
{% endblock %}

{% block scripts %}
<script type="text/javascript" src="{{ h.url_for_static('/registry_resource_edit.js') }}" ></script>
{{ super() }}
{% endblock %}

{% block custom_styles %}
{{ super() }}
<link rel="stylesheet" href="{{ h.url_for_static('base/vendor/select2/select2.css') }}" />
<link rel="stylesheet" href="{{ h.url_for_static('base/vendor/select2/select2-bootstrap.css') }}" />
{% endblock %}
54 changes: 54 additions & 0 deletions ckanext/canada/tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,57 @@ def test_validation_schema(self):
assert_raises(ValidationError,
self.sysadmin_action.resource_create,
**resource_data)


def test_auto_resource_format(self):
    "Resource formats should be guessed at creation and during URL change"
    dataset = self.sysadmin_action.package_create(**self.complete_pkg)

    # no 'format' key is supplied at creation, so it should be guessed

    resource_data = {
        'name_translated': {'en': u'Full text.', 'fr': u'Full text.'},
        'url': u'http://www.annakarenina.com/download/test.jpeg',
        'size': 42,
        'resource_type': 'dataset',
        'language': ['zxx'],
        'package_id': dataset['id']
    }

    created = self.sysadmin_action.resource_create(**resource_data)

    assert 'format' in created
    assert created['format'] == 'JPG'

    resource_data['id'] = created['id']

    # Each case submits the previously stored format together with a new
    # url and states the format expected back from resource_update:
    #   1. changing a resource url should re-guess the format
    #   2. a file upload (bare filename as url) should have its format guessed
    #   3. format guessing should soft fallback to HTML if the url is a
    #      web address without an extension
    update_cases = (
        ('JPG', 'http://www.annakarenina.com/download/test.docx', 'DOCX'),
        ('DOCX', 'test.png', 'PNG'),
        ('PNG', 'http://www.annakarenina.com/download', 'HTML'),
    )

    for submitted_format, new_url, expected_format in update_cases:
        resource_data['format'] = submitted_format
        resource_data['url'] = new_url

        updated = self.sysadmin_action.resource_update(**resource_data)

        assert 'format' in updated
        assert updated['format'] == expected_format
1 change: 0 additions & 1 deletion ckanext/canada/tests/test_webforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ def test_new_dataset_missing_fields(self):
assert 'Title (English): Missing value' in response
assert 'Title (French): Missing value' in response
assert 'Resource Type: Missing value' in response
assert 'Format: Missing value' in response


def filled_dataset_form(self, response):
Expand Down
Loading
Loading