Skip to content

Commit

Permalink
feat(dev): harvest schema;
Browse files Browse the repository at this point in the history
- Used ckanext-scheming for harvest plugin.
- Added en/fr schema.
- Added templating.
  • Loading branch information
JVickery-TBS committed Oct 4, 2024
1 parent ec80ff7 commit 753c0ca
Show file tree
Hide file tree
Showing 26 changed files with 654 additions and 354 deletions.
29 changes: 4 additions & 25 deletions ckanext/canada/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import ckanext.datastore.backend.postgres as datastore

from ckanext.canada import triggers
from ckanext.canada.harvesters import PORTAL_SYNC_ID
from ckanext.canada.harvesters import PORTAL_SYNC_ID, HARVESTER_ID

BOM = "\N{bom}"

Expand Down Expand Up @@ -1908,11 +1908,11 @@ def _drop_function(name, verbose=False):
pass


@canada.command(short_help="Creates the Portal Sync Harvester if it does not already exist.")
@canada.command(short_help="Creates harvest database tables.")
@click.option('-f', '--refresh', is_flag=True, type=click.BOOL, help='Forces the refresh of all the source objects in the database.')
@click.option('-q', '--quiet', is_flag=True, type=click.BOOL, help='Suppresses human inspection.')
def init_portal_harvester(refresh=False, quiet=False):
"""Creates the Portal Sync Harvester if it does not already exist."""
def init_harvest_plugin(refresh=False, quiet=False):
"""Creates harvest database tables."""

# check to see if the harvest_object table exists.
# bad workaround, but subclassing a plugin does not allow
Expand Down Expand Up @@ -1960,24 +1960,3 @@ def init_portal_harvester(refresh=False, quiet=False):
model.Session.query(model.Package).filter(model.Package.id == PORTAL_SYNC_ID).delete()
model.Session.commit()
_success_message('Successfully purged harvest source objects.')

try:
package = get_action('package_show')(_get_site_user_context(), {'id': PORTAL_SYNC_ID})
if package.get('state') != 'active':
raise NotFound
_success_message('Portal Sync Harvester already exists.')
except NotFound:
_error_message('Portal Sync Harvester does not exist, creating it now...')
pkg_dict = {
'type': 'harvest',
'id': PORTAL_SYNC_ID,
'name': PORTAL_SYNC_ID,
'title': 'Portal Sync',
'source_type': 'portal_sync',
'url': toolkit.config.get('ckan.site_url', 'registry'),
'source': 'registry',
'target': 'portal',
}
get_action('package_create')(_get_site_user_context(), pkg_dict)
_success_message('Created the Portal Sync Harvester.')
pass
16 changes: 13 additions & 3 deletions ckanext/canada/harvesters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ckanext.harvest.interfaces import IHarvester

PORTAL_SYNC_ID = 'portal_sync_harvester'
HARVESTER_ID = 'portal_sync'


class PortalSync(plugins.SingletonPlugin):
Expand All @@ -14,10 +15,19 @@ class PortalSync(plugins.SingletonPlugin):
"""
plugins.implements(IHarvester)


def info(self):
return {
'name': 'portal_sync',
'title': plugins.toolkit._('Portal Sync'),
'description': plugins.toolkit._('Syncs Datasets, Resources, Views, and DataStores from the Registry to the Portal.'),
'name': HARVESTER_ID,
'title': 'Portal Sync',
'title_translated': {
'en': 'Portal Sync',
'fr': 'FR Portal Sync FR',
},
'description': 'Syncs Datasets, Resources, Views, and DataStores from the Registry to the Portal.',
'description_translated': {
'en': 'Syncs Datasets, Resources, Views, and DataStores from the Registry to the Portal.',
'fr': 'FR Syncs Datasets, Resources, Views, and DataStores from the Registry to the Portal. FR',
},
'form_config_interface': 'Text'
}
7 changes: 7 additions & 0 deletions ckanext/canada/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,3 +901,10 @@ def is_user_locked(user_name):
return True

return False


def get_harvester_info(source_type=None):
    """Look up the registered harvester whose name matches *source_type*.

    Calls the ``harvesters_info_show`` action as the current request user
    (``g.user``) and returns the first entry whose ``name`` equals
    ``source_type``. Returns ``None`` when no entry matches.
    """
    registered = t.get_action('harvesters_info_show')({'user': g.user}, {})
    return next(
        (entry for entry in registered if entry.get('name') == source_type),
        None)
182 changes: 64 additions & 118 deletions ckanext/canada/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
from ckan.lib.app_globals import set_app_global
from ckan.plugins.core import plugin_loaded

from ckan.logic.schema import default_extras_schema
from ckan.logic.converters import convert_to_extras, convert_from_extras

from ckan.plugins.toolkit import (
g,
h,
Expand Down Expand Up @@ -48,6 +45,7 @@
from ckanext.security.plugin import CkanSecurityPlugin
from ckanext.harvest.plugin import Harvest
from ckanext.harvest.views import harvester
from ckanext.harvest.logic import validators as harvest_validators
from ckanext.canada.view import (
canada_views,
CanadaDatasetEditView,
Expand Down Expand Up @@ -201,46 +199,72 @@ class CanadaHarvestPlugin(Harvest):
"""
p.implements(p.IValidators, inherit=True)

disallow_views = ['harvester.edit',
'harvester.delete']
disabled_endpoints = ['harvester.about']


# IValidators
def get_validators(self):
return {'canada_harvester_id':
validators.canada_harvester_id,
'canada_harvester_type':
validators.canada_harvester_type,
'canada_harvester_source_type':
validators.canada_harvester_source_type,
'canada_harvester_url':
validators.canada_harvester_url,
'canada_harvester_source':
validators.canada_harvester_source,
'canada_harvester_target':
validators.canada_harvester_target,
'canada_harvester_title':
validators.canada_harvester_title,}
# IDatasetForm
def package_types(self):
    """Claim no package types here so that ckanext-scheming is used."""
    handled_types = []
    return handled_types


# IActions
def get_actions(self):
action_functions = super(CanadaHarvestPlugin, self).get_actions()
action_functions['package_show'] = logic.portal_sync_package_show
return action_functions
# ITemplateHelpers
def get_helpers(self):
    """Return the parent plugin's template helpers plus ``get_harvester_info``."""
    template_helpers = super(CanadaHarvestPlugin, self).get_helpers()
    # Added on top of whatever the parent class already exposes.
    template_helpers['get_harvester_info'] = helpers.get_harvester_info
    return template_helpers


# IFacets
def dataset_facets(self, facets_dict, package_type):
    """Return the facets for the given package type.

    For the ``harvest`` package type the incoming ``facets_dict`` is
    replaced by the frequency, source type and organization facets.
    For every other package type ``facets_dict`` is returned untouched.
    """
    if package_type == 'harvest':
        return {'frequency': _('Frequency'),
                'source_type': _('Source Type'),
                'organization': _('Organization'),}

    return facets_dict


# IValidators
def get_validators(self):
    """Return the validators registered by this plugin, keyed by name.

    Contains the two ``portal_sync_*`` validators from ``validators``
    followed by ten validators taken from ``harvest_validators``
    (``ckanext.harvest.logic.validators``), each registered under the
    same name as the attribute it is read from.
    """
    harvest_validator_names = (
        'harvest_source_url_validator',
        'harvest_source_type_exists',
        'harvest_source_config_validator',
        'harvest_source_extra_validator',
        'harvest_source_frequency_exists',
        'dataset_type_exists',
        'harvest_source_convert_from_config',
        'harvest_source_id_exists',
        'harvest_job_exists',
        'harvest_object_extras_validator',
    )

    registry = {
        'portal_sync_id': validators.portal_sync_id,
        'portal_sync_limit': validators.portal_sync_limit,
    }
    for validator_name in harvest_validator_names:
        registry[validator_name] = getattr(harvest_validators, validator_name)
    return registry


# IAuthFunctions
def get_auth_functions(self):
    """Return the parent plugin's auth functions with ``harvest_log_list`` replaced."""
    inherited_auth = super(CanadaHarvestPlugin, self).get_auth_functions()
    # Swap in this extension's implementation of harvest_log_list.
    inherited_auth['harvest_log_list'] = auth.harvest_log_list
    #TODO: check other auth functions from ckanext-harvest that need limitations??
    return inherited_auth


#IBlueprint
def get_blueprint(self):
"""Custom blueprints for the Portal Sync single harvest source."""
harvester.before_request(self._redirect_harvest_dataset_endpoints)
harvester.before_request(self._redirect_harvest_endpoints)
return [harvester]


Expand All @@ -250,94 +274,10 @@ def _not_sysadmin(self, contextual_user=None):
return not contextual_user or not is_sysadmin(contextual_user)


def _redirect_harvest_dataset_endpoints(self):
if self._not_sysadmin():
def _redirect_harvest_endpoints(self):
if self._not_sysadmin() or request.endpoint in self.disabled_endpoints:
return abort(404)
#FIXME: redirect loop!! based on self.disallow_views??
# return h.redirect_to('harvester.admin', id=PORTAL_SYNC_ID)


def _redirect_harvest_views(self):
if self._not_sysadmin():
return abort(404)
#FIXME: redirect loop!! based on self.disallow_views??
# return h.redirect_to('harvester.admin', id=PORTAL_SYNC_ID)


#IDatasetForm
def prepare_dataset_blueprint(self, package_type, blueprint):
# type: (str,Blueprint) -> Blueprint
"""Redirect Harvest endpoints accessed from /harvest/"""
if package_type == 'harvest':
blueprint.before_request(self._redirect_harvest_dataset_endpoints)
return blueprint


#IDatasetForm
def prepare_resource_blueprint(self, package_type, blueprint):
# type: (str,Blueprint) -> Blueprint
"""Redirect Harvest endpoints accessed from /harvest/"""
if package_type == 'harvest':
blueprint.before_request(self._redirect_harvest_dataset_endpoints)
return blueprint


#IDatasetForm
def validate(self, context, data_dict, schema, action):
"""Only sysadmins can create a harvest source."""
if data_dict.get('type') == 'harvest' and self._not_sysadmin(context.get('user')):
return data_dict, {'type': [
"Unsupported dataset type: {t}".format(t=data_dict.get('type'))]}


#IDatasetForm
def create_package_schema(self):
ignore = get_validator('ignore')
canada_harvester_id = get_validator('canada_harvester_id')
canada_harvester_type = get_validator('canada_harvester_type')
canada_harvester_source_type = get_validator('canada_harvester_source_type')
canada_harvester_url = get_validator('canada_harvester_url')
canada_harvester_source = get_validator('canada_harvester_source')
canada_harvester_target = get_validator('canada_harvester_target')
canada_harvester_title = get_validator('canada_harvester_title')

return {
'id': [canada_harvester_id],
'name': [canada_harvester_id],
'type': [canada_harvester_type],
'source_type': [canada_harvester_source_type, convert_to_extras],
'url': [canada_harvester_url],
'source': [canada_harvester_source, convert_to_extras],
'target': [canada_harvester_target, convert_to_extras],
'title': [canada_harvester_title],
'extras': default_extras_schema(),
'__extras': [ignore],
}


#IDatasetForm
def update_package_schema(self):
return self.create_package_schema()


#IDatasetForm
def show_package_schema(self):
not_empty = get_validator('not_empty')
ignore = get_validator('ignore')
package_id_exists = get_validator('package_id_exists')

return {
'id': [not_empty, package_id_exists],
'name': [not_empty],
'type': [not_empty],
'source_type': [convert_from_extras, not_empty],
'url': [not_empty],
'source': [convert_from_extras, not_empty],
'target': [convert_from_extras, not_empty],
'title': [not_empty],
'extras': default_extras_schema(),
'__extras': [ignore],
}


class CanadaDatasetsPlugin(SchemingDatasetsPlugin):
Expand Down Expand Up @@ -388,7 +328,7 @@ def get_blueprint(self):
return blueprints


def _redirect_pd_dataset_endpoints(blueprint):
def _redirect_dataset_endpoints(blueprint):
"""
Runs before request for /dataset and /dataset/<pkg id>/resource
Expand All @@ -404,7 +344,8 @@ def _redirect_pd_dataset_endpoints(blueprint):
return h.redirect_to('canada.type_redirect',
resource_name=package_type)
if package_type == 'harvest':
return abort(404)
if '_resource.' in request.endpoint or '.resources' in request.endpoint or not is_sysadmin(g.user):
return abort(404)


#IDatasetForm
Expand All @@ -430,7 +371,7 @@ def prepare_dataset_blueprint(self, package_type, blueprint):
strict_slashes=False
)
# redirect PD endpoints accessed from /dataset/<pd pkg id>
blueprint.before_request(self._redirect_pd_dataset_endpoints)
blueprint.before_request(self._redirect_dataset_endpoints)
return blueprint


Expand All @@ -450,7 +391,7 @@ def prepare_resource_blueprint(self, package_type, blueprint):
methods=['GET', 'POST']
)
# redirect PD endpoints accessed from /dataset/<pd pkg id>/resource
blueprint.before_request(self._redirect_pd_dataset_endpoints)
blueprint.before_request(self._redirect_dataset_endpoints)
return blueprint

# IDataValidation
Expand Down Expand Up @@ -824,7 +765,9 @@ def update_config(self, config):
ckanext.canada:schemas/dataset.yaml
ckanext.canada:schemas/info.yaml
ckanext.canada:schemas/prop.yaml
"""
""" + (
"ckanext.canada:schemas/harvest.yaml" if plugin_loaded('canada_harvest') else ""
)
config['scheming.organization_schemas'] = 'ckanext.canada:schemas/organization.yaml'

# Pretty output for Feeds
Expand Down Expand Up @@ -854,6 +797,9 @@ def update_config(self, config):
def dataset_facets(self, facets_dict, package_type):
''' Update the facets_dict and return it. '''

if package_type == 'harvest':
return facets_dict

facets_dict.update({
'portal_type': _('Portal Type'),
'organization': _('Organization'),
Expand Down
Loading

0 comments on commit 753c0ca

Please sign in to comment.