Skip to content

Commit

Permalink
feat(dev): harvest;
Browse files Browse the repository at this point in the history
- Continued PortalSync class.
- Limited clearing functionality on the PortalSync type.
- Added some flag fields to packages to store last sync time.
  • Loading branch information
JVickery-TBS committed Oct 16, 2024
1 parent adc641f commit dc787f8
Show file tree
Hide file tree
Showing 10 changed files with 518 additions and 92 deletions.
32 changes: 32 additions & 0 deletions ckanext/canada/assets/public/canada_public.css
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,38 @@ body[data-package-type="harvest"] main.container .nav.nav-tabs li.active a{
body[data-package-type="harvest"] main.container .nav.nav-tabs li:not(.active) a{
color: #892f34 !important;
}
/* Harvest job state toggles: drop the link underline and the outline
   in the default, hover, active and focus states. */
.harvest-state-button,
.harvest-state-button:hover,
.harvest-state-button:active,
.harvest-state-button:focus{
text-decoration: none !important;
outline: none !important;
}
/* Each state class below recolours the nested label <span> while the
   toggle is hovered, active or focused. */
/* errored */
.harvest-state-button.errored:hover span,
.harvest-state-button.errored:active span,
.harvest-state-button.errored:focus span{
background-color: #954143;
}
/* added */
.harvest-state-button.added:hover span,
.harvest-state-button.added:active span,
.harvest-state-button.added:focus span{
background-color: #79db69;
}
/* updated */
.harvest-state-button.updated:hover span,
.harvest-state-button.updated:active span,
.harvest-state-button.updated:focus span{
background-color: #a277ff;
}
/* deleted */
.harvest-state-button.deleted:hover span,
.harvest-state-button.deleted:active span,
.harvest-state-button.deleted:focus span{
background-color: #dc7c7f;
}
/* not modified */
.harvest-state-button.not-modified:hover span,
.harvest-state-button.not-modified:active span,
.harvest-state-button.not-modified:focus span{
background-color: #c4c4c4;
}
/*#########################
## END ##
## Harvest Package Type ##
Expand Down
9 changes: 9 additions & 0 deletions ckanext/canada/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,12 @@ def organization_show(context, data_dict):
def harvest_log_list(context, data_dict):
    """
    Auth function for viewing harvest logs.

    Always reports failure; only sysadmins can view harvest logs.
    """
    return dict(success=False)


def harvest_source_update(context, data_dict):
    """
    Auth function for updating harvest sources.

    Always reports failure; only sysadmins can update harvest sources.
    A fair amount of harvest auth actions cascade to this one.
    """
    denied = {'success': False}
    return denied
323 changes: 233 additions & 90 deletions ckanext/canada/harvesters.py

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions ckanext/canada/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import ckan.lib.datapreview as datapreview

from ckanext.security.cache.login import max_login_attempts
from ckanext.harvest.model import HarvestObject

try:
from ckanext.xloader.utils import XLoaderFormats
Expand Down Expand Up @@ -908,3 +909,23 @@ def get_harvester_info(source_type=None):
for harvester_info in harvesters_info:
if harvester_info.get('name') == source_type:
return harvester_info


def get_packages_from_harvest_job(job, action):
    """
    Returns a list of detailed Package info for HarvestObjects
    from a given action state and HarvestJob.

    :param job: dictized harvest job; only its ``id`` key is read.
    :param action: report status to filter the HarvestObjects by.
    :returns: list of ``package_show`` dicts, one per matching HarvestObject
        guid. Packages that do not exist or that the current user may not
        see are skipped. Empty list when nothing matches.
    """
    rows = model.Session.query(HarvestObject.guid)\
        .filter(HarvestObject.harvest_job_id == job.get('id'))\
        .filter(HarvestObject.report_status == action).all()
    harvested_packages = []
    for row in rows:
        # Only one column is selected, so each result is a one-element row;
        # unwrap it so package_show receives a plain id rather than the row.
        guid = row[0]
        if not guid:
            # No guid stored on this HarvestObject; nothing to look up.
            continue
        try:
            pkg_dict = t.get_action('package_show')({'user': g.user}, {'id': guid})
        except (t.ObjectNotFound, t.NotAuthorized):
            continue
        harvested_packages.append(pkg_dict)
    return harvested_packages
28 changes: 27 additions & 1 deletion ckanext/canada/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
from ckanext.scheming.helpers import scheming_get_preset

from ckanext.datastore.backend import DatastoreBackend
from ckanext.canada.harvesters import PORTAL_SYNC_ID
from ckanext.canada.harvesters import PORTAL_SYNC_ID, HARVESTER_ID
from ckanext.harvest.model import HarvestSource
from ckanext.harvest.logic.action.update import harvest_source_clear as super__harvest_source_clear

MIMETYPES_AS_DOMAINS = [
'application/x-msdos-program', # .com
Expand Down Expand Up @@ -567,3 +569,27 @@ def portal_sync_package_show(up_func, context, data_dict):
}

return package


def harvest_source_clear(context, data_dict):
    """
    Wraps the harvest_source_clear from the Harvest plugin to prevent
    Dataset deletion/purging for the PortalSync source type.
    Switches the harvest_source_clear action to the harvest_source_job_history_clear action.

    :param context: action context; its ``user`` is forwarded to the
        job history clear action when present.
    :param data_dict: ``id`` is the reference used to load the HarvestSource.
    :returns: result of ``harvest_source_job_history_clear`` when the source
        type equals HARVESTER_ID, otherwise the result of the Harvest
        plugin's own ``harvest_source_clear``.
    :raises ObjectNotFound: if no harvest source matches ``data_dict['id']``.
    """
    check_access('harvest_source_clear', context, data_dict)

    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise ObjectNotFound('Harvest source %s does not exist' % harvest_source_id)

    if source.type != HARVESTER_ID:
        return super__harvest_source_clear(context, data_dict)

    # Prefer the user the caller put in the context, so the action does not
    # depend on the request-bound g when the caller supplied a user itself.
    # g.user is only consulted as a fallback.
    user = context.get('user') or g.user
    # Only clear job history; pass the id taken from the loaded source object.
    return get_action('harvest_source_job_history_clear')(
        {'user': user}, {'id': source.id, 'keep_current': False})
13 changes: 12 additions & 1 deletion ckanext/canada/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def package_types(self):
def get_helpers(self):
    """
    Return the parent plugin's template helpers plus the harvest helpers
    from this extension's ``helpers`` module.
    """
    helper_functions = super(CanadaHarvestPlugin, self).get_helpers()
    helper_functions.update({
        'get_harvester_info': helpers.get_harvester_info,
        'get_packages_from_harvest_job': helpers.get_packages_from_harvest_job,
    })
    return helper_functions


Expand Down Expand Up @@ -255,10 +256,18 @@ def get_validators(self):
harvest_validators.harvest_object_extras_validator,}


# IActions
def get_actions(self):
    """
    Return the parent plugin's actions with ``harvest_source_clear``
    replaced by this extension's implementation.
    """
    action_functions = super(CanadaHarvestPlugin, self).get_actions()
    action_functions.update({'harvest_source_clear': logic.harvest_source_clear})
    return action_functions


# IAuthFunctions
def get_auth_functions(self):
    """
    Return the parent plugin's auth functions with the harvest log list
    and harvest source update checks replaced by this extension's versions.
    """
    auth_functions = super(CanadaHarvestPlugin, self).get_auth_functions()
    auth_functions.update({
        'harvest_log_list': auth.harvest_log_list,
        'harvest_source_update': auth.harvest_source_update,
    })
    #TODO: check other auth functions from ckanext-harvest that need limitations??
    return auth_functions

Expand Down Expand Up @@ -491,7 +500,9 @@ def before_index(self, data_dict):
if 'fgp_viewer' in data_dict.get('display_flags', []):
data_dict['fgp_viewer'] = 'map_view'

titles = json.loads(data_dict.get('title_translated', '{}'))
titles = data_dict.get('title_translated', '{}')
if not isinstance(titles, dict):
titles = json.loads(titles)
data_dict['title_fr'] = titles.get('fr', '')
data_dict['title_string'] = titles.get('en', '')

Expand Down
3 changes: 3 additions & 0 deletions ckanext/canada/schemas/dataset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ dataset_fields:
- preset: canada_dataset_id
- preset: canada_dataset_name

- preset: canada_harvest_date
- preset: canada_harvest_current

- preset: canada_collection
form_restrict_choices_to:
- primary
Expand Down
17 changes: 17 additions & 0 deletions ckanext/canada/schemas/presets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ presets:
validators: if_empty_same_as(id)
unicode not_empty canada_validate_generate_uuid

# We store the last successful sync datetime from ckanext.canada.harvesters:PortalSync
- preset_name: canada_harvest_date
values:
create_validators: ignore_missing ignore_not_sysadmin string_safe isodate
display_snippet: null
field_name: last_sync_time
form_snippet: null

# We store a flag if the package is current or not from ckanext.canada.harvesters:PortalSync
# NOTE: boolean_validator may be False if value is missing, so check for last_sync_time
- preset_name: canada_harvest_current
values:
create_validators: ignore_missing ignore_not_sysadmin string_safe boolean_validator
display_snippet: null
field_name: up_to_date
form_snippet: null

# Field = Collection Type.
# Display the English or the French Description from the Collection Type code table (refer to Data Migration), depending on the selected language.
# {The collection (domain specific) to which the metadata record belongs, Example: Non-Spatial}
Expand Down
127 changes: 127 additions & 0 deletions ckanext/canada/templates/snippets/job_details.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
{#
NOTE: copied from ckanext-harvest for lack of blocks.
Adds expansion to show what was modified. TODO: upstream contrib??

Displays information for a particular harvest job, including:

* counts for added, updated, deleted or errored datasets
* table with general details
* table with a summary of the most common errors on this job

job - dictized harvest job object

Example:

{% snippet 'snippets/job_details.html', job=job %}

#}

{% set stats = job.stats %}

{# Summary for a finished job: one count label per state. A label becomes a
   collapse toggle when its count is above zero, and each such state gets a
   collapsible panel below the summary. #}
{% if job.status == 'Finished' %}
  <p>
    {# Add link to custom view to show what was modified #}
    {% if 'errored' in stats and stats['errored'] > 0 %}
      <a href="javascript:void(0);" class="harvest-state-button errored" data-toggle="collapse" data-target="#info-errored" aria-controls="info-errored">
    {% endif %}
    <span class="label label-important" data-diff="error">
      {% if 'errored' in stats and stats['errored'] > 0 %}
        {{ stats['errored'] }}
      {% else %}
        0
      {% endif %}
      {{ _('errors') }}
    </span>
    {% if 'errored' in stats and stats['errored'] > 0 %}
      </a>
    {% endif %}
    {% for action in ['added', 'updated', 'deleted', 'not modified'] %}
      {# Add link to custom view to show what was modified #}
      {% if action in stats and stats[action] > 0 %}
        <a href="javascript:void(0);" class="harvest-state-button {{ action.replace(' ', '-') }}" data-toggle="collapse" data-target="#info-{{ action.replace(' ', '-') }}" aria-controls="info-{{ action.replace(' ', '-') }}">
      {% endif %}
      <span class="label" data-diff="{{ action }}">
        {% if action in stats and stats[action] > 0 %}
          {{ stats[action] }}
        {% else %}
          0
        {% endif %}
        {{ _(action) }}
      </span>
      {% if action in stats and stats[action] > 0 %}
        </a>
      {% endif %}
    {% endfor %}
  </p>
  {# Add expand sections for above buttons #}
  {# NOTE(review): each panel's aria-labelledby points at the panel's own id;
     confirm whether it should reference the toggle instead. #}
  {% if 'errored' in stats and stats['errored'] > 0 %}
    <div class="panel panel-primary collapse harvest-state-details" id="info-errored" aria-labelledby="info-errored">
      <div class="panel-heading">
        <div class="panel-title">{{ stats['errored'] }}&nbsp;{{ _('errors') }}</div>
      </div>
      {% set _packages = h.get_packages_from_harvest_job(job, 'errored') %}
      <ul class="list-group">
        <li class="list-group-item"></li>
      </ul>
    </div>
  {% endif %}
  {% for action in ['added', 'updated', 'deleted', 'not modified'] %}
    {% if action in stats and stats[action] > 0 %}
      <div class="panel panel-primary collapse harvest-state-details" id="info-{{ action.replace(' ', '-') }}" aria-labelledby="info-{{ action.replace(' ', '-') }}">
        <div class="panel-heading">
          <div class="panel-title">{{ stats[action] }}&nbsp;{{ _(action) }}</div>
        </div>
        {% set _packages = h.get_packages_from_harvest_job(job, action) %}
        <ul class="list-group">
          {% for _package in _packages %}
            <li class="list-group-item">
              <strong>{% link_for h.get_translated(_package, 'title'), named_route=_package.type + '.read', id=_package.id %}</strong><br />
              <small><i class="fa fa-building-o" aria-hidden="true"></i>&nbsp;{{ h.split_piped_bilingual_field(_package.organization.title, h.lang()) }}</small><br />
              <small><i class="fa fa-file-code-o" aria-hidden="true"></i>&nbsp;{% link_for _package.id, named_route='api.action', logic_function='package_show', id=_package.id %}</small>
            </li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
  {% endfor %}
{% endif %}

{# General details table for the job: id, created / started / finished
   timestamps and the translated status. Each timestamp is rendered twice:
   as a machine-readable data-datetime attribute and as visible text. #}
<h3 class="hide-heading">{{ _('Details') }}</h3>
<table class="table table-striped table-bordered table-condensed">
<colgroup>
<col width="15">
<col width="85">
</colgroup>
<tr>
<th>{{ _('Id') }}</th>
<td>{{ job.id }}</td>
</tr>
<tr>
<th>{{ _('Created') }}</th>
<td>
<span class="automatic-local-datetime" data-datetime="{{ h.render_datetime(job.created, date_format='%Y-%m-%dT%H:%M:%S%z') }}">
{{ h.render_datetime(job.created, with_hours=True) }}
</span>
</td>
</tr>
<tr>
<th>{{ _('Started') }}</th>
<td>
<span class="automatic-local-datetime" data-datetime="{{ h.render_datetime(job.gather_started, date_format='%Y-%m-%dT%H:%M:%S%z') }}">
{{ h.render_datetime(job.gather_started, with_hours=True) }}
</span>
</td>
</tr>
<tr>
<th>{{ _('Finished') }}</th>
<td>
<span class="automatic-local-datetime" data-datetime="{{ h.render_datetime(job.finished, date_format='%Y-%m-%dT%H:%M:%S%z') }}">
{{ h.render_datetime(job.finished, with_hours=True) }}
</span>
</td>
</tr>
<tr>
<th>{{ _('Status') }}</th>
<td>{{ _(job.status) }}</td>
</tr>
</table>
37 changes: 37 additions & 0 deletions ckanext/canada/templates/source/admin_base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{% ckan_extends %}

{# Action buttons for a harvest source: Reharvest (disabled while the last job
   is New or Running), Stop (only while the last job is Running), Clear and
   View. The Clear confirmation text and tooltip differ when the source type
   is 'portal_sync'. #}
{% block content_action %}
  <div class="content_action btn-group">
    {% if harvest_source.status and harvest_source.status.last_job and (harvest_source.status.last_job.status == 'New' or harvest_source.status.last_job.status == 'Running') %}
      {# Strings go through _() so they are translatable like the rest of this block #}
      <a class="btn btn-default disabled" rel="tooltip" title="{{ _('There already is an unrun job for this source') }}"><i class="fa fa-lg fa-refresh icon-refresh icon-large"></i> {{ _('Reharvest') }}</a>
    {% else %}
      {% set locale = h.dump_json({'content': _('This will re-run the harvesting for this source. Any updates at the source will overwrite the local datasets. Sources with a large number of datasets may take a significant amount of time to finish harvesting. Please confirm you would like us to start reharvesting.')}) %}
      <a href="{{ h.url_for('harvester.refresh', id=harvest_source.id) }}" class="btn btn-default" data-module="confirm-action" data-module-i18n="{{ locale }}"
        title="{{ _('Start a new harvesting job for this harvest source now') }}">
        <i class="fa fa-refresh icon-refresh"></i>
        {{ _('Reharvest') }}
      </a>
    {% endif %}
    {% if harvest_source.status and harvest_source.status.last_job and (harvest_source.status.last_job.status == 'Running') %}
      <a href="{{ h.url_for('harvester.job_abort', source=harvest_source.name, id=harvest_source.status.last_job.id) }}" class="btn btn-default" title="{{ _('Stop this Job') }}">
        <i class="fa fa-ban icon-ban-circle"></i>
        {{ _('Stop') }}
      </a>
    {% endif %}
    {% if harvest_source.source_type == 'portal_sync' %}
      {% set locale = h.dump_json({'content': _('Warning: This will remove all previous job reports for this source. Are you sure you want to continue?')}) %}
      {% set _title = _('Delete all harvest jobs and reports from this source') %}
    {% else %}
      {% set locale = h.dump_json({'content': _('Warning: This will remove all datasets for this source, as well as all previous job reports. Are you sure you want to continue?')}) %}
      {% set _title = _('Delete all harvest jobs and existing datasets from this source') %}
    {% endif %}
    <a href="{{ h.url_for('harvester.clear', id=harvest_source.id) }}" class="btn btn-default" data-module="confirm-action" data-module-i18n="{{ locale }}"
      title="{{ _title }}">
      {{ _('Clear') }}
    </a>
    <a href="{{ h.url_for('{0}.read'.format(c.dataset_type), id=harvest_source.id) }}" class="btn btn-default">
      <i class="fa fa-eye eye-open"></i>
      {{ _('View harvest source') }}
    </a>
  </div>
{% endblock %}

0 comments on commit dc787f8

Please sign in to comment.