Skip to content

Commit

Permalink
DKAN support (#129)
Browse files Browse the repository at this point in the history
  • Loading branch information
abulte authored Jan 6, 2020
1 parent 8cc64ca commit ce4f109
Show file tree
Hide file tree
Showing 11 changed files with 528 additions and 87 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
- image: mongo:3.2
- image: redis
- image: udata/elasticsearch:2.4.5
-   - image: postgres:alpine
+   - image: postgres:11-alpine
name: db
environment:
POSTGRES_DB: ckan
Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Current (in progress)

-   - Nothing yet
+   - DKAN support [#129](https://github.com/opendatateam/udata-ckan/pull/129)

## 1.2.3 (2019-05-29)

Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ services:
- POSTGRES_PASSWORD=ckan
- DS_RO_PASS=datastore
db:
-   image: postgres:alpine
+   image: postgres:11-alpine
environment:
- POSTGRES_DB=ckan
- POSTGRES_USER=ckan
Expand Down
1 change: 1 addition & 0 deletions requirements/install.pip
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
udata>=1.6.0
requests==2.21.0
humanfriendly==4.18
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def pip(filename):
entry_points={
'udata.harvesters': [
'ckan = udata_ckan.harvesters:CkanBackend',
'dkan = udata_ckan.harvesters:DkanBackend',
],
'udata.models': [
'ckan = udata_ckan.models',
Expand Down
226 changes: 226 additions & 0 deletions tests/data/dkan-french-w-license.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
{
"help": "Return the metadata of a dataset (package) and its resources. :param id: the id or name of the dataset :type id: string",
"success": true,
"result": [
{
"id": "04be6288-696d-4331-850d-a144871a7e3a",
"name": "antennes-regionales-de-la-region-hauts-de-france-au-01102019-0",
"title": "Antennes régionales de la Région Hauts-de-France (au 01/10/2019)",
"author_email": "[email protected]",
"maintainer": "Opendata de la Région Hauts-de-France",
"maintainer_email": "[email protected]",
"license_title": "http://www.etalab.gouv.fr/pages/Licence_ouverte_Open_licence-5899923.html",
"notes": "<p>Liste et coordonnées des antennes de proximité du conseil régional Hauts-de-France.</p>\n",
"url": "https://opendata.hautsdefrance.fr/?q=dataset/antennes-regionales-de-la-region-hauts-de-france-au-01102019-0",
"state": "Active",
"private": true,
"revision_timestamp": "jeu, 19/12/2019 - 03:00",
"metadata_created": "mar, 10/12/2019 - 09:23",
"metadata_modified": "2019-09-30 22:00:00",
"creator_user_id": "235f2695-89bd-4a0d-8bcf-b6e26b7b3981",
"type": "Dataset",
"resources": [
{
"id": "33f30271-cd5c-49ae-b44b-595caae16126",
"revision_id": "",
"url": "https://geocatalogue.hautsdefrance.fr/geonetwork/srv/api/records/4b5f8e1b-de37-47cd-9203-37a59f318b09/attachments/coordonnees_antennes.xlsx",
"description": "<p>Tableau des données</p>\n",
"format": "xlsx",
"state": "Active",
"revision_timestamp": "jeu, 19/12/2019 - 03:00",
"name": "coordonnees_antennes.xlsx",
"mimetype": "xlsx",
"size": "42 octets",
"created": "jeu, 19/12/2019 - 03:00",
"resource_group_id": "b72cd25d-1cec-49f6-8c71-297bd373fa01",
"last_modified": "Date changed jeu, 19/12/2019 - 03:00"
},
{
"id": "ab5948b1-95be-4806-ad8d-efaa9ffe43dc",
"revision_id": "",
"url": "https://sig.hautsdefrance.fr/ext/mv/?config=antenne_regionale.xml#",
"description": "<p>Visionneuse cartographique avec une représentation simplifiée des données</p>\n",
"format": "",
"state": "Active",
"revision_timestamp": "jeu, 19/12/2019 - 03:00",
"name": "Visionneuse mviewer",
"mimetype": "",
"size": "",
"created": "jeu, 19/12/2019 - 03:00",
"resource_group_id": "b72cd25d-1cec-49f6-8c71-297bd373fa01",
"last_modified": "Date changed jeu, 19/12/2019 - 03:00"
}
],
"tags": [
{
"id": "0800bf74-0728-48ef-b6bb-6e458feff785",
"vocabulary_id": "2",
"name": "ADMINISTRATION"
},
{
"id": "3e213764-c884-402b-88f5-097a5de38876",
"vocabulary_id": "2",
"name": "ADRESSE"
},
{
"id": "8da693f4-e4d3-432e-8192-fa4d12ee21e8",
"vocabulary_id": "2",
"name": "AISNE"
},
{
"id": "40577117-4987-4588-b08c-e1636fa0865b",
"vocabulary_id": "2",
"name": "ANTENNE REGIONALE"
},
{
"id": "46af567e-d862-4d65-8912-0f7eff3f94a8",
"vocabulary_id": "2",
"name": "DONNEES OUVERTES"
},
{
"id": "713c6627-5caa-4434-9583-159b8f9fbfea",
"vocabulary_id": "2",
"name": "HAUTS-DE-FRANCE"
},
{
"id": "1e14a78d-127f-4401-ac4a-09159f5d22b2",
"vocabulary_id": "2",
"name": "NORD"
},
{
"id": "08f08873-ce41-465b-a055-826f6cee0ca4",
"vocabulary_id": "2",
"name": "OISE"
},
{
"id": "de6a5ac7-c9cb-42c8-b6a9-506740c54be3",
"vocabulary_id": "2",
"name": "PAS-DE-CALAIS"
},
{
"id": "b428bef2-9a4c-4545-9a70-1549ae2384a7",
"vocabulary_id": "2",
"name": "POLITIQUE REGIONALE"
},
{
"id": "f8676e82-0864-4b8f-afd2-2ca3368ccb53",
"vocabulary_id": "2",
"name": "PROXIMITE"
},
{
"id": "c18de608-ae9b-4043-81e1-27e367461a0d",
"vocabulary_id": "2",
"name": "SOMME"
},
{
"id": "33efc6a3-86b4-4ca3-b9b1-301d8ce7a379",
"vocabulary_id": "2",
"name": "Services d utilité publique et services publics"
}
],
"groups": [
{
"description": "<p>Conseil régional des Hauts-de-France</p>\n",
"id": "b72cd25d-1cec-49f6-8c71-297bd373fa01",
"image_display_url": "https://opendata.hautsdefrance.fr/sites/default/files/Logo%20R%C3%A9gion%20HDF-pourleweb.jpg",
"title": "Région Hauts-de-France",
"name": "group/region-hauts-de-france"
}
],
"extras": [
{
"key": "access_constraints",
"value": "[]"
},
{
"key": "bbox-east-long",
"value": "4.65820313"
},
{
"key": "bbox-north-lat",
"value": "51.16423318"
},
{
"key": "bbox-south-lat",
"value": "48.80546301"
},
{
"key": "bbox-west-long",
"value": "1.18652344"
},
{
"key": "contact-email",
"value": "[email protected]"
},
{
"key": "coupled-resource",
"value": "[]"
},
{
"key": "dataset-reference-date",
"value": "[{&quot;type&quot;: &quot;revision&quot;, &quot;value&quot;: &quot;2019-10-01&quot;}]"
},
{
"key": "frequency-of-update",
"value": "asNeeded"
},
{
"key": "graphic-preview-description",
"value": "logohdf.png"
},
{
"key": "graphic-preview-file",
"value": "https://geocatalogue.hautsdefrance.fr/geonetwork/srv/api/records/4b5f8e1b-de37-47cd-9203-37a59f318b09/attachments/logohdf.png"
},
{
"key": "guid",
"value": "4b5f8e1b-de37-47cd-9203-37a59f318b09"
},
{
"key": "licence",
"value": "[]"
},
{
"key": "metadata-date",
"value": "2019-11-13T11:24:54"
},
{
"key": "metadata-language",
"value": "fre"
},
{
"key": "metadata_created",
"value": "2019-11-13T11:24:54"
},
{
"key": "metadata_modified",
"value": "2019-11-13T11:24:54"
},
{
"key": "resource-type",
"value": "dataset"
},
{
"key": "responsible-party",
"value": "[{&quot;name&quot;: &quot;R\\u00e9gion Hauts-de-France&quot;, &quot;roles&quot;: [&quot;pointOfContact&quot;]}]"
},
{
"key": "spatial_harvester",
"value": "true"
},
{
"key": "harvest_object_id",
"value": "281dfceb-ed90-46da-aef6-4ad5e1348150"
},
{
"key": "harvest_source_id",
"value": "c6a6feb7-fc9a-4a26-ab6a-cd13024c4fdd"
},
{
"key": "harvest_source_title",
"value": "Données ouvertes publiées par la région Hauts-de-France"
}
]
}
]
}
102 changes: 102 additions & 0 deletions tests/test_dkan_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import json
import pytest
import os

from datetime import datetime

from udata.app import create_app
from udata.core.organization.factories import OrganizationFactory
from udata.harvest import actions
from udata.harvest.tests.factories import HarvestSourceFactory
from udata.models import Dataset
from udata.settings import Defaults, Testing
from udata.tests.plugin import drop_db


# Base URL of the DKAN instance used as the harvest source URL by the
# module-level `source` fixture (and thus by `test_dkan_demo_harvest`).
DKAN_TEST_INSTANCE = 'http://demo.getdkan.com'


def data_path(filename):
    '''Return the path of `filename` inside the `data` directory next to this module.'''
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data', filename)


class DkanSettings(Testing):
    '''`Testing` settings variant whose `PLUGINS` list is `['dkan']`.'''

    PLUGINS = ['dkan']


@pytest.fixture(scope='module')
def app(request):
    '''
    Module-scoped udata application built from `Defaults` overridden
    by `DkanSettings`; `drop_db` is called before and after use.
    '''
    udata_app = create_app(Defaults, override=DkanSettings)

    # Setup: call drop_db before handing the app to the tests.
    with udata_app.app_context():
        drop_db(udata_app)

    yield udata_app

    # Teardown: call drop_db again once the module's tests are done.
    with udata_app.app_context():
        drop_db(udata_app)


@pytest.fixture(scope='module')
def source(app):
    '''
    Module-scoped `dkan` harvest source targeting DKAN_TEST_INSTANCE,
    attached to an organization built by `OrganizationFactory`.
    '''
    with app.app_context():
        owner = OrganizationFactory()
        return HarvestSourceFactory(
            backend='dkan',
            url=DKAN_TEST_INSTANCE,
            organization=owner
        )


def test_dkan_demo_harvest(source, app):
    '''
    Run a harvest against DKAN_TEST_INSTANCE and check that the job ends
    as `done` with one dataset per harvested item, each dataset having
    at least one resource.
    '''
    with app.app_context():
        actions.run(source.slug)
        source.reload()
        last_job = source.get_last_job()

        assert len(last_job.items) > 0
        harvested = Dataset.objects.filter(organization=source.organization)
        assert len(last_job.items) == harvested.count()

        for harvested_dataset in harvested:
            assert len(harvested_dataset.resources) > 0

        assert last_job.status == 'done'


def test_dkan_french_w_license(app, rmock):
    '''
    DKAN harvester should handle a payload with French-formatted dates
    and a license given as a URL.

    The remote API is mocked: `package_list` returns a single fake
    name and `package_show` returns the content of the
    `dkan-french-w-license.json` fixture.
    '''
    DKAN_URL = 'https://harvest.me/'
    API_URL = '{}api/3/action/'.format(DKAN_URL)
    PACKAGE_LIST_URL = '{}package_list'.format(API_URL)
    PACKAGE_SHOW_URL = '{}package_show'.format(API_URL)

    # The fixture contains accented characters: read raw bytes and decode
    # them explicitly as UTF-8 instead of relying on the locale's default
    # text encoding (which breaks under a C/ASCII locale on Python 3).
    with open(data_path('dkan-french-w-license.json'), 'rb') as ifile:
        data = json.loads(ifile.read().decode('utf-8'))

    org = OrganizationFactory()
    source = HarvestSourceFactory(backend='dkan', url=DKAN_URL, organization=org)
    rmock.get(PACKAGE_LIST_URL, json={'success': True, 'result': ['fake-name']}, status_code=200,
              headers={'Content-Type': 'application/json'})
    rmock.get(PACKAGE_SHOW_URL, json=data, status_code=200,
              headers={'Content-Type': 'application/json'})
    actions.run(source.slug)
    source.reload()
    assert source.get_last_job().status == 'done'

    datasets = Dataset.objects.filter(organization=org)
    assert len(datasets) > 0

    # Look the dataset up by the remote id declared in the fixture.
    q = {'extras__harvest:remote_id': '04be6288-696d-4331-850d-a144871a7e3a'}
    dataset = datasets.get(**q)
    # Expected from the fixture's "mar, 10/12/2019 - 09:23" (metadata_created)
    # and "2019-09-30 22:00:00" (metadata_modified).
    assert dataset.created_at == datetime(2019, 12, 10, 0, 0)
    assert dataset.last_modified == datetime(2019, 9, 30, 0, 0)
    assert len(dataset.resources) == 2
    assert 'xlsx' in [r.format for r in dataset.resources]
Loading

0 comments on commit ce4f109

Please sign in to comment.