From 0c045f80480d3831f9f96055a6425bc2ff3f88e7 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Sun, 3 May 2015 15:31:29 +0300 Subject: [PATCH 01/29] add support for organizations --- dkan_migrate_base.info | 1 + dkan_migrate_base_dataset.inc | 4 +- dkan_migrate_base_organization.inc | 68 ++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 dkan_migrate_base_organization.inc diff --git a/dkan_migrate_base.info b/dkan_migrate_base.info index 5a05dbb..8b2bf0b 100644 --- a/dkan_migrate_base.info +++ b/dkan_migrate_base.info @@ -8,5 +8,6 @@ dependencies[] = list dependencies[] = number files[] = dkan_migrate_base.migrate.inc files[] = dkan_migrate_base_group.inc +files[] = dkan_migrate_base_organization.inc files[] = dkan_migrate_base_dataset.inc files[] = dkan_migrate_base_resource.inc diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index 707d74c..ca871bf 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -75,7 +75,7 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { public function getCkanDatasetFields() { return array( "license_title" => "License Title", - "maintainer" => "Maintaier", + "maintainer" => "Maintainer", "relationships_as_object" => "", "private" => "Private", "maintainer_email" => "", @@ -109,7 +109,7 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { // This doesn't actually exist but we are adding it later in prepareRow. "uid" => "User ID", "resource_ids" => "Resource IDS", - "group_ids" => "Group IDS", + "group_ids" => "Group IDS", ); } diff --git a/dkan_migrate_base_organization.inc b/dkan_migrate_base_organization.inc new file mode 100644 index 0000000..8c4fa31 --- /dev/null +++ b/dkan_migrate_base_organization.inc @@ -0,0 +1,68 @@ +getCkanOrganizationFields(); + $list_url = $this->endpoint . 'organization_list'; + $item_url = $this->endpoint . 'organization_show?id=:id'; + $this->page = isset($arguments['page']) ? $arguments['page'] : ''; + $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; + + $this->source = new MigrateSourceList(new CKANListJSON( + $list_url, + array('page' => $this->page, + 'offset' => $this->offset, + ) + ), + new CKANItemJSON($item_url, $fields), $fields); + + $this->map = new MigrateSQLMap( + $this->machineName, + array( + 'uuid' => array( + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'description' => 'id', + ), + ), + MigrateDestinationNode::getKeySchema() + ); + + $this->destination = new MigrateDestinationNode('organization'); + $this->addFieldMapping('id', 'uuid'); + $this->addFieldMapping('uuid', 'id'); + $this->addFieldMapping('title', 'title'); + $this->addFieldMapping('body', 'description'); + $this->addFieldMapping('created', 'created'); + $this->addFieldMapping('changed', 'revision_timestamp'); + $this->addFieldMapping('path', 'name'); + $this->addFieldMapping('field_image', 'image_url'); + } + + /** + * Creates list of fields for CKAN Dataset. + */ + public function getCkanOrganizationFields() { + return array( + "title" => "Title", + "created" => "Created", + "description" => "Description", + "revision_timestamp" => "Changed", + "name" => "Path", + "image_url" => "Image URL", + ); + } +} From ae106a47ac86470ab68d4abeaa8079c94505b54d Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Sun, 3 May 2015 15:48:18 +0300 Subject: [PATCH 02/29] add support for organizations in datasets --- dkan_migrate_base.migrate.inc | 6 ++ dkan_migrate_base_dataset.inc | 129 ++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 61 deletions(-) diff --git a/dkan_migrate_base.migrate.inc b/dkan_migrate_base.migrate.inc index d32893b..e0bbc13 100644 --- a/dkan_migrate_base.migrate.inc +++ b/dkan_migrate_base.migrate.inc @@ -263,6 +263,12 @@ abstract class MigrateDKAN extends Migration { } } + public function getOrganizationId($uuid) { + if ($nid = entity_get_id_by_uuid('node', array($uuid))) { + return $nid[$uuid]; + } + } + /** * Looks up user if they exist, if not creates them. * diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index ca871bf..4a23c91 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -16,11 +16,11 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { parent::__construct($arguments); $fields = $this->getCkanDatasetFields(); - $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'package_list'; - $list_url = $this->endpoint . $list_url; - $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'package_show?id=:id'; - $item_url = $this->endpoint . $item_url; - $this->page = isset($arguments['page']) ? $arguments['page'] : ''; + $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'package_list'; + $list_url = $this->endpoint . $list_url; + $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'package_show?id=:id'; + $item_url = $this->endpoint . $item_url; + $this->page = isset($arguments['page']) ? $arguments['page'] : ''; $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; $this->highwaterField = array( @@ -29,24 +29,25 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $this->source = new MigrateSourceList(new CKANListJSON( $list_url, - array('page' => $this->page, - 'offset' => $this->offset, + array( + 'page' => $this->page, + 'offset' => $this->offset, ) ), - new CKANItemJSON($item_url, $fields), $fields); + new CKANItemJSON($item_url, $fields), $fields); $this->map = new MigrateSQLMap( - $this->machineName, - array( - 'uuid' => array( - 'type' => 'varchar', - 'length' => 255, - 'not null' => TRUE, - 'description' => 'id', - ), - ), - MigrateDestinationNode::getKeySchema() - ); + $this->machineName, + array( + 'uuid' => array( + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'description' => 'id', + ), + ), + MigrateDestinationNode::getKeySchema() + ); $this->destination = new MigrateDestinationNode('dataset', array('text_format' => 'html')); @@ -67,6 +68,7 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $this->addFieldMapping('field_tags', 'tag_names'); $this->addFieldMapping('field_additional_info', 'field_additional_info_key'); $this->addFieldMapping('field_additional_info:second', 'field_additional_info_value'); + $this->addFieldMapping('field_organization_ref', 'organization_id'); } /** @@ -74,42 +76,43 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { */ public function getCkanDatasetFields() { return array( - "license_title" => "License Title", - "maintainer" => "Maintainer", - "relationships_as_object" => "", - "private" => "Private", - "maintainer_email" => "", - "revision_timestamp" => "Revision Date", - "id" => "UUID", - "metadata_created" => "Created Date", - "metadata_modified" => "Modified Date", - "author" => "Author", - "author_email" => "Author Email", - "state" => "State", - "version" => "Version", - "creator_user_id" => "Author UUID", - "type" => "Node Type", - "resources" => "Resources", - "num_resources" => "Number of Resources", - "tag_names" => "Tags", - "tracking_summary" => "Tracking Summary", - "groups" => "Groups", - "license_id" => "Licence ID", + "license_title" => "License Title", + "maintainer" => "Maintainer", + "relationships_as_object" => "", + "private" => "Private", + "maintainer_email" => "", + "revision_timestamp" => "Revision Date", + "id" => "UUID", + "metadata_created" => "Created Date", + "metadata_modified" => "Modified Date", + "author" => "Author", + "author_email" => "Author Email", + "state" => "State", + "version" => "Version", + "creator_user_id" => "Author UUID", + "type" => "Node Type", + "resources" => "Resources", + "num_resources" => "Number of Resources", + "tag_names" => "Tags", + "tracking_summary" => "Tracking Summary", + "groups" => "Groups", + "license_id" => "Licence ID", "relationships_as_subject" => "", - "num_tags" => "Number of Tags", - "organization" => "Organization", - "name" => "Name slug", - "isopen" => "Is Open (bollean)", - "url" => "URL", - "notes" => "Description", - "owner_org" => "Owner Organization", - "extras" => "Extras", - "title" => "Title", - "revision_id" => "Revision ID", + "num_tags" => "Number of Tags", + "organization" => "Organization", + "name" => "Name slug", + "isopen" => "Is Open (bollean)", + "url" => "URL", + "notes" => "Description", + "owner_org" => "Owner Organization", + "extras" => "Extras", + "title" => "Title", + "revision_id" => "Revision ID", // This doesn't actually exist but we are adding it later in prepareRow. - "uid" => "User ID", - "resource_ids" => "Resource IDS", - "group_ids" => "Group IDS", + "uid" => "User ID", + "resource_ids" => "Resource IDS", + "group_ids" => "Group IDS", + "organization_id" => "Organization ID", ); } @@ -117,8 +120,8 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { * Implements prepareRow. */ public function prepareRow($row) { - $row->uid = $this->getUser($row->creator_user_id); - $row->name = 'dataset/' . $row->name; + $row->uid = $this->getUser($row->creator_user_id); + $row->name = 'dataset/' . $row->name; $row->resource_ids = array(); if (isset($row->resources)) { @@ -141,10 +144,14 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { } } + if (isset($row->organization)) { + $row->organization_id = $this->getOrganizationId($row->organization->id); + } + // Get unix timestamp values for dates. - $row->metadata_created = $this->StringToTime($row->metadata_created); + $row->metadata_created = $this->StringToTime($row->metadata_created); $row->metadata_modified = $this->StringToTime($row->metadata_modified); - $row->spatialText = NULL; + $row->spatialText = NULL; if (isset($row->extras)) { foreach ($row->extras as $extra) { if ($extra->key == 'spatial-text') { @@ -154,7 +161,7 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $row->spatial = $extra->value; } else { - $row->field_additional_info_key[] = $extra->key; + $row->field_additional_info_key[] = $extra->key; $row->field_additional_info_value[] = $extra->value; } } @@ -168,11 +175,11 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $node->uuid = $row->id; // Turn off pathauto for import and set path. $node->path['pathauto'] = FALSE; - $node->path['alias'] = $row->name; + $node->path['alias'] = $row->name; // Converts geojson spatial for saving field. if (isset($node->field_spatial[$node->language][0]['wkt']) && $spatial = $node->field_spatial[$node->language][0]['wkt']) { - $geophp = geophp_load(); - $geometry = geoPHP::load($spatial, 'json'); + $geophp = geophp_load(); + $geometry = geoPHP::load($spatial, 'json'); $node->field_spatial[$node->language][0] = geofield_get_values_from_geometry($geometry); } } From f10219786ef63b3448653dfc5eccf94869faafb7 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Mon, 4 May 2015 08:41:23 +0300 Subject: [PATCH 03/29] add support for HTTP Parallel requests --- dkan_migrate_base.info | 1 + dkan_migrate_base.migrate.inc | 3 ++ dkan_migrate_base.module | 75 ++++++++++++++++++++++++++++----- dkan_migrate_base_dataset.inc | 33 ++++++++++++++- dkan_migrate_base_resource.inc | 77 ++++++++++++++++++---------------- 5 files changed, 140 insertions(+), 49 deletions(-) diff --git a/dkan_migrate_base.info b/dkan_migrate_base.info index 8b2bf0b..d068a3d 100644 --- a/dkan_migrate_base.info +++ b/dkan_migrate_base.info @@ -6,6 +6,7 @@ dependencies[] = dkan_dataset dependencies[] = migrate (7.x-2.x-dev) dependencies[] = list dependencies[] = number +dependencies[] = httprl files[] = dkan_migrate_base.migrate.inc files[] = dkan_migrate_base_group.inc files[] = dkan_migrate_base_organization.inc diff --git a/dkan_migrate_base.migrate.inc b/dkan_migrate_base.migrate.inc index e0bbc13..0d293fa 100644 --- a/dkan_migrate_base.migrate.inc +++ b/dkan_migrate_base.migrate.inc @@ -263,6 +263,9 @@ abstract class MigrateDKAN extends Migration { } } + /** + * Looks up nid for organization. + */ public function getOrganizationId($uuid) { if ($nid = entity_get_id_by_uuid('node', array($uuid))) { return $nid[$uuid]; diff --git a/dkan_migrate_base.module b/dkan_migrate_base.module index 689c09e..0611fc7 100644 --- a/dkan_migrate_base.module +++ b/dkan_migrate_base.module @@ -37,20 +37,73 @@ function dkan_migrate_base_create_resource_list($endpoint, $file_name = 'public: function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { $package_list = $endpoint . 'package_list'; $resource_ids = array('help' => t('List of resource ids for %endpoint', array('endpoint' => $endpoint))); - $item_url = $endpoint . 'package_show?id=:id'; - $response = drupal_http_request($package_list); - $json = $response->data; - $data = drupal_json_decode($json); + $item_url = $endpoint . 'package_show?id=:id'; + $response = drupal_http_request($package_list); + $json = $response->data; + $data = drupal_json_decode($json); + + // Buffer for the URLs to query. + $urls = array(); + foreach ($data['result'] as $id) { $dataset_show = preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); - $dataset_response = drupal_http_request($dataset_show); - $dataset_json = $dataset_response->data; - $dataset_data = drupal_json_decode($dataset_json); - $resources = $dataset_data['result']['resources']; - foreach ($resources as $key => $resource) { - $resource_ids['result'][] = $resource['id']; - } + + // Queue up the request. + $urls[] = $dataset_show; } + + + // Prepare the results array, it will be passed in the httprl options as a reference, + // so that the callback function can add the return values. + $resource_ids['result'] = array(); + + // Set the httprl call options. + $options = array( +// 'async_connect' => FALSE, + 'domain_connections' => 25, + 'global_connections' => 50, + 'callback' => array( + array('function' => 'dkan_migrate_base_handle_response',), + &$resource_ids['result'] + ), + 'global_timeout' => 60, + 'connect_timeout' => 30, + 'dns_timeout' => 5, + ); + +// echo "Will retrieve " . sizeof($urls) . " URLs.\n"; + + while (0 < sizeof($urls)) { + httprl_request(array_splice($urls, 0, 1000), $options); + httprl_send_request(); + +// echo sizeof($urls) . ' '; + } + + // Save the results. +// echo "Got " . sizeof($resource_ids['result']) . " resource IDs.\n"; + file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); + return $resource_ids; } + +function dkan_migrate_base_handle_response($response, &$results) { + +// echo "."; + + // Check that the response code is OK (200). + if ((int) $response->code !== 200) { + echo "An error occurred [ code :: {$response->code} ]\n"; + return NULL; + } + + $dataset_json = $response->data; + $dataset_data = drupal_json_decode($dataset_json); + $resources = $dataset_data['result']['resources']; + foreach ($resources as $key => $resource) { + $results[] = $resource['id']; + } + + return NULL; +} diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index 4a23c91..1b5ba17 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -120,16 +120,29 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { * Implements prepareRow. */ public function prepareRow($row) { - $row->uid = $this->getUser($row->creator_user_id); + +// $start_time = time(); // TODO: REMOVE + +// $row->uid = $this->getUser($row->creator_user_id); // TODO: RESTORE $row->name = 'dataset/' . $row->name; $row->resource_ids = array(); +// $delta_time = time() - $start_time; // TODO: REMOVE +// echo "GetUser: $delta_time secs.\n"; // TODO: REMOVE + +// $start_time = time(); // TODO: REMOVE + if (isset($row->resources)) { foreach ($row->resources as $resource) { $row->resource_ids[] = $this->getResourceId($resource->id); } } +// $delta_time = time() - $start_time; // TODO: REMOVE +// echo "GetResourceId: $delta_time secs.\n"; // TODO: REMOVE + +// $start_time = time(); // TODO: REMOVE + $tags = taxonomy_vocabulary_machine_name_load('tags'); if (isset($row->tags)) { foreach ($row->tags as $tag) { @@ -138,16 +151,31 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { } } +// $delta_time = time() - $start_time; // TODO: REMOVE +// echo "Taxonomy: $delta_time secs.\n"; // TODO: REMOVE +// +// $start_time = time(); // TODO: REMOVE + if (isset($row->groups)) { foreach ($row->groups as $group) { $row->group_ids[] = $this->getGroupId($group->id); } } +// $delta_time = time() - $start_time; // TODO: REMOVE +// echo "GetGroupId: $delta_time secs.\n"; // TODO: REMOVE +// +// $start_time = time(); // TODO: REMOVE + if (isset($row->organization)) { $row->organization_id = $this->getOrganizationId($row->organization->id); } +// $delta_time = time() - $start_time; // TODO: REMOVE +// echo "GetOrganizationId: $delta_time secs.\n"; // TODO: REMOVE +// +// $start_time = time(); // TODO: REMOVE + // Get unix timestamp values for dates. $row->metadata_created = $this->StringToTime($row->metadata_created); $row->metadata_modified = $this->StringToTime($row->metadata_modified); @@ -166,6 +194,9 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { } } } + +// $delta_time = time() - $start_time; // TODO: REMOVE +// echo "Spatial: $delta_time secs.\n"; // TODO: REMOVE } /** diff --git a/dkan_migrate_base_resource.inc b/dkan_migrate_base_resource.inc index 9960761..590fffd 100644 --- a/dkan_migrate_base_resource.inc +++ b/dkan_migrate_base_resource.inc @@ -20,24 +20,24 @@ class MigrateCkanResourceBase extends MigrateCkanBase { ); $fields = array( - "name" => "Name", - "id" => "UUID", - "description" => "Description", - "format" => "Format", - "created" => "Created Date", - "last_modified" => "Modified Date", - "url" => "URL", - "revision_id" => "Revision ID", + "name" => "Name", + "id" => "UUID", + "description" => "Description", + "format" => "Format", + "created" => "Created Date", + "last_modified" => "Modified Date", + "url" => "URL", + "revision_id" => "Revision ID", // Not sure if this is just Mimetype. "mimetype_inner" => "Mimetype Inner", // This doesn't actually exist but we are adding it later in prepareRow. - "uid" => "User ID", - "file" => "file", + "uid" => "User ID", + "file" => "file", ); - $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'resource_list'; - $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'resource_show?id=:id'; - $this->page = isset($arguments['page']) ? $arguments['page'] : ''; + $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'resource_list'; + $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'resource_show?id=:id'; + $this->page = isset($arguments['page']) ? $arguments['page'] : ''; $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; if ($list_url == 'resource_list') { @@ -59,24 +59,25 @@ class MigrateCkanResourceBase extends MigrateCkanBase { $this->source = new MigrateSourceList(new CKANListJSON( $list_url, - array('page' => $this->page, - 'offset' => $this->offset, + array( + 'page' => $this->page, + 'offset' => $this->offset, ) ), - new CKANItemJSON($item_url, $fields), $fields); + new CKANItemJSON($item_url, $fields), $fields); $this->map = new MigrateSQLMap( - $this->machineName, - array( - 'uuid' => array( - 'type' => 'varchar', - 'length' => 255, - 'not null' => TRUE, - 'description' => 'id', - ), - ), - MigrateDestinationNode::getKeySchema() - ); + $this->machineName, + array( + 'uuid' => array( + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'description' => 'id', + ), + ), + MigrateDestinationNode::getKeySchema() + ); $this->destination = new MigrateDestinationNode('resource', array('text_format' => 'html')); $this->addFieldMapping('id', 'uuid'); @@ -98,30 +99,32 @@ class MigrateCkanResourceBase extends MigrateCkanBase { // TODO: // + Find way to get user name for creator of resource // + Improve preview for files stuck as links. - $row->created = $this->StringToTime($row->created); - $row->created = $this->StringToTime($row->last_modified); - $row->group_ids = array($row->resource_group_id); + $row->created = $this->StringToTime($row->created); + $row->created = $this->StringToTime($row->last_modified); + $row->group_ids = array($row->resource_group_id); $row->last_modified = $this->StringToTime($row->last_modified); $row->name = $row->name ? $row->name : $row->description; // Tax terms in Drupal are case sensitive. It is better to have a single // 'html' term instead of 'html' and 'HTML'. // TODO: move to hook_node_update in dkan_dataset. - $format = taxonomy_vocabulary_machine_name_load('format'); + $format = taxonomy_vocabulary_machine_name_load('format'); $row->format = strtolower($row->format); $this->createTax($row->format, 'format', $format->vid); // Decide which of DKAN's three fields is best for resource file. if ($row->url_type == 'upload' || $row->resource_type == 'file.upload' || $row->resource_type == 'file') { - $name = explode('/', $row->url); - $name = $name[count($name) - 1]; - $uri = 'public://' . $name; - $file = $this->downloadExternalFile($row->url, $uri); + $name = explode('/', $row->url); + $name = $name[count($name) - 1]; + $uri = 'public://' . $name; + + $file = $this->downloadExternalFile($row->url, $uri); + $row->file = $file['fid']; } else { // CKAN API doesn't make it clear if file is link to API or just file. - $field = field_info_instance('node', 'field_link_remote_file', 'resource'); + $field = field_info_instance('node', 'field_link_remote_file', 'resource'); $extensions = explode(' ', $field['settings']['file_extensions']); if (in_array($row->format, $extensions) && $row->format != 'html') { $row->file_remote_link = $row->url; @@ -137,7 +140,7 @@ class MigrateCkanResourceBase extends MigrateCkanBase { */ public function prepare($node, stdClass $row) { // UUID doesn't show up as a field so had to do this. - $node->uuid = $row->id; + $node->uuid = $row->id; $row->format = isset($row->format) && $row->format ? $row->format : 'data'; } } From d30ac37bce063396c439d484050d5c8c6944193f Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 12:40:52 +0300 Subject: [PATCH 04/29] ignore IDEA project files --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9f11b75 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/ From 92dc66e56b2aa5949e43dbb19bfe866008dd206a Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 13:29:45 +0300 Subject: [PATCH 05/29] remove customizations from this files --- dkan_migrate_base.migrate.inc | 135 +++++++++++++++++++++------------- dkan_migrate_base_dataset.inc | 66 +++++++---------- 2 files changed, 108 insertions(+), 93 deletions(-) diff --git a/dkan_migrate_base.migrate.inc b/dkan_migrate_base.migrate.inc index 0d293fa..71cce48 100644 --- a/dkan_migrate_base.migrate.inc +++ b/dkan_migrate_base.migrate.inc @@ -11,30 +11,42 @@ function dkan_migrate_base_migrate_api() { $api = array( // Migrate API, not CKAN's of course. - 'api' => 2, - 'groups' => array( + 'api' => 2, + 'groups' => array( 'dkan' => array( 'title' => t('DKAN'), ), ), 'migrations' => array( - 'ckan_dataset_base' => array( + 'ckan_dataset_base' => array( 'class_name' => 'MigrateCkanDatasetBase', 'group_name' => 'dkan', - 'title' => t('CKAN Dataset Base'), + 'title' => t('CKAN Dataset Base'), ), - 'ckan_group_base' => array( + 'ckan_group_base' => array( 'class_name' => 'MigrateCkanGroupBase', 'group_name' => 'dkan', - 'title' => t('CKAN Group Base'), + 'title' => t('CKAN Group Base'), ), 'ckan_resource_base' => array( 'class_name' => 'MigrateCkanResourceBase', 'group_name' => 'dkan', - 'title' => t('CKAN Resource Base'), + 'title' => t('CKAN Resource Base'), ), ), ); + + if (FALSE === node_type_get_type('organization')) { + drupal_set_message(t("Organization content type not detected. MigrateCkanOrganizationBase migration not registered.")); + } + else { + $api['migrations']['ckan_organization_base'] = array( + 'class_name' => 'MigrateCkanOrganizationBase', + 'group_name' => 'dkan', + 'title' => t('CKAN Organization Base'), + ); + } + return $api; } @@ -45,27 +57,27 @@ class CKANListJSON extends MigrateListJSON { public function __construct($list_url, $http_options = array()) { parent::__construct($list_url); $this->httpOptions = $http_options; - $this->page = isset($http_options['page']) ? $http_options['page'] : ''; - $this->offset = isset($http_options['offset']) ? $http_options['offset'] : ''; + + // WAS: How many elements to retrieve. This parameter has been commented out, I couldn't find any documentation nor any reference of it in the code. + // $this->page = isset($http_options['page']) ? $http_options['page'] : ''; + + // The starting offset (by default start from the first element). + $this->offset = isset($http_options['offset']) && is_numeric($http_options['offset']) ? $http_options['offset'] : 0; + + // In order to support mtm (multi-threaded migrate) we need to get the *limit* parameter (NULL equals no limit). + $this->limit = isset($http_options['limit']) && is_numeric($http_options['limit']) ? $http_options['limit'] : NULL; } + /** * The default implementation assumes the IDs are top-level array elements. */ protected function getIDsFromJSON(array $data) { - $ids = array(); - $datasets = 0; - $total = $this->page + $this->offset; - foreach ($data['result'] as $item) { - if ($datasets < $this->offset) { - $datasets++; - continue; - } - $ids[] = $item; - $datasets++; - if ($total && $datasets >= $total) { - break; - } - } + + // Get the portion of results within the specified boundaries (starting from *offset*). + $ids = array_slice($data['result'], $this->offset, $this->limit); + + $this->log("returning " . sizeof($ids) . " items [ offset :: {$this->offset} ][ limit :: " . (isset($this->limit) ? $this->limit : 'not set') . " ]"); + return $ids; } @@ -83,7 +95,7 @@ class CKANListJSON extends MigrateListJSON { } else { $response = drupal_http_request($this->listUrl, $this->httpOptions); - $json = $response->data; + $json = $response->data; } migrate_instrument_stop("Retrieve $this->listUrl"); if ($json) { @@ -92,8 +104,7 @@ class CKANListJSON extends MigrateListJSON { return $this->getIDsFromJSON($data); } } - Migration::displayMessage(t('Loading of !listurl failed:', - array('!listurl' => $this->listUrl))); + Migration::displayMessage(t('Loading of !listurl failed:', array('!listurl' => $this->listUrl))); return NULL; } @@ -107,7 +118,7 @@ class CKANListJSON extends MigrateListJSON { } else { $response = drupal_http_request($this->listUrl, $this->httpOptions); - $json = $response->data; + $json = $response->data; } if ($json) { $data = drupal_json_decode($json); @@ -115,12 +126,24 @@ class CKANListJSON extends MigrateListJSON { $count = count($data['result']); } } - // Only return page number if that many actually exist. - if ($count > $this->page) { - $count = $this->page; + + // If a limit has been set and the count is larger then that, we return the limit. + if (isset($this->limit) && $count > $this->limit) { + $count = $this->limit; } return $count; } + + /** + * Log a message. + * + * @param $message + */ + private function log($message) { + + Migration::displayMessage($message); + + } } class CKANItemJSON extends MigrateItemJSON { @@ -135,6 +158,7 @@ class CKANItemJSON extends MigrateItemJSON { } return $ids; } + /** * Parses for 'results' instead of base. */ @@ -145,7 +169,7 @@ class CKANItemJSON extends MigrateItemJSON { } else { $response = drupal_http_request($this->listUrl, $this->httpOptions); - $json = $response->data; + $json = $response->data; } if ($json) { $data = drupal_json_decode($json); @@ -169,8 +193,9 @@ class CKANItemJSON extends MigrateItemJSON { } if ($json && isset($json->error) && $json->error->message == 'Access denied') { $migration = Migration::currentMigration(); - $message = t('Access denied for !objecturl', array('!objecturl' => $item_url)); - $migration->getMap()->saveMessage(array($id), $message, MigrationBase::MESSAGE_ERROR); + $message = t('Access denied for !objecturl', array('!objecturl' => $item_url)); + $migration->getMap() + ->saveMessage(array($id), $message, MigrationBase::MESSAGE_ERROR); $result = $this->emptyItem($id); return $result; } @@ -180,8 +205,9 @@ class CKANItemJSON extends MigrateItemJSON { return $json->result; } $migration = Migration::currentMigration(); - $message = t('Loading of !objecturl failed:', array('!objecturl' => $item_url)); - $migration->getMap()->saveMessage(array($id), $message, MigrationBase::MESSAGE_ERROR); + $message = t('Loading of !objecturl failed:', array('!objecturl' => $item_url)); + $migration->getMap() + ->saveMessage(array($id), $message, MigrationBase::MESSAGE_ERROR); return new stdClass(); } @@ -189,10 +215,10 @@ class CKANItemJSON extends MigrateItemJSON { * Creates a stub entry. */ public function emptyItem($id) { - $result = new stdClass(); - $result->id = substr($id, 0, 35); + $result = new stdClass(); + $result->id = substr($id, 0, 35); $result->title = t('Access denied for %id', array('%id' => $id)); - $result->name = $id; + $result->name = $id; return $result; } } @@ -210,10 +236,10 @@ abstract class MigrateDKAN extends Migration { return $term; } else { - $new_term = new stdClass(); + $new_term = new stdClass(); $new_term->name = $name; - $new_term->vid = $vid; - $term = taxonomy_term_save($new_term); + $new_term->vid = $vid; + $term = taxonomy_term_save($new_term); return $term; } } @@ -234,9 +260,12 @@ abstract class MigrateDKAN extends Migration { * Gets Group Nid by title if exists. */ public function getGroupNidByTitle($title) { - $type = 'group'; - $result = db_query("SELECT n.nid FROM {node} n WHERE n.title = :title AND n.type = :type", array(":title"=> $title, ":type"=> $type)); - $nid = $result->fetchField(); + $type = 'group'; + $result = db_query("SELECT n.nid FROM {node} n WHERE n.title = :title AND n.type = :type", array( + ":title" => $title, + ":type" => $type + )); + $nid = $result->fetchField(); if ($nid) { return $nid; } @@ -291,9 +320,9 @@ abstract class MigrateDKAN extends Migration { 'uuid' => $creator_user_id, ); // Get User name from CKAN API. - $response = drupal_http_request($this->endpoint . 'user_show?id=' . $creator_user_id); - $json = $response->data; - $data = drupal_json_decode($json); + $response = drupal_http_request($this->endpoint . 'user_show?id=' . $creator_user_id); + $json = $response->data; + $data = drupal_json_decode($json); if ($name = $data['result']['name']) { $current_uid = db_query("SELECT uid from {users} WHERE name = :name", array(":name" => $name))->fetchField(); if ($current_uid) { @@ -302,7 +331,7 @@ abstract class MigrateDKAN extends Migration { } else { $new_user['name'] = $name; - $account = user_save(NULL, $new_user); + $account = user_save(NULL, $new_user); return $account->uid; } } @@ -325,17 +354,17 @@ abstract class MigrateDKAN extends Migration { */ public function downloadExternalFile($url, $uri, $save_mode = FILE_EXISTS_RENAME, $manage_file = TRUE) { - $url_info = parse_url($url); + $url_info = parse_url($url); $url_path_info = pathinfo($url_info['path']); // This helps with filenames with spaces. - $url = $url_info['scheme'] . '://' . $url_info['host'] . $url_path_info['dirname'] . '/' . rawurlencode($url_path_info['basename']); + $url = $url_info['scheme'] . '://' . $url_info['host'] . $url_path_info['dirname'] . '/' . rawurlencode($url_path_info['basename']); // Need to remove the filename from the uri. - $uri_target = file_uri_target($uri); - $uri_scheme = file_uri_scheme($uri); + $uri_target = file_uri_target($uri); + $uri_scheme = file_uri_scheme($uri); $uri_path_info = pathinfo($uri_target); - $directory = file_stream_wrapper_uri_normalize($uri_scheme . "://" . $uri_path_info['dirname']); + $directory = file_stream_wrapper_uri_normalize($uri_scheme . "://" . $uri_path_info['dirname']); if (file_prepare_directory($directory, FILE_CREATE_DIRECTORY)) { $drupal_result = drupal_http_request($url); diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index 1b5ba17..7d0cad0 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -16,11 +16,16 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { parent::__construct($arguments); $fields = $this->getCkanDatasetFields(); - $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'package_list'; - $list_url = $this->endpoint . $list_url; - $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'package_show?id=:id'; - $item_url = $this->endpoint . $item_url; - $this->page = isset($arguments['page']) ? $arguments['page'] : ''; + $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'package_list'; + $list_url = $this->endpoint . $list_url; + $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'package_show?id=:id'; + $item_url = $this->endpoint . $item_url; + + // Get the shared key for multi-threaded imports, or use the machine name. + $this->shared_key = isset($arguments['shared_key']) ? $arguments['shared_key'] : $this->machineName; + + // The following are provided by the multi-threaded environment. + $this->limit = isset($arguments['limit']) ? $arguments['limit'] : ''; $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; $this->highwaterField = array( @@ -30,14 +35,14 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $this->source = new MigrateSourceList(new CKANListJSON( $list_url, array( - 'page' => $this->page, + 'limit' => $this->limit, 'offset' => $this->offset, ) ), new CKANItemJSON($item_url, $fields), $fields); $this->map = new MigrateSQLMap( - $this->machineName, + $this->shared_key, // The shared key is provided by superclasses for compatibility with multi-threading, or is set to the machine name. array( 'uuid' => array( 'type' => 'varchar', @@ -51,7 +56,6 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $this->destination = new MigrateDestinationNode('dataset', array('text_format' => 'html')); - $this->addFieldMapping('title', 'title'); $this->addFieldMapping('field_license', 'license_title'); $this->addFieldMapping('created', 'metadata_created'); $this->addFieldMapping('changed', 'metadata_modified'); @@ -69,6 +73,9 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $this->addFieldMapping('field_additional_info', 'field_additional_info_key'); $this->addFieldMapping('field_additional_info:second', 'field_additional_info_value'); $this->addFieldMapping('field_organization_ref', 'organization_id'); + + // The title_255 property represents the dataset title, truncated to fit in 255 characters (a hard limit on the title field). + $this->addFieldMapping('title', 'title_255'); } /** @@ -107,6 +114,7 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { "owner_org" => "Owner Organization", "extras" => "Extras", "title" => "Title", + "title_255" => "Title (truncated to 255 characters)", "revision_id" => "Revision ID", // This doesn't actually exist but we are adding it later in prepareRow. "uid" => "User ID", @@ -121,16 +129,16 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { */ public function prepareRow($row) { -// $start_time = time(); // TODO: REMOVE + // Adding instrumentation here as this call appears to take 7 secs. + migrate_instrument_start("Get user [ name :: $row->name ][ creator_user_id :: $row->creator_user_id ]"); + $row->uid = $this->getUser($row->creator_user_id); + migrate_instrument_stop("Get user [ name :: $row->name ][ creator_user_id :: $row->creator_user_id ]"); -// $row->uid = $this->getUser($row->creator_user_id); // TODO: RESTORE $row->name = 'dataset/' . $row->name; $row->resource_ids = array(); -// $delta_time = time() - $start_time; // TODO: REMOVE -// echo "GetUser: $delta_time secs.\n"; // TODO: REMOVE - -// $start_time = time(); // TODO: REMOVE + // Create a truncated version of the title that can fit Drupal fields max 255 characters. + $row->title_255 = substr($row->title, 0, 255); if (isset($row->resources)) { foreach ($row->resources as $resource) { @@ -138,12 +146,9 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { } } -// $delta_time = time() - $start_time; // TODO: REMOVE -// echo "GetResourceId: $delta_time secs.\n"; // TODO: REMOVE - -// $start_time = time(); // TODO: REMOVE - - $tags = taxonomy_vocabulary_machine_name_load('tags'); + // Initialize the tag names array. + $row->tag_names = array(); + $tags = taxonomy_vocabulary_machine_name_load('tags'); if (isset($row->tags)) { foreach ($row->tags as $tag) { $this->createTax($tag->name, 'tags', $tags->vid); @@ -151,31 +156,16 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { } } -// $delta_time = time() - $start_time; // TODO: REMOVE -// echo "Taxonomy: $delta_time secs.\n"; // TODO: REMOVE -// -// $start_time = time(); // TODO: REMOVE - if (isset($row->groups)) { foreach ($row->groups as $group) { $row->group_ids[] = $this->getGroupId($group->id); } } -// $delta_time = time() - $start_time; // TODO: REMOVE -// echo "GetGroupId: $delta_time secs.\n"; // TODO: REMOVE -// -// $start_time = time(); // TODO: REMOVE - if (isset($row->organization)) { $row->organization_id = $this->getOrganizationId($row->organization->id); } -// $delta_time = time() - $start_time; // TODO: REMOVE -// echo "GetOrganizationId: $delta_time secs.\n"; // TODO: REMOVE -// -// $start_time = time(); // TODO: REMOVE - // Get unix timestamp values for dates. $row->metadata_created = $this->StringToTime($row->metadata_created); $row->metadata_modified = $this->StringToTime($row->metadata_modified); @@ -189,14 +179,10 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $row->spatial = $extra->value; } else { - $row->field_additional_info_key[] = $extra->key; - $row->field_additional_info_value[] = $extra->value; + $row->field_additional_info_key[] = $extra->key; } } } - -// $delta_time = time() - $start_time; // TODO: REMOVE -// echo "Spatial: $delta_time secs.\n"; // TODO: REMOVE } /** From b92851db998ef0167b65021b6824d182dc7b6121 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 13:40:04 +0300 Subject: [PATCH 06/29] remove httprl calls --- dkan_migrate_base.module | 75 ++++++---------------------------------- 1 file changed, 11 insertions(+), 64 deletions(-) diff --git a/dkan_migrate_base.module b/dkan_migrate_base.module index 0611fc7..689c09e 100644 --- a/dkan_migrate_base.module +++ b/dkan_migrate_base.module @@ -37,73 +37,20 @@ function dkan_migrate_base_create_resource_list($endpoint, $file_name = 'public: function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { $package_list = $endpoint . 'package_list'; $resource_ids = array('help' => t('List of resource ids for %endpoint', array('endpoint' => $endpoint))); - $item_url = $endpoint . 'package_show?id=:id'; - $response = drupal_http_request($package_list); - $json = $response->data; - $data = drupal_json_decode($json); - - // Buffer for the URLs to query. - $urls = array(); - + $item_url = $endpoint . 'package_show?id=:id'; + $response = drupal_http_request($package_list); + $json = $response->data; + $data = drupal_json_decode($json); foreach ($data['result'] as $id) { $dataset_show = preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); - - // Queue up the request. - $urls[] = $dataset_show; + $dataset_response = drupal_http_request($dataset_show); + $dataset_json = $dataset_response->data; + $dataset_data = drupal_json_decode($dataset_json); + $resources = $dataset_data['result']['resources']; + foreach ($resources as $key => $resource) { + $resource_ids['result'][] = $resource['id']; + } } - - - // Prepare the results array, it will be passed in the httprl options as a reference, - // so that the callback function can add the return values. - $resource_ids['result'] = array(); - - // Set the httprl call options. - $options = array( -// 'async_connect' => FALSE, - 'domain_connections' => 25, - 'global_connections' => 50, - 'callback' => array( - array('function' => 'dkan_migrate_base_handle_response',), - &$resource_ids['result'] - ), - 'global_timeout' => 60, - 'connect_timeout' => 30, - 'dns_timeout' => 5, - ); - -// echo "Will retrieve " . sizeof($urls) . " URLs.\n"; - - while (0 < sizeof($urls)) { - httprl_request(array_splice($urls, 0, 1000), $options); - httprl_send_request(); - -// echo sizeof($urls) . ' '; - } - - // Save the results. -// echo "Got " . sizeof($resource_ids['result']) . " resource IDs.\n"; - file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); - return $resource_ids; } - -function dkan_migrate_base_handle_response($response, &$results) { - -// echo "."; - - // Check that the response code is OK (200). - if ((int) $response->code !== 200) { - echo "An error occurred [ code :: {$response->code} ]\n"; - return NULL; - } - - $dataset_json = $response->data; - $dataset_data = drupal_json_decode($dataset_json); - $resources = $dataset_data['result']['resources']; - foreach ($resources as $key => $resource) { - $results[] = $resource['id']; - } - - return NULL; -} From 54f3fd885f3af37c559c2cd5aa1623d91d69733f Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 13:40:17 +0300 Subject: [PATCH 07/29] remove httprl dependency --- dkan_migrate_base.info | 1 - 1 file changed, 1 deletion(-) diff --git a/dkan_migrate_base.info b/dkan_migrate_base.info index d068a3d..8b2bf0b 100644 --- a/dkan_migrate_base.info +++ b/dkan_migrate_base.info @@ -6,7 +6,6 @@ dependencies[] = dkan_dataset dependencies[] = migrate (7.x-2.x-dev) dependencies[] = list dependencies[] = number -dependencies[] = httprl files[] = dkan_migrate_base.migrate.inc files[] = dkan_migrate_base_group.inc files[] = dkan_migrate_base_organization.inc From edadb5897caea710609f257822a8a57efd21c6a0 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 16:01:28 +0300 Subject: [PATCH 08/29] require the httprl library for parallel HTTP reqs --- dkan_migrate_base.info | 1 + 1 file changed, 1 insertion(+) diff --git a/dkan_migrate_base.info b/dkan_migrate_base.info index 8b2bf0b..d068a3d 100644 --- a/dkan_migrate_base.info +++ b/dkan_migrate_base.info @@ -6,6 +6,7 @@ dependencies[] = dkan_dataset dependencies[] = migrate (7.x-2.x-dev) dependencies[] = list dependencies[] = number +dependencies[] = httprl files[] = dkan_migrate_base.migrate.inc files[] = dkan_migrate_base_group.inc files[] = dkan_migrate_base_organization.inc From d7387a478cca829e388b621fe96c0fb3919cf054 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 17:43:34 +0300 Subject: [PATCH 09/29] add support for httprl --- dkan_migrate_base.module | 120 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 8 deletions(-) diff --git a/dkan_migrate_base.module b/dkan_migrate_base.module index 689c09e..081d1dd 100644 --- a/dkan_migrate_base.module +++ b/dkan_migrate_base.module @@ -23,7 +23,13 @@ function dkan_migrate_base_migrations_disable() { function dkan_migrate_base_create_resource_list($endpoint, $file_name = 'public://ckan-migrate-resource_list', $update = FALSE) { // Update if requested or the file doesn't exist. if ($update || (!$update && !file_exists($file_name))) { + + // Uncomment this to profile the request: $start_time = time(); echo "dkan_migrate_base_create_resource_list_items [ endpoint :: $endpoint ] ...\n"; + $resource_ids = dkan_migrate_base_create_resource_list_items($endpoint, $file_name); + + // Uncomment this to profile the request: echo "dkan_migrate_base_create_resource_list_items " . time() - $start_time . " secs\n"; + file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); } } @@ -37,16 +43,68 @@ function dkan_migrate_base_create_resource_list($endpoint, $file_name = 'public: function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { $package_list = $endpoint . 'package_list'; $resource_ids = array('help' => t('List of resource ids for %endpoint', array('endpoint' => $endpoint))); - $item_url = $endpoint . 'package_show?id=:id'; - $response = drupal_http_request($package_list); - $json = $response->data; - $data = drupal_json_decode($json); + $item_url = $endpoint . 'package_show?id=:id'; + $response = drupal_http_request($package_list); + $json = $response->data; + $data = drupal_json_decode($json); + + // Buffer for the URLs to query. + $urls = array_map(function ($id) use ($item_url) { + return preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); + }, $data['result']); + + // Prepare the results array, it will be passed in the httprl options as a reference, + // so that the callback function can add the return values. + $resource_ids['result'] = array(); + + // Set the httprl call options. + $options = array( + 'domain_connections' => 100, + 'global_connections' => 100, + 'callback' => array( + array('function' => 'dkan_migrate_base_handle_response',), + &$resource_ids['result'] + ), + 'global_timeout' => 60, + 'connect_timeout' => 30, + 'dns_timeout' => 5, + ); + + dkan_migrate_base_log("Parsing " . sizeof($urls) . " URLs."); + + // Queue requests while there are URLs. + while (0 < sizeof($urls)) { + httprl_request(array_splice($urls, 0, 500), $options); // Process URLs with batch of 1.000 URLs. + httprl_send_request(); + + dkan_migrate_base_log(sizeof($urls) . " URLs remaining."); + } + + dkan_migrate_base_log(sizeof($resource_ids['result']) . " resources retrieved."); + + file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); + return $resource_ids; +} + +/** + * Restrieves resource list from CKAN site. + * + * @return array + * List of resource uuids. + */ +function dkan_migrate_base_create_resource_list_items_sync($endpoint, $file_name) { + $package_list = $endpoint . 'package_list'; + $resource_ids = array('help' => t('List of resource ids for %endpoint', array('endpoint' => $endpoint))); + $item_url = $endpoint . 'package_show?id=:id'; + $response = drupal_http_request($package_list); + $json = $response->data; + $data = drupal_json_decode($json); foreach ($data['result'] as $id) { - $dataset_show = preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); + $dataset_show = preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); $dataset_response = drupal_http_request($dataset_show); - $dataset_json = $dataset_response->data; - $dataset_data = drupal_json_decode($dataset_json); - $resources = $dataset_data['result']['resources']; + $dataset_json = $dataset_response->data; + $dataset_data = drupal_json_decode($dataset_json); + $resources = $dataset_data['result']['resources']; foreach ($resources as $key => $resource) { $resource_ids['result'][] = $resource['id']; } @@ -54,3 +112,49 @@ function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); return $resource_ids; } + + +/** + * Receive a completed response and the current results array. + * + * @param object $response A completed request from httprl holding the response data. + * @param array $results An array of results with the resources IDs. + */ +function dkan_migrate_base_handle_response($response, &$results) { + + // Check that the response code is OK (200). + if ((int) $response->code !== 200) { + + // TODO: retry. + + dkan_migrate_base_log("An error occurred [ url :: $response->url ][ code :: $response->code ].", 'warning'); + return; + } + + // If the response data is not set, log a warning and return. + if (!isset($response->data)) { + dkan_migrate_base_log("Received an empty response.", 'warning'); + return; + } + + $dataset_json = $response->data; + $dataset_data = drupal_json_decode($dataset_json); + + // If the response data doesn't include the required data, log a warning and return. + if (!isset($dataset_data['result']['resources'])) { + dkan_migrate_base_log("Received an invalid response.", MigrationBase::MESSAGE_WARNING); + return; + } + + foreach ($dataset_data['result']['resources'] as $key => $resource) { + $results[] = $resource['id']; + } +} + +function dkan_migrate_base_log($message, $type = 'status') { + + if (function_exists('drush_log')) { + drush_log($message, $type); + } + +} From b0a9ea208bdc5efb931dcc83d6739b87a0647c2e Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 17:44:40 +0300 Subject: [PATCH 10/29] add support for multi-threaded drush (using the *limit* parameter; the *page* parameter has been removed) --- dkan_migrate_base_organization.inc | 48 +++++++++++++++++------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/dkan_migrate_base_organization.inc b/dkan_migrate_base_organization.inc index 8c4fa31..3d8527b 100644 --- a/dkan_migrate_base_organization.inc +++ b/dkan_migrate_base_organization.inc @@ -14,32 +14,38 @@ class MigrateCkanOrganizationBase extends MigrateCkanBase { */ public function __construct($arguments) { parent::__construct($arguments); - $fields = $this->getCkanOrganizationFields(); + $fields = $this->getCkanOrganizationFields(); $list_url = $this->endpoint . 'organization_list'; $item_url = $this->endpoint . 'organization_show?id=:id'; - $this->page = isset($arguments['page']) ? $arguments['page'] : ''; + + // Get the shared key for multi-threaded imports, or use the machine name. + $this->shared_key = isset($arguments['shared_key']) ? $arguments['shared_key'] : $this->machineName; + + // The following are provided by the multi-threaded environment. + $this->limit = isset($arguments['limit']) ? $arguments['limit'] : ''; $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; $this->source = new MigrateSourceList(new CKANListJSON( $list_url, - array('page' => $this->page, - 'offset' => $this->offset, + array( + 'limit' => $this->limit, + 'offset' => $this->offset, ) ), - new CKANItemJSON($item_url, $fields), $fields); + new CKANItemJSON($item_url, $fields), $fields); $this->map = new MigrateSQLMap( - $this->machineName, - array( - 'uuid' => array( - 'type' => 'varchar', - 'length' => 255, - 'not null' => TRUE, - 'description' => 'id', - ), - ), - MigrateDestinationNode::getKeySchema() - ); + $this->shared_key, + array( + 'uuid' => array( + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'description' => 'id', + ), + ), + MigrateDestinationNode::getKeySchema() + ); $this->destination = new MigrateDestinationNode('organization'); $this->addFieldMapping('id', 'uuid'); @@ -57,12 +63,12 @@ class MigrateCkanOrganizationBase extends MigrateCkanBase { */ public function getCkanOrganizationFields() { return array( - "title" => "Title", - "created" => "Created", - "description" => "Description", + "title" => "Title", + "created" => "Created", + "description" => "Description", "revision_timestamp" => "Changed", - "name" => "Path", - "image_url" => "Image URL", + "name" => "Path", + "image_url" => "Image URL", ); } } From b62ee294d88b92cfd392925bed0079daff56bc27 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 17:45:11 +0300 Subject: [PATCH 11/29] add support for multi-threaded drush (using the *limit* parameter; the *page* parameter has been removed) --- dkan_migrate_base_resource.inc | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/dkan_migrate_base_resource.inc b/dkan_migrate_base_resource.inc index 590fffd..1e50725 100644 --- a/dkan_migrate_base_resource.inc +++ b/dkan_migrate_base_resource.inc @@ -37,7 +37,12 @@ class MigrateCkanResourceBase extends MigrateCkanBase { $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'resource_list'; $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'resource_show?id=:id'; - $this->page = isset($arguments['page']) ? $arguments['page'] : ''; + + // Get the shared key for multi-threaded imports, or use the machine name. + $this->shared_key = isset($arguments['shared_key']) ? $arguments['shared_key'] : $this->machineName; + + // The following are provided by the multi-threaded environment. + $this->limit = isset($arguments['limit']) ? $arguments['limit'] : ''; $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; if ($list_url == 'resource_list') { @@ -60,14 +65,14 @@ class MigrateCkanResourceBase extends MigrateCkanBase { $this->source = new MigrateSourceList(new CKANListJSON( $list_url, array( - 'page' => $this->page, + 'limit' => $this->limit, 'offset' => $this->offset, ) ), new CKANItemJSON($item_url, $fields), $fields); $this->map = new MigrateSQLMap( - $this->machineName, + $this->shared_key, array( 'uuid' => array( 'type' => 'varchar', @@ -105,6 +110,9 @@ class MigrateCkanResourceBase extends MigrateCkanBase { $row->last_modified = $this->StringToTime($row->last_modified); $row->name = $row->name ? $row->name : $row->description; + + migrate_instrument_start("taxonomy_vocabulary_machine_name_load"); + // Tax terms in Drupal are case sensitive. It is better to have a single // 'html' term instead of 'html' and 'HTML'. // TODO: move to hook_node_update in dkan_dataset. @@ -112,14 +120,20 @@ class MigrateCkanResourceBase extends MigrateCkanBase { $row->format = strtolower($row->format); $this->createTax($row->format, 'format', $format->vid); + migrate_instrument_start("taxonomy_vocabulary_machine_name_load"); + // Decide which of DKAN's three fields is best for resource file. if ($row->url_type == 'upload' || $row->resource_type == 'file.upload' || $row->resource_type == 'file') { $name = explode('/', $row->url); $name = $name[count($name) - 1]; $uri = 'public://' . $name; + migrate_instrument_start("downloadExternalFile [ url :: $row->url ]"); + $file = $this->downloadExternalFile($row->url, $uri); + migrate_instrument_stop("downloadExternalFile [ url :: $row->url ]"); + $row->file = $file['fid']; } else { From f28b7b75fa9c5fdd7376bdccfa9cbec59c944c85 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 7 May 2015 18:18:05 +0300 Subject: [PATCH 12/29] move the list of fields to a separate function (like the other classes) to enable superclasses to override it --- dkan_migrate_base_resource.inc | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/dkan_migrate_base_resource.inc b/dkan_migrate_base_resource.inc index 1e50725..3e0c955 100644 --- a/dkan_migrate_base_resource.inc +++ b/dkan_migrate_base_resource.inc @@ -15,26 +15,13 @@ class MigrateCkanResourceBase extends MigrateCkanBase { public function __construct($arguments) { parent::__construct($arguments); + // Allow superclass to override the list of fields. + $fields = $this->getCkanResourceFields(); + $this->highwaterField = array( 'name' => 'revision_timestamp', ); - $fields = array( - "name" => "Name", - "id" => "UUID", - "description" => "Description", - "format" => "Format", - "created" => "Created Date", - "last_modified" => "Modified Date", - "url" => "URL", - "revision_id" => "Revision ID", - // Not sure if this is just Mimetype. - "mimetype_inner" => "Mimetype Inner", - // This doesn't actually exist but we are adding it later in prepareRow. - "uid" => "User ID", - "file" => "file", - ); - $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'resource_list'; $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'resource_show?id=:id'; @@ -157,4 +144,22 @@ class MigrateCkanResourceBase extends MigrateCkanBase { $node->uuid = $row->id; $row->format = isset($row->format) && $row->format ? $row->format : 'data'; } + + public function getCkanResourceFields() { + return array( + "name" => "Name", + "id" => "UUID", + "description" => "Description", + "format" => "Format", + "created" => "Created Date", + "last_modified" => "Modified Date", + "url" => "URL", + "revision_id" => "Revision ID", + // Not sure if this is just Mimetype. + "mimetype_inner" => "Mimetype Inner", + // This doesn't actually exist but we are adding it later in prepareRow. + "uid" => "User ID", + "file" => "file", + ); + } } From cb9f8f84befe5d9e985fe39c5f17cc3dc5a43215 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Fri, 8 May 2015 09:32:20 +0300 Subject: [PATCH 13/29] remove httprl dependency to maintain compatibility with environments that can't support it --- dkan_migrate_base.info | 1 - 1 file changed, 1 deletion(-) diff --git a/dkan_migrate_base.info b/dkan_migrate_base.info index d068a3d..8b2bf0b 100644 --- a/dkan_migrate_base.info +++ b/dkan_migrate_base.info @@ -6,7 +6,6 @@ dependencies[] = dkan_dataset dependencies[] = migrate (7.x-2.x-dev) dependencies[] = list dependencies[] = number -dependencies[] = httprl files[] = dkan_migrate_base.migrate.inc files[] = dkan_migrate_base_group.inc files[] = dkan_migrate_base_organization.inc From 3d97ee823c20f1915cdd1cf90e8a91daaa351f8d Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Fri, 8 May 2015 09:33:12 +0300 Subject: [PATCH 14/29] when uninstalling, deregister the organization migration; use httprl only if available --- dkan_migrate_base.module | 60 ++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/dkan_migrate_base.module b/dkan_migrate_base.module index 081d1dd..cb800c9 100644 --- a/dkan_migrate_base.module +++ b/dkan_migrate_base.module @@ -12,6 +12,11 @@ function dkan_migrate_base_migrations_disable() { Migration::deregisterMigration('ckan_dataset_base'); Migration::deregisterMigration('ckan_group_base'); Migration::deregisterMigration('ckan_resource_base'); + + // Deregister the Organization migration if registered. + if (MigrationBase::getInstance('ckan_organization_base')) { + Migration::deregisterMigration('ckan_organization_base'); + } } /** @@ -41,6 +46,8 @@ function dkan_migrate_base_create_resource_list($endpoint, $file_name = 'public: * List of resource uuids. */ function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { + + // Get the list of packages (datasets). $package_list = $endpoint . 'package_list'; $resource_ids = array('help' => t('List of resource ids for %endpoint', array('endpoint' => $endpoint))); $item_url = $endpoint . 'package_show?id=:id'; @@ -48,6 +55,27 @@ function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { $json = $response->data; $data = drupal_json_decode($json); + // Use httprl_request if available. + $resource_ids['result'] = (function_exists('httprl_request') + ? dkan_migrate_base_create_resource_list_items_httprl($item_url, $data) + : dkan_migrate_base_create_resource_list_items_std($item_url, $data)); + + // Store the results to a local file. + file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); + + return $resource_ids; +} + +/** + * Retrieve the list of resources from CKAN using httprl. + * + * @param string $item_url The URL to the single dataset (with the :id placeholder). + * @param array $data The list of datasets. + * + * @return array The list of resources IDs. + */ +function dkan_migrate_base_create_resource_list_items_httprl($item_url, $data) { + // Buffer for the URLs to query. $urls = array_map(function ($id) use ($item_url) { return preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); @@ -55,7 +83,7 @@ function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { // Prepare the results array, it will be passed in the httprl options as a reference, // so that the callback function can add the return values. - $resource_ids['result'] = array(); + $results = array(); // Set the httprl call options. $options = array( @@ -63,7 +91,7 @@ function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { 'global_connections' => 100, 'callback' => array( array('function' => 'dkan_migrate_base_handle_response',), - &$resource_ids['result'] + &$results ), 'global_timeout' => 60, 'connect_timeout' => 30, @@ -80,25 +108,23 @@ function dkan_migrate_base_create_resource_list_items($endpoint, $file_name) { dkan_migrate_base_log(sizeof($urls) . " URLs remaining."); } - dkan_migrate_base_log(sizeof($resource_ids['result']) . " resources retrieved."); + dkan_migrate_base_log(sizeof($results) . " resources retrieved."); - file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); - return $resource_ids; + return $results; } /** * Restrieves resource list from CKAN site. * - * @return array - * List of resource uuids. + * @param string $item_url The URL to the single dataset (with the :id placeholder). + * @param array $data The list of datasets. + * + * @return array The list of resources IDs. */ -function dkan_migrate_base_create_resource_list_items_sync($endpoint, $file_name) { - $package_list = $endpoint . 'package_list'; - $resource_ids = array('help' => t('List of resource ids for %endpoint', array('endpoint' => $endpoint))); - $item_url = $endpoint . 'package_show?id=:id'; - $response = drupal_http_request($package_list); - $json = $response->data; - $data = drupal_json_decode($json); +function dkan_migrate_base_create_resource_list_items_std($item_url, $data) { + + $results = array(); + foreach ($data['result'] as $id) { $dataset_show = preg_replace(array_fill(0, count($id), '/:id/'), $id, $item_url, 1); $dataset_response = drupal_http_request($dataset_show); @@ -106,11 +132,11 @@ function dkan_migrate_base_create_resource_list_items_sync($endpoint, $file_name $dataset_data = drupal_json_decode($dataset_json); $resources = $dataset_data['result']['resources']; foreach ($resources as $key => $resource) { - $resource_ids['result'][] = $resource['id']; + $results[] = $resource['id']; } } - file_unmanaged_save_data(json_encode($resource_ids), $file_name, FILE_EXISTS_REPLACE); - return $resource_ids; + + return $results; } From a78342571a05146e7317b09263522218cb34bdbf Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Fri, 8 May 2015 09:33:31 +0300 Subject: [PATCH 15/29] fix comments --- dkan_migrate_base_organization.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dkan_migrate_base_organization.inc b/dkan_migrate_base_organization.inc index 3d8527b..6a6c44a 100644 --- a/dkan_migrate_base_organization.inc +++ b/dkan_migrate_base_organization.inc @@ -2,11 +2,11 @@ /** * @file - * Migration Class for Groups + * Migration Class for Organizations */ /** - * Migrate CKAN group + * Migrate CKAN organization */ class MigrateCkanOrganizationBase extends MigrateCkanBase { /** From a65c5d7f843f14a739c92a7102dd91eb9c8af118 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Tue, 12 May 2015 10:09:05 +0300 Subject: [PATCH 16/29] fix line removed setting extra value; only set extra key/value if value is set --- dkan_migrate_base_dataset.inc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index 7d0cad0..7af31e4 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -178,8 +178,9 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { elseif ($extra->key == 'spatial') { $row->spatial = $extra->value; } - else { - $row->field_additional_info_key[] = $extra->key; + elseif (!empty($extra->value)) { + $row->field_additional_info_key[] = $extra->key; + $row->field_additional_info_value[] = $extra->value; } } } From 54ca79ad32c8b512a09d2eaaf7b49a79e0492806 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 21 May 2015 12:03:55 +0300 Subject: [PATCH 17/29] when a URL fails put it back to the URLs array --- dkan_migrate_base.module | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dkan_migrate_base.module b/dkan_migrate_base.module index cb800c9..db7123d 100644 --- a/dkan_migrate_base.module +++ b/dkan_migrate_base.module @@ -91,7 +91,8 @@ function dkan_migrate_base_create_resource_list_items_httprl($item_url, $data) { 'global_connections' => 100, 'callback' => array( array('function' => 'dkan_migrate_base_handle_response',), - &$results + &$results, + &$urls, ), 'global_timeout' => 60, 'connect_timeout' => 30, @@ -146,12 +147,14 @@ function dkan_migrate_base_create_resource_list_items_std($item_url, $data) { * @param object $response A completed request from httprl holding the response data. * @param array $results An array of results with the resources IDs. */ -function dkan_migrate_base_handle_response($response, &$results) { +function dkan_migrate_base_handle_response($response, &$results, &$urls) { // Check that the response code is OK (200). if ((int) $response->code !== 200) { // TODO: retry. + // Put the URL back to the URLs array. + $urls[] = $response->url; dkan_migrate_base_log("An error occurred [ url :: $response->url ][ code :: $response->code ].", 'warning'); return; From 3db7f430f5aecaa7a0764845fb0f23c332a97431 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 21 May 2015 12:39:28 +0300 Subject: [PATCH 18/29] add retries to organization and dataset import --- dkan_migrate_base.migrate.inc | 24 ++++++++++++++++++++++++ dkan_migrate_base.module | 3 +-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/dkan_migrate_base.migrate.inc b/dkan_migrate_base.migrate.inc index 71cce48..3ac1429 100644 --- a/dkan_migrate_base.migrate.inc +++ b/dkan_migrate_base.migrate.inc @@ -211,6 +211,30 @@ class CKANItemJSON extends MigrateItemJSON { return new stdClass(); } + /** + * Override the superclass loadJSONUrl in order to use the standard {@link drupal_http_request} + * and retry if the remote system returns an error. Current settings are to + * retry every 5 secs. for a total of 5 times top. + * + * @param string $item_url The item URL. + * + * @return mixed The object instance. + */ + public function loadJSONUrl($item_url) { + + $response = drupal_http_request($item_url, empty($this->httpOptions) ? array() : $this->httpOptions); + + $retries = 0; + while (200 !== (int)$response->code && 5 > $retries++){ + sleep(5); + echo("Retrying [ url :: $item_url ][ response code :: $response->code ][ retries :: $retries ]\n"); + $response = drupal_http_request($item_url, empty($this->httpOptions) ? array() : $this->httpOptions); + } + + $json = $response->data; + return json_decode($json); + } + /** * Creates a stub entry. */ diff --git a/dkan_migrate_base.module b/dkan_migrate_base.module index db7123d..504b9d4 100644 --- a/dkan_migrate_base.module +++ b/dkan_migrate_base.module @@ -152,8 +152,7 @@ function dkan_migrate_base_handle_response($response, &$results, &$urls) { // Check that the response code is OK (200). if ((int) $response->code !== 200) { - // TODO: retry. - // Put the URL back to the URLs array. + // Put the URL back to the URLs to process, so we don't loose one single call. $urls[] = $response->url; dkan_migrate_base_log("An error occurred [ url :: $response->url ][ code :: $response->code ].", 'warning'); From 39ebc17a9a0b27f52688ebdbff6f1ea51e71df14 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Wed, 27 May 2015 19:09:44 +0300 Subject: [PATCH 19/29] initial support for a filter on IDs --- dkan_migrate_base.migrate.inc | 30 ++++++++++++++++++++++++++++-- dkan_migrate_base_dataset.inc | 1 + dkan_migrate_base_resource.inc | 3 ++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/dkan_migrate_base.migrate.inc b/dkan_migrate_base.migrate.inc index 3ac1429..b5bdef9 100644 --- a/dkan_migrate_base.migrate.inc +++ b/dkan_migrate_base.migrate.inc @@ -66,6 +66,9 @@ class CKANListJSON extends MigrateListJSON { // In order to support mtm (multi-threaded migrate) we need to get the *limit* parameter (NULL equals no limit). $this->limit = isset($http_options['limit']) && is_numeric($http_options['limit']) ? $http_options['limit'] : NULL; + + // A regex filter to select the dataset(s) to import. + $this->filter = !empty($http_options['filter']) ? $http_options['filter'] : NULL; } /** @@ -97,10 +100,17 @@ class CKANListJSON extends MigrateListJSON { $response = drupal_http_request($this->listUrl, $this->httpOptions); $json = $response->data; } + migrate_instrument_stop("Retrieve $this->listUrl"); if ($json) { $data = drupal_json_decode($json); if ($data) { + + // If a filter has been specified, then apply it. + if (!empty($this->filter)) { + $data = $this->applyFilter($data); + } + return $this->getIDsFromJSON($data); } } @@ -108,6 +118,18 @@ class CKANListJSON extends MigrateListJSON { return NULL; } + protected function applyFilter($data) { + + // Replace the results with the filtered results. + $data['result'] = array_filter($data['result'], function ($item) { + return (1 === preg_match("|$this->filter|", $item)); + }); + + echo("Filtering [ after count :: " . sizeof($data['result']) . " ]"); + + return $data; + } + /** * Implements computeCount(). */ @@ -225,13 +247,13 @@ class CKANItemJSON extends MigrateItemJSON { $response = drupal_http_request($item_url, empty($this->httpOptions) ? array() : $this->httpOptions); $retries = 0; - while (200 !== (int)$response->code && 5 > $retries++){ + while (200 !== (int) $response->code && 5 > $retries++) { sleep(5); echo("Retrying [ url :: $item_url ][ response code :: $response->code ][ retries :: $retries ]\n"); $response = drupal_http_request($item_url, empty($this->httpOptions) ? array() : $this->httpOptions); } - $json = $response->data; + $json = $response->data; return json_decode($json); } @@ -421,6 +443,10 @@ abstract class MigrateCkanBase extends MigrateDKAN { */ public function __construct($arguments) { $this->endpoint = isset($arguments['endpoint']) ? $arguments['endpoint'] : 'http://demo.getdkan.com/api/3/action/'; + + // Set the regex filter if provided. + $this->filter = isset($arguments['filter']) ? $arguments['filter'] : ''; + parent::__construct($arguments); } } diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index 7af31e4..6fc82c0 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -37,6 +37,7 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { array( 'limit' => $this->limit, 'offset' => $this->offset, + 'filter' => $this->filter, ) ), new CKANItemJSON($item_url, $fields), $fields); diff --git a/dkan_migrate_base_resource.inc b/dkan_migrate_base_resource.inc index 3e0c955..37d23b3 100644 --- a/dkan_migrate_base_resource.inc +++ b/dkan_migrate_base_resource.inc @@ -54,6 +54,7 @@ class MigrateCkanResourceBase extends MigrateCkanBase { array( 'limit' => $this->limit, 'offset' => $this->offset, + 'filter' => $this->filter, ) ), new CKANItemJSON($item_url, $fields), $fields); @@ -127,7 +128,7 @@ class MigrateCkanResourceBase extends MigrateCkanBase { // CKAN API doesn't make it clear if file is link to API or just file. $field = field_info_instance('node', 'field_link_remote_file', 'resource'); $extensions = explode(' ', $field['settings']['file_extensions']); - if (in_array($row->format, $extensions) && $row->format != 'html') { + if (in_array(strtolower($row->format), $extensions) && $row->format != 'html') { $row->file_remote_link = $row->url; } else { From c15bec24e7dee5af02d08bc22a7a492e239aa2ff Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 28 May 2015 14:52:29 +0300 Subject: [PATCH 20/29] add dkan_migrate_base_resources.inc among the files --- dkan_migrate_base_resources.inc | 374 ++++++++++++++++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 dkan_migrate_base_resources.inc diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc new file mode 100644 index 0000000..7cf8d39 --- /dev/null +++ b/dkan_migrate_base_resources.inc @@ -0,0 +1,374 @@ +machine_name = $arguments['machine_name']; + } + + /** + * Overload the prepareRow function to import the resources. Then run the + * standard import. + * + * @param object $row + */ + public function prepareRow($row) { + // Migrate the resources for this dataset. + $this->migrateResources($row->resources); + + parent::prepareRow($row); + } + + /** + * Migrate the specified resources. + * + * @param array $resources An array of resources ids. + */ + protected function migrateResources($resources) { + + // Return if there are no resources. + if (!is_array($resources)) { + return; + } + + $machine_name = $this->machine_name . '_resources'; + $filename = "public://$machine_name"; + + drupal_set_message(t('Migrating ' . sizeof($resources) . ' resource(s) [ machine name :: ' . $machine_name . ' ][ filename :: @filename]...', array('@filename' => $filename))); + + file_unmanaged_save_data(json_encode($resources), $filename, FILE_EXISTS_REPLACE); + + $current_migration = self::$currentMigration; + // Register the migration, then get its instance and start it. + MigrationBase::registerMigration('MigrateCkanResources', $machine_name, array( + 'migrate_list' => $filename, + 'migrate_item' => $filename, + )); + + echo("Getting migration instance [ machine name :: $machine_name ]\n"); + $migration = Migration::getInstance($machine_name); + + echo('Processing import [ migration :: ' . (isset($migration) ? 'true' : 'false') . ' ]' . "\n"); + $result = $migration->processImport(); + + echo("De-registering the migration [ machine name :: $machine_name ]\n"); + // Remove the migration. + MigrationBase::deregisterMigration($machine_name); + + self::$currentMigration = $current_migration; + + file_unmanaged_delete($filename); + + echo("Migration completed [ result :: $result ][ machine name :: $machine_name ]\n"); + + } + +} + +class DkanMigrateList extends MigrateList { + + private $items; + + public function __construct($items) { + parent::__construct(); + + $this->items = $items; + } + + /** + * Implementors are expected to return a string representing where the listing + * is obtained from (a URL, file directory, etc.) + * + * @return string + */ + public function __toString() { + // TODO: Implement __toString() method. + return 'TODO: ...'; + } + + /** + * Implementors are expected to return an array of unique IDs, suitable for + * passing to the MigrateItem class to retrieve the data for a single item. + * + * @return Mixed, iterator or array + */ + public function getIdList() { + + return array_keys($this->items); + } + + /** + * Implementors are expected to return a count of IDs available to be migrated. + * + * @return int + */ + public function computeCount() { + + return (is_array($this->items) ? sizeof($this->items) : -1); + } + + /** + * Implementors are expected to return a count of IDs available to be migrated. + * + * @return int + */ + public function count() { + + return $this->computeCount(); + } +} + +class DkanMigrateItem extends MigrateItem { + + private $items; + + public function __construct($items = array()) { + parent::__construct(); + + $this->$items = $items; + } + + /** + * Implementors are expected to return an object representing a source item. + * + * @param mixed $id + * + * @return stdClass + */ + public function getItem($id) { + + return $this->items[$id]; + } +} + +/** + * A dynamic migration that is reused for each source CSV file. + */ +class MigrateCkanResources extends MigrateCkanBase { + /** + * Here we go. + */ + public function __construct($arguments) { + parent::__construct($arguments); + + // Allow superclass to override the list of fields. + // TODO: candidate to be moved to the superclass. + $fields = $this->getCkanResourceFields(); + + $this->highwaterField = array( + 'name' => 'revision_timestamp', + ); + + // TODO: candidate to be moved to the superclass. + $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'resource_list'; + $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'resource_show?id=:id'; + + // Get the shared key for multi-threaded imports, or use the machine name. + $this->shared_key = isset($arguments['shared_key']) ? $arguments['shared_key'] : $this->machineName; + + // TODO: candidate to be moved to the superclass. + // The following are provided by the multi-threaded environment. + $this->limit = isset($arguments['limit']) ? $arguments['limit'] : ''; + $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; + +// if ($list_url == 'resource_list') { +// $file_name = 'public://ckan-migrate-resource_list_' . get_class($this); +// if (!file_exists($file_name)) { +// drupal_set_message('resource_show created'); +// // CKAN doesn't have a resource_list endpoint. This function fakes it. +// dkan_migrate_base_create_resource_list($this->endpoint, $file_name, TRUE); +// variable_set('dkan_migrate_base_ckan-migrate-resource_list', date('r', time())); +// } +// else { +// $date = variable_get('dkan_resource_list_' . get_class($this), t('unknown')); +// drupal_set_message(t('ckan-migrate-resource_list file downloaded locally. Last updated %date.', array('%date' => $date))); +// } +// $list_url = file_create_url($file_name); +// } + +// $item_url = $this->endpoint . $item_url; + + // TODO: candidate to be moved to the superclass. + + $filename = drupal_realpath($arguments['migrate_list']); + if (empty($filename)) { + drupal_set_message('This migration needs a filename', 'warn'); + return; + } + $resources = json_decode(file_get_contents($filename)); + +// $this->migrate_list = new DkanMigrateList($resources); +// $this->migrate_item = new DkanMigrateItem($resources); + +// echo("Resources loaded [ count :: " . sizeof($resources) . " ][ filename :: $filename ][ compute count :: " . $this->migrate_list->computeCount() . " ]\n"); + + $this->source = new MigrateSourceList(new ExampleListJSON($filename), + new ExampleItemJSON($filename, array()), $fields); + +// $this->source = new MigrateSourceList($this->migrate_list, $this->migrate_item, $fields); + + $this->map = new MigrateSQLMap( + $this->shared_key, + array( + 'uuid' => array( + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'description' => 'id', + ), + ), + MigrateDestinationNode::getKeySchema() + ); + + $this->destination = new MigrateDestinationNode('resource', array('text_format' => 'html')); + $this->addFieldMapping('id', 'uuid'); + $this->addFieldMapping('title', 'name'); + $this->addFieldMapping('created', 'created'); + $this->addFieldMapping('body', 'description'); + $this->addFieldMapping('field_format', 'format'); + $this->addFieldMapping('field_upload', 'file'); + $this->addFieldMapping('field_link_remote_file', 'file_remote_link'); + $this->addFieldMapping('field_link_api', 'link'); + $this->addFieldMapping('og_group_ref', 'group_ids'); + $this->addFieldMapping('changed', 'last_modified'); + } + + /** + * Implements prepareRow. + */ + public function prepareRow($row) { + + echo("Preparing row"); + // TODO: + // + Find way to get user name for creator of resource + // + Improve preview for files stuck as links. + $row->created = $this->StringToTime($row->created); + $row->created = $this->StringToTime($row->last_modified); + $row->group_ids = array($row->resource_group_id); + $row->last_modified = $this->StringToTime($row->last_modified); + + $row->name = $row->name ? $row->name : $row->description; + + migrate_instrument_start("taxonomy_vocabulary_machine_name_load"); + + // Tax terms in Drupal are case sensitive. It is better to have a single + // 'html' term instead of 'html' and 'HTML'. + // TODO: move to hook_node_update in dkan_dataset. + $format = taxonomy_vocabulary_machine_name_load('format'); + $row->format = strtolower($row->format); + $this->createTax($row->format, 'format', $format->vid); + + migrate_instrument_start("taxonomy_vocabulary_machine_name_load"); + + // Decide which of DKAN's three fields is best for resource file. + if ($row->url_type == 'upload' || $row->resource_type == 'file.upload' || $row->resource_type == 'file') { + $name = explode('/', $row->url); + $name = $name[count($name) - 1]; + $uri = 'public://' . $name; + + migrate_instrument_start("downloadExternalFile [ url :: $row->url ]"); + + $file = $this->downloadExternalFile($row->url, $uri); + + migrate_instrument_stop("downloadExternalFile [ url :: $row->url ]"); + + $row->file = $file['fid']; + } + else { + // CKAN API doesn't make it clear if file is link to API or just file. + $field = field_info_instance('node', 'field_link_remote_file', 'resource'); + $extensions = explode(' ', $field['settings']['file_extensions']); + if (in_array($row->format, $extensions) && $row->format != 'html') { + $row->file_remote_link = trim($row->url); + } + else { + $row->link = $row->url; + } + } + } + + /** + * Implements prepare. + */ + public function prepare($node, stdClass $row) { + // UUID doesn't show up as a field so had to do this. + $node->uuid = $row->id; + $row->format = isset($row->format) && $row->format ? $row->format : 'data'; + } + + public function getCkanResourceFields() { + return array( + "name" => "Name", + "id" => "UUID", + "description" => "Description", + "format" => "Format", + "created" => "Created Date", + "last_modified" => "Modified Date", + "url" => "URL", + "revision_id" => "Revision ID", + // Not sure if this is just Mimetype. + "mimetype_inner" => "Mimetype Inner", + // This doesn't actually exist but we are adding it later in prepareRow. + "uid" => "User ID", + "file" => "file", + ); + } +} + +class ExampleListJSON extends MigrateListJSON { + /** + * The default implementation assumes the IDs are top-level array elements, + * but the array elements are the data items - we need to look inside them + * for the IDs. + */ + protected function getIDsFromJSON(array $data) { + $ids = array(); + foreach ($data as $item) { + $ids[] = $item['id']; + } + return $ids; + } +} + +class ExampleItemJSON extends MigrateItemJSON { + protected $data = array(); + /** + * Two problems with the default getItem() - it reads and parses the JSON on + * each call, which we don't want to do for multiple items in the file; and, + * we need to retrieve a given item from the file according to its 'id' element. + */ + public function getItem($id) { + // We cache the parsed JSON at $this->data. + if (empty($this->data)) { + $data = $this->loadJSONUrl($this->itemUrl); + if ($data) { + // Let's index the array by the ID for easy retrieval. + foreach ($data as $item) { + $this->data[$item->id] = $item; + } + } + else { + // Error-handling here.... + } + } + // Return the requested item + if (isset($this->data[$id])) { + return $this->data[$id]; + } + else { + return NULL; + } + } +} \ No newline at end of file From 33651759ad1773482a3784bd759eb34a140b7b80 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 28 May 2015 15:07:36 +0300 Subject: [PATCH 21/29] add supporting classes to enable migration of resources while importing datasets --- dkan_migrate_base_resources.inc | 207 +++++--------------------------- 1 file changed, 32 insertions(+), 175 deletions(-) diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc index 7cf8d39..45e65a3 100644 --- a/dkan_migrate_base_resources.inc +++ b/dkan_migrate_base_resources.inc @@ -43,36 +43,35 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { return; } + drupal_set_message(t('Migrating %count resource(s)', array('%count' => sizeof($resources)))); + + // Create a machine name by combining the dataset machine name with '_resources', + // then create a file with that machine name, that will include a JSON representation + // of the resources related to a dataset (provided as an argument to this function. $machine_name = $this->machine_name . '_resources'; $filename = "public://$machine_name"; - drupal_set_message(t('Migrating ' . sizeof($resources) . ' resource(s) [ machine name :: ' . $machine_name . ' ][ filename :: @filename]...', array('@filename' => $filename))); - file_unmanaged_save_data(json_encode($resources), $filename, FILE_EXISTS_REPLACE); - $current_migration = self::$currentMigration; // Register the migration, then get its instance and start it. - MigrationBase::registerMigration('MigrateCkanResources', $machine_name, array( - 'migrate_list' => $filename, - 'migrate_item' => $filename, - )); + MigrationBase::registerMigration('MigrateCkanResources', $machine_name, array('resources' => $filename)); - echo("Getting migration instance [ machine name :: $machine_name ]\n"); - $migration = Migration::getInstance($machine_name); - - echo('Processing import [ migration :: ' . (isset($migration) ? 'true' : 'false') . ' ]' . "\n"); - $result = $migration->processImport(); + // Migrate has a static reference to the current migration that will be overwritten + // by the following import process, therefore we save the current reference + // and restore it when process is complete. + $current_migration = self::$currentMigration; - echo("De-registering the migration [ machine name :: $machine_name ]\n"); - // Remove the migration. - MigrationBase::deregisterMigration($machine_name); + $migration = Migration::getInstance($machine_name); + $result = $migration->processImport(); self::$currentMigration = $current_migration; + // Delete the file with the list of resources and remove the migration. file_unmanaged_delete($filename); + MigrationBase::deregisterMigration($machine_name); - echo("Migration completed [ result :: $result ][ machine name :: $machine_name ]\n"); - + // Return the result from the processImport call. + return $result; } } @@ -154,180 +153,37 @@ class DkanMigrateItem extends MigrateItem { } /** - * A dynamic migration that is reused for each source CSV file. + * */ -class MigrateCkanResources extends MigrateCkanBase { - /** - * Here we go. - */ +class MigrateCkanResources extends MigrateCkanResourceBase { + public function __construct($arguments) { parent::__construct($arguments); // Allow superclass to override the list of fields. - // TODO: candidate to be moved to the superclass. $fields = $this->getCkanResourceFields(); - $this->highwaterField = array( - 'name' => 'revision_timestamp', - ); - - // TODO: candidate to be moved to the superclass. - $list_url = isset($arguments['list_url']) ? $arguments['list_url'] : 'resource_list'; - $item_url = isset($arguments['item_url']) ? $arguments['item_url'] : 'resource_show?id=:id'; - - // Get the shared key for multi-threaded imports, or use the machine name. - $this->shared_key = isset($arguments['shared_key']) ? $arguments['shared_key'] : $this->machineName; - - // TODO: candidate to be moved to the superclass. - // The following are provided by the multi-threaded environment. - $this->limit = isset($arguments['limit']) ? $arguments['limit'] : ''; - $this->offset = isset($arguments['offset']) ? $arguments['offset'] : ''; - -// if ($list_url == 'resource_list') { -// $file_name = 'public://ckan-migrate-resource_list_' . get_class($this); -// if (!file_exists($file_name)) { -// drupal_set_message('resource_show created'); -// // CKAN doesn't have a resource_list endpoint. This function fakes it. -// dkan_migrate_base_create_resource_list($this->endpoint, $file_name, TRUE); -// variable_set('dkan_migrate_base_ckan-migrate-resource_list', date('r', time())); -// } -// else { -// $date = variable_get('dkan_resource_list_' . get_class($this), t('unknown')); -// drupal_set_message(t('ckan-migrate-resource_list file downloaded locally. Last updated %date.', array('%date' => $date))); -// } -// $list_url = file_create_url($file_name); -// } - -// $item_url = $this->endpoint . $item_url; - - // TODO: candidate to be moved to the superclass. - - $filename = drupal_realpath($arguments['migrate_list']); + // Get the filename of the local file containing the JSON representation of + // the resources. + $filename = drupal_realpath($arguments['resources']); if (empty($filename)) { drupal_set_message('This migration needs a filename', 'warn'); return; } - $resources = json_decode(file_get_contents($filename)); - -// $this->migrate_list = new DkanMigrateList($resources); -// $this->migrate_item = new DkanMigrateItem($resources); - -// echo("Resources loaded [ count :: " . sizeof($resources) . " ][ filename :: $filename ][ compute count :: " . $this->migrate_list->computeCount() . " ]\n"); - $this->source = new MigrateSourceList(new ExampleListJSON($filename), - new ExampleItemJSON($filename, array()), $fields); - -// $this->source = new MigrateSourceList($this->migrate_list, $this->migrate_item, $fields); - - $this->map = new MigrateSQLMap( - $this->shared_key, - array( - 'uuid' => array( - 'type' => 'varchar', - 'length' => 255, - 'not null' => TRUE, - 'description' => 'id', - ), - ), - MigrateDestinationNode::getKeySchema() + // The source is set in the super-class, we now overwrite it using our own + // implementation. + $this->source = new MigrateSourceList( + new DkanMigrateListJSON($filename), + new DkanMigrateItemJSON($filename, array()), + $fields ); - $this->destination = new MigrateDestinationNode('resource', array('text_format' => 'html')); - $this->addFieldMapping('id', 'uuid'); - $this->addFieldMapping('title', 'name'); - $this->addFieldMapping('created', 'created'); - $this->addFieldMapping('body', 'description'); - $this->addFieldMapping('field_format', 'format'); - $this->addFieldMapping('field_upload', 'file'); - $this->addFieldMapping('field_link_remote_file', 'file_remote_link'); - $this->addFieldMapping('field_link_api', 'link'); - $this->addFieldMapping('og_group_ref', 'group_ids'); - $this->addFieldMapping('changed', 'last_modified'); - } - - /** - * Implements prepareRow. - */ - public function prepareRow($row) { - - echo("Preparing row"); - // TODO: - // + Find way to get user name for creator of resource - // + Improve preview for files stuck as links. - $row->created = $this->StringToTime($row->created); - $row->created = $this->StringToTime($row->last_modified); - $row->group_ids = array($row->resource_group_id); - $row->last_modified = $this->StringToTime($row->last_modified); - - $row->name = $row->name ? $row->name : $row->description; - - migrate_instrument_start("taxonomy_vocabulary_machine_name_load"); - - // Tax terms in Drupal are case sensitive. It is better to have a single - // 'html' term instead of 'html' and 'HTML'. - // TODO: move to hook_node_update in dkan_dataset. - $format = taxonomy_vocabulary_machine_name_load('format'); - $row->format = strtolower($row->format); - $this->createTax($row->format, 'format', $format->vid); - - migrate_instrument_start("taxonomy_vocabulary_machine_name_load"); - - // Decide which of DKAN's three fields is best for resource file. - if ($row->url_type == 'upload' || $row->resource_type == 'file.upload' || $row->resource_type == 'file') { - $name = explode('/', $row->url); - $name = $name[count($name) - 1]; - $uri = 'public://' . $name; - - migrate_instrument_start("downloadExternalFile [ url :: $row->url ]"); - - $file = $this->downloadExternalFile($row->url, $uri); - - migrate_instrument_stop("downloadExternalFile [ url :: $row->url ]"); - - $row->file = $file['fid']; - } - else { - // CKAN API doesn't make it clear if file is link to API or just file. - $field = field_info_instance('node', 'field_link_remote_file', 'resource'); - $extensions = explode(' ', $field['settings']['file_extensions']); - if (in_array($row->format, $extensions) && $row->format != 'html') { - $row->file_remote_link = trim($row->url); - } - else { - $row->link = $row->url; - } - } - } - - /** - * Implements prepare. - */ - public function prepare($node, stdClass $row) { - // UUID doesn't show up as a field so had to do this. - $node->uuid = $row->id; - $row->format = isset($row->format) && $row->format ? $row->format : 'data'; - } - - public function getCkanResourceFields() { - return array( - "name" => "Name", - "id" => "UUID", - "description" => "Description", - "format" => "Format", - "created" => "Created Date", - "last_modified" => "Modified Date", - "url" => "URL", - "revision_id" => "Revision ID", - // Not sure if this is just Mimetype. - "mimetype_inner" => "Mimetype Inner", - // This doesn't actually exist but we are adding it later in prepareRow. - "uid" => "User ID", - "file" => "file", - ); } } -class ExampleListJSON extends MigrateListJSON { + +class DkanMigrateListJSON extends MigrateListJSON { /** * The default implementation assumes the IDs are top-level array elements, * but the array elements are the data items - we need to look inside them @@ -342,8 +198,9 @@ class ExampleListJSON extends MigrateListJSON { } } -class ExampleItemJSON extends MigrateItemJSON { +class DkanMigrateItemJSON extends MigrateItemJSON { protected $data = array(); + /** * Two problems with the default getItem() - it reads and parses the JSON on * each call, which we don't want to do for multiple items in the file; and, From 1e06be9c81d22b0f3a630ea2725e177836b38ca3 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 28 May 2015 15:08:33 +0300 Subject: [PATCH 22/29] require the new dkan_migrate_base_resources.inc file --- dkan_migrate_base.info | 1 + 1 file changed, 1 insertion(+) diff --git a/dkan_migrate_base.info b/dkan_migrate_base.info index 8b2bf0b..30e7a2a 100644 --- a/dkan_migrate_base.info +++ b/dkan_migrate_base.info @@ -11,3 +11,4 @@ files[] = dkan_migrate_base_group.inc files[] = dkan_migrate_base_organization.inc files[] = dkan_migrate_base_dataset.inc files[] = dkan_migrate_base_resource.inc +files[] = dkan_migrate_base_resources.inc From d18d31eaa00cf984b3a9fbddca96b5e5493bd9b7 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 28 May 2015 15:19:57 +0300 Subject: [PATCH 23/29] removing excessive log messages --- dkan_migrate_base.migrate.inc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dkan_migrate_base.migrate.inc b/dkan_migrate_base.migrate.inc index b5bdef9..b609c59 100644 --- a/dkan_migrate_base.migrate.inc +++ b/dkan_migrate_base.migrate.inc @@ -79,7 +79,7 @@ class CKANListJSON extends MigrateListJSON { // Get the portion of results within the specified boundaries (starting from *offset*). $ids = array_slice($data['result'], $this->offset, $this->limit); - $this->log("returning " . sizeof($ids) . " items [ offset :: {$this->offset} ][ limit :: " . (isset($this->limit) ? $this->limit : 'not set') . " ]"); + $this->log("returning " . sizeof($ids) . " item(s) [ offset :: {$this->offset} ][ limit :: " . (isset($this->limit) ? $this->limit : 'not set') . " ]"); return $ids; } @@ -125,8 +125,6 @@ class CKANListJSON extends MigrateListJSON { return (1 === preg_match("|$this->filter|", $item)); }); - echo("Filtering [ after count :: " . sizeof($data['result']) . " ]"); - return $data; } From 9f35728dabb65f2d899fff9338bc76ce9d5a66b5 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Thu, 28 May 2015 17:37:50 +0300 Subject: [PATCH 24/29] return if the resources parameter hasn't been provided --- dkan_migrate_base_resource.inc | 1 + dkan_migrate_base_resources.inc | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dkan_migrate_base_resource.inc b/dkan_migrate_base_resource.inc index 37d23b3..5532b9c 100644 --- a/dkan_migrate_base_resource.inc +++ b/dkan_migrate_base_resource.inc @@ -164,3 +164,4 @@ class MigrateCkanResourceBase extends MigrateCkanBase { ); } } + diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc index 45e65a3..7ed8bbd 100644 --- a/dkan_migrate_base_resources.inc +++ b/dkan_migrate_base_resources.inc @@ -160,6 +160,11 @@ class MigrateCkanResources extends MigrateCkanResourceBase { public function __construct($arguments) { parent::__construct($arguments); + // Check that the required resources argument is provided. + if (!isset($arguments['resources'])) { + return; + } + // Allow superclass to override the list of fields. $fields = $this->getCkanResourceFields(); @@ -228,4 +233,4 @@ class DkanMigrateItemJSON extends MigrateItemJSON { return NULL; } } -} \ No newline at end of file +} From eb743918936692b63c8f11e6d5209ec35207a7f3 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Fri, 29 May 2015 15:48:25 +0300 Subject: [PATCH 25/29] add support for a *field_migrate_lock* boolean field which tells the migrate process not to import the dataset --- dkan_migrate_base_dataset.inc | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/dkan_migrate_base_dataset.inc b/dkan_migrate_base_dataset.inc index 6fc82c0..1ba30cb 100644 --- a/dkan_migrate_base_dataset.inc +++ b/dkan_migrate_base_dataset.inc @@ -135,7 +135,26 @@ class MigrateCkanDatasetBase extends MigrateCkanBase { $row->uid = $this->getUser($row->creator_user_id); migrate_instrument_stop("Get user [ name :: $row->name ][ creator_user_id :: $row->creator_user_id ]"); - $row->name = 'dataset/' . $row->name; + $row->name = 'dataset/' . $row->name; + + // Get the system path for the dataset alias and check if a node exists and + // whether it has the Migrate Lock field set to 1, in which case we skip the + // import of the node. + if (NULL !== ($path = drupal_lookup_path('source', $row->name)) + && NULL !== ($node = menu_get_object($type = 'node', 1, $path)) + && isset($node->field_migrate_lock[LANGUAGE_NONE][0]['value']) + && is_numeric($node->field_migrate_lock[LANGUAGE_NONE][0]['value']) + && 1 === (int) $node->field_migrate_lock[LANGUAGE_NONE][0]['value'] + ) { + + $this->saveMessage( + t('Dataset @name is locked and won\'t be imported (resources are still imported). To disable the lock clear the \'Migrate Lock\' in the node edit screen.', array('@name' => $row->name)), + MigrationBase::MESSAGE_INFORMATIONAL + ); + return FALSE; + } + + $row->resource_ids = array(); // Create a truncated version of the title that can fit Drupal fields max 255 characters. From 43591ef078b01a85059566c1be48f9a9e0af0d82 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Fri, 29 May 2015 15:48:38 +0300 Subject: [PATCH 26/29] prepareRow shall always return a value --- dkan_migrate_base_resources.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc index 7ed8bbd..0c9458c 100644 --- a/dkan_migrate_base_resources.inc +++ b/dkan_migrate_base_resources.inc @@ -28,7 +28,8 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { // Migrate the resources for this dataset. $this->migrateResources($row->resources); - parent::prepareRow($row); + // This method should return TRUE/FALSE (see http://www.drupalcontrib.org/api/drupal/contributions!migrate!includes!migration.inc/function/Migration::prepareRow/7) + return parent::prepareRow($row); } /** From 0aa8b9dadebf2d15352d51991a281096353a9716 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Sun, 31 May 2015 12:12:04 +0300 Subject: [PATCH 27/29] add support for custom class names --- dkan_migrate_base_resources.inc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc index 0c9458c..1cc9b30 100644 --- a/dkan_migrate_base_resources.inc +++ b/dkan_migrate_base_resources.inc @@ -12,10 +12,17 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { private $machine_name = ''; + // A custom class name to handle Resources imports. This class should extend + // MigrateCkanResources. + private $resources_class_name = ''; + public function __construct($arguments) { parent::__construct($arguments); $this->machine_name = $arguments['machine_name']; + + // Get the Resources class name or use the default implementation. + $this->resources_class_name = isset($arguments['resources_class_name']) ? $arguments['resources_class_name'] : 'MigrateCkanResources'; } /** @@ -55,7 +62,7 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { file_unmanaged_save_data(json_encode($resources), $filename, FILE_EXISTS_REPLACE); // Register the migration, then get its instance and start it. - MigrationBase::registerMigration('MigrateCkanResources', $machine_name, array('resources' => $filename)); + MigrationBase::registerMigration($this->resources_class_name, $machine_name, array('resources' => $filename)); // Migrate has a static reference to the current migration that will be overwritten // by the following import process, therefore we save the current reference From bd83d154c2857a24d813d429bc82b556dbeffb3e Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Wed, 3 Jun 2015 21:12:40 +0300 Subject: [PATCH 28/29] use the dataset name as part of the new migrate task name --- dkan_migrate_base_resources.inc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc index 1cc9b30..f729c08 100644 --- a/dkan_migrate_base_resources.inc +++ b/dkan_migrate_base_resources.inc @@ -33,7 +33,7 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { */ public function prepareRow($row) { // Migrate the resources for this dataset. - $this->migrateResources($row->resources); + $this->migrateResources($row->name, $row->resources); // This method should return TRUE/FALSE (see http://www.drupalcontrib.org/api/drupal/contributions!migrate!includes!migration.inc/function/Migration::prepareRow/7) return parent::prepareRow($row); @@ -44,7 +44,7 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { * * @param array $resources An array of resources ids. */ - protected function migrateResources($resources) { + protected function migrateResources($name, $resources) { // Return if there are no resources. if (!is_array($resources)) { @@ -56,7 +56,7 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { // Create a machine name by combining the dataset machine name with '_resources', // then create a file with that machine name, that will include a JSON representation // of the resources related to a dataset (provided as an argument to this function. - $machine_name = $this->machine_name . '_resources'; + $machine_name = $this->machine_name . '_' . $name . '_resources'; $filename = "public://$machine_name"; file_unmanaged_save_data(json_encode($resources), $filename, FILE_EXISTS_REPLACE); From f6285abd678837866df7aa6f79a35c5e4489aae2 Mon Sep 17 00:00:00 2001 From: David Riccitelli Date: Wed, 3 Jun 2015 21:16:08 +0300 Subject: [PATCH 29/29] use the UUID of the dataset --- dkan_migrate_base_resources.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dkan_migrate_base_resources.inc b/dkan_migrate_base_resources.inc index f729c08..9ccfc57 100644 --- a/dkan_migrate_base_resources.inc +++ b/dkan_migrate_base_resources.inc @@ -33,7 +33,7 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { */ public function prepareRow($row) { // Migrate the resources for this dataset. - $this->migrateResources($row->name, $row->resources); + $this->migrateResources($row->id, $row->resources); // This method should return TRUE/FALSE (see http://www.drupalcontrib.org/api/drupal/contributions!migrate!includes!migration.inc/function/Migration::prepareRow/7) return parent::prepareRow($row); @@ -56,7 +56,7 @@ abstract class MigrateCkanResourcesBase extends MigrateCkanDatasetBase { // Create a machine name by combining the dataset machine name with '_resources', // then create a file with that machine name, that will include a JSON representation // of the resources related to a dataset (provided as an argument to this function. - $machine_name = $this->machine_name . '_' . $name . '_resources'; + $machine_name = $this->machine_name . '_' . str_replace('-', '_', $name) . '_resources'; $filename = "public://$machine_name"; file_unmanaged_save_data(json_encode($resources), $filename, FILE_EXISTS_REPLACE);