Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Add a multi_value_processor plugin to the tide_data_pipeline module to handle multiple values. #85

Merged
merged 4 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .circleci/code_coverage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
#
# Produces the HTML code coverage report.
#
# PHPUnit is run under phpdbg through `ahoy cli`; the report is written to
# the directory given to --coverage-html.
#

# Abort as soon as any command fails.
set -o errexit

printf '%s\n' "==> Generate code coverage report"
ahoy cli "phpdbg -qrr vendor/bin/phpunit ./dpc-sdp --coverage-html /app/coverage-report"
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: build

on: push

jobs:
set_status_in_progress:
name: set_status_in_progress
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

namespace Drupal\tide_data_pipeline\Plugin\DatasetDestination;

use Drupal\Core\Form\FormStateInterface;
use Drupal\data_pipelines\Entity\DatasetInterface;
use Drupal\data_pipelines_elasticsearch\Plugin\DatasetDestination\ElasticSearchDestination;

/**
 * Dataset destination that writes to an SDP managed Elasticsearch index.
 *
 * Extends the parent Elasticsearch destination with a "hash_prefix" setting
 * that is used when resolving the index name during cleanup.
 *
 * @DatasetDestination(
 *   id="sdp_elasticsearch",
 *   label="SDP ElasticSearch",
 *   description="Writes datasets to an sdp managed elasticsearch index"
 * )
 */
class TideElasticSearchDestination extends ElasticSearchDestination {

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration(): array {
    $defaults = ['hash_prefix' => ''];
    // Array union: the key declared here wins over a parent key of the same
    // name.
    return $defaults + parent::defaultConfiguration();
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state): array {
    $form = parent::buildConfigurationForm($form, $form_state);

    // Text field for the hash prefix, pre-filled from the stored setting.
    $hash_prefix_element = [
      '#type' => 'textfield',
      '#title' => $this->t('Index Hash Prefix'),
      '#description' => $this->t('The index hash prefix to use.'),
      '#default_value' => $this->configuration['hash_prefix'],
    ];
    $form['hash_prefix'] = $hash_prefix_element;

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function processCleanup(DatasetInterface $dataset, array $invalidDeltas): bool {
    $index = $this->getFullIndexId($dataset->getMachineName());

    try {
      // One bulk "delete" operation per invalid delta; document ids have the
      // form "<machine name>:<delta>".
      $operations = array_values(array_map(
        fn ($delta) => [
          'delete' => [
            '_index' => $index,
            '_id' => $dataset->getMachineName() . ':' . $delta,
          ],
        ],
        $invalidDeltas
      ));

      // Only contact the client when there is something to delete.
      if ($operations !== []) {
        $this->getClient()->bulk(['body' => $operations]);
      }
    }
    catch (\Exception $exception) {
      $this->logger->error("The invalid dataset data could not be purged due to @message", [
        '@message' => $exception->getMessage(),
      ]);
      return FALSE;
    }

    return TRUE;
  }

  /**
   * Builds the index id for a dataset machine name.
   *
   * The result is "<hash_prefix>--<prefix><machine name>" when both settings
   * are non-empty, "<prefix><machine name>" when only the prefix is set, and
   * the bare machine name otherwise. A hash prefix without a prefix is
   * ignored.
   */
  protected function getFullIndexId(string $machineName): string {
    $prefix = $this->configuration['prefix'] ?? '';
    $hash_prefix = $this->configuration['hash_prefix'] ?? '';

    if (!$prefix) {
      return $machineName;
    }
    if (!$hash_prefix) {
      return $prefix . $machineName;
    }
    return $hash_prefix . '--' . $prefix . $machineName;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?php

declare(strict_types=1);

namespace Drupal\tide_data_pipeline\Plugin\DatasetTransform;

use Drupal\data_pipelines\DatasetData;
use Drupal\data_pipelines\Transform\TransformPluginBase;

/**
 * Splits a string into multiple values and optionally processes them.
 *
 * Configuration keys:
 * - separator: delimiter used to split the field value. When empty, the
 *   value is not split and becomes a single-element list.
 * - callback: optional PHP callable applied to every part.
 * - parameters: extra arguments for the callback. Numeric strings, the
 *   strings 'true'/'false' and names of defined constants are converted to
 *   their typed values first.
 * - value_position: index in the argument list at which the part itself is
 *   inserted (defaults to 0, i.e. the first argument).
 *
 * NOTE(review): the callback name comes straight from the pipeline
 * definition and any callable is accepted, so pipeline definitions must be
 * trusted input.
 *
 * @DatasetTransform(
 *   id="multi_value_processor",
 *   fields=TRUE,
 *   records=FALSE
 * )
 */
class MultiValueProcessor extends TransformPluginBase {

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    return parent::defaultConfiguration() + [
      'separator' => '',
      'callback' => NULL,
      'parameters' => [],
      // Default to first argument.
      'value_position' => 0,
    ];
  }

  /**
   * {@inheritdoc}
   */
  protected function doTransformField(string $field_name, DatasetData $record): DatasetData {
    // NOTE(review): this calls the record-level parent from a field-level
    // override; parent::doTransformField() was probably intended. Left as-is
    // because the parent class is not visible here - confirm and switch.
    $record = parent::doTransformRecord($record);

    // Missing or empty() values are left untouched.
    if (!$record->offsetExists($field_name) || empty($record[$field_name])) {
      return $record;
    }

    $raw_value = $record[$field_name];
    // Only scalars can be split. Anything else (for example a value that an
    // earlier transform already turned into an array) is returned unchanged
    // instead of triggering a TypeError in explode().
    if (!is_scalar($raw_value)) {
      return $record;
    }
    $raw_value = (string) $raw_value;

    // explode() throws a ValueError for an empty separator, which is also the
    // default configuration, so treat "no separator" as "do not split".
    $separator = (string) ($this->configuration['separator'] ?? '');
    $parts = $separator === '' ? [$raw_value] : explode($separator, $raw_value);

    // Trim every part and drop the ones that end up empty, re-indexing so
    // the result is a plain list.
    $cleaned_parts = array_values(array_filter(array_map('trim', $parts), static function ($part) {
      return $part !== '';
    }));

    // Process the parts if a callback is provided.
    $callback = $this->configuration['callback'] ?? NULL;
    if (is_callable($callback)) {
      // The typed parameters are identical for every part: convert them once.
      $typed_parameters = array_map(
        [$this, 'convertParameter'],
        (array) ($this->configuration['parameters'] ?? [])
      );
      $value_position = (int) ($this->configuration['value_position'] ?? 0);

      $cleaned_parts = array_map(static function ($value) use ($callback, $typed_parameters, $value_position) {
        $args = $typed_parameters;
        // Insert the part at the configured argument position.
        array_splice($args, $value_position, 0, [$value]);
        // Deliberately call_user_func_array() rather than $callback(...$args):
        // a direct call from this strict_types file would type-check the
        // arguments strictly, whereas a call made from inside an internal
        // function is not subject to strict_types, so loosely typed
        // configuration values keep being coerced.
        return call_user_func_array($callback, $args);
      }, $cleaned_parts);
    }

    $record[$field_name] = $cleaned_parts;
    return $record;
  }

  /**
   * Converts a parameter to its appropriate type.
   *
   * Numeric values become int/float, the strings 'true' and 'false' become
   * booleans, and a string naming a defined constant is replaced by that
   * constant's value. Everything else is returned unchanged.
   *
   * @param mixed $parameter
   *   The raw parameter from the plugin configuration.
   *
   * @return mixed
   *   The converted parameter.
   */
  private function convertParameter($parameter) {
    if (is_numeric($parameter)) {
      // Adding zero yields an int or a float depending on the input.
      return $parameter + 0;
    }
    if ($parameter === 'true' || $parameter === 'false') {
      return $parameter === 'true';
    }
    if (is_string($parameter) && defined($parameter)) {
      return constant($parameter);
    }
    return $parameter;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Splits "Suburbs" on ';' and runs str_pad() on every part. value_position is
# not set, so the part is the first argument, followed by the parameters
# below; STR_PAD_LEFT names a PHP constant that the plugin resolves.
pipeline_with_multi_value_processor_transform:
label: 'Multiple Value Processor transform'
transforms:
field:
Suburbs:
- plugin: multi_value_processor
separator: ';'
callback: str_pad
parameters:
- 10
- '0'
- STR_PAD_LEFT

# Splits "Suburbs" on ';' and upper-cases every part with strtoupper(); no
# extra callback parameters are needed.
pipeline_with_strtoupper:
label: 'Multiple Value Processor with strtoupper'
transforms:
field:
Suburbs:
- plugin: multi_value_processor
separator: ';'
callback: strtoupper

# Splits "Suburbs" on ';' and calls mb_convert_case() on every part. The
# quoted 'MB_CASE_UPPER' is a constant name that the plugin resolves to the
# constant's value before the call.
pipeline_with_mb_convert_case:
label: 'Multiple Value Processor with mb_convert_case'
transforms:
field:
Suburbs:
- plugin: multi_value_processor
separator: ';'
callback: mb_convert_case
parameters:
- 'MB_CASE_UPPER'

# Splits "Suburbs" on ';' and calls substr() on every part. Both parameters
# are given as strings on purpose: the plugin converts '0' to the number 0
# and 'true' to boolean TRUE before the call. The accompanying test expects
# only the first character of each part to remain.
pipeline_with_substr:
label: 'Multiple Value Processor with substr'
transforms:
field:
Suburbs:
- plugin: multi_value_processor
separator: ';'
callback: substr
parameters:
- '0'
- 'true'

# Splits "Suburbs" on ';' and calls str_replace() on every part.
# value_position 2 inserts the part after the two parameters, i.e. as the
# third argument: str_replace('a', 'A', <part>).
pipeline_with_str_replace:
label: 'Multiple Value Processor with str_replace'
transforms:
field:
Suburbs:
- plugin: multi_value_processor
separator: ';'
callback: str_replace
parameters:
- 'a'
- 'A'
value_position: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: Data Pipelines - Test Pipelines
package: Testing
description: 'Provides test data set plugins'
core_version_requirement: ^10.1
php: 8.0
type: module
dependencies:
- data_pipelines:data_pipelines
- data_pipelines_test:data_pipelines_test
- tide_data_pipeline:tide_data_pipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
<?php

declare(strict_types=1);

namespace Drupal\Tests\tide_data_pipeline\Kernel\Transform;

use Drupal\data_pipelines\Entity\Dataset;
use Drupal\Tests\data_pipelines\Kernel\Transform\TransformTest;

/**
 * Tests the multi_value_processor dataset transform.
 *
 * Every test runs the same CSV fixture through a different test pipeline and
 * checks the resulting "Suburbs" values of both rows.
 *
 * @coversDefaultClass \Drupal\tide_data_pipeline\Plugin\DatasetTransform\MultiValueProcessor
 * @group data_pipelines
 */
class TideSearchTransformTest extends TransformTest {

  /**
   * {@inheritdoc}
   */
  protected static $modules = [
    'options',
    'link',
    'file',
    'entity',
    'data_pipelines',
    'data_pipelines_test',
    'user',
    'system',
    'tide_data_pipeline',
    'tide_data_pipelines_test',
  ];

  /**
   * Runs the shared CSV fixture through a pipeline and returns its records.
   *
   * Also asserts that the fixture produced exactly two records, which every
   * test in this class relies on.
   *
   * @param string $pipeline
   *   The id of the pipeline to run the fixture through.
   *
   * @return array
   *   The transformed records, as produced by the dataset's data iterator.
   */
  private function transformFixture(string $pipeline): array {
    $file = $this->getTestFile(dirname(__DIR__, 2) . '/fixtures/test-pipeline-multiple_value_processor.csv');
    $dataset = Dataset::create([
      'source' => 'csv:file',
      'name' => $this->randomMachineName(),
      'machine_name' => mb_strtolower($this->randomMachineName()),
      'pipeline' => $pipeline,
      'csv_file' => $file,
    ]);
    $data = iterator_to_array($dataset->getDataIterator());
    $this->assertCount(2, $data);
    return $data;
  }

  /**
   * Test multiple_value_processor transform with str_pad and parameters.
   */
  public function testMultipleValueProcessorTransform(): void {
    $data = $this->transformFixture('pipeline_with_multi_value_processor_transform');
    $this->assertEquals(['0Dandenong', 'Dandenong North'], $data[0]['Suburbs']);
    $this->assertEquals(['00000Boneo', '000Outtrim'], $data[1]['Suburbs']);
  }

  /**
   * Test multiple_value_processor transform with strtoupper callback.
   */
  public function testMultipleValueProcessorTransformWithStrtoupper(): void {
    $data = $this->transformFixture('pipeline_with_strtoupper');
    $this->assertEquals(['DANDENONG', 'DANDENONG NORTH'], $data[0]['Suburbs']);
    $this->assertEquals(['BONEO', 'OUTTRIM'], $data[1]['Suburbs']);
  }

  /**
   * Test multiple_value_processor transform with mb_convert_case.
   */
  public function testMultipleValueProcessorTransformWithConvertCase(): void {
    $data = $this->transformFixture('pipeline_with_mb_convert_case');
    $this->assertEquals(['DANDENONG', 'DANDENONG NORTH'], $data[0]['Suburbs']);
    $this->assertEquals(['BONEO', 'OUTTRIM'], $data[1]['Suburbs']);
  }

  /**
   * Test multiple_value_processor transform with substr.
   */
  public function testMultipleValueProcessorTransformWithSubstr(): void {
    $data = $this->transformFixture('pipeline_with_substr');
    $this->assertEquals(['D', 'D'], $data[0]['Suburbs']);
    $this->assertEquals(['B', 'O'], $data[1]['Suburbs']);
  }

  /**
   * Test multiple_value_processor transform with str_replace.
   */
  public function testMultipleValueProcessorTransformWithReplace(): void {
    $data = $this->transformFixture('pipeline_with_str_replace');
    $this->assertEquals(['DAndenong', 'DAndenong North'], $data[0]['Suburbs']);
    $this->assertEquals(['Boneo', 'Outtrim'], $data[1]['Suburbs']);
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Suburbs
Dandenong; Dandenong North
Boneo;Outtrim
Loading