Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PHRAS-3940 translator-group-jobs #4409

Merged
merged 3 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
303 changes: 303 additions & 0 deletions lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
<?php

namespace Alchemy\Phrasea\Command\Thesaurus\Translator;

use databox_field;
use DOMElement;
use DOMNode;
use DOMNodeList;
use Symfony\Component\Console\Output\OutputInterface;
use thesaurus;
use unicode;

class Action
{
const NEVER_CLEANUP_SOURCE = 'never';
const ALWAYS_CLEANUP_SOURCE = 'always';
const CLEANUP_SOURCE_IF_TRANSLATED = 'if_translated';

/** @var Job */
private $job;

/** @var unicode */
private $unicode;

/** @var OutputInterface */
private $output;

/** @var string */
private $reportFormat;


private $active = true;
private $errors = [];
/** @var databox_field|null */
private $source_field;
/** @var databox_field[] */
private $destination_fields; // infos about the "destination_fields" (key=lng)
/** @var array list of field ids of "source_field" (unique) and "destination_fields" (many) */
private $selectRecordFieldIds;
/** @var bool */
private $cleanupDestination;

/** @var string */
private $cleanupSource = self::NEVER_CLEANUP_SOURCE;



/** @var DOMNodeList The thesaurus branch(es) linked to the "source_field" */
private $tbranches;

public function __construct(Job $job, $action_conf, Unicode $unicode, OutputInterface $output)
{
$this->job = $job;
$this->unicode = $unicode;
$this->output = $output;
$this->reportFormat = $this->job->getGlobalConfiguration()->getReportFormat();

if (array_key_exists('active', $action_conf) && $action_conf['active'] === false) {
$this->active = false;
return;
}


// get infos about the "source_field"
//
if (!($f = $job->getDataboxField($action_conf['source_field'])) ) {
$this->errors[] = sprintf("source field (%s) not found.", $action_conf['source_field']);
}
if (trim($f->get_tbranch()) === '') {
$this->errors[] = sprintf("source field (%s) not linked to thesaurus.", $action_conf['source_field']);
}
$this->tbranches = $job->getXpathTh()->query($f->get_tbranch());
if (!$this->tbranches || $this->tbranches->length <= 0) {
$this->errors[] = sprintf("thesaurus branch(es) of source field (%s) not found.", $this->source_field['tbranch']);
}
$this->source_field = [
'id' => $f->get_id(),
'name' => $f->get_name(),
'tbranch' => $f->get_tbranch(),
'lng' => array_key_exists('source_lng', $action_conf) ? $action_conf['source_lng'] : null
];
$this->selectRecordFieldIds[] = $this->source_field['id'];


// get infos about the "destination_fields"
//
$this->destination_fields = [];
foreach ($action_conf['destination_fields'] as $tf) {
list($lng, $fname) = explode(':', $tf);
if(!($f = $job->getDataboxField($fname)) ) {
$this->output->writeln(sprintf("<warning>undefined field (%s) (ignored).</warning>", $fname));
continue;
}
$this->destination_fields[$lng] = [
'id' => $f->get_id(),
'name' => $f->get_name(),
];

$this->selectRecordFieldIds[] = $this->destination_fields[$lng]['id'];
}

if (empty($this->destination_fields)) {
$this->errors[] = sprintf("no \"destination_field\" found.");
}

// misc settings
$this->cleanupDestination = array_key_exists('cleanup_destination', $action_conf) && $action_conf['cleanup_destination'] === true;
$this->cleanupSource = array_key_exists('cleanup_source', $action_conf) ? $action_conf['cleanup_source'] : self::NEVER_CLEANUP_SOURCE;
}

public function doAction(array $metas, array &$meta_to_delete, array&$meta_to_add)
{
if ($this->cleanupDestination) {
foreach ($this->destination_fields as $lng => $destination_field) {
$destination_field_id = $destination_field['id'];
if(array_key_exists($destination_field_id, $metas)) {
foreach ($metas[$destination_field_id] as $meta_id => $value) {
$meta_to_delete[$meta_id] = $value;
}
}
unset($meta_id, $value);
}
unset($lng, $destination_field, $destination_field_id);
}

$source_field_id = $this->source_field['id'];

if(!array_key_exists($source_field_id, $metas)) {
// no source field value for this record: nothing to do
return;
}

// loop on every value of the "source_field"
//
foreach ($metas[$source_field_id] as $source_meta_id => $source_value) {

$t = $this->splitTermAndContext($source_value);
$q = '@w=\'' . thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[0])) . '\'';
if ($t[1]) {
$q .= ' and @k=\'' . thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[1])) . '\'';
}
if(!is_null($this->source_field['lng'])) {
$q .= ' and @lng=\'' . thesaurus::xquery_escape($this->source_field['lng']) . '\'';
}
$q = '//sy[' . $q . ']/../sy';
unset($t);

// loop on every tbranch (one field may be linked to many branches)
//
$translations = []; // ONE translation per lng (first found in th)
/** @var DOMNode $tbranch */
foreach ($this->tbranches as $tbranch) {
if (!($nodes = $this->job->getXpathTh()->query($q, $tbranch))) {
$this->output->writeln(sprintf("\t\t\t- \"%s\" <warning>xpath error on (%s), ignored.</warning>", $source_value, $q));
continue;
}

// loop on every synonym
//
/** @var DOMElement $node */
foreach ($nodes as $node) {
$lng = $node->getAttribute('lng');

// ignore synonyms not in one of the "destination_field" languages
//
if (!array_key_exists($lng, $this->destination_fields)) {
continue;
}

$translated_value = $node->getAttribute('v');

$destination_field_id = $this->destination_fields[$lng]['id'];
if (!array_key_exists($lng, $translations)) {
if (
!array_key_exists($destination_field_id, $metas)
|| ($destination_meta_id = array_search($translated_value, $metas[$destination_field_id])) === false
) {
$translations[$lng] = [
'val' => $translated_value,
'id' => null,
'msg' => sprintf(" --> %s", $this->destination_fields[$lng]['name'])
];
$meta_to_add[$destination_field_id][] = $translated_value;
}
else {
$translations[$lng] = [
'val' => $translated_value,
'id' => $destination_meta_id,
'msg' => sprintf("already in %s", $this->destination_fields[$lng]['name'])
];
unset($meta_to_delete[$destination_meta_id]);
}
unset($destination_meta_id);
}
unset($lng, $destination_field_id, $translated_value);
}
unset($nodes, $node, $tbranch);
}
unset($q);

// cleanup source
//
if (empty($translations)) {
if($this->reportFormat === GlobalConfiguration::REPORT_FORMAT_ALL) {
$this->output->writeln(sprintf("\t\t\t- \"%s\" : no translation found.", $source_value));
}
$this->job->addToCondensedReport($source_value, job::CONDENSED_REPORT_NOT_TRANSLATED);
}
else if (count($translations) < count($this->destination_fields)) {
if(in_array($this->reportFormat, [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED])) {
$this->output->writeln(sprintf("\t\t\t- \"%s\" : incomplete translation.", $source_value));
}
$this->job->addToCondensedReport($source_value, job::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED);
}
else {
// complete translation (all target lng)
if(in_array($this->reportFormat, [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED])) {
$this->output->writeln(sprintf("\t\t\t- \"%s\" :", $source_value));
}
$this->job->addToCondensedReport($source_value, job::CONDENSED_REPORT_FULLY_TRANSLATED);

if ($this->cleanupSource === self::CLEANUP_SOURCE_IF_TRANSLATED) {
// do NOT delete the source term if one translation found it as already present as destination (possible if source=destination)
$used = false;
foreach($translations as $l => $t) {
if($t['id'] === $source_meta_id) {
$used = true;
break;
}
}
if(!$used) {
$meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id];
}
}
}

if(in_array($this->reportFormat, [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED])) {
foreach ($translations as $lng => $translation) {
$this->output->writeln(sprintf("\t\t\t\t- [%s] \"%s\" %s", $lng, $translation['val'], $translation['msg']));
}
}

if ($this->cleanupSource === self::ALWAYS_CLEANUP_SOURCE) {
// do NOT delete the source term if one translation found it as already present as destination (possible if source=destination)
$used = false;
foreach($translations as $l => $t) {
if($t['id'] === $source_meta_id) {
$used = true;
break;
}
}
if(!$used) {
$meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id];
}
}

unset($lng, $translations, $translation);
}
}

private function splitTermAndContext($word)
{
$term = trim($word);
$context = '';
if (($po = strpos($term, '(')) !== false) {
if (($pc = strpos($term, ')', $po)) !== false) {
$context = trim(substr($term, $po + 1, $pc - $po - 1));
$term = trim(substr($term, 0, $po));
}
else {
$context = trim(substr($term, $po + 1));
$term = trim(substr($term, 0, $po));
}
}

return [$term, $context];
}

/**
* @return bool
*/
public function isActive(): bool
{
return $this->active;
}

/**
* @return array
*/
public function getErrors(): array
{
return $this->errors;
}

/**
* @return array
*/
public function getSelectRecordFieldIds(): array
{
return $this->selectRecordFieldIds;
}

}
Loading