diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php
new file mode 100644
index 0000000000..dacf75727f
--- /dev/null
+++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php
@@ -0,0 +1,303 @@
+job = $job;
+ $this->unicode = $unicode;
+ $this->output = $output;
+ $this->reportFormat = $this->job->getGlobalConfiguration()->getReportFormat();
+ if (array_key_exists('active', $action_conf) && $action_conf['active'] === false) {
+ $this->active = false;
+ return;
+ }
+ // get infos about the "source_field"
+ //
+ if (!($f = $job->getDataboxField($action_conf['source_field'])) ) {
+ $this->errors[] = sprintf("source field (%s) not found.", $action_conf['source_field']);
+ }
+ if (trim($f->get_tbranch()) === '') {
+ $this->errors[] = sprintf("source field (%s) not linked to thesaurus.", $action_conf['source_field']);
+ }
+ $this->tbranches = $job->getXpathTh()->query($f->get_tbranch());
+ if (!$this->tbranches || $this->tbranches->length <= 0) {
+ $this->errors[] = sprintf("thesaurus branch(es) of source field (%s) not found.", $this->source_field['tbranch']);
+ }
+ $this->source_field = [
+ 'id' => $f->get_id(),
+ 'name' => $f->get_name(),
+ 'tbranch' => $f->get_tbranch(),
+ 'lng' => array_key_exists('source_lng', $action_conf) ? $action_conf['source_lng'] : null
+ ];
+ $this->selectRecordFieldIds[] = $this->source_field['id'];
+ // get infos about the "destination_fields"
+ //
+ $this->destination_fields = [];
+ foreach ($action_conf['destination_fields'] as $tf) {
+ list($lng, $fname) = explode(':', $tf);
+ if(!($f = $job->getDataboxField($fname)) ) {
+ $this->output->writeln(sprintf("undefined field (%s) (ignored).", $fname));
+ continue;
+ }
+ $this->destination_fields[$lng] = [
+ 'id' => $f->get_id(),
+ 'name' => $f->get_name(),
+ ];
+ $this->selectRecordFieldIds[] = $this->destination_fields[$lng]['id'];
+ }
+ if (empty($this->destination_fields)) {
+ $this->errors[] = sprintf("no \"destination_field\" found.");
+ }
+ // misc settings
+ $this->cleanupDestination = array_key_exists('cleanup_destination', $action_conf) && $action_conf['cleanup_destination'] === true;
+ $this->cleanupSource = array_key_exists('cleanup_source', $action_conf) ? $action_conf['cleanup_source'] : self::NEVER_CLEANUP_SOURCE;
+ }
+    /**
+     * Apply this action to the metadata of one record.
+     *
+     * Every value of the source field is looked up in the thesaurus branch(es) linked to that
+     * field; the synonyms found are distributed to the destination fields, at most one
+     * translation per language (the first one found wins). Nothing is written here: the method
+     * only fills the two lists passed by reference.
+     *
+     * @param array $metas           current values of the record, as [field_id => [meta_id => value]]
+     * @param array $meta_to_delete  (in/out) values to delete, as [meta_id => value]
+     * @param array $meta_to_add     (in/out) values to add, as [field_id => value[]]
+     */
+    public function doAction(array $metas, array &$meta_to_delete, array &$meta_to_add)
+    {
+        if ($this->cleanupDestination) {
+            // schedule every existing destination value for deletion; a value confirmed by a
+            // translation further down is removed from that list again
+            foreach ($this->destination_fields as $destination_field) {
+                $destination_field_id = $destination_field['id'];
+                if (array_key_exists($destination_field_id, $metas)) {
+                    foreach ($metas[$destination_field_id] as $meta_id => $value) {
+                        $meta_to_delete[$meta_id] = $value;
+                    }
+                }
+            }
+        }
+
+        $source_field_id = $this->source_field['id'];
+        if (!array_key_exists($source_field_id, $metas)) {
+            // no source field value for this record: nothing to do
+            return;
+        }
+
+        $reportTranslations = in_array(
+            $this->reportFormat,
+            [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED],
+            true
+        );
+
+        // loop on every value of the "source_field"
+        //
+        foreach ($metas[$source_field_id] as $source_meta_id => $source_value) {
+            $parts = $this->splitTermAndContext($source_value);
+            $q = '@w=\'' . thesaurus::xquery_escape($this->unicode->remove_indexer_chars($parts[0])) . '\'';
+            if ($parts[1]) {
+                $q .= ' and @k=\'' . thesaurus::xquery_escape($this->unicode->remove_indexer_chars($parts[1])) . '\'';
+            }
+            if (!is_null($this->source_field['lng'])) {
+                $q .= ' and @lng=\'' . thesaurus::xquery_escape($this->source_field['lng']) . '\'';
+            }
+            // select the matching synonym, then all its siblings (= the translations)
+            $q = '//sy[' . $q . ']/../sy';
+
+            // loop on every tbranch (one field may be linked to many branches)
+            //
+            $translations = [];     // ONE translation per lng (first found in th)
+            /** @var DOMNode $tbranch */
+            foreach ($this->tbranches as $tbranch) {
+                $nodes = $this->job->getXpathTh()->query($q, $tbranch);
+                if (!$nodes) {
+                    $this->output->writeln(sprintf("\t\t\t- \"%s\" xpath error on (%s), ignored.", $source_value, $q));
+                    continue;
+                }
+
+                // loop on every synonym
+                //
+                /** @var DOMElement $node */
+                foreach ($nodes as $node) {
+                    $lng = $node->getAttribute('lng');
+                    // ignore synonyms not in one of the "destination_field" languages,
+                    // and languages that already got their translation
+                    //
+                    if (!array_key_exists($lng, $this->destination_fields) || array_key_exists($lng, $translations)) {
+                        continue;
+                    }
+
+                    $translated_value = $node->getAttribute('v');
+                    $destination_field_id = $this->destination_fields[$lng]['id'];
+                    $destination_field_name = $this->destination_fields[$lng]['name'];
+
+                    // strict search: a loose comparison would treat numeric-looking strings
+                    // ("007" / "7", "1e3" / "1000") as the same value and skip the addition
+                    $destination_meta_id = array_key_exists($destination_field_id, $metas)
+                        ? array_search($translated_value, $metas[$destination_field_id], true)
+                        : false;
+
+                    if ($destination_meta_id === false) {
+                        $translations[$lng] = [
+                            'val' => $translated_value,
+                            'id'  => null,
+                            'msg' => sprintf(" --> %s", $destination_field_name)
+                        ];
+                        $meta_to_add[$destination_field_id][] = $translated_value;
+                    }
+                    else {
+                        $translations[$lng] = [
+                            'val' => $translated_value,
+                            'id'  => $destination_meta_id,
+                            'msg' => sprintf("already in %s", $destination_field_name)
+                        ];
+                        // the value is still wanted: do not let "cleanup_destination" remove it
+                        unset($meta_to_delete[$destination_meta_id]);
+                    }
+                }
+            }
+
+            // report, and decide whether the source value must be removed
+            //
+            $deleteSource = ($this->cleanupSource === self::ALWAYS_CLEANUP_SOURCE);
+            if (empty($translations)) {
+                if ($this->reportFormat === GlobalConfiguration::REPORT_FORMAT_ALL) {
+                    $this->output->writeln(sprintf("\t\t\t- \"%s\" : no translation found.", $source_value));
+                }
+                $this->job->addToCondensedReport($source_value, Job::CONDENSED_REPORT_NOT_TRANSLATED);
+            }
+            else if (count($translations) < count($this->destination_fields)) {
+                if ($reportTranslations) {
+                    $this->output->writeln(sprintf("\t\t\t- \"%s\" : incomplete translation.", $source_value));
+                }
+                $this->job->addToCondensedReport($source_value, Job::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED);
+            }
+            else {
+                // complete translation (all target lng)
+                if ($reportTranslations) {
+                    $this->output->writeln(sprintf("\t\t\t- \"%s\" :", $source_value));
+                }
+                $this->job->addToCondensedReport($source_value, Job::CONDENSED_REPORT_FULLY_TRANSLATED);
+                if ($this->cleanupSource === self::CLEANUP_SOURCE_IF_TRANSLATED) {
+                    $deleteSource = true;
+                }
+            }
+
+            if ($reportTranslations) {
+                foreach ($translations as $lng => $translation) {
+                    $this->output->writeln(sprintf("\t\t\t\t- [%s] \"%s\" %s", $lng, $translation['val'], $translation['msg']));
+                }
+            }
+
+            if ($deleteSource) {
+                // do NOT delete the source term if one translation found it as already present
+                // as destination (possible if source=destination)
+                $used = false;
+                foreach ($translations as $translation) {
+                    if ($translation['id'] === $source_meta_id) {
+                        $used = true;
+                        break;
+                    }
+                }
+                if (!$used) {
+                    $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id];
+                }
+            }
+        }
+    }
+    /**
+     * Split a value written as "term (context)" into its two parts.
+     *
+     * The context is the text between the first "(" and the next ")"; when the closing
+     * parenthesis is missing, everything after "(" is the context. Anything following the
+     * closing parenthesis is dropped. Both parts are trimmed.
+     *
+     * @param string $word
+     * @return string[]  [term, context], context being '' when the value has no "("
+     */
+    private function splitTermAndContext($word)
+    {
+        $trimmed = trim($word);
+
+        $open = strpos($trimmed, '(');
+        if ($open === false) {
+            // no context at all
+            return [$trimmed, ''];
+        }
+
+        $close = strpos($trimmed, ')', $open);
+        $rawContext = ($close === false)
+            ? substr($trimmed, $open + 1)                       // unclosed parenthesis
+            : substr($trimmed, $open + 1, $close - $open - 1);
+
+        return [trim(substr($trimmed, 0, $open)), trim($rawContext)];
+    }
+ /**
+ * Tell whether this action must be played.
+ *
+ * The constructor sets the flag to false when the action configuration
+ * contains 'active' => false.
+ *
+ * @return bool
+ */
+ public function isActive(): bool
+ {
+ return $this->active;
+ }
+ /**
+ * Get the error messages collected by the constructor while checking the
+ * action configuration (source field, thesaurus branches, destination fields).
+ *
+ * @return array list of error messages
+ */
+ public function getErrors(): array
+ {
+ return $this->errors;
+ }
+ /**
+ * Get the ids of the fields this action works on: the source field and
+ * every destination field that was found in the databox.
+ *
+ * @return array list of field ids
+ */
+ public function getSelectRecordFieldIds(): array
+ {
+ return $this->selectRecordFieldIds;
+ }
diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php
index c097c63634..e989c69380 100644
--- a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php
+++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php
@@ -5,6 +5,7 @@
use appbox;
use collection;
use databox;
+use databox_field;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Yaml\Yaml;
use Unicode;
@@ -13,6 +14,8 @@
const CONFIG_DIR = "/config/translator/";
const CONFIG_FILE = "configuration.yml";
+ const REPORT_FORMAT_ALL = "all";
+ const REPORT_FORMAT_TRANSLATED = "translated";
private $configuration = null;
@@ -47,7 +50,8 @@ private function __construct($appBox, Unicode $unicode, $global_conf, bool $dryR
$sbas_name = $databox->get_dbname();
$this->databoxes[$sbas_id] = [
'dbox' => $databox,
- 'collections' => []
+ 'collections' => [],
+ 'fields' => [],
$this->databoxes[$sbas_name] = &$this->databoxes[$sbas_id];
// list all collections
@@ -57,16 +61,44 @@ private function __construct($appBox, Unicode $unicode, $global_conf, bool $dryR
$this->databoxes[$sbas_id]['collections'][$coll_id] = $collection;
$this->databoxes[$sbas_id]['collections'][$coll_name] = &$this->databoxes[$sbas_id]['collections'][$coll_id];
+ // list all fields
+ /** @var databox_field $dbf */
+ foreach($databox->get_meta_structure() as $dbf) {
+ $field_id = $dbf->get_id();
+ $field_name = $dbf->get_name();
+ $this->databoxes[$sbas_id]['fields'][$field_id] = $dbf;
+ $this->databoxes[$sbas_id]['fields'][$field_name] = &$this->databoxes[$sbas_id]['fields'][$field_id];
+ }
foreach($global_conf['jobs'] as $job_name => $job_conf) {
- $this->jobs[$job_name] = new Job($this, $job_conf, $unicode, $output);
+ $job = new Job($this, $job_name, $job_conf, $unicode, $output);
+ if($job->isActive()) {
+ if($job->isValid()) {
+ $this->jobs[$job_name] = $job;
+ }
+ else {
+ $output->writeln("Configuration error(s)... :");
+ foreach ($job->getErrors() as $err) {
+ $output->writeln(sprintf(" - %s", $err));
+ }
+ $output->writeln("...Job ignored");
+ }
+ }
+ else {
+ unset($job);
+ $output->writeln(sprintf("job \"%s\" is inactive: ignored.", $job_name));
+ }
* @param appbox $appBox
+ * @param Unicode $unicode
* @param string $root
+ * @param bool $dryRun
+ * @param string $reportFormat
+ * @param OutputInterface $output
* @return GlobalConfiguration
* @throws ConfigurationException
@@ -109,6 +141,25 @@ public function getCollection($sbasIdOrName, $collIdOrName)
return $this->databoxes[$sbasIdOrName]['collections'][$collIdOrName] ?? null;
+    /**
+     * Get the fields of a databox.
+     *
+     * Each field appears twice in the returned array: once under its id and once
+     * under its name.
+     *
+     * @param string|int $sbasIdOrName  sbas_id or dbname of the databox
+     * @return databox_field[]|null     null when the databox is unknown
+     */
+    public function getFields($sbasIdOrName)
+    {
+        // return the 'fields' list only, not the whole databox entry (dbox/collections/fields)
+        return $this->databoxes[$sbasIdOrName]['fields'] ?? null;
+    }
+ /**
+ * Get one field of a databox, by id or by name.
+ *
+ * @param string|int $sbasIdOrName sbas_id or dbname of the databox
+ * @param string|int $fieldIdOrName id or name of the field
+ * @return databox_field|null null when the databox or the field is unknown
+ */
+ public function getField($sbasIdOrName, $fieldIdOrName)
+ {
+ return $this->databoxes[$sbasIdOrName]['fields'][$fieldIdOrName] ?? null;
+ }
* @return bool
diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php
index e8d408d057..a9f76d4c9c 100644
--- a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php
+++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php
@@ -2,9 +2,8 @@
namespace Alchemy\Phrasea\Command\Thesaurus\Translator;
+use collection;
use databox;
-use DOMElement;
-use DOMNode;
use DOMNodeList;
use DOMXpath;
use PDO;
@@ -14,13 +13,20 @@
class Job
- const NEVER_CLEANUP_SOURCE = 'never';
- const ALWAYS_CLEANUP_SOURCE = 'always';
- const CLEANUP_SOURCE_IF_TRANSLATED = 'if_translated';
+ const CONDENSED_REPORT_NOT_TRANSLATED = 'notTranslated';
+ const CONDENSED_REPORT_INCOMPLETELY_TRANSLATED = 'incompletelyTranslated';
+ const CONDENSED_REPORT_FULLY_TRANSLATED = 'fullyTranslated';
private $active = true;
+ /** @var array[] */
+ private $condensedReportCounts = [
+ ];
/** @var string[] */
private $errors = []; // error messages while parsing conf
@@ -32,188 +38,128 @@ class Job
private $selectRecordsSql = null;
- /** @var array list of field ids of "source_field" (unique) and "destination_fields" (many) */
- private $selectRecordFieldIds;
- /**
- * @var OutputInterface
- */
+ /** @var OutputInterface */
private $output;
- private $source_field; // infos about the "source_field"
- private $destination_fields; // infos about the "destination_fields" (key=lng)
- /**
- * @var Unicode
- */
- private $unicode;
/** @var DOMXpath|false|thesaurus_xpath */
private $xpathTh;
- /**
- * @var DOMNodeList
- * The thesaurus branch(es) linked to the "source_field"
- */
- private $tbranches;
- /** @var bool */
- private $cleanupDestination;
+ /** @var int flush every n records */
+ private $bulk = 10;
- /** @var string */
- private $cleanupSource = self::NEVER_CLEANUP_SOURCE;
- /**
- * @var GlobalConfiguration
- */
+ /** @var GlobalConfiguration */
private $globalConfiguration;
- /**
- * @var array
- */
- private $job_conf;
- /**
- * @var \collection|null
- */
+ /** @var collection|null */
private $setCollection = null;
- /**
- * @var string
- */
+ /** @var string */
private $setStatus = null; // format 0xx1100xx01xxxx
- /**
- * @var array
- */
- private $notTranslated; // for condensed report
- /**
- * @var array
- */
- private $incompletelyTranslated; // for condensed report
- /**
- * @var array
- */
- private $fullyTranslated; // for condensed report
- /**
- * @var int
- */
+ /** @var Action[] */
+ private $actions;
+ /** @var array */
+ private $selectRecordFieldIds = []; // ids of fields required by actions
+ /** @var int */
private $recordsDone; // for condensed report
* @param GlobalConfiguration $globalConfiguration
+ * @param string $job_name
* @param array $job_conf
+ * @param Unicode $unicode
+ * @param OutputInterface $output
- public function __construct($globalConfiguration, $job_conf, Unicode $unicode, OutputInterface $output)
+ public function __construct(GlobalConfiguration $globalConfiguration, string $job_name, array $job_conf, Unicode $unicode, OutputInterface $output)
$this->globalConfiguration = $globalConfiguration;
- $this->job_conf = $job_conf;
- $this->unicode = $unicode;
$this->output = $output;
+ $this->actions = [];
+ $this->errors = [];
if (array_key_exists('active', $job_conf) && $job_conf['active'] === false) {
$this->active = false;
- $this->errors = [];
- foreach (['active', 'databox', 'source_field', 'destination_fields'] as $mandatory) {
+ foreach (['active', 'databox', 'actions'] as $mandatory) {
if (!isset($job_conf[$mandatory])) {
$this->errors[] = sprintf("Missing mandatory setting (%s).", $mandatory);
- if (!empty($this->errors)) {
- return;
- }
if (!($this->databox = $globalConfiguration->getDatabox($job_conf['databox']))) {
$this->errors[] = sprintf("unknown databox (%s).", $job_conf['databox']);
- return;
+ $ifCollection = null;
+ if(array_key_exists('if_collection', $job_conf)) {
+ if(!($ifCollection = $globalConfiguration->getCollection($this->databox->get_sbas_id(), $job_conf['if_collection']))) {
+ $this->errors[] = sprintf("unknown setCollection (%s).", $job_conf['if_collection']);
+ }
+ }
if(array_key_exists('set_collection', $job_conf)) {
if(!($this->setCollection = $globalConfiguration->getCollection($this->databox->get_sbas_id(), $job_conf['set_collection']))) {
$this->errors[] = sprintf("unknown setCollection (%s).", $job_conf['set_collection']);
- return;
if(array_key_exists('set_status', $job_conf)) {
$this->setStatus = $job_conf['set_status'];
- $cnx = $this->databox->get_connection();
- // get infos about the "source_field"
- //
- $sql = "SELECT `id`, `tbranch` FROM `metadatas_structure` WHERE `name` = :name AND `tbranch` != ''";
- $stmt = $cnx->executeQuery($sql, [':name' => $job_conf['source_field']]);
- $this->source_field = $stmt->fetch(PDO::FETCH_ASSOC);
- $stmt->closeCursor();
- if (!$this->source_field) {
- $this->errors[] = sprintf("field (%s) not found or not linked to thesaurus.", $job_conf['source_field']);
- return;
+ if(array_key_exists('bulk', $job_conf)) {
+ if( ($this->bulk = (int) $job_conf['bulk']) < 1) {
+ $this->errors[] = sprintf("bulk should be >= 1.");
+ }
- $this->source_field['lng'] = array_key_exists('source_lng', $job_conf) ? $job_conf['source_lng'] : null;
- $this->selectRecordFieldIds[] = $this->source_field['id'];
$this->xpathTh = $this->databox->get_xpath_thesaurus();
- $this->tbranches = $this->xpathTh->query($this->source_field['tbranch']);
- if (!$this->tbranches || $this->tbranches->length <= 0) {
- $this->errors[] = sprintf("thesaurus branch(es) (%s) not found.", $this->source_field['tbranch']);
- return;
- }
- // get infos about the "destination_fields"
+ // load actions
- $this->destination_fields = [];
- $sql = "SELECT `id`, `name` FROM `metadatas_structure` WHERE `name` = :name ";
- $stmt = $cnx->prepare($sql);
- foreach ($job_conf['destination_fields'] as $tf) {
- list($lng, $fname) = explode(':', $tf);
- $stmt->execute([':name' => $fname]);
- if (!($row = $stmt->fetch(PDO::FETCH_ASSOC))) {
- $this->output->writeln(sprintf("undefined field (%s) (ignored).", $fname));
- continue;
+ $this->selectRecordFieldIds = [];
+ foreach($job_conf['actions'] as $action_name => $action_conf) {
+ $action = new Action($this, $action_conf, $unicode, $this->output);
+ if($action->isActive()) {
+ $this->selectRecordFieldIds = array_merge($this->selectRecordFieldIds, $action->getSelectRecordFieldIds());
+ $this->errors = array_merge($this->errors, $action->getErrors());
+ $this->actions[$action_name] = $action;
+ }
+ else {
+ unset($action);
+ $output->writeln(sprintf("action \"%s\" of job \"%s\" is inactive: ignored.", $action_name, $job_name));
- $this->destination_fields[$lng] = $row;
- $stmt->closeCursor();
- $this->selectRecordFieldIds[] = $row['id'];
+ $this->selectRecordFieldIds = array_unique($this->selectRecordFieldIds);
- if (empty($this->destination_fields)) {
- $this->errors[] = sprintf("no \"destination_field\" found.");
+ if (!empty($this->errors)) {
- // misc settings
- $this->cleanupDestination = array_key_exists('cleanup_destination', $job_conf) && $job_conf['cleanup_destination'] === true;
- $this->cleanupSource = array_key_exists('cleanup_source', $job_conf) ? $job_conf['cleanup_source'] : self::NEVER_CLEANUP_SOURCE;
// build records select sql
- $selectRecordClauses = [];
- $this->selectRecordParams = [];
- if (array_key_exists('if_collection', $job_conf)) {
- if (!($coll = $globalConfiguration->getCollection($job_conf['databox'], $job_conf['if_collection']))) {
- $this->errors[] = sprintf("unknown collection (%s)", $job_conf['if_collection']);
- return;
- }
- $selectRecordClauses[] = "`coll_id` = :coll_id";
- $this->selectRecordParams[':coll_id'] = $coll->get_coll_id();
+ $selectRecordsClauses = [
+ '`record_id` > :minrid'
+ ];
+ $this->selectRecordParams = [
+ ':minrid' => 0
+ ];
+ if ($ifCollection) {
+ $selectRecordsClauses[] = "`coll_id` = :coll_id";
+ $this->selectRecordParams[':coll_id'] = $ifCollection->get_coll_id();
if (array_key_exists('if_status', $job_conf)) {
- $selectRecordClauses[] = "`status` & b:sb_and = b:sb_equ";
+ $selectRecordsClauses[] = "`status` & b:sb_and = b:sb_equ";
$this->selectRecordParams[':sb_and'] = str_replace(['0', 'x'], ['1', '0'], $job_conf['if_status']);
$this->selectRecordParams[':sb_equ'] = str_replace('x', '0', $job_conf['if_status']);
- $selectRecordClauses[] = "`meta_struct_id` IN ("
+ $cnx = $this->databox->get_connection();
+ $selectFieldsClause = "`meta_struct_id` IN ("
. join(
array_map(function ($id) use ($cnx) {
@@ -222,222 +168,109 @@ public function __construct($globalConfiguration, $job_conf, Unicode $unicode, O
. ")";
- $sql = "SELECT `record_id`, `meta_struct_id`, `metadatas`.`id` AS meta_id, `value` FROM";
- $sql .= " `record` INNER JOIN `metadatas` USING(`record_id`)";
- $sql .= " WHERE " . join(" AND ", $selectRecordClauses);
+ $sql = "SELECT `r1`.`record_id`, `meta_struct_id`, `metadatas`.`id` AS meta_id, `value` FROM\n";
+ $sql .= " (SELECT `record_id` FROM `record` WHERE ".join(" AND ", $selectRecordsClauses)." LIMIT ".$this->bulk.") AS `r1`\n";
+ $sql .= " LEFT JOIN `metadatas` ON(`metadatas`.`record_id`=`r1`.`record_id`\n";
+ $sql .= " AND " . $selectFieldsClause . ")\n";
$sql .= " ORDER BY `record_id` ASC";
$this->selectRecordsSql = $sql;
public function run()
- $cnx = $this->databox->get_connection();
- $stmt = $cnx->executeQuery($this->selectRecordsSql, $this->selectRecordParams);
- $currentRid = '?';
$this->recordsDone = 0;
- $this->notTranslated = [];
- $this->incompletelyTranslated = [];
- $this->fullyTranslated = [];
- $metas = $emptyValues = array_map(function () {
- return [];
- }, array_flip($this->selectRecordFieldIds));
- while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
- if ($currentRid == '?') {
- $currentRid = $row['record_id'];
+ $stmt = $this->databox->get_connection()->prepare($this->selectRecordsSql);
+// $metas = $emptyValues = array_map(function () {
+// return [];
+// }, array_flip($this->selectRecordFieldIds));
+ $minrid = 0;
+ do {
+ $nrows = 0;
+ $currentRid = '?';
+ $metas = [];
+ $this->selectRecordParams[':minrid'] = $minrid;
+ $stmt->execute($this->selectRecordParams);
+ while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
+ $nrows++;
+ if ($currentRid == '?') {
+ $currentRid = $row['record_id'];
+ }
+ if ($row['record_id'] !== $currentRid) {
+ // change record
+ $this->doRecord($currentRid, $metas); // flush previous record
+ $currentRid = $row['record_id'];
+ // $metas = $emptyValues;
+ $metas = [];
+ }
+ if ($row['meta_struct_id'] !== null) { // left join : a record may not have any required field
+ if (!array_key_exists($row['meta_struct_id'], $metas)) {
+ $metas[$row['meta_struct_id']] = [];
+ }
+ $metas[$row['meta_struct_id']][$row['meta_id']] = $row['value'];
+ }
- if ($row['record_id'] !== $currentRid) {
- // change record
- $this->doRecord($currentRid, $metas); // flush previous record
- $currentRid = $row['record_id'];
- $metas = $emptyValues;
+ if ($currentRid !== '?') {
+ $this->doRecord($currentRid, $metas); // flush last record
- $metas[$row['meta_struct_id']][$row['meta_id']] = $row['value'];
- }
- if($currentRid !== '?') {
- $this->doRecord($currentRid, $metas); // flush last record
+ $stmt->closeCursor();
+ $minrid = $currentRid;
- $stmt->closeCursor();
+ while($nrows > 0);
// condensed report
if($this->globalConfiguration->getReportFormat() === 'condensed') {
$this->output->writeln(sprintf("%d records done.", $this->recordsDone));
- if(!empty($this->notTranslated)) {
- ksort($this->notTranslated, SORT_STRING|SORT_FLAG_CASE);
+ if(!empty($this->condensedReportCounts[self::CONDENSED_REPORT_NOT_TRANSLATED])) {
+ ksort($this->condensedReportCounts[self::CONDENSED_REPORT_NOT_TRANSLATED], SORT_STRING|SORT_FLAG_CASE);
$this->output->writeln("Not translated terms:");
- foreach ($this->notTranslated as $term => $n) {
+ foreach ($this->condensedReportCounts[self::CONDENSED_REPORT_NOT_TRANSLATED] as $term => $n) {
$this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n));
- if(!empty($this->incompletelyTranslated)) {
- ksort($this->incompletelyTranslated, SORT_STRING|SORT_FLAG_CASE);
+ if(!empty($this->condensedReportCounts[self::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED])) {
$this->output->writeln("Incompletely translated terms:");
- foreach ($this->incompletelyTranslated as $term => $n) {
+ foreach ($this->condensedReportCounts[self::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED] as $term => $n) {
$this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n));
- if(!empty($this->fullyTranslated)) {
- ksort($this->fullyTranslated, SORT_STRING|SORT_FLAG_CASE);
+ if(!empty($this->condensedReportCounts[self::CONDENSED_REPORT_FULLY_TRANSLATED])) {
$this->output->writeln("Fully translated terms:");
- foreach ($this->fullyTranslated as $term => $n) {
+ foreach ($this->condensedReportCounts[self::CONDENSED_REPORT_FULLY_TRANSLATED] as $term => $n) {
$this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n));
- private function doRecord($record_id, $metas)
+ private function doRecord(string $record_id, array $metas)
$reportFormat = $this->globalConfiguration->getReportFormat();
if($reportFormat !== 'condensed') {
- $this->output->writeln(sprintf("record id: %s", $record_id));
+ $this->output->writeln(sprintf("\trecord id: %s", $record_id));
- $source_field_id = $this->source_field['id'];
$meta_to_delete = []; // key = id, to easily keep unique
$meta_to_add = [];
- if ($this->cleanupDestination) {
- foreach ($this->destination_fields as $lng => $destination_field) {
- $destination_field_id = $destination_field['id'];
- foreach ($metas[$destination_field_id] as $meta_id => $value) {
- $meta_to_delete[$meta_id] = $value;
- }
- unset($meta_id, $value);
- }
- unset($lng, $destination_field, $destination_field_id);
- }
- // loop on every value of the "source_field"
+ // play all actions
- foreach ($metas[$source_field_id] as $source_meta_id => $source_value) {
- $t = $this->splitTermAndContext($source_value);
- $q = '@w=\'' . \thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[0])) . '\'';
- if ($t[1]) {
- $q .= ' and @k=\'' . \thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[1])) . '\'';
- }
- if(!is_null($this->source_field['lng'])) {
- $q .= ' and @lng=\'' . \thesaurus::xquery_escape($this->source_field['lng']) . '\'';
- }
- $q = '//sy[' . $q . ']/../sy';
- unset($t);
- // loop on every tbranch (one field may be linked to many branches)
- //
- $translations = []; // ONE translation per lng (first found in th)
- /** @var DOMNode $tbranch */
- foreach ($this->tbranches as $tbranch) {
- if (!($nodes = $this->xpathTh->query($q, $tbranch))) {
- $this->output->writeln(sprintf(" - \"%s\" xpath error on (%s), ignored.", $source_value, $q));
- continue;
- }
- // loop on every synonym
- //
- /** @var DOMElement $node */
- foreach ($nodes as $node) {
- $lng = $node->getAttribute('lng');
- // ignore synonyms not in one of the "destination_field" languages
- //
- if (!array_key_exists($lng, $this->destination_fields)) {
- continue;
- }
- $translated_value = $node->getAttribute('v');
- $destination_field_id = $this->destination_fields[$lng]['id'];
- if (!array_key_exists($lng, $translations)) {
- if (($destination_meta_id = array_search($translated_value, $metas[$destination_field_id])) === false) {
- $translations[$lng] = [
- 'val' => $translated_value,
- 'id' => null,
- 'msg' => sprintf(" --> %s", $this->destination_fields[$lng]['name'])
- ];
- $meta_to_add[$destination_field_id][] = $translated_value;
- }
- else {
- $translations[$lng] = [
- 'val' => $translated_value,
- 'id' => $destination_meta_id,
- 'msg' => sprintf("already in %s", $this->destination_fields[$lng]['name'])
- ];
- unset($meta_to_delete[$destination_meta_id]);
- }
- unset($destination_meta_id);
- }
- unset($lng, $destination_field_id, $translated_value);
- }
- unset($nodes, $node, $tbranch);
- }
- unset($q);
- // cleanup source
- //
- if (empty($translations)) {
- if($reportFormat === 'all') {
- $this->output->writeln(sprintf(" - \"%s\" : no translation found.", $source_value));
- }
- $this->addToCondensedReport($source_value, $this->notTranslated);
+ foreach($this->actions as $action_name => $action) {
+ if($reportFormat !== 'condensed') {
+ $this->output->writeln(sprintf("\t\tplaying action \"%s\"", $action_name));
- else if (count($translations) < count($this->destination_fields)) {
- if(in_array($reportFormat, ['all', 'translated'])) {
- $this->output->writeln(sprintf(" - \"%s\" : incomplete translation.", $source_value));
- }
- $this->addToCondensedReport($source_value, $this->incompletelyTranslated);
- }
- else {
- // complete translation (all target lng)
- if(in_array($reportFormat, ['all', 'translated'])) {
- $this->output->writeln(sprintf(" - \"%s\" :", $source_value));
- }
- $this->addToCondensedReport($source_value, $this->fullyTranslated);
- if ($this->cleanupSource === self::CLEANUP_SOURCE_IF_TRANSLATED) {
- // do NOT delete the source term if one translation found it as already present as destination (possible if source=destination)
- $used = false;
- foreach($translations as $l => $t) {
- if($t['id'] === $source_meta_id) {
- $used = true;
- break;
- }
- }
- if(!$used) {
- $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id];
- }
- }
- }
- if(in_array($reportFormat, ['all', 'translated'])) {
- foreach ($translations as $lng => $translation) {
- $this->output->writeln(sprintf(" - [%s] \"%s\" %s", $lng, $translation['val'], $translation['msg']));
- }
- }
- if ($this->cleanupSource === self::ALWAYS_CLEANUP_SOURCE) {
- // do NOT delete the source term if one translation found it as already present as destination (possible if source=destination)
- $used = false;
- foreach($translations as $l => $t) {
- if($t['id'] === $source_meta_id) {
- $used = true;
- break;
- }
- }
- if(!$used) {
- $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id];
- }
- }
- unset($lng, $translations, $translation);
+ $action->doAction($metas, $meta_to_delete, $meta_to_add);
- unset($metas, $source_meta_id, $source_value);
+ unset($metas);
$actions = [];
@@ -494,36 +327,29 @@ private function doRecord($record_id, $metas)
- private function addToCondensedReport($term, &$where)
+ public function addToCondensedReport(string $term, string $where)
if($this->globalConfiguration->getReportFormat() !== 'condensed') {
- if(!array_key_exists($term, $where)) {
- $where[$term] = 0;
+ if(!array_key_exists($where, $this->condensedReportCounts)) {
+ $this->condensedReportCounts[$where] = [];
- $where[$term]++;
+ if(!array_key_exists($term, $this->condensedReportCounts[$where])) {
+ $this->condensedReportCounts[$where][$term] = 0;
+ }
+ $this->condensedReportCounts[$where][$term]++;
- private function splitTermAndContext($word)
- {
- $term = trim($word);
- $context = '';
- if (($po = strpos($term, '(')) !== false) {
- if (($pc = strpos($term, ')', $po)) !== false) {
- $context = trim(substr($term, $po + 1, $pc - $po - 1));
- $term = trim(substr($term, 0, $po));
- }
- else {
- $context = trim(substr($term, $po + 1));
- $term = trim(substr($term, 0, $po));
- }
- }
- return [$term, $context];
+ /**
+ * @return GlobalConfiguration
+ */
+ public function getGlobalConfiguration(): GlobalConfiguration
+ {
+ return $this->globalConfiguration;
* @return string[]
@@ -553,5 +379,16 @@ public function isActive(): bool
return $this->active;
+    /**
+     * Look up a field of the databox this job works on.
+     *
+     * @param string $fieldIdOrName  id or name of the field
+     * @return \databox_field|null   whatever GlobalConfiguration::getField() returns (null when not found)
+     */
+    public function getDataboxField(string $fieldIdOrName)
+    {
+        $sbasId = $this->databox->get_sbas_id();
+
+        return $this->globalConfiguration->getField($sbasId, $fieldIdOrName);
+    }
+ /**
+ * Get the xpath object of the thesaurus of the job's databox, as obtained
+ * from databox::get_xpath_thesaurus() in the constructor.
+ *
+ * @return DOMXpath|false|thesaurus_xpath
+ */
+ public function getXpathTh()
+ {
+ return $this->xpathTh;
+ }
diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php
index 1758d50c82..b5a43ac9cb 100644
--- a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php
+++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php
@@ -94,22 +94,7 @@ protected function doExecute(InputInterface $input, OutputInterface $output)
foreach ($this->config->getJobs() as $jobName => $job) {
- $output->writeln(sprintf("======== Playing job %s ========", $jobName));
- if(!$job->isValid()) {
- $output->writeln("Configuration error(s)... :");
- foreach ($job->getErrors() as $err) {
- $output->writeln(sprintf(" - %s", $err));
- }
- $output->writeln("...Job ignored");
- continue;
- }
- if(!$job->isActive()) {
- $output->writeln(sprintf("job is inactive, skipped."));
- continue;
- }
+ $output->writeln(sprintf("Playing job \"%s\"", $jobName));