From 27e6f9564ec235a753f3e1d2d3030a0e41d9e783 Mon Sep 17 00:00:00 2001 From: Evildoor Date: Wed, 17 Apr 2019 16:16:06 +0200 Subject: [PATCH] Move ES consistency script into a separate stage. While the script is the stage 069's counterpart in data4es-consistency-check, they share no functionality. --- Utils/Dataflow/069_upload2es/README | 20 +------------ Utils/Dataflow/071_esConsistency/README | 28 +++++++++++++++++++ .../consistency.py | 0 Utils/Dataflow/run/data4es-consistency-check | 4 +-- 4 files changed, 31 insertions(+), 21 deletions(-) create mode 100644 Utils/Dataflow/071_esConsistency/README rename Utils/Dataflow/{069_upload2es => 071_esConsistency}/consistency.py (100%) diff --git a/Utils/Dataflow/069_upload2es/README b/Utils/Dataflow/069_upload2es/README index d1ea14686..a258165a5 100644 --- a/Utils/Dataflow/069_upload2es/README +++ b/Utils/Dataflow/069_upload2es/README @@ -4,7 +4,7 @@ Description ----------- -load_data.sh uploads prepared data to ElasticSearch. +Uploads prepared data to ElasticSearch. Input ----- @@ -18,24 +18,6 @@ JSON documents, one per line: ... }}} -Consistency ------------ -consistency.py checks that the data is present in ElasticSearch instead of -uploading it. Input comes from Stage 009(in consistency mode) and only needs 2 -fields for now: -{{{ -{taskid, task_timestamp} -... -}}} - -Consistency check can be run as following: - - ./consistency.py --conf elasticsearch_config - -For more information about running the check and its arguments, use: - - ./consistency.py -h - TODO ---- Make the stage aware of EOProcess/EOMessage markers diff --git a/Utils/Dataflow/071_esConsistency/README b/Utils/Dataflow/071_esConsistency/README new file mode 100644 index 000000000..13fe7a51b --- /dev/null +++ b/Utils/Dataflow/071_esConsistency/README @@ -0,0 +1,28 @@ +============= +* Stage 071 * +============= + +1. Description +-------------- +Checks that the given data is present in ElasticSearch. 
+ +Input must contain at least 2 fields: +{{{ +{_type: ..., _id: ..., ...} +... +}}} + +_type and _id are required to retrieve the document from ES. All the other +fields are compared with the document's corresponding ones. Results of the +comparison are written to stderr. + +2. Running the stage +-------------------- +The stage can be run as follows: + + ./consistency.py --conf elasticsearch_config + +For more information about running the stage and its arguments, use: + + ./consistency.py -h + diff --git a/Utils/Dataflow/069_upload2es/consistency.py b/Utils/Dataflow/071_esConsistency/consistency.py similarity index 100% rename from Utils/Dataflow/069_upload2es/consistency.py rename to Utils/Dataflow/071_esConsistency/consistency.py diff --git a/Utils/Dataflow/run/data4es-consistency-check b/Utils/Dataflow/run/data4es-consistency-check index 54d23a7de..5733826e4 100755 --- a/Utils/Dataflow/run/data4es-consistency-check +++ b/Utils/Dataflow/run/data4es-consistency-check @@ -20,6 +20,6 @@ cmd_016="$base_dir/../016_task2es/task2es.py -m s" # ES cfg_es=`get_config "es"` -cmd_069="$base_dir/../069_upload2es/consistency.py -m s --conf $cfg_es" +cmd_071="$base_dir/../071_esConsistency/consistency.py -m s --conf $cfg_es" -$cmd_009 | $cmd_016 | eop_filter | $cmd_069 >/dev/null +$cmd_009 | $cmd_016 | eop_filter | $cmd_071 >/dev/null