From 2e7cc75b112f446ef998b096a96b43e8c266bd1f Mon Sep 17 00:00:00 2001 From: Evildoor Date: Wed, 22 May 2019 12:47:06 +0200 Subject: [PATCH] Add _parent field handling. The field is required to get child documents such as output datasets. --- Utils/Dataflow/071_esConsistency/consistency.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Utils/Dataflow/071_esConsistency/consistency.py b/Utils/Dataflow/071_esConsistency/consistency.py index 61cc5c083..a9ad2b5b4 100755 --- a/Utils/Dataflow/071_esConsistency/consistency.py +++ b/Utils/Dataflow/071_esConsistency/consistency.py @@ -104,7 +104,7 @@ def es_connect(cfg): return True -def get_fields(index, _id, _type, fields): +def get_fields(index, _id, _type, fields, _parent): ''' Get fields value by given _id and _type. :param es: elasticsearch client @@ -123,7 +123,7 @@ def get_fields(index, _id, _type, fields): ''' try: results = es.get(index=index, doc_type=_type, id=_id, - _source=fields) + _source=fields, parent=_parent) except elasticsearch.exceptions.NotFoundError: return False return results['_source'] @@ -151,6 +151,10 @@ def process(stage, message): log('Insufficient ES info in data:' + str(data), 'WARN') return False + _parent = None + if '_parent' in data: + _parent = data.pop('_parent') + # Crutch. Remove unwanted (for now) fields added by Stage 016. for field in ['phys_category', 'chain_data', 'chain_id']: if field in data: @@ -164,7 +168,7 @@ def process(stage, message): 'WARN') return False - es_data = get_fields(INDEX, _id, _type, data.keys()) + es_data = get_fields(INDEX, _id, _type, data.keys(), _parent) if data != es_data: log('Document ({0}, {1}) differs between Oracle and ' 'ES: Oracle:{2} ES:{3}'.format(_type, _id, data, es_data), 'WARN')