Skip to content

Commit

Permalink
remove duplicate while counting deprecated rs
Browse files Browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Nov 3, 2023
1 parent a6157c1 commit 5d81f42
Showing 1 changed file with 11 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def get_deprecated_counts_for_assembly(mongo_source, assembly_accession):
filter_criteria = {"inactiveObjects.asm": assembly_accession, "eventType": "DEPRECATED"}
if prev_release_end_for_assembly:
filter_criteria["createdDate"] = {"$gt": prev_release_end_for_assembly}
return query_mongo(mongo_source, filter_criteria, "new_deprecated_rs")
return query_mongo_distinct(mongo_source, filter_criteria, "new_deprecated_rs", "accession")


def query_mongo(mongo_source, filter_criteria, metric):
Expand All @@ -304,6 +304,16 @@ def query_mongo(mongo_source, filter_criteria, metric):
return total_count


def query_mongo_distinct(mongo_source, filter_criteria, metric, distinct_field):
    """Return how many distinct ``distinct_field`` values match across a metric's collections.

    Every collection name listed under ``collections[metric]`` is queried with
    ``distinct(distinct_field, filter_criteria)``. The per-collection results
    are merged into a single set, so a value reported by more than one
    collection is counted only once.

    :param mongo_source: object exposing ``mongo_handle`` and ``db_name``;
        ``mongo_handle[db_name][collection_name]`` must yield an object with a
        ``distinct`` method (presumably a pymongo collection -- confirm
        against the caller).
    :param filter_criteria: query filter handed unchanged to ``distinct``.
    :param metric: key into the module-level ``collections`` mapping that
        selects which collections are queried.
    :param distinct_field: name of the field whose distinct values are counted.
    :return: size of the union of the distinct values from all queried
        collections.
    """
    unique_values = set()
    for collection_name in collections[metric]:
        logger.info(f'Querying mongo: db.{collection_name}.distinct({distinct_field}, {filter_criteria})')
        database = mongo_source.mongo_handle[mongo_source.db_name]
        # Merge into one set so values shared between collections collapse.
        unique_values |= set(database[collection_name].distinct(distinct_field, filter_criteria))
    return len(unique_values)


def insert_counts_in_db(private_config_xml_file, metrics_for_assembly, ranges_per_assembly, release_version):
with get_metadata_connection_handle("production_processing", private_config_xml_file) as metadata_connection_handle:
fill_data_for_current_release(metadata_connection_handle, metrics_for_assembly, ranges_per_assembly,
Expand Down

0 comments on commit 5d81f42

Please sign in to comment.