Skip to content

Commit

Permalink
Merge pull request #644 from alliance-genome/query_context_manager
Browse files (browse the repository at this point in the history)
Query context manager
  • Loading branch information
oblodgett authored Jan 26, 2023
2 parents d4c9808 + f7676b6 commit 242d1cb
Show file tree
Hide file tree
Showing 24 changed files with 809 additions and 856 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
neotime==1.7.4
py2neo
cryptography==3.3.1 # cryptography is required by py2neo; pinned because the current release (3.4.4 as of 2021/02/11) conflicts with the pip version
neo4j-driver==5.1.0
neo4j
ijson==2.4
pytest
jsonschema
Expand Down
2 changes: 1 addition & 1 deletion src/aggregate_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def run_loader(self):
for k in metadata:
fields.append(k + ": " + json.dumps(metadata[k]))
load_rel = "CREATE (o:AllianceReleaseInfo {" + ",".join(fields) + "})"
Neo4jHelper().run_single_query(load_rel)
Neo4jHelper().run_single_query_no_return(load_rel)

file_transactor = FileTransactor()
file_transactor.start_threads(data_manager.get_file_transactor_thread_settings())
Expand Down
38 changes: 18 additions & 20 deletions src/etl/biogrid_orcs_xref_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,26 +76,24 @@ def get_generators(self, entrez_ids):
"""Get Generators."""

biogrid_orcs_data_list = []
return_set = Neo4jHelper.run_single_parameter_query(self.gene_crossref_query_template,
entrez_ids)

for record in return_set:
gene_primary_key = record["g.primaryKey"]
mod_local_id = record["g.modLocalId"]
global_cross_ref_id = record["cr.globalCrossRefId"]
url = self.etlh.rdh2.return_url_from_key_value('NCBI_Gene', global_cross_ref_id.split(":")[1], 'biogrid/orcs')
biogrid_orcs_xref = ETLHelper.get_xref_dict(global_cross_ref_id.split(":")[1],
"NCBI_Gene",
"gene/biogrid_orcs",
"gene/biogrid_orcs",
"BioGRID CRISPR Screen Cell Line Phenotypes",
url,
global_cross_ref_id+"gene/biogrid_orcs")

biogrid_orcs_xref["genePrimaryKey"] = gene_primary_key
biogrid_orcs_xref["modLocalId"] = mod_local_id

biogrid_orcs_data_list.append(biogrid_orcs_xref)
with Neo4jHelper.run_single_parameter_query(self.gene_crossref_query_template, entrez_ids) as return_set:
for record in return_set:
gene_primary_key = record["g.primaryKey"]
mod_local_id = record["g.modLocalId"]
global_cross_ref_id = record["cr.globalCrossRefId"]
url = self.etlh.rdh2.return_url_from_key_value('NCBI_Gene', global_cross_ref_id.split(":")[1], 'biogrid/orcs')
biogrid_orcs_xref = ETLHelper.get_xref_dict(global_cross_ref_id.split(":")[1],
"NCBI_Gene",
"gene/biogrid_orcs",
"gene/biogrid_orcs",
"BioGRID CRISPR Screen Cell Line Phenotypes",
url,
global_cross_ref_id+"gene/biogrid_orcs")

biogrid_orcs_xref["genePrimaryKey"] = gene_primary_key
biogrid_orcs_xref["modLocalId"] = mod_local_id

biogrid_orcs_data_list.append(biogrid_orcs_xref)

yield [biogrid_orcs_data_list]

11 changes: 5 additions & 6 deletions src/etl/closure_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,11 @@ def get_closure_terms(self, data_provider):
query = self.retrieve_isa_partof_closure_query_template % (data_provider, data_provider)
self.logger.debug("Query to Run: %s", query)

return_set = Neo4jHelper().run_single_query(query)

closure_data = []
for record in return_set:
row = dict(child_id=record["childTerm.primaryKey"],
parent_id=record["parentTerm.primaryKey"])
closure_data.append(row)
with Neo4jHelper().run_single_query(query) as return_set:
for record in return_set:
row = dict(child_id=record["childTerm.primaryKey"],
parent_id=record["parentTerm.primaryKey"])
closure_data.append(row)

yield [closure_data]
2 changes: 1 addition & 1 deletion src/etl/disease_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def delete_empty_nodes(self):
AND size(keys(dd)) = 1
DETACH DELETE (dd)"""

Neo4jHelper.run_single_query(delete_empty_do_nodes_query)
Neo4jHelper.run_single_query_no_return(delete_empty_do_nodes_query)

def _process_sub_type(self, sub_type):

Expand Down
57 changes: 28 additions & 29 deletions src/etl/expression_atlas_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,14 @@ def _load_and_process_data(self):

@staticmethod
def _get_primary_gene_ids_to_ensembl_ids():
return_set = Neo4jHelper.run_single_query(ExpressionAtlasETL.get_all_gene_primary_to_ensmbl_ids_query)
return {record["c.localId"].lower(): record["g.primaryKey"] for record in return_set}
with Neo4jHelper.run_single_query(ExpressionAtlasETL.get_all_gene_primary_to_ensmbl_ids_query) as return_set:
return {record["c.localId"].lower(): record["g.primaryKey"] for record in return_set}

@staticmethod
def _get_mod_gene_symbol_to_primary_ids(data_provider):
return_set = Neo4jHelper.run_single_parameter_query(
ExpressionAtlasETL.get_mod_gene_symbol_to_primary_ids_query,
data_provider)
return {record["g.symbol"].lower(): record["g.primaryKey"] for record in return_set}
with Neo4jHelper.run_single_parameter_query(ExpressionAtlasETL.get_mod_gene_symbol_to_primary_ids_query,
data_provider) as return_set:
return {record["g.symbol"].lower(): record["g.primaryKey"] for record in return_set}

# Returns only pages for genes that we have in the Alliance
def _get_expression_atlas_gene_pages(self, sub_type,
Expand Down Expand Up @@ -122,29 +121,29 @@ def _process_sub_type(self, sub_type, ensg_to_gene_primary_id_map):

def get_generators(self, expression_atlas_gene_pages, data_provider, batch_size):
"""Get Generators."""
return_set = Neo4jHelper.run_single_parameter_query(
with Neo4jHelper.run_single_parameter_query(
ExpressionAtlasETL.get_genes_with_expression_atlas_links_query,
list(expression_atlas_gene_pages.keys())
)

counter = 0
cross_reference_list = []
for record in return_set:
counter += 1
cross_reference = ETLHelper.get_xref_dict(
record["g.primaryKey"].split(":")[1],
"ExpressionAtlas_gene",
"gene/expression-atlas",
"gene/expressionAtlas",
record["g.modLocalId"],
expression_atlas_gene_pages[record["g.primaryKey"].lower()],
data_provider + ":" + record["g.modLocalId"] + "gene/expression-atlas")
cross_reference["genePrimaryKey"] = record["g.primaryKey"]
cross_reference_list.append(cross_reference)
if counter > batch_size:
) as return_set:

counter = 0
cross_reference_list = []
for record in return_set:
counter += 1
cross_reference = ETLHelper.get_xref_dict(
record["g.primaryKey"].split(":")[1],
"ExpressionAtlas_gene",
"gene/expression-atlas",
"gene/expressionAtlas",
record["g.modLocalId"],
expression_atlas_gene_pages[record["g.primaryKey"].lower()],
data_provider + ":" + record["g.modLocalId"] + "gene/expression-atlas")
cross_reference["genePrimaryKey"] = record["g.primaryKey"]
cross_reference_list.append(cross_reference)
if counter > batch_size:
yield [cross_reference_list]
counter = 0
cross_reference_list = []

if counter > 0:
yield [cross_reference_list]
counter = 0
cross_reference_list = []

if counter > 0:
yield [cross_reference_list]
2 changes: 1 addition & 1 deletion src/etl/expression_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ def add_other(self):
ON CREATE SET othergo.type = 'other'
ON CREATE SET othergo.subset = 'goslim_agr' """

Neo4jHelper.run_single_query(add_other_query)
Neo4jHelper.run_single_query_no_return(add_other_query)

def get_generators(self, expression_file, batch_size): # noqa
"""Get Generators."""
Expand Down
21 changes: 10 additions & 11 deletions src/etl/expression_ribbon_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,19 +69,18 @@ def get_ribbon_terms(self):
"""Get ribbon terms."""
self.logger.debug("made it to the gocc ribbon retrieve")

return_set_rt = Neo4jHelper().run_single_query(self.expression_gocc_ribbon_retrieve_query)
gocc_ribbon_data = []
for record in return_set_rt:
row = {"ebe_id": record["ebe.primaryKey"],
"go_id": record["slimTerm.primaryKey"]}
gocc_ribbon_data.append(row)
with Neo4jHelper().run_single_query(self.expression_gocc_ribbon_retrieve_query) as return_set_rt:
for record in return_set_rt:
row = {"ebe_id": record["ebe.primaryKey"],
"go_id": record["slimTerm.primaryKey"]}
gocc_ribbon_data.append(row)

gocc_self_ribbon_data = []

return_set_srt = Neo4jHelper().run_single_query(self.gocc_self_ribbon_ebes_query)
for record in return_set_srt:
row = {"ebe_id": record["ebe.primaryKey"],
"go_id": record["got.primaryKey"]}
gocc_self_ribbon_data.append(row)
with Neo4jHelper().run_single_query(self.gocc_self_ribbon_ebes_query) as return_set_srt:
for record in return_set_srt:
row = {"ebe_id": record["ebe.primaryKey"],
"go_id": record["got.primaryKey"]}
gocc_self_ribbon_data.append(row)

yield [gocc_ribbon_data, gocc_self_ribbon_data]
10 changes: 4 additions & 6 deletions src/etl/expression_ribbon_other_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,10 @@ def get_ribbon_terms(self):
"""Get Ribbon Terms."""
self.logger.debug("made it to the gocc ribbon retrieve")

return_set_rle = Neo4jHelper().run_single_query(self.ribbonless_ebes_query)

gocc_ribbonless_data = []

for record in return_set_rle:
row = dict(ebe_id=record["ebe.primaryKey"])
gocc_ribbonless_data.append(row)
with Neo4jHelper().run_single_query(self.ribbonless_ebes_query) as return_set_rle:
for record in return_set_rle:
row = dict(ebe_id=record["ebe.primaryKey"])
gocc_ribbonless_data.append(row)

yield [gocc_ribbonless_data]
Loading

0 comments on commit 242d1cb

Please sign in to comment.