diff --git a/kgx/cli/cli_utils.py b/kgx/cli/cli_utils.py index 262e2e4b..8fac8480 100644 --- a/kgx/cli/cli_utils.py +++ b/kgx/cli/cli_utils.py @@ -383,6 +383,7 @@ def _process_knowledge_source(ksf: str, spec: str) -> Union[str, bool, Tuple]: if spec.lower() == "true": return True elif spec.lower() == "false": + print("returning false") return False else: # If a Tuple, expect a comma-delimited string? @@ -520,6 +521,7 @@ def transform( pool.join() graphs = [r.get() for r in results] else: + print("no transform config") source_dict: Dict = { "input": { "format": input_format, @@ -555,7 +557,9 @@ def transform( + "' are all rewrite specifications!" ) else: + print("not a tuple") source_dict["input"][ksf] = ksf_spec + print(source_dict) name = os.path.basename(inputs[0]) transform_source( diff --git a/kgx/source/json_source.py b/kgx/source/json_source.py index baa637b5..adb49306 100644 --- a/kgx/source/json_source.py +++ b/kgx/source/json_source.py @@ -94,5 +94,5 @@ def read_edges(self, filename: str) -> Generator: FH = gzip.open(filename, "rb") else: FH = open(filename, "rb") - for e in ijson.items(FH, "edges.item"): + for e in ijson.items(FH, "edges.item", use_float=True): yield self.read_edge(e) diff --git a/kgx/source/tsv_source.py b/kgx/source/tsv_source.py index 2150d6ff..1842ec56 100644 --- a/kgx/source/tsv_source.py +++ b/kgx/source/tsv_source.py @@ -285,5 +285,5 @@ def read_edge(self, edge: Dict) -> Optional[Tuple]: key = generate_edge_key(s, edge_data["predicate"], o) self.edge_properties.update(list(edge_data.keys())) if self.check_edge_filter(edge_data): - self.node_properties.update(edge_data.keys()) + self.edge_properties.update(edge_data.keys()) return s, o, key, edge_data diff --git a/kgx/utils/infores.py b/kgx/utils/infores.py index 402fce72..201edb32 100644 --- a/kgx/utils/infores.py +++ b/kgx/utils/infores.py @@ -337,16 +337,13 @@ def set_provenance_map(self, kwargs: Dict): """ if "default_provenance" in kwargs: self.default_provenance = kwargs.pop("default_provenance") + print(self.default_provenance) - # Biolink 2.0 knowledge_source 'knowledge_source' derived fields - ksf_found = False + ksf_found = [] for ksf in knowledge_provenance_properties: if ksf in kwargs: - if not ksf_found: - ksf_found = ksf # save the first one found, for later + ksf_found.append(ksf) ksf_value = kwargs.pop(ksf) - # Check if the ksf_value is a multi-valued catalog of patterns for a - # given knowledge graph field, indexed on each distinct regex pattern if isinstance(ksf_value, dict): for ksf_pattern in ksf_value.keys(): if ksf not in self.mapping: @@ -357,7 +354,9 @@ def set_provenance_map(self, kwargs: Dict): ) else: ir = self.get_mapping(ksf) + print(ir) self.mapping[ksf] = ir.set_provenance_map_entry(ksf_value) + print("mapping", self.mapping) # if none specified, add at least one generic 'knowledge_source' if not ksf_found: @@ -368,9 +367,8 @@ def set_provenance_map(self, kwargs: Dict): else: self.mapping["knowledge_source"] = ir.default(self.default_provenance) - # TODO: better to lobby the team to totally deprecated this, even for Nodes? if "provided_by" not in self.mapping: - self.mapping["provided_by"] = self.mapping[ksf_found] + self.mapping["provided_by"] = ir.default(self.default_provenance) def set_provenance(self, ksf: str, data: Dict): """ @@ -386,6 +384,7 @@ def set_provenance(self, ksf: str, data: Dict): """ if ksf not in data.keys(): + print(data.keys()) if ksf in self.mapping and not isinstance(self.mapping[ksf], dict): data[ksf] = self.mapping[ksf]() # get default ksf value? else: @@ -396,6 +395,7 @@ def set_provenance(self, ksf: str, data: Dict): # If data is s a non-string iterable # then, coerce into a simple list of sources if isinstance(data[ksf], (list, set, tuple)): + print("is a tuple???") sources = list(data[ksf]) else: # Otherwise, just assumed to be a scalar @@ -442,7 +442,7 @@ def set_node_provenance(self, node_data: Dict): def set_edge_provenance(self, edge_data: Dict): """ Sets the node knowledge_source value for the current node. Edge knowledge_source properties - include the full Biolink 2.0 'knowledge_source' related properties. + include the 'knowledge_source' related properties. Parameters ---------- diff --git a/tests/unit/test_cli_utils.py b/tests/unit/test_cli_utils.py index 835d79de..cfe6ce18 100644 --- a/tests/unit/test_cli_utils.py +++ b/tests/unit/test_cli_utils.py @@ -604,6 +604,7 @@ def test_transform_error(): except ValueError: assert ValueError + def test_transform_knowledge_source_suppression(): """ Transform graph from TSV to JSON.