Skip to content

Commit

Permalink
Merge pull request #418 from biolink/poetry
Browse files Browse the repository at this point in the history
adding poetry, fixes #423, #403
  • Loading branch information
sierra-moxon authored Feb 14, 2023
2 parents 82c3511 + 3af9a89 commit 04a7e55
Show file tree
Hide file tree
Showing 30 changed files with 4,389 additions and 460 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/pypi-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Build the package with Poetry and upload it to PyPI whenever a GitHub
# release is created.
name: Publish Python Package

on:
  release:
    types: [ created ]

jobs:
  build-n-publish:
    name: Build and publish Python 🐍 distributions 📦 to PyPI
    runs-on: ubuntu-latest

    steps:
      # Same major versions of checkout/setup-python as the test workflow.
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML can never read a version such as 3.10 as the float 3.1.
          python-version: "3.9"

      - name: Install Poetry
        # NOTE(review): the dynamic-versioning plugin presumably derives the
        # package version from the release tag -- confirm against pyproject.toml.
        run: |
          pip install poetry
          poetry self add "poetry-dynamic-versioning[plugin]"

      - name: Build source and wheel archives
        run: poetry build

      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # "__token__" tells PyPI that the password is an API token, read here
          # from the KGX_PYPI_TOKEN repository secret.
          user: __token__
          password: ${{ secrets.KGX_PYPI_TOKEN }}
53 changes: 33 additions & 20 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,44 @@ on:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest
test:
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10" ]

runs-on: ${{ matrix.os }}

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2

- uses: actions/setup-python@v2
name: setup python environment
with:
python-version: 3.9
#----------------------------------------------
# install poetry
#----------------------------------------------
- name: Install Poetry
run: pipx install poetry

#----------------------------------------------
# check-out repo and set-up python
#----------------------------------------------
- name: Check out repository
uses: actions/checkout@v3

- name: Install dependencies
run: |
pip install -r requirements.txt
pip install pytest pytest-cov
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'

#----------------------------------------------
# install your root project, if required
#----------------------------------------------
- name: Install library
run: poetry install --no-interaction

#----------------------------------------------
# run test suite
#----------------------------------------------
- name: Setup Neo4j Docker
run: |
docker run --detach --name kgx-neo4j-unit-test -p 8484:7474 -p 8888:7687 --env NEO4J_AUTH=neo4j/test neo4j:4.3.0
Expand All @@ -47,11 +64,7 @@ jobs:

- name: Run tests
run: |
coverage erase
pytest --cov=kgx --cov-report xml:coverage.xml tests/*
coverage combine --append || true
coverage report
coverage xml
poetry run pytest tests/*
- name: SonarCloud Scan
uses: SonarSource/sonarcloud-github-action@master
Expand Down
30 changes: 0 additions & 30 deletions .github/workflows/run_tox.yml

This file was deleted.

2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ MAINTAINER Sierra Moxon "[email protected]"
RUN git clone https://github.com/biolink/kgx

# Setup
RUN cd kgx && git checkout tags/1.7.0 && pip install -r requirements.txt
RUN cd kgx && git checkout tags/2.0.0 && pip install -r requirements.txt
&& python setup.py install


Expand Down
13 changes: 4 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
export PYTHONPATH=.

test: unit-tests integration-tests

unit-tests:
pytest tests/unit/test_source/*.py
pytest tests/unit/test_sink/*.py
pytest tests/unit/*.py
poetry run pytest tests/unit/test_source/*.py
poetry run pytest tests/unit/test_sink/*.py
poetry run pytest tests/unit/*.py


integration-tests:
pytest tests/*.py

typecheck:
mypy kgx --ignore-missing-imports
poetry run pytest tests/integration/*.py
3 changes: 2 additions & 1 deletion kgx/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,15 @@ def get_graph_store_class() -> Any:

# Biolink release number must be a well-formed semantic version (major.minor.patch; the patch component is required), optionally prefixed with "v"
semver_pattern = re.compile(r"^\d+\.\d+\.\d+$")
semver_pattern_v = re.compile(r"^v\d+\.\d+\.\d+$")


def get_biolink_model_schema(biolink_release: Optional[str] = None) -> Optional[str]:
"""
Get Biolink Model Schema
"""
if biolink_release:
if not semver_pattern.fullmatch(biolink_release):
if not semver_pattern.fullmatch(biolink_release) and not semver_pattern_v.fullmatch(biolink_release):
raise TypeError(
"The 'biolink_release' argument '"
+ biolink_release
Expand Down
6 changes: 3 additions & 3 deletions kgx/sink/neo_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _write_node_cache(self) -> None:
filtered_categories = [x for x in categories if x not in self._seen_categories]
self.create_constraints(filtered_categories)
for category in self.node_cache.keys():
log.info("Generating UNWIND for category: {}".format(category))
log.debug("Generating UNWIND for category: {}".format(category))
cypher_category = category.replace(
self.CATEGORY_DELIMITER, self.CYPHER_CATEGORY_DELIMITER
)
Expand Down Expand Up @@ -139,13 +139,13 @@ def _write_edge_cache(self) -> None:
batch_size = 10000
for predicate in self.edge_cache.keys():
query = self.generate_unwind_edge_query(predicate)
log.info(query)
log.debug(query)
edges = self.edge_cache[predicate]
for x in range(0, len(edges), batch_size):
y = min(x + batch_size, len(edges))
batch = edges[x:y]
log.debug(f"Batch {x} - {y}")
log.info(edges[x:y])
log.debug(edges[x:y])
try:
self.session.run(
query, parameters={"relationship": predicate, "edges": batch}
Expand Down
12 changes: 0 additions & 12 deletions kgx/sink/rdf_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,18 +401,6 @@ def _get_property_type(self, p: str) -> str:
t = self.property_types[f"biolink:{p}"]
else:
t = "xsd:string"
# if value:
# if isinstance(value, (list, set, tuple)):
# x = value[0]
# if self.graph.has_node(x):
# t = 'uriorcurie'
# else:
# t = 'xsd:string'
# else:
# if self.graph.has_node(value):
# t = 'uriorcurie'
# else:
# t = 'xsd:string'
return t

def process_predicate(self, p: Optional[Union[URIRef, str]]) -> Tuple:
Expand Down
1 change: 0 additions & 1 deletion kgx/source/owl_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ def load_graph(self, rdfgraph: rdflib.Graph, **kwargs: Any) -> None:
if not isinstance(o, rdflib.term.BNode):
if p not in self.excluded_predicates:
yield from self.triple(s, p, o)

for s, p, o in rdfgraph.triples((None, None, None)):
if isinstance(s, rdflib.term.BNode) or isinstance(o, rdflib.term.BNode):
continue
Expand Down
16 changes: 6 additions & 10 deletions kgx/source/rdf_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ def triple(self, s: URIRef, p: URIRef, o: URIRef) -> None:
if s_curie.startswith("biolink") or s_curie.startswith("OBAN"):
log.warning(f"Skipping {s} {p} {o}")
elif s_curie in self.reified_nodes:
# subject is a reified node
self.add_node_attribute(s, key=prop_uri, value=o)
elif p in self.reification_predicates:
# subject is a reified node
Expand Down Expand Up @@ -334,8 +333,6 @@ def dereify(self, n: str, node: Dict) -> None:
node["predicate"] = "biolink:related_to"
if "relation" not in node:
node["relation"] = node["predicate"]
# if 'category' in node:
# del node['category']
if "subject" in node and "object" in node:
self.edge_properties.update(node.keys())
self.add_edge(node["subject"], node["object"], node["predicate"], node)
Expand Down Expand Up @@ -389,12 +386,6 @@ def add_node_attribute(
if isinstance(value, rdflib.term.Identifier):
if isinstance(value, rdflib.term.URIRef):
value_curie = self.prefix_manager.contract(value)
# if self.prefix_manager.get_prefix(value_curie) not in {'biolink'} \
# and mapped_key not in {'type', 'category', 'predicate', 'relation', 'predicate'}:
# d = self.add_node(value)
# value = d['id']
# else:
# value = value_curie
value = value_curie
else:
value = value.toPython()
Expand Down Expand Up @@ -589,6 +580,8 @@ def process_predicate(self, p: Optional[Union[URIRef, str]]) -> Tuple:
property_name = p
predicate = f":{p}"
element = self.get_biolink_element(p)
if not element:
element = self.get_biolink_element(predicate)
canonical_uri = None
if element:
if isinstance(element, SlotDefinition):
Expand All @@ -614,7 +607,6 @@ def process_predicate(self, p: Optional[Union[URIRef, str]]) -> Tuple:
if not predicate:
predicate = element_uri
else:
# no mapping to biolink model;
# look at predicate mappings
element_uri = None
if p in self.predicate_mapping:
Expand Down Expand Up @@ -847,6 +839,10 @@ def get_biolink_element(self, predicate: Any) -> Optional[Element]:
mapping = toolkit.get_element_by_mapping(predicate)
if mapping:
element = toolkit.get_element(mapping)
else:
mapping = toolkit.get_element_by_mapping(reference)
if mapping:
element = toolkit.get_element(mapping)
except ValueError as e:
self.owner.log_error(
entity=str(predicate),
Expand Down
1 change: 1 addition & 0 deletions kgx/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def transform(
# Optional process() data stream inspector
self.inspector = inspector

print("input args", input_args)
if input_format in {"neo4j", "graph"}:
source = self.get_source(input_format)
source.set_prefix_map(prefix_map)
Expand Down
10 changes: 8 additions & 2 deletions kgx/utils/kgx_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import stringcase
from linkml_runtime.linkml_model.meta import (
TypeDefinitionName,
EnumDefinition,
ElementName,
SlotDefinition,
ClassDefinition,
Expand All @@ -30,6 +31,7 @@

CORE_NODE_PROPERTIES = {"id", "name"}
CORE_EDGE_PROPERTIES = {"id", "subject", "predicate", "object", "type"}
XSD_STRING = "xsd:string"


class GraphEntityType(Enum):
Expand Down Expand Up @@ -512,12 +514,14 @@ def get_type_for_property(p: str) -> str:
"""
toolkit = get_toolkit()
e = toolkit.get_element(p)
t = "xsd:string"
t = XSD_STRING
if e:
if isinstance(e, ClassDefinition):
t = "uriorcurie"
elif isinstance(e, TypeDefinition):
t = e.uri
elif isinstance(e, EnumDefinition):
t = "uriorcurie"
else:
r = e.range
if isinstance(r, SlotDefinition):
Expand All @@ -528,7 +532,9 @@ def get_type_for_property(p: str) -> str:
elif isinstance(r, ElementName):
t = get_type_for_property(r)
else:
t = "xsd:string"
t = XSD_STRING
if t is None:
t = XSD_STRING
return t


Expand Down
16 changes: 11 additions & 5 deletions kgx/utils/rdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
sentencecase_to_camelcase,
get_biolink_ancestors,
)

from pprint import pprint
log = get_logger()

OBAN = Namespace("http://purl.org/oban/")
Expand Down Expand Up @@ -150,15 +150,20 @@ def get_biolink_element(
"""
toolkit = get_toolkit()
element = None
reference = None
if prefix_manager.is_iri(predicate):
predicate_curie = prefix_manager.contract(predicate)
else:
predicate_curie = predicate
if prefix_manager.is_curie(predicate_curie):
reference = prefix_manager.get_reference(predicate_curie)
element = toolkit.get_element(predicate_curie)
if element is None:
reference = prefix_manager.get_reference(predicate_curie)
else:
reference = predicate_curie
element = toolkit.get_element(reference)
if element is None and reference is not None:
element = toolkit.get_element(reference)
if not element:
try:
mapping = toolkit.get_element_by_mapping(predicate)
Expand Down Expand Up @@ -208,13 +213,16 @@ def process_predicate(
predicate = f":{p}"
element = get_biolink_element(prefix_manager, p)
canonical_uri = None
if element is None:
element = get_biolink_element(prefix_manager, predicate)
if element:
if isinstance(element, SlotDefinition):
# predicate corresponds to a biolink slot
if element.definition_uri:
element_uri = prefix_manager.contract(element.definition_uri)
else:
element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
canonical_uri = element_uri
if element.slot_uri:
canonical_uri = element.slot_uri
elif isinstance(element, ClassDefinition):
Expand All @@ -235,6 +243,4 @@ def process_predicate(
if p in predicate_mapping:
property_name = predicate_mapping[p]
predicate = f":{property_name}"
# cache[p] = {'element_uri': element_uri, 'canonical_uri': canonical_uri,
# 'predicate': predicate, 'property_name': property_name}
return element_uri, canonical_uri, predicate, property_name
Loading

0 comments on commit 04a7e55

Please sign in to comment.