Skip to content

Commit

Permalink
add example
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinschaper committed Oct 3, 2024
1 parent 0774a22 commit 10253c3
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
4 changes: 4 additions & 0 deletions examples/string-linkml-writer/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Source-level metadata for the STRING example ingest.
ingest_title: 'String DB'
ingest_url: 'https://string-db.org'
description: 'STRING: functional protein association networks'
# Link to the licensing page on string-db.org.
rights: 'https://string-db.org/cgi/access.pl?footer_active_subpage=licensing'
24 changes: 24 additions & 0 deletions examples/string-linkml-writer/protein-links-detailed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import uuid

from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction, Gene

from koza.cli_utils import get_koza_app

# Koza transform for the 'protein-links-detailed' source: each input row is
# turned into two Gene nodes and one PairwiseGeneToGeneInteraction edge.
koza_app = get_koza_app('protein-links-detailed')
# Lookup table keyed by the values found in the "protein1"/"protein2" columns;
# each entry is read for its "entrez" field below.
koza_map = koza_app.get_map('entrez-2-string')

# Keep pulling rows until get_row() returns None.
while (row := koza_app.get_row()) is not None:
    # NOTE(review): presumably koza_map is dict-like, so a protein id that is
    # missing from the map would fail on these lookups — confirm.
    gene_a = Gene(id="NCBIGene:" + koza_map[row["protein1"]]["entrez"])
    gene_b = Gene(id="NCBIGene:" + koza_map[row["protein2"]]["entrez"])

    pairwise_gene_to_gene_interaction = PairwiseGeneToGeneInteraction(
        id="uuid:" + str(uuid.uuid1()),  # new uuid1-based edge id for every row
        subject=gene_a.id,
        object=gene_b.id,
        predicate="biolink:interacts_with",
        knowledge_level="not_provided",
        agent_type="not_provided",
    )
    # Nodes and edges are sent to separately named writers ("nodes" / "edges").
    koza_app.write(gene_a, gene_b, writer="nodes")
    koza_app.write(pairwise_gene_to_gene_interaction, writer="edges")
    # NOTE(review): this call passes no entities and no writer. Indentation was
    # lost in the view this was reviewed from, so its placement inside the loop
    # is an assumption — confirm whether it belongs here, after the loop, or
    # is a leftover that should be removed.
    koza_app.write()
58 changes: 58 additions & 0 deletions examples/string-linkml-writer/protein-links-detailed.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Koza source configuration for the 'protein-links-detailed' ingest.
name: 'protein-links-detailed'

# Input fields are separated by a single space.
delimiter: ' '

# Input files read by this source.
files:
  - './examples/data/string.tsv'
  - './examples/data/string2.tsv'

# Use the metadata file that lives next to this config, so the example is
# self-contained instead of depending on the copy under './examples/string/'.
metadata: !include './examples/string-linkml-writer/metadata.yaml'

# Columns of the input files, in order. Only 'combined_score' carries an
# explicit type ('int'); the others are listed by name only.
# NOTE(review): 'cooccurence' is presumably spelled to match the header of the
# input data — confirm before "correcting" it.
columns:
  - 'protein1'
  - 'protein2'
  - 'neighborhood'
  - 'fusion'
  - 'cooccurence'
  - 'coexpression'
  - 'experimental'
  - 'database'
  - 'textmining'
  - 'combined_score' : 'int'

# Row filter on 'combined_score'.
# NOTE(review): presumably 'include' + 'lt' + 700 keeps rows whose score is
# below 700 — confirm against the Koza filter documentation.
filters:
  - inclusion: 'include'
    column: 'combined_score'
    filter_code: 'lt'
    value: 700

# Map configuration this source depends on; the transform script looks the map
# up by the name 'entrez-2-string'.
depends_on:
  - './examples/maps/entrez-2-string.yaml'

# Named writers. The transform script selects one per write call via
# writer="nodes" / writer="edges". Each writer has its own output filename,
# a LinkML schema reference and the list of classes it is configured for.
writers:
  "nodes":
    filename: 'protein_links_nodes.tsv'
    linkml_schema: 'biolink_model.schema:biolink_model.yaml'
    classes:
      - 'Gene'
  "edges":
    filename: 'protein_links_edges.tsv'
    linkml_schema: 'biolink_model.schema:biolink_model.yaml'
    classes:
      - 'PairwiseGeneToGeneInteraction'

# NOTE(review): the property lists below are commented out; presumably they
# are superseded by the 'writers' section above — confirm, then delete.
#node_properties:
# - 'id'
# - 'category'
# - 'provided_by'
#
#edge_properties:
# - 'id'
# - 'subject'
# - 'predicate'
# - 'object'
# - 'category'
# - 'relation'
# - 'provided_by'

# 'loop' mode: the transform script fetches rows itself by calling get_row()
# in a while loop.
transform_mode: 'loop'

0 comments on commit 10253c3

Please sign in to comment.