From ff2a7126a467888ac53daaa14c339f81d8af088c Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 3 Dec 2018 11:03:13 +0100 Subject: [PATCH] Update to bio2bel v0.2.0 (#16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update setup * Fix flake8 * Update src/bio2bel_mirtarbase/manager.py * Update constants.py * Update constants.py * Update iteration and tests * Add tests for counting the human genes with Bio2BEL HGNC * Create .readthedocs.yml * Fix tests * Bump version: 0.1.3-dev → 0.2.0 --- .bumpversion.cfg | 2 +- .readthedocs.yml | 8 ++++++ .travis.yml | 2 -- docs/source/conf.py | 2 +- setup.py | 19 +++++++++++-- src/bio2bel_mirtarbase/__init__.py | 2 +- src/bio2bel_mirtarbase/constants.py | 2 +- src/bio2bel_mirtarbase/manager.py | 22 +++++++++------ src/bio2bel_mirtarbase/models.py | 43 ++++++++++++++++------------- tests/constants.py | 28 ++++++++----------- tests/test_build_db.py | 28 +++++++++++++------ tox.ini | 12 ++------ 12 files changed, 99 insertions(+), 71 deletions(-) create mode 100644 .readthedocs.yml diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 6b5f347..14454e6 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.3-dev +current_version = 0.2.0 commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? 
diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..5865d7c --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,8 @@ +# See: https://docs.readthedocs.io/en/latest/yaml-config.html +build: + image: latest +python: + version: 3.6 + pip_install: true + extra_requirements: + - docs diff --git a/.travis.yml b/.travis.yml index ef48c3d..f09f51f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,6 @@ jobs: - env: TOXENV=flake8 - env: TOXENV=pyroma - env: TOXENV=xenon - - env: TOXENV=vulture # docs stage - stage: docs env: TOXENV=doc8 @@ -31,7 +30,6 @@ jobs: matrix: allow_failures: - env: TOXENV=xenon - - env: TOXENV=vulture install: - sh -c 'if [ "$TOXENV" = "py" ]; then pip install tox codecov; else pip install tox; fi' diff --git a/docs/source/conf.py b/docs/source/conf.py index b29b1d4..fcd7bde 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -60,7 +60,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -release = '0.1.3-dev' +release = '0.2.0' # The short X.Y version. 
parsed_version = re.match( diff --git a/setup.py b/setup.py index 40aca04..8316ea1 100644 --- a/setup.py +++ b/setup.py @@ -18,13 +18,15 @@ 'Intended Audience :: Science/Research', 'Operating System :: OS Independent', 'Programming Language :: Python', + 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Scientific/Engineering :: Bio-Informatics', 'License :: OSI Approved :: MIT License', ] INSTALL_REQUIRES = [ - 'pybel>=0.12.1', - 'bio2bel>=0.1.5', + 'pybel>=0.12.0,<0.13.0', + 'bio2bel>=0.2.0,<0.3.0', 'bio2bel_hgnc>=0.1.0', 'bio2bel_entrez>=0.1.0', 'bio2bel_mirbase', @@ -35,7 +37,18 @@ 'tqdm', ] EXTRAS_REQUIRE = { - 'web': ['flask', 'flask_admin'], + 'web': [ + 'flask', + 'flask_admin', + ], + 'docs': [ + 'flask', + 'flask_admin', + 'sphinx', + 'sphinx-rtd-theme', + 'sphinx-click', + 'sphinx-autodoc-typehints', + ], } ENTRY_POINTS = { 'bio2bel': [ diff --git a/src/bio2bel_mirtarbase/__init__.py b/src/bio2bel_mirtarbase/__init__.py index 01aa8ab..49c97ba 100644 --- a/src/bio2bel_mirtarbase/__init__.py +++ b/src/bio2bel_mirtarbase/__init__.py @@ -4,7 +4,7 @@ from .manager import Manager # noqa: F401 -__version__ = '0.1.3-dev' +__version__ = '0.2.0' __title__ = 'bio2bel_mirtarbase' __description__ = "A package for converting miRTarBase to BEL" diff --git a/src/bio2bel_mirtarbase/constants.py b/src/bio2bel_mirtarbase/constants.py index f8caded..da5b45a 100644 --- a/src/bio2bel_mirtarbase/constants.py +++ b/src/bio2bel_mirtarbase/constants.py @@ -6,7 +6,7 @@ from bio2bel.utils import get_data_dir -VERSION = '0.1.3-dev' +VERSION = '0.2.0' MODULE_NAME = 'mirtarbase' DATA_DIR = get_data_dir(MODULE_NAME) diff --git a/src/bio2bel_mirtarbase/manager.py b/src/bio2bel_mirtarbase/manager.py index 67772b4..74d6ed2 100644 --- a/src/bio2bel_mirtarbase/manager.py +++ b/src/bio2bel_mirtarbase/manager.py @@ -3,15 +3,14 @@ """Manager for Bio2BEL miRTarBase.""" import logging +import time from 
typing import List, Mapping, Optional -import bio2bel_entrez -import bio2bel_mirbase -import time -from bio2bel_entrez.manager import VALID_ENTREZ_NAMESPACES from tqdm import tqdm +import bio2bel_entrez import bio2bel_hgnc +import bio2bel_mirbase from bio2bel import AbstractManager from bio2bel.manager.bel_manager import BELManagerMixin from bio2bel.manager.flask_manager import FlaskMixin @@ -22,8 +21,14 @@ from .models import Base, Evidence, Interaction, Mirna, Species, Target from .parser import get_data +__all__ = [ + 'Manager', +] + log = logging.getLogger(__name__) +VALID_ENTREZ_NAMESPACES = {'egid', 'eg', 'entrez', 'ncbigene'} + def _build_entrez_map(hgnc_manager: bio2bel_hgnc.Manager) -> Mapping[str, HumanGene]: """Build a mapping from entrez gene identifiers to their database models from :py:mod:`bio2bel_hgnc.models`.""" @@ -266,14 +271,14 @@ def enrich_rnas(self, graph: BELGraph): log.debug('enriching inhibitors of RNA') count = 0 - for node in graph: + for node in list(graph): if node[FUNCTION] != RNA: continue - if NAMESPACE not in node: + namespace = node.get(NAMESPACE) + if namespace is None: continue - namespace = node[NAMESPACE] identifier = node.get(IDENTIFIER) name = node.get(NAME) @@ -292,7 +297,7 @@ def enrich_rnas(self, graph: BELGraph): for interaction in target.interactions: for evidence in interaction.evidences: count += 1 - evidence.add_to_graph(graph) + evidence._add_to_graph(graph, evidence.interaction.mirna.as_bel(), node) log.debug('added %d MTIs', count) @@ -334,6 +339,7 @@ def enrich_mirnas(self, graph: BELGraph): log.debug('added %d MTIs', count) def get_mirna_interaction_evidences(self): + """Get interaction evidences.""" return self.session \ .query(Mirna, Interaction, Evidence) \ .join(Interaction) \ diff --git a/src/bio2bel_mirtarbase/models.py b/src/bio2bel_mirtarbase/models.py index e4385c1..085a42a 100644 --- a/src/bio2bel_mirtarbase/models.py +++ b/src/bio2bel_mirtarbase/models.py @@ -8,6 +8,7 @@ from 
sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship +import pybel.dsl from pybel import BELGraph from pybel.dsl import mirna, rna from .constants import MODULE_NAME @@ -16,11 +17,11 @@ MIRBASE = 'mirbase' HGNC = 'hgnc' -MIRNA_TABLE_NAME = '{}_mirna'.format(MODULE_NAME) -TARGET_TABLE_NAME = '{}_target'.format(MODULE_NAME) -SPECIES_TABLE_NAME = '{}_species'.format(MODULE_NAME) -EVIDENCE_TABLE_NAME = '{}_evidence'.format(MODULE_NAME) -INTERACTION_TABLE_NAME = '{}_interaction'.format(MODULE_NAME) +MIRNA_TABLE_NAME = f'{MODULE_NAME}_mirna' +TARGET_TABLE_NAME = f'{MODULE_NAME}_target' +SPECIES_TABLE_NAME = f'{MODULE_NAME}_species' +EVIDENCE_TABLE_NAME = f'{MODULE_NAME}_evidence' +INTERACTION_TABLE_NAME = f'{MODULE_NAME}_interaction' # create base class Base = declarative_base() @@ -42,7 +43,7 @@ def to_json(self, include_id: bool = True) -> Mapping: :param include_id: Include the database identifier? """ rv = { - 'name': str(self.name) + 'name': str(self.name), } if include_id: @@ -98,7 +99,7 @@ class Target(Base): hgnc_symbol = Column(String(32), nullable=True, unique=True, index=True, doc="HGNC gene symbol") hgnc_id = Column(String(32), nullable=True, unique=True, index=True, doc="HGNC gene identifier") - species_id = Column(Integer, ForeignKey('{}.id'.format(SPECIES_TABLE_NAME)), nullable=False, doc='The host species') + species_id = Column(Integer, ForeignKey(f'{SPECIES_TABLE_NAME}.id'), nullable=False, doc='The host species') species = relationship('Species') def __str__(self): # noqa: D105 @@ -115,7 +116,7 @@ def serialize_to_entrez_node(self) -> rna: def serialize_to_hgnc_node(self) -> rna: """Serialize to PyBEL node data dictionary.""" if self.hgnc_id is None: - raise ValueError('missing HGNC information for Entrez Gene {}'.format(self.entrez_id)) + raise ValueError(f'missing HGNC information for Entrez Gene {self.entrez_id}') return rna( namespace=HGNC, @@ -129,7 +130,7 @@ def to_json(self, include_id=True) -> Mapping: 
'species': self.species.to_json(), 'identifiers': [ self.serialize_to_entrez_node(), - self.serialize_to_hgnc_node() + self.serialize_to_hgnc_node(), ] } @@ -149,11 +150,11 @@ class Interaction(Base): mirtarbase_id = Column(String(64), nullable=False, unique=True, index=True, doc="miRTarBase interaction identifier which is unique for a pair of miRNA and RNA targets") - mirna_id = Column(Integer, ForeignKey("{}.id".format(MIRNA_TABLE_NAME)), nullable=False, index=True, + mirna_id = Column(Integer, ForeignKey(f'{MIRNA_TABLE_NAME}.id'), nullable=False, index=True, doc='The miRTarBase identifier of the interacting miRNA') mirna = relationship(Mirna, backref="interactions") - target_id = Column(Integer, ForeignKey("{}.id".format(TARGET_TABLE_NAME)), nullable=False, index=True, + target_id = Column(Integer, ForeignKey(f'{TARGET_TABLE_NAME}.id'), nullable=False, index=True, doc='The Entrez gene identifier of the interacting RNA') target = relationship(Target, backref="interactions") @@ -163,7 +164,7 @@ class Interaction(Base): ) def __str__(self): # noqa: D105 - return '{} =| {}'.format(self.mirna.name, self.target.name) + return f'{self.mirna.name} =| {self.target.name}' class Evidence(Base): @@ -179,7 +180,7 @@ class Evidence(Base): doc="Type and strength of the miRNA - target interaction. E.g. 
'Functional MTI (Weak)'") reference = Column(String(255), nullable=False, doc="Reference PubMed Identifier") - interaction_id = Column(Integer, ForeignKey("{}.id".format(INTERACTION_TABLE_NAME)), + interaction_id = Column(Integer, ForeignKey(f'{INTERACTION_TABLE_NAME}.id'), doc='The interaction for which this evidence was captured') interaction = relationship(Interaction, backref="evidences") @@ -188,14 +189,18 @@ def __str__(self): # noqa: D105 def add_to_graph(self, graph: BELGraph) -> str: """Add this edge to the BEL graph and return the ket for that edge.""" - try: - target_node = self.interaction.target.serialize_to_hgnc_node() - except ValueError: - target_node = self.interaction.target.serialize_to_entrez_node() + return self._add_to_graph( + graph, + self.interaction.mirna.as_bel(), + self.interaction.target.serialize_to_entrez_node(), + + ) + def _add_to_graph(self, graph: BELGraph, source: pybel.dsl.MicroRna, target: pybel.dsl.Rna) -> str: + """Add this edge to the BEL graph and return the ket for that edge.""" return graph.add_directly_decreases( - self.interaction.mirna.as_bel(), - target_node, + source, + target, evidence=str(self.support), citation=str(self.reference), annotations={ diff --git a/tests/constants.py b/tests/constants.py index 30d98e1..c395756 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -5,9 +5,8 @@ import json import os -import bio2bel_mirbase - import bio2bel_hgnc +import bio2bel_mirbase import bio2bel_mirtarbase from bio2bel.testing import AbstractTemporaryCacheClassMixin @@ -25,21 +24,6 @@ class TemporaryFilledCacheMixin(AbstractTemporaryCacheClassMixin): hgnc_manager: bio2bel_hgnc.Manager mirbase_manager: bio2bel_mirbase.Manager - @classmethod - def setUpClass(cls): - """Create temporary file and populate database.""" - super().setUpClass() - - cls.hgnc_manager = bio2bel_hgnc.Manager(connection=cls.connection) - cls.hgnc_manager._create_tables() - json_data = 
cls.hgnc_manager.load_hgnc_json(hgnc_file_path=TEST_HGNC_JSON) - cls.hgnc_manager.insert_hgnc(hgnc_dict=json_data, silent=True, low_memory=False) - - cls.mirbase_manager = bio2bel_mirbase.Manager(connection=cls.connection) - with open(TEST_MIRBASE_JSON) as file: - mirbase_list = json.load(file) - cls.mirbase_manager._populate_list(mirbase_list) - @classmethod def populate(cls): """Fill the HGNC and mirTarBase databases. @@ -58,4 +42,14 @@ def populate(cls): MIRT000006 hsa-miR-146a-5p Homo sapiens CXCR4 7852 Homo sapiens Microarray Functional MTI (Weak) 20375304 MIRT000012 hsa-miR-122-5p Homo sapiens CYP7A1 1581 Homo sapiens qRT-PCR//Luciferase reporter assay Functional MTI 20351063 """ + cls.hgnc_manager = bio2bel_hgnc.Manager(connection=cls.connection) + cls.hgnc_manager._create_tables() + json_data = cls.hgnc_manager.load_hgnc_json(hgnc_file_path=TEST_HGNC_JSON) + cls.hgnc_manager.insert_hgnc(hgnc_dict=json_data, silent=True, low_memory=False) + + cls.mirbase_manager = bio2bel_mirbase.Manager(connection=cls.connection) + with open(TEST_MIRBASE_JSON) as file: + mirbase_list = json.load(file) + cls.mirbase_manager._populate_definitions_helper(mirbase_list) + cls.manager.populate(TEST_MIRTARBASE_EXCEL) diff --git a/tests/test_build_db.py b/tests/test_build_db.py index f363163..ee3568d 100644 --- a/tests/test_build_db.py +++ b/tests/test_build_db.py @@ -6,7 +6,7 @@ from bio2bel_mirtarbase.models import Evidence, HGNC, MIRBASE, Mirna, NCBIGENE, Species, Target from pybel import BELGraph from pybel.constants import FUNCTION, IDENTIFIER, NAME, NAMESPACE -from pybel.dsl import mirna, rna +from pybel.dsl import BaseAbundance, mirna, rna from tests.constants import TemporaryFilledCacheMixin hif1a_symbol = 'HIF1A' @@ -22,6 +22,10 @@ class TestBuildDatabase(TemporaryFilledCacheMixin): """Test the database.""" + def test_count_human_genes(self): + """Test the number of genes in Bio2BEL HGNC.""" + self.assertEqual(2, self.hgnc_manager.count_human_genes()) + def 
test_count_mirnas(self): """Test the number of miRNAs.""" self.assertEqual(5, self.manager.count_mirnas()) @@ -117,6 +121,7 @@ def test_target(self): target = self.manager.query_target_by_entrez_id('7852') self.assertIsNotNone(target) self.assertEqual("CXCR4", target.name) + self.assertIsNotNone(target.hgnc_id) self.assertEqual("2561", target.hgnc_id) def check_hif1a(self, model: Target): @@ -126,8 +131,11 @@ def check_hif1a(self, model: Target): """ self.assertIsNotNone(model) self.assertEqual('HIF1A', model.name) + self.assertIsNotNone(model.hgnc_id) self.assertEqual('4910', model.hgnc_id) + self.assertIsNotNone(model.hgnc_symbol) self.assertEqual('HIF1A', model.hgnc_symbol) + self.assertIsNotNone(model.entrez_id) self.assertEqual('3091', model.entrez_id) self.assertEqual(1, len(model.interactions)) # all different evidences to hsa-miR-20a-5p @@ -147,25 +155,29 @@ def test_target_by_hgnc_symbol(self): model = self.manager.query_target_by_hgnc_symbol(hif1a_symbol) self.check_hif1a(model) - def help_enrich_hif1a(self, node_data): + def help_enrich_hif1a(self, node: BaseAbundance): """Help check that different versions of HIF1A can be enriched properly. 
- :param pybel.dsl.BaseAbundance node_data: A PyBEL data dictionary + :param pybel.dsl.BaseAbundance node: A PyBEL data dictionary """ - self.assertTrue(NAME in node_data or IDENTIFIER in node_data, - msg='Node missing information: {}'.format(node_data)) + self.assertIsInstance(node, BaseAbundance) + self.assertTrue(NAME in node or IDENTIFIER in node, + msg='Node missing information: {}'.format(node)) graph = BELGraph() - graph.add_node_from_data(node_data) + graph.add_node_from_data(node) self.assertEqual(1, graph.number_of_nodes()) self.assertEqual(0, graph.number_of_edges()) self.manager.enrich_rnas(graph) # should enrich with the HIF1A - hsa-miR-20a-5p interaction - self.assertEqual(2, graph.number_of_nodes()) + self.assertEqual(2, graph.number_of_nodes(), msg=f""" + Nodes: + {", ".join(map(str, graph))} + """) self.assertEqual(3, graph.number_of_edges()) self.assertIn(mi2_data, graph) - self.assertTrue(graph.has_edge(mi2_data, node_data)) + self.assertTrue(graph.has_edge(mi2_data, node)) def test_enrich_hgnc_symbol(self): """Test enrichment of an HGNC gene symbol node.""" diff --git a/tox.ini b/tox.ini index 1d82fcf..c280ebb 100644 --- a/tox.ini +++ b/tox.ini @@ -3,7 +3,6 @@ envlist = coverage-clean manifest flake8 - vulture xenon pyroma readme @@ -45,12 +44,6 @@ deps = commands = flake8 src/bio2bel_mirtarbase/ tests/ setup.py -[testenv:vulture] -deps = vulture -skip_install = true -commands = vulture src/bio2bel_mirtarbase/ -description = Run the vulture tool to look for dead code. - [testenv:xenon] deps = xenon skip_install = true @@ -80,9 +73,8 @@ deps = [testenv:docs] changedir = docs -deps = - sphinx - sphinx_rtd_theme +extras = + docs commands = mkdir -p {envtmpdir} cp -r source {envtmpdir}/source