Skip to content

Commit

Permalink
feat: enhancements for ModelDB
Browse files (browse the repository at this point in the history)
- Additional formats
- Relaxed license requirement for projects
- Handled additional edge cases with references and PubMed Central images
Loading branch information
jonrkarr committed Jan 11, 2022
1 parent d264b78 commit 2ba7407
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 17 deletions.
2 changes: 1 addition & 1 deletion biosimulators_utils/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.157'
__version__ = '0.1.158'
24 changes: 24 additions & 0 deletions biosimulators_utils/combine/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class CombineArchiveContentFormat(str, enum.Enum):
DOCX = 'http://purl.org/NET/mediatypes/application/vnd.openxmlformats-officedocument.wordprocessingml.document'
EPS = 'http://purl.org/NET/mediatypes/application/postscript'
Escher = 'http://purl.org/NET/mediatypes/application/escher+json'
GENESIS = 'http://purl.org/NET/mediatypes/text/x-genesis'
GIF = 'http://purl.org/NET/mediatypes/image/gif'
GINML = 'http://purl.org/NET/mediatypes/application/ginml+xml'
GMSH_MESH = 'http://purl.org/NET/mediatypes/model/mesh'
Expand All @@ -156,11 +157,18 @@ class CombineArchiveContentFormat(str, enum.Enum):
JSON = 'http://purl.org/NET/mediatypes/application/json'
Kappa = 'http://purl.org/NET/mediatypes/text/x-kappa'
LEMS = 'http://purl.org/NET/mediatypes/application/lems+xml'
MAPLE_WORKSHEET = 'http://purl.org/NET/mediatypes/application/x-maple'
MARKDOWN = 'http://purl.org/NET/mediatypes/text/markdown'
MASS = 'http://purl.org/NET/mediatypes/application/mass+json'
MATHEMATICA_NOTEBOOK = 'http://purl.org/NET/mediatypes/application/vnd.wolfram.mathematica'
MATLAB = 'http://purl.org/NET/mediatypes/text/x-matlab'
MATLAB_DATA = 'http://purl.org/NET/mediatypes/application/x-matlab-data'
MATLAB_FIGURE = 'http://purl.org/NET/mediatypes/application/matlab-fig'
MorpheusML = 'http://purl.org/NET/mediatypes/application/morpheusml+xml'
NCS = 'http://purl.org/NET/mediatypes/text/x-ncs'
NeuroML = 'http://identifiers.org/combine.specifications/neuroml'
NEURON_SESSION = 'http://purl.org/NET/mediatypes/text/x-nrn-ses'
NMODL = 'http://purl.org/NET/mediatypes/text/x-nmodl'
NuML = 'http://purl.org/NET/mediatypes/application/numl+xml'
ODT = 'http://purl.org/NET/mediatypes/application/vnd.oasis.opendocument.text'
OMEX = 'http://identifiers.org/combine.specifications/omex'
Expand All @@ -172,6 +180,7 @@ class CombineArchiveContentFormat(str, enum.Enum):
pharmML = 'http://purl.org/NET/mediatypes/application/pharmml+xml'
PHP = 'http://purl.org/NET/mediatypes/application/x-httpd-php'
PNG = 'http://purl.org/NET/mediatypes/image/png'
POSTSCRIPT = 'http://purl.org/NET/mediatypes/application/postscript'
PPT = 'http://purl.org/NET/mediatypes/application/vnd.ms-powerpoint'
PPTX = 'http://purl.org/NET/mediatypes/application/vnd.openxmlformats-officedocument.presentationml.presentation'
PSD = 'http://purl.org/NET/mediatypes/image/vnd.adobe.photoshop'
Expand All @@ -191,6 +200,7 @@ class CombineArchiveContentFormat(str, enum.Enum):
SED_ML = 'http://identifiers.org/combine.specifications/sed-ml'
SHOCKWAVE_FLASH = 'http://purl.org/NET/mediatypes/application/x-shockwave-flash'
SimBiology_Project = 'http://purl.org/NET/mediatypes/application/x-sbproj'
SLI = 'http://purl.org/NET/mediatypes/text/x-sli'
Smoldyn = 'http://purl.org/NET/mediatypes/text/smoldyn+plain'
SO = 'http://purl.org/NET/mediatypes/application/x-sharedlib'
SQL = 'http://purl.org/NET/mediatypes/application/sql'
Expand All @@ -205,6 +215,8 @@ class CombineArchiveContentFormat(str, enum.Enum):
WEBP = 'http://purl.org/NET/mediatypes/image/webp'
XML = 'http://purl.org/NET/mediatypes/application/xml'
XPP = 'http://purl.org/NET/mediatypes/text/x-xpp'
XPP_AUTO = 'http://purl.org/NET/mediatypes/text/x-xpp-auto'
XPP_SET = 'http://purl.org/NET/mediatypes/text/x-xpp-set'
XLS = 'http://purl.org/NET/mediatypes/application/vnd.ms-excel'
XLSX = 'http://purl.org/NET/mediatypes/application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
XSL = 'http://purl.org/NET/mediatypes/application/xslfo+xml'
Expand Down Expand Up @@ -237,6 +249,7 @@ class CombineArchiveContentFormatPattern(str, enum.Enum):
DOCX = r'^https?://purl\.org/NET/mediatypes/application/vnd\.openxmlformats-officedocument\.wordprocessingml\.document$'
EPS = r'^https?://purl\.org/NET/mediatypes/(application/postscript|application/eps|application/x-eps|image/eps|image/x-eps)$'
Escher = r'^https?://purl\.org/NET/mediatypes/application/escher\+json$'
GENESIS = r'^https?://purl\.org/NET/mediatypes/text/x-genesis$'
GIF = r'^https?://purl\.org/NET/mediatypes/image/gif$'
GINML = r'^https?://purl\.org/NET/mediatypes/application/ginml\+xml$'
GMSH_MESH = r'^https?://purl\.org/NET/mediatypes/model/mesh$'
Expand All @@ -262,11 +275,18 @@ class CombineArchiveContentFormatPattern(str, enum.Enum):
JSON = r'^https?://purl\.org/NET/mediatypes/application/json$'
Kappa = r'^https?://purl\.org/NET/mediatypes/text/x-kappa$'
LEMS = r'^https?://purl\.org/NET/mediatypes/application/lems\+xml$'
MAPLE_WORKSHEET = r'^https?://purl\.org/NET/mediatypes/application/x-maple$'
MARKDOWN = r'^https?://purl\.org/NET/mediatypes/text/markdown$'
MASS = r'^https?://purl\.org/NET/mediatypes/application/mass\+json$'
MATHEMATICA_NOTEBOOK = r'^https?://purl\.org/NET/mediatypes/application/vnd\.wolfram\.mathematica$'
MATLAB = r'^https?://purl\.org/NET/mediatypes/text/x-matlab$'
MATLAB_DATA = r'^https?://purl\.org/NET/mediatypes/application/x-matlab-data$'
MATLAB_FIGURE = r'^https?://purl\.org/NET/mediatypes/application/matlab-fig$'
MorpheusML = r'^https?://purl\.org/NET/mediatypes/application/morpheusml\+xml$'
NCS = r'^https?://purl\.org/NET/mediatypes/text/x-ncs$'
NeuroML = r'^https?://identifiers\.org/combine\.specifications/neuroml($|\.)'
NEURON_SESSION = r'^https?://purl\.org/NET/mediatypes/text/x-nrn-ses$'
NMODL = r'^https?://purl\.org/NET/mediatypes/text/x-nmodl$'
NuML = r'^https?://purl\.org/NET/mediatypes/application/numl\+xml$'
ODT = r'^https?://purl\.org/NET/mediatypes/application/vnd\.oasis\.opendocument\.text$'
OMEX = r'^https?://identifiers\.org/combine\.specifications/omex($|\.)'
Expand All @@ -278,6 +298,7 @@ class CombineArchiveContentFormatPattern(str, enum.Enum):
pharmML = r'^https?://purl\.org/NET/mediatypes/application/pharmml\+xml$'
PHP = r'^https?://purl\.org/NET/mediatypes/(application/x-httpd-php|application/x-httpd-php-source|application/x-php|text/x-php)$'
PNG = r'^https?://purl\.org/NET/mediatypes/image/png$'
POSTSCRIPT = r'^https?://purl\.org/NET/mediatypes/application/postscript$'
PPT = r'^https?://purl\.org/NET/mediatypes/application/vnd\.ms-powerpoint$'
PPTX = r'^https?://purl\.org/NET/mediatypes/application/vnd\.openxmlformats-officedocument\.presentationml\.presentation$'
PSD = (
Expand Down Expand Up @@ -307,6 +328,7 @@ class CombineArchiveContentFormatPattern(str, enum.Enum):
SED_ML = r'^https?://identifiers\.org/combine\.specifications/sed\-?ml($|\.)'
SHOCKWAVE_FLASH = r'^https?://purl\.org/NET/mediatypes/(application/x-shockwave-flash|application/vnd\.adobe\.flash-movie)$'
SimBiology_Project = r'^https?://purl\.org/NET/mediatypes/application/x-sbproj$'
SLI = r'^https?://purl\.org/NET/mediatypes/text/x-sli$'
Smoldyn = r'^https?://purl\.org/NET/mediatypes/text/smoldyn\+plain$'
SO = r'^https?://purl\.org/NET/mediatypes/application/x-sharedlib$'
SQL = r'^https?://purl\.org/NET/mediatypes/application/sql$'
Expand All @@ -323,6 +345,8 @@ class CombineArchiveContentFormatPattern(str, enum.Enum):
XLSX = r'^https?://purl\.org/NET/mediatypes/application/vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet$'
XML = r'^https?://purl\.org/NET/mediatypes/application/xml$'
XPP = r'^https?://purl\.org/NET/mediatypes/text/x-xpp$'
XPP_AUTO = r'^https?://purl\.org/NET/mediatypes/text/x-xpp-auto$'
XPP_SET = r'^https?://purl\.org/NET/mediatypes/text/x-xpp-set$'
XSL = r'^https?://purl\.org/NET/mediatypes/(application/xslfo\+xml|text/xsl)$'
XUL = r'^https?://purl\.org/NET/mediatypes/text/xul$'
XYZ = r'^https?://purl\.org/NET/mediatypes/chemical/x-xyz$'
Expand Down
2 changes: 1 addition & 1 deletion biosimulators_utils/omex_meta/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class OmexMetadataSchema(str, enum.Enum):
'multiple_allowed': False,
'has_uri': True,
'has_label': True,
'required': True
'required': False
},
'http://purl.org/spar/scoro/funder': {
'namespace': {
Expand Down
4 changes: 2 additions & 2 deletions biosimulators_utils/omex_meta/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import dateutil.parser
import os
import re
import validators
import uritools

__all__ = [
'validate_biosimulations_metadata',
Expand Down Expand Up @@ -106,7 +106,7 @@ def validate_biosimulations_metadata_for_uri(metadata, validate_minimal_metadata

for object in objects:
if object and object['uri']:
if not validators.url(object['uri']):
if not uritools.isuri(object['uri']):
errors.append(['URI `{}` of attribute `{}` ({}) is not a valid URI.'.format(
object['uri'], predicate_type['attribute'], predicate_type['uri'])])
else:
Expand Down
30 changes: 21 additions & 9 deletions biosimulators_utils/ref/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ def get_reference_from_pubmed(pubmed_id=None, doi=None):
pubmed_id = str(record['IdList'][0])

record = get_entrez_record('pubmed', pubmed_id)
if record.get('DOI', None) != doi:
return None
else:
return None

Expand Down Expand Up @@ -170,7 +172,12 @@ def get_reference_from_crossref(id, session=requests):
:obj:`JournalArticle`: data about a reference
"""
response = session.get('https://api.crossref.org/works/' + id)
response.raise_for_status()
try:
response.raise_for_status()
except requests.exceptions.HTTPError:
if response.status_code == 404:
return None
raise
record = response.json()['message']

return JournalArticle(
Expand Down Expand Up @@ -231,17 +238,22 @@ def get_pubmed_central_open_access_graphics(id, dirname, session=requests):
if len(caption):
caption = caption[0]

if caption is not None:
caption = ''.join([lxml.etree.tostring(child).decode('utf8') for child in caption.getchildren()])

graphic = figure.xpath('graphic')
if len(graphic):
graphic = graphic[0]

graphics.append(PubMedCentralOpenAccesGraphic(
id=oa_id + '/' + figure.attrib['id'],
label=label.text.strip('.') if label is not None else None,
caption=''.join([lxml.etree.tostring(child).decode('utf8')
for child in caption.getchildren()]) if caption is not None else None,
filename=os.path.join(dirname, id, graphic.attrib['{{{}}}href'.format(graphic.nsmap['xlink'])] + ".jpg"),
))
elif graphic == []:
graphic = None

if graphic is not None:
graphics.append(PubMedCentralOpenAccesGraphic(
id=oa_id + '/' + figure.attrib['id'],
label=label.text.strip('.') if label is not None else None,
caption=caption,
filename=os.path.join(dirname, id, graphic.attrib['{{{}}}href'.format(graphic.nsmap['xlink'])] + ".jpg"),
))

return graphics

Expand Down
10 changes: 10 additions & 0 deletions biosimulators_utils/sedml/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,11 @@ class ModelLanguage(str, enum.Enum):
MASS = 'urn:sedml:language:mass'
MorpheusML = 'urn:sedml:language:morpheusml'
NeuroML = 'urn:sedml:language:neuroml'
NMODL = 'urn:sedml:language:nmodl'
pharmML = 'urn:sedml:language:pharmml'
RBA = 'urn:sedml:language:rba'
SBML = 'urn:sedml:language:sbml'
SLI = 'urn:sedml:language:sli'
Smoldyn = 'urn:sedml:language:smoldyn'
VCML = 'urn:sedml:language:vcml'
XPP = 'urn:sedml:language:xpp'
Expand All @@ -84,16 +86,20 @@ class ModelLanguagePattern(str, enum.Enum):
BNGL = r'^urn:sedml:language:bngl(\.|$)'
CellML = r'^urn:sedml:language:cellml(\.\d+_\d+)?$'
CopasiML = r'^urn:sedml:language:copasiml(\.|$)'
GENESIS = r'^urn:sedml:language:genesis(\.|$)'
GINML = r'^urn:sedml:language:ginml(\.|$)'
HOC = r'^urn:sedml:language:hoc(\.|$)'
Kappa = r'^urn:sedml:language:kappa(\.|$)'
LEMS = r'^urn:sedml:language:lems(\.|$)'
MASS = r'^urn:sedml:language:mass(\.|$)'
MorpheusML = r'^urn:sedml:language:morpheusml(\.|$)'
NCS = r'^urn:sedml:language:ncs(\.|$)'
NeuroML = r'^urn:sedml:language:neuroml(\.version-\d+_\d+_\d+\.level\-\d+)?$'
NMODL = r'^urn:sedml:language:nmodl(\.|$)'
pharmML = r'^urn:sedml:language:pharmml(\.|$)'
RBA = r'^urn:sedml:language:rba(\.|$)'
SBML = r'^urn:sedml:language:sbml(\.level\-\d+\.version\-\d+)?$'
SLI = r'^urn:sedml:language:sli(\.|$)'
Smoldyn = r'^urn:sedml:language:smoldyn(\.|$)'
VCML = r'^urn:sedml:language:vcml(\.|$)'
XPP = r'^urn:sedml:language:xpp(\.|$)'
Expand All @@ -105,16 +111,20 @@ class ModelLanguageEdamId(str, enum.Enum):
BNGL = 'format_3972'
CellML = 'format_3240'
CopasiML = 'format_9003'
GENESIS = 'format_9056'
GINML = 'format_9009'
HOC = 'format_9005'
Kappa = 'format_9006'
LEMS = 'format_9004'
MASS = 'format_9011'
MorpheusML = 'format_9002'
NCS = 'format_9057'
NeuroML = 'format_3971'
NMODL = 'format_9052'
pharmML = 'format_9007'
RBA = 'format_9012'
SBML = 'format_2585'
SLI = 'format_9054'
Smoldyn = 'format_9001'
VCML = 'format_9000'
XPP = 'format_9010'
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ requests_cache
setuptools
simplejson
termcolor
validators
uritools
yamldown
2 changes: 1 addition & 1 deletion tests/omex_meta/test_omex_meta_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def test_BiosimulationsOmexMetaReader_run(self):

filename = os.path.join(self.FIXTURE_DIR, 'missing-uri.rdf')
metadata, errors, warnings = io.BiosimulationsOmexMetaReader().run(filename, working_dir=self.dir_name)
self.assertIn('is required', flatten_nested_list_of_strings(errors))
self.assertEqual(errors, [])
self.assertEqual(warnings, [])

filename = os.path.join(self.FIXTURE_DIR, 'missing-label-2.rdf')
Expand Down
4 changes: 2 additions & 2 deletions tests/omex_meta/test_omex_meta_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,15 @@ def test_validate_biosimulations_metadata_for_uri(self):
self.assertEqual(len(md), 1)

md, errors, _ = read_omex_meta_file([self.FIXTURE_THIRD_A, self.FIXTURE_THIRD_C], config=config)
self.assertIn('is required', flatten_nested_list_of_strings(errors))
self.assertEqual(errors, [])
self.assertEqual(len(md), 2)

md, errors, _ = read_omex_meta_file([self.FIXTURE_THIRD_B, self.FIXTURE_THIRD_C], config=config)
self.assertIn('is required', flatten_nested_list_of_strings(errors))
self.assertEqual(len(md), 2)

md, errors, _ = read_omex_meta_file([self.FIXTURE_THIRD_A], config=config)
self.assertIn('is required', flatten_nested_list_of_strings(errors))
self.assertEqual(errors, [])
self.assertEqual(len(md), 1)

md, errors, _ = read_omex_meta_file([self.FIXTURE_THIRD_B], config=config)
Expand Down

0 comments on commit 2ba7407

Please sign in to comment.