title | redirect_from |
---|---|
WikiPathways SPARQL queries |
/index.php/Help:WikiPathways_SPARQL_queries |
On sparql.wikipathways.org WikiPathways content is replicated in a SPARQL endpoint. Queries can be performed in three ways:
- Either go to the endpoint directly and create your own SPARQL query.
- Copy and paste an example query listed below in the endpoint.
- Adapt a code example to programmatically make a SPARQL query
This project is written up in the "Using the Semantic Web for Rapid Integration of WikiPathways with Other Biological Online Data Resources" paper.
Due to an Apache update, we are now creating RDF data according to SPARQL 1.1.
However, our SPARQL endpoint running on Virtuoso is still using SPARQL 1.0.
This influences the way to query strings, and might affect federated queries.
Please remove the ^^xsd:string suffix
, as shown in the example below.
Within the example queries, we have omitted the prefixes. These prefixes are automatically used in the SPARQL endpoint. The following prefixes are used in the WikiPathways RDF:
PREFIX gpml: <http://vocabularies.wikipathways.org/gpml#>
PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
PREFIX cur: <http://vocabularies.wikipathways.org/wp#Curation:>
PREFIX wprdf: <http://rdf.wikipathways.org/>
PREFIX biopax: <http://www.biopax.org/release/biopax-level3.owl#>
PREFIX cas: <https://identifiers.org/cas/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ncbigene:<https://identifiers.org/ncbigene/>
PREFIX pubmed: <http://www.ncbi.nlm.nih.gov/pubmed/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
List the information about the data sets in the SPARQL endpoint:
# Describe every void:Dataset in the endpoint: title (as a plain string),
# creation date and license.
# void: and pav: are not part of the shared prefix list of this page, so they
# are declared here to keep the query self-contained.
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX pav: <http://purl.org/pav/>
SELECT DISTINCT ?dataset (STR(?titleLit) AS ?title) ?date ?license
WHERE {
  ?dataset a void:Dataset ;
           dcterms:title ?titleLit ;
           dcterms:license ?license ;
           pav:createdOn ?date .
}
# Every organism IRI in the data together with its name as a plain string.
SELECT DISTINCT ?organism (STR(?label) AS ?name)
WHERE {
  ?subject wp:organism ?organism .
  ?subject wp:organismName ?label .
}
# Title of every pathway together with the name of its organism.
# The organism IRI is bound to ?organismIri (not ?organism) because SPARQL 1.1
# rejects "(expr AS ?v)" when ?v is already bound inside the WHERE clause.
SELECT DISTINCT (STR(?title) AS ?pathway) (STR(?label) AS ?organism)
WHERE {
  ?pw dc:title ?title ;
      wp:organism ?organismIri ;
      wp:organismName ?label .
}
# Number of pathways per organism, largest count first.
# SPARQL 1.1 requires every non-aggregated variable in the SELECT clause to be
# a GROUP BY key, hence the explicit grouping on ?organism and ?label.
# COUNT(DISTINCT ?pw) keeps a pathway from being counted once per title.
SELECT ?organism (STR(?label) AS ?name) (COUNT(DISTINCT ?pw) AS ?pathwayCount)
WHERE {
  ?pw dc:title ?title ;
      wp:organism ?organism ;
      wp:organismName ?label .
}
GROUP BY ?organism ?label
ORDER BY DESC(?pathwayCount)
The following query list all mouse pathways. ?wpIdentifier
is the link through identifiers.org,
?pathway
points to the RDF version of WikiPathways and ?page is the revision which is loaded
in the SPARQL endpoint.
# All "Mus musculus" pathways that have a title, a page and an identifier,
# sorted by identifier.
SELECT DISTINCT ?wpIdentifier ?pathway ?page
WHERE {
  ?pathway wp:organismName "Mus musculus" ;
           dc:title ?anyTitle ;
           foaf:page ?page ;
           dc:identifier ?wpIdentifier .
}
ORDER BY ?wpIdentifier
List all pathways per instance of a particular gene or protein (wp:GeneProduct
)
# Pathways containing a wp:GeneProduct whose label matches "CYP".
# The node itself is bound to ?gp so that the output column ?geneProduct (the
# label as a plain string) does not reuse an in-scope variable, which
# SPARQL 1.1 forbids for "(expr AS ?v)".
SELECT DISTINCT ?pathway (STR(?label) AS ?geneProduct)
WHERE {
  ?gp a wp:GeneProduct ;
      rdfs:label ?label ;
      dcterms:isPartOf ?pathway .
  ?pathway a wp:Pathway .
  FILTER regex(STR(?label), "CYP")
}
List all groups and complexes per instance of a particular gene or protein (wp:GeneProduct
)
# Containers of a wp:GeneProduct whose label matches "CYP", keeping only the
# containers that are typed neither wp:Interaction nor wp:Pathway.
# The output column is still called ?pathway for backward compatibility, even
# though it holds those remaining containers.
# The node is bound to ?gp because SPARQL 1.1 forbids "(expr AS ?geneProduct)"
# while ?geneProduct is already bound in the WHERE clause.
SELECT DISTINCT ?pathway (STR(?label) AS ?geneProduct)
WHERE {
  ?gp a wp:GeneProduct ;
      rdfs:label ?label ;
      dcterms:isPartOf ?pathway .
  FILTER NOT EXISTS { ?pathway a wp:Interaction }
  FILTER NOT EXISTS { ?pathway a wp:Pathway }
  FILTER regex(STR(?label), "CYP")
}
List all the genes and proteins (wp:GeneProduct
) associated with a particular pathway WPID.
# Labels of all wp:GeneProduct nodes that are part of the pathway with
# identifier "WP1560".
# The node is bound to ?gp because SPARQL 1.1 forbids "(expr AS ?geneProduct)"
# while ?geneProduct is already bound in the WHERE clause.
SELECT DISTINCT ?pathway (STR(?label) AS ?geneProduct)
WHERE {
  ?gp a wp:GeneProduct ;
      rdfs:label ?label ;
      dcterms:isPartOf ?pathway .
  ?pathway a wp:Pathway ;
           dcterms:identifier "WP1560" .
}
In WikiPathways, pathways can be tagged with ontology terms from Pathway, Cell Line and Disease
ontology. The following query returns a pathway count for each term from any of the available
ontologies. These terms are collectively modeled as wp:pathwayOntology
; but this includes
all ontologies, not just the "Pathway" ontology.
# Number of pathways tagged with each ontology term, most used term first.
# Projection expressions must be written as "(expr AS ?var)" in SPARQL 1.1, and
# the non-aggregated ?pwOntologyTerm must be a GROUP BY key.
# Counting distinct ?pathwayRDF subjects yields the pathway count per term.
SELECT ?pwOntologyTerm (COUNT(DISTINCT ?pathwayRDF) AS ?pathwayCount)
WHERE {
  ?pathwayRDF wp:ontologyTag ?pwOntologyTerm .
}
GROUP BY ?pwOntologyTerm
ORDER BY DESC(?pathwayCount)
We can also count the number of pathways by curation and community tag:
# Number of distinct pathways per tag whose IRI contains "Curation:",
# largest count first.
# ?curationTag is projected next to an aggregate, so SPARQL 1.1 requires it to
# be listed in GROUP BY.
SELECT ?curationTag (COUNT(DISTINCT ?pathway) AS ?pathwayCount)
WHERE {
  ?pathway wp:ontologyTag ?curationTag .
  FILTER contains(STR(?curationTag), "Curation:")
}
GROUP BY ?curationTag
ORDER BY DESC(?pathwayCount)
In WikiPathways, pathways can be tagged with ontology terms from Pathway, Cell Line and Disease ontology. The following query returns a list of pathways tagged with PW_0000296.
# Page and title of every resource tagged with the ontology term obo:PW_0000296.
PREFIX obo: <http://purl.obolibrary.org/obo/>
SELECT ?pathway (STR(?titleLit) AS ?title)
WHERE {
  ?pw wp:ontologyTag obo:PW_0000296 .
  ?pw foaf:page ?pathway .
  ?pw dc:title ?titleLit .
}
List all the ontology terms tagged on a particular pathway.
# Ontology tags, title and page of the resource with identifier "WP1560".
# Rows whose page IRI contains "group" are dropped.
SELECT (?term AS ?pwOntologyTerm) (STR(?titleLit) AS ?title) ?pathway
WHERE {
  ?pw dcterms:identifier "WP1560" .
  ?pw wp:ontologyTag ?term .
  ?pw foaf:page ?pathway .
  ?pw dc:title ?titleLit .
  FILTER (!regex(STR(?pathway), "group"))
}
List all the ontology terms tagged on a particular pathway.
# Every resource tagged cur:Reactome_Approved, with its title as a plain string.
SELECT DISTINCT ?pathway (STR(?titleLit) AS ?title)
WHERE {
  ?pathway dc:title ?titleLit .
  ?pathway wp:ontologyTag cur:Reactome_Approved .
}
Converts all Metabolite identifiers to LipidMaps (provided by BridgeDb), and create an ordered list of pathways including lipid compounds.
# Number of distinct LIPID MAPS identifiers per "Homo sapiens" pathway,
# pathways with the most lipids first.
# The aggregate is projected next to ?pathwayRes, ?wpid and ?title, so
# SPARQL 1.1 requires those three variables as GROUP BY keys.
PREFIX lipidmaps: <https://identifiers.org/lipidmaps/>
SELECT ?pathwayRes
       (STR(?wpid) AS ?pathway)
       (STR(?title) AS ?pathwayTitle)
       (COUNT(DISTINCT ?lipidID) AS ?LipidsInPWs)
WHERE {
  ?metabolite a wp:Metabolite ;
              dcterms:identifier ?id ;
              dcterms:isPartOf ?pathwayRes ;
              wp:bdbLipidMaps ?lipidID .
  ?pathwayRes a wp:Pathway ;
              wp:organismName "Homo sapiens" ;
              dcterms:identifier ?wpid ;
              dc:title ?title .
}
GROUP BY ?pathwayRes ?wpid ?title
ORDER BY DESC(?LipidsInPWs)
Counts unique LIPID MAPS identifier (provided by BridgeDb) for the fatty acid (FA) class, other examples are provided as a comment.
# Number of distinct LIPID MAPS identifiers containing "FA" among metabolites
# that are part of a "Homo sapiens" pathway.
# SPARQL 1.1 requires the projection expression to be parenthesized as
# "(expr AS ?var)".
SELECT (COUNT(DISTINCT ?lipidID) AS ?IndividualLipidsPerClass_FA)
WHERE {
  ?metabolite a wp:Metabolite ;
              dcterms:identifier ?id ;
              dcterms:isPartOf ?pathwayRes ;
              wp:bdbLipidMaps ?lipidID .
  ?pathwayRes a wp:Pathway ;
              wp:organismName "Homo sapiens" ;
              dcterms:identifier ?wpid ;
              dc:title ?title .
  FILTER regex(STR(?lipidID), "FA")  # Other classes: GL, GP, SP, ST, PR, SL, PK
}
Filter all unique LIPID MAPS identifier (provided by BridgeDb) for the fatty acid (FA) class, and find all pathways with individual lipids in there.
# Number of distinct LIPID MAPS identifiers containing "FA" per "Homo sapiens"
# pathway, pathways with the most such lipids first.
# Fixes: the ORDER BY DESC(...) call was missing its closing parenthesis, and
# the non-aggregated projected variables are now GROUP BY keys as SPARQL 1.1
# requires.
SELECT ?pathwayRes
       (STR(?wpid) AS ?pathway)
       (STR(?title) AS ?pathwayTitle)
       (COUNT(DISTINCT ?lipidID) AS ?FA_LipidsInPWs)
WHERE {
  ?metabolite a wp:Metabolite ;
              dcterms:identifier ?id ;
              dcterms:isPartOf ?pathwayRes ;
              wp:bdbLipidMaps ?lipidID .
  ?pathwayRes a wp:Pathway ;
              wp:organismName "Homo sapiens" ;
              dcterms:identifier ?wpid ;
              dc:title ?title .
  FILTER regex(STR(?lipidID), "FA")  # Fatty acids, Other classes: GL, GP, SP, ST, PR, SL, PK
}
GROUP BY ?pathwayRes ?wpid ?title
ORDER BY DESC(?FA_LipidsInPWs)
Though strictly speaking, it guesstimates it, because it counts the number of unique metabolite identifiers. Normalization in the RDF generation code ensures we do not double count metabolites with identifiers from different databases, but it still differentially counts metabolites with different charge states.
# Number of distinct wp:Metabolite nodes per organism name, largest count first.
SELECT (COUNT(DISTINCT ?metabolite) AS ?count) (STR(?label) AS ?species)
WHERE {
  ?metabolite a wp:Metabolite .
  ?metabolite dcterms:isPartOf ?pw .
  ?pw dc:title ?title .
  ?pw wp:organism ?organism .
  ?pw wp:organismName ?label .
}
GROUP BY ?label
ORDER BY DESC(?count)
Find all interactions that are connected to a particular datanode. (wp:Interaction).
# Interactions, and the pathways they are part of, that have the Ensembl gene
# ENSG00000125845 as a participant.
SELECT DISTINCT ?interaction ?pathway
WHERE {
  ?interaction a wp:Interaction ;
               wp:participants <https://identifiers.org/ensembl/ENSG00000125845> ;
               dcterms:isPartOf ?pathway .
  ?pathway a wp:Pathway .
}
# Every distinct dc:source value in the data, as a plain string.
SELECT DISTINCT (STR(?datasourceLit) AS ?datasource)
WHERE {
  ?subject dc:source ?datasourceLit .
}
# Number of distinct wp:isAbout targets per data source, largest count first.
# ?datasourceLit is projected next to an aggregate, so SPARQL 1.1 requires it
# to be listed in GROUP BY.
SELECT (STR(?datasourceLit) AS ?datasource)
       (COUNT(DISTINCT ?dataNode) AS ?numberEntries)
WHERE {
  ?concept dc:source ?datasourceLit ;
           wp:isAbout ?dataNode .
}
GROUP BY ?datasourceLit
ORDER BY DESC(?numberEntries)
Return all compounds annotated with the "ChEMBL compound" as data source and the pathways they are in
# Identifier and containing pathway of every entry whose dc:source is
# "ChEMBL compound".
SELECT DISTINCT ?identifier ?pathway
WHERE {
  ?entry dc:source "ChEMBL compound" ;
         dc:identifier ?identifier ;
         dcterms:isPartOf ?pathway .
  ?pathway a wp:Pathway .
}
# Publication references that are part of something tagged
# cur:Reactome_Approved but not part of anything tagged cur:AnalysisCollection.
SELECT (COUNT(DISTINCT ?pubmed) AS ?count)
WHERE {
  ?pubmed a wp:PublicationReference .
  ?pubmed dcterms:isPartOf/wp:ontologyTag cur:Reactome_Approved .
  MINUS { ?pubmed dcterms:isPartOf/wp:ontologyTag cur:AnalysisCollection }
}
# Publication references that are part of something tagged
# cur:AnalysisCollection but not part of anything tagged cur:Reactome_Approved.
SELECT (COUNT(DISTINCT ?pubmed) AS ?count)
WHERE {
  ?pubmed a wp:PublicationReference ;
          dcterms:isPartOf/wp:ontologyTag cur:AnalysisCollection .
  MINUS { ?pubmed dcterms:isPartOf/wp:ontologyTag cur:Reactome_Approved }
}
# Publication references that are part of something tagged
# cur:AnalysisCollection and also part of something tagged cur:Reactome_Approved.
SELECT (COUNT(DISTINCT ?pubmed) AS ?count)
WHERE {
  ?pubmed a wp:PublicationReference ;
          dcterms:isPartOf/wp:ontologyTag cur:AnalysisCollection ;
          dcterms:isPartOf/wp:ontologyTag cur:Reactome_Approved .
}