Skip to content

Commit

Permalink
Switch to InterPro API (#74)
Browse files Browse the repository at this point in the history
* Update URL and function to fetch UniProt accession

* Add function to fetch protein length

* Fetch Pfam hits and protein features from InterPro

* Get domain/motif features from InterPro API

* Update images

* Remove unused functions/variables

* Update README

* Remove deleted options
  • Loading branch information
matthiasblum authored Oct 5, 2023
1 parent 5593816 commit 3702093
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 260 deletions.
13 changes: 2 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

A simple 'lollipop' mutation diagram generator that tries to make things
simple and easy by automating as much as possible. It uses the
[UniProt REST API](http://www.uniprot.org/uploadlists/) and/or
[Pfam API](http://pfam-legacy.xfam.org/help#tabview=tab9) to automate translation
[UniProt REST API](http://www.uniprot.org/uploadlists/) and
[InterPro API](https://interpro-documentation.readthedocs.io/en/latest/faq.html#application-programming-interface-api) to automate translation
of Gene Symbols and lookup domain/motif features for display. If
variant changes are provided, it will also annotate them to the diagram
using the "lollipops" markers that give the tool its name.
Expand Down Expand Up @@ -79,15 +79,6 @@ the area is exponentially proportional to the count indicated. Examples:
-dpi=300 set DPI (PNG output only)
```

#### Alternative input sources:

```
-pfam use Pfam legacy as an alternative to uniprot for
fetching domain/motif information
-l=filename.json use local file instead of Pfam API for graphic data
see: http://pfam-legacy.xfam.org/help#tabview=tab9
```

## Installation

Head over to the [Releases](https://github.com/joiningdata/lollipops/releases) to
Expand Down
48 changes: 48 additions & 0 deletions data/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,54 @@ type GraphicResponse struct {
Regions []GraphicFeature `json:"regions"`
}

// Models for decoding InterPro API JSON responses. Field tags name the JSON
// keys each field is read from.
type (
	// InterProMetaData holds the "metadata" object of an entry: its accession,
	// name and type.
	InterProMetaData struct {
		Accession string `json:"accession"`
		Name      string `json:"name"`
		Type      string `json:"type"`
	}

	// InterProExtraField holds the "extra_fields" object of an entry; only the
	// "short_name" key is decoded.
	InterProExtraField struct {
		ShortName string `json:"short_name"`
	}

	// InterProFragment is one start/end range inside a location. Positions are
	// kept as json.Number rather than converted to a Go numeric type.
	// SeqFeature is read from the "seq_feature" key.
	InterProFragment struct {
		Start      json.Number `json:"start"`
		End        json.Number `json:"end"`
		SeqFeature string      `json:"seq_feature"`
	}

	// InterProLocation groups the fragments listed under one location.
	InterProLocation struct {
		Fragments []InterProFragment `json:"fragments"`
	}

	// InterProMatch is one element of an entry's "proteins" list and carries
	// the locations found under "entry_protein_locations".
	InterProMatch struct {
		Locations []InterProLocation `json:"entry_protein_locations"`
	}

	// InterProEntry is one element of the "results" list: the entry metadata,
	// its protein matches and the requested extra fields.
	InterProEntry struct {
		Metadata    InterProMetaData   `json:"metadata"`
		Matches     []InterProMatch    `json:"proteins"`
		ExtraFields InterProExtraField `json:"extra_fields"`
	}

	// InterProEntryResponse is the top-level object of an entry query; the
	// entries are read from its "results" key.
	InterProEntryResponse struct {
		Entries []InterProEntry `json:"results"`
	}

	// InterProFeature is a single sequence feature: its accession, the
	// database it comes from ("source_database") and its locations.
	InterProFeature struct {
		Accession string             `json:"accession"`
		Database  string             `json:"source_database"`
		Locations []InterProLocation `json:"locations"`
	}
)

// Models for decoding the part of a UniProt JSON response that carries the
// sequence length.
type (
	// UniProtSequence holds the "length" value of a "sequence" object.
	UniProtSequence struct {
		Length int `json:"length"`
	}

	// UniProtResponse is the top-level object; only its "sequence" key is
	// decoded.
	UniProtResponse struct {
		Sequence UniProtSequence `json:"sequence"`
	}
)

func GetLocalGraphicData(filename string) (*GraphicResponse, error) {
f, err := os.Open(filename)
if err != nil {
Expand Down
177 changes: 177 additions & 0 deletions data/interpro.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//
// Lollipops diagram generation framework for genetic variations.
// Copyright (C) 2015 Jeremy Jay <[email protected]>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package data

import (
"encoding/json"
"fmt"
"io/ioutil"
"net"
"os"
"sort"
)

// InterPro endpoints. Each value is a fmt format string with a single %s verb
// that is replaced by an accession.
const (
	// PfamURL is the API query for the Pfam entries of a UniProt protein; it
	// requests the short_name extra field and a page size of 100.
	PfamURL = "https://www.ebi.ac.uk/interpro/api/entry/pfam/protein/uniprot/%s/?extra_fields=short_name&page_size=100"

	// PfamLink is the web page of a Pfam entry on the InterPro site.
	PfamLink = "https://www.ebi.ac.uk/interpro/entry/pfam/%s"

	// SequenceFeaturesURL is the API query for a UniProt protein with
	// extra_features enabled.
	SequenceFeaturesURL = "https://www.ebi.ac.uk/interpro/api/protein/UniProt/%s/?extra_features=true"
)

// GetPfamProteinMatches queries the InterPro API (PfamURL) for the Pfam
// entries matching the protein with the given accession and returns one
// GraphicFeature per matched fragment.
//
// The features are sorted by start position, ties broken by end position, and
// are then assigned colors from a fixed 14-color palette in that order,
// cycling when there are more features than colors.
//
// Any response status other than 200 is returned as an error. If the request
// fails with a network timeout, a message is written to stderr and the
// process exits with status 1.
func GetPfamProteinMatches(accession string) ([]GraphicFeature, error) {
	queryURL := fmt.Sprintf(PfamURL, accession)
	resp, err := httpGet(queryURL)
	if err != nil {
		if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
			fmt.Fprintln(os.Stderr, "Unable to connect to InterPro. Check your internet connection or try again later.")
			os.Exit(1)
		}
		return nil, err
	}
	// Close the body on every return path so the connection is released.
	defer resp.Body.Close()

	respBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("InterPro error: %s", resp.Status)
	}

	r := InterProEntryResponse{}
	if err := json.Unmarshal(respBytes, &r); err != nil {
		return nil, err
	}

	// Flatten entry -> protein match -> location -> fragment into one feature
	// per fragment; every fragment of an entry shares that entry's metadata.
	var gs []GraphicFeature
	for _, e := range r.Entries {
		for _, m := range e.Matches {
			for _, l := range m.Locations {
				for _, f := range l.Fragments {
					gs = append(gs, GraphicFeature{
						Text:  e.ExtraFields.ShortName,
						Type:  e.Metadata.Type,
						Start: f.Start,
						End:   f.End,
						Link:  fmt.Sprintf(PfamLink, e.Metadata.Accession),
						Metadata: GraphicMetadata{
							Description: e.Metadata.Name,
							Identifier:  e.Metadata.Accession,
						},
					})
				}
			}
		}
	}

	// Order by (start, end). Parse errors from Int64 are ignored: the
	// comparison uses whatever value Int64 returns for a malformed position.
	sort.Slice(gs, func(i, j int) bool {
		start1, _ := gs[i].Start.Int64()
		start2, _ := gs[j].Start.Int64()
		if start1 != start2 {
			return start1 < start2
		}

		end1, _ := gs[i].End.Int64()
		end2, _ := gs[j].End.Int64()
		return end1 < end2
	})

	hexColors := [14]string{
		"#2DCF00", "#FF5353", "#5B5BFF", "#EBD61D", "#BA21E0", "#FF9C42", "#FF7DFF",
		"#B9264F", "#BABA21", "#C48484", "#1F88A7", "#CAFEB8", "#4A9586", "#CEB86C",
	}

	// Colors follow the sorted order and wrap around after the last one.
	for i := range gs {
		gs[i].Color = hexColors[i%len(hexColors)]
	}

	return gs, nil
}

// GetSequenceFeatures queries the InterPro API (SequenceFeaturesURL) for the
// extra sequence features of the protein with the given accession and returns
// them as gray ("#CCCCCC") GraphicFeatures, one per fragment.
//
// Only features from these source databases are kept:
//
//	mobidblt                          -> "disorder" (only fragments whose
//	                                     seq_feature is "Consensus Disorder
//	                                     Prediction")
//	signalp_e, signalp_g+, signalp_g- -> "sig_p"
//	coils                             -> "coiled_coil"
//	tmhmm                             -> "transmembrane"
//
// Features are emitted in sorted order of their key in the response object,
// so the result is the same on every run for the same response.
//
// Any response status other than 200 is returned as an error. If the request
// fails with a network timeout, a message is written to stderr and the
// process exits with status 1.
func GetSequenceFeatures(accession string) ([]GraphicFeature, error) {
	queryURL := fmt.Sprintf(SequenceFeaturesURL, accession)
	resp, err := httpGet(queryURL)
	if err != nil {
		if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
			fmt.Fprintln(os.Stderr, "Unable to connect to InterPro. Check your internet connection or try again later.")
			os.Exit(1)
		}
		return nil, err
	}
	// Close the body on every return path so the connection is released.
	defer resp.Body.Close()

	respBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("InterPro error: %s", resp.Status)
	}

	data := make(map[string]InterProFeature)
	if err := json.Unmarshal(respBytes, &data); err != nil {
		return nil, fmt.Errorf("InterPro error: %s", err)
	}

	// Source database -> feature type for databases whose fragments are all
	// kept. mobidblt is handled separately because its fragments are filtered.
	featureDatabases := map[string]string{
		"signalp_e":  "sig_p",
		"signalp_g+": "sig_p",
		"signalp_g-": "sig_p",
		"coils":      "coiled_coil",
		"tmhmm":      "transmembrane",
	}

	// Go randomizes map iteration order; walk the keys sorted so the output
	// order does not change between runs.
	keys := make([]string, 0, len(data))
	for key := range data {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var gs []GraphicFeature
	for _, key := range keys {
		feature := data[key]

		isDisorder := feature.Database == "mobidblt"
		featureType := "disorder"
		if !isDisorder {
			mapped, ok := featureDatabases[feature.Database]
			if !ok {
				continue
			}
			featureType = mapped
		}

		for _, location := range feature.Locations {
			for _, fragment := range location.Fragments {
				if isDisorder && fragment.SeqFeature != "Consensus Disorder Prediction" {
					continue
				}
				gs = append(gs, GraphicFeature{
					Color: "#CCCCCC",
					Type:  featureType,
					Start: fragment.Start,
					End:   fragment.End,
				})
			}
		}
	}

	return gs, nil
}
72 changes: 0 additions & 72 deletions data/pfam.go

This file was deleted.

Loading

0 comments on commit 3702093

Please sign in to comment.