Skip to content

Commit

Permalink
Merge branch 'features/v3.0.0/gohan-gene-api' into releases/v3.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
brouillette committed Oct 1, 2021
2 parents d3c00ae + 8b8f15b commit e4b8f6c
Show file tree
Hide file tree
Showing 18 changed files with 1,004 additions and 62 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,7 @@ bin/*
*/*/tmp

*.vcf
*.vcf.gz
*.vcf.gz

*/*/*.csv
*/*/*.gtf*
14 changes: 10 additions & 4 deletions src/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,27 +124,27 @@ func main() {

e.GET("/variants/get/by/variantId", mvc.VariantsGetByVariantId,
// middleware
gam.MandateChromosomeAttribute,
gam.ValidateOptionalChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.ValidatePotentialGenotypeQueryParameter)
e.GET("/variants/get/by/sampleId", mvc.VariantsGetBySampleId,
// middleware
gam.MandateChromosomeAttribute,
gam.ValidateOptionalChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.MandateSampleIdsPluralAttribute,
gam.ValidatePotentialGenotypeQueryParameter)

e.GET("/variants/count/by/variantId", mvc.VariantsCountByVariantId,
// middleware
gam.MandateChromosomeAttribute,
gam.ValidateOptionalChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.ValidatePotentialGenotypeQueryParameter)
e.GET("/variants/count/by/sampleId", mvc.VariantsCountBySampleId,
// middleware
gam.MandateChromosomeAttribute,
gam.ValidateOptionalChromosomeAttribute,
gam.MandateCalibratedBounds,
gam.MandateAssemblyIdAttribute,
gam.MandateSampleIdsSingularAttribute,
Expand All @@ -155,6 +155,12 @@ func main() {
gam.MandateAssemblyIdAttribute)
e.GET("/variants/ingestion/requests", mvc.GetAllVariantIngestionRequests)

// -- Genes
e.GET("/genes/overview", mvc.GetGenesOverview)
e.GET("/genes/search", mvc.GenesGetByNomenclatureWildcard,
// middleware
gam.ValidateOptionalChromosomeAttribute)

// Run
e.Logger.Fatal(e.Start(":" + cfg.Api.Port))
}
21 changes: 6 additions & 15 deletions src/api/middleware/chromosomeMiddleware.go
Original file line number Diff line number Diff line change
@@ -1,34 +1,25 @@
package middleware

import (
"api/models/constants/chromosome"
"net/http"
"strconv"

"github.com/labstack/echo"
)

/*
Echo middleware to ensure that the `chromosome` HTTP query parameter, when provided, is a valid human chromosome
*/
func MandateChromosomeAttribute(next echo.HandlerFunc) echo.HandlerFunc {
func ValidateOptionalChromosomeAttribute(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
// check for chromosome query parameter
chromQP := c.QueryParam("chromosome")
if len(chromQP) == 0 {
// if no id was provided return an error
return echo.NewHTTPError(http.StatusBadRequest, "Missing 'chromosome' query parameter for querying!")
}

// verify:
i, conversionErr := strconv.Atoi(chromQP)
if conversionErr != nil {
// if invalid chromosome
return echo.NewHTTPError(http.StatusBadRequest, "Error converting 'chromosome' query parameter! Check your input")
}

if i <= 0 {
// if chromosome less than 0
return echo.NewHTTPError(http.StatusBadRequest, "Please provide a 'chromosome' greater than 0!")
if len(chromQP) > 0 && !chromosome.IsValidHumanChromosome(chromQP) {
// if chromosome less than 1 or greater than 23
// and not 'x', 'y' or 'm'
return echo.NewHTTPError(http.StatusBadRequest, "Please provide a valid 'chromosome' (either 1-23, X, Y, or M)")
}

return next(c)
Expand Down
6 changes: 6 additions & 0 deletions src/api/models/constants/assembly-id/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ const (
GRCh38 constants.AssemblyId = "GRCh38"
GRCh37 constants.AssemblyId = "GRCh37"
NCBI36 constants.AssemblyId = "NCBI36"
NCBI35 constants.AssemblyId = "NCBI35"
NCBI34 constants.AssemblyId = "NCBI34"
Other constants.AssemblyId = "Other"
)

Expand All @@ -22,6 +24,10 @@ func CastToAssemblyId(text string) constants.AssemblyId {
return GRCh37
case "ncbi36":
return NCBI36
case "ncbi35":
return NCBI35
case "ncbi34":
return NCBI34
case "other":
return Other
default:
Expand Down
37 changes: 37 additions & 0 deletions src/api/models/constants/chromosome/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package chromosome

import (
"strconv"
"strings"
)

// IsValidHumanChromosome reports whether text names a human chromosome
// accepted by the API: a chromosome number from 1 through 23, or (ignoring
// case) exactly "X", "Y", "M" or "MT".
//
// Everything else is rejected, including the empty string, zero, negative
// numbers, numbers above 23, and arbitrary words that merely contain the
// letter "m".
func IsValidHumanChromosome(text string) bool {
	// Numeric chromosomes: the text must parse cleanly as an integer and
	// fall within 1-23.
	if chromNumber, err := strconv.Atoi(text); err == nil {
		return chromNumber >= 1 && chromNumber <= 23
	}

	// Non-numeric chromosomes are matched exactly (case-insensitively).
	// A substring test for "m" would wrongly accept inputs such as "homer"
	// or "1m".
	switch strings.ToLower(text) {
	case "x", "y", "m", "mt":
		return true
	}

	return false
}
1 change: 1 addition & 0 deletions src/api/models/constants/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package constants
associated services.
*/
type AssemblyId string
type Chromosome string
type GenotypeQuery string
type SearchOperation string
type SortDirection string
Expand Down
8 changes: 8 additions & 0 deletions src/api/models/dtos.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@ type VariantResponseDataModel struct {
Count int `json:"count"`
Results []Variant `json:"results"` // []Variant
}

// GenesResponseDTO is the JSON response body produced by the gene search
// handler (GenesGetByNomenclatureWildcard).
type GenesResponseDTO struct {
// Status is a status code repeated inside the body; the search handler sets it to 200.
Status int `json:"status"`
// Message is a human-readable outcome; the search handler sets it to "Success".
Message string `json:"message"`
// Term echoes the `term` query parameter the search was executed with.
Term string `json:"term"`
// Count is the number of entries in Results.
Count int `json:"count"`
// Results holds the gene documents matched by the search.
Results []Gene `json:"results"` // []Gene
}
10 changes: 9 additions & 1 deletion src/api/models/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"}

type Variant struct {
Chrom int `json:"chrom"`
Chrom string `json:"chrom"`
Pos int `json:"pos"`
Id string `json:"id"`
Ref []string `json:"ref"`
Expand Down Expand Up @@ -45,3 +45,11 @@ type Genotype struct {
AlleleRight int `json:"alleleRight"` // -1 = no call (equivalent to a '.')
Zygosity c.Zygosity `json:"zygosity"`
}

// Gene is a single gene document. The gene search handler decodes each
// Elasticsearch hit's `_source` into this struct and returns it to clients
// using the JSON field names below.
type Gene struct {
// Name is the gene's name; presumably the field the search `term` is matched against — confirm in the repository query.
Name string `json:"name"`
// Chrom is the chromosome the gene belongs to, kept as a string.
Chrom string `json:"chrom"`
// Start and End presumably delimit the gene's position on Chrom — TODO confirm coordinate convention (0/1-based, inclusive or not).
Start int `json:"start"`
End int `json:"end"`
// AssemblyId identifies the reference genome assembly this record applies to.
AssemblyId c.AssemblyId `json:"assemblyId"`
}
16 changes: 16 additions & 0 deletions src/api/models/ingest/structs/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package structs

import (
"api/models"
"sync"
)

// IngestionQueueStructure pairs a variant with a wait group.
// NOTE(review): presumably one queued unit of variant-ingestion work, with
// WaitGroup used to signal completion of that unit — confirm against the
// queue's producer/consumer, which are not visible here.
type IngestionQueueStructure struct {
// Variant points to the variant carried by this queue entry.
Variant *models.Variant
// WaitGroup is shared by pointer so that all holders operate on the same counter.
WaitGroup *sync.WaitGroup
}

// GeneIngestionQueueStructure pairs a gene with a wait group.
// NOTE(review): presumably one queued unit of gene-ingestion work, with
// WaitGroup used to signal completion of that unit — confirm against the
// queue's producer/consumer, which are not visible here.
type GeneIngestionQueueStructure struct {
// Gene points to the gene carried by this queue entry.
Gene *models.Gene
// WaitGroup is shared by pointer so that all holders operate on the same counter.
WaitGroup *sync.WaitGroup
}
152 changes: 152 additions & 0 deletions src/api/mvc/genes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package mvc

import (
"api/contexts"
"api/models"
assemblyId "api/models/constants/assembly-id"
esRepo "api/repositories/elasticsearch"
"fmt"
"net/http"
"strconv"
"sync"

"github.com/labstack/echo"
"github.com/mitchellh/mapstructure"
)

// GenesGetByNomenclatureWildcard runs a wildcard search for gene documents
// and responds with a models.GenesResponseDTO.
//
// Query parameters read from the request:
//   - chromosome: chromosome to restrict the search to; "*" is used when empty.
//   - term: gene-name search term, handed to the repository unchanged.
//   - assemblyId: converted with assemblyId.CastToAssemblyId and handed to
//     the repository as-is (including assemblyId.Unknown for unrecognised
//     values, which is intended to trigger a search across assemblies).
//   - size: maximum number of documents requested; 25 when missing or not
//     an integer.
//
// It responds with HTTP 200 and the DTO on success. If the search response
// carries no "hits" object (for example an error-shaped response), it returns
// an HTTP 500 error instead of panicking. "results" is always a JSON array,
// empty when nothing matched.
func GenesGetByNomenclatureWildcard(c echo.Context) error {
	const defaultSize = 25

	gc := c.(*contexts.GohanContext)
	cfg := gc.Config
	es := gc.Es7Client

	// Chromosome search term: no chromosome means "match any chromosome".
	chromosomeSearchTerm := c.QueryParam("chromosome")
	if len(chromosomeSearchTerm) == 0 {
		chromosomeSearchTerm = "*"
	}

	// Name search term
	term := c.QueryParam("term")

	// Assembly ID: unrecognised or empty values come back from the cast as
	// Unknown, which is passed along unchanged.
	assId := assemblyId.CastToAssemblyId(c.QueryParam("assemblyId"))

	// Size: keep the default unless a well-formed integer was supplied.
	size := defaultSize
	if sizeQP := c.QueryParam("size"); len(sizeQP) > 0 {
		if parsed, err := strconv.Atoi(sizeQP); err == nil {
			size = parsed
		}
	}

	fmt.Printf("Executing wildcard genes search for term %s, assemblyId %s (max size: %d)\n", term, assId, size)

	// Execute
	docs := esRepo.GetGeneDocumentsByTermWildcard(cfg, es, chromosomeSearchTerm, term, assId, size)

	// A response without a "hits" object cannot be interpreted; report it
	// rather than crashing on a failed type assertion.
	hitsEnvelope, ok := docs["hits"].(map[string]interface{})
	if !ok {
		return echo.NewHTTPError(http.StatusInternalServerError, "Unexpected response while searching for genes")
	}

	allDocHits := []map[string]interface{}{}
	if err := mapstructure.Decode(hitsEnvelope["hits"], &allDocHits); err != nil {
		// Best effort: keep whatever was decoded, but surface the problem.
		fmt.Printf("Error decoding gene search hits: %v\n", err)
	}

	// Grab _source for each hit. A non-nil slice guarantees that an empty
	// result set is serialized as [] rather than null.
	allSources := make([]models.Gene, 0, len(allDocHits))
	for _, hit := range allDocHits {
		source, ok := hit["_source"].(map[string]interface{})
		if !ok {
			fmt.Printf("Skipping gene search hit without a usable _source\n")
			continue
		}

		// cast map[string]interface{} to struct
		var gene models.Gene
		if err := mapstructure.Decode(source, &gene); err != nil {
			// Best effort: the partially decoded gene is still returned.
			fmt.Printf("Error decoding gene document: %v\n", err)
		}

		allSources = append(allSources, gene)
	}

	fmt.Printf("Found %d docs!\n", len(allSources))

	geneResponseDTO := models.GenesResponseDTO{
		Term:    term,
		Count:   len(allSources),
		Results: allSources,
		Status:  200,
		Message: "Success",
	}

	return c.JSON(http.StatusOK, geneResponseDTO)
}

// GetGenesOverview responds with the number of gene documents per chromosome,
// grouped by assembly id.
//
// The response body has the shape
//
//	{"assemblyIDs": {<assemblyId>: {"numberOfGenesPerChromosome": {<chromosome>: <doc_count>}}}}
//
// and is built from the "genes_assembly_id_group" aggregation (and its nested
// "genes_chromosome_group" aggregation) returned by
// esRepo.GetGeneBucketsByKeyword. Any level of the aggregation that is
// missing or has an unexpected type is treated as empty instead of causing a
// panic, so a malformed or error-shaped search response yields an empty
// overview with HTTP 200.
func GetGenesOverview(c echo.Context) error {

	resultsMap := map[string]interface{}{}
	// NOTE(review): resultsMap is not shared with any other goroutine in this
	// function, so this lock is not strictly required; it is retained so the
	// file's "sync" import stays in use.
	resultsMux := sync.RWMutex{}

	es := c.(*contexts.GohanContext).Es7Client
	cfg := c.(*contexts.GohanContext).Config

	// retrieve aggregation of genes/chromosomes by assembly id
	results := esRepo.GetGeneBucketsByKeyword(cfg, es)

	// Collect the top-level (per assembly id) buckets. Every step uses a
	// checked type assertion; a failed assertion on a nil or unexpected value
	// simply leaves the list empty.
	geneChromosomeGroupBucketsMapped := []map[string]interface{}{}

	if aggsMapped, ok := results["aggregations"].(map[string]interface{}); ok {
		if itemsMapped, ok := aggsMapped["genes_assembly_id_group"].(map[string]interface{}); ok {
			if buckets, ok := itemsMapped["buckets"].([]interface{}); ok {
				for _, bucket := range buckets {
					if mappedBucket, ok := bucket.(map[string]interface{}); ok {
						geneChromosomeGroupBucketsMapped = append(geneChromosomeGroupBucketsMapped, mappedBucket)
					}
				}
			}
		}
	}

	individualAssemblyIdKeyMap := map[string]interface{}{}

	// iterate over each assemblyId bucket
	for _, chromGroupBucketMap := range geneChromosomeGroupBucketsMapped {

		assemblyIdKey := fmt.Sprint(chromGroupBucketMap["key"])

		// chromosome -> number of gene documents
		bucketsMapped := map[string]interface{}{}

		if chromGroupItemMapped, ok := chromGroupBucketMap["genes_chromosome_group"].(map[string]interface{}); ok {
			// A missing "buckets" entry yields a nil slice, which ranges zero times.
			chromBuckets, _ := chromGroupItemMapped["buckets"].([]interface{})

			for _, chromBucket := range chromBuckets {
				chromBucketMapped, ok := chromBucket.(map[string]interface{})
				if !ok {
					continue
				}

				// ensure strings and numbers are both expressed as strings
				docKey := fmt.Sprint(chromBucketMapped["key"])

				// add to list of buckets by chromosome
				bucketsMapped[docKey] = chromBucketMapped["doc_count"]
			}
		}

		individualAssemblyIdKeyMap[assemblyIdKey] = map[string]interface{}{
			"numberOfGenesPerChromosome": bucketsMapped,
		}
	}

	resultsMux.Lock()
	resultsMap["assemblyIDs"] = individualAssemblyIdKeyMap
	resultsMux.Unlock()

	return c.JSON(http.StatusOK, resultsMap)
}
4 changes: 2 additions & 2 deletions src/api/mvc/variants.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func GetVariantsOverview(c echo.Context) error {
callGetBucketsByKeyword := func(key string, keyword string, _wg *sync.WaitGroup) {
defer _wg.Done()

results := esRepo.GetBucketsByKeyword(cfg, es, keyword)
results := esRepo.GetVariantsBucketsByKeyword(cfg, es, keyword)

// retrieve aggregations.items.buckets
bucketsMapped := []interface{}{}
Expand Down Expand Up @@ -281,7 +281,7 @@ func GetVariantsOverview(c echo.Context) error {

// get distribution of chromosomes
wg.Add(1)
go callGetBucketsByKeyword("chromosomes", "chrom", &wg)
go callGetBucketsByKeyword("chromosomes", "chrom.keyword", &wg)

// get distribution of variant IDs
wg.Add(1)
Expand Down
Loading

0 comments on commit e4b8f6c

Please sign in to comment.