Skip to content

Commit

Permalink
feat!: use reference service to provide assembly ID
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Dec 6, 2023
1 parent 1fad838 commit 54bcbc5
Show file tree
Hide file tree
Showing 11 changed files with 25 additions and 76 deletions.
2 changes: 1 addition & 1 deletion src/api/contexts/contexts.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ type (

// Convenient storage for relevant http context data
QueryParameters struct {
AssemblyId constants.AssemblyId
AssemblyId string
Alleles []string
Chromosome string
Genotype constants.GenotypeQuery
Expand Down
10 changes: 4 additions & 6 deletions src/api/middleware/assemblyMiddleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ package middleware

import (
"gohan/api/contexts"
"gohan/api/models/constants"
assid "gohan/api/models/constants/assembly-id"
"net/http"

"github.com/labstack/echo"
Expand All @@ -16,14 +14,14 @@ func MandateAssemblyIdAttribute(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
// check for assemblyId query parameter
assemblyId := c.QueryParam("assemblyId")
if len(assemblyId) == 0 || !assid.IsKnownAssemblyId(assemblyId) {
// if no id was provided, or it was invalid, return an error
return echo.NewHTTPError(http.StatusBadRequest, "Missing or unknown assemblyId!")
if len(assemblyId) == 0 {
// if no id was provided, return an error
return echo.NewHTTPError(http.StatusBadRequest, "Missing assemblyId!")
}

// forward the assembly ID down the pipeline
gc := c.(*contexts.GohanContext)
gc.AssemblyId = constants.AssemblyId(assemblyId)
gc.AssemblyId = assemblyId

return next(gc)
}
Expand Down
32 changes: 0 additions & 32 deletions src/api/models/constants/assembly-id/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,9 @@ package assemblyId

import (
"gohan/api/models/constants"
"strings"
)

const (
Unknown constants.AssemblyId = "Unknown"

GRCh38 constants.AssemblyId = "GRCh38"
GRCh37 constants.AssemblyId = "GRCh37"
NCBI36 constants.AssemblyId = "NCBI36"
NCBI35 constants.AssemblyId = "NCBI35"
NCBI34 constants.AssemblyId = "NCBI34"
Other constants.AssemblyId = "Other"
)

// CastToAssemblyId maps free-form text onto one of the assembly ID constants.
// The input is lower-cased before it is compared, so matching is
// case-insensitive. Text that does not name a recognized assembly yields
// Unknown.
func CastToAssemblyId(text string) constants.AssemblyId {
	// lower-cased spelling -> canonical constant
	recognized := map[string]constants.AssemblyId{
		"grch38": GRCh38,
		"grch37": GRCh37,
		"ncbi36": NCBI36,
		"ncbi35": NCBI35,
		"ncbi34": NCBI34,
		"other":  Other,
	}

	if id, found := recognized[strings.ToLower(text)]; found {
		return id
	}
	return Unknown
}

// IsKnownAssemblyId reports whether text is recognized by CastToAssemblyId,
// i.e. whether casting it produces anything other than Unknown.
func IsKnownAssemblyId(text string) bool {
	castId := CastToAssemblyId(text)
	if castId == Unknown {
		return false
	}
	return true
}
10 changes: 3 additions & 7 deletions src/api/mvc/genes/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,11 +335,7 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error {
// Assembly ID
// perform a wildcard search if no assembly ID is available
// - an empty assembly ID triggers it
var assId constants.AssemblyId
if gc.AssemblyId != assemblyId.Unknown {
// retrieve passed parameter if is valid
assId = gc.AssemblyId
}
asmId := gc.AssemblyId

// Size
var (
Expand All @@ -354,10 +350,10 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error {
}
}

fmt.Printf("Executing wildcard genes search for term %s, assemblyId %s (max size: %d)\n", term, assId, size)
fmt.Printf("Executing wildcard genes search for term %s, assemblyId %s (max size: %d)\n", term, asmId, size)

// Execute
docs, geneErr := esRepo.GetGeneDocumentsByTermWildcard(cfg, es, chromosomeSearchTerm, term, assId, size)
docs, geneErr := esRepo.GetGeneDocumentsByTermWildcard(cfg, es, chromosomeSearchTerm, term, asmId, size)
if geneErr != nil {
return c.JSON(http.StatusOK, map[string]interface{}{
"status": 500,
Expand Down
9 changes: 2 additions & 7 deletions src/api/mvc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package mvc
import (
"gohan/api/contexts"
"gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
gq "gohan/api/models/constants/genotype-query"
"strings"

Expand All @@ -12,7 +11,7 @@ import (
"github.com/labstack/echo"
)

func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId, string) {
func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, string, string) {
gc := c.(*contexts.GohanContext)
es := gc.Es7Client

Expand Down Expand Up @@ -48,11 +47,7 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int,
}
}

assemblyId := a.Unknown
assemblyIdQP := c.QueryParam("assemblyId")
if len(assemblyIdQP) > 0 && a.IsKnownAssemblyId(assemblyIdQP) {
assemblyId = a.CastToAssemblyId(assemblyIdQP)
}
assemblyId := c.QueryParam("assemblyId")

return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString
}
9 changes: 4 additions & 5 deletions src/api/repositories/elasticsearch/genes.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (

"gohan/api/models"
"gohan/api/models/constants"
assemblyId "gohan/api/models/constants/assembly-id"
"gohan/api/utils"

"github.com/elastic/go-elasticsearch/v7"
Expand Down Expand Up @@ -106,7 +105,7 @@ func GetGeneBucketsByKeyword(cfg *models.Config, es *elasticsearch.Client) (map[
}

func GetGeneDocumentsByTermWildcard(cfg *models.Config, es *elasticsearch.Client,
chromosomeSearchTerm string, term string, assId constants.AssemblyId, size int) (map[string]interface{}, error) {
chromosomeSearchTerm string, term string, asmId string, size int) (map[string]interface{}, error) {

if cfg.Debug {
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
Expand All @@ -115,10 +114,10 @@ func GetGeneDocumentsByTermWildcard(cfg *models.Config, es *elasticsearch.Client
// Nomenclature Search Term
nomenclatureStringTerm := fmt.Sprintf("*%s*", term)

// Assembly Id Search Term (wildcard by default)
// Assembly ID Search Term (wildcard by default)
assemblyIdStringTerm := "*"
if assId != assemblyId.Unknown {
assemblyIdStringTerm = string(assId)
if asmId != "" {
assemblyIdStringTerm = asmId
}

var buf bytes.Buffer
Expand Down
3 changes: 1 addition & 2 deletions src/api/repositories/elasticsearch/variants.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (

"gohan/api/models"
c "gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
gq "gohan/api/models/constants/genotype-query"
s "gohan/api/models/constants/sort"
z "gohan/api/models/constants/zygosity"
Expand Down Expand Up @@ -465,7 +464,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config,
}})
}

if assemblyId != "" && assemblyId != a.Unknown {
if assemblyId != "" {
mustMap = append(mustMap, map[string]interface{}{
"match": map[string]interface{}{
"assemblyId": map[string]interface{}{
Expand Down
8 changes: 3 additions & 5 deletions src/api/tests/build/api/genes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import (
common "gohan/api/tests/common"

"gohan/api/models"
c "gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
ingest "gohan/api/models/ingest"

"gohan/api/models/constants/chromosome"
Expand Down Expand Up @@ -205,10 +203,10 @@ func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []dtos.
go func(_wg *sync.WaitGroup, _assemblyIdString string, _chromosomeString string) {
defer _wg.Done()

assemblyId := a.CastToAssemblyId(_assemblyIdString)
assemblyId := _assemblyIdString

// make the call
dto := buildQueryAndMakeGetGenesCall(_chromosomeString, "", assemblyId, _t, cfg)
dto := buildQueryAndMakeGetGenesCall(_chromosomeString, assemblyId, _t, cfg)

// ensure there is data returned
// (we'd be making a bad query, otherwise)
Expand All @@ -231,7 +229,7 @@ func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []dtos.
return allDtoResponses
}

func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) dtos.GenesResponseDTO {
func buildQueryAndMakeGetGenesCall(chromosome string, assemblyId string, _t *testing.T, _cfg *models.Config) dtos.GenesResponseDTO {

queryString := fmt.Sprintf("?chromosome=%s&assemblyId=%s", chromosome, assemblyId)

Expand Down
3 changes: 1 addition & 2 deletions src/api/tests/build/api/variants_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"time"

c "gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
gq "gohan/api/models/constants/genotype-query"
s "gohan/api/models/constants/sort"
z "gohan/api/models/constants/zygosity"
Expand Down Expand Up @@ -681,7 +680,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc

chrom := _combination[0]
sampleId := _combination[1]
assemblyId := a.CastToAssemblyId(_combination[2])
assemblyId := _combination[2]

// make the call
dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, uuid.Nil, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg)
Expand Down
5 changes: 2 additions & 3 deletions src/api/tests/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"gohan/api/models"
c "gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
gq "gohan/api/models/constants/genotype-query"
s "gohan/api/models/constants/sort"
testConsts "gohan/api/tests/common/constants"
Expand Down Expand Up @@ -215,7 +214,7 @@ func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, dataset uui

func BuildQueryAndMakeGetVariantsCall(
chromosome string, sampleId string, dataset uuid.UUID, includeInfo bool,
sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId,
sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId string,
referenceAllelePattern string, alternativeAllelePattern string, commaDeliminatedAlleles string,
ignoreStatusCode bool, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse {

Expand Down Expand Up @@ -286,7 +285,7 @@ func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, dat

chrom := _combination[0]
sampleId := _combination[1]
assemblyId := a.CastToAssemblyId(_combination[2])
assemblyId := _combination[2]

// make the call
dto := BuildQueryAndMakeGetVariantsCall(chrom, sampleId, dataset, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg)
Expand Down
10 changes: 4 additions & 6 deletions src/api/workflows/main.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
package workflows

import (
c "gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
)

type WorkflowSchema map[string]interface{}

var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{
Expand All @@ -22,6 +17,7 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{
"id": "project_dataset",
"type": "project:dataset",
"required": true,
"help": "The dataset to ingest the variants into.",
},
{
"id": "vcf_gz_file_names",
Expand All @@ -33,12 +29,14 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{
"id": "assembly_id",
"type": "enum",
"required": true,
"values": []c.AssemblyId{a.GRCh38, a.GRCh37},
"values": "{{ serviceUrls.reference }}/genomes?response_format=id_list",
},
{
"id": "filter_out_references",
"type": "boolean",
"required": true,
"help": "If this is checked, variant calls which are (0, 0) (i.e., homozygous reference " +
"calls) will not be ingested.",
},
// Injected inputs:
{
Expand Down

0 comments on commit 54bcbc5

Please sign in to comment.