From fa12ae316237a47b035308ad6428744066b27ef6 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 11 Dec 2023 13:39:55 -0500 Subject: [PATCH] fix: remove all mentions of constants.AssemblyId --- src/api/models/constants/assembly-id/main.go | 8 ++----- src/api/models/dtos/main.go | 17 +++++++------- src/api/models/indexes/main.go | 18 +++++++-------- src/api/mvc/genes/main.go | 23 +++++++++---------- src/api/repositories/elasticsearch/genes.go | 5 ++-- .../repositories/elasticsearch/variants.go | 4 ++-- src/api/services/ingestion.go | 2 +- 7 files changed, 35 insertions(+), 42 deletions(-) diff --git a/src/api/models/constants/assembly-id/main.go b/src/api/models/constants/assembly-id/main.go index 10f8edf..b4d7d23 100644 --- a/src/api/models/constants/assembly-id/main.go +++ b/src/api/models/constants/assembly-id/main.go @@ -1,10 +1,6 @@ package assemblyId -import ( - "gohan/api/models/constants" -) - const ( - GRCh38 constants.AssemblyId = "GRCh38" - GRCh37 constants.AssemblyId = "GRCh37" + GRCh38 string = "GRCh38" + GRCh37 string = "GRCh37" ) diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 6f3ee11..4ef2d2b 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -1,7 +1,6 @@ package dtos import ( - "gohan/api/models/constants" "gohan/api/models/indexes" "time" ) @@ -21,11 +20,11 @@ type VariantCountReponse struct { } type VariantResult struct { - Query string `json:"query,omitempty"` - AssemblyId constants.AssemblyId `json:"assembly_id"` - Chromosome string `json:"chromosome"` - Start int `json:"start"` - End int `json:"end"` + Query string `json:"query,omitempty"` + AssemblyId string `json:"assembly_id"` + Chromosome string `json:"chromosome"` + Start int `json:"start"` + End int `json:"end"` } type VariantGetResult struct { @@ -54,9 +53,9 @@ type VariantCall struct { Alleles []string `json:"alleles,omitempty"` // TODO: GenotypeProbability, PhredScaleLikelyhood ? - AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"` - Dataset string `json:"dataset,omitempty"` - DocumentId string `json:"documentId,omitempty"` + AssemblyId string `json:"assemblyId,omitempty"` + Dataset string `json:"dataset,omitempty"` + DocumentId string `json:"documentId,omitempty"` } // --- Dataset diff --git a/src/api/models/indexes/main.go b/src/api/models/indexes/main.go index 0e84fe9..194e9d9 100644 --- a/src/api/models/indexes/main.go +++ b/src/api/models/indexes/main.go @@ -18,10 +18,10 @@ type Variant struct { Sample Sample `json:"sample"` - FileId string `json:"fileId"` - Dataset string `json:"dataset"` - AssemblyId c.AssemblyId `json:"assemblyId"` - CreatedTime time.Time `json:"createdTime"` + FileId string `json:"fileId"` + Dataset string `json:"dataset"` + AssemblyId string `json:"assemblyId"` + CreatedTime time.Time `json:"createdTime"` } type Info struct { @@ -51,9 +51,9 @@ type Genotype struct { } type Gene struct { - Name string `json:"name"` - Chrom string `json:"chrom"` - Start int `json:"start"` - End int `json:"end"` - AssemblyId c.AssemblyId `json:"assemblyId"` + Name string `json:"name"` + Chrom string `json:"chrom"` + Start int `json:"start"` + End int `json:"end"` + AssemblyId string `json:"assemblyId"` } diff --git a/src/api/mvc/genes/main.go b/src/api/mvc/genes/main.go index 9f293e3..e3efbb5 100644 --- a/src/api/mvc/genes/main.go +++ b/src/api/mvc/genes/main.go @@ -6,7 +6,6 @@ import ( "crypto/tls" "fmt" "gohan/api/contexts" - "gohan/api/models/constants" assemblyId "gohan/api/models/constants/assembly-id" "gohan/api/models/constants/chromosome" "gohan/api/models/dtos" @@ -51,7 +50,7 @@ func GenesIngest(c echo.Context) error { http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } - assemblyIdMap := map[constants.AssemblyId]string{ + assemblyIdMap := map[string]string{ assemblyId.GRCh38: "gencode.v38.annotation.gtf", assemblyId.GRCh37: "gencode.v19.annotation.gtf", // SKIP @@ -59,7 +58,7 @@ func GenesIngest(c echo.Context) error { // assemblyId.NCBI35: "hg17", // assemblyId.NCBI34: "hg16", } - assemblyIdGTFUrlMap := map[constants.AssemblyId]string{ + assemblyIdGTFUrlMap := map[string]string{ assemblyId.GRCh38: "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz", assemblyId.GRCh37: "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz", // SKIP @@ -79,7 +78,7 @@ func GenesIngest(c echo.Context) error { CreatedAt: fmt.Sprintf("%v", time.Now()), } - go func(_assId constants.AssemblyId, _fileName string, _assemblyWg *sync.WaitGroup, reqStat *ingest.GeneIngestRequest) { + go func(_asmId string, _fileName string, _assemblyWg *sync.WaitGroup, reqStat *ingest.GeneIngestRequest) { defer _assemblyWg.Done() var ( @@ -89,7 +88,7 @@ func GenesIngest(c echo.Context) error { gtfFile, err := os.Open(fmt.Sprintf("%s/%s", gtfPath, _fileName)) if err != nil { // Download the file - fullURLFile := assemblyIdGTFUrlMap[_assId] + fullURLFile := assemblyIdGTFUrlMap[_asmId] handleHardErr := func(err error) { msg := "Something went wrong: " + err.Error() @@ -193,13 +192,13 @@ func GenesIngest(c echo.Context) error { defer gtfFile.Close() // clean out genes currently in elasticsearch by assembly id - fmt.Printf("Cleaning out %s gene documents from genes index (if any)\n", string(_assId)) - esRepo.DeleteGenesByAssemblyId(cfg, es7Client, _assId) + fmt.Printf("Cleaning out %s gene documents from genes index (if any)\n", string(_asmId)) + esRepo.DeleteGenesByAssemblyId(cfg, es7Client, _asmId) fileScanner := bufio.NewScanner(gtfFile) fileScanner.Split(bufio.ScanLines) - fmt.Printf("Ingesting %s\n", string(_assId)) + fmt.Printf("Ingesting %s\n", string(_asmId)) reqStat.State = ingest.Running iz.GeneIngestRequestChan <- reqStat @@ -222,7 +221,7 @@ func GenesIngest(c echo.Context) error { go func(rowText string, _chromHeaderKey int, _startKey int, _endKey int, _nameHeaderKeys []int, _geneNameHeaderKeys []int, - _assId constants.AssemblyId, + _assId string, _gwg *sync.WaitGroup) { // fmt.Printf("row : %s\n", row) @@ -276,19 +275,19 @@ func GenesIngest(c echo.Context) error { Chrom: chromosomeClean, Start: start, End: end, - AssemblyId: _assId, + AssemblyId: _asmId, } iz.GeneIngestionBulkIndexingQueue <- &structs.GeneIngestionQueueStructure{ Gene: discoveredGene, WaitGroup: _gwg, } - }(rowText, chromHeaderKey, startKey, endKey, nameHeaderKeys, geneNameHeaderKeys, _assId, &geneWg) + }(rowText, chromHeaderKey, startKey, endKey, nameHeaderKeys, geneNameHeaderKeys, _asmId, &geneWg) } geneWg.Wait() - fmt.Printf("%s ingestion done!\n", _assId) + fmt.Printf("%s ingestion done!\n", _asmId) fmt.Printf("Deleting %s\n", unzippedFileName) err = os.Remove(fmt.Sprintf("%s/%s", gtfPath, unzippedFileName)) if err != nil { diff --git a/src/api/repositories/elasticsearch/genes.go b/src/api/repositories/elasticsearch/genes.go index 2347637..890ce0d 100644 --- a/src/api/repositories/elasticsearch/genes.go +++ b/src/api/repositories/elasticsearch/genes.go @@ -12,7 +12,6 @@ import ( "time" "gohan/api/models" - "gohan/api/models/constants" "gohan/api/utils" "github.com/elastic/go-elasticsearch/v7" @@ -216,7 +215,7 @@ func GetGeneDocumentsByTermWildcard(cfg *models.Config, es *elasticsearch.Client return result, nil } -func DeleteGenesByAssemblyId(cfg *models.Config, es *elasticsearch.Client, assId constants.AssemblyId) (map[string]interface{}, error) { +func DeleteGenesByAssemblyId(cfg *models.Config, es *elasticsearch.Client, asmId string) (map[string]interface{}, error) { if cfg.Debug { http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} @@ -226,7 +225,7 @@ func DeleteGenesByAssemblyId(cfg *models.Config, es *elasticsearch.Client, assId query := map[string]interface{}{ "query": map[string]interface{}{ "match": map[string]interface{}{ - "assemblyId": string(assId), + "assemblyId": asmId, }, }, } diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index 1bb3e3d..575a43b 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -109,7 +109,7 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e reference string, alternative string, alleles []string, size int, sortByPosition c.SortDirection, includeInfoInResultSet bool, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, + genotype c.GenotypeQuery, assemblyId string, getSampleIdsOnly bool) (map[string]interface{}, error) { // begin building the request body. @@ -404,7 +404,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, chromosome string, lowerBound int, upperBound int, variantId string, sampleId string, datasetString string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId string) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 239d883..e1a5503 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -350,7 +350,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto func (i *IngestionService) ProcessVcf( gzippedFilePath string, drsFileId string, dataset uuid.UUID, - assemblyId constants.AssemblyId, filterOutReferences bool, + assemblyId string, filterOutReferences bool, lineProcessingConcurrencyLevel int) { // --- reopen gzipped file after having been copied to the temporary api-drs