From 724f6acb59adc51139b44944d68add3932ca35a3 Mon Sep 17 00:00:00 2001
From: Joakim Bygdell
Date: Thu, 7 Nov 2024 13:37:07 +0100
Subject: [PATCH] [API] reverify entire dataset

---
 .../tests/sda/60_api_admin_test.sh         |  14 +++
 sda/cmd/api/api.go                         |  34 ++++-
 sda/cmd/api/api_test.go                    | 120 +++++++++++++++++-
 sda/internal/database/db_functions.go      |  32 +++++
 sda/internal/database/db_functions_test.go |  60 +++++++++
 5 files changed, 257 insertions(+), 3 deletions(-)

diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh
index bec0b960d..8478afd21 100644
--- a/.github/integration/tests/sda/60_api_admin_test.sh
+++ b/.github/integration/tests/sda/60_api_admin_test.sh
@@ -21,4 +21,18 @@ resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer
 if [ "$resp" != "404" ]; then
     echo "Error when starting re-verification, expected 404 got: $resp"
     exit 1
+fi
+
+## trigger re-verification of dataset SYNC-001-12345
+resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X PUT "http://api:8080/dataset/verify/SYNC-001-12345")"
+if [ "$resp" != "200" ]; then
+    echo "Error when starting re-verification of dataset, expected 200 got: $resp"
+    exit 1
+fi
+
+## expect failure of missing dataset
+resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X PUT "http://api:8080/dataset/verify/SYNC-999-12345")"
+if [ "$resp" != "404" ]; then
+    echo "Error when starting re-verification of missing dataset, expected 404 got: $resp"
+    exit 1
 fi
\ No newline at end of file
diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go
index 8166e2e45..4d271e148 100644
--- a/sda/cmd/api/api.go
+++ b/sda/cmd/api/api.go
@@ -90,9 +90,10 @@ func setup(config *config.Config) *http.Server {
 	if len(config.API.Admins) > 0 {
 		r.POST("/file/ingest", isAdmin(), ingestFile) // start ingestion of a file
 		r.POST("/file/accession", isAdmin(), setAccession) // assign accession ID to a file
-		r.PUT("/file/verify/*accession", isAdmin(), reVerify) // trigger reverification of a file
+		r.PUT("/file/verify/*accession", isAdmin(), reVerifyFile) // trigger reverification of a file
 		r.POST("/dataset/create", isAdmin(), createDataset) // maps a set of files to a dataset
 		r.POST("/dataset/release/*dataset", isAdmin(), releaseDataset) // Releases a dataset to be accessible
+		r.PUT("/dataset/verify/*dataset", isAdmin(), reVerifyDataset) // Re-verify all files in the dataset
 		r.POST("/c4gh-keys/add", isAdmin(), addC4ghHash) // Adds a key hash to the database
 		r.POST("/c4gh-keys/deprecate/*keyHash", isAdmin(), deprecateC4ghHash) // Deprecate a given key hash
 		r.GET("/c4gh-keys/list", isAdmin(), listC4ghHashes) // Lists key hashes in the database
@@ -633,3 +634,34 @@ func reVerifyFile(c *gin.Context) {
 
 	c.Status(http.StatusOK)
 }
+
+// reVerifyDataset triggers re-verification of every file that belongs to the
+// dataset named in the URL. It answers 404 when the dataset has no files
+// (or does not exist), 500 when the database lookup fails, and 200 once all
+// files have been handed to reVerify.
+func reVerifyDataset(c *gin.Context) {
+	dataset := strings.TrimPrefix(c.Param("dataset"), "/")
+	accessions, err := Conf.API.DB.GetDatasetFiles(dataset)
+	if err != nil {
+		c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())
+
+		return
+	}
+	// len() covers both a nil and an empty, non-nil slice.
+	if len(accessions) == 0 {
+		c.AbortWithStatusJSON(http.StatusNotFound, "dataset not found")
+
+		return
+	}
+
+	for _, accession := range accessions {
+		// NOTE(review): reVerify is defined outside this patch; it is presumed to
+		// have written the error response itself when it fails - confirm.
+		c, err = reVerify(c, accession)
+		if err != nil {
+			return
+		}
+	}
+
+	c.Status(http.StatusOK)
+}
diff --git a/sda/cmd/api/api_test.go b/sda/cmd/api/api_test.go
index ae908f2f3..461009659 100644
--- a/sda/cmd/api/api_test.go
+++ b/sda/cmd/api/api_test.go
@@ -1637,11 +1637,11 @@ func (suite *TestSuite) TestReVerifyFile() {
 
 	// Mock request and response holders
 	w := httptest.NewRecorder()
-	r := httptest.NewRequest(http.MethodPost, "/file/verify/accession_TestReVerify_01", http.NoBody)
+	r := httptest.NewRequest(http.MethodPut, "/file/verify/accession_TestReVerify_01", http.NoBody)
 	r.Header.Add("Authorization", "Bearer "+suite.Token)
 
 	_, router := gin.CreateTestContext(w)
-	router.POST("/file/verify/*accession", isAdmin(), reVerifyFile)
+	router.PUT("/file/verify/*accession", isAdmin(), reVerifyFile)
 
 	router.ServeHTTP(w, r)
 	okResponse := w.Result()
@@ -1685,3 +1685,119 @@ func (suite *TestSuite) TestReVerifyFile_wrongAccession() {
 	defer okResponse.Body.Close()
 	assert.Equal(suite.T(), http.StatusNotFound, okResponse.StatusCode)
 }
+
+// TestReVerifyDataset registers three verified files, maps them to a dataset
+// and checks that re-verifying the dataset returns 200 and leaves three
+// messages ready in the broker queue queried below.
+func (suite *TestSuite) TestReVerifyDataset() {
+	user := "TestReVerifyDataset"
+	for i := 0; i < 3; i++ {
+		filePath := fmt.Sprintf("/%v/TestReVerifyDataset-00%d.c4gh", user, i)
+		fileID, err := Conf.API.DB.RegisterFile(filePath, user)
+		if err != nil {
+			suite.FailNow("failed to register file in database")
+		}
+
+		if err := Conf.API.DB.UpdateFileEventLog(fileID, "uploaded", fileID, user, "{}", "{}"); err != nil {
+			suite.FailNow("failed to update satus of file in database")
+		}
+		encSha := sha256.New()
+		_, err = encSha.Write([]byte("Checksum"))
+		if err != nil {
+			suite.FailNow("failed to calculate Checksum")
+		}
+
+		decSha := sha256.New()
+		_, err = decSha.Write([]byte("DecryptedChecksum"))
+		if err != nil {
+			suite.FailNow("failed to calculate DecryptedChecksum")
+		}
+
+		fileInfo := database.FileInfo{
+			Checksum:          fmt.Sprintf("%x", encSha.Sum(nil)),
+			Size:              1000,
+			Path:              filePath,
+			DecryptedChecksum: fmt.Sprintf("%x", decSha.Sum(nil)),
+			DecryptedSize:     948,
+		}
+		if err := Conf.API.DB.SetArchived(fileInfo, fileID, fileID); err != nil {
+			suite.FailNow("failed to mark file as Archived")
+		}
+
+		if err := Conf.API.DB.SetVerified(fileInfo, fileID, fileID); err != nil {
+			suite.FailNow("failed to mark file as Verified")
+		}
+
+		stableID := fmt.Sprintf("%s_0%d", user, i)
+		if err := Conf.API.DB.SetAccessionID(stableID, fileID); err != nil {
+			suite.FailNowf("failed to set stable ID", "got (%s) when setting stable ID: %s, %s", err.Error(), stableID, fileID)
+		}
+		if err := Conf.API.DB.UpdateFileEventLog(fileID, "ready", fileID, "finalize", "{}", "{}"); err != nil {
+			suite.FailNowf("failed to update file status", "got (%s) when updating file status: %s", err.Error(), filePath)
+		}
+	}
+
+	if err := Conf.API.DB.MapFilesToDataset("test-dataset-01", []string{"TestReVerifyDataset_00", "TestReVerifyDataset_01", "TestReVerifyDataset_02"}); err != nil {
+		suite.FailNow("failed to map files to dataset")
+	}
+
+	gin.SetMode(gin.ReleaseMode)
+	assert.NoError(suite.T(), setupJwtAuth())
+	Conf.API.Admins = []string{"dummy"}
+	Conf.Broker.SchemasPath = "../../schemas/isolated"
+
+	// Mock request and response holders
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPut, "/dataset/verify/test-dataset-01", http.NoBody)
+	r.Header.Add("Authorization", "Bearer "+suite.Token)
+
+	_, router := gin.CreateTestContext(w)
+	router.PUT("/dataset/verify/*dataset", isAdmin(), reVerifyDataset)
+
+	router.ServeHTTP(w, r)
+	okResponse := w.Result()
+	defer okResponse.Body.Close()
+	assert.Equal(suite.T(), http.StatusOK, okResponse.StatusCode)
+
+	// verify that the messages shows up in the queue
+	time.Sleep(10 * time.Second) // this is needed to ensure we don't get any false negatives
+	req, _ := http.NewRequest(http.MethodGet, "http://"+BrokerAPI+"/api/queues/sda/archived", http.NoBody)
+	req.SetBasicAuth("guest", "guest")
+	client := http.Client{Timeout: 30 * time.Second}
+	res, err := client.Do(req)
+	if err != nil {
+		// res is nil on error; stop here instead of dereferencing it below.
+		suite.FailNow("failed to query broker", err.Error())
+	}
+	var data struct {
+		MessagesReady int `json:"messages_ready"`
+	}
+	body, err := io.ReadAll(res.Body)
+	res.Body.Close()
+	assert.NoError(suite.T(), err, "failed to read response from broker")
+	err = json.Unmarshal(body, &data)
+	assert.NoError(suite.T(), err, "failed to unmarshal response")
+	assert.Equal(suite.T(), 3, data.MessagesReady)
+}
+
+// TestReVerifyDataset_wrongDatasetName checks that re-verifying a dataset
+// name that was never created returns 404.
+func (suite *TestSuite) TestReVerifyDataset_wrongDatasetName() {
+	gin.SetMode(gin.ReleaseMode)
+	assert.NoError(suite.T(), setupJwtAuth())
+	Conf.API.Admins = []string{"dummy"}
+	Conf.Broker.SchemasPath = "../../schemas/isolated"
+
+	// Mock request and response holders
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPut, "/dataset/verify/wrong_dataset", http.NoBody)
+	r.Header.Add("Authorization", "Bearer "+suite.Token)
+
+	_, router := gin.CreateTestContext(w)
+	router.PUT("/dataset/verify/*dataset", isAdmin(), reVerifyDataset)
+
+	router.ServeHTTP(w, r)
+	okResponse := w.Result()
+	defer okResponse.Body.Close()
+	assert.Equal(suite.T(), http.StatusNotFound, okResponse.StatusCode)
+}
diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go
index 0eada7fdc..6d7756bce 100644
--- a/sda/internal/database/db_functions.go
+++ b/sda/internal/database/db_functions.go
@@ -894,3 +894,35 @@ func (dbs *SDAdb) GetDecryptedChecksum(id string) (string, error) {
 
 	return unencryptedChecksum, nil
 }
+
+// GetDatasetFiles returns the stable IDs (accessions) of all files mapped to
+// the dataset with the given stable ID. When the query matches no rows the
+// returned slice is nil and the error is nil.
+func (dbs *SDAdb) GetDatasetFiles(dataset string) ([]string, error) {
+	dbs.checkAndReconnectIfNeeded()
+	db := dbs.DB
+
+	var accessions []string
+	rows, err := db.Query("SELECT stable_id FROM sda.files WHERE id IN (SELECT file_id FROM sda.file_dataset WHERE dataset_id = (SELECT id FROM sda.datasets WHERE stable_id = $1));", dataset)
+	if err != nil {
+		return nil, err
+	}
+	// Registered before any further return so the rows are always released.
+	defer rows.Close()
+
+	for rows.Next() {
+		var accession string
+		err := rows.Scan(&accession)
+		if err != nil {
+			return nil, err
+		}
+
+		accessions = append(accessions, accession)
+	}
+	// Errors hit during iteration are only reported by Err after Next returns false.
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return accessions, nil
+}
diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go
index 8285d1714..344ec7dd0 100644
--- a/sda/internal/database/db_functions_test.go
+++ b/sda/internal/database/db_functions_test.go
@@ -843,3 +843,63 @@ func (suite *DatabaseTests) TestGetDecryptedChecksum() {
 	assert.NoError(suite.T(), err, "failed to get verification data")
 	assert.Equal(suite.T(), fmt.Sprintf("%x", decSha.Sum(nil)), checksum)
 }
+
+// TestGetDsatasetFiles registers three files with accession IDs, maps them to
+// a dataset and checks that GetDatasetFiles returns exactly those accessions.
+func (suite *DatabaseTests) TestGetDsatasetFiles() {
+	db, err := NewSDAdb(suite.dbConf)
+	assert.NoError(suite.T(), err, "got (%v) when creating new connection", err)
+	testCases := 3
+
+	for i := 0; i < testCases; i++ {
+		filePath := fmt.Sprintf("/%v/TestGetDsatasetFiles-00%d.c4gh", "User-Q", i)
+		fileID, err := db.RegisterFile(filePath, "User-Q")
+		if err != nil {
+			suite.FailNow("Failed to register file")
+		}
+		err = db.UpdateFileEventLog(fileID, "uploaded", fileID, "User-Q", "{}", "{}")
+		if err != nil {
+			suite.FailNow("Failed to update file event log")
+		}
+
+		corrID, err := db.GetCorrID("User-Q", filePath)
+		if err != nil {
+			suite.FailNow("Failed to get CorrID for file")
+		}
+		assert.Equal(suite.T(), fileID, corrID)
+
+		checksum := fmt.Sprintf("%x", sha256.New().Sum(nil))
+		fileInfo := FileInfo{
+			fmt.Sprintf("%x", sha256.New().Sum(nil)),
+			1234,
+			filePath,
+			checksum,
+			999,
+		}
+		err = db.SetArchived(fileInfo, fileID, corrID)
+		if err != nil {
+			suite.FailNow("failed to mark file as Archived")
+		}
+
+		err = db.SetVerified(fileInfo, fileID, corrID)
+		if err != nil {
+			suite.FailNow("failed to mark file as Verified")
+		}
+
+		stableID := fmt.Sprintf("accession_%s_0%d", "User-Q", i)
+		err = db.SetAccessionID(stableID, fileID)
+		if err != nil {
+			suite.FailNowf("failed to set stable ID", "got (%s) when setting stable ID: %s, %s", err.Error(), stableID, fileID)
+		}
+	}
+
+	dID := "test-get-dataset-files-01"
+	if err := db.MapFilesToDataset(dID, []string{"accession_User-Q_00", "accession_User-Q_01", "accession_User-Q_02"}); err != nil {
+		suite.FailNow("failed to map files to dataset")
+	}
+
+	accessions, err := db.GetDatasetFiles(dID)
+	assert.NoError(suite.T(), err, "failed to get accessions for a dataset")
+	// The query has no ORDER BY, so compare the elements without regard to order.
+	assert.ElementsMatch(suite.T(), []string{"accession_User-Q_00", "accession_User-Q_01", "accession_User-Q_02"}, accessions)
+}