Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] add endpoint to re-verify an entire dataset #1121

Open
wants to merge 4 commits into
base: feature/api-trigger-reverification
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/integration/sda/rbac.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
"role": "admin",
"path": "/file/verify/:accession",
"action": "PUT"
},
{
"role": "admin",
"path": "/dataset/verify/:dataset",
"action": "PUT"
},
{
"role": "submission",
Expand Down
14 changes: 14 additions & 0 deletions .github/integration/tests/sda/60_api_admin_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,18 @@ resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer
if [ "$resp" != "404" ]; then
echo "Error when starting re-verification, expected 404 got: $resp"
exit 1
fi

## ask the API to re-verify every file in dataset SYNC-001-12345; only a 200 is accepted
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X PUT "http://api:8080/dataset/verify/SYNC-001-12345")"
case "$resp" in
200) ;;
*)
	echo "Error when starting re-verification of dataset, expected 200 got: $resp"
	exit 1
	;;
esac

## the same request for a dataset that does not exist must be answered with 404
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X PUT "http://api:8080/dataset/verify/SYNC-999-12345")"
case "$resp" in
404) ;;
*)
	echo "Error when starting re-verification of missing dataset, expected 404 got: $resp"
	exit 1
	;;
esac
47 changes: 40 additions & 7 deletions sda/cmd/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,10 @@ func setup(config *config.Config) *http.Server {
// submission endpoints below here
r.POST("/file/ingest", rbac(e), ingestFile) // start ingestion of a file
r.POST("/file/accession", rbac(e), setAccession) // assign accession ID to a file
r.PUT("/file/verify/:accession", rbac(e), reVerify) // trigger reverification of a file
r.PUT("/file/verify/:accession", rbac(e), reVerifyFile) // trigger reverification of a file
r.POST("/dataset/create", rbac(e), createDataset) // maps a set of files to a dataset
r.POST("/dataset/release/*dataset", rbac(e), releaseDataset) // Releases a dataset to be accessible
r.PUT("/dataset/verify/*dataset", rbac(e), reVerifyDataset) // Re-verify all files in the dataset
r.GET("/datasets/list", rbac(e), listAllDatasets) // Lists all datasets with their status
r.GET("/datasets/list/:username", rbac(e), listUserDatasets) // Lists datasets with their status for a specififc user
r.GET("/users", rbac(e), listActiveUsers) // Lists all users
Expand Down Expand Up @@ -644,9 +645,7 @@ func listDatasets(c *gin.Context) {
c.JSON(http.StatusOK, datasets)
}

func reVerify(c *gin.Context) {
accessionID := strings.TrimPrefix(c.Param("accession"), "/")

func reVerify(c *gin.Context, accessionID string) (*gin.Context, error) {
reVerify, err := Conf.API.DB.GetReVerificationData(accessionID)
if err != nil {
if strings.Contains(err.Error(), "sql: no rows in result set") {
Expand All @@ -656,30 +655,64 @@ func reVerify(c *gin.Context) {
c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())
}

return
return c, err
}
corrID, err := Conf.API.DB.GetCorrID(reVerify.User, reVerify.FilePath)
if err != nil {
log.Errorf("failed to get CorrID for %s, %s", reVerify.User, reVerify.FilePath)
c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())

return
return c, err
}

marshaledMsg, _ := json.Marshal(&reVerify)
if err := schema.ValidateJSON(fmt.Sprintf("%s/ingestion-verification.json", Conf.Broker.SchemasPath), marshaledMsg); err != nil {
log.Errorln(err.Error())
c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())

return
return c, err
}

err = Conf.API.MQ.SendMessage(corrID, Conf.Broker.Exchange, "archived", marshaledMsg)
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())

return c, err
}

return c, nil
}

// reVerifyFile is the handler for PUT /file/verify/:accession. It reads the
// accession ID from the request path and passes it to reVerify. When reVerify
// reports an error the handler returns without writing anything further
// (reVerify aborts the request itself on the failure paths); otherwise it
// answers with 200.
func reVerifyFile(c *gin.Context) {
	accessionID := strings.TrimPrefix(c.Param("accession"), "/")

	// Keep err scoped to this request: the handler must not depend on, or
	// write to, an err variable declared outside the function. The context
	// returned by reVerify is the one passed in, so it is not needed here.
	if _, err := reVerify(c, accessionID); err != nil {
		return
	}

	c.Status(http.StatusOK)
}

// reVerifyDataset is the handler for PUT /dataset/verify/*dataset. It looks up
// the accession IDs of all files in the dataset and calls reVerify for each of
// them in turn.
//
// Responses written here:
//   - 500 when the dataset lookup fails
//   - 404 when the lookup yields no files
//   - 200 when every file was handed to reVerify without error
//
// The loop stops at the first file for which reVerify returns an error; files
// processed before that point have already been submitted.
func reVerifyDataset(c *gin.Context) {
	dataset := strings.TrimPrefix(c.Param("dataset"), "/")
	accessions, err := Conf.API.DB.GetDatasetFiles(dataset)
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error())

		return
	}
	// Test the length rather than nil so an empty, non-nil result is also
	// reported as a missing dataset instead of falling through to 200.
	if len(accessions) == 0 {
		c.AbortWithStatusJSON(http.StatusNotFound, "dataset not found")

		return
	}

	for _, accession := range accessions {
		// reVerify returns the context it was given, so only the error matters.
		if _, err := reVerify(c, accession); err != nil {
			return
		}
	}

	c.Status(http.StatusOK)
}
16 changes: 16 additions & 0 deletions sda/cmd/api/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ Admin endpoints are only available to a set of whitelisted users specified in th
curl -H "Authorization: Bearer $token" -X POST https://HOSTNAME/dataset/release/my-dataset-01
```

- `/dataset/verify/*dataset`
- accepts `PUT` requests with the dataset name as last part of the path
- triggers reverification of all files in the dataset.

- Error codes
- `200` Query execute ok.
- `404` Error wrong dataset name.
- `401` Token user is not in the list of admins.
- `500` Internal error due to DB or MQ failures.

Example:

```bash
curl -H "Authorization: Bearer $token" -X PUT https://HOSTNAME/dataset/verify/my-dataset-01
```

- `/datasets/list`
- accepts `GET` requests
- Returns all datasets together with their status and last modified timestamp.
Expand Down
114 changes: 110 additions & 4 deletions sda/cmd/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1936,7 +1936,7 @@ func (suite *TestSuite) TestListDatasetsAsUser() {
assert.Equal(suite.T(), "API:dataset-01|registered", fmt.Sprintf("%s|%s", datasets[0].DatasetID, datasets[0].Status))
}

func (suite *TestSuite) TestReVerify() {
func (suite *TestSuite) TestReVerifyFile() {
user := "TestReVerify"
for i := 0; i < 3; i++ {
filePath := fmt.Sprintf("/%v/TestReVerify-00%d.c4gh", user, i)
Expand Down Expand Up @@ -1994,7 +1994,7 @@ func (suite *TestSuite) TestReVerify() {
r.Header.Add("Authorization", "Bearer "+suite.Token)

_, router := gin.CreateTestContext(w)
router.PUT("/file/verify/:accession", reVerify)
router.PUT("/file/verify/:accession", reVerifyFile)

router.ServeHTTP(w, r)
okResponse := w.Result()
Expand All @@ -2019,7 +2019,7 @@ func (suite *TestSuite) TestReVerify() {
assert.Equal(suite.T(), 1, data.MessagesReady)
}

func (suite *TestSuite) TestReVerify_wrongAccession() {
func (suite *TestSuite) TestReVerifyFile_wrongAccession() {
gin.SetMode(gin.ReleaseMode)
assert.NoError(suite.T(), setupJwtAuth())
Conf.Broker.SchemasPath = "../../schemas/isolated"
Expand All @@ -2030,7 +2030,113 @@ func (suite *TestSuite) TestReVerify_wrongAccession() {
r.Header.Add("Authorization", "Bearer "+suite.Token)

_, router := gin.CreateTestContext(w)
router.POST("/file/verify/:accession", reVerify)
router.POST("/file/verify/:accession", reVerifyFile)

router.ServeHTTP(w, r)
okResponse := w.Result()
defer okResponse.Body.Close()
assert.Equal(suite.T(), http.StatusNotFound, okResponse.StatusCode)
}

// TestReVerifyDataset registers three files, walks each of them through the
// uploaded/archived/verified/ready states with an accession ID, maps them to
// the dataset "test-dataset-01" and then checks that a PUT to
// /dataset/verify/test-dataset-01 answers 200 and that the broker reports one
// ready message per file in the "archived" queue.
func (suite *TestSuite) TestReVerifyDataset() {
	user := "TestReVerifyDataset"
	const fileCount = 3

	// Collect the accession IDs as they are assigned so the dataset mapping
	// below cannot drift from what the loop actually created.
	accessions := make([]string, 0, fileCount)
	for i := 0; i < fileCount; i++ {
		filePath := fmt.Sprintf("/%v/TestReVerifyDataset-00%d.c4gh", user, i)
		fileID, err := Conf.API.DB.RegisterFile(filePath, user)
		if err != nil {
			suite.FailNow("failed to register file in database")
		}

		if err := Conf.API.DB.UpdateFileEventLog(fileID, "uploaded", fileID, user, "{}", "{}"); err != nil {
			suite.FailNow("failed to update satus of file in database")
		}
		encSha := sha256.New()
		_, err = encSha.Write([]byte("Checksum"))
		if err != nil {
			suite.FailNow("failed to calculate Checksum")
		}

		decSha := sha256.New()
		_, err = decSha.Write([]byte("DecryptedChecksum"))
		if err != nil {
			suite.FailNow("failed to calculate DecryptedChecksum")
		}

		fileInfo := database.FileInfo{
			Checksum:          fmt.Sprintf("%x", encSha.Sum(nil)),
			Size:              1000,
			Path:              filePath,
			DecryptedChecksum: fmt.Sprintf("%x", decSha.Sum(nil)),
			DecryptedSize:     948,
		}
		if err := Conf.API.DB.SetArchived(fileInfo, fileID, fileID); err != nil {
			suite.FailNow("failed to mark file as Archived")
		}

		if err := Conf.API.DB.SetVerified(fileInfo, fileID, fileID); err != nil {
			suite.FailNow("failed to mark file as Verified")
		}

		stableID := fmt.Sprintf("%s_0%d", user, i)
		// FailNowf takes the failure message first and the format string
		// second; the format string must not be passed as the failure message.
		if err := Conf.API.DB.SetAccessionID(stableID, fileID); err != nil {
			suite.FailNowf("failed to set stable ID", "got (%s) when setting stable ID: %s, %s", err.Error(), stableID, fileID)
		}
		if err := Conf.API.DB.UpdateFileEventLog(fileID, "ready", fileID, "finalize", "{}", "{}"); err != nil {
			suite.FailNowf("failed to update file status", "got (%s) when updating file status: %s", err.Error(), filePath)
		}
		accessions = append(accessions, stableID)
	}

	if err := Conf.API.DB.MapFilesToDataset("test-dataset-01", accessions); err != nil {
		suite.FailNow("failed to map files to dataset")
	}

	gin.SetMode(gin.ReleaseMode)
	assert.NoError(suite.T(), setupJwtAuth())
	Conf.Broker.SchemasPath = "../../schemas/isolated"

	// Mock request and response holders
	w := httptest.NewRecorder()
	r := httptest.NewRequest(http.MethodPut, "/dataset/verify/test-dataset-01", http.NoBody)
	r.Header.Add("Authorization", "Bearer "+suite.Token)

	_, router := gin.CreateTestContext(w)
	router.PUT("/dataset/verify/*dataset", reVerifyDataset)

	router.ServeHTTP(w, r)
	okResponse := w.Result()
	defer okResponse.Body.Close()
	assert.Equal(suite.T(), http.StatusOK, okResponse.StatusCode)

	// verify that the messages shows up in the queue
	time.Sleep(10 * time.Second) // this is needed to ensure we don't get any false negatives
	req, err := http.NewRequest(http.MethodGet, "http://"+BrokerAPI+"/api/queues/sda/archived", http.NoBody)
	if err != nil {
		suite.FailNow("failed to create broker request", err.Error())
	}
	req.SetBasicAuth("guest", "guest")
	client := http.Client{Timeout: 30 * time.Second}
	res, err := client.Do(req)
	// Stop here on failure: res is nil when Do returns an error, so touching
	// res.Body would panic instead of reporting the real problem.
	if err != nil {
		suite.FailNow("failed to query broker", err.Error())
	}
	defer res.Body.Close()

	var data struct {
		MessagesReady int `json:"messages_ready"`
	}
	body, err := io.ReadAll(res.Body)
	assert.NoError(suite.T(), err, "failed to read response from broker")
	err = json.Unmarshal(body, &data)
	assert.NoError(suite.T(), err, "failed to unmarshal response")
	assert.Equal(suite.T(), len(accessions), data.MessagesReady)
}

func (suite *TestSuite) TestReVerifyDataset_wrongDatasetName() {
gin.SetMode(gin.ReleaseMode)
assert.NoError(suite.T(), setupJwtAuth())
Conf.Broker.SchemasPath = "../../schemas/isolated"

// Mock request and response holders
w := httptest.NewRecorder()
r := httptest.NewRequest(http.MethodPut, "/dataset/verify/wrong_dataset", http.NoBody)
r.Header.Add("Authorization", "Bearer "+suite.Token)

_, router := gin.CreateTestContext(w)
router.PUT("/dataset/verify/*dataset", reVerifyDataset)

router.ServeHTTP(w, r)
okResponse := w.Result()
Expand Down
27 changes: 27 additions & 0 deletions sda/internal/database/db_functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -964,3 +964,30 @@ func (dbs *SDAdb) GetDecryptedChecksum(id string) (string, error) {

return unencryptedChecksum, nil
}

// GetDatasetFiles returns the stable IDs of all files that are mapped to the
// dataset whose stable ID is dataset. The returned slice is nil when the query
// yields no rows. Any error from running the query, scanning a row or
// iterating the result set is returned with a nil slice.
func (dbs *SDAdb) GetDatasetFiles(dataset string) ([]string, error) {
	dbs.checkAndReconnectIfNeeded()
	db := dbs.DB

	rows, err := db.Query("SELECT stable_id FROM sda.files WHERE id IN (SELECT file_id FROM sda.file_dataset WHERE dataset_id = (SELECT id FROM sda.datasets WHERE stable_id = $1));", dataset)
	if err != nil {
		return nil, err
	}
	// Register the close before any other return so every exit path releases
	// the result set.
	defer rows.Close()

	var accessions []string
	for rows.Next() {
		var accession string
		if err := rows.Scan(&accession); err != nil {
			return nil, err
		}

		accessions = append(accessions, accession)
	}
	// rows.Err must be consulted after the loop: Next returns false both when
	// the rows are exhausted and when iteration was cut short by an error.
	if err := rows.Err(); err != nil {
		return nil, err
	}

	return accessions, nil
}
60 changes: 58 additions & 2 deletions sda/internal/database/db_functions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -859,8 +859,8 @@ func (suite *DatabaseTests) TestListUserDatasets() {
if err != nil {
suite.FailNowf("got (%s) when setting stable ID: %s, %s", err.Error(), stableID, fileID)
}
}

}
if err := db.MapFilesToDataset("test-user-dataset-01", []string{"accession_User-Q_00", "accession_User-Q_01", "accession_User-Q_02"}); err != nil {
suite.FailNow("failed to map files to dataset")
}
Expand All @@ -887,7 +887,6 @@ func (suite *DatabaseTests) TestListUserDatasets() {
if err != nil {
suite.FailNowf("got (%s) when setting stable ID: %s, %s", err.Error(), "stableID", fileID)
}

if err := db.MapFilesToDataset("test-wrong-user-dataset", []string{"stableID"}); err != nil {
suite.FailNow("failed to map files to dataset")
}
Expand Down Expand Up @@ -1019,3 +1018,60 @@ func (suite *DatabaseTests) TestGetDecryptedChecksum() {
assert.NoError(suite.T(), err, "failed to get verification data")
assert.Equal(suite.T(), fmt.Sprintf("%x", decSha.Sum(nil)), checksum)
}

// TestGetDsatasetFiles registers three files for "User-Q", marks each one as
// uploaded, archived and verified, assigns it an accession ID, maps the three
// accession IDs to a dataset and checks that GetDatasetFiles returns exactly
// those accession IDs for that dataset.
func (suite *DatabaseTests) TestGetDsatasetFiles() {
	db, err := NewSDAdb(suite.dbConf)
	// Stop immediately when no connection could be created; the calls below
	// would otherwise run against an unusable db value.
	if err != nil {
		suite.FailNowf("failed to create new connection", "got (%v) when creating new connection", err)
	}
	testCases := 3

	// Accession IDs are collected as they are assigned so the mapping and the
	// final expectation both follow what the loop really created.
	expected := make([]string, 0, testCases)
	for i := 0; i < testCases; i++ {
		filePath := fmt.Sprintf("/%v/TestGetDsatasetFiles-00%d.c4gh", "User-Q", i)
		fileID, err := db.RegisterFile(filePath, "User-Q")
		if err != nil {
			suite.FailNow("Failed to register file")
		}
		err = db.UpdateFileEventLog(fileID, "uploaded", fileID, "User-Q", "{}", "{}")
		if err != nil {
			suite.FailNow("Failed to update file event log")
		}

		corrID, err := db.GetCorrID("User-Q", filePath)
		if err != nil {
			suite.FailNow("Failed to get CorrID for file")
		}
		assert.Equal(suite.T(), fileID, corrID)

		checksum := fmt.Sprintf("%x", sha256.New().Sum(nil))
		fileInfo := FileInfo{
			fmt.Sprintf("%x", sha256.New().Sum(nil)),
			1234,
			filePath,
			checksum,
			999,
		}
		err = db.SetArchived(fileInfo, fileID, corrID)
		if err != nil {
			suite.FailNow("failed to mark file as Archived")
		}

		err = db.SetVerified(fileInfo, fileID, corrID)
		if err != nil {
			suite.FailNow("failed to mark file as Verified")
		}

		stableID := fmt.Sprintf("accession_%s_0%d", "User-Q", i)
		err = db.SetAccessionID(stableID, fileID)
		// FailNowf takes the failure message first and the format string
		// second; the format string must not be passed as the failure message.
		if err != nil {
			suite.FailNowf("failed to set stable ID", "got (%s) when setting stable ID: %s, %s", err.Error(), stableID, fileID)
		}
		expected = append(expected, stableID)
	}

	dID := "test-get-dataset-files-01"
	if err := db.MapFilesToDataset(dID, expected); err != nil {
		suite.FailNow("failed to map files to dataset")
	}

	accessions, err := db.GetDatasetFiles(dID)
	assert.NoError(suite.T(), err, "failed to get accessions for a dataset")
	// The query behind GetDatasetFiles has no ORDER BY, so compare the
	// contents without depending on row order.
	assert.ElementsMatch(suite.T(), expected, accessions)
}
Loading