Skip to content

Commit

Permalink
cleanup and update datasetRetriever for v4
Browse files Browse the repository at this point in the history
  • Loading branch information
consolethinks committed Oct 11, 2024
1 parent f2c0564 commit a5c70ba
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 56 deletions.
25 changes: 20 additions & 5 deletions cmd/commands/datasetRetriever.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,10 @@ For further help see "` + MANUAL + `"`,
Args: exactArgsWithVersionException(1),
Run: func(cmd *cobra.Command, args []string) {
//consts & vars
const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3"
const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3"
const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3"

const PROD_RSYNC_RETRIEVE_SERVER string = "pb-retrieve.psi.ch"
const TEST_RSYNC_RETRIEVE_SERVER string = "pbt-retrieve.psi.ch"
const DEV_RSYNC_RETRIEVE_SERVER string = "arematest2in.psi.ch"
const LOCAL_RSYNC_RETRIEVE_SERVER string = "localhost"

// const PROD_RSYNC_RETRIEVE_SERVER string = "ebarema4in.psi.ch"
// const TEST_RSYNC_RETRIEVE_SERVER string = "ebaremat1in.psi.ch"
Expand Down Expand Up @@ -116,6 +113,7 @@ For further help see "` + MANUAL + `"`,
ownerGroup, _ := cmd.Flags().GetString("ownergroup")
testenvFlag, _ := cmd.Flags().GetBool("testenv")
devenvFlag, _ := cmd.Flags().GetBool("devenv")
localenvFlag, _ := cmd.Flags().GetBool("localenv")
showVersion, _ := cmd.Flags().GetBool("version")

if datasetUtils.TestFlags != nil {
Expand All @@ -141,6 +139,10 @@ For further help see "` + MANUAL + `"`,

datasetUtils.CheckForNewVersion(client, APP, VERSION)

if localenvFlag {
APIServer = LOCAL_API_SERVER
RSYNCServer = LOCAL_RSYNC_RETRIEVE_SERVER
}
if devenvFlag {
APIServer = DEV_API_SERVER
RSYNCServer = DEV_RSYNC_RETRIEVE_SERVER
Expand Down Expand Up @@ -186,10 +188,22 @@ For further help see "` + MANUAL + `"`,
}

// get sourceFolder and other dataset related info for all Datasets
datasetDetails, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup)
datasetDetails, missingDatasetIds, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup)
if err != nil {
log.Fatal(err)
}
fmt.Printf("\nFound datasets:\n")
fmt.Println("Dataset ID Size[MB] Owner SourceFolder")
fmt.Println("====================================================================================================")
for _, datasetDetail := range datasetDetails {
log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
}
if len(missingDatasetIds) > 0 {
fmt.Printf("\nThe following dataset id's were missing or had non-matching ownerGroups so they won't be copied: \n")
for _, id := range missingDatasetIds {
fmt.Printf(" - \"%s\"\n", id)
}
}

// assemble rsync commands to be submitted
batchCommands, destinationFolders := assembleRsyncCommands(user["username"], datasetDetails, destinationPath)
Expand Down Expand Up @@ -218,6 +232,7 @@ func init() {
datasetRetrieverCmd.Flags().String("ownergroup", "", "Defines to fetch only datasets of the specified ownerGroup (default is to fetch all available datasets)")
datasetRetrieverCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)")
datasetRetrieverCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)")
datasetRetrieverCmd.Flags().Bool("localenv", false, "Use local environment instead of production environment (developers only)")

datasetRetrieverCmd.MarkFlagsMutuallyExclusive("testenv", "devenv")
}
11 changes: 6 additions & 5 deletions datasetUtils/getAvailableDatasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ package datasetUtils
import (
"fmt"
"os/exec"
"regexp"
"strings"

version "github.com/mcuadros/go-version"
"regexp"
)

const DatasetIdPrefix = "20.500.11935"
Expand Down Expand Up @@ -50,13 +51,13 @@ func fetchDatasetsFromServer(username string, RSYNCServer string) ([]string, err
if err != nil {
return nil, fmt.Errorf("error getting rsync version: %w", err)
}

cmd := buildRsyncCommand(username, RSYNCServer, versionNumber)
out, err := cmd.Output()
if err != nil {
return nil, err
}

return parseRsyncOutput(out), nil
}

Expand Down Expand Up @@ -87,14 +88,14 @@ var getRsyncVersion = func() (string, error) {
return "", err
}
version := string(output)

// Use a regular expression to find the version number
re := regexp.MustCompile(`\d+\.\d+\.\d+`)
versionNumber := re.FindString(version)
if versionNumber == "" {
return "", fmt.Errorf("could not find version number in rsync version string: %s", version)
}

return versionNumber, nil
}

Expand Down
39 changes: 14 additions & 25 deletions datasetUtils/getDatasetDetails.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"slices"
"strings"

"github.com/fatih/color"
)

type Dataset struct {
Expand All @@ -35,10 +33,9 @@ The function sends HTTP GET requests to the API server in chunks of 100 datasets
Returns:
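- A slice of Dataset structs containing the details of the datasets that match the owner group filter.
- A slice of the dataset IDs from datasetList for which no dataset with a matching ID and owner group was returned by the API server.
- An error if fetching the dataset details from the API server fails.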
*/
func GetDatasetDetails(client *http.Client, APIServer string, accessToken string, datasetList []string, ownerGroup string) ([]Dataset, error) {
outputDatasetDetails := make([]Dataset, 0)
log.Println("Dataset ID Size[MB] Owner SourceFolder")
log.Println("====================================================================================================")
func GetDatasetDetails(client *http.Client, APIServer string, accessToken string, datasetList []string, ownerGroup string) ([]Dataset, []string, error) {
var returnedDatasets []Dataset
var missingDatasetIds []string

// split large request into chunks
chunkSize := 100
Expand All @@ -58,31 +55,23 @@ func GetDatasetDetails(client *http.Client, APIServer string, accessToken string

datasetDetails, err := fetchDatasetDetails(client, accessToken, myurl)
if err != nil {
return nil, err
return nil, nil, err
}

for _, datasetId := range datasetList[i:end] {
detailsFound := false
for _, datasetDetail := range datasetDetails {
if datasetDetail.Pid == datasetId {
detailsFound = true
if ownerGroup == "" || ownerGroup == datasetDetail.OwnerGroup {
outputDatasetDetails = append(outputDatasetDetails, datasetDetail)
color.Set(color.FgGreen)
}
log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
color.Unset()
break
}
datasetHasIdAndOwnerGroup := func(dataset Dataset) bool {
return dataset.Pid == datasetId && (ownerGroup == "" || dataset.OwnerGroup == ownerGroup)
}
if !detailsFound {
color.Set(color.FgRed)
log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId)
color.Unset()

i := slices.IndexFunc(datasetDetails, datasetHasIdAndOwnerGroup) // linear search!
if i >= 0 {
returnedDatasets = append(returnedDatasets, datasetDetails[i]) // found id
} else {
missingDatasetIds = append(missingDatasetIds, datasetId) // id missing
}
}
}
return outputDatasetDetails, nil
return returnedDatasets, missingDatasetIds, nil
}

func fetchDatasetDetails(client *http.Client, token string, url string) ([]Dataset, error) {
Expand Down
42 changes: 21 additions & 21 deletions datasetUtils/getDatasetDetails_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ func TestGetDatasetDetails_EmptyList(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 0 {
t.Errorf("Expected 0 datasets, got %d", len(datasets))
Expand All @@ -41,19 +41,19 @@ func TestGetDatasetDetails_Non200StatusCode(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{"123"}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 0 {
t.Errorf("Expected 0 datasets, got %d", len(datasets))
Expand All @@ -68,19 +68,19 @@ func TestGetDatasetDetails_DatasetNotFound(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{"123"}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 0 {
t.Errorf("Expected 0 datasets, got %d", len(datasets))
Expand All @@ -95,19 +95,19 @@ func TestGetDatasetDetails_DatasetFound(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{"123"}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 1 {
t.Errorf("Expected 1 dataset, got %d", len(datasets))
Expand All @@ -117,4 +117,4 @@ func TestGetDatasetDetails_DatasetFound(t *testing.T) {
t.Errorf("Dataset details do not match expected values")
}
}
}
}

0 comments on commit a5c70ba

Please sign in to comment.