From a5c70ba214dbf1646e528d3c174bb660f4bcc4bf Mon Sep 17 00:00:00 2001 From: consolethinks Date: Fri, 11 Oct 2024 16:29:31 +0200 Subject: [PATCH] cleanup and update datasetRetriever for v4 --- cmd/commands/datasetRetriever.go | 25 ++++++++++++--- datasetUtils/getAvailableDatasets.go | 11 ++++--- datasetUtils/getDatasetDetails.go | 39 +++++++++--------------- datasetUtils/getDatasetDetails_test.go | 42 +++++++++++++------------- 4 files changed, 61 insertions(+), 56 deletions(-) diff --git a/cmd/commands/datasetRetriever.go b/cmd/commands/datasetRetriever.go index 0dd1769..4b5dca1 100644 --- a/cmd/commands/datasetRetriever.go +++ b/cmd/commands/datasetRetriever.go @@ -34,13 +34,10 @@ For further help see "` + MANUAL + `"`, Args: exactArgsWithVersionException(1), Run: func(cmd *cobra.Command, args []string) { //consts & vars - const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" - const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" - const PROD_RSYNC_RETRIEVE_SERVER string = "pb-retrieve.psi.ch" const TEST_RSYNC_RETRIEVE_SERVER string = "pbt-retrieve.psi.ch" const DEV_RSYNC_RETRIEVE_SERVER string = "arematest2in.psi.ch" + const LOCAL_RSYNC_RETRIEVE_SERVER string = "localhost" // const PROD_RSYNC_RETRIEVE_SERVER string = "ebarema4in.psi.ch" // const TEST_RSYNC_RETRIEVE_SERVER string = "ebaremat1in.psi.ch" @@ -116,6 +113,7 @@ For further help see "` + MANUAL + `"`, ownerGroup, _ := cmd.Flags().GetString("ownergroup") testenvFlag, _ := cmd.Flags().GetBool("testenv") devenvFlag, _ := cmd.Flags().GetBool("devenv") + localenvFlag, _ := cmd.Flags().GetBool("localenv") showVersion, _ := cmd.Flags().GetBool("version") if datasetUtils.TestFlags != nil { @@ -141,6 +139,10 @@ For further help see "` + MANUAL + `"`, datasetUtils.CheckForNewVersion(client, APP, VERSION) + if localenvFlag { + APIServer = LOCAL_API_SERVER + RSYNCServer = LOCAL_RSYNC_RETRIEVE_SERVER + } if devenvFlag { APIServer = DEV_API_SERVER RSYNCServer = DEV_RSYNC_RETRIEVE_SERVER @@ -186,10 +188,22 @@ For further help see "` + MANUAL + `"`, } // get sourceFolder and other dataset related info for all Datasets - datasetDetails, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup) + datasetDetails, missingDatasetIds, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup) if err != nil { log.Fatal(err) } + fmt.Printf("\nFound datasets:\n") + fmt.Println("Dataset ID Size[MB] Owner SourceFolder") + fmt.Println("====================================================================================================") + for _, datasetDetail := range datasetDetails { + log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder) + } + if len(missingDatasetIds) > 0 { + fmt.Printf("\nThe following dataset id's were missing or had non-matching ownerGroups so they won't be copied: \n") + for _, id := range missingDatasetIds { + fmt.Printf(" - \"%s\"\n", id) + } + } // assemble rsync commands to be submitted batchCommands, destinationFolders := assembleRsyncCommands(user["username"], datasetDetails, destinationPath) @@ -218,6 +232,7 @@ func init() { datasetRetrieverCmd.Flags().String("ownergroup", "", "Defines to fetch only datasets of the specified ownerGroup (default is to fetch all available datasets)") datasetRetrieverCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)") datasetRetrieverCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)") + datasetRetrieverCmd.Flags().Bool("localenv", false, "Use local environment instead of production environment (developers only)") datasetRetrieverCmd.MarkFlagsMutuallyExclusive("testenv", "devenv") } diff --git a/datasetUtils/getAvailableDatasets.go b/datasetUtils/getAvailableDatasets.go index b5a1c11..4ddb441 100644 --- a/datasetUtils/getAvailableDatasets.go +++ b/datasetUtils/getAvailableDatasets.go @@ -3,9 +3,10 @@ package datasetUtils import ( "fmt" "os/exec" + "regexp" "strings" + version "github.com/mcuadros/go-version" - "regexp" ) const DatasetIdPrefix = "20.500.11935" @@ -50,13 +51,13 @@ func fetchDatasetsFromServer(username string, RSYNCServer string) ([]string, err if err != nil { return nil, fmt.Errorf("error getting rsync version: %w", err) } - + cmd := buildRsyncCommand(username, RSYNCServer, versionNumber) out, err := cmd.Output() if err != nil { return nil, err } - + return parseRsyncOutput(out), nil } @@ -87,14 +88,14 @@ var getRsyncVersion = func() (string, error) { return "", err } version := string(output) - + // Use a regular expression to find the version number re := regexp.MustCompile(`\d+\.\d+\.\d+`) versionNumber := re.FindString(version) if versionNumber == "" { return "", fmt.Errorf("could not find version number in rsync version string: %s", version) } - + return versionNumber, nil } diff --git a/datasetUtils/getDatasetDetails.go b/datasetUtils/getDatasetDetails.go index 2d5487c..daa27bb 100644 --- a/datasetUtils/getDatasetDetails.go +++ b/datasetUtils/getDatasetDetails.go @@ -4,12 +4,10 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" + "slices" "strings" - - "github.com/fatih/color" ) type Dataset struct { @@ -35,10 +33,9 @@ The function sends HTTP GET requests to the API server in chunks of 100 datasets Returns: - A slice of Dataset structs containing the details of the datasets that match the owner group filter. */ -func GetDatasetDetails(client *http.Client, APIServer string, accessToken string, datasetList []string, ownerGroup string) ([]Dataset, error) { - outputDatasetDetails := make([]Dataset, 0) - log.Println("Dataset ID Size[MB] Owner SourceFolder") - log.Println("====================================================================================================") +func GetDatasetDetails(client *http.Client, APIServer string, accessToken string, datasetList []string, ownerGroup string) ([]Dataset, []string, error) { + var returnedDatasets []Dataset + var missingDatasetIds []string // split large request into chunks chunkSize := 100 @@ -58,31 +55,23 @@ func GetDatasetDetails(client *http.Client, APIServer string, accessToken string datasetDetails, err := fetchDatasetDetails(client, accessToken, myurl) if err != nil { - return nil, err + return nil, nil, err } for _, datasetId := range datasetList[i:end] { - detailsFound := false - for _, datasetDetail := range datasetDetails { - if datasetDetail.Pid == datasetId { - detailsFound = true - if ownerGroup == "" || ownerGroup == datasetDetail.OwnerGroup { - outputDatasetDetails = append(outputDatasetDetails, datasetDetail) - color.Set(color.FgGreen) - } - log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder) - color.Unset() - break - } + datasetHasIdAndOwnerGroup := func(dataset Dataset) bool { + return dataset.Pid == datasetId && (ownerGroup == "" || dataset.OwnerGroup == ownerGroup) } - if !detailsFound { - color.Set(color.FgRed) - log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId) - color.Unset() + + i := slices.IndexFunc(datasetDetails, datasetHasIdAndOwnerGroup) // linear search! + if i >= 0 { + returnedDatasets = append(returnedDatasets, datasetDetails[i]) // found id + } else { + missingDatasetIds = append(missingDatasetIds, datasetId) // id missing } } } - return outputDatasetDetails, nil + return returnedDatasets, missingDatasetIds, nil } func fetchDatasetDetails(client *http.Client, token string, url string) ([]Dataset, error) { diff --git a/datasetUtils/getDatasetDetails_test.go b/datasetUtils/getDatasetDetails_test.go index 5c937a9..9c57945 100644 --- a/datasetUtils/getDatasetDetails_test.go +++ b/datasetUtils/getDatasetDetails_test.go @@ -14,19 +14,19 @@ func TestGetDatasetDetails_EmptyList(t *testing.T) { })) // Close the server when test finishes defer server.Close() - + // Use the mock server's URL as the API APIServer := server.URL accessToken := "testToken" datasetList := []string{} ownerGroup := "group1" - + // Create a new HTTP client client := &http.Client{} - + // Call the function to be tested - datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) - + datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) + // Check the result if len(datasets) != 0 { t.Errorf("Expected 0 datasets, got %d", len(datasets)) @@ -41,19 +41,19 @@ func TestGetDatasetDetails_Non200StatusCode(t *testing.T) { })) // Close the server when test finishes defer server.Close() - + // Use the mock server's URL as the API APIServer := server.URL accessToken := "testToken" datasetList := []string{"123"} ownerGroup := "group1" - + // Create a new HTTP client client := &http.Client{} - + // Call the function to be tested - datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) - + datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) + // Check the result if len(datasets) != 0 { t.Errorf("Expected 0 datasets, got %d", len(datasets)) @@ -68,19 +68,19 @@ func TestGetDatasetDetails_DatasetNotFound(t *testing.T) { })) // Close the server when test finishes defer server.Close() - + // Use the mock server's URL as the API APIServer := server.URL accessToken := "testToken" datasetList := []string{"123"} ownerGroup := "group1" - + // Create a new HTTP client client := &http.Client{} - + // Call the function to be tested - datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) - + datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) + // Check the result if len(datasets) != 0 { t.Errorf("Expected 0 datasets, got %d", len(datasets)) @@ -95,19 +95,19 @@ func TestGetDatasetDetails_DatasetFound(t *testing.T) { })) // Close the server when test finishes defer server.Close() - + // Use the mock server's URL as the API APIServer := server.URL accessToken := "testToken" datasetList := []string{"123"} ownerGroup := "group1" - + // Create a new HTTP client client := &http.Client{} - + // Call the function to be tested - datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) - + datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup) + // Check the result if len(datasets) != 1 { t.Errorf("Expected 1 dataset, got %d", len(datasets)) @@ -117,4 +117,4 @@ func TestGetDatasetDetails_DatasetFound(t *testing.T) { t.Errorf("Dataset details do not match expected values") } } -} \ No newline at end of file +}