Skip to content

Commit

Permalink
datasetIngestor: adjust to library changes, add filename checker call…
Browse files Browse the repository at this point in the history
…back function
  • Loading branch information
consolethinks committed Aug 23, 2024
1 parent 4eddb05 commit bb280c8
Showing 1 changed file with 49 additions and 18 deletions.
67 changes: 49 additions & 18 deletions cmd/commands/datasetIngestor.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,23 +241,24 @@ For Windows you need instead to specify -user username:password on the command l
copyFlag = false
}
checkCentralAvailability := !(cmd.Flags().Changed("copy") || cmd.Flags().Changed("nocopy") || beamlineAccount || copyFlag)
skip := ""
skipSymlinks := ""

// check if skip flag is globally defined via flags:
if cmd.Flags().Changed("linkfiles") {
switch linkfiles {
case "delete":
skip = "sA"
skipSymlinks = "sA"
case "keep":
skip = "kA"
skipSymlinks = "kA"
default:
skip = "dA" // default behaviour = keep internal for all
skipSymlinks = "dA" // default behaviour = keep internal for all
}
}

var skippedLinks uint = 0
var illegalFileNames uint = 0
localSymlinkCallback := createLocalSymlinkCallbackForFileLister(&skip, &skippedLinks)
localSymlinkCallback := createLocalSymlinkCallbackForFileLister(&skipSymlinks, &skippedLinks)
localFilepathFilterCallback := createLocalFilenameFilterCallback(&illegalFileNames)

// now everything is prepared, prepare to loop over all folders
var archivableDatasetList []string
Expand All @@ -272,13 +273,13 @@ For Windows you need instead to specify -user username:password on the command l
log.Printf("Scanning files in dataset %s", datasetSourceFolder)

// reset skip var. if not set for all datasets
if !(skip == "sA" || skip == "kA" || skip == "dA") {
skip = ""
if !(skipSymlinks == "sA" || skipSymlinks == "kA" || skipSymlinks == "dA") {
skipSymlinks = ""
}

// get filelist of dataset
fullFileArray, startTime, endTime, owner, numFiles, totalSize, err :=
datasetIngestor.GetLocalFileList(datasetSourceFolder, datasetFileListTxt, &illegalFileNames, localSymlinkCallback)
datasetIngestor.GetLocalFileList(datasetSourceFolder, datasetFileListTxt, localSymlinkCallback, localFilepathFilterCallback)
if err != nil {
log.Fatalf("Can't gather the filelist of \"%s\"", datasetSourceFolder)
}
Expand Down Expand Up @@ -316,6 +317,7 @@ For Windows you need instead to specify -user username:password on the command l
// check if data is accesible at archive server, unless beamline account (assumed to be centrally available always)
// and unless (no)copy flag defined via command line
if checkCentralAvailability {
log.Println("Checking if data is centrally available...")
sshErr, otherErr := datasetIngestor.CheckDataCentrallyAvailableSsh(user["username"], RSYNCServer, datasetSourceFolder, os.Stdout)
if otherErr != nil {
log.Fatalln("Cannot check if data is centrally available:", otherErr)
Expand All @@ -341,6 +343,8 @@ For Windows you need instead to specify -user username:password on the command l
log.Fatalln("Further ingests interrupted because copying is needed, but no copy wanted.")
}
}
} else {
log.Println("Data is present centrally.")
}
}

Expand Down Expand Up @@ -374,7 +378,7 @@ For Windows you need instead to specify -user username:password on the command l
if err != nil {
log.Println("Couldn't add attachment:", err)
}
log.Printf("Attachment file %v added to dataset %v\n", addAttachment, datasetId)
log.Printf("Attachment file %v added to dataset %v\n", addAttachment, datasetId)
}
if copyFlag {
// TODO rewrite SyncDataToFileserver
Expand Down Expand Up @@ -479,7 +483,7 @@ func init() {
datasetIngestorCmd.MarkFlagsMutuallyExclusive("nocopy", "copy")
}

func createLocalSymlinkCallbackForFileLister(skip *string, skippedLinks *uint) func(symlinkPath string, sourceFolder string) (bool, error) {
func createLocalSymlinkCallbackForFileLister(skipSymlinks *string, skippedLinks *uint) func(symlinkPath string, sourceFolder string) (bool, error) {
scanner := bufio.NewScanner(os.Stdin)
return func(symlinkPath string, sourceFolder string) (bool, error) {
keep := true
Expand All @@ -499,11 +503,11 @@ func createLocalSymlinkCallbackForFileLister(skip *string, skippedLinks *uint) f
}
}
//fmt.Printf("Skip variable:%v\n", *skip)
if *skip == "ka" || *skip == "kA" {
if *skipSymlinks == "ka" || *skipSymlinks == "kA" {
keep = true
} else if *skip == "sa" || *skip == "sA" {
} else if *skipSymlinks == "sa" || *skipSymlinks == "sA" {
keep = false
} else if *skip == "da" || *skip == "dA" {
} else if *skipSymlinks == "da" || *skipSymlinks == "dA" {
keep = strings.HasPrefix(pointee, sourceFolder)
} else {
color.Set(color.FgYellow)
Expand All @@ -519,14 +523,14 @@ func createLocalSymlinkCallbackForFileLister(skip *string, skippedLinks *uint) f
in this command then specify a capital 'A', e.g. (dA,kA,sA)
Do you want to keep the link in dataset or skip it (D(efault)/k(eep)/s(kip) ?`, sourceFolder)
scanner.Scan()
*skip = scanner.Text()
if *skip == "" {
*skip = "d"
*skipSymlinks = scanner.Text()
if *skipSymlinks == "" {
*skipSymlinks = "d"
}
if *skip == "d" || *skip == "dA" {
if *skipSymlinks == "d" || *skipSymlinks == "dA" {
keep = strings.HasPrefix(pointee, sourceFolder)
} else {
keep = (*skip != "s" && *skip != "sa" && *skip != "sA")
keep = (*skipSymlinks != "s" && *skipSymlinks != "sa" && *skipSymlinks != "sA")
}
}
if keep {
Expand All @@ -541,3 +545,30 @@ func createLocalSymlinkCallbackForFileLister(skip *string, skippedLinks *uint) f
return keep, nil
}
}

func createLocalFilenameFilterCallback(illegalFileNamesCounter *uint) func(filepath string) bool {
return func(filepath string) (keep bool) {
keep = true
// make sure that filenames do not contain characters like "\" or "*"
if strings.ContainsAny(filepath, "*\\") {
color.Set(color.FgRed)
log.Printf("Warning: the file %s contains illegal characters like *,\\ and will not be archived.", filepath)
color.Unset()
if illegalFileNamesCounter != nil {
*illegalFileNamesCounter++
}
keep = false
}
// and check for triple blanks, they are used to separate columns in messages
if keep && strings.Contains(filepath, " ") {
color.Set(color.FgRed)
log.Printf("Warning: the file %s contains 3 consecutive blanks which is not allowed. The file not be archived.", filepath)
color.Unset()
if illegalFileNamesCounter != nil {
*illegalFileNamesCounter++
}
keep = false
}
return keep
}
}

0 comments on commit bb280c8

Please sign in to comment.