Skip to content

Commit

Permalink
Merge branch 'main' into refactor/various_fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
consolethinks authored Aug 26, 2024
2 parents 3e587d0 + 42f6368 commit 7b55151
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 109 deletions.
116 changes: 112 additions & 4 deletions cmd/commands/datasetIngestor.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ For Windows you need instead to specify -user username:password on the command l
// TODO ask archive system if sourcefolder is known to them. If yes no copy needed, otherwise
// a destination location is defined by the archive system
// for now let the user decide if he needs a copy

if nocopyFlag || beamlineAccount {
copyFlag = false
}
Expand All @@ -257,6 +256,12 @@ For Windows you need instead to specify -user username:password on the command l
}
}

var skippedLinks uint = 0
var illegalFileNames uint = 0
localSymlinkCallback := createLocalSymlinkCallbackForFileLister(&skipSymlinks, &skippedLinks)
localFilepathFilterCallback := createLocalFilenameFilterCallback(&illegalFileNames)

// now everything is prepared, prepare to loop over all folders
var archivableDatasetList []string
for _, datasetSourceFolder := range datasetPaths {
log.Printf("===== Ingesting: \"%s\" =====\n", datasetSourceFolder)
Expand All @@ -276,7 +281,7 @@ For Windows you need instead to specify -user username:password on the command l
// === get filelist of dataset ===
log.Printf("Getting filelist for \"%s\"...\n", datasetSourceFolder)
fullFileArray, startTime, endTime, owner, numFiles, totalSize, err :=
datasetIngestor.GetLocalFileList(datasetSourceFolder, datasetFileListTxt, &skipSymlinks)
datasetIngestor.GetLocalFileList(datasetSourceFolder, datasetFileListTxt, localSymlinkCallback, localFilepathFilterCallback)
if err != nil {
log.Fatalf("Can't gather the filelist of \"%s\"", datasetSourceFolder)
}
Expand Down Expand Up @@ -316,6 +321,7 @@ For Windows you need instead to specify -user username:password on the command l
// check if data is accessible at archive server, unless beamline account (assumed to be centrally available always)
// and unless (no)copy flag defined via command line
if checkCentralAvailability {
log.Println("Checking if data is centrally available...")
sshErr, otherErr := datasetIngestor.CheckDataCentrallyAvailableSsh(user["username"], RSYNCServer, datasetSourceFolder, os.Stdout)
if otherErr != nil {
log.Fatalln("Cannot check if data is centrally available:", otherErr)
Expand All @@ -341,6 +347,8 @@ For Windows you need instead to specify -user username:password on the command l
log.Fatalln("Further ingests interrupted because copying is needed, but no copy wanted.")
}
}
} else {
log.Println("Data is present centrally.")
}
}

Expand Down Expand Up @@ -375,7 +383,7 @@ For Windows you need instead to specify -user username:password on the command l
if err != nil {
log.Println("Couldn't add attachment:", err)
}
log.Printf("Attachment file %v added to dataset %v\n", addAttachment, datasetId)
log.Printf("Attachment file %v added to dataset %v\n", addAttachment, datasetId)
}
// === copying files ===
if copyFlag {
Expand Down Expand Up @@ -422,7 +430,17 @@ For Windows you need instead to specify -user username:password on the command l
log.Printf("Number of datasets not stored because of too many files:%v\nPlease note that this will cancel any subsequent archive steps from this job !\n", tooLargeDatasets)
}
color.Unset()
datasetIngestor.PrintFileInfos() // TODO: move this into cmd portion

// print file statistics
if skippedLinks > 0 {
color.Set(color.FgYellow)
log.Printf("Total number of link files skipped:%v\n", skippedLinks)
}
if illegalFileNames > 0 {
color.Set(color.FgRed)
log.Printf("Number of files ignored because of illegal filenames:%v\n", illegalFileNames)
}
color.Unset()

// stop here if empty datasets appeared
if emptyDatasets > 0 || tooLargeDatasets > 0 {
Expand Down Expand Up @@ -472,3 +490,93 @@ func init() {
datasetIngestorCmd.MarkFlagsMutuallyExclusive("testenv", "devenv", "localenv", "tunnelenv")
datasetIngestorCmd.MarkFlagsMutuallyExclusive("nocopy", "copy")
}

// createLocalSymlinkCallbackForFileLister builds the callback that decides, for
// each symbolic link found while listing a dataset, whether the link is kept in
// the dataset or skipped.
//
// skipSymlinks points to the current policy string; the callback reads it and,
// when it has to ask the user, overwrites it with the answer:
//   - "ka" / "kA": keep every link
//   - "sa" / "sA": skip every link
//   - "da" / "dA": keep only links whose target path starts with sourceFolder
//   - anything else: prompt on stdin and store the answer (empty answer => "d")
//
// skippedLinks is incremented once for every link the callback decides to skip.
//
// The returned callback reports (keep, err). err is non-nil only when the link
// itself cannot be read or its directory cannot be made absolute; in that case
// keep is false.
func createLocalSymlinkCallbackForFileLister(skipSymlinks *string, skippedLinks *uint) func(symlinkPath string, sourceFolder string) (bool, error) {
	// One scanner is shared by all invocations so that input already buffered
	// from stdin is not lost between prompts.
	scanner := bufio.NewScanner(os.Stdin)
	return func(symlinkPath string, sourceFolder string) (bool, error) {
		pointee, err := os.Readlink(symlinkPath)
		if err != nil {
			return false, err
		}
		if !filepath.IsAbs(pointee) {
			dir, absErr := filepath.Abs(filepath.Dir(symlinkPath))
			if absErr != nil {
				return false, absErr
			}
			// dir already is the absolute directory containing the link, so the
			// relative target is resolved against it directly. Joining
			// filepath.Dir(symlinkPath) a second time would duplicate the path.
			pabs := filepath.Join(dir, pointee)
			var evalErr error
			pointee, evalErr = filepath.EvalSymlinks(pabs)
			if evalErr != nil {
				// pointee is now empty; the policy below still decides the fate
				// of the link (an empty target never matches a non-empty
				// sourceFolder prefix).
				// NOTE(review): confirm whether unresolvable links should be
				// skipped regardless of the chosen policy.
				log.Printf("Could not follow symlink for file:%v %v", pabs, evalErr)
			}
		}

		var keep bool
		switch *skipSymlinks {
		case "ka", "kA":
			keep = true
		case "sa", "sA":
			keep = false
		case "da", "dA":
			keep = strings.HasPrefix(pointee, sourceFolder)
		default:
			color.Set(color.FgYellow)
			log.Printf("Warning: the file %s is a link pointing to %v.", symlinkPath, pointee)
			color.Unset()
			log.Printf(`
Please test if this link is meaningful and not pointing
outside the sourceFolder %s. The default behaviour is to
keep only internal links within a source folder.
You can also specify that you want to apply the same answer to ALL
subsequent links within the current dataset, by appending an a (dA,ka,sa).
If you want to give the same answer even to all subsequent datasets
in this command then specify a capital 'A', e.g. (dA,kA,sA)
Do you want to keep the link in dataset or skip it (D(efault)/k(eep)/s(kip) ?`, sourceFolder)
			// If stdin is closed, Scan fails and Text is empty, which selects
			// the default answer below.
			scanner.Scan()
			*skipSymlinks = scanner.Text()
			if *skipSymlinks == "" {
				*skipSymlinks = "d"
			}
			switch *skipSymlinks {
			case "d", "da", "dA":
				// "da" must apply the default rule to this link as well, not
				// only to the subsequent ones.
				keep = strings.HasPrefix(pointee, sourceFolder)
			case "s", "sa", "sA":
				keep = false
			default:
				keep = true
			}
		}

		if keep {
			color.Set(color.FgGreen)
			log.Printf("You chose to keep the link %v -> %v.\n\n", symlinkPath, pointee)
		} else {
			color.Set(color.FgRed)
			*skippedLinks++
			log.Printf("You chose to remove the link %v -> %v.\n\n", symlinkPath, pointee)
		}
		color.Unset()
		return keep, nil
	}
}

// createLocalFilenameFilterCallback builds the callback that rejects file paths
// which must not be archived.
//
// A path is rejected when it contains '*' or '\', or when it contains three
// consecutive blanks (triple blanks are used to separate columns in messages).
// Each rejection logs a warning and, if illegalFileNamesCounter is non-nil,
// increments the counter it points to.
//
// The returned callback reports true when the file may be kept.
func createLocalFilenameFilterCallback(illegalFileNamesCounter *uint) func(filepath string) bool {
	// Built with Repeat so the exact number of blanks is explicit and cannot be
	// lost to whitespace normalisation.
	tripleBlank := strings.Repeat(" ", 3)

	// countIllegal records one rejected file when a counter was supplied.
	countIllegal := func() {
		if illegalFileNamesCounter != nil {
			*illegalFileNamesCounter++
		}
	}

	// The parameter is named path so it does not shadow the path/filepath package.
	return func(path string) bool {
		// make sure that filenames do not contain characters like "\" or "*"
		if strings.ContainsAny(path, "*\\") {
			color.Set(color.FgRed)
			log.Printf("Warning: the file %s contains illegal characters like *,\\ and will not be archived.", path)
			color.Unset()
			countIllegal()
			return false
		}
		// and check for triple blanks, they are used to separate columns in messages
		if strings.Contains(path, tripleBlank) {
			color.Set(color.FgRed)
			log.Printf("Warning: the file %s contains 3 consecutive blanks which is not allowed. The file not be archived.", path)
			color.Unset()
			countIllegal()
			return false
		}
		return true
	}
}
Loading

0 comments on commit 7b55151

Please sign in to comment.