diff --git a/doc/rst/dcmp.1.rst b/doc/rst/dcmp.1.rst index 397c3018..32df5ee1 100644 --- a/doc/rst/dcmp.1.rst +++ b/doc/rst/dcmp.1.rst @@ -64,6 +64,10 @@ OPTIONS Open files with O_NOATIME flag. +.. option:: -H, --nohardlink + + Ignore hardlinks. + .. option:: --progress N Print progress message to stdout approximately every N seconds. diff --git a/doc/rst/dsync.1.rst b/doc/rst/dsync.1.rst index 248bf439..58d7b828 100644 --- a/doc/rst/dsync.1.rst +++ b/doc/rst/dsync.1.rst @@ -68,6 +68,10 @@ OPTIONS Delete extraneous files from destination. +.. option:: -H, --nohardlink + + Ignore hardlinks. + .. option:: -L, --dereference Dereference symbolic links and copy the target file or directory diff --git a/doc/rst/dwalk.1.rst b/doc/rst/dwalk.1.rst index 0e54f931..ce231589 100644 --- a/doc/rst/dwalk.1.rst +++ b/doc/rst/dwalk.1.rst @@ -70,6 +70,10 @@ OPTIONS Print files to the screen. +.. option:: -H, --nohardlink + + Ignore hardlinks. + .. option:: -L, --dereference Dereference symbolic links and walk the target file or directory diff --git a/man/dbcast.1 b/man/dbcast.1 index 263de20a..dc74b656 100644 --- a/man/dbcast.1 +++ b/man/dbcast.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DBCAST" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dbcast \- distributed broadcast . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DBCAST" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dbcast \- distributed broadcast .SH SYNOPSIS .sp \fBdbcast [OPTION] SRC DEST\fP @@ -56,7 +56,7 @@ number of MPI processes. .TP .B \-s, \-\-size SIZE The chunk size in bytes used to segment files during the broadcast. -Units like “MB” and “GB” should be immediately follow the number +Units like "MB" and "GB" should immediately follow the number without spaces (ex. 2MB). The default size is 1MB. 
It is recommended to use the stripe size of a file if this is known. .UNINDENT diff --git a/man/dbz2.1 b/man/dbz2.1 index e09d49b5..5f98dbe7 100644 --- a/man/dbz2.1 +++ b/man/dbz2.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DBZ2" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dbz2 \- distributed bz2 compression . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DBZ2" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dbz2 \- distributed bz2 compression .SH SYNOPSIS .sp \fBdbz2 [OPTIONS] [\-z|\-d] FILE\fP @@ -64,7 +64,7 @@ Overwrite the output file, if it exists. .TP .B \-b, \-\-blocksize SIZE Set the compression block size, from 1 to 9. -Where 1=100kB … and 9=900kB. Default is 9. +Where 1=100kB ... and 9=900kB. Default is 9. .UNINDENT .INDENT 0.0 .TP diff --git a/man/dchmod.1 b/man/dchmod.1 index 7babf687..fdecb231 100644 --- a/man/dchmod.1 +++ b/man/dchmod.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DCHMOD" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dchmod \- distributed tool to set permissions and group . .nr rst2man-indent-level 0 . @@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DCHMOD" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dchmod \- distributed tool to set permissions and group .SH SYNOPSIS .sp -\fBdchmod [OPTION] PATH …\fP +\fBdchmod [OPTION] PATH ...\fP .SH DESCRIPTION .sp Parallel MPI application to recursively change permissions and/or group @@ -62,8 +62,8 @@ Change group to specified GROUP name or numeric group id. .TP .B \-m, \-\-mode MODE The mode to apply to each item. MODE may be octal or symbolic syntax -similar to \fBchmod(1)\fP\&. 
In symbolic notation, “ugoa” are supported -as are “rwxX”. As with chmod, if no leading letter “ugoa” is provided, +similar to \fBchmod(1)\fP\&. In symbolic notation, "ugoa" are supported +as are "rwxX". As with chmod, if no leading letter "ugoa" is provided, mode bits are combined with umask to determine the actual mode. .UNINDENT .INDENT 0.0 @@ -73,7 +73,7 @@ Attempt to change every item. By default, dchmod avoids unncessary chown and chmod calls, for example trying to change the group on an item that already has the correct group, or trying to change the group on an item that is not owned by the user running the tool. -With –force, dchmod executes chown/chmod calls on every item. +With \-\-force, dchmod executes chown/chmod calls on every item. .UNINDENT .INDENT 0.0 .TP @@ -96,7 +96,7 @@ Only modify items whose full path matches REGEX, processed by .INDENT 0.0 .TP .B \-n, \-\-name -Change –exclude and –match to apply to item name rather than its +Change \-\-exclude and \-\-match to apply to item name rather than its full path. .UNINDENT .INDENT 0.0 @@ -151,7 +151,7 @@ regex: .sp \fBmpirun \-np 128 dchmod \-\-name \-\-exclude ‘afilename’ \-\-mode u+rw /directory\fP .sp -Note: You can use –match to change file permissions on all of the +Note: You can use \-\-match to change file permissions on all of the files/directories that match the regex. .SH SEE ALSO .sp diff --git a/man/dcmp.1 b/man/dcmp.1 index 4ccf673b..63eb2185 100644 --- a/man/dcmp.1 +++ b/man/dcmp.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DCMP" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dcmp \- distributed compare . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. 
+.TH "DCMP" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dcmp \- distributed compare .SH SYNOPSIS .sp \fBdcmp [OPTION] SRC DEST\fP @@ -54,17 +54,17 @@ in which case, each option should provide a different output file name. .INDENT 0.0 .TP .B \-t, \-\-text -Change –output to write files in text format rather than binary. +Change \-\-output to write files in text format rather than binary. .UNINDENT .INDENT 0.0 .TP .B \-b, \-\-base -Enable base checks and normal stdout results when –output is used. +Enable base checks and normal stdout results when \-\-output is used. .UNINDENT .INDENT 0.0 .TP .B \-\-bufsize SIZE -Set the I/O buffer to be SIZE bytes. Units like “MB” and “GB” may +Set the I/O buffer to be SIZE bytes. Units like "MB" and "GB" may immediately follow the number without spaces (e.g. 8MB). The default bufsize is 4MB. .UNINDENT @@ -72,8 +72,8 @@ bufsize is 4MB. .TP .B \-\-chunksize SIZE Multiple processes copy a large file in parallel by dividing it into chunks. -Set chunk to be at minimum SIZE bytes. Units like “MB” and -“GB” can immediately follow the number without spaces (e.g. 64MB). +Set chunk to be at minimum SIZE bytes. Units like "MB" and +"GB" can immediately follow the number without spaces (e.g. 64MB). The default chunksize is 4MB. .UNINDENT .INDENT 0.0 @@ -91,6 +91,11 @@ Use O_DIRECT to avoid caching file data. .UNINDENT .INDENT 0.0 .TP +.B \-\-open\-noatime +Open files with O_NOATIME flag. +.UNINDENT +.INDENT 0.0 +.TP .B \-\-progress N Print progress message to stdout approximately every N seconds. The number of seconds must be a non\-negative integer. @@ -125,7 +130,7 @@ Print the command usage, and the list of options available. .SH EXPRESSIONS .sp An expression is made up of one or more conditions, where each condition specifies a field and a state. -A single condition consists of a field name, an ‘=’ sign, and a state name. +A single condition consists of a field name, an \(aq=\(aq sign, and a state name. 
.sp Valid fields are listed below, along with the property of the entry that is checked. .TS @@ -288,7 +293,7 @@ CONTENT=COMMON => EXISTS=COMMON@TYPE=COMMON@SIZE=COMMON@CONTENT=COMMON .sp A successful check on any other field also implies that EXIST=COMMON. .sp -When used with the \-o option, one must also specify a file name at the end of the expression, separated with a ‘:’. +When used with the \-o option, one must also specify a file name at the end of the expression, separated with a \(aq:\(aq. The list of any entries that match the expression are written to the named file. For example, to list any entries matching the above expression to a file named outfile1, one should use the following option: @@ -303,7 +308,7 @@ one should use the following option: .UNINDENT .UNINDENT .sp -If the –base option is given or when no output option is specified, +If the \-\-base option is given or when no output option is specified, the following expressions are checked and numeric results are reported to stdout: .INDENT 0.0 .INDENT 3.5 diff --git a/man/dcp.1 b/man/dcp.1 index 0123e7d5..42b3a533 100644 --- a/man/dcp.1 +++ b/man/dcp.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DCP" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dcp \- distributed copy . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DCP" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dcp \- distributed copy .SH SYNOPSIS .sp \fBdcp [OPTION] SRC DEST\fP @@ -46,7 +46,7 @@ file system, and it splits large file copies across multiple processes. .INDENT 0.0 .TP .B \-\-bufsize SIZE -Set the I/O buffer to be SIZE bytes. Units like “MB” and “GB” may +Set the I/O buffer to be SIZE bytes. Units like "MB" and "GB" may immediately follow the number without spaces (e.g. 8MB). The default bufsize is 4MB. 
.UNINDENT @@ -54,14 +54,14 @@ bufsize is 4MB. .TP .B \-\-chunksize SIZE Multiple processes copy a large file in parallel by dividing it into chunks. -Set chunk to be at minimum SIZE bytes. Units like “MB” and -“GB” can immediately follow the number without spaces (e.g. 64MB). +Set chunk to be at minimum SIZE bytes. Units like "MB" and +"GB" can immediately follow the number without spaces (e.g. 64MB). The default chunksize is 4MB. .UNINDENT .INDENT 0.0 .TP .B \-\-xattrs WHICH -Copy extended attributes (“xattrs”) from source files to target files. +Copy extended attributes ("xattrs") from source files to target files. WHICH determines which xattrs are copied. Options are to copy no xattrs, all xattrs, xattrs not excluded by /etc/xattr.conf, or all xattrs except those which have special meaning to Lustre. Certain xattrs control Lustre @@ -104,7 +104,7 @@ that each symbolic link refers to. .B \-P, \-\-no\-dereference Do not follow symbolic links in source paths. Effectviely allows symbolic links to be copied when the link target is not valid -or there is not permission to read the link’s target. +or there is not permission to read the link\(aqs target. .UNINDENT .INDENT 0.0 .TP @@ -118,6 +118,11 @@ Use O_DIRECT to avoid caching file data. .UNINDENT .INDENT 0.0 .TP +.B \-\-open\-noatime +Open files with O_NOATIME flag. +.UNINDENT +.INDENT 0.0 +.TP .B \-S, \-\-sparse Create sparse files when possible. .UNINDENT @@ -130,6 +135,18 @@ A value of 0 disables progress messages. .UNINDENT .INDENT 0.0 .TP +.B \-G, \-\-gid GID +Set the effective group ID to perform the copy operation. The copy +may fail if the group does not have sufficient privileges. +.UNINDENT +.INDENT 0.0 +.TP +.B \-U, \-\-uid UID +Set the effective user ID to perform the copy operation. The copy +may fail if the user does not have sufficient privileges. +.UNINDENT +.INDENT 0.0 +.TP .B \-v, \-\-verbose Run in verbose mode. 
.UNINDENT diff --git a/man/ddup.1 b/man/ddup.1 index f5fb3041..6d6fbdc9 100644 --- a/man/ddup.1 +++ b/man/ddup.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DDUP" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -ddup \- report files with identical content . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DDUP" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +ddup \- report files with identical content .SH SYNOPSIS .sp \fBddup [OPTION] PATH\fP @@ -45,6 +45,11 @@ Multiple sets of duplicate files can be matched using this final reported hash. .SH OPTIONS .INDENT 0.0 .TP +.B \-\-open\-noatime +Open files with O_NOATIME flag, if possible. +.UNINDENT +.INDENT 0.0 +.TP .B \-d, \-\-debug LEVEL Set verbosity level. LEVEL can be one of: fatal, err, warn, info, dbg. .UNINDENT diff --git a/man/dfind.1 b/man/dfind.1 index d76e449a..93d28b11 100644 --- a/man/dfind.1 +++ b/man/dfind.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DFIND" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dfind \- distributed file filtering . .nr rst2man-indent-level 0 . @@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DFIND" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dfind \- distributed file filtering .SH SYNOPSIS .sp -\fBdfind [OPTION] [EXPRESSION] PATH …\fP +\fBdfind [OPTION] [EXPRESSION] PATH ...\fP .SH DESCRIPTION .sp Parallel MPI application to filter a list of files according to an expression. @@ -57,7 +57,7 @@ Write the processed list to a file. .INDENT 0.0 .TP .B \-t, \-\-text -Must be used with the –output option. Write processed list of files to +Must be used with the \-\-output option. 
Write processed list of files to FILE in ascii text format. .UNINDENT .INDENT 0.0 @@ -123,22 +123,22 @@ File was last accessed N days ago. .INDENT 0.0 .TP .B \-\-cmin N -File’s status was last changed N minutes ago. +File\(aqs status was last changed N minutes ago. .UNINDENT .INDENT 0.0 .TP .B \-\-cnewer FILE -File’s status was last changed more recently than FILE was modified. +File\(aqs status was last changed more recently than FILE was modified. .UNINDENT .INDENT 0.0 .TP .B \-\-ctime N -File’s status was last changed N days ago. +File\(aqs status was last changed N days ago. .UNINDENT .INDENT 0.0 .TP .B \-\-mmin N -File’s data was last modified N minutes ago. +File\(aqs data was last modified N minutes ago. .UNINDENT .INDENT 0.0 .TP @@ -148,12 +148,12 @@ File was modified more recently than FILE. .INDENT 0.0 .TP .B \-\-mtime N -File’s data was last modified N days ago. +File\(aqs data was last modified N days ago. .UNINDENT .INDENT 0.0 .TP .B \-\-gid N -File’s numeric group ID is N. +File\(aqs numeric group ID is N. .UNINDENT .INDENT 0.0 .TP @@ -163,7 +163,7 @@ File belongs to group NAME. .INDENT 0.0 .TP .B \-\-uid N -File’s numeric user ID is N. +File\(aqs numeric user ID is N. .UNINDENT .INDENT 0.0 .TP @@ -188,7 +188,7 @@ Full path to file matches POSIX regular expression REGEX. Regular expressions p .INDENT 0.0 .TP .B \-\-size N -File size is N bytes. Units can be used like ‘KB’, ‘MB’, ‘GB’. +File size is N bytes. Units can be used like \(aqKB\(aq, \(aqMB\(aq, \(aqGB\(aq. .UNINDENT .INDENT 0.0 .TP @@ -251,7 +251,7 @@ Print file name to stdout. .INDENT 0.0 .TP .B \-\-exec CMD ; -Execute command CMD on file. All following arguments are taken as arguments to the command until ‘;’ is encountered. The string ‘{}’ is replaced by the current file name. +Execute command CMD on file. All following arguments are taken as arguments to the command until \(aq;\(aq is encountered. The string \(aq{}\(aq is replaced by the current file name. 
.UNINDENT .SH EXAMPLES .INDENT 0.0 diff --git a/man/dreln.1 b/man/dreln.1 index 953adb4e..267d8ed8 100644 --- a/man/dreln.1 +++ b/man/dreln.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DRELN" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dreln \- distributed relink . .nr rst2man-indent-level 0 . @@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DRELN" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dreln \- distributed relink .SH SYNOPSIS .sp -\fBdreln [OPTION] OLDPATH NEWPATH PATH …\fP +\fBdreln [OPTION] OLDPATH NEWPATH PATH ...\fP .SH DESCRIPTION .sp Parallel MPI application to recursively update symlinks within a diff --git a/man/drm.1 b/man/drm.1 index 174f9dc4..a6c428e5 100644 --- a/man/drm.1 +++ b/man/drm.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DRM" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -drm \- distributed remove . .nr rst2man-indent-level 0 . @@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DRM" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +drm \- distributed remove .SH SYNOPSIS .sp -\fBdrm [OPTION] PATH…\fP +\fBdrm [OPTION] PATH...\fP .SH DESCRIPTION .sp Parallel MPI application to recursively delete a directory and its @@ -45,11 +45,11 @@ drm behaves like \fIrm \-rf\fP, but it is faster. .INDENT 0.0 .INDENT 3.5 DO NOT USE SHELL REGEX!!! -The –match and –exclude options use POSIX regex syntax. Because of +The \-\-match and \-\-exclude options use POSIX regex syntax. Because of this make sure that the shell does not try to interpret your regex before it gets passed to the program. You can generally use quotes around your regex to prevent the shell from expanding. 
An example of this using the -–match option with –dryrun would be: +\-\-match option with \-\-dryrun would be: .sp \fBmpirun \-np 128 drm \-\-dryrun \-v \-\-name \-\-match \(aqfile_.*\(aq /path/to/dir/*\fP .UNINDENT @@ -65,12 +65,12 @@ from the mpiFileUtils suite. .TP .B \-o, \-\-output FILE Write the list of items drm attempts to delete to FILE in mpiFileUtils format. -Format can be changed with –text option. +Format can be changed with \-\-text option. .UNINDENT .INDENT 0.0 .TP .B \-t, \-\-text -Must be used with the –output option. Write list of items drm attempts +Must be used with the \-\-output option. Write list of items drm attempts to delete to FILE in ascii text format. .UNINDENT .INDENT 0.0 @@ -97,22 +97,22 @@ Only remove items whose full path matches REGEX, processed by .INDENT 0.0 .TP .B \-\-name -Change –exclude and match to apply to item name rather than its +Change \-\-exclude and \-\-match to apply to item name rather than its full path. .UNINDENT .INDENT 0.0 .TP .B \-\-dryrun Print a list of files that \fBwould\fP be deleted without deleting -them. This is useful to check list of items satisfying –exclude or -–match options before actually deleting anything. +them. This is useful to check list of items satisfying \-\-exclude or +\-\-match options before actually deleting anything. .UNINDENT .INDENT 0.0 .TP .B \-\-aggressive This option will delete files during the walk phase, and then delete directories by level after the walk in drm. You cannot -use this option with –dryrun. +use this option with \-\-dryrun. .UNINDENT .INDENT 0.0 .TP diff --git a/man/dstripe.1 b/man/dstripe.1 index cfe6589c..c3ef32fd 100644 --- a/man/dstripe.1 +++ b/man/dstripe.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DSTRIPE" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dstripe \- restripe files on underlying storage . .nr rst2man-indent-level 0 . 
@@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DSTRIPE" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dstripe \- restripe files on underlying storage .SH SYNOPSIS .sp -\fBdstripe [OPTION] PATH…\fP +\fBdstripe [OPTION] PATH...\fP .SH DESCRIPTION .sp Parallel MPI application to restripe files. @@ -55,16 +55,16 @@ the lustre file system default is used. The default stripe count is .INDENT 0.0 .TP .B \-s, \-\-size STRIPE_SIZE -The stripe size to use during file restriping. Units like “MB” and -“GB” can immediately follow the number without spaces (ex. 2MB). The +The stripe size to use during file restriping. Units like "MB" and +"GB" can immediately follow the number without spaces (ex. 2MB). The default stripe size is 1MB. .UNINDENT .INDENT 0.0 .TP .B \-m, \-\-minsize SIZE The minimum size a file must be to be a candidate for restriping. -Files smaller than SIZE will not be restriped. Units like “MB” and -“GB” can immediately follow the number without spaces (ex. 2MB). The +Files smaller than SIZE will not be restriped. Units like "MB" and +"GB" can immediately follow the number without spaces (ex. 2MB). The default minimum file size is 0MB. .UNINDENT .INDENT 0.0 diff --git a/man/dsync.1 b/man/dsync.1 index 39380bcd..b27e55f4 100644 --- a/man/dsync.1 +++ b/man/dsync.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DSYNC" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dsync \- synchronize directory trees . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. 
+.TH "DSYNC" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dsync \- synchronize directory trees .SH SYNOPSIS .sp \fBdsync [OPTION] SRC DEST\fP @@ -57,7 +57,7 @@ Batch files into groups of up to size N during copy operation. .INDENT 0.0 .TP .B \-\-bufsize SIZE -Set the I/O buffer to be SIZE bytes. Units like “MB” and “GB” may +Set the I/O buffer to be SIZE bytes. Units like "MB" and "GB" may immediately follow the number without spaces (e.g. 8MB). The default bufsize is 4MB. .UNINDENT @@ -65,14 +65,14 @@ bufsize is 4MB. .TP .B \-\-chunksize SIZE Multiple processes copy a large file in parallel by dividing it into chunks. -Set chunk to be at minimum SIZE bytes. Units like “MB” and -“GB” can immediately follow the number without spaces (e.g. 64MB). +Set chunk to be at minimum SIZE bytes. Units like "MB" and +"GB" can immediately follow the number without spaces (e.g. 64MB). The default chunksize is 4MB. .UNINDENT .INDENT 0.0 .TP .B \-\-xattrs WHICH -Copy extended attributes (“xattrs”) from source files to target files. +Copy extended attributes ("xattrs") from source files to target files. WHICH determines which xattrs are copied. Options are to copy no xattrs, all xattrs, xattrs not excluded by /etc/xattr.conf, or all xattrs except those which have special meaning to Lustre. Certain xattrs control Lustre @@ -110,7 +110,7 @@ that each symbolic link refers to. .B \-P, \-\-no\-dereference Do not follow symbolic links in source paths. Effectviely allows symbolic links to be copied when the link target is not valid -or there is not permission to read the link’s target. +or there is not permission to read the link\(aqs target. .UNINDENT .INDENT 0.0 .TP @@ -119,6 +119,11 @@ Use O_DIRECT to avoid caching file data. .UNINDENT .INDENT 0.0 .TP +.B \-\-open\-noatime +Open files with O_NOATIME flag. +.UNINDENT +.INDENT 0.0 +.TP .B \-\-link\-dest DIR Create hardlink in DEST to files in DIR when file is unchanged rather than create a new file. 
One can use this option to conserve @@ -141,6 +146,11 @@ Create sparse files when possible. .UNINDENT .INDENT 0.0 .TP +.B \-H, \-\-nohardlink +Ignore hardlinks. +.UNINDENT +.INDENT 0.0 +.TP .B \-\-progress N Print progress message to stdout approximately every N seconds. The number of seconds must be a non\-negative integer. diff --git a/man/dtar.1 b/man/dtar.1 index 739f7d7b..1e711380 100644 --- a/man/dtar.1 +++ b/man/dtar.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DTAR" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dtar \- create and extract a tar archive . .nr rst2man-indent-level 0 . @@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "DTAR" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dtar \- create and extract a tar archive .SH SYNOPSIS .sp -\fBdtar [OPTION] \-c \-f ARCHIVE SOURCE…\fP +\fBdtar [OPTION] \-c \-f ARCHIVE SOURCE...\fP .sp \fBdtar [OPTION] \-x \-f ARCHIVE\fP .SH DESCRIPTION @@ -59,8 +59,8 @@ during extraction to benefit subsequent extractions of the same archive file. When extracting an archive, dtar skips the entry corresponding to its index. If other tools, like tar, are used to extract the archive, the index entry is extracted as a regular file that is placed in the current working directory -with a file extension of “.dtaridx” and having the same basename as the original archive file. -For an archive that was named “file.tar” when it was created, the dtar index file is named “file.tar.dtaridx”. +with a file extension of ".dtaridx" and having the same basename as the original archive file. +For an archive that was named "file.tar" when it was created, the dtar index file is named "file.tar.dtaridx". .SH LIMITATIONS .sp dtar only supports directories, regular files, and symlinks. @@ -137,13 +137,18 @@ Default does not record or extract xattrs. 
.UNINDENT .INDENT 0.0 .TP +.B \-\-open\-noatime +Open source files with O_NOATIME flag when creating archive. +.UNINDENT +.INDENT 0.0 +.TP .B \-\-fsync Call fsync before closing files after writing. .UNINDENT .INDENT 0.0 .TP .B \-\-bufsize SIZE -Set the I/O buffer to be SIZE bytes. Units like “MB” and “GB” may +Set the I/O buffer to be SIZE bytes. Units like "MB" and "GB" may immediately follow the number without spaces (e.g. 8MB). The default bufsize is 4MB. .UNINDENT @@ -151,8 +156,8 @@ bufsize is 4MB. .TP .B \-\-chunksize SIZE Multiple processes copy a large file in parallel by dividing it into chunks. -Set chunk to be at minimum SIZE bytes. Units like “MB” and -“GB” can immediately follow the number without spaces (e.g. 64MB). +Set chunk to be at minimum SIZE bytes. Units like "MB" and +"GB" can immediately follow the number without spaces (e.g. 64MB). The default chunksize is 4MB. .UNINDENT .INDENT 0.0 @@ -161,7 +166,7 @@ The default chunksize is 4MB. Set the memory limit to be SIZE bytes when reading archive files. For some archives, dtar can distribute the file across processes to store segments of the archive in memory for faster processing. -Units like “MB” and “GB” may immediately follow the number +Units like "MB" and "GB" may immediately follow the number without spaces (eg. 8MB). The default is 256MB. .UNINDENT .INDENT 0.0 diff --git a/man/dwalk.1 b/man/dwalk.1 index 3f58d2d5..a1842c4d 100644 --- a/man/dwalk.1 +++ b/man/dwalk.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "DWALK" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -dwalk \- distributed walk and list . .nr rst2man-indent-level 0 . @@ -30,9 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. 
+.TH "DWALK" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +dwalk \- distributed walk and list .SH SYNOPSIS .sp -\fBdwalk [OPTION] PATH …\fP +\fBdwalk [OPTION] PATH ...\fP .SH DESCRIPTION .sp Parallel MPI application to recursively walk and list contents in a @@ -57,12 +57,12 @@ from the mpiFileUtils suite. .TP .B \-o, \-\-output FILE Write the processed list to FILE in binary format. Format can be changed -With –text option. +with the \-\-text option. .UNINDENT .INDENT 0.0 .TP .B \-t, \-\-text -Must be used with the –output option. Write processed list of files to +Must be used with the \-\-output option. Write processed list of files to FILE in ascii text format. .UNINDENT .INDENT 0.0 @@ -103,6 +103,11 @@ Print files to the screen. .UNINDENT .INDENT 0.0 .TP +.B \-H, \-\-nohardlink +Ignore hardlinks. +.UNINDENT +.INDENT 0.0 +.TP .B \-L, \-\-dereference Dereference symbolic links and walk the target file or directory that each symbolic link refers to. diff --git a/man/mpifileutils.1 b/man/mpifileutils.1 index bc9ab3db..48ce2d43 100644 --- a/man/mpifileutils.1 +++ b/man/mpifileutils.1 @@ -1,8 +1,5 @@ .\" Man page generated from reStructuredText. . -.TH "MPIFILEUTILS" "1" "Feb 04, 2022" "0.11.1" "mpiFileUtils" -.SH NAME -mpifileutils \- mpiFileUtils Documentation . .nr rst2man-indent-level 0 . @@ -30,6 +27,9 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. +.TH "MPIFILEUTILS" "1" "Nov 07, 2023" "0.11.1" "mpiFileUtils" +.SH NAME +mpifileutils \- mpiFileUtils Documentation .SH OVERVIEW .sp High\-performance computing users generate large datasets using parallel applications that can run with thousands of processes. 
@@ -41,7 +41,7 @@ delivering orders of magnitude in performance speedup over their single\-process Furthermore, the libmfu library packages common functionality to simplify the creation of new tools, and it can even be invoked directly from within HPC applications. .sp -Video Overview: \fI\%“Scalable Management of HPC Datasets with mpiFileUtils”\fP, HPCKP‘20. +Video Overview: \fI\%"Scalable Management of HPC Datasets with mpiFileUtils"\fP, HPCKP\(aq20. .sp The figure below, taken from the above presentation, illustrates the potential performance improvement that one can achieve when scaling a tool like dcp to utilize more compute resources. @@ -89,7 +89,7 @@ To be certain of compatibility, it is recommended that one install libarchive\-3 mkdir install installdir=\(gapwd\(ga/install -wget https://github.com/libarchive/libarchive/releases/download/3.5.1/libarchive\-3.5.1.tar.gz +wget https://github.com/libarchive/libarchive/releases/download/v3.5.1/libarchive\-3.5.1.tar.gz tar \-zxf libarchive\-3.5.1.tar.gz cd libarchive\-3.5.1 ./configure \-\-prefix=$installdir @@ -136,6 +136,8 @@ Additional CMake options: .IP \(bu 2 \fB\-DENABLE_GPFS=[ON/OFF]\fP : specialization for GPFS, defaults to \fBOFF\fP .IP \(bu 2 +\fB\-DENABLE_HPSS=[ON/OFF]\fP : specialization for HPSS, defaults to \fBOFF\fP +.IP \(bu 2 \fB\-DENABLE_EXPERIMENTAL=[ON/OFF]\fP : build experimental tools, defaults to \fBOFF\fP .UNINDENT .SS DAOS support @@ -179,7 +181,7 @@ If HDF5 is installed under a standard system path then specifying the HDF5 path .sp To use \fI\%Spack\fP, it is recommended that one first create a \fIpackages.yaml\fP file to list system\-provided packages, like MPI. Without doing this, Spack will fetch and install an MPI library that may not work on your system. -Make sure that you’ve set up spack in your shell (see \fI\%these instructions\fP). +Make sure that you\(aqve set up Spack in your shell (see \fI\%these instructions\fP). 
.sp Once Spack has been configured, mpiFileUtils can be installed as: .INDENT 0.0 @@ -199,7 +201,7 @@ or to enable all features: .sp .nf .ft C -spack install mpifileutils +lustre +gpfs +experimental +spack install mpifileutils +lustre +gpfs +hpss +experimental .ft P .fi .UNINDENT @@ -223,7 +225,7 @@ cd deps wget https://github.com/hpc/libcircle/releases/download/v0.3/libcircle\-0.3.0.tar.gz wget https://github.com/llnl/lwgrp/releases/download/v1.0.4/lwgrp\-1.0.4.tar.gz wget https://github.com/llnl/dtcmp/releases/download/v1.1.4/dtcmp\-1.1.4.tar.gz - wget https://github.com/libarchive/libarchive/releases/download/3.5.1/libarchive\-3.5.1.tar.gz + wget https://github.com/libarchive/libarchive/releases/download/v3.5.1/libarchive\-3.5.1.tar.gz tar \-zxf libcircle\-0.3.0.tar.gz cd libcircle\-0.3.0 @@ -306,7 +308,7 @@ cmake ../mpifileutils .UNINDENT .UNINDENT .sp -The other way to use spack is to create a “view” to the installed dependencies. +The other way to use Spack is to create a "view" to the installed dependencies. Details on this are coming soon. .SS Project Design Principles .sp @@ -331,7 +333,7 @@ careful to learn the options of each tool. .SS Portability .sp The tools are intended to support common file systems used in HPC centers, like -Lustre, GPFS, and NFS. Additionally, methods in the library should be portable +Lustre, GPFS, NFS, and HPSS. Additionally, methods in the library should be portable and efficient across multiple file systems. Tool and library users can rely on mpiFileUtils to provide portable and performant implementations. .SS Composability @@ -403,7 +405,7 @@ One must launch the job using the MPI job launcher like mpirun or mpiexec. One .IP \(bu 2 Most tools do not checkpoint their progress. Be sure to request sufficient time in your allocation to allow the job to complete. One may need to start over from the beginning if a tool is interrupted. .IP \(bu 2 -One cannot pipe output of one tool to the input of another. 
However, the –input and –output file options are good approximations. +One cannot pipe output of one tool to the input of another. However, the \-\-input and \-\-output file options are good approximations. .IP \(bu 2 One cannot easily check the return codes of tools. Instead, inspect stdout and stderr output for errors. .UNINDENT @@ -538,7 +540,7 @@ dchmod \-\-group grp1 \-\-mode g+rw /path/to/walk .UNINDENT .UNINDENT .sp -drm is like “rm \-rf” but in parallel: +drm is like "rm \-rf" but in parallel: .INDENT 0.0 .INDENT 3.5 .sp @@ -575,7 +577,7 @@ dsync /path/src /path/dest .UNINDENT .UNINDENT .sp -For large directory trees, the –batch\-files option offers a type of checkpoint. +For large directory trees, the \-\-batch\-files option offers a type of checkpoint. It moves files in batches, and if interrupted, a restart picks up from the last completed batch.: .INDENT 0.0 .INDENT 3.5 @@ -588,7 +590,7 @@ dsync \-\-batch\-files 100000 /path/src /path/dest .UNINDENT .UNINDENT .sp -The tools can be composed in various ways using the –input and –output options. +The tools can be composed in various ways using the \-\-input and \-\-output options. For example, the following sequence of commands executes a purge operation, which deletes any file that has not been accessed in the past 180 days.: .INDENT 0.0 @@ -648,9 +650,9 @@ entries from one location to another or compare corresponding entries across two different lists. A file list can be serialized and written to or read from a file. .sp -Each MPI rank “owns” a portion of the list, and there are routines to step +Each MPI rank "owns" a portion of the list, and there are routines to step through the entries owned by that process. This portion is referred to as the -“local” list. Functions exist to get and set properties of the items in the +"local" list. Functions exist to get and set properties of the items in the local list, for example to get the path name, type, and size of a file. 
Functions dealing with the local list can be called by the MPI process independently of other MPI processes. diff --git a/src/common/mfu_flist.c b/src/common/mfu_flist.c index a0071f1e..b96aabf3 100644 --- a/src/common/mfu_flist.c +++ b/src/common/mfu_flist.c @@ -67,6 +67,9 @@ mfu_walk_opts_t* mfu_walk_opts_new(void) /* Don't dereference symbolic links by default */ opts->dereference = 0; + /* Don't ignore hardlinks by default */ + opts->nohardlink = 0; + return opts; } diff --git a/src/common/mfu_flist_walk.c b/src/common/mfu_flist_walk.c index 120976e4..c796ed5e 100644 --- a/src/common/mfu_flist_walk.c +++ b/src/common/mfu_flist_walk.c @@ -51,6 +51,7 @@ static flist_t* CURRENT_LIST; static int SET_DIR_PERMS; static int REMOVE_FILES; static int DEREFERENCE; +static int NOHARDLINK; static mfu_file_t** CURRENT_PFILE; /**************************************** @@ -535,6 +536,9 @@ static void walk_stat_process(CIRCLE_handle* handle) path, errno, strerror(errno)); return; } + if (!S_ISDIR(st.st_mode) && st.st_nlink > 1 && NOHARDLINK) { + return; + } /* increment our item count */ reduce_items++; @@ -605,6 +609,11 @@ void mfu_flist_walk_paths(uint64_t num_paths, const char** paths, DEREFERENCE = 1; } + /* if nohardlink is set to 1 then set global variable */ + NOHARDLINK = 0; + if (walk_opts->nohardlink) { + NOHARDLINK = 1; + } /* convert handle to flist_t */ flist_t* flist = (flist_t*) bflist; diff --git a/src/common/mfu_param_path.h b/src/common/mfu_param_path.h index 04b2d51e..901b1c3b 100644 --- a/src/common/mfu_param_path.h +++ b/src/common/mfu_param_path.h @@ -113,6 +113,7 @@ typedef struct { int remove; /* flag option to remove files during walk */ int use_stat; /* flag option on whether or not to stat files during walk */ int dereference; /* flag option to dereference symbolic links */ + int nohardlink; /* flag option to ignore hardlinks */ } mfu_walk_opts_t; typedef enum { diff --git a/src/dcmp/dcmp.c b/src/dcmp/dcmp.c index 2d2c0332..99453c46 100644 ---
a/src/dcmp/dcmp.c +++ b/src/dcmp/dcmp.c @@ -46,6 +46,7 @@ static void print_usage(void) #endif printf(" -s, --direct - open files with O_DIRECT\n"); printf(" --open-noatime - open files with O_NOATIME\n"); + printf(" -H, --nohardlink - ignore hardlink\n"); printf(" --progress - print progress every N seconds\n"); printf(" -v, --verbose - verbose output\n"); printf(" -q, --quiet - quiet output\n"); @@ -2122,6 +2123,7 @@ int main(int argc, char **argv) {"daos-api", 1, 0, 'x'}, {"direct", 0, 0, 's'}, {"open-noatime", 0, 0, 'U'}, + {"nohardlink", 0, 0, 'H'}, {"progress", 1, 0, 'R'}, {"verbose", 0, 0, 'v'}, {"quiet", 0, 0, 'q'}, @@ -2139,7 +2141,7 @@ int main(int argc, char **argv) unsigned long long bytes = 0; while (1) { int c = getopt_long( - argc, argv, "o:tbsvqldh", + argc, argv, "o:tbsvqldhH", long_options, &option_index ); @@ -2171,6 +2173,9 @@ int main(int argc, char **argv) copy_opts->buf_size = (size_t)bytes; } break; + case 'H': + walk_opts->nohardlink = 1; + break; case 'k': if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS || bytes == 0) { if (rank == 0) { diff --git a/src/dsync/dsync.c b/src/dsync/dsync.c index b7267fcc..db6270b2 100644 --- a/src/dsync/dsync.c +++ b/src/dsync/dsync.c @@ -69,6 +69,7 @@ static void print_usage(void) #endif printf(" -c, --contents - read and compare file contents rather than compare size and mtime\n"); printf(" -D, --delete - delete extraneous files from target\n"); + printf(" -H, --nohardlink - ignore hardlink\n"); printf(" -L, --dereference - copy original files instead of links\n"); printf(" -P, --no-dereference - don't follow links in source\n"); printf(" -s, --direct - open files with O_DIRECT\n"); @@ -3026,6 +3027,7 @@ int main(int argc, char **argv) {"daos-api", 1, 0, 'y'}, {"contents", 0, 0, 'c'}, {"delete", 0, 0, 'D'}, + {"nohardlink", 0, 0, 'H'}, {"dereference", 0, 0, 'L'}, {"no-dereference", 0, 0, 'P'}, {"direct", 0, 0, 's'}, @@ -3053,7 +3055,7 @@ int main(int argc, char **argv) while (1) { int c = getopt_long( - 
argc, argv, "b:cDso:LPSvqhX:", + argc, argv, "b:cDso:LPSvqhHX:", long_options, &option_index ); @@ -3113,6 +3115,9 @@ int main(int argc, char **argv) case 'D': options.delete = 1; break; + case 'H': + walk_opts->nohardlink = 1; + break; case 'L': /* turn on dereference. * turn off no_dereference */ diff --git a/src/dwalk/dwalk.c b/src/dwalk/dwalk.c index 3676c625..61f36540 100644 --- a/src/dwalk/dwalk.c +++ b/src/dwalk/dwalk.c @@ -319,6 +319,7 @@ static void print_usage(void) printf(" -d, --distribution : \n - print distribution by field\n"); printf(" -f, --file_histogram - print default size distribution of items\n"); printf(" -p, --print - print files to screen\n"); + printf(" -H, --nohardlink - ignore hardlink\n"); printf(" -L, --dereference - follow symbolic links\n"); printf(" --progress - print progress every N seconds\n"); printf(" -v, --verbose - verbose output\n"); @@ -390,6 +391,7 @@ int main(int argc, char** argv) {"distribution", 1, 0, 'd'}, {"file_histogram", 0, 0, 'f'}, {"print", 0, 0, 'p'}, + {"nohardlink", 0, 0, 'H'}, {"dereference", 0, 0, 'L'}, {"progress", 1, 0, 'R'}, {"verbose", 0, 0, 'v'}, @@ -401,7 +403,7 @@ int main(int argc, char** argv) int usage = 0; while (1) { int c = getopt_long( - argc, argv, "i:o:tls:d:fpLvqh", + argc, argv, "i:o:tls:d:fpLvqhH", long_options, &option_index ); @@ -432,6 +434,9 @@ int main(int argc, char** argv) case 'p': print = 1; break; + case 'H': + walk_opts->nohardlink = 1; + break; case 'L': walk_opts->dereference = 1; break;