diff --git a/CHANGELOG.md b/CHANGELOG.md index 86f9e4b..b65cf73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,29 @@ ### New features -- adding `requirements.txt` for easy creation of environment in "spacesavers2" docker (#68, @kopardev) +### Bug fixes -## Bug fixes +## spacesavers2 0.11.0 -- +### New features + +- Add `requirements.txt` for easy creation of environment in "spacesavers2" docker (#68, @kopardev) +- `grubbers` has new `--outfile` argument. +- `blamematrix` has now been moved into `mimeo`. +- `mimeo` files.gz always includes the original file as the first one in the filelist. +- `mimeo` now has kronatools compatible output. ktImportText is also run if in PATH to generate HTML report for duplicates only. (#46, @kopardev) +- Update documentation. + +### Bug fixes + +- `e2e` overhauled, improved and well commented. +- `grubbers` `--limit` can be < 1 GiB (float) (#70, @kopardev) +- `grubbers` output file format changed. New original file column added. Original file is required by `usurp`. +- `mimeo` `--duplicateonly` now correctly handles duplicates owned by different UIDs. (#71, @kopardev) + - Update `blamematrix` and to account for corrected duplicate handling in `mimeo`. +- `usurp` now uses the new "original file" column from `grubbers` while creating hard-links. +- Total size now closely resembles `df` results (fix #75 @kopardev) +- Files with future timestamps are handled correctly (fix #76, @kopardev) ## spacesavers2 0.10.2 diff --git a/README.md b/README.md index 90e8518..c31b6a2 100644 --- a/README.md +++ b/README.md @@ -20,19 +20,13 @@ Welcome! `spacesavers2`: > New improved parallel implementation of [`spacesavers`](https://github.com/CCBR/spacesavers). `spacesavers` is soon to be decommissioned! -> Note: `spacesavers2` requires [python version 3.11](https://www.python.org/downloads/release/python-3110/) or later and the [xxhash](https://pypi.org/project/xxhash/) library. These dependencies are already installed on biowulf (as a conda env). The environment for running `spacesavers2` can get set up using: -> -> ```bash -> . "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && \ -> conda activate py311 -> ``` +> Note: `spacesavers2` requires [python version 3.11](https://www.python.org/downloads/release/python-3110/) or later and the [xxhash](https://pypi.org/project/xxhash/) library. These dependencies are already installed on biowulf (as a conda env). ## `spacesavers2` has the following Basic commands: - spacesavers2_catalog - spacesavers2_mimeo - spacesavers2_grubbers -- spacesavers2_blamematrix - spacesavers2_e2e - spacesavers2_usurp diff --git a/docs/assets/images/spacesavers2.png b/docs/assets/images/spacesavers2.png index a1e9a16..7b1d669 100644 Binary files a/docs/assets/images/spacesavers2.png and b/docs/assets/images/spacesavers2.png differ diff --git a/docs/blamematrix.md b/docs/blamematrix.md deleted file mode 100644 index e4f60f6..0000000 --- a/docs/blamematrix.md +++ /dev/null @@ -1,38 +0,0 @@ -## spacesavers2_grubbers - -This takes in the `allusers.files.gz` generated by `spacesavers2_mimeo` and processes it to create a matrix with: - -- folder paths as row-names -- usernames as column-names -- duplicate bytes as values in the matrix - -Deleting these high-value duplicates first will have the biggest impact on the users overall digital footprint. - -### Inputs - -- `--filesgz` output file from `spacesavers2_mimeo`. -- `--limit` lower cut-off for output display (default 5 GiB). 
This means that duplicates with overall size of less than 5 GiB will not be displayed. - -```bash -% spacesavers2_blamematrix --help -spacesavers2_blamematrix:00000.19s:version: v0.5 -usage: spacesavers2_blamematrix [-h] -f FILESGZ [-l LEVEL] - -spacesavers2_blamematrix: get per user duplicate sizes at a given folder level (default 3) - -options: - -h, --help show this help message and exit - -f FILESGZ, --filesgz FILESGZ - spacesavers2_mimeo prefix.allusers.files.gz file - -l LEVEL, --level LEVEL - folder level to use for creating matrix - -Version: - v0.5 -Example: - > spacesavers2_blamematrix -f /output/from/spacesavers2_mimeo/prefix.allusers.files.gz -d 3 -``` - -### Outputs - -Counts matrix with duplicate bytes per user per folder. diff --git a/docs/catalog.md b/docs/catalog.md index 6405d9b..5b51044 100644 --- a/docs/catalog.md +++ b/docs/catalog.md @@ -58,7 +58,7 @@ Example: `spacesavers2_catalog` creates one semi-colon seperated output line per input file. Here is an example line: ```bash -% head -n1 test.ls_out +% head -n1 test.catalog "/data/CBLCCBR/kopardevn_tmp/spacesavers2_testing/_data_CCBR_Pipeliner_db_PipeDB_Indices.ls.old";False;1653453;47;372851499;1;1;5;5;37513;57886;4707e661a1f3beca1861b9e0e0177461;52e5038016c3dce5b6cdab635765cc79; ``` The 13 items in the line are as follows: diff --git a/docs/e2e.md b/docs/e2e.md index a47b59e..d81e7aa 100644 --- a/docs/e2e.md +++ b/docs/e2e.md @@ -18,12 +18,16 @@ End-to-end run of spacesavers2 options: -h, --help show this help message and exit - -i INFOLDER, --infolder INFOLDER - Folder to run spacesavers_ls on. + -f FOLDER, --folder FOLDER + Folder to run spacesavers_catalog on. -p THREADS, --threads THREADS number of threads to use + -d MAXDEPTH, --maxdepth MAXDEPTH + maxdepth for mimeo + -l LIMIT, --limit LIMIT + limit for running spacesavers_grubbers -q QUOTA, --quota QUOTA total size of the volume (default = 200 for /data/CCBR) -o OUTFOLDER, --outfolder OUTFOLDER - Folder where all spacesavers_finddup output files will be saved + Folder where all spacesavers_e2e output files will be saved ``` \ No newline at end of file diff --git a/docs/grubbers.md b/docs/grubbers.md index 5c38682..a71e9e2 100644 --- a/docs/grubbers.md +++ b/docs/grubbers.md @@ -1,35 +1,38 @@ ## spacesavers2_grubbers -This takes in the `.files.gz` generated by `spacesavers2_mimeo` and processes it to: +This takes in the `mimeo.files.gz` generated by `spacesavers2_mimeo` and processes it to: - sort duplicates by total size - reports the "high-value" duplicates. -Deleting these high-value duplicates first will have the biggest impact on the users overall digital footprint +Deleting these high-value duplicates first will have the biggest impact on the users overall digital footprint. ### Inputs - `--filesgz` output file from `spacesavers2_mimeo`. -- `--limit` lower cut-off for output display (default 5 GiB). This means that duplicates with overall size of less than 5 GiB will not be displayed. +- `--limit` lower cut-off for output display (default 5 GiB). This means that duplicates with overall size of less than 5 GiB will not be displayed. Set 0 to report all. 
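+
+As a quick sketch of a typical invocation (the input and output paths below are only placeholders), the sorted table can be written to a file instead of STDOUT, and fractional limits such as 0.5 GiB are accepted because `--limit` is parsed as a float:
+
+```bash
+# keep duplicates whose combined size exceeds 0.5 GiB and save the table as TSV
+spacesavers2_grubbers \
+    --filesgz /output/from/spacesavers2_mimeo/prefix.allusers.mimeo.files.gz \
+    --limit 0.5 \
+    --outfile prefix.allusers.grubbers.tsv
+```
+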
```bash -% spacesavers2_grubbers --help -spacesavers2_grubbers:00000.01s:version: v0.5 -usage: spacesavers2_grubbers [-h] -f FILESGZ [-l LIMIT] +╰─○ spacesavers2_grubbers --help +spacesavers2_grubbers:00000.00s:version: v0.10.2-dev +usage: spacesavers2_grubbers [-h] -f FILESGZ [-l LIMIT] [-o OUTFILE] [-v] spacesavers2_grubbers: get list of large duplicates sorted by total size options: -h, --help show this help message and exit -f FILESGZ, --filesgz FILESGZ - spacesavers2_mimeo prefix..files.gz file + spacesavers2_mimeo prefix..mimeo.files.gz file -l LIMIT, --limit LIMIT - stop showing duplicates with total size smaller then (5 default) GiB + stop showing duplicates with total size smaller than (5 default) GiB. Set 0 for unlimited. + -o OUTFILE, --outfile OUTFILE + output tab-delimited file (default STDOUT) + -v, --version show program's version number and exit Version: - v0.5 + v0.10.2-dev Example: - > spacesavers2_grubbers -f /output/from/spacesavers2_mimeo/prefix.files.gz + > spacesavers2_grubbers -f /output/from/spacesavers2_finddup/prefix.files.gz ``` ### Outputs @@ -40,18 +43,16 @@ The output is displayed on STDOUT and is tab-delimited with these columns: | ------ | ------------------------------------- | | 1 | combined hash | | 2 | number of duplicates found | -| 3 | total size of all duplicates | -| 4 | size of each duplicate | -| 5 | ";"-separated list of duplicates | -| 6 | duplicate files | +| 3 | total size of all duplicates (human readable) | +| 4 | size of each duplicate (human readable) | +| 5 | original file | +| 6 | ";"-separated list of duplicates files | Here is an example output line: ```bash -ca269c980de3f0d8e6668b88d9065c8f#5003f92f52d71437741e4e79c4339a66 3 21.99 GiB 7.33 GiB "/data/CCBR/ccbr754_Yoshimi/ccbr754/workdir_170403_postinitialrnas -eq2/0h_1_S25.p2.Aligned.toTranscriptome.sorted.bam";"/data/CCBR/ccbr754_Yoshimi/ccbr754targz/data/CCBR/projects/ccbr754/workdir_170403_postinitialrnaseq2/0h_1_S25.p2.Aligned.toTr -anscriptome.sorted.bam";"/data/CCBR/ccbr754_Yoshimi/ccbr754targz/data/CCBR/projects/ccbr754/workdir_170403_postinitialrnaseq2/0h_1_S25.p2.Aligned.toTranscriptome.sorted.sorted.ba -m" +183e9dc341073d9b75c817f5ed07b9ac#183e9dc341073d9b75c817f5ed07b9ac 5 0.07 KiB 0.01 KiB "/data/CCBR/abdelmaksoudaa/test/a" "/data/CCBR/abdelmaksoudaa/test/b";"/data/CCBR/abde +lmaksoudaa/test/c";"/data/CCBR/abdelmaksoudaa/test/d";"/data/CCBR/abdelmaksoudaa/test/e";"/data/CCBR/abdelmaksoudaa/test/f" ``` > `spacesavers2_grubbers` is typical used to find the "low-hanging" fruits ... aka ... the "high-value" duplicates which need to be deleted first to quickly have the biggest impact on the users overall digital footprint. \ No newline at end of file diff --git a/docs/mimeo.md b/docs/mimeo.md index 215ae48..2336721 100644 --- a/docs/mimeo.md +++ b/docs/mimeo.md @@ -1,13 +1,13 @@ ## spacesavers2_mimeo -This takes in the `ls_out` generated by `spacesavers2_catalog` and processes it to: +This takes in the `catalog` file generated by `spacesavers2_catalog` and processes it to: - find duplicates - create per-user summary reports for each user (and all users). ### Inputs -- `--lsout` is the output file from `spacesavers2_catalog`. Thus, `spacesavers2_catalog` needs to be run before running `spacesavers2_mimeo`. +- `--catalog` is the output file from `spacesavers2_catalog`. Thus, `spacesavers2_catalog` needs to be run before running `spacesavers2_mimeo`. 
- `--maxdepth` maximum folder depth upto which reports are aggregated - `--outdir` path to the output folder - `--prefix` prefix to be added to the output file names eg. date etc. @@ -16,17 +16,16 @@ This takes in the `ls_out` generated by `spacesavers2_catalog` and processes it ```bash % spacesavers2_mimeo --help -spacesavers2_mimeo:00000.02s:version: v0.5 -usage: spacesavers2_mimeo [-h] -f LSOUT [-d MAXDEPTH] [-o OUTDIR] [-p PREFIX] [-q QUOTA] [-z | --duplicatesonly | --no-duplicatesonly] +usage: spacesavers2_mimeo [-h] -f CATALOG [-d MAXDEPTH] [-o OUTDIR] [-p PREFIX] [-q QUOTA] [-z | --duplicatesonly | --no-duplicatesonly] [-k | --kronaplot | --no-kronaplot] [-v] spacesavers2_mimeo: find duplicates options: -h, --help show this help message and exit - -f LSOUT, --catalog LSOUT + -f CATALOG, --catalog CATALOG spacesavers2_catalog output from STDIN or from catalog file -d MAXDEPTH, --maxdepth MAXDEPTH - folder max. depth upto which reports are aggregated + folder max. depth upto which reports are aggregated ... absolute path is used to calculate depth (Default: 10) -o OUTDIR, --outdir OUTDIR output folder -p PREFIX, --prefix PREFIX @@ -35,16 +34,21 @@ options: total quota of the mount eg. 200 TB for /data/CCBR -z, --duplicatesonly, --no-duplicatesonly Print only duplicates to per user output file. + -k, --kronaplot, --no-kronaplot + Make kronaplots for duplicates.(ktImportText must be in PATH!) + -v, --version show program's version number and exit Version: - v0.5 + v0.10.2-dev Example: - > spacesavers2_mimeo -f /output/from/spacesavers2_catalog -o /path/to/output/folder -d 7 -q 10 + > spacesavers2_mimeo -f /output/from/spacesavers2_catalog -o /path/to/output/folder -d 7 -q 10 -k ``` ### Outputs -After completion of run, `spacesavers2_mimeo` creates `.files.gz` (list of duplicate files) and `.summary.txt` (overall stats at various depths) files in the provided output folder. Here are the details: +After completion of run, `spacesavers2_mimeo` creates `*.mimeo.files.gz` (list of files per user + one "allusers" file) and `.summary.txt` (overall stats at various depths) files in the provided output folder. if `-k` is provided (and ktImportText from [kronatools](https://github.com/marbl/Krona/wiki/KronaTools) is in PATH) then krona specific TSV and HTML pages are also generated. It also generates a `blamematrix.tsv` file with folders on rows and users on columns with duplicate bytes per-folder-per-user. This file can be used to create a "heatmap" to pinpoint folder with highest duplicates overall as well as on a per-user basis. + +Here are the details: #### Duplicates @@ -54,7 +58,7 @@ After completion of run, `spacesavers2_mimeo` creates `.files.gz` (list of dupli - Check if each bin has unique sized files. If a bin has more than 1 size, then it needs to be binned further. Sometimes, xxHash of top and bottom chunks also gives the same combination of hash for differing files. These files will have different sizes. Hence, re-bin them accordingly. - If same size, then check inodes. If all files in the same bin have the same inode, then these are just hard-links. But, if there are multiple inodes, then we have **duplicates**! - If we have duplicates, then `spacesavers2_mimeo` keeps track of number of duplicates per bin. Number of duplicates is equal to number of inodes in each bin minus one. -- If we have duplicates, then the oldest find is identified and considered to be the original file. All other files are marked _duplicate_, irrespective of user id. 
+- If we have duplicates, then the oldest file is identified and considered to be the original file. All other files are marked _duplicate_, irrespective of user id. - duplicate files are reported in gzip format with the following columns for all users and per-user basis Here is what the `.files.gz` file columns (space-separated) represent: @@ -63,17 +67,19 @@ Here is what the `.files.gz` file columns (space-separated) represent: | ------ | ------------------------------------------------ | | 1 | top chunk and bottom chunk hashes separated by "#" | | 2 | separator ":" | -| 3 | Number of duplicates | +| 3 | Number of duplicates files (not duplicate inodes) | | 4 | Size of each file | | 5 | List of users duplicates serapated by "##" | -Each file in the last column above is ":" separated with the same 13 items as described in the `ls_out` file. The only difference is that the user id and group id are now replaced by user name and group name. +> NOTE: Number of dupicate files can be greater than number of duplicate inodes as each file can have multiple hard links already. Hence, while calculating total duplicate bytes we use (total_number_of_unique_inodes_per_group_of_duplicate_files - 1) X size_of_each_file. The "minus 1" is to not count the size of the original file. + +Each file in the last column above is ";" separated with the same 13 items as described in the `catalog` file. The only difference is that the username and groupame are now appended to each file entry. -Along with creating one `.files.gz` and `.summary.txt` file per user encountered, `spacesavers2_mimeo` also generates a `allusers.files.gz` file for all users combined. This file is later used by `spacesavers2_blamematrix` as input. +Along with creating one `.mimeo.files.gz` and `.mimeo.summary.txt` file per user encountered, `spacesavers2_mimeo` also generates a `allusers.mimeo.files.gz` file for all users combined. This file is later used by `spacesavers2_blamematrix` as input. #### Summaries -Summaries, files ending with `.summary.txt` are collected and reported for all users (`allusers.summary.txt`) and per-user (`USERNAME.summary.txt`) basis for user-defined depth (and beyond). The columns (tab-delimited) in the summary file: +Summaries, files ending with `.mimeo.summary.txt` are collected and reported for all users (`allusers.mimeo.summary.txt`) and per-user (`USERNAME.mimeo.summary.txt`) basis for user-defined depth (and beyond). The columns (tab-delimited) in the summary file: | Column | Description | | ------ | ------------------------------------- | @@ -93,3 +99,13 @@ Summaries, files ending with `.summary.txt` are collected and reported for all u For columns 10 through 13, the same logic is used as [spacesavers](https://ccbr.github.io/spacesavers/usage/df/). +#### KronaTSV and KronaHTML + +- KronaTSV is tab-delimited with first column showing the number of duplicate bytes and every subsequent column giving the folder depths. +- ktImportText is then used to convert the KronaTSV to KronaHTML which can be shared easily and only needs a HTML5 supporting browser for viewing. 
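+
+If `ktImportText` was not available in `PATH` during the run, only the TSV is written; the HTML can be regenerated later by hand with the same command that `spacesavers2_mimeo` runs internally (file names below are illustrative):
+
+```bash
+# on biowulf, kronatools can be loaded as a module first: module load kronatools
+ktImportText prefix.allusers.mimeo.krona.tsv -o prefix.allusers.mimeo.krona.html
+```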
+ +#### Blamematrix + +- rows are folders as 1 level deeper than the "mindepth" +- columns are all individual usernames, plus an "allusers" column +- only duplicate-bytes are reported \ No newline at end of file diff --git a/docs/usurp.md b/docs/usurp.md index 3580055..708c6ad 100644 --- a/docs/usurp.md +++ b/docs/usurp.md @@ -19,10 +19,10 @@ The GRUBBER file has the following columns: | ------ | ------------------------------------- | | 1 | combined hash | | 2 | number of duplicates found | -| 3 | total size of all duplicates | -| 4 | size of each duplicate | -| 5 | ";"-separated list of duplicates | -| 6 | duplicate files | +| 3 | total size of all duplicates (human readable) | +| 4 | size of each duplicate (human readable) | +| 5 | original file | +| 6 | ";"-separated list of duplicates files | ```bash usage: spacesavers2_usurp [-h] -g GRUBBER -x HASH [-f | --force | --no-force] diff --git a/mkdocs.yml b/mkdocs.yml index 3f8197e..3929983 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -102,6 +102,5 @@ nav: - catalog: catalog.md - mimeo: mimeo.md - grubbers: grubbers.md - - blamematrix: blamematrix.md - usurp: usurp.md - e2e: e2e.md diff --git a/spacesavers2_blamematrix b/spacesavers2_blamematrix deleted file mode 100755 index 20cf569..0000000 --- a/spacesavers2_blamematrix +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 -import sys -import os -import gzip -import textwrap -import time - -from src.VersionCheck import version_check -from src.VersionCheck import __version__ - -version_check() - -# from src.FileDetails import FileDetails -from src.dfUnit import fgzblamer - -# from src.Summary import Summary -from src.utils import * -from datetime import date - -import argparse - - -def main(): - start = time.time() - scriptname = os.path.basename(__file__) - elog = textwrap.dedent( - """\ - Version: - {} - Example: - > spacesavers2_blamematrix -f /output/from/spacesavers2_finddup/prefix.allusers.files.gz -d 3 - """.format( - __version__ - ) - ) - parser = argparse.ArgumentParser( - description="spacesavers2_blamematrix: get per user duplicate sizes at a given folder level (default 3)", - epilog=elog, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - - parser.add_argument( - "-f", - "--filesgz", - dest="filesgz", - required=True, - type=str, - default=sys.stdin, - help="spacesavers2_mimeo prefix.allusers.files.gz file", - ) - parser.add_argument( - "-l", - "--level", - dest="level", - required=False, - type=int, - default=3, - help="folder level to use for creating matrix", - ) - parser.add_argument("-v", "--version", action="version", version=__version__) - - print_with_timestamp( - start=start, scriptname=scriptname, string="version: {}".format(__version__) - ) - - global args - args = parser.parse_args() - - blamematrix = dict() - blamematrix["allusers"] = dict() - with gzip.open(os.path.join(args.filesgz), "rt") as filesgz: - for l in filesgz: - dfu = fgzblamer() - properly_set = dfu.set(l, args.level) - if not properly_set: - continue - for user in dfu.users: - if not user in blamematrix: - blamematrix[user] = dict() - for folder in dfu.bm[user]: - if not folder in blamematrix[user]: - blamematrix[user][folder] = 0 - if not folder in blamematrix["allusers"]: - blamematrix["allusers"][folder] = 0 - blamematrix[user][folder] += dfu.bm[user][folder] - blamematrix["allusers"][folder] += dfu.bm[user][folder] - - users = list(blamematrix.keys()) - folders = list(blamematrix["allusers"].keys()) - users2 = ["folder"] - users2.extend(users) - print("\t".join(users2)) - for folder 
in folders: - print(folder, end="") - for user in users: - try: - hrsize = get_human_readable_size(blamematrix[user][folder]) - except KeyError: - hrsize = "0" - print("\t{}".format(hrsize), end="") - print("") - print_with_timestamp(start=start, scriptname=scriptname, string="Done!") - - -if __name__ == "__main__": - main() diff --git a/spacesavers2_catalog b/spacesavers2_catalog index 5c98749..30b0d35 100755 --- a/spacesavers2_catalog +++ b/spacesavers2_catalog @@ -17,19 +17,17 @@ from pathlib import Path def task(f): - if not os.path.isfile(f): - return "" - else: - fd = FileDetails() - fd.initialize( - f, - buffersize=args.buffersize, - thresholdsize=args.ignoreheadersize, - tb=args.buffersize, - sed=sed, - bottomhash=args.bottomhash, - ) - return "%s" % (fd) + fd = FileDetails() + fd.initialize( + f, + buffersize=args.buffersize, + thresholdsize=args.ignoreheadersize, + tb=args.buffersize, + sed=sed, + bottomhash=args.bottomhash, + st_block_byte_size=args.st_block_byte_size, + ) + return "%s" % (fd) def main(): @@ -63,7 +61,7 @@ def main(): required=False, type=int, default=4, - help="number of threads to be used", + help="number of threads to be used (default 4)", ) parser.add_argument( "-b", @@ -72,7 +70,7 @@ def main(): required=False, type=int, default=128 * 1024, - help="buffersize for xhash creation", + help="buffersize for xhash creation (default=128 * 1028 bytes)", ) parser.add_argument( "-i", @@ -81,16 +79,25 @@ def main(): required=False, type=int, default=1024 * 1024 * 1024, - help="this sized header of the file is ignored before extracting buffer of buffersize for xhash creation (only for special extensions files)", + help="this sized header of the file is ignored before extracting buffer of buffersize for xhash creation (only for special extensions files) default = 1024 * 1024 * 1024 bytes", ) parser.add_argument( - "-s", + "-x", "--se", dest="se", required=False, type=str, default="bam,bai,bigwig,bw,csi", - help="comma separated list of special extensions", + help="comma separated list of special extensions (default=bam,bai,bigwig,bw,csi)", + ) + parser.add_argument( + "-s", + "--st_block_byte_size", + dest="st_block_byte_size", + required=False, + default=512, + type=int, + help="st_block_byte_size on current filesystem (default 512)", ) parser.add_argument( "-o", @@ -120,7 +127,9 @@ def main(): folder = args.folder p = Path(folder) - files = p.glob("**/*") + files = [p] + files2 = p.glob("**/*") + files.extend(files2) if args.outfile: outfh = open(args.outfile, "w") diff --git a/spacesavers2_e2e b/spacesavers2_e2e index 5fe2a3e..57d2bd2 100755 --- a/spacesavers2_e2e +++ b/spacesavers2_e2e @@ -11,10 +11,12 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) ARGPARSE_DESCRIPTION="End-to-end run of spacesavers2" source ${SCRIPT_DIR}/resources/argparse.bash || exit 1 argparse "$@" < ${OUTFOLDER}/${outfile_catalog} 2> ${OUTFOLDER}/${outfile_catalog_err} +spacesavers2_catalog \ + --folder $FOLDER \ + --threads $THREADS \ + --outfile ${outfile_catalog} \ + --bottomhash \ + > ${outfile_catalog_log} 2> ${outfile_catalog_err} fi + +sleep 60 + +# spacesavers2_mimeo if [ "$?" 
== "0" ];then -echo "Running spacesavers2_mimeo" && \ -spacesavers2_mimeo -f ${OUTFOLDER}/${outfile_catalog} -o ${OUTFOLDER} -q $QUOTA -z -d 3 -p $prefix > ${OUTFOLDER}/${outfile_mimeo_log} 2> ${OUTFOLDER}/${outfile_mimeo_err} +echo "Running spacesavers2_mimeo" +command -V ktImportText 2>/dev/null || module load kronatools || (>&2 echo "module kronatools could not be loaded") +spacesavers2_mimeo \ + --catalog ${outfile_catalog} \ + --outdir ${OUTFOLDER} \ + --quota $QUOTA \ + --duplicatesonly \ + --maxdepth $MAXDEPTH \ + --p $prefix \ + --kronaplot \ + > ${outfile_mimeo_log} 2> ${outfile_mimeo_err} fi + +sleep 60 + +# spacesavers2_grubbers if [ "$?" == "0" ];then echo "Running spacesavers2_grubbers" && \ -for f in `ls ${OUTFOLDER}/${prefix}*files.gz`;do - outfile=`echo $f|sed "s/files.gz/grubbers.tsv/g"` - errfile=`echo $f|sed "s/files.gz/grubbers.err/g"` - spacesavers2_grubbers -f $f > $outfile 2> $errfile +for filegz in `ls ${OUTFOLDER}/${prefix}*files.gz`;do + outfile=`echo $filegz|sed "s/mimeo.files.gz/grubbers.tsv/g"` + logfile=`echo $filegz|sed "s/mimeo.files.gz/grubbers.log/g"` + errfile=`echo $filegz|sed "s/mimeo.files.gz/grubbers.err/g"` + spacesavers2_grubbers \ + --filesgz $filegz \ + --limit $LIMIT \ + --outfile $outfile \ + > $logfile 2> $errfile done fi -if [ "$?" == "0" ];then -echo "Running spacesavers2_blamematrix" && \ -spacesavers2_blamematrix -f ${OUTFOLDER}/${prefix}.allusers.files.gz > ${OUTFOLDER}/${prefix}.blamematrix.tsv 2> ${OUTFOLDER}/${prefix}.blamematrix.err -fi -echo "Done!" + +echo "Done!" \ No newline at end of file diff --git a/spacesavers2_grubbers b/spacesavers2_grubbers index 77a4709..adc2c59 100755 --- a/spacesavers2_grubbers +++ b/spacesavers2_grubbers @@ -46,16 +46,24 @@ def main(): required=True, type=str, default=sys.stdin, - help="spacesavers2_mimeo prefix..files.gz file", + help="spacesavers2_mimeo prefix..mimeo.files.gz file", ) parser.add_argument( "-l", "--limit", dest="limit", required=False, - type=int, + type=float, default=5, - help="stop showing duplicates with total size smaller then (5 default) GiB", + help="stop showing duplicates with total size smaller than (5 default) GiB. Set 0 for unlimited.", + ) + parser.add_argument( + "-o", + "--outfile", + dest="outfile", + required=False, + type=str, + help="output tab-delimited file (default STDOUT)", ) parser.add_argument("-v", "--version", action="version", version=__version__) print_with_timestamp( @@ -70,24 +78,33 @@ def main(): for l in filesgz: dfu = fgz() properly_set = dfu.set(l) - if not properly_set: + if not properly_set: # could not read line properly or there are no duplicates continue + if dfu.ndup == 0: continue # in case mimeo was run without -z dups.append(dfu) - dups.sort() + dups.sort() # look at __lt__ ... 
its sorting from highest to lowest totalsize saved = 0 top_limit = args.limit * 1024 * 1024 * 1024 # 5 GiB + if args.outfile: + of = open(args.outfile, "w") + else: + of = sys.stdout + for fgitem in dups: - saved += fgitem.totalsize - if fgitem.totalsize < top_limit: + if fgitem.totalsize <= top_limit: break - print(fgitem) + saved += fgitem.totalsize + of.write("%s\n"%(fgitem)) + + if args.outfile: + of.close() - saved = get_human_readable_size(saved) + hrsaved = get_human_readable_size(saved) print_with_timestamp( start=start, scriptname=scriptname, - string="Deleting top grubbers will save {}!".format(saved), + string="Deleting top grubbers will save {} [ {} Bytes ] !".format(hrsaved,saved), ) print_with_timestamp(start=start, scriptname=scriptname, string="Done!") diff --git a/spacesavers2_mimeo b/spacesavers2_mimeo index 9212e41..597f0d5 100755 --- a/spacesavers2_mimeo +++ b/spacesavers2_mimeo @@ -4,6 +4,8 @@ import os import gzip import textwrap import time +import shutil +import subprocess MINDEPTH = 3 QUOTA_TB = 20 @@ -16,12 +18,12 @@ version_check() from src.FileDetails import FileDetails from src.dfUnit import dfUnit from src.Summary import Summary +from src.Summary import pathlen from src.utils import * from datetime import date import argparse - def process_hh( uid, hashhash, @@ -30,57 +32,64 @@ def process_hh( maxdepth, uid2uname, gid2gname, - peruser_perfolder_summaries, + perfolder_summaries, + perfolder_dups, user_output, ): for h in hashhash.keys(): - split_required = False - hashhash[h].compute( - hashhashsplits, split_required + # if files have the same forward and reverse hashes but different sizes then + # hashes are split into multiple hashes with suffix + # being added to the bottom hash for each size + split_required = hashhash[h].compute( + hashhashsplits ) # compute if split is needed or if we have duplicates if split_required: - continue # split is required so move on to the next hash + continue # split is required so move on to the next hash as new hashes with have been created by compute and added to hashhashsplits ... deal with them there! + # get indexes to files in the flist that belong to user with uid + # if uid is zero, then get all file indexes uid_file_index = hashhash[h].get_user_file_index(uid) - if len(uid_file_index) == 0: + if len(uid_file_index) == 0: # user with uid has no files in this set continue - uid_dup_file_index = [] - if hashhash[h].ndup > 1: + oldest_index = hashhash[h].oldest_index + foldest = hashhash[h].flist[oldest_index] + user_owns_original = False + if foldest.uid == uid or 0 == uid : user_owns_original = True + uid_file_index = list(filter(lambda x:x!=oldest_index,uid_file_index)) # remove oldest if present in list + inodes_already_summerized = [foldest.inode] + if hashhash[h].ndup_files > 0: # we have duplicates for i in uid_file_index: f = hashhash[h].flist[i] + fpath = f.apath + parent = fpath.parent fpaths = f.get_paths(mindepth, maxdepth) - if ( - i == hashhash[h].oldest_index - ): # its the original file ... 
not a duplicate + if f.inode in inodes_already_summerized: # it is a hardlink for p in fpaths: - peruser_perfolder_summaries[p].non_dup_Bytes.append(f.size) - peruser_perfolder_summaries[p].non_dup_ages.append(f.mtime) + perfolder_summaries[p].ndup_files += 1 else: - uid_dup_file_index.append(i) + inodes_already_summerized.append(f.inode) + if not parent in perfolder_dups: + perfolder_dups[fpath.parent] = 0 + perfolder_dups[fpath.parent] += f.calculated_size for p in fpaths: - peruser_perfolder_summaries[p].dup_Bytes.append(f.size) - peruser_perfolder_summaries[p].dup_ages.append(f.mtime) - else: # ndup == 1 .. meaning there are no duplicates .. just one file - for i in uid_file_index: - f = hashhash[h].flist[i] - fpaths = f.get_paths(mindepth, maxdepth) + perfolder_summaries[p].ndup_files+=1 + perfolder_summaries[p].dup_Bytes.append(f.calculated_size) + perfolder_summaries[p].dup_ages.append(f.mtime) + else: # we only have 1 original file + if user_owns_original: + fpaths = foldest.get_paths(mindepth, maxdepth) for p in fpaths: - peruser_perfolder_summaries[p].non_dup_Bytes.append(f.size) - peruser_perfolder_summaries[p].non_dup_ages.append(f.mtime) - if args.duplicatesonly: - if len(uid_dup_file_index) > 0: - user_output.write( - "{}\n".format( - hashhash[h].str_with_name( - uid2uname, gid2gname, uid_dup_file_index - ) - ) - ) - else: - user_output.write( - "{}\n".format( - hashhash[h].str_with_name(uid2uname, gid2gname, uid_file_index) - ) + perfolder_summaries[p].nnondup_files += 1 + perfolder_summaries[p].non_dup_Bytes.append(foldest.calculated_size) + perfolder_summaries[p].non_dup_ages.append(foldest.mtime) + out_index = [] + out_index.append(oldest_index) + out_index.extend(uid_file_index) + if args.duplicatesonly and len(out_index)==1: continue + user_output.write( + "{}\n".format( + hashhash[h].str_with_name(uid2uname, gid2gname, out_index) ) + ) def main(): @@ -105,7 +114,7 @@ def main(): parser.add_argument( "-f", "--catalog", - dest="lsout", + dest="catalog", required=True, type=str, default=sys.stdin, @@ -119,7 +128,7 @@ def main(): required=False, type=int, default=10, - help="folder max. depth upto which reports are aggregated", + help="folder max. depth upto which reports are aggregated ... absolute path is used to calculate depth (Default: 10)", ) parser.add_argument( @@ -160,6 +169,16 @@ def main(): action=argparse.BooleanOptionalAction, help="Print only duplicates to per user output file.", ) + + parser.add_argument( + "-k", + "--kronaplot", + dest="kronaplot", + required=False, + action=argparse.BooleanOptionalAction, + help="Make kronaplots for duplicates.(ktImportText must be in PATH!)", + ) + parser.add_argument("-v", "--version", action="version", version=__version__) print_with_timestamp( @@ -170,31 +189,43 @@ def main(): args = parser.parse_args() quota = args.quota * 1024 * 1024 * 1024 * 1024 + if args.kronaplot: + ktImportText_in_path = False + if shutil.which("ktImportText") == None: + sys.stderr.write("ktImportText(from kronaTools) not found in PATH. 
kronaplots will not be generated.\n") + else: + ktImportText_in_path = True + uid2uname = dict() gid2gname = dict() hashhash = dict() - users = set() # list of all users found + users = set() # list of all uids found users.add(0) # 0 == all users - groups = set() # list of groups - paths = set() - path_lens = [] + groups = set() # list of gids + paths = set() # set of all paths possible + path_lens = set() # set of all path depths print_with_timestamp( start=start, scriptname=scriptname, string="Reading in catalog file..." ) set_complete = True - with open(args.lsout) as lsout: - for l in lsout: + folder_info = dict() + with open(args.catalog) as catalog: + for l in catalog: fd = FileDetails() set_complete = fd.set(l) if not set_complete: continue - if fd.issyml: + if fd.fld != "d" and fd.fld !="f": # not a file or folder continue # ignore all symlinks users.add(fd.uid) groups.add(fd.gid) - path_lens.append(get_file_depth(fd.apath)) + path_lens.add(fd.get_depth()) for p in fd.get_paths_at_all_depths(): paths.add(p) + if fd.fld == "d": + if not fd.apath in folder_info: + folder_info[fd.apath] = fd + continue hash = fd.xhash_top + "#" + fd.xhash_bottom if hash == "#": # happens when file cannot be read sys.stderr.write( @@ -245,8 +276,13 @@ def main(): scriptname=scriptname, string="Total Number of users: %d" % len(users), ) - + blamematrixtsv = os.path.join( + os.path.abspath(args.outdir), args.prefix + "." + "blamematrix.tsv" + ) + blamematrix = dict() + all_blamematrix_paths = set() for uid in users: + blamematrix[uid] = dict() print_with_timestamp( start=start, scriptname=scriptname, @@ -258,11 +294,19 @@ def main(): outfilenameprefix = get_username_groupname(uid) summaryfilepath = os.path.join( - os.path.abspath(args.outdir), outfilenameprefix + ".summary.txt" + os.path.abspath(args.outdir), outfilenameprefix + ".mimeo.summary.txt" ) useroutputpath = os.path.join( - os.path.abspath(args.outdir), outfilenameprefix + ".files.gz" + os.path.abspath(args.outdir), outfilenameprefix + ".mimeo.files.gz" ) + if args.kronaplot: + kronatsv = os.path.join( + os.path.abspath(args.outdir), outfilenameprefix + ".mimeo.krona.tsv" + ) + if ktImportText_in_path: + kronahtml = os.path.join( + os.path.abspath(args.outdir), outfilenameprefix + ".mimeo.krona.html" + ) with open(summaryfilepath, "w") as user_summary: user_summary.write("%s\n" % (Summary.HEADER)) @@ -270,12 +314,23 @@ def main(): with gzip.open(useroutputpath, "wt") as user_output, open( summaryfilepath, "a" ) as user_summary: - peruser_perfolder_summaries = dict() + perfolder_summaries = dict() + perfolder_dups = dict() for p in paths: - peruser_perfolder_summaries[p] = Summary(p) - hashhashsplits = ( - dict() - ) # dict to collect instances where the files are NOT duplicates has same hashes but different sizes (and different inodes) ... new suffix is added to bottomhash .."_iterator" + perfolder_summaries[p] = Summary(p) + if not p in folder_info: + folder_info[p] = FileDetails() + folder_info[p].initialize(p) + fd = folder_info[p] + for p2 in fd.get_paths(mindepth,maxdepth): + if not p2 in folder_info: + folder_info[p2] = FileDetails() + folder_info[p2].initialize(p2) + fd2 = folder_info[p2] + if fd2.uid == uid or uid == 0: + perfolder_summaries[p2].folder_Bytes += fd.calculated_size + + hashhashsplits = dict() # dict to collect instances where the files are NOT duplicates has same hashes but different sizes (and different inodes) ... 
new suffix is added to bottomhash .."_iterator" process_hh( uid, hashhash, @@ -284,30 +339,81 @@ def main(): maxdepth, uid2uname, gid2gname, - peruser_perfolder_summaries, - user_output, - ) - hashhashsplitsdummy = dict() - process_hh( - uid, - hashhashsplits, - hashhashsplitsdummy, - mindepth, - maxdepth, - uid2uname, - gid2gname, - peruser_perfolder_summaries, + perfolder_summaries, + perfolder_dups, user_output, ) + if len(hashhashsplits) != 0: + hashhashsplitsdummy = dict() + process_hh( + uid, + hashhashsplits, + hashhashsplitsdummy, + mindepth, + maxdepth, + uid2uname, + gid2gname, + perfolder_summaries, + perfolder_dups, + user_output, + ) + del hashhashsplitsdummy + del hashhashsplits for p in paths: - peruser_perfolder_summaries[p].update_scores(quota) - user_summary.write(f"{peruser_perfolder_summaries[p]}\n") + perfolder_summaries[p].update_scores(quota) + user_summary.write(f"{perfolder_summaries[p]}\n") + for p in perfolder_summaries: + dummy = FileDetails() + dummy.initialize(p) + if dummy.get_depth() == mindepth + 1: + all_blamematrix_paths.add(p) + blamematrix[uid][p] = sum(perfolder_summaries[p].dup_Bytes) + + if args.kronaplot: + print_with_timestamp( + start=start, + scriptname=scriptname, + string="Creating Kronachart for user: %s" % (uid2uname[uid]), + ) + with open(kronatsv,'w') as ktsv: + for p in perfolder_dups: + path = str(p) + path = path.replace('/','\t') + path = path.replace('\t\t','\t') + if perfolder_dups[p] != 0: + ktsv.write("%d\t%s\n"%(perfolder_dups[p],path)) + if ktImportText_in_path: + cmd = "ktImportText %s -o %s"%(kronatsv,kronahtml) + srun = subprocess.run(cmd,shell=True, capture_output=True, text=True) + if srun.returncode !=0: + sys.stderr.write("%s\n"%(srun.stderr)) del hashhash - del hashhashsplits - del hashhashsplitsdummy - print_with_timestamp(start=start, scriptname=scriptname, string="Finished!") + print_with_timestamp( + start=start, + scriptname=scriptname, + string="Creating Blamematrix", + ) + with open(blamematrixtsv,'w') as btsv: + outlist = ["path"] + uids = list(blamematrix.keys()) + uids.sort() + for uid in uids: + outlist.append(uid2uname[uid]) + btsv.write("\t".join(outlist)+"\n") + for p in all_blamematrix_paths: + outlist = [str(p)] + s = 0 + for uid in uids: + if p in blamematrix[uid]: + s += blamematrix[uid][p] + outlist.append(str(blamematrix[uid][p])) + else: + outlist.append(str(0)) + if s != 0 : btsv.write("\t".join(outlist)+"\n") + + print_with_timestamp(start=start, scriptname=scriptname, string="Finished!") if __name__ == "__main__": main() diff --git a/spacesavers2_usurp b/spacesavers2_usurp index baeded3..2c79c9a 100755 --- a/spacesavers2_usurp +++ b/spacesavers2_usurp @@ -11,10 +11,6 @@ from src.VersionCheck import __version__ version_check() -# from src.FileDetails import FileDetails -from src.dfUnit import fgzblamer - -# from src.Summary import Summary from src.utils import * from datetime import date @@ -70,9 +66,6 @@ def main(): global args args = parser.parse_args() - # if args.version: - # version_print() - print_with_timestamp( start=start, scriptname=scriptname, string="version: {}".format(__version__) ) @@ -83,18 +76,30 @@ def main(): l = l.strip().split("\t") lhash = l[0] if args.hash in lhash: - dupfiles = l[4].split(";") - original_copy = Path(dupfiles.pop(0).strip('"')) + original_copy = Path(l[4].strip('"')) + dupfiles = l[5].split("##") print_with_timestamp( start=start, scriptname=scriptname, string="Original copy: {}".format(original_copy), ) + if not os.access(original_copy, os.R_OK): + 
print_with_timestamp( + start=start, + scriptname=scriptname, + string="Original copy is not readable. Hardlinks cannot be created!", + ) + exit(1) + inode_set = set() for dup in dupfiles: dup = Path(dup.strip('"')) duptmp = Path(str(dup) + "." + str(uuid.uuid4())) st = os.stat(dup) - total_saved += st.st_size + fsize = st.st_size + finode = st.st_ino + if not finode in inode_set: + inode_set.add(finode) + total_saved += fsize print_with_timestamp( start=start, scriptname=scriptname, @@ -110,20 +115,21 @@ def main(): os.remove(dup) os.rename(duptmp, dup) except OSError: - print_with_timestamp( - start=start, - scriptname=scriptname, - string="OSError occurred while creating hard-link. Probably trying to create a cross-device hard-link", - ) if args.force: print_with_timestamp( start=start, scriptname=scriptname, - string="Creating sym-link instead!", + string="Creating symlink file: {}".format(dup), ) os.remove(dup) os.symlink(original_copy, dup) - break + else: + print_with_timestamp( + start=start, + scriptname=scriptname, + string="OSError occurred while creating hard-link. Probably trying to create a cross-device hard-link. Try using --force to create symlink instead.", + ) + # break total_saved_human_readable = get_human_readable_size(total_saved) print_with_timestamp( start=start, diff --git a/src/FileDetails.py b/src/FileDetails.py index 90d786d..ce7a4db 100644 --- a/src/FileDetails.py +++ b/src/FileDetails.py @@ -9,8 +9,8 @@ except ImportError: exit(f"{sys.argv[0]} requires xxhash module") -THRESHOLDSIZE = 1024 * 1024 * 1024 -BUFFERSIZE = 128 * 1024 +THRESHOLDSIZE = 1024 * 1024 * 1024 # 1 MiB +BUFFERSIZE = 128 * 1024 # 128 KiB TB = THRESHOLDSIZE+BUFFERSIZE SEED = 20230502 MINDEPTH = 3 @@ -21,14 +21,33 @@ SED[se]=1 def convert_time_to_age(t): - currenttime=int(time.time()) - return int((currenttime - t)/86400)+1 + currenttime = int(time.time()) + age = int((currenttime - t)/86400)+1 + if age < 0: age = 0 + return age + +def get_type(p): + x = "u" # unknown + if not p.exists(): + x = "a" # absent + return x + if p.is_symlink(): + x = "l" # link or symlink + return x + if p.is_dir(): + x = "d" # directory + return x + if p.is_file(): + x = "f" # file + return x + return x class FileDetails: def __init__(self): self.apath = "" # absolute path of file - self.issyml = False + self.fdl = "u" # is it file or directory or link or unknown or absent ... 
values are f d l u a self.size = -1 + self.calculated_size = -1 self.dev = -1 self.inode = -1 self.nlink = -1 @@ -40,12 +59,13 @@ def __init__(self): self.xhash_top = "" self.xhash_bottom = "" - def initialize(self,f,thresholdsize=THRESHOLDSIZE, buffersize=BUFFERSIZE, tb=TB, sed=SED, bottomhash=False): + def initialize(self,f,thresholdsize=THRESHOLDSIZE, buffersize=BUFFERSIZE, tb=TB, sed=SED, bottomhash=False,st_block_byte_size=512): self.apath = Path(f).absolute() # path is of type PosixPath ext = self.apath.suffix - self.issyml = self.apath.is_symlink() # is a symbolic link - st = os.stat(self.apath) # gather all stats + self.fld = get_type(self.apath) # get if it is a file or directory or link or unknown or absent + st = self.apath.stat(follow_symlinks=False) # gather stat results self.size = st.st_size # size in bytes + self.calculated_size = st.st_blocks * st_block_byte_size # st_blocks gives number of 512 bytes blocks used self.dev = st.st_dev # Device id self.inode = st.st_ino # Inode self.nlink = st.st_nlink # number of hardlinks @@ -54,39 +74,40 @@ def initialize(self,f,thresholdsize=THRESHOLDSIZE, buffersize=BUFFERSIZE, tb=TB, self.ctime = convert_time_to_age(st.st_ctime) # creation time self.uid = st.st_uid # user id self.gid = st.st_gid # group id - try: - with open(self.apath,'rb') as fh: - if ext in sed: - if self.size > tb: - data = fh.read(thresholdsize) - data = fh.read(buffersize) - self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() - if bottomhash: - fh.seek(-1 * buffersize,2) + if self.fld == "f": + try: + with open(self.apath,'rb') as fh: + if ext in sed: + if self.size > tb: + data = fh.read(thresholdsize) data = fh.read(buffersize) - self.xhash_bottom = xxhash.xxh128(data,seed=SEED).hexdigest() + self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() + if bottomhash: + fh.seek(-1 * buffersize,2) + data = fh.read(buffersize) + self.xhash_bottom = xxhash.xxh128(data,seed=SEED).hexdigest() + else: + self.xhash_bottom = self.xhash_top else: + data = fh.read() + self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() self.xhash_bottom = self.xhash_top else: - data = fh.read() - self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() - self.xhash_bottom = self.xhash_top - else: - if self.size > buffersize: - data = fh.read(buffersize) - self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() - if bottomhash: - fh.seek(-1 * buffersize,2) + if self.size > buffersize: data = fh.read(buffersize) - self.xhash_bottom = xxhash.xxh128(data,seed=SEED).hexdigest() + self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() + if bottomhash: + fh.seek(-1 * buffersize,2) + data = fh.read(buffersize) + self.xhash_bottom = xxhash.xxh128(data,seed=SEED).hexdigest() + else: + self.xhash_bottom = self.xhash_top else: + data = fh.read() + self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() self.xhash_bottom = self.xhash_top - else: - data = fh.read() - self.xhash_top = xxhash.xxh128(data,seed=SEED).hexdigest() - self.xhash_bottom = self.xhash_top - except: - sys.stderr.write("spacesavers2:{}:File cannot be read:{}\n".format(self.__class__.__name__,str(self.apath))) + except: + sys.stderr.write("spacesavers2:{}:File cannot be read:{}\n".format(self.__class__.__name__,str(self.apath))) def set(self,ls_line): original_ls_line=ls_line @@ -105,9 +126,9 @@ def set(self,ls_line): self.nlink = int(ls_line.pop(-1)) self.inode = int(ls_line.pop(-1)) self.dev = int(ls_line.pop(-1)) + self.calculated_size = int(ls_line.pop(-1)) self.size = int(ls_line.pop(-1)) - issyml = 
ls_line.pop(-1) - self.issyml = issyml == 'True' + self.fld = ls_line.pop(-1) self.apath = Path(";".join(ls_line)) # sometimes filename have ";" in them ... hence this! return True except: @@ -115,12 +136,13 @@ def set(self,ls_line): # exit() return False - def str_with_name(self,uid2uname,gid2gname):# method for printing output in finddup ... replace "xhash_top;xhash_bottom" with "username;groupname" at the end of the string + def str_with_name(self,uid2uname,gid2gname):# method for printing output in mimeo ... replace "xhash_top;xhash_bottom" with "username;groupname" at the end of the string # return_str = "\"%s\";"%(self.apath) # path may have newline char which should not be interpretted as new line char return_str = "\"%s\";"%(str(self.apath).encode('unicode_escape').decode('utf-8')) - # return_str += "%s;"%(self.issyml) + return_str += "%s;"%(self.fld) return_str += "%d;"%(self.size) + return_str += "%d;"%(self.calculated_size) return_str += "%d;"%(self.dev) return_str += "%d;"%(self.inode) return_str += "%d;"%(self.nlink) @@ -137,9 +159,10 @@ def __str__(self): # return_str = "\"%s\";"%(self.apath) # path may have newline char which should not be interpretted as new line char return_str = "\"%s\";"%(str(self.apath).encode('unicode_escape').decode('utf-8')) - return_str += "%s;"%(self.issyml) + return_str += "%s;"%(self.fld) return_str += "%d;"%(self.size) - return_str += "%d;"%(self.dev) + return_str += "%d;"%(self.calculated_size) + return_str += "%d;"%(self.dev) # device id return_str += "%d;"%(self.inode) return_str += "%d;"%(self.nlink) return_str += "%d;"%(self.atime) @@ -151,11 +174,27 @@ def __str__(self): return_str += "%s;"%(self.xhash_bottom) return return_str - def get_paths_at_all_depths(self): # for files - return self.apath.parents[:-1] + def get_paths_at_all_depths(self): # for files and folders + p = self.apath + paths = [] + if self.fld == "d": + paths.append(p) + paths.extend(p.parents[:-1]) # remove the last one ... 
which will be '/' + return paths def get_paths(self,mindepth,maxdepth): - parents = list(self.apath.parents[0:-1]) - parents = list(filter(lambda x:get_folder_depth(x) <= maxdepth,parents)) - parents = list(filter(lambda x:get_folder_depth(x) >= mindepth,parents)) - return parents + paths = self.get_paths_at_all_depths() + paths = list(filter(lambda x:get_folder_depth(x) <= maxdepth,paths)) + paths = list(filter(lambda x:get_folder_depth(x) >= mindepth,paths)) + return paths + + def get_depth(self): + p = self.apath + try: + if p.is_dir(): # folder + return len(list(p.parents)) + else: # file + return len(list(p.parents)) - 1 + except: + print('get_file_depth error for file:"{}", type:{}'.format(path, type(path))) + exit() \ No newline at end of file diff --git a/src/Summary.py b/src/Summary.py index e055f0a..28435fa 100644 --- a/src/Summary.py +++ b/src/Summary.py @@ -29,8 +29,11 @@ class Summary: def __init__(self,path): self.path = path + self.nnondup_files = 0 + self.ndup_files = 0 self.non_dup_Bytes = [] self.dup_Bytes = [] + self.folder_Bytes = 0 self.non_dup_ages = [] self.dup_ages = [] self.non_dup_age_scores = [] @@ -69,7 +72,7 @@ def print_header(self): def __str__(self): dup_Bytes = sum(self.dup_Bytes) - tot_Bytes = sum(self.non_dup_Bytes) + dup_Bytes + tot_Bytes = sum(self.non_dup_Bytes) + dup_Bytes + self.folder_Bytes try: dup_mean_age = sum(self.dup_ages)/len(self.dup_ages) except ZeroDivisionError: @@ -78,8 +81,10 @@ def __str__(self): tot_mean_age = (sum(self.dup_ages) + sum(self.non_dup_ages))/(len(self.dup_ages)+len(self.non_dup_ages)) except ZeroDivisionError: tot_mean_age = 0 - dup_files = len(self.dup_Bytes) - tot_files = dup_files + len(self.non_dup_Bytes) + # dup_files = len(self.dup_Bytes) + # tot_files = dup_files + len(self.non_dup_Bytes) + dup_files = self.ndup_files + tot_files = self.nnondup_files + dup_files return_str = str(self.path)+"\t" return_str += "%d\t"%(tot_Bytes) return_str += "%d\t"%(dup_Bytes) @@ -101,3 +106,12 @@ def __str__(self): return_str += "%d"%(self.OverallScore) return return_str +class pathlen: + def __init__(self,p,dupbytes): + self.path=p + self.len=len(p.split("/")) + self.dupbytes=dupbytes + + def __str__(self): + returnstr="%s"%(self.path) + return returnstr diff --git a/src/VERSION b/src/VERSION index 5eef0f1..d9df1bb 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -0.10.2 +0.11.0 diff --git a/src/dfUnit.py b/src/dfUnit.py index 6100e1f..73c9e1b 100644 --- a/src/dfUnit.py +++ b/src/dfUnit.py @@ -3,7 +3,7 @@ def get_filename_from_fgzlistitem(string): string = string.strip().split(";")[:-1] - for i in range(9): + for i in range(11): dummy = string.pop(-1) filename = ";".join(string) return filename @@ -12,23 +12,29 @@ def get_filename_from_fgzlistitem(string): class dfUnit: def __init__(self,hash): self.hash = hash # typically hash_top + "#" + hash_bottom - self.flist = [] # list of _ls files with the same hash - self.fsize = -1 # size of each file + self.flist = [] # list of catalog files with the same hash + self.fsize = -1 # calculated size of each file self.ndup = -1 # files in flist with same size, but different inode (they already have the same hash) + self.ndup_files = -1 # number of duplicate files ... used for counting duplicate files + self.ndup_inode = -1 # number of duplicate inodes ... used for counting duplicate bytes self.size_set = set() # set of unique sizes ... 
if len(size_set) then split is required + self.calculated_size_list = [] self.uid_list = [] # list of uids of files added self.inode_list = [] # list of inodes of files added - self.oldest_inode = -1 # oldest_ ... is for the file which is NOT the duplicate + self.oldest_inode = -1 # oldest_ ... is for the file which is NOT the duplicate or is the original self.oldest_index = -1 self.oldest_age = -1 self.oldest_uid = -1 - + def nfiles_with_hash(self): # return number of files in this hash (total ... all users included) + return len(self.flist) + def add_fd(self,fd): # add the file to flist self.flist.append(fd) # add size if not already present self.size_set.add(fd.size) + self.calculated_size_list.append(fd.calculated_size) # add uid self.uid_list.append(fd.uid) # add inode @@ -44,7 +50,14 @@ def filter_flist_by_uid(self,uid): for i,f in enumerate(self.flist): if f.uid == uid : self.keep.append(i) - def compute(self,hashhashsplits,split_required): # 1. move oldest to the first position 2. find ndup 3. find size 4. filter by uid 5. get depth folder + def compute(self,hashhashsplits): + # find if files have the same hashes, but different sizes then... + # 1. split them into different hashes by size and + # 2. append them to hashhashsplits + # else ... aka .. .no spliting is required + # 1. count number of duplicate inodes and + # 2. size of each file + split_required = False # check if spliting is required if len(self.size_set) > 1: # more than 1 size in this hash split_required = True @@ -57,13 +70,16 @@ def compute(self,hashhashsplits,split_required): # 1. move oldest to the first p if fd.size == size: hashhashsplits[newhash].add_fd(fd) else: # there only 1 size ... no splits required - self.ndup = len(self.inode_list) - 1 #ndup is zero if same size and only 1 inode - self.fsize = self.flist[0].size + self.ndup = len(self.inode_list) - 1 #ndup is zero if same len(size_set)==1 and len(inode_list)==1 + self.ndup_inode = len(set(self.inode_list)) - 1 + self.ndup_files = len(self.inode_list) - 1 + self.fsize = self.flist[0].calculated_size + return split_required def get_user_file_index(self,uid): uid_file_index = [] if not uid in self.uid_list: - if uid == 0: uid_file_index = list(range(0,len(self.flist))) + if uid == 0: uid_file_index = list(range(0,len(self.flist))) # uid == 0 is all users return uid_file_index else: for i,j in enumerate(self.flist): @@ -73,19 +89,20 @@ def get_user_file_index(self,uid): def __str__(self): - return "{0} : {1} {2} {3}".format(self.hash, self.ndup, self.fsize,"##".join(map(lambda x:str(x),self.flist))) + return "{0} : {1} {2} {3}".format(self.hash, self.ndup_inode, self.fsize,"##".join(map(lambda x:str(x),self.flist))) def str_with_name(self,uid2uname, gid2gname,findex): - return "{0} : {1} {2} {3}".format(self.hash, self.ndup, self.fsize,"##".join(map(lambda x:x.str_with_name(uid2uname,gid2gname),[self.flist[i] for i in findex]))) + return "{0} : {1} {2} {3}".format(self.hash, self.ndup_inode, self.fsize,"##".join(map(lambda x:x.str_with_name(uid2uname,gid2gname),[self.flist[i] for i in findex]))) class fgz: # used by grubber def __init__(self): self.hash = "" - self.ndup = -1 + self.ndup = -1 # number of duplicate files and not duplicate inodes self.filesize = -1 self.totalsize = -1 - self.fds = [] + self.fds = [] # list of duplicate files + self.of = "" # original file def __lt__(self,other): return self.totalsize > other.totalsize @@ -94,9 +111,10 @@ def __str__(self): outstring=[] outstring.append(str(self.hash)) outstring.append(str(self.ndup)) - 
outstring.append(get_human_readable_size(self.totalsize)) - outstring.append(get_human_readable_size(self.filesize)) - outstring.append(";".join(map(lambda x:get_filename_from_fgzlistitem(x),self.fds))) + outstring.append(str(self.totalsize)) + outstring.append(str(self.filesize)) + outstring.append(get_filename_from_fgzlistitem(self.of)) + outstring.append("##".join(map(lambda x:get_filename_from_fgzlistitem(x),self.fds))) return "\t".join(outstring) # return "{0} {1} {2} {3} {4}".format(self.hash,self.ndup,get_human_readable_size(self.totalsize), get_human_readable_size(self.filesize), ";".join(map(lambda x:get_filename_from_fgzlistitem(x),self.fds))) # return "{0} {1} {2} {3} {4}".format(self.hash,self.ndup,self.totalsize, self.filesize, ";".join(map(lambda x:get_filename_from_fgzlistitem(x),self.fds))) @@ -107,100 +125,19 @@ def set(self,inputline): try: inputline = inputline.strip().split(" ") if len(inputline) < 5: - raise Exception("Less than 5 items in the line.") + raise Exception("Less than 5 items in mimeo.files.gz line.") self.hash = inputline.pop(0) - dummy = inputline.pop(0) + dummy = inputline.pop(0) # the colon total_ndup = int(inputline.pop(0)) - if total_ndup == 0: # may be finddup was run to output all files .. not just dups + if total_ndup == 0: # may be mimeo was run to output all files .. not just dups .. aka without the -z option return False self.filesize = int(inputline.pop(0)) - full_fds = " ".join(inputline) - fds = full_fds.split("##") - self.ndup = len(fds) # these are user number of duplicates/files - if self.ndup == (total_ndup + 1): # one file is the original ... other are all duplicates - dummy = fds.pop(0) - self.ndup -= 1 - self.fds = fds - self.totalsize = self.ndup * self.filesize - return True - except: - sys.stderr.write("spacesavers2:{0}:files.gz Do not understand line:{1} with {2} elements.\n".format(self.__class__.__name__,original_line,len(inputline))) - # exit() - return False - - -class FileDetails2: - def __init__(self): - self.apath = "" - self.size = -1 - self.dev = -1 - self.inode = -1 - self.nlink = -1 - self.mtime = -1 - self.uid = -1 - self.gid = -1 - self.uname = "" - self.gname = "" - - def set(self,fgzline): - original_fgzline=fgzline - # print(ls_line) - try: - fgzline = fgzline.strip().replace("\"","").split(";")[:-1] - if len(fgzline) < 10: - raise Exception("Less than 10 items in the line.") - self.gname = fgzline.pop(-1) - self.uname = fgzline.pop(-1) - self.gid = int(fgzline.pop(-1)) - self.uid = int(fgzline.pop(-1)) - self.mtime = int(fgzline.pop(-1)) - self.nlink = int(fgzline.pop(-1)) - self.inode = int(fgzline.pop(-1)) - self.dev = int(fgzline.pop(-1)) - self.size = int(fgzline.pop(-1)) - apath = ";".join(fgzline) - apath = apath.strip("\"") - self.apath = Path(apath) # sometimes filename have ";" in them ... hence this! 
- return True - except: - sys.stderr.write("spacesavers2:{0}:catalog Do not understand line:\"{1}\" with {2} elements.\n".format(self.__class__.__name__,original_fgzline,len(fgzline))) - # exit() - return False - -class fgzblamer: # used by blamematrix - def __init__(self): - self.hash = "" - self.ndup = -1 - self.users = set() - self.folders = set() - self.bm = dict() - self.fds = [] - - def set(self,inputline,depth): - original_line = inputline - try: - inputline = inputline.strip().split(" ") - if len(inputline) < 5: - raise Exception("Less than 5 items in the line.") - self.hash = inputline.pop(0) - dummy = inputline.pop(0) - self.ndup = int(inputline.pop(0)) - if self.ndup == 0 or self.ndup == 1: return False - self.filesize = int(inputline.pop(0)) - full_fds = " ".join(inputline) + full_fds = " ".join(inputline) # bcos file names can contain spaces fds = full_fds.split("##") - for f in fds: - fd = FileDetails2() - fd.set(f) - self.users.add(fd.uname) - fad=get_folder_at_depth(fd.apath,depth) - self.folders.add(fad) - if not fd.uname in self.bm: - self.bm[fd.uname] = dict() - if not fad in self.bm[fd.uname]: - self.bm[fd.uname][fad] = 0 - self.bm[fd.uname][fad] += self.filesize - self.fds = [] + self.ndup = total_ndup # these are user number of duplicates/files + self.of = fds.pop(0) # one file is the original + self.fds = fds # others are dupicates + self.totalsize = total_ndup * self.filesize return True except: sys.stderr.write("spacesavers2:{0}:files.gz Do not understand line:{1} with {2} elements.\n".format(self.__class__.__name__,original_line,len(inputline))) diff --git a/src/utils.py b/src/utils.py index 0c67e74..68c4b51 100644 --- a/src/utils.py +++ b/src/utils.py @@ -72,14 +72,6 @@ def get_folder_depth(path): return len(list(path.parents)) -def get_file_depth(path): - try: - return len(list(path.parents)) - 1 - except: - print('get_file_depth error for file:"{}", type:{}'.format(path, type(path))) - exit() - - def get_timestamp(start): e = time.time() return "%08.2fs" % (e - start)