From 58fdf250e831ced8479b60505a8525c4cdbdb4db Mon Sep 17 00:00:00 2001 From: Pollus Brodeur Date: Sun, 14 Apr 2024 10:47:21 -0400 Subject: [PATCH] Fix issue #11 --- PSOneTools/2.4/Find-PSOneDuplicateFile.ps1 | 72 +++++++-------- .../2.4/Find-PSOneDuplicateFileFast.ps1 | 92 +++++++++---------- 2 files changed, 82 insertions(+), 82 deletions(-) diff --git a/PSOneTools/2.4/Find-PSOneDuplicateFile.ps1 b/PSOneTools/2.4/Find-PSOneDuplicateFile.ps1 index 6eaa3ff..1104cc6 100644 --- a/PSOneTools/2.4/Find-PSOneDuplicateFile.ps1 +++ b/PSOneTools/2.4/Find-PSOneDuplicateFile.ps1 @@ -9,11 +9,11 @@ .EXAMPLE $Path = [Environment]::GetFolderPath('MyDocuments') - Find-PSOneDuplicateFile -Path $Path + Find-PSOneDuplicateFile -Path $Path Find duplicate files in the user documents folder .EXAMPLE - Find-PSOneDuplicateFile -Path c:\windows -Filter *.log + Find-PSOneDuplicateFile -Path c:\windows -Filter *.log find log files in the Windows folder with duplicate content .LINK @@ -27,27 +27,27 @@ [String] [Parameter(Mandatory)] $Path, - - # Filter to apply. Default is '*' (all Files) + + # Filter to apply. Default is '*' (all Files) [String] $Filter = '*' ) # get a hashtable of all files of size greater 0 # grouped by their length - - + + # ENUMERATE ALL FILES RECURSIVELY # call scriptblocks directly and pipe them together # this is by far the fastest way and much faster than # using Foreach-Object: - & { + & { try { # try and use the fast API way of enumerating files recursively # this FAILS whenever there is any "Access Denied" errors Write-Progress -Activity 'Acquiring Files' -Status 'Fast Method' - [IO.DirectoryInfo]::new($Path).GetFiles('*', 'AllDirectories') + [IO.DirectoryInfo]::new($Path).GetFiles($Filter, 'AllDirectories') } catch { @@ -55,7 +55,7 @@ Write-Progress -Activity 'Acquiring Files' -Status 'Falling Back to Slow Method' Get-ChildItem -Path $Path -File -Recurse -ErrorAction Ignore } - } | + } | # EXCLUDE EMPTY FILES: # use direct process blocks with IF (which is much faster than Where-Object): & { @@ -68,37 +68,37 @@ $_ } } - } | + } | # GROUP FILES BY LENGTH, AND RETURN ONLY FILES WHERE THERE IS AT LEAST ONE # OTHER FILE WITH SAME SIZE # use direct scriptblocks with own hashtable (which is much faster than Group-Object) - & { - begin + & { + begin # start with an empty hashtable - { $hash = @{} } + { $hash = @{} } - process - { + process + { # group files by their length # (use "length" as hashtable key) $file = $_ $key = $file.Length.toString() - + # if we see this key for the first time, create a generic # list to hold group items, and store FileInfo objects in this list # (specialized generic lists are faster than ArrayList): - if ($hash.ContainsKey($key) -eq $false) + if ($hash.ContainsKey($key) -eq $false) { $hash[$key] = [Collections.Generic.List[System.IO.FileInfo]]::new() } # add file to appropriate hashtable key: $hash[$key].Add($file) - } - - end - { + } + + end + { # return only the files from groups with at least two files - # (if there is only one file with a given length, then it + # (if there is only one file with a given length, then it # cannot have any duplicates for sure): foreach($pile in $hash.Values) { @@ -109,8 +109,8 @@ $pile } } - } - } | + } + } | # CALCULATE THE NUMBER OF FILES TO HASH # collect all files and hand over en-bloc & { @@ -119,58 +119,58 @@ # GROUP FILES BY HASH, AND RETURN ONLY HASHES THAT HAVE AT LEAST TWO FILES: # use a direct scriptblock call with a hashtable (much faster than Group-Object): & { - begin + begin { # start with an empty hashtable $hash = 
@{} - + # since this is a length procedure, a progress bar is in order # keep a counter of processed files: $c = 0 } - + process { $totalNumber = $_.Count foreach($file in $_) { - + # update progress bar $c++ - + # update progress bar every 20 files: if ($c % 20 -eq 0) { $percentComplete = $c * 100 / $totalNumber Write-Progress -Activity 'Hashing File Content' -Status $file.Name -PercentComplete $percentComplete } - + # use the file hash of this file PLUS file length as a key to the hashtable # use the fastest algorithm SHA1 $result = Get-FileHash -Path $file.FullName -Algorithm SHA1 $key = '{0}:{1}' -f $result.Hash, $file.Length - + # if we see this key the first time, add a generic list to this key: if ($hash.ContainsKey($key) -eq $false) { $hash.Add($key, [Collections.Generic.List[System.IO.FileInfo]]::new()) } - + # add the file to the approriate group: $hash[$key].Add($file) } } - + end { # remove all hashtable keys with only one file in them - + # first, CLONE the list of hashtable keys # (we cannot remove hashtable keys while enumerating the live # keys list): # remove keys $keys = @($hash.Keys).Clone() - + # enumerate all keys... foreach($key in $keys) { @@ -180,7 +180,7 @@ $hash.Remove($key) } } - + # return the hashtable with only duplicate files left: $hash } diff --git a/PSOneTools/2.4/Find-PSOneDuplicateFileFast.ps1 b/PSOneTools/2.4/Find-PSOneDuplicateFileFast.ps1 index 18cd0cb..86a9d49 100644 --- a/PSOneTools/2.4/Find-PSOneDuplicateFileFast.ps1 +++ b/PSOneTools/2.4/Find-PSOneDuplicateFileFast.ps1 @@ -13,11 +13,11 @@ .EXAMPLE $Path = [Environment]::GetFolderPath('MyDocuments') - Find-PSOneDuplicateFileFast -Path $Path + Find-PSOneDuplicateFileFast -Path $Path Find duplicate files in the user documents folder .EXAMPLE - Find-PSOneDuplicateFileFast -Path c:\windows -Filter *.log + Find-PSOneDuplicateFileFast -Path c:\windows -Filter *.log find log files in the Windows folder with duplicate content .LINK @@ -31,18 +31,18 @@ [String] [Parameter(Mandatory)] $Path, - - # Filter to apply. Default is '*' (all Files) + + # Filter to apply. Default is '*' (all Files) [String] $Filter = '*', - + # when there are multiple files with same partial hash # they may still be different. 
When setting this switch, # full hashes are calculated which may take a very long time # for large files and/or slow networks [switch] $TestPartialHash, - + # use partial hashes for files larger than this: [int64] $MaxFileSize = 100KB @@ -50,19 +50,19 @@ # get a hashtable of all files of size greater 0 # grouped by their length - - + + # ENUMERATE ALL FILES RECURSIVELY # call scriptblocks directly and pipe them together # this is by far the fastest way and much faster than # using Foreach-Object: - & { + & { try { # try and use the fast API way of enumerating files recursively # this FAILS whenever there is any "Access Denied" errors Write-Progress -Activity 'Acquiring Files' -Status 'Fast Method' - [IO.DirectoryInfo]::new($Path).GetFiles('*', 'AllDirectories') + [IO.DirectoryInfo]::new($Path).GetFiles($Filter, 'AllDirectories') } catch { @@ -70,7 +70,7 @@ Write-Progress -Activity 'Acquiring Files' -Status 'Falling Back to Slow Method' Get-ChildItem -Path $Path -File -Recurse -ErrorAction Ignore } - } | + } | # EXCLUDE EMPTY FILES: # use direct process blocks with IF (which is much faster than Where-Object): & { @@ -83,37 +83,37 @@ $_ } } - } | + } | # GROUP FILES BY LENGTH, AND RETURN ONLY FILES WHERE THERE IS AT LEAST ONE # OTHER FILE WITH SAME SIZE # use direct scriptblocks with own hashtable (which is much faster than Group-Object) - & { - begin + & { + begin # start with an empty hashtable - { $hash = @{} } + { $hash = @{} } - process - { + process + { # group files by their length # (use "length" as hashtable key) $file = $_ $key = $file.Length.toString() - + # if we see this key for the first time, create a generic # list to hold group items, and store FileInfo objects in this list # (specialized generic lists are faster than ArrayList): - if ($hash.ContainsKey($key) -eq $false) + if ($hash.ContainsKey($key) -eq $false) { $hash[$key] = [Collections.Generic.List[System.IO.FileInfo]]::new() } # add file to appropriate hashtable key: $hash[$key].Add($file) - } - - end - { + } + + end + { # return only the files from groups with at least two files - # (if there is only one file with a given length, then it + # (if there is only one file with a given length, then it # cannot have any duplicates for sure): foreach($pile in $hash.Values) { @@ -124,8 +124,8 @@ $pile } } - } - } | + } + } | # CALCULATE THE NUMBER OF FILES TO HASH # collect all files and hand over en-bloc & { @@ -134,37 +134,37 @@ # GROUP FILES BY HASH, AND RETURN ONLY HASHES THAT HAVE AT LEAST TWO FILES: # use a direct scriptblock call with a hashtable (much faster than Group-Object): & { - begin + begin { # start with an empty hashtable $hash = @{} - + # since this is a length procedure, a progress bar is in order # keep a counter of processed files: $c = 0 } - + process { $totalNumber = $_.Count foreach($file in $_) { - + # update progress bar $c++ - + # update progress bar every 20 files: if ($c % 20 -eq 0 -or $file.Length -gt 100MB) { $percentComplete = $c * 100 / $totalNumber Write-Progress -Activity 'Hashing File Content' -Status $file.Name -PercentComplete $percentComplete } - + # use the file hash of this file PLUS file length as a key to the hashtable # use the fastest algorithm SHA1, and use partial hashes for files larger than 100KB: $bufferSize = [Math]::Min(100KB, $MaxFileSize) $result = Get-PsOneFileHash -StartPosition 1KB -Length $MaxFileSize -BufferSize $bufferSize -AlgorithmName SHA1 -Path $file.FullName - + # add a "P" to partial hashes: if ($result.IsPartialHash) { $partialHash = 'P' @@ -173,27 +173,27 @@ 
{ $partialHash = '' } - - + + $key = '{0}:{1}{2}' -f $result.Hash, $file.Length, $partialHash - + # if we see this key the first time, add a generic list to this key: if ($hash.ContainsKey($key) -eq $false) { $hash.Add($key, [Collections.Generic.List[System.IO.FileInfo]]::new()) } - + # add the file to the approriate group: $hash[$key].Add($file) } } - + end { # remove all hashtable keys with only one file in them - - - + + + # do a detail check on partial hashes if ($TestPartialHash) { @@ -223,10 +223,10 @@ } } } - + # enumerate all keys... $keys = @($hash.Keys).Clone() - + foreach($key in $keys) { # ...if key has only one file, remove it: @@ -235,9 +235,9 @@ $hash.Remove($key) } } - - - + + + # return the hashtable with only duplicate files left: $hash }
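Note (illustration only, not part of the patch): both functions enumerate
candidate files with the fast [IO.DirectoryInfo]::GetFiles() call, which
previously hard-coded '*' as the search pattern, so -Filter was silently
ignored whenever that fast path succeeded. The sketch below reproduces just
that enumeration step with example values (the user's documents folder and a
hypothetical '*.log' filter, neither taken from the module) to show the
effect of passing $Filter through:

    $Path   = [Environment]::GetFolderPath('MyDocuments')
    $Filter = '*.log'

    # before the patch: the search pattern was hard-coded to '*',
    # so a -Filter argument never reached the .NET call
    $before = [IO.DirectoryInfo]::new($Path).GetFiles('*', 'AllDirectories')

    # after the patch: the user-supplied filter is passed through
    $after = [IO.DirectoryInfo]::new($Path).GetFiles($Filter, 'AllDirectories')

    '{0} files without the filter, {1} matching {2}' -f $before.Count, $after.Count, $Filter

GetFiles() still throws on the first "Access Denied" error, which is why both
functions keep their Get-ChildItem fallback unchanged.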