Skip to content

Commit

Permalink
check for missing files option
Browse files Browse the repository at this point in the history
  • Loading branch information
jooleer committed May 9, 2023
1 parent 4af4333 commit 3c8bbb4
Showing 1 changed file with 34 additions and 25 deletions.
59 changes: 34 additions & 25 deletions folder_hash_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,25 @@
parser.add_argument('-s', '--secondary', help='Secondary directory, f.e. -s \"D:\\folder2\\\" or -s \"/home/user/dir2\"')
parser.add_argument('-d', '--disable', action='store_true', help='Disable multithreading (recommended when both directories are on the same drive)')
parser.add_argument('-m', '--missing', action='store_true', help='Search for missing files in secondary directory')
parser.add_argument('-n', '--nmissing', action='store_true', help='Search for missing files in primary directory')
parser.add_argument('-v', '--verbose', action='store_true', help='Enables verbose logging')
parser.add_argument('-c', '--custom', action='store_true', help='Use custom/hardcoded variables in stead of -p -s command-line arguments')

args = parser.parse_args()

# define the paths of the two directories to compare
if(args.custom):
folder1_path = r""
folder2_path = r""
primary_directory = r""
secondary_directory = r""
if(args.verbose):
print(f"Comparing:\n{folder1_path}\nagainst:\n{folder2_path}\n")
print(f"Comparing:\n{primary_directory}\nagainst:\n{secondary_directory}\n")
else:
if(not args.primary) or (not args.secondary):
sys.exit("No primary or secondary folder given, use -h for help")
folder1_path = args.primary
folder2_path = args.secondary
primary_directory = args.primary
secondary_directory = args.secondary
if(args.verbose):
print(f"Comparing:\n{folder1_path}\nagainst:\n{folder2_path}\n")
print(f"Comparing:\n{primary_directory}\nagainst:\n{secondary_directory}\n")

# hash algorithm (CRC32, MD5, SHA256)
hash_algorithm = "CRC32"
Expand Down Expand Up @@ -118,25 +119,25 @@ def main():
# start time
start = time.time()

f1_amount = get_files_amount(folder1_path)
f2_amount = get_files_amount(folder2_path)
f1_amount = get_files_amount(primary_directory)
f2_amount = get_files_amount(secondary_directory)

# multithreading
if(args.disable):
# run without multithreading
if(args.verbose):
print(bcolors.UNDERLINE + "Running jobs without multithreading" + bcolors.ENDC)
folder1_hashes = folder_generate_hashes(folder1_path)
folder2_hashes = folder_generate_hashes(folder2_path)
folder1_hashes = folder_generate_hashes(primary_directory)
folder2_hashes = folder_generate_hashes(secondary_directory)

else:
# use multithreading
if(args.verbose):
print(bcolors.UNDERLINE + "Running jobs with multithreading" + bcolors.ENDC)

pool = ThreadPool(processes=2)
async_result1 = pool.apply_async(folder_generate_hashes, args = (folder1_path, ))
async_result2 = pool.apply_async(folder_generate_hashes, args = (folder2_path, ))
async_result1 = pool.apply_async(folder_generate_hashes, args = (primary_directory, ))
async_result2 = pool.apply_async(folder_generate_hashes, args = (secondary_directory, ))

# close and join pools
pool.close()
Expand All @@ -146,25 +147,33 @@ def main():
folder1_hashes = async_result1.get()
folder2_hashes = async_result2.get()

# check for missing files in primary directory
if(args.nmissing):
for file_path in get_all_files(secondary_directory):
relative_path = os.path.relpath(file_path, secondary_directory)
if relative_path not in folder1_hashes:
if(args.verbose):
print(bcolors.WARNING + f"{relative_path} is missing from {primary_directory}." + bcolors.ENDC)
logging.info(f"[WARNING - MISSING FILE]: {relative_path}")
files_missing += 1
if files_missing > 0:
print(bcolors.FAIL + f"{files_missing} files missing from primary directory: {primary_directory}" + bcolors.ENDC)
else:
print(bcolors.OKGREEN + f"No files missing from primary directory: {primary_directory}" + bcolors.ENDC)

# check for missing files in secondary directory
if(args.missing):
# check for missing files in folder 1
# for file_path in get_all_files(folder2_path):
# relative_path = os.path.relpath(file_path, folder2_path)
# if relative_path not in folder1_hashes:
# if(args.verbose):
# print(bcolors.WARNING + f"{relative_path} is missing from {folder1_path}." + bcolors.ENDC)
# logging.info(f"[WARNING - MISSING FILE]: {relative_path}")
# files_missing += 1

# check for missing files in folder 2
for file_path in get_all_files(folder1_path):
relative_path = os.path.relpath(file_path, folder1_path)
for file_path in get_all_files(primary_directory):
relative_path = os.path.relpath(file_path, primary_directory)
if relative_path not in folder2_hashes:
if(args.verbose):
print(bcolors.WARNING + f"{relative_path} is missing from {folder2_path}." + bcolors.ENDC)
print(bcolors.WARNING + f"{relative_path} is missing from {secondary_directory}." + bcolors.ENDC)
logging.info(f"[WARNING - MISSING FILE]: {relative_path}")
files_missing += 1
if files_missing > 0:
print(bcolors.FAIL + f"{files_missing} files missing from secondary directory: {secondary_directory}" + bcolors.ENDC)
else:
print(bcolors.OKGREEN + f"No files missing from secondary directory: {secondary_directory}" + bcolors.ENDC)


# compare the hash values for each file in both folders
Expand Down

0 comments on commit 3c8bbb4

Please sign in to comment.