-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdf_validator.sh
78 lines (66 loc) · 2.55 KB
/
pdf_validator.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
#
# pdf_validator.sh - find every *.pdf under a directory, check each one with
# `pdfinfo`, and DELETE every file that pdfinfo rejects.
#
# Usage: pdf_validator.sh [input_directory [output_root]]
#   input_directory  directory scanned recursively (default: value below)
#   output_root      parent directory of the timestamped
#                    PDF_Validation_Output_* directory (default: value below)
#
# Requires: pdfinfo on PATH. The script refuses to run without it, because a
# missing pdfinfo would otherwise make every PDF look invalid and get deleted.

input_directory="/mnt/your-drive-letterk/a-directory-or-top-level" # Replace with the path to your input directory
output_root="/mnt/k" # Parent of the timestamped output directory
interval=1000 # Output progress every 1000 files

# Run-wide counters. They are globals so the final summary can read what the
# scan loop wrote.
count=0
total_invalid=0
deleted_count=0
total_files=0

#######################################
# Print every argument as one line on stdout and append it to the log file.
# Globals:   log_path (read)
# Arguments: one or more lines of text
#######################################
log_lines() {
  printf '%s\n' "$@"
  printf '%s\n' "$@" >> "$log_path"
}

#######################################
# List attached drive letters when running under a Windows bash (msys/win*).
# Does nothing on other platforms or when wmic is unavailable.
# Outputs:   drive letters, one per line, on stdout
#######################################
list_windows_disks() {
  [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win"* ]] || return 0
  command -v wmic > /dev/null 2>&1 || return 0
  echo "Attached disks on Windows:"
  # wmic emits CRLF line endings; strip the CR so it does not end up in output.
  wmic logicaldisk get caption | tr -d '\r' | awk '/^[A-Z]:/ {print $1}'
  echo
}

#######################################
# Emit a progress report to stdout and the log.
# Globals:   count, total_invalid, deleted_count, total_files, start_time (read)
# Arguments: $1 - path of the file that is about to be examined (the
#                 "Last processed file" label is kept for log compatibility)
#######################################
report_progress() {
  local upcoming_file=$1
  local -a lines=(
    "Processed: $count files"
    "Invalid PDFs found: $total_invalid"
    "Files deleted: $deleted_count"
    "Last processed file: $upcoming_file"
  )
  if ((count > 0)); then
    local now elapsed_time estimated_completion
    now=$(date +%s)
    elapsed_time=$((now - start_time))
    # Linear extrapolation from the average time per file so far.
    estimated_completion=$((elapsed_time * (total_files - count) / count))
    lines+=("Estimated time to completion: $estimated_completion seconds")
  fi
  log_lines "${lines[@]}"
  echo
}

#######################################
# Check every *.pdf below input_directory and delete the ones pdfinfo rejects.
# Globals:   input_directory, interval (read)
#            count, total_invalid, deleted_count, total_files (written)
#######################################
validate_pdfs() {
  local -a pdf_files=()
  local pdf_file

  # Collect the file list first: this gives the total needed for the time
  # estimate and keeps the loop in the current shell (a `find | while` pipeline
  # runs the loop in a subshell and loses the counters). NUL delimiters keep
  # names with spaces or newlines intact.
  while IFS= read -r -d '' pdf_file; do
    pdf_files+=("$pdf_file")
  done < <(find "$input_directory" -type f -name "*.pdf" -print0)
  total_files=${#pdf_files[@]}

  for pdf_file in "${pdf_files[@]}"; do
    if ((count % interval == 0)); then
      report_progress "$pdf_file"
    fi

    echo "Examining file: $pdf_file"
    if ! pdfinfo "$pdf_file" &> /dev/null; then
      total_invalid=$((total_invalid + 1))
      # Only count the file as deleted when rm actually succeeded.
      if rm -f -- "$pdf_file"; then
        deleted_count=$((deleted_count + 1))
        log_lines "Deleted corrupted or invalid PDF: $pdf_file"
      else
        printf 'Failed to delete invalid PDF: %s\n' "$pdf_file" >&2
        printf 'Failed to delete invalid PDF: %s\n' "$pdf_file" >> "$log_path"
      fi
    fi
    count=$((count + 1))
  done
}

#######################################
# Entry point: validate the environment, run the scan, print the summary.
# Arguments: $1 - optional input directory (overrides the default above)
#            $2 - optional output root (overrides the default above)
# Returns:   0 on success, 1 if a precondition fails
#######################################
main() {
  input_directory="${1:-$input_directory}"
  output_root="${2:-$output_root}"

  # Preconditions are checked before anything is created or deleted.
  if [[ ! -d "$input_directory" ]]; then
    printf 'Error: input directory not found: %s\n' "$input_directory" >&2
    return 1
  fi
  if ! command -v pdfinfo > /dev/null 2>&1; then
    printf 'Error: pdfinfo is not installed or not on PATH; refusing to run.\n' >&2
    return 1
  fi

  local timestamp end_time
  timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
  output_dir="${output_root}/PDF_Validation_Output_${timestamp}"
  log_file="Validation_Log_${timestamp}.txt"
  log_path="$output_dir/$log_file"
  if ! mkdir -p "$output_dir"; then # Create the output directory
    printf 'Error: cannot create output directory: %s\n' "$output_dir" >&2
    return 1
  fi

  echo "Scanning directory: $input_directory"
  echo "Logging details to: $log_path"
  echo

  count=0
  total_invalid=0
  deleted_count=0
  start_time=$(date +%s)

  list_windows_disks
  validate_pdfs

  end_time=$(date +%s)
  echo
  log_lines \
    "Scan complete." \
    "Processed: $count files" \
    "Invalid PDFs found: $total_invalid" \
    "Files deleted: $deleted_count" \
    "Time taken: $((end_time - start_time)) seconds"
  echo "Output saved to: $output_dir"
}

main "$@"