#!/usr/bin/env filebot -script
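
// Usage sketch (illustrative invocations only; option names correspond to the _args values read below,
// paths and values are placeholders):
//   filebot -script duplicates.groovy /path/to/media                   # report logical duplicates (xattr metadata)
//   filebot -script duplicates.groovy /path/to/media --mode binary     # report binary duplicates (size / moviehash / CRC32)
//   filebot -script duplicates.groovy /path/to/media --action delete   # move duplicate files to trash
//   filebot -script duplicates.groovy /path/to/media --order SIZE      # keep the largest copy in each group
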
delete = 'DELETE'.equalsIgnoreCase(_args.action)
binary = 'BINARY'.equalsIgnoreCase(_args.mode)
// Binary Duplicates: Keep Input Argument Order
// Logical Duplicates: Order by Video Quality
order = 'INPUT'  .equalsIgnoreCase(_args.order) ? 'INPUT'
      : 'QUALITY'.equalsIgnoreCase(_args.order) ? 'QUALITY'
      : 'SIZE'   .equalsIgnoreCase(_args.order) ? 'SIZE'
      : 'DATE'   .equalsIgnoreCase(_args.order) ? 'DATE'
      : 'TIME'   .equalsIgnoreCase(_args.order) ? 'TIME'
      : binary ? 'INPUT' : 'QUALITY'
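// e.g. --order SIZE keeps the largest copy in each group; if --order is not given,
// binary mode falls back to INPUT (command-line order) and logical mode to QUALITY
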
// sanity checks
if (args.size() == 0) {
	die "Invalid usage: no input"
}

def group(files) {
	// Binary Duplicates: Group by File Size, then Fast MovieHash, then CRC32 via Xattr
	if (binary) {
		def groups = [:]
		// 0. Group by File Key (i.e. physical link duplicates are always binary duplicates)
		def links = files.groupBy{ f -> any{ f.key }{ f.canonicalFile }{ f } }.entrySet()
		// 1. Group by File Size
		links.groupBy{ it.value[0].length() }.each{ size, size_fs ->
			if (size_fs.size() == 1) {
				groups += [ (size_fs[0].key) : size_fs[0].value ]
				return
			}
			// 2. Group by MovieHash
			size_fs.groupBy{ it.value[0].hash('moviehash') }.each{ hash, hash_fs ->
				if (hash_fs.size() == 1) {
					groups += [ (hash_fs[0].key) : hash_fs[0].value ]
					return
				}
				// 3. Group by CRC32 via Xattr
				hash_fs.groupBy{ it.value[0].CRC32 }.each{ crc, crc_fs ->
					groups += [ ([size, hash, crc]) : crc_fs.collectMany{ it.value } ]
				}
			}
		}
		return groups
	}

	// Logical Duplicates: Group by Xattr Metadata Object
	return files.groupBy{ f ->
		return allOf{ f.metadata }{ f.metadata && _args.format ? getMediaInfo(f, _args.format) : null }
	}
}
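
// Note: a binary group key is either a shared file key (step 0) or a [size, hash, crc] triple;
// a logical group key is the list of values collected by allOf above, so files without
// xattr metadata produce no usable key and are skipped by the duplicate loop below
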
def order(files) {
	switch(order) {
		case 'INPUT':
			return files
		case 'QUALITY':
			return files.toSorted(VideoQuality.DESCENDING_ORDER)
		case 'SIZE':
			return files.toSorted{ -(it.length()) }
		case 'DATE':
			return files.toSorted{ -(it.mediaCharacteristics?.creationTime?.toEpochMilli() ?: it.creationDate) }
		case 'TIME':
			return files.toSorted{ -(it.lastModified()) }
	}
}
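
// VideoQuality.DESCENDING_ORDER is FileBot's built-in best-quality-first comparator,
// so with --order QUALITY (the logical-mode default) index 0 below is the best copy
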
// select video files (and preserve input argument order)
def files = args.collectMany{ it.getFiles{ it.isVideo() } }
def duplicates = []
group(files).each{ g, fs ->
	if (g && fs.size() > 1) {
		log.info "[*] ${g.join(' / ')}"
		order(fs).eachWithIndex{ f, i ->
			if (i == 0) {
				log.finest "[+] 1. $f"
			} else {
				log.warning "[-] ${i+1}. $f"
				duplicates += f
			}
		}
	}
}
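
// Illustrative output (hypothetical paths): the first file of each group is kept,
// every further file is flagged and collected for post-processing, e.g.
//   [*] <group key>
//   [+] 1. /media/Movie.2009.1080p.mkv
//   [-] 2. /media/Movie.2009.720p.mkv
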
// no duplicates; return with NOOP
if (duplicates.size() == 0) {
	die "0 duplicates", ExitCode.NOOP
}

// continue with post-processing
log.fine "${duplicates.size()} duplicates"
// -mediainfo post-processing
if (_args.mediaInfo) {
	getMediaInfo(file: duplicates)
}

// -rename post-processing
if (_args.rename) {
	rename(file: duplicates, db: binary ? 'file' : 'xattr')
}

// delete duplicate files
if (delete) {
	duplicates.each{ f ->
		log.info "[DELETE] $f"
		f.trash()
	}
}