-
Notifications
You must be signed in to change notification settings - Fork 17
/
update.py
executable file
·328 lines (270 loc) · 9.99 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/env python3
import glob
import hashlib
import html
import json
import os
import pathlib
import readline
import shutil
import signal
import subprocess
import sys
assert sys.version_info >= (3, 6), "Python version too old. Python >=3.6.0 needed."

# Accepted values for the "license" field: a subset of
# https://spdx.org/licenses/. The empty string lets the field be left blank.
# Each identifier is listed exactly once.
valid_licenses = [
    "",
    "Public Domain",
    "CC0-1.0",
    "Unlicense",
    "CC-BY-3.0",
    "CC-BY-NC-SA-3.0",
    "CC-BY-NC-SA-4.0",
    "CC-BY-SA-4.0",
    "CC-BY-NC-4.0",
    "CC-BY-SA-3.0",
    "GFDL-1.3-or-later",
    "LAL-1.3",
]

# Accepted values for the "language" field; the empty string lets the field
# be left blank.
valid_languages = ["", "dutch", "english", "french", "german"]
# Only works on *nix systems
def rlinput(prompt, prefill=""):
    """Read a line from the terminal with an editable, pre-filled answer.

    A readline startup hook inserts ``prefill`` into the line buffer before
    the prompt is shown. The hook is removed again afterwards, whether
    ``input`` returns normally or raises.

    Returns the line entered by the user.
    """
    def insert_prefill():
        readline.insert_text(prefill)

    readline.set_startup_hook(insert_prefill)
    try:
        answer = input(prompt)
    finally:
        # Calling without arguments removes the hook again.
        readline.set_startup_hook()
    return answer
def check_duplicate_images():
    """Report files below ``images/`` that have identical content.

    Every regular file in the ``images`` tree (relative to the current
    working directory) is hashed with SHA-1. For each file whose digest was
    already seen, a warning naming both files is printed.

    Returns True if at least one duplicate was found, otherwise False.
    """
    def hash_file(path):
        # Hash in chunks so large images are not loaded into memory at once.
        hasher = hashlib.sha1()
        with open(path, "rb") as file:
            for chunk in iter(lambda: file.read(65536), b""):
                hasher.update(chunk)
        return hasher.hexdigest()

    dups_found = False
    seen = {}  # digest -> first path found with that content

    # "**" is required for recursive=True to descend into sub folders; the
    # bare "images/" pattern only ever matched the directory itself.
    # Sorting makes the order of the warnings reproducible.
    for entry in sorted(glob.glob("images/**", recursive=True)):
        if not os.path.isfile(entry):
            continue
        digest = hash_file(entry)
        if digest in seen:
            print("Warning: Files identical: '{}' and '{}'".format(entry, seen[digest]))
            dups_found = True
        else:
            seen[digest] = entry

    return dups_found
def get_defaults_entry(db, prev, image):
    """Pick the meta data offered as defaults for ``image``.

    The image name (without its extension) is compared against every key of
    ``db``. The key sharing the longest common prefix wins; on a tie the key
    iterated first is kept. Its entry is returned if the shared prefix is
    longer than 4 characters and covers more than 60% of the image name.
    Otherwise ``prev`` is returned.
    """
    stem = os.path.splitext(image)[0]

    best_key = ''
    best_len = 0
    for candidate in db:
        shared = len(os.path.commonprefix([candidate, stem]))
        if shared > best_len:
            best_key = candidate
            best_len = shared

    # Too short a match: use previous image meta data as default.
    if best_len <= 4:
        return prev
    # Common prefix must be >60% of the image name length.
    if (100 * best_len) / len(stem) <= 60:
        return prev
    return db[best_key]
def is_valid_author(author):
    """Validator for the author field; every value is accepted."""
    return True
def is_valid_title(author):
    """Validator for the title field; every value is accepted.

    The parameter receives the title text; its name is kept unchanged.
    """
    return True
def is_valid_notes(notes):
    """Validator for the notes field; every value is accepted."""
    return True
def is_valid_year(year):
    """Validate the release year entered by the user.

    ``year`` is the raw text from the prompt. A blank value is accepted
    because the year is optional: the prompt is pre-filled with an empty
    string and blank fields are simply not stored. Any other value must be
    an integer strictly between 1970 and 2077.

    Returns True if the value is acceptable. Otherwise prints a hint and
    returns False instead of raising, so the caller can ask again.
    """
    text = year.strip()
    if not text:
        return True

    try:
        y = int(text)
    except ValueError:
        print("Year must be a number.")
        return False

    if 1970 < y < 2077:
        return True
    print("Year must be between 1971 and 2076.")
    return False
def is_valid_tags(tags):
    """Accept ``tags`` only if it contains no upper case letters.

    Prints a hint and returns False when lower-casing would change the text.
    """
    if tags == tags.lower():
        return True
    print("Only lower case letters please.")
    return False
def is_valid_license(licenses):
    """Validate a license value: one or two identifiers separated by "/".

    Each identifier must be listed in ``valid_licenses``. Identifiers are
    checked in order; a third identifier is rejected as soon as it is
    reached.

    Returns True if the value is acceptable, otherwise prints the reason and
    returns False.
    """
    for position, name in enumerate(licenses.split("/")):
        if position >= 2:
            print("Only two licenses allowed")
            return False
        if name not in valid_licenses:
            # The message previously carried a stray closing apostrophe.
            print("'{}' not in {}".format(name, valid_licenses))
            return False
    return True
def is_valid_language(language):
    """Accept ``language`` only if it is listed in ``valid_languages``.

    Prints the list of valid languages and returns False otherwise.
    """
    if language in valid_languages:
        return True
    print("Valid languages: {}".format(valid_languages))
    return False
def is_valid_link(link):
    """Accept an empty link, or one starting with https:// or http://.

    Prints a hint and returns False for any other non-empty value.
    """
    if not link:
        return True
    if link.startswith(("https://", "http://")):
        return True
    print("Link must start with https://")
    return False
def ask_value(prompt, is_valid, prefill=""):
    """Prompt repeatedly until ``is_valid`` accepts the entered text.

    The first prompt is pre-filled with ``prefill``; every retry is
    pre-filled with the rejected input so it can be corrected in place.
    Validation sees the text exactly as typed.

    Returns the accepted text with surrounding whitespace stripped.
    """
    candidate = prefill
    while True:
        candidate = rlinput(prompt, candidate)
        if is_valid(candidate):
            return candidate.strip()
# add or update image
def handle_image(i, n, prev, db, image):
    """Interactively collect the meta data for one image folder.

    ``i`` and ``n`` are only used for the "[i/n]" progress banner. ``prev``
    is a one-element list holding the entry stored for the previous image;
    it is replaced when this image is stored. ``db`` maps image folder names
    to their meta data and gets ``db[image]`` added or overwritten.

    Returns 1 if the entry was stored, 0 if the user skipped the image and
    -1 if the user asked to exit.
    """
    banner = '#######################################'
    print(banner)
    print('[{}/{}] "images/{}"'.format(i, n, image))
    print(banner)

    # get default values
    default = get_defaults_entry(db, prev[0], image)

    # (key, prompt, validator) in the order the questions are asked
    questions = (
        ("tags", "Tags: ", is_valid_tags),
        ("title", "Title: ", is_valid_title),
        ("author", "Author: ", is_valid_author),
        ("notes", "Notes: ", is_valid_notes),
        ("license", "License: ", is_valid_license),
        ("language", "Language: ", is_valid_language),
        ("link", "Link: ", is_valid_link),
        ("year", "Release Year: ", is_valid_year),
    )
    values = {key: default.get(key, "") for key, _, _ in questions}

    while True:
        for key, prompt, validator in questions:
            values[key] = ask_value(prompt, validator, values[key])

        answer = ask_value("next (1), again (2), skip (3), exit (4): ",
                           lambda v: v in ["1", "2", "3", "4"], "1")
        if answer == "1":
            break
        if answer == "3":
            return 0
        if answer == "4":
            return -1
        # "2": ask everything again, pre-filled with the current answers

    # Only non-empty fields are stored.
    stored_order = ("tags", "title", "language", "author",
                    "notes", "license", "link", "year")
    obj = {key: values[key] for key in stored_order if len(values[key]) > 0}

    db[image] = obj
    prev[0] = obj
    print("done")
    return 1
def add_previews(db):
    """Create ``images/<name>/preview.webp`` for every entry that lacks one.

    For each key of ``db`` without a preview file, the .png, .svg and .pdf
    files directly inside ``images/<name>/`` are tried in that order of
    preference. Each is passed to the ``magick`` command with
    ``-resize 300`` until one conversion succeeds. Progress and failures are
    printed; nothing is returned.

    The command is started without a shell, so file names containing quotes
    or other shell meta characters are passed through unchanged.
    """
    image_exts = (".png", ".svg", ".pdf")
    ext_rank = {ext: rank for rank, ext in enumerate(image_exts)}

    def find_images_paths(name):
        # glob.escape keeps "[", "*" or "?" in a folder name literal.
        pattern = "images/{}/*".format(glob.escape(name))
        candidates = [
            entry
            for entry in glob.glob(pattern)
            if os.path.isfile(entry) and entry.endswith(image_exts)
        ]

        # sort by extension preference, then by path for a stable order
        def sort_key(path):
            suffix = pathlib.Path(path).suffix
            return (ext_rank.get(suffix, len(image_exts)), path)

        return sorted(candidates, key=sort_key)

    def convert(source, target):
        # Return True if magick ran and exited with status 0.
        try:
            result = subprocess.run(["magick", source, "-resize", "300", target])
        except OSError as err:
            # e.g. magick is not installed
            print("Could not run magick: {}".format(err))
            return False
        return result.returncode == 0

    for name in db:
        preview = "images/{}/preview.webp".format(name)
        if os.path.isfile(preview):
            continue

        image_paths = find_images_paths(name)
        print("Create preview image: '{}'".format(preview))

        done = False
        for path in image_paths:
            print("Try with '{}'".format(path))
            if convert(path, preview):
                done = True
                break

        if not done:
            if len(image_paths) == 0:
                print("No image found for {}".format(preview))
            else:
                print("Failed to create preview for {}".format(preview))
def update_file_listings(path, create_index=False):
    """Write an ``index.html`` file listing into the directories below ``path``.

    Every sub directory of ``path`` gets an index page, recursively. ``path``
    itself only gets one when ``create_index`` is true. A page links every
    entry of its directory except ``index.html`` and ``preview.webp``.

    Entries are listed in sorted order so regenerated pages do not change
    with the order the file system returns names. Names are HTML-escaped,
    and the file is written as UTF-8 to match the charset it declares.
    """
    entries = sorted(
        entry
        for entry in glob.glob("{}/*".format(glob.escape(path)))
        if not entry.endswith("/index.html")
    )

    if create_index:
        with open("{}/index.html".format(path), "w", encoding="utf-8") as file:
            title = html.escape(os.path.basename(path))
            file.write("<!DOCTYPE html>\n")
            file.write("<html>\n <head>\n")
            file.write(" <title>Files for {}</title>\n".format(title))
            file.write(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n")
            file.write(" <link rel=\"stylesheet\" href=\"../../listing.css\">\n")
            file.write(" </head>\n <body>\n")
            file.write(" <h1>Files for {}</h1>\n".format(title))
            file.write(" <hr>\n <ul>\n")
            for entry in entries:
                name = os.path.basename(entry)
                if name != "preview.webp":
                    # escape once; quote=True also protects the href attribute
                    safe = html.escape(name)
                    file.write(" <li><a href=\"{}\">{}</a></li>\n".format(safe, safe))
            file.write(" </ul>\n </body>\n</html>\n")

    for entry in entries:
        if os.path.isdir(entry):
            update_file_listings(entry, True)
def save_database(db, new_image_count):
    """Serialize ``db`` to ``data.json`` and report the number of new entries.

    The file is rewritten on every call, even when nothing was added, so that
    manual edits to data.json come out with sorted keys and uniform
    indentation. ``new_image_count`` is only used in the printed summary.
    """
    with open("data.json", "w") as outfile:
        json.dump(db, outfile, sort_keys=True, indent=" ")

    summary = "Wrote {} new entries to data.json => done"
    print(summary.format(new_image_count))
def main():
    """Entry point: add meta data for new image folders and refresh outputs.

    With command line arguments, each argument is either a file outside
    ``images/`` (copied into a new folder ``images/<lower-cased base name>/``)
    or an existing folder below ``images/``. Without arguments, every folder
    in ``images/`` that has no entry in data.json is processed.

    The user is asked for the meta data of each image folder in sorted
    order. Afterwards missing previews are created, the file listings are
    regenerated and data.json is rewritten. Nothing is written if duplicate
    files are detected, if the user declines to start, or on Ctrl+C.
    """
    def get_database():
        with open("data.json") as file:
            return json.load(file)

    def get_image_set():
        return {
            image
            for image in os.listdir("images/")
            if os.path.isdir("images/{}".format(image))
        }

    db = get_database()

    images = []
    if len(sys.argv) > 1:
        for image in sys.argv[1:]:
            if not image.startswith("images/"):
                filename = os.path.basename(image)
                base = os.path.splitext(filename)[0].lower()
                dst_folder = "images/{}".format(base)
                os.makedirs(dst_folder, exist_ok=True)
                shutil.copyfile(image, "{}/{}".format(dst_folder, filename))
                images.append(base)
            elif os.path.isdir(image):
                # normpath drops a trailing slash ("images/foo/"), which
                # would otherwise make basename return an empty string.
                images.append(os.path.basename(os.path.normpath(image)))
            else:
                print("folder {} does not exist".format(image))
                sys.exit(1)
    else:
        images = list(get_image_set() - set(db))

    # Make the list distinct first and sort afterwards; going through a set
    # after sorting would throw the order away again.
    images = sorted(set(images))

    if check_duplicate_images():
        print("Please remove duplicate files first!")
        return

    new_image_count = 0

    def sigint_handler(sig, frame):
        # Reads the current value of new_image_count from the enclosing scope.
        if new_image_count > 0:
            print("\nNothing saved")
        print("")
        sys.exit(0)

    if len(images) > 0:
        # Exit Ctrl+C gracefully
        signal.signal(signal.SIGINT, sigint_handler)

        answer = input("Start to add {} new image folders [Y, n]? ".format(len(images)))
        if answer.strip().lower() == "n":
            return

        prev = [{}]  # use list for pass by reference
        for i, image in enumerate(images):
            ret = handle_image(i + 1, len(images), prev, db, image)
            if ret > 0:
                new_image_count += 1
            if ret < 0:
                break

    add_previews(db)
    update_file_listings("images")
    save_database(db, new_image_count)
# Run the interactive update only when executed as a script.
if __name__ == "__main__":
    main()