forked from duckduckgrayduck/bulk-reprocress-addon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
32 lines (25 loc) · 1.01 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""
This DocumentCloud Add-On allows you to bulk reprocess documents on DocumentCloud
"""
from documentcloud.addon import SoftTimeOutAddOn
from documentcloud.constants import BULK_LIMIT
from documentcloud.toolbox import grouper
class Reprocess(SoftTimeOutAddOn):
    """Add-On that asks DocumentCloud to reprocess a set of documents in bulk."""

    def main(self):
        """Submit the run's documents for reprocessing once the user confirms.

        Reads three values from ``self.data``:

        * ``force_ocr`` -- optional, falls back to ``False`` when absent.
        * ``language`` -- read with item access, so it must be present.
        * ``sure`` -- confirmation flag; when falsy nothing is submitted and
          a message is set on the run instead.

        Documents from ``self.get_documents()`` are split into groups of
        ``BULK_LIMIT`` and each group is POSTed to ``documents/process/``.
        """
        force_ocr = self.data.get("force_ocr", False)
        language = self.data["language"]

        # Guard clause: without explicit confirmation, report and stop.
        if not self.data.get("sure"):
            self.set_message(
                "You did not select sure, this Add-On did not do anything."
            )
            return

        for batch in grouper(self.get_documents(), BULK_LIMIT):
            payload = []
            for document in batch:
                # Skip None entries -- presumably padding that grouper adds
                # to fill out the final group (confirm against the toolbox).
                if document is None:
                    continue
                payload.append(
                    {
                        "id": document.id,
                        "force_ocr": force_ocr,
                        "language": language,
                    }
                )
            self.client.post("documents/process/", json=payload)
if __name__ == "__main__":
    # Script entry point: build the Add-On and run its main routine.
    addon = Reprocess()
    addon.main()