generated from MuckRock/documentcloud-hello-world-addon
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b53734f
commit 765224f
Showing
1 changed file
with
12 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,24 @@ | ||
""" | ||
This is a hello world add-on for DocumentCloud. | ||
It demonstrates how to write an add-on that can be activated from the | ||
DocumentCloud add-on system and run using Github Actions. It receives data | ||
from DocumentCloud via the request dispatch and writes data back to | ||
DocumentCloud using the standard API. | ||
This is an Add-On that uses docTR (https://github.com/mindee/doctr) to OCR documents for DocumentCloud. | ||
""" | ||
|
||
from documentcloud.addon import AddOn | ||
|
||
|
||
class HelloWorld(AddOn): | ||
"""An example Add-On for DocumentCloud.""" | ||
class docTR(AddOn): | ||
"""Class definition""" | ||
|
||
def main(self): | ||
"""The main add-on functionality goes here.""" | ||
# fetch your add-on specific data | ||
name = self.data.get("name", "world") | ||
|
||
self.set_message("Hello World start!") | ||
|
||
# add a hello note to the first page of each selected document | ||
model = ocr_predictor(pretrained=True) | ||
for document in self.get_documents(): | ||
# get_documents will iterate through all documents efficiently, | ||
# either selected or by query, depending on which is passed in | ||
document.annotations.create(f"Hello {name}!", 0) | ||
|
||
with open("hello.txt", "w+") as file_: | ||
file_.write("Hello world!") | ||
self.upload_file(file_) | ||
|
||
self.set_message("Hello World end!") | ||
self.send_mail("Hello World!", "We finished!") | ||
|
||
pdf_name = f"'{document.title}.pdf'" | ||
with archive.open(f"{document.slug} - {document.id}.pdf", "w") as pdf: | ||
pdf.write(document.pdf) | ||
doc = DocumentFile.from_pdf(pdf_name) | ||
result = model(doc) | ||
json_export = result.export() | ||
print(json_export) | ||
|
||
if __name__ == "__main__": | ||
HelloWorld().main() | ||
docTR().main() |