Skip to content

Commit

Permalink
Barebones of main.py
Browse files Browse the repository at this point in the history
  • Loading branch information
duckduckgrayduck committed Sep 19, 2023
1 parent b53734f commit 765224f
Showing 1 changed file with 12 additions and 26 deletions.
38 changes: 12 additions & 26 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,24 @@
"""
This is a hello world add-on for DocumentCloud.
It demonstrates how to write an add-on that can be activated from the
DocumentCloud add-on system and run using Github Actions. It receives data
from DocumentCloud via the request dispatch and writes data back to
DocumentCloud using the standard API
This is an Add-On that uses docTR https://github.com/mindee/doctr to OCR documents for DocumentCloud
"""

from documentcloud.addon import AddOn


class HelloWorld(AddOn):
"""An example Add-On for DocumentCloud."""
class docTR(AddOn):
"""Class definition"""

def main(self):
"""The main add-on functionality goes here."""
# fetch your add-on specific data
name = self.data.get("name", "world")

self.set_message("Hello World start!")

# add a hello note to the first page of each selected document
model = ocr_predictor(pretrained=True)
for document in self.get_documents():
# get_documents will iterate through all documents efficiently,
# either selected or by query, depending on which is passed in
document.annotations.create(f"Hello {name}!", 0)

with open("hello.txt", "w+") as file_:
file_.write("Hello world!")
self.upload_file(file_)

self.set_message("Hello World end!")
self.send_mail("Hello World!", "We finished!")

pdf_name = f"'{document.title}.pdf'"
with archive.open(f"{document.slug} - {document.id}.pdf", "w") as pdf:
pdf.write(document.pdf)
doc = DocumentFile.from_pdf(pdf_name)
result = model(doc)
json_export = result.export()
print(json_export)

if __name__ == "__main__":
HelloWorld().main()
docTR().main()

0 comments on commit 765224f

Please sign in to comment.