From 765224fe7e938a9c5ab8fc626386703341ec3911 Mon Sep 17 00:00:00 2001 From: s <102841251+duckduckgrayduck@users.noreply.github.com> Date: Tue, 19 Sep 2023 15:24:40 -0500 Subject: [PATCH] Barebones of main.py --- main.py | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/main.py b/main.py index fc5f449..d39e46a 100644 --- a/main.py +++ b/main.py @@ -1,38 +1,24 @@ """ -This is a hello world add-on for DocumentCloud. - -It demonstrates how to write a add-on which can be activated from the -DocumentCloud add-on system and run using Github Actions. It receives data -from DocumentCloud via the request dispatch and writes data back to -DocumentCloud using the standard API +This is an Add-On that uses docTR https://github.com/mindee/doctr to OCR documents for DocumentCloud """ from documentcloud.addon import AddOn -class HelloWorld(AddOn): - """An example Add-On for DocumentCloud.""" +class docTR(AddOn): + """Class definition""" def main(self): """The main add-on functionality goes here.""" - # fetch your add-on specific data - name = self.data.get("name", "world") - - self.set_message("Hello World start!") - - # add a hello note to the first page of each selected document + model = ocr_predictor(pretrained=True) for document in self.get_documents(): - # get_documents will iterate through all documents efficiently, - # either selected or by query, dependeing on which is passed in - document.annotations.create(f"Hello {name}!", 0) - - with open("hello.txt", "w+") as file_: - file_.write("Hello world!") - self.upload_file(file_) - - self.set_message("Hello World end!") - self.send_mail("Hello World!", "We finished!") - + pdf_name = f"'{document.title}.pdf'" + with archive.open(f"{document.slug} - {document.id}.pdf", "w") as pdf: + pdf.write(document.pdf) + doc = DocumentFile.from_pdf(pdf_name) + result = model(doc) + json_export = result.export() + print(json_export) if __name__ == "__main__": - HelloWorld().main() + docTR().main()