From 3ea7478f048f99f94ab520776f103bc865e2e992 Mon Sep 17 00:00:00 2001 From: Jojo Ortiz Date: Wed, 17 Apr 2024 14:56:03 -0700 Subject: [PATCH 1/4] add parse.sh & extract_parse.sh, move common methods/var to open_parser_base.sh --- extract_parse.sh | 29 ++++++++++++++++++++ open_parser.sh => open_parser_base.sh | 39 +++------------------------ parse.sh | 21 +++++++++++++++ 3 files changed, 53 insertions(+), 36 deletions(-) create mode 100644 extract_parse.sh rename open_parser.sh => open_parser_base.sh (80%) create mode 100644 parse.sh diff --git a/extract_parse.sh b/extract_parse.sh new file mode 100644 index 0000000..786591c --- /dev/null +++ b/extract_parse.sh @@ -0,0 +1,29 @@ +#!/bin/bash +source open_parser_base.sh + +if [ "$#" -lt 3 ]; then + echo "Error: Missing arguments + Usage: $0 " + exit 1 +fi + +apiKey="$1" +func="$2" +file_path="$3" + +upload +if [ "$func" == "extract" ]; then + extract +elif [ "$func" == "parse" ]; then + prompt="$4" + mode="$5" + if [ -z "$mode" ] || [ "$mode" == "" ] || [ "$mode" == "advanced" ]; then + textract=true + else + textract=false + fi + upload + parse +fi + +echo "$result" diff --git a/open_parser.sh b/open_parser_base.sh similarity index 80% rename from open_parser.sh rename to open_parser_base.sh index 306da6f..a01ee58 100644 --- a/open_parser.sh +++ b/open_parser_base.sh @@ -1,5 +1,5 @@ -UPLOAD_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload" -EXTRACT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract" +UPLOAD_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload" +EXTRACT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract" PARSE_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/parse" uid="null" @@ -30,13 +30,6 @@ upload() { exit 1 fi - # res=$(echo "$tmp_data" | jq -r 'to_entries | map("-F \"\(.key)=\(.value)\" \\ ") | .[] ') - # echo "${res[@]}" - # local status=$(curl -X POST \ - # "${res[@]}" \ - # -F "file=@$file_path" \ - # "$tmp_url") - local aws_access_key_id=$(echo "$tmp_data" | jq -r '."AWSAccessKeyId"') local x_amz_security_token=$(echo "$tmp_data" | jq -r '."x-amz-security-token"') local policy=$(echo "$tmp_data" | jq -r '."policy"') @@ -60,8 +53,6 @@ upload() { -F "x-amz-meta-user_prompt=$x_amz_meta_user_prompt" \ -F "file=@$file_path" \ "$tmp_url") - - # echo "upload done" } extract() { @@ -76,7 +67,6 @@ extract() { -d "$payload" \ "$EXTRACT_URL") - # echo "$response" result=$(echo "$response" | jq -r '.result.file_content') } @@ -94,28 +84,5 @@ parse() { -d "$payload" \ "$PARSE_URL") - # echo "$response" result=$(echo "$response" | jq -r '.result') -} - - -apiKey="$1" -func="$2" -file_path="$3" - -upload -if [ "$func" == "extract" ]; then - extract -elif [ "$func" == "parse" ]; then - prompt="$4" - mode="$5" - if [ -z "$mode" ] || [ "$mode" == "" ] || [ "$mode" == "advanced" ]; then - textract=true - else - textract=false - fi - upload - parse -fi - -echo "$result" +} \ No newline at end of file diff --git a/parse.sh b/parse.sh new file mode 100644 index 0000000..f2e1565 --- /dev/null +++ b/parse.sh @@ -0,0 +1,21 @@ +#!/bin/bash +source open_parser_base.sh + +# Check if number of arguments is not equal to 3 +if [ "$#" -lt 2 ]; then + echo "Error: Missing arguments + Usage: $0 " + exit 1 +fi + +apiKey="$1" +file_path="$2" +prompt="$3" +textract=false + +echo "Parsing $file_path..." + +upload +parse + +echo "$result" \ No newline at end of file From 77392cbf380d8aefc36f7d2f11af342bedc4dcab Mon Sep 17 00:00:00 2001 From: Jojo Ortiz Date: Wed, 17 Apr 2024 14:56:23 -0700 Subject: [PATCH 2/4] add .vscode --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b70daee..65a8a3d 100644 --- a/.gitignore +++ b/.gitignore @@ -160,4 +160,7 @@ cython_debug/ #.idea/ # mac -.DS_Store \ No newline at end of file +.DS_Store + +# vscode +.vscode/ \ No newline at end of file From c33eb171d6eee18c749e4d4e32f96a8f2a39cbaa Mon Sep 17 00:00:00 2001 From: Jojo Ortiz Date: Wed, 17 Apr 2024 14:56:35 -0700 Subject: [PATCH 3/4] update README for parse.sh example --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ee5f16..1b8907b 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,17 @@ pip3 install open_parser ``` ## :bashfile usage +To use OpenParse via `curl` requests, you can run the following bash command from the root folder of this repository: +``` +bash parse.sh +``` +For example, to extract a table from a PDF file, you can run the following command: ``` -bash open_parser.sh +bash parse.sh gl************************************** /path/to/your/file.pdf "Return the table in a JSON format with + each box's key and value." ``` + ## :scroll: Examples OpenParse can extract text, numbers and symbols from PDF, images, etc. Check out each notebook below to run OpenParse within 10 lines of code! From 0f04aa02c7b481799173663d195099582184d26c Mon Sep 17 00:00:00 2001 From: Jojo Ortiz Date: Wed, 17 Apr 2024 15:03:17 -0700 Subject: [PATCH 4/4] address review comments --- README.md | 3 +-- parse.sh | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 1b8907b..93b9ce7 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,7 @@ bash parse.sh "