Skip to content

Commit

Permalink
Merge pull request #20 from CambioML/jojo-branch
Browse files Browse the repository at this point in the history
Add parse.sh, update README
  • Loading branch information
Cambio ML authored Apr 17, 2024
2 parents c6c80be + 0f04aa0 commit 6d0523c
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 38 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,7 @@ cython_debug/
#.idea/

# mac
.DS_Store
.DS_Store

# vscode
.vscode/
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@ pip3 install open_parser
```
## Bash file usage
To use OpenParse via `curl` requests, you can run the following bash command from the root folder of this repository:
```
bash parse.sh <your apiKey> <file path> <prompt for parse (optional, default="")>
```
For example, to extract a table from a PDF file, you can run the following command:
```
bash open_parser.sh <your apiKey> <job type: extract | parse> <file path> <prompt for parse (optional, default="")> <parse mode (optional, default=basic): basic | advanced>
bash parse.sh gl************************************** /path/to/your/file.pdf "Return the table in a JSON format with each box's key and value."
```
## :scroll: Examples
OpenParse can extract text, numbers and symbols from PDF, images, etc. Check out each notebook below to run OpenParse within 10 lines of code!
Expand Down
29 changes: 29 additions & 0 deletions extract_parse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# extract_parse.sh - command-line front end for the OpenParse "extract" and
# "parse" jobs.
#
# Usage:
#   extract_parse.sh <api_key> <extract|parse> <file path> [prompt] [mode]
#
# Arguments:
#   $1  API key (stored in apiKey)
#   $2  job type: "extract" or "parse" (stored in func)
#   $3  path of the file to upload (stored in file_path)
#   $4  prompt, only read for "parse" jobs (stored in prompt)
#   $5  parse mode, only read for "parse" jobs (stored in mode)
#
# The upload, extract and parse functions are provided by
# open_parser_base.sh; this script prints the `result` variable once the
# selected job has run.
#
# Exit status: 1 when arguments are missing, the job type is unknown, or the
# helper library cannot be found.

# Locate the helper library next to this script rather than in the caller's
# working directory, so the script can be started from anywhere.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
base_lib="$script_dir/open_parser_base.sh"
if [ ! -f "$base_lib" ]; then
  printf '%s\n' "Error: cannot find $base_lib" >&2
  exit 1
fi
# shellcheck source=open_parser_base.sh
source "$base_lib"

if [ "$#" -lt 3 ]; then
  # The inner double quotes are escaped so that default="" is really printed;
  # unescaped they would close and reopen the string and vanish.
  printf '%s\n' \
    "Error: Missing arguments" \
    "Usage: $0 <api_key> <job type: extract | parse> <file path> <prompt for parse (optional, default=\"\")> <parse mode (optional, default=basic): basic | advanced>" >&2
  exit 1
fi

apiKey="$1"
func="$2"
file_path="$3"

# Validate the job type before anything is uploaded, and upload exactly once
# per job.
case "$func" in
  extract)
    upload
    extract
    ;;
  parse)
    prompt="$4"
    mode="$5"
    # NOTE(review): the usage text advertises "basic" as the default mode, but
    # an omitted mode enables textract exactly like "advanced" does. The
    # existing behavior is kept here - confirm which default is intended.
    if [ -z "$mode" ] || [ "$mode" == "advanced" ]; then
      textract=true
    else
      textract=false
    fi
    # Upload after prompt/textract are set, as the original parse path did.
    upload
    parse
    ;;
  *)
    printf '%s\n' "Error: Unknown job type '$func' (expected: extract | parse)" >&2
    exit 1
    ;;
esac

printf '%s\n' "$result"
39 changes: 3 additions & 36 deletions open_parser.sh → open_parser_base.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
UPLOAD_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload"
EXTRACT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract"
UPLOAD_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload"
EXTRACT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract"
PARSE_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/parse"

uid="null"
Expand Down Expand Up @@ -30,13 +30,6 @@ upload() {
exit 1
fi

# res=$(echo "$tmp_data" | jq -r 'to_entries | map("-F \"\(.key)=\(.value)\" \\ ") | .[] ')
# echo "${res[@]}"
# local status=$(curl -X POST \
# "${res[@]}" \
# -F "file=@$file_path" \
# "$tmp_url")

local aws_access_key_id=$(echo "$tmp_data" | jq -r '."AWSAccessKeyId"')
local x_amz_security_token=$(echo "$tmp_data" | jq -r '."x-amz-security-token"')
local policy=$(echo "$tmp_data" | jq -r '."policy"')
Expand All @@ -60,8 +53,6 @@ upload() {
-F "x-amz-meta-user_prompt=$x_amz_meta_user_prompt" \
-F "file=@$file_path" \
"$tmp_url")

# echo "upload done"
}

extract() {
Expand All @@ -76,7 +67,6 @@ extract() {
-d "$payload" \
"$EXTRACT_URL")

# echo "$response"
result=$(echo "$response" | jq -r '.result.file_content')
}

Expand All @@ -94,28 +84,5 @@ parse() {
-d "$payload" \
"$PARSE_URL")

# echo "$response"
result=$(echo "$response" | jq -r '.result')
}


apiKey="$1"
func="$2"
file_path="$3"

upload
if [ "$func" == "extract" ]; then
extract
elif [ "$func" == "parse" ]; then
prompt="$4"
mode="$5"
if [ -z "$mode" ] || [ "$mode" == "" ] || [ "$mode" == "advanced" ]; then
textract=true
else
textract=false
fi
upload
parse
fi

echo "$result"
}
20 changes: 20 additions & 0 deletions parse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# parse.sh - upload a file to OpenParse and print the parse result.
#
# Usage:
#   parse.sh <apiKey> <file_path> [prompt]
#
# Arguments:
#   $1  API key (stored in apiKey)
#   $2  path of the file to upload (stored in file_path)
#   $3  optional prompt (stored in prompt; empty when omitted)
#
# textract is always set to false by this script. The upload and parse
# functions are provided by open_parser_base.sh; this script prints the
# `result` variable after they have run.
#
# Exit status: 1 when arguments are missing or the helper library cannot be
# found.

# Locate the helper library next to this script rather than in the caller's
# working directory, so the script can be started from anywhere.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
base_lib="$script_dir/open_parser_base.sh"
if [ ! -f "$base_lib" ]; then
  printf '%s\n' "Error: cannot find $base_lib" >&2
  exit 1
fi
# shellcheck source=open_parser_base.sh
source "$base_lib"

if [ "$#" -lt 2 ]; then
  # The inner double quotes are escaped so that default="" is really printed;
  # unescaped they would close and reopen the string and vanish.
  printf '%s\n' \
    "Error: Missing arguments" \
    "Usage: $0 <apiKey> <file_path> <prompt (optional, default=\"\")>" >&2
  exit 1
fi

apiKey="$1"
file_path="$2"
prompt="$3"
textract=false

echo "Parsing $file_path..."

upload
parse

# printf instead of echo so a result that looks like an echo option
# (e.g. "-n") is still printed verbatim.
printf '%s\n' "$result"

0 comments on commit 6d0523c

Please sign in to comment.