Skip to content

Commit

Permalink
Merge pull request #17 from Sdddell/main
Browse files Browse the repository at this point in the history
add bash script, modify the return format
  • Loading branch information
Cambio ML authored Apr 16, 2024
2 parents 35c8f2b + 6f23370 commit 79cd88e
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 4 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ conda activate openparse
pip3 install open_parser
```
## :computer: Bash script usage
```
bash open_parser.sh <your apiKey> <job type: extract | parse> <file path> <prompt for parse (optional, default="")> <parse mode (optional, default=advanced): basic | advanced>
```
## :scroll: Examples
OpenParse can extract text, numbers and symbols from PDF, images, etc. Check out each notebook below to run OpenParse within 10 lines of code!
Expand Down
121 changes: 121 additions & 0 deletions open_parser.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Service endpoints. All three share one base URL and differ only in the
# final path segment.
API_ROOT="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api"
UPLOAD_URL="${API_ROOT}/upload"
EXTRACT_URL="${API_ROOT}/extract"
PARSE_URL="${API_ROOT}/parse"

# Identifiers filled in by upload(). The literal string "null" marks
# "not received yet" (it is also what `jq -r` prints for a missing key).
s3_key="null"
jid="null"
uid="null"

# Text printed at the end of the script; set by extract() or parse().
result=""

#######################################
# Ask the API for a presigned upload target for $file_path, then send the
# file to that target as a multipart form POST.
# Globals:
#   UPLOAD_URL, apiKey, file_path (read)
#   uid, jid, s3_key (written from the API response)
# Outputs:
#   Error messages on stderr.
# Exits:
#   Terminates the script with status 1 if either HTTP request fails or the
#   API response lacks the user id, job id, object key or upload URL.
#######################################
upload() {
  local response
  # -G with --data-urlencode keeps this a GET but percent-encodes the file
  # name, so paths containing spaces, '&' or '#' cannot corrupt the query.
  response=$(curl -s -G \
    -H "x-api-key: $apiKey" \
    --data-urlencode "fileName=$file_path" \
    "$UPLOAD_URL") || {
    echo "upload: request to $UPLOAD_URL failed" >&2
    exit 1
  }

  local tmp_url
  tmp_url=$(jq -r '.presignedUrl.url' <<<"$response")
  s3_key=$(jq -r '.presignedUrl.fields.key' <<<"$response")
  jid=$(jq -r '.jobId' <<<"$response")
  uid=$(jq -r '.userId' <<<"$response")

  # `jq -r` prints "null" for a missing key and nothing at all for an empty
  # or unparsable response, so both cases must be rejected.
  local name
  for name in uid jid s3_key tmp_url; do
    if [[ -z "${!name}" || "${!name}" == "null" ]]; then
      echo "upload: API response did not provide a usable value for ${name}" >&2
      exit 1
    fi
  done

  # Forward the same fixed set of presigned fields as before. --form-string
  # sends each value literally (plain -F would treat a leading '@'/'<' or a
  # ';type=' suffix inside a value as curl syntax).
  local -a form_args=(--form-string "key=$s3_key")
  local field value
  for field in \
    AWSAccessKeyId \
    x-amz-security-token \
    policy \
    signature \
    x-amz-meta-jobid \
    x-amz-meta-userid \
    x-amz-meta-filename \
    x-amz-meta-jobtype \
    x-amz-meta-user_prompt; do
    value=$(jq -r --arg f "$field" '.presignedUrl.fields[$f]' <<<"$response")
    form_args+=(--form-string "${field}=${value}")
  done

  # The file part stays last, matching the original field order.
  # -f makes curl return non-zero on an HTTP error status so a rejected
  # upload is reported instead of being silently ignored.
  if ! curl -s -f -X POST \
    "${form_args[@]}" \
    -F "file=@$file_path" \
    "$tmp_url" >/dev/null; then
    echo "upload: sending $file_path to the presigned URL failed" >&2
    exit 1
  fi
}

#######################################
# Request text extraction for the file registered by upload().
# Globals:
#   EXTRACT_URL, apiKey, uid, jid, s3_key (read)
#   result (written: the .result.file_content field of the response)
# Outputs:
#   Error messages on stderr.
# Exits:
#   Terminates the script with status 1 if the payload cannot be built or
#   the HTTP request fails.
#######################################
extract() {
  local payload response

  # Let jq assemble the JSON so quotes, backslashes or control characters in
  # any value are escaped instead of producing an invalid request body.
  payload=$(jq -n -c \
    --arg userId "$uid" \
    --arg jobId "$jid" \
    --arg fileKey "$s3_key" \
    '{userId: $userId, jobId: $jobId, fileKey: $fileKey}') || {
    echo "extract: could not build the request payload" >&2
    exit 1
  }

  response=$(curl -s -X POST \
    -H "x-api-key: $apiKey" \
    -d "$payload" \
    "$EXTRACT_URL") || {
    echo "extract: request to $EXTRACT_URL failed" >&2
    exit 1
  }

  result=$(jq -r '.result.file_content' <<<"$response")
}

#######################################
# Request prompt-driven parsing for the file registered by upload().
# Globals:
#   PARSE_URL, apiKey, uid, jid, s3_key, prompt, textract (read)
#   result (written: the .result field of the response)
# Outputs:
#   Error messages on stderr.
# Exits:
#   Terminates the script with status 1 if the payload cannot be built or
#   the HTTP request fails.
#######################################
parse() {
  local payload response

  # $prompt is free text typed by the user; building the body with jq escapes
  # quotes, backslashes and newlines that would otherwise break the JSON.
  # use_textract is deliberately kept a JSON string ("true"/"false"), the
  # same wire format the hand-built payload used.
  payload=$(jq -n -c \
    --arg userId "$uid" \
    --arg jobId "$jid" \
    --arg fileKey "$s3_key" \
    --arg user_prompt "$prompt" \
    --arg use_textract "$textract" \
    '{
      userId: $userId,
      jobId: $jobId,
      fileKey: $fileKey,
      user_prompt: $user_prompt,
      use_textract: $use_textract
    }') || {
    echo "parse: could not build the request payload" >&2
    exit 1
  }

  response=$(curl -s -X POST \
    -H "x-api-key: $apiKey" \
    -d "$payload" \
    "$PARSE_URL") || {
    echo "parse: request to $PARSE_URL failed" >&2
    exit 1
  }

  result=$(jq -r '.result' <<<"$response")
}


# ---------------------------------------------------------------------------
# Entry point.
#   $1  API key
#   $2  job type: extract | parse
#   $3  path of the file to process
#   $4  prompt (parse only, optional, default "")
#   $5  parse mode (parse only, optional): basic | advanced
#       An empty mode behaves like "advanced" (use_textract=true); any other
#       value selects use_textract=false.
# Prints the job result on stdout. Usage errors go to stderr with status 2.
# ---------------------------------------------------------------------------
apiKey="$1"
func="$2"
file_path="$3"

if [[ -z "$apiKey" || -z "$func" || -z "$file_path" ]]; then
  echo "usage: bash open_parser.sh <apiKey> <extract|parse> <file path> [prompt] [basic|advanced]" >&2
  exit 2
fi

if [[ ! -f "$file_path" ]]; then
  echo "open_parser.sh: file not found: $file_path" >&2
  exit 2
fi

# Validate the job type before touching the network, and upload exactly once
# per run: the previous flow uploaded the file a second time for parse jobs.
case "$func" in
  extract)
    upload
    extract
    ;;
  parse)
    prompt="$4"
    mode="$5"
    if [[ -z "$mode" || "$mode" == "advanced" ]]; then
      textract=true
    else
      textract=false
    fi
    upload
    parse
    ;;
  *)
    echo "open_parser.sh: unknown job type '$func' (expected extract or parse)" >&2
    exit 2
    ;;
esac

# printf instead of echo so a result that looks like an echo option
# (for example "-n") is still printed verbatim.
printf '%s\n' "$result"
8 changes: 4 additions & 4 deletions open_parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def setAPIKey(self, apiKey):
def extract(self, file_path):
user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path)
result = self._request_file_extraction(user_id, job_id, s3_key)
return result["file_content"]
return json.loads(result)["result"]["file_content"]

def parse(self, file_path, prompt, mode="advanced"):
user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path)
result = self._request_info_extraction(user_id, job_id, s3_key, mode, prompt)
return result
return json.loads(result)["result"]

def _error_handler(self, response):
if response.status_code == 403:
Expand Down Expand Up @@ -73,7 +73,7 @@ def _request_file_extraction(self, user_id, job_id, s3_key):

if response.status_code == 200:
print("Extraction success.")
return json.loads(response.json()["result"])
return response.text

self._error_handler(response)

Expand All @@ -93,6 +93,6 @@ def _request_info_extraction(self, user_id, job_id, s3_key, mode, prompt=""):

if response.status_code == 200:
print("Extraction success.")
return json.loads(response.json()["result"])
return response.text

self._error_handler(response)

0 comments on commit 79cd88e

Please sign in to comment.