From 6f23370b58d1b0a4ad5d98a87396542196ee2fab Mon Sep 17 00:00:00 2001
From: Jingyi
Date: Tue, 16 Apr 2024 08:43:16 +0000
Subject: [PATCH] add bash script, modify the return format

---
Reviewer note: README.md and open_parser.sh were revised during review; the
blob ids on their "index" lines below were not recomputed.

 README.md           |   6 ++
 open_parser.sh      | 150 ++++++++++++++++++++++++++++++++++++++++++++
 open_parser/base.py |   8 +--
 3 files changed, 160 insertions(+), 4 deletions(-)
 create mode 100644 open_parser.sh

diff --git a/README.md b/README.md
index 80d0b99..8ee5f16 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,12 @@ conda activate openparse
 pip3 install open_parser
 ```

+## :computer: Bash script usage
+
+```
+bash open_parser.sh <api_key> <extract|parse> <file_path> [prompt] [mode]
+```
+
 ## :scroll: Examples

 OpenParse can extract text, numbers and symbols from PDF, images, etc. Check out each notebook below to run OpenParse within 10 lines of code!
diff --git a/open_parser.sh b/open_parser.sh
new file mode 100644
index 0000000..306da6f
--- /dev/null
+++ b/open_parser.sh
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+# Command-line client for the OpenParse API.
+#
+# Usage:
+#   bash open_parser.sh <api_key> extract <file_path>
+#   bash open_parser.sh <api_key> parse <file_path> [prompt] [mode]
+#
+# Uploads the file once through a presigned POST, runs the requested job and
+# prints its result to stdout. For "parse", use_textract is sent as "true" when
+# <mode> is omitted or "advanced", and as "false" otherwise.
+# Requires curl and jq.
+
+UPLOAD_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload"
+EXTRACT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract"
+PARSE_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/parse"
+
+# Filled in by upload(). "null" is what "jq -r" prints for a missing field.
+uid="null"
+jid="null"
+s3_key="null"
+
+# Filled in by extract() / parse() and printed at the end of the script.
+result=""
+
+# Print an error message to stderr and abort with status 1.
+die() {
+    echo "open_parser.sh: $*" >&2
+    exit 1
+}
+
+# Request a presigned upload for $file_path, record uid/jid/s3_key from the
+# response, then POST the file to the returned URL.
+upload() {
+    # -G with --data-urlencode keeps file names containing spaces, '&' or '#'
+    # from corrupting the query string.
+    local response=$(curl -s -G \
+        -H "x-api-key: $apiKey" \
+        --data-urlencode "fileName=$file_path" \
+        "$UPLOAD_URL")
+
+    local url_info=$(echo "$response" | jq -r '.presignedUrl')
+    local tmp_url=$(echo "$url_info" | jq -r '.url')
+    local tmp_data=$(echo "$url_info" | jq -r '.fields')
+    s3_key=$(echo "$tmp_data" | jq -r '.key')
+    jid=$(echo "$response" | jq -r '.jobId')
+    uid=$(echo "$response" | jq -r '.userId')
+
+    # An empty value means jq could not parse the response at all; "null"
+    # means the field was absent. Either way there is nothing to upload to.
+    local name
+    for name in uid jid s3_key; do
+        if [ -z "${!name}" ] || [ "${!name}" = "null" ]; then
+            die "upload request failed: no usable $name in the response"
+        fi
+    done
+
+    # Forward the presigned-POST fields in a fixed order; "file" must be the
+    # last form field.
+    local form_args=(-F "key=$s3_key")
+    for name in AWSAccessKeyId x-amz-security-token policy signature \
+        x-amz-meta-jobid x-amz-meta-userid x-amz-meta-filename \
+        x-amz-meta-jobtype x-amz-meta-user_prompt; do
+        form_args+=(-F "$name=$(echo "$tmp_data" | jq -r --arg k "$name" '.[$k]')")
+    done
+
+    # Capture only the HTTP status so a rejected upload is reported here
+    # instead of surfacing later as an empty result.
+    local status
+    status=$(curl -s -o /dev/null -w '%{http_code}' -X POST \
+        "${form_args[@]}" \
+        -F "file=@$file_path" \
+        "$tmp_url")
+    case "$status" in
+        2??) ;;
+        *) die "file upload failed (HTTP status $status)" ;;
+    esac
+}
+
+# Run the extract job for the uploaded file; stores .result.file_content of
+# the response in $result.
+extract() {
+    # Built with jq so every value is JSON-escaped.
+    local payload=$(jq -n \
+        --arg userId "$uid" \
+        --arg jobId "$jid" \
+        --arg fileKey "$s3_key" \
+        '{userId: $userId, jobId: $jobId, fileKey: $fileKey}')
+
+    local response=$(curl -s -X POST \
+        -H "x-api-key: $apiKey" \
+        -d "$payload" \
+        "$EXTRACT_URL")
+
+    result=$(echo "$response" | jq -r '.result.file_content')
+}
+
+# Run the parse job for the uploaded file with $prompt and $textract; stores
+# .result of the response in $result.
+parse() {
+    # Built with jq so a prompt containing quotes, backslashes or newlines
+    # still yields valid JSON. use_textract stays a JSON string, as before.
+    local payload=$(jq -n \
+        --arg userId "$uid" \
+        --arg jobId "$jid" \
+        --arg fileKey "$s3_key" \
+        --arg user_prompt "$prompt" \
+        --arg use_textract "$textract" \
+        '{userId: $userId, jobId: $jobId, fileKey: $fileKey,
+          user_prompt: $user_prompt, use_textract: $use_textract}')
+
+    local response=$(curl -s -X POST \
+        -H "x-api-key: $apiKey" \
+        -d "$payload" \
+        "$PARSE_URL")
+
+    result=$(echo "$response" | jq -r '.result')
+}
+
+apiKey="$1"
+func="$2"
+file_path="$3"
+
+if [ -z "$apiKey" ] || [ -z "$func" ] || [ -z "$file_path" ]; then
+    die "usage: bash open_parser.sh <api_key> <extract|parse> <file_path> [prompt] [mode]"
+fi
+[ -f "$file_path" ] || die "no such file: $file_path"
+
+case "$func" in
+    extract)
+        upload
+        extract
+        ;;
+    parse)
+        prompt="$4"
+        mode="$5"
+        if [ -z "$mode" ] || [ "$mode" = "advanced" ]; then
+            textract=true
+        else
+            textract=false
+        fi
+        # Upload exactly once: a second call would request a new job id and
+        # send the file again.
+        upload
+        parse
+        ;;
+    *)
+        die "unknown function '$func' (expected extract or parse)"
+        ;;
+esac
+
+echo "$result"
diff --git a/open_parser/base.py b/open_parser/base.py
index 0f1a658..23309ff 100644
--- a/open_parser/base.py
+++ b/open_parser/base.py
@@ -26,12 +26,12 @@ def setAPIKey(self, apiKey):
     def extract(self, file_path):
         user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path)
         result = self._request_file_extraction(user_id, job_id, s3_key)
-        return result["file_content"]
+        return json.loads(result)["result"]["file_content"]

     def parse(self, file_path, prompt, mode="advanced"):
         user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path)
         result = self._request_info_extraction(user_id, job_id, s3_key, mode, prompt)
-        return result
+        return json.loads(result)["result"]

     def _error_handler(self, response):
         if response.status_code == 403:
@@ -73,7 +73,7 @@ def _request_file_extraction(self, user_id, job_id, s3_key):

         if response.status_code == 200:
             print("Extraction success.")
-            return json.loads(response.json()["result"])
+            return response.text

         self._error_handler(response)

@@ -93,6 +93,6 @@ def _request_info_extraction(self, user_id, job_id, s3_key, mode, prompt=""):

         if response.status_code == 200:
             print("Extraction success.")
-            return json.loads(response.json()["result"])
+            return response.text

         self._error_handler(response)