From 8a36d3f74b4811e841bf7b14a53754674eab2870 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 18 Nov 2024 16:31:21 -0600 Subject: [PATCH] Add Extract Images sample programs --- .../JSON Payload/extracted-images.cs | 51 ++++++++++ .../Multipart Payload/extracted-images.cs | 28 ++++++ .../JSON Payload/ExtractedImages.java | 96 +++++++++++++++++++ .../Multipart Payload/ExtractedImages.java | 66 +++++++++++++ .../JSON Payload/extracted-images.js | 47 +++++++++ .../Multipart Payload/extracted-images.js | 33 +++++++ .../JSON Payload/extracted-images.php | 33 +++++++ .../Multipart Payload/extracted-images.php | 39 ++++++++ .../JSON Payload/extracted-images.py | 39 ++++++++ .../Multipart Payload/extracted-images.py | 36 +++++++ .../JSON Payload/extracted-images.sh | 14 +++ .../Multipart Payload/extracted-images.sh | 7 ++ 12 files changed, 489 insertions(+) create mode 100644 DotNET/Endpoint Examples/JSON Payload/extracted-images.cs create mode 100644 DotNET/Endpoint Examples/Multipart Payload/extracted-images.cs create mode 100644 Java/Endpoint Examples/JSON Payload/ExtractedImages.java create mode 100644 Java/Endpoint Examples/Multipart Payload/ExtractedImages.java create mode 100644 JavaScript/Endpoint Examples/JSON Payload/extracted-images.js create mode 100644 JavaScript/Endpoint Examples/Multipart Payload/extracted-images.js create mode 100644 PHP/Endpoint Examples/JSON Payload/extracted-images.php create mode 100644 PHP/Endpoint Examples/Multipart Payload/extracted-images.php create mode 100644 Python/Endpoint Examples/JSON Payload/extracted-images.py create mode 100644 Python/Endpoint Examples/Multipart Payload/extracted-images.py create mode 100755 cURL/Endpoint Examples/JSON Payload/extracted-images.sh create mode 100755 cURL/Endpoint Examples/Multipart Payload/extracted-images.sh diff --git a/DotNET/Endpoint Examples/JSON Payload/extracted-images.cs b/DotNET/Endpoint Examples/JSON Payload/extracted-images.cs new file mode 100644 index 0000000..dd969d0 
--- /dev/null
+++ b/DotNET/Endpoint Examples/JSON Payload/extracted-images.cs
@@ -0,0 +1,51 @@
+
+using Newtonsoft.Json.Linq;
+using System.Text;
+
+using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") })
+{
+    using (var uploadRequest = new HttpRequestMessage(HttpMethod.Post, "upload"))
+    {
+        uploadRequest.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+        uploadRequest.Headers.Accept.Add(new("application/json"));
+
+        var uploadByteArray = File.ReadAllBytes("/path/to/file");
+        var uploadByteAryContent = new ByteArrayContent(uploadByteArray);
+        uploadByteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/octet-stream");
+        uploadByteAryContent.Headers.TryAddWithoutValidation("Content-Filename", "filename.pdf");
+
+
+        uploadRequest.Content = uploadByteAryContent;
+        var uploadResponse = await httpClient.SendAsync(uploadRequest);
+
+        var uploadResult = await uploadResponse.Content.ReadAsStringAsync();
+
+        Console.WriteLine("Upload response received.");
+        Console.WriteLine(uploadResult);
+
+        JObject uploadResultJson = JObject.Parse(uploadResult);
+        var uploadedID = uploadResultJson["files"][0]["id"];
+        using (var extractRequest = new HttpRequestMessage(HttpMethod.Post, "extracted-images"))
+        {
+            extractRequest.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+            extractRequest.Headers.Accept.Add(new("application/json"));
+
+            // Content-Type is a content header, so it is supplied by the StringContent created below.
+
+
+            JObject parameterJson = new JObject
+            {
+                ["id"] = uploadedID,
+                ["pages"] = "1-last"
+            };
+
+            extractRequest.Content = new StringContent(parameterJson.ToString(), Encoding.UTF8, "application/json");
+            var extractResponse = await httpClient.SendAsync(extractRequest);
+
+            var extractResult = await extractResponse.Content.ReadAsStringAsync();
+
+            Console.WriteLine("Processing response received.");
+            Console.WriteLine(extractResult);
+        }
+    }
+}
diff --git a/DotNET/Endpoint Examples/Multipart Payload/extracted-images.cs b/DotNET/Endpoint Examples/Multipart Payload/extracted-images.cs
new file mode 100644
index 0000000..77c9860
--- /dev/null
+++ b/DotNET/Endpoint Examples/Multipart Payload/extracted-images.cs
@@ -0,0 +1,28 @@
+using System.Text;
+
+using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") })
+{
+    using (var request = new HttpRequestMessage(HttpMethod.Post, "extracted-images"))
+    {
+        request.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+        request.Headers.Accept.Add(new("application/json"));
+        var multipartContent = new MultipartFormDataContent();
+
+        var byteArray = File.ReadAllBytes("/path/to/file");
+        var byteAryContent = new ByteArrayContent(byteArray);
+        multipartContent.Add(byteAryContent, "file", "file_name");
+        byteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf");
+
+        var byteArrayOption = new ByteArrayContent(Encoding.UTF8.GetBytes("1-last"));
+        multipartContent.Add(byteArrayOption, "pages");
+
+
+        request.Content = multipartContent;
+        var response = await httpClient.SendAsync(request);
+
+        var apiResult = await response.Content.ReadAsStringAsync();
+
+        Console.WriteLine("API response received.");
+        Console.WriteLine(apiResult);
+    }
+}
diff --git a/Java/Endpoint Examples/JSON Payload/ExtractedImages.java b/Java/Endpoint Examples/JSON Payload/ExtractedImages.java
new file mode 100644
index 0000000..1ec93c1
--- /dev/null
+++ b/Java/Endpoint Examples/JSON Payload/ExtractedImages.java
@@ -0,0 +1,96 @@
+import io.github.cdimascio.dotenv.Dotenv;
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import okhttp3.*;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+public class ExtractedImages {
+
+  // Specify the path to your file here, or as the first argument when running the program.
+  private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf";
+
+  // Specify your API key here, or in the environment variable PDFREST_API_KEY.
+  // You can also put the environment variable in a .env file.
+  private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+
+  public static void main(String[] args) {
+    File inputFile;
+    if (args.length > 0) {
+      inputFile = new File(args[0]);
+    } else {
+      inputFile = new File(DEFAULT_FILE_PATH);
+    }
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    String uploadString = uploadFile(inputFile);
+    JSONObject uploadJSON = new JSONObject(uploadString);
+    if (uploadJSON.has("error")) {
+      System.out.println("Error during upload: " + uploadString);
+      return;
+    }
+    JSONArray fileArray = uploadJSON.getJSONArray("files");
+
+    JSONObject fileObject = fileArray.getJSONObject(0);
+
+    String uploadedID = fileObject.get("id").toString();
+
+    String JSONString = String.format("{\"id\":\"%s\",\"pages\":\"1-last\" }", uploadedID);
+
+    final RequestBody requestBody =
+        RequestBody.create(JSONString, MediaType.parse("application/json"));
+
+    Request request =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .url("https://api.pdfrest.com/extracted-images")
+            .post(requestBody)
+            .build();
+    try {
+      OkHttpClient client =
+          new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+
+      Response response = client.newCall(request).execute();
+      System.out.println("Processing Result code " + response.code());
+      if (response.body() != null) {
+        System.out.println(prettyJson(response.body().string()));
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private static String prettyJson(String json) {
+    // https://stackoverflow.com/a/9583835/11996393
+    return new JSONObject(json).toString(4);
+  }
+
+  // This function is just a copy of the 'Upload.java' file to upload a binary file
+  private static String uploadFile(File inputFile) {
+
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    final RequestBody requestBody =
+        RequestBody.create(inputFile, MediaType.parse("application/pdf"));
+
+    Request request =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .header("Content-Filename", "File.pdf")
+            .url("https://api.pdfrest.com/upload")
+            .post(requestBody)
+            .build();
+    try {
+      OkHttpClient client = new OkHttpClient().newBuilder().build();
+      Response response = client.newCall(request).execute();
+      System.out.println("Upload Result code " + response.code());
+      if (response.body() != null) {
+        return response.body().string();
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return "";
+  }
+}
diff --git a/Java/Endpoint Examples/Multipart Payload/ExtractedImages.java b/Java/Endpoint Examples/Multipart Payload/ExtractedImages.java
new file mode 100644
index 0000000..17ff354
--- /dev/null
+++ b/Java/Endpoint Examples/Multipart Payload/ExtractedImages.java
@@ -0,0 +1,66 @@
+import io.github.cdimascio.dotenv.Dotenv;
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import okhttp3.MediaType;
+import okhttp3.MultipartBody;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
+import org.json.JSONObject;
+
+public class ExtractedImages {
+
+  // Specify the path to your file here, or as the first argument when running the program.
+  private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf";
+
+  // Specify your API key here, or in the environment variable PDFREST_API_KEY.
+  // You can also put the environment variable in a .env file.
+  private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+
+  private static final String PAGES = "1-last";
+
+  public static void main(String[] args) {
+    File inputFile;
+    if (args.length > 0) {
+      inputFile = new File(args[0]);
+    } else {
+      inputFile = new File(DEFAULT_FILE_PATH);
+    }
+
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    final RequestBody inputFileRequestBody =
+        RequestBody.create(inputFile, MediaType.parse("application/pdf"));
+    RequestBody requestBody =
+        new MultipartBody.Builder()
+            .setType(MultipartBody.FORM)
+            .addFormDataPart("file", inputFile.getName(), inputFileRequestBody)
+            .addFormDataPart("pages", PAGES)
+            .addFormDataPart("output", "pdfrest_extracted_images")
+            .build();
+    Request request =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .url("https://api.pdfrest.com/extracted-images")
+            .post(requestBody)
+            .build();
+    try {
+      OkHttpClient client =
+          new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+      Response response = client.newCall(request).execute();
+      System.out.println("Result code " + response.code());
+      if (response.body() != null) {
+        System.out.println(prettyJson(response.body().string()));
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private static String prettyJson(String json) {
+    // https://stackoverflow.com/a/9583835/11996393
+    return new JSONObject(json).toString(4);
+  }
+}
diff --git a/JavaScript/Endpoint Examples/JSON Payload/extracted-images.js b/JavaScript/Endpoint Examples/JSON Payload/extracted-images.js
new file mode 100644
index 0000000..50516c7
--- /dev/null
+++ b/JavaScript/Endpoint Examples/JSON Payload/extracted-images.js
@@ -0,0 +1,47 @@
+var axios = require("axios");
+var FormData = require("form-data");
+var fs = require("fs");
+
+var upload_data = fs.createReadStream("/path/to/file");
+
+var upload_config = {
+  method: "post",
+  maxBodyLength: Infinity,
+  url: "https://api.pdfrest.com/upload",
+  headers: {
+    "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key
+    "Content-Filename": "filename.pdf",
+    "Content-Type": "application/octet-stream",
+  },
+  data: upload_data, // set the data to be sent with the request
+};
+
+// send request and handle response or error
+axios(upload_config)
+  .then(function (upload_response) {
+    console.log(JSON.stringify(upload_response.data));
+    var uploaded_id = upload_response.data.files[0].id;
+
+    var extract_config = {
+      method: "post",
+      maxBodyLength: Infinity,
+      url: "https://api.pdfrest.com/extracted-images",
+      headers: {
+        "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key
+        "Content-Type": "application/json",
+      },
+      data: { id: uploaded_id, pages: "1-last" }, // set the data to be sent with the request
+    };
+
+    // send request and handle response or error
+    axios(extract_config)
+      .then(function (extract_response) {
+        console.log(JSON.stringify(extract_response.data));
+      })
+      .catch(function (error) {
+        console.log(error);
+      });
+  })
+  .catch(function (error) {
+    console.log(error);
+  });
diff --git a/JavaScript/Endpoint Examples/Multipart Payload/extracted-images.js b/JavaScript/Endpoint Examples/Multipart Payload/extracted-images.js
new file mode 100644
index 0000000..c0f5b1f
--- /dev/null
+++ b/JavaScript/Endpoint Examples/Multipart Payload/extracted-images.js
@@ -0,0 +1,33 @@
+// This request demonstrates how to extract the images from a PDF.
+var axios = require('axios');
+var FormData = require('form-data');
+var fs = require('fs');
+
+// Create a new form data instance and append the PDF file and parameters to it
+var data = new FormData();
+data.append('file', fs.createReadStream('/path/to/file'));
+data.append('pages', '1-last');
+data.append('output', 'pdfrest_extracted_images');
+
+// define configuration options for axios request
+var config = {
+  method: 'post',
+  maxBodyLength: Infinity, // set maximum length of the request body
+  url: 'https://api.pdfrest.com/extracted-images',
+  headers: {
+    'Api-Key': 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx', // Replace with your API key
+    ...data.getHeaders() // set headers for the request
+  },
+  data : data // set the data to be sent with the request
+};
+
+// send request and handle response or error
+axios(config)
+.then(function (response) {
+  console.log(JSON.stringify(response.data));
+})
+.catch(function (error) {
+  console.log(error);
+});
+
+// If you would like to download the file instead of getting the JSON response, please see the 'get-resource-id-endpoint.js' sample.
\ No newline at end of file
diff --git a/PHP/Endpoint Examples/JSON Payload/extracted-images.php b/PHP/Endpoint Examples/JSON Payload/extracted-images.php
new file mode 100644
index 0000000..f65fda1
--- /dev/null
+++ b/PHP/Endpoint Examples/JSON Payload/extracted-images.php
@@ -0,0 +1,33 @@
+<?php
+require 'vendor/autoload.php'; // Require the autoload file to load Guzzle HTTP client.
+
+use GuzzleHttp\Client; // Import the Guzzle HTTP client namespace.
+use GuzzleHttp\Psr7\Request; // Import the PSR-7 Request class.
+use GuzzleHttp\Psr7\Utils; // Import the PSR-7 Utils class for working with streams.
+
+$upload_client = new Client(['http_errors' => false]);
+$upload_headers = [
+    'api-key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
+    'content-filename' => 'filename.pdf',
+    'Content-Type' => 'application/octet-stream'
+];
+$upload_body = file_get_contents('/path/to/file');
+$upload_request = new Request('POST', 'https://api.pdfrest.com/upload', $upload_headers, $upload_body);
+$upload_res = $upload_client->sendAsync($upload_request)->wait();
+echo $upload_res->getBody() . PHP_EOL;
+
+$upload_response_json = json_decode($upload_res->getBody());
+
+$uploaded_id = $upload_response_json->{'files'}[0]->{'id'};
+
+echo "Successfully uploaded with an id of: " . $uploaded_id . PHP_EOL;
+
+$extract_client = new Client(['http_errors' => false]);
+$extract_headers = [
+    'api-key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
+    'Content-Type' => 'application/json'
+];
+$extract_body = '{"id":"'.$uploaded_id.'", "pages": "1-last"}';
+$extract_request = new Request('POST', 'https://api.pdfrest.com/extracted-images', $extract_headers, $extract_body);
+$extract_res = $extract_client->sendAsync($extract_request)->wait();
+echo $extract_res->getBody() . PHP_EOL;
diff --git a/PHP/Endpoint Examples/Multipart Payload/extracted-images.php b/PHP/Endpoint Examples/Multipart Payload/extracted-images.php
new file mode 100644
index 0000000..9a5ce48
--- /dev/null
+++ b/PHP/Endpoint Examples/Multipart Payload/extracted-images.php
@@ -0,0 +1,39 @@
+<?php
+require 'vendor/autoload.php'; // Require the autoload file to load Guzzle HTTP client.
+
+use GuzzleHttp\Client; // Import the Guzzle HTTP client namespace.
+use GuzzleHttp\Psr7\Request; // Import the PSR-7 Request class.
+use GuzzleHttp\Psr7\Utils; // Import the PSR-7 Utils class for working with streams.
+
+$client = new Client(); // Create a new instance of the Guzzle HTTP client.
+
+$headers = [
+    'Api-Key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' // Set the API key in the headers for authentication.
+];
+
+$options = [
+    'multipart' => [
+        [
+            'name' => 'file', // Specify the field name for the file.
+            'contents' => Utils::tryFopen('/path/to/file', 'r'), // Open the file specified by the '/path/to/file' for reading.
+            'filename' => '/path/to/file', // Set the filename for the file containing images, in this case, '/path/to/file'.
+            'headers' => [
+                'Content-Type' => 'application/pdf' // Set the Content-Type header for the file.
+            ]
+        ],
+        [
+            'name' => 'pages', // Specify the field name for the target page numbers.
+            'contents' => '1-last' // Set the value for the target pages (in this case, '1-last', or all pages).
+        ],
+        [
+            'name' => 'output', // Specify the field name for the output option.
+            'contents' => 'pdfrest_extracted_images' // Set the value for the output option (in this case, 'pdfrest_extracted_images').
+        ]
+    ]
+];
+
+$request = new Request('POST', 'https://api.pdfrest.com/extracted-images', $headers); // Create a new HTTP POST request with the API endpoint and headers.
+
+$res = $client->sendAsync($request, $options)->wait(); // Send the asynchronous request and wait for the response.
+
+echo $res->getBody(); // Output the response body, which contains the extracted image content.
diff --git a/Python/Endpoint Examples/JSON Payload/extracted-images.py b/Python/Endpoint Examples/JSON Payload/extracted-images.py
new file mode 100644
index 0000000..20355bb
--- /dev/null
+++ b/Python/Endpoint Examples/JSON Payload/extracted-images.py
@@ -0,0 +1,39 @@
+import requests
+import json
+
+with open('/path/to/file', 'rb') as f:
+    upload_data = f.read()
+
+print("Uploading file...")
+upload_response = requests.post(url='https://api.pdfrest.com/upload',
+                    data=upload_data,
+                    headers={'Content-Type': 'application/octet-stream', 'Content-Filename': 'file.pdf', "API-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"})
+
+print("Upload response status code: " + str(upload_response.status_code))
+
+if upload_response.ok:
+    upload_response_json = upload_response.json()
+    print(json.dumps(upload_response_json, indent = 2))
+
+
+    uploaded_id = upload_response_json['files'][0]['id']
+    extract_data = { "id" : uploaded_id, "pages" : "1-last" }
+    print(json.dumps(extract_data, indent = 2))
+
+
+    print("Processing file...")
+    extract_response = requests.post(url='https://api.pdfrest.com/extracted-images',
+                        data=json.dumps(extract_data),
+                        headers={'Content-Type': 'application/json', "API-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"})
+
+
+
+    print("Processing response status code: " + str(extract_response.status_code))
+    if extract_response.ok:
+        extract_response_json = extract_response.json()
+        print(json.dumps(extract_response_json, indent = 2))
+
+    else:
+        print(extract_response.text)
+else:
+    print(upload_response.text)
diff --git a/Python/Endpoint Examples/Multipart
Payload/extracted-images.py b/Python/Endpoint Examples/Multipart Payload/extracted-images.py
new file mode 100644
index 0000000..b1da237
--- /dev/null
+++ b/Python/Endpoint Examples/Multipart Payload/extracted-images.py
@@ -0,0 +1,36 @@
+from requests_toolbelt import MultipartEncoder
+import requests
+import json
+
+extracted_images_endpoint_url = 'https://api.pdfrest.com/extracted-images'
+
+# The /extracted-images endpoint can take a single PDF file or id as input.
+# This sample demonstrates image extraction from all pages of a document.
+with open('/path/to/file', 'rb') as input_file:  # the file is closed as soon as the request has been sent
+    mp_encoder_extractedImages = MultipartEncoder(
+        fields={
+            'file': ('file_name.pdf', input_file, 'application/pdf'),
+            'output' : 'example_extractedImages_out',
+            'pages': '1-last',
+        }
+    )
+
+    # Let's set the headers that the extracted-images endpoint expects.
+    # Since MultipartEncoder is used, the 'Content-Type' header gets set to 'multipart/form-data' via the content_type attribute below.
+    headers = {
+        'Accept': 'application/json',
+        'Content-Type': mp_encoder_extractedImages.content_type,
+        'Api-Key': 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' # place your api key here
+    }
+
+    print("Sending POST request to extracted-images endpoint...")
+    response = requests.post(extracted_images_endpoint_url, data=mp_encoder_extractedImages, headers=headers)
+
+print("Response status code: " + str(response.status_code))
+if response.ok:
+    response_json = response.json()
+    print(json.dumps(response_json, indent = 2))
+else:
+    print(response.text)
+
+# If you would like to download the file instead of getting the JSON response, please see the 'get-resource-id-endpoint.py' sample.
diff --git a/cURL/Endpoint Examples/JSON Payload/extracted-images.sh b/cURL/Endpoint Examples/JSON Payload/extracted-images.sh
new file mode 100755
index 0000000..bceb178
--- /dev/null
+++ b/cURL/Endpoint Examples/JSON Payload/extracted-images.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+# Uploads a file, then extracts its images by referencing the returned file ID. Requires curl and jq.
+UPLOAD_ID=$(curl --location 'https://api.pdfrest.com/upload' \
+--header 'Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \
+--header 'content-filename: filename.pdf' \
+--data-binary '@/path/to/file' \
+  | jq -r '.files[0].id')
+
+echo "File successfully uploaded with an ID of: $UPLOAD_ID"
+
+curl 'https://api.pdfrest.com/extracted-images' \
+--header 'Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \
+--header 'Content-Type: application/json' \
+--data-raw "{ \"id\": \"$UPLOAD_ID\", \"pages\": \"1-last\"}" | jq -r '.'
diff --git a/cURL/Endpoint Examples/Multipart Payload/extracted-images.sh b/cURL/Endpoint Examples/Multipart Payload/extracted-images.sh
new file mode 100755
index 0000000..771f7c5
--- /dev/null
+++ b/cURL/Endpoint Examples/Multipart Payload/extracted-images.sh
@@ -0,0 +1,7 @@
+curl -X POST "https://api.pdfrest.com/extracted-images" \
+  -H "Accept: application/json" \
+  -H "Content-Type: multipart/form-data" \
+  -H "Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" \
+  -F "file=@/path/to/file" \
+  -F "output=example_out" \
+  -F "pages=1-last"