From 3cb5f581e52f4204fa4ba851bf80a1fa1c36ab14 Mon Sep 17 00:00:00 2001 From: Taylor Smith Date: Fri, 29 Sep 2023 14:38:44 -0500 Subject: [PATCH 1/2] Add sample calls to extract-text --- DotNET/Single Calls/extract-text.cs | 25 +++++++++++ Java/Single Calls/ExtractText.java | 60 +++++++++++++++++++++++++ JavaScript/Single Calls/extract-text.js | 31 +++++++++++++ PHP/Single Calls/extract-text.php | 31 +++++++++++++ Python/Single Calls/extract-text.py | 32 +++++++++++++ cURL/Single Calls/extract-text.sh | 5 +++ 6 files changed, 184 insertions(+) create mode 100644 DotNET/Single Calls/extract-text.cs create mode 100644 Java/Single Calls/ExtractText.java create mode 100644 JavaScript/Single Calls/extract-text.js create mode 100644 PHP/Single Calls/extract-text.php create mode 100644 Python/Single Calls/extract-text.py create mode 100755 cURL/Single Calls/extract-text.sh diff --git a/DotNET/Single Calls/extract-text.cs b/DotNET/Single Calls/extract-text.cs new file mode 100644 index 0000000..5fb2547 --- /dev/null +++ b/DotNET/Single Calls/extract-text.cs @@ -0,0 +1,25 @@ +using System.Text; + +using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") }) // Relative request URIs resolve against this base address. +{ + using (var request = new HttpRequestMessage(HttpMethod.Post, "extract-text")) + { + request.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"); // Replace with your API key. + request.Headers.Accept.Add(new("application/json")); + var multipartContent = new MultipartFormDataContent(); + + var byteArray = File.ReadAllBytes("/path/to/file"); // Replace with the path to your input file. + var byteAryContent = new ByteArrayContent(byteArray); + multipartContent.Add(byteAryContent, "file", "file_name"); // Attach the file bytes as the "file" form field. + byteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf"); + + + request.Content = multipartContent; + using var response = await httpClient.SendAsync(request); // Dispose the response when the enclosing block exits. + + var apiResult = await response.Content.ReadAsStringAsync(); + + Console.WriteLine("API response received."); + Console.WriteLine(apiResult); + } +}
diff --git a/Java/Single Calls/ExtractText.java b/Java/Single Calls/ExtractText.java new file mode 100644 index 0000000..71d1a2b --- /dev/null +++ b/Java/Single Calls/ExtractText.java @@ -0,0 +1,60 @@ +import io.github.cdimascio.dotenv.Dotenv; +import java.io.File; +import java.io.IOException; +import okhttp3.MediaType; +import okhttp3.MultipartBody; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; +import org.json.JSONObject; + +public class ExtractText { + + // Specify the path to your file here, or as the first argument when running the program. + private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf"; + + // Specify your API key here, or in the environment variable PDFREST_API_KEY. + // You can also put the environment variable in a .env file. + private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; + + public static void main(String[] args) { + File inputFile; + if (args.length > 0) { + inputFile = new File(args[0]); + } else { + inputFile = new File(DEFAULT_FILE_PATH); + } + + final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); + + final RequestBody inputFileRequestBody = + RequestBody.create(inputFile, MediaType.parse("application/pdf")); + RequestBody requestBody = + new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("file", inputFile.getName(), inputFileRequestBody) + .build(); + Request request = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .url("https://api.pdfrest.com/extract-text") + .post(requestBody) + .build(); + try { + OkHttpClient client = new OkHttpClient().newBuilder().build(); + Response response = client.newCall(request).execute(); + System.out.println("Result code " + response.code()); + if (response.body() != null) { + System.out.println(prettyJson(response.body().string())); + } + } catch (IOException e) { + throw new 
RuntimeException(e); + } + } + + private static String prettyJson(String json) { + // https://stackoverflow.com/a/9583835/11996393 + return new JSONObject(json).toString(4); + } +} diff --git a/JavaScript/Single Calls/extract-text.js b/JavaScript/Single Calls/extract-text.js new file mode 100644 index 0000000..4ca8c17 --- /dev/null +++ b/JavaScript/Single Calls/extract-text.js @@ -0,0 +1,31 @@ +// This request demonstrates how to extract text from a PDF document. +var axios = require("axios"); +var FormData = require("form-data"); +var fs = require("fs"); + +// Create a new form data instance and append the PDF file to it +var data = new FormData(); +data.append("file", fs.createReadStream("/path/to/file")); + +// define configuration options for axios request +var config = { + method: "post", + maxBodyLength: Infinity, // set maximum length of the request body + url: "https://api.pdfrest.com/extract-text", + headers: { + "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key + ...data.getHeaders(), // set headers for the request + }, + data: data, // set the data to be sent with the request +}; + +// send request and handle response or error +axios(config) + .then(function (response) { + console.log(JSON.stringify(response.data)); + }) + .catch(function (error) { + console.log(error); + }); + +// If you would like to download the file instead of getting the JSON response, please see the 'get-resource-id-endpoint.js' sample. diff --git a/PHP/Single Calls/extract-text.php b/PHP/Single Calls/extract-text.php new file mode 100644 index 0000000..fd6c7fa --- /dev/null +++ b/PHP/Single Calls/extract-text.php @@ -0,0 +1,31 @@ + 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' // Set the API key in the headers for authentication. +]; + +$options = [ + 'multipart' => [ + [ + 'name' => 'file', // Specify the field name for the file. + 'contents' => Utils::tryFopen('/path/to/file', 'r'), // Open the file specified by '/path/to/file' for reading. 
+ 'filename' => '/path/to/file', // Set the filename for the file to be processed, in this case, '/path/to/file'. + 'headers' => [ + 'Content-Type' => 'application/pdf' // Set the Content-Type header for the file; this sample uploads a PDF. + ] + ] + ] +]; + +$request = new Request('POST', 'https://api.pdfrest.com/extract-text', $headers); // Create a new HTTP POST request with the API endpoint and headers. + +$res = $client->sendAsync($request, $options)->wait(); // Send the asynchronous request and wait for the response. + +echo $res->getBody(); // Output the response body, which contains the text extracted from the document. diff --git a/Python/Single Calls/extract-text.py b/Python/Single Calls/extract-text.py new file mode 100644 index 0000000..741bb8f --- /dev/null +++ b/Python/Single Calls/extract-text.py @@ -0,0 +1,32 @@ +from requests_toolbelt import MultipartEncoder +import requests +import json + +extract_text_endpoint_url = 'https://api.pdfrest.com/extract-text' + +# The /extract-text endpoint can take a single PDF file or id as input. +# This sample demonstrates extracting the text of a PDF file uploaded with the request. +mp_encoder_extractText = MultipartEncoder( + fields={ + 'file': ('file_name', open('/path/to/file', 'rb'), 'application/pdf'), + } +) + +# Let's set the headers that the extract-text endpoint expects. +# Since MultipartEncoder is used, the 'Content-Type' header gets set to 'multipart/form-data' via the content_type attribute below. 
+headers = { + 'Accept': 'application/json', + 'Content-Type': mp_encoder_extractText.content_type, + 'Api-Key': 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' # place your api key here +} + +print("Sending POST request to extract-text endpoint...") +response = requests.post(extract_text_endpoint_url, data=mp_encoder_extractText, headers=headers) + +print("Response status code: " + str(response.status_code)) + +if response.ok: + response_json = response.json() + print(json.dumps(response_json, indent = 2)) +else: + print(response.text) diff --git a/cURL/Single Calls/extract-text.sh b/cURL/Single Calls/extract-text.sh new file mode 100755 index 0000000..b9823f5 --- /dev/null +++ b/cURL/Single Calls/extract-text.sh @@ -0,0 +1,5 @@ +curl -X POST "https://api.pdfrest.com/extract-text" \ + -H "Accept: application/json" \ + -H "Content-Type: multipart/form-data" \ + -H "Api-Key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" \ + -F "file=@/path/to/file" From 26b94a51f5f558a8fd4e0d73705f73e3aca25526 Mon Sep 17 00:00:00 2001 From: Taylor Smith Date: Fri, 29 Sep 2023 16:45:32 -0500 Subject: [PATCH 2/2] Update PHP readme to properly instruct the user on installing Guzzle --- PHP/README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/PHP/README.md b/PHP/README.md index 912fb76..a5bae05 100644 --- a/PHP/README.md +++ b/PHP/README.md @@ -14,11 +14,7 @@ In this directory you will find sample calls to single endpoints, as well as mor 2. Navigate to the directory containing the `php` file. -3. Run the following command to install the required dependencies (Guzzle HTTP client): - -```bash -composer install -``` +3. Install the required dependencies (Guzzle HTTP client) by following the instructions at https://docs.guzzlephp.org/en/stable/overview.html ### Usage