From 3318500477f9730afca8689158e6ff6939175db6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Fabr=C3=A9gat?= Date: Thu, 22 Feb 2024 12:11:35 -0800 Subject: [PATCH] adds the `format` option --- README.md | 12 +++++++----- main.mjs | 20 ++++++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 66ac60a..f04fcdf 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Additional parameters can be sent to customize the conversion process: * `lastPage`: The last page to extract. Default is the last page of the document. * `password`: The password to unlock the PDF. Default is none. * `normalizeWhitespace`: If set to `true`, the server normalizes the whitespace in the extracted text. Default is `true`. -* `raw`: If set to `true`, the server returns the raw text extracted from the PDF as `text/plain`, else the text is returned as `text/json`. Default is `false`. +* `format`: The output format. Supported values are `text` (the server returns the raw text as `text/plain`) or `json` (the server returns a JSON object as `application/json`). Default is `text`. The server returns `200` if the conversion was successful and the images are available in the response body. In case of error, the server returns a `400` status code with a JSON object containing the error message (format: `{error: string}`). 
@@ -36,16 +36,18 @@ Convert a PDF file to text with a JSON response: curl -X POST -F "file=@/path/to/file.pdf" http://localhost:3000/convert -o example.json ``` -Convert a password-protected PDF file to text with a JSON response: +Convert a PDF file to text: ```bash -curl -X POST -F "file=@/path/to/file.pdf" -F "password=XXX" http://localhost:3000/convert -o example.json +curl -X POST -F "file=@/path/to/file.pdf" http://localhost:3000/convert ``` -Convert a PDF file to text with a raw text response: +Extract a password-protected PDF file's text content as JSON and save it to a file: ```bash -curl -X POST -F "file=@/path/to/file.pdf" -F "raw=true" http://localhost:3000/convert -o example.txt +curl -X POST -F "file=@/path/to/file.pdf" -F "password=XXX" -F "format=json" http://localhost:3000/convert -o example.json ``` + + ## License This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. diff --git a/main.mjs b/main.mjs index 0615141..1269f74 100644 --- a/main.mjs +++ b/main.mjs @@ -49,8 +49,16 @@ app.post('/convert', upload.single('file'), async (req, res) => { } )(); - // send the text - if (req.body.raw === "true") { + // send the content as raw text or JSON + if (String(req.body.format).toLowerCase() === "json") { + delete extractResult.filename; + + extractResult.pages.forEach(page => { + page.content.forEach(content => delete content.fontName); + }); + + res.json(extractResult); + } else { res.send( extractResult.pages.reduce( (acc1, page) => acc1 + page.content.reduce( @@ -60,14 +68,6 @@ app.post('/convert', upload.single('file'), async (req, res) => { '', ), ); - } else { - delete extractResult.filename; - - extractResult.pages.forEach(page => { - page.content.forEach(content => delete content.fontName); - }); - - res.json(extractResult); } // cleaning up