Skip to content
This repository has been archived by the owner on Dec 11, 2024. It is now read-only.

Commit

Permalink
dev
Browse files Browse the repository at this point in the history
  • Loading branch information
joanfabregat committed Feb 23, 2024
1 parent 4dc4319 commit c59c059
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 17 deletions.
29 changes: 25 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ composer require codeinc/pdf2txt-client

This client requires a running instance of the [pdf2txt](https://github.com/codeinchq/pdf2txt) service. The service can be run locally [using Docker](https://hub.docker.com/r/codeinchq/pdf2txt) or deployed to a server.

### Base example:
### Extracting text from a local file:
```php
use CodeInc\Pdf2TxtClient\Pdf2TxtClient;
use CodeInc\Pdf2TxtClient\Exception;
Expand All @@ -25,7 +25,7 @@ $localPdfPath = '/path/to/local/file.pdf';
try {
// convert
$client = new Pdf2TxtClient($apiBaseUri);
$stream = $client->convertLocalFile($localPdfPath);
$stream = $client->extractFromLocalFile($localPdfPath);

// display the text
echo (string)$stream;
Expand All @@ -35,7 +35,28 @@ catch (Exception $e) {
}
```

### With options:
### Extracting text from a stream:
```php
use CodeInc\Pdf2TxtClient\Pdf2TxtClient;
use CodeInc\Pdf2TxtClient\Exception;

$apiBaseUri = 'http://localhost:3000/';
$pdfStream = '...'; // an instance of `Psr\Http\Message\StreamInterface`

try {
// convert
$client = new Pdf2TxtClient($apiBaseUri);
$textStream = $client->extract($pdfStream);

// display the text
echo (string)$textStream;
}
catch (Exception $e) {
// handle exception
}
```

### With additional options:
```php
use CodeInc\Pdf2TxtClient\Pdf2TxtClient;
use CodeInc\Pdf2TxtClient\ConvertOptions;
Expand All @@ -52,7 +73,7 @@ $convertOption = new ConvertOptions(
try {
// convert
$client = new Pdf2TxtClient($apiBaseUri);
$jsonResponse = $client->convertLocalFile($localPdfPath, $convertOption);
$jsonResponse = $client->extractFromLocalFile($localPdfPath, $convertOption);
$decodedJson = $client->processJsonResponse($jsonResponse);

// display the text in a JSON format
Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codeinc/pdf2txt-client",
"version": "v1.2",
"version": "v1.3",
"description": "A PHP client for the pdf2txt service",
"homepage": "https://github.com/codeinchq/pdf2txt-php-client",
"type": "library",
Expand Down
8 changes: 4 additions & 4 deletions src/Pdf2TxtClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public function __construct(
* @return StreamInterface
* @throws Exception
*/
public function convert(mixed $stream, ConvertOptions $options = new ConvertOptions()): StreamInterface
public function extract(mixed $stream, ConvertOptions $options = new ConvertOptions()): StreamInterface
{
try {
// building the multipart stream
Expand Down Expand Up @@ -129,7 +129,7 @@ public function processJsonResponse(StreamInterface $response): array
* @return StreamInterface
* @throws Exception
*/
public function convertLocalFile(string $pdfPath, ConvertOptions $options = new ConvertOptions()): StreamInterface
public function extractFromLocalFile(string $pdfPath, ConvertOptions $options = new ConvertOptions()): StreamInterface
{
$f = fopen($pdfPath, 'r');
if ($f === false) {
Expand All @@ -139,7 +139,7 @@ public function convertLocalFile(string $pdfPath, ConvertOptions $options = new
);
}

return $this->convert($f, $options);
return $this->extract($f, $options);
}

/**
Expand All @@ -153,6 +153,6 @@ private function getConvertEndpointUri(): string
if (!str_ends_with($url, '/')) {
$url .= '/';
}
return "{$url}convert";
return "{$url}extract";
}
}
21 changes: 13 additions & 8 deletions tests/Pdf2TxtClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
use PHPUnit\Framework\TestCase;
use Psr\Http\Message\StreamInterface;

/**
* Class Pdf2TxtClientTest
*
* @see Pdf2TxtClient
*/
final class Pdf2TxtClientTest extends TestCase
{
private const string DEFAULT_PDF2TEXT_BASE_URL = 'http://localhost:3000';
Expand All @@ -27,11 +32,11 @@ final class Pdf2TxtClientTest extends TestCase
private const string TEST_PDF_RESULT_JSON = __DIR__.'/assets/file.json';

/**
* @throws Exception|JsonException
* @throws Exception
*/
public function testConvertLocalFileToText(): void
public function testExtractionFromLocalFileToText(): void
{
$stream = $this->getNewClient()->convertLocalFile(self::TEST_PDF_PATH);
$stream = $this->getNewClient()->extractFromLocalFile(self::TEST_PDF_PATH);
$this->assertInstanceOf(StreamInterface::class, $stream, "The stream is not valid");

$text = (string)$stream;
Expand All @@ -40,12 +45,12 @@ public function testConvertLocalFileToText(): void
}

/**
* @throws Exception|JsonException
* @throws Exception
*/
public function testConvertLocalFileToRawJson(): void
public function testExtractionFromLocalFileToRawJson(): void
{
$client = $this->getNewClient();
$stream = $client->convertLocalFile(self::TEST_PDF_PATH, new ConvertOptions(format: Format::json));
$stream = $client->extractFromLocalFile(self::TEST_PDF_PATH, new ConvertOptions(format: Format::json));
$this->assertInstanceOf(StreamInterface::class, $stream, "The stream is not valid");

$rawJson = (string)$stream;
Expand All @@ -56,10 +61,10 @@ public function testConvertLocalFileToRawJson(): void
/**
* @throws Exception|JsonException
*/
public function testConvertLocalFileToProcessedJson(): void
public function testExtractionFromLocalFileToProcessedJson(): void
{
$client = $this->getNewClient();
$stream = $client->convertLocalFile(self::TEST_PDF_PATH, new ConvertOptions(format: Format::json));
$stream = $client->extractFromLocalFile(self::TEST_PDF_PATH, new ConvertOptions(format: Format::json));
$this->assertInstanceOf(StreamInterface::class, $stream, "The stream is not valid");

$json = $client->processJsonResponse($stream);
Expand Down

0 comments on commit c59c059

Please sign in to comment.