From 8e198150d407f68c3440124a9e383e8449cd6a93 Mon Sep 17 00:00:00 2001 From: Mounika <72865791+MOUNIKA0536@users.noreply.github.com> Date: Fri, 2 Dec 2022 21:13:48 +0530 Subject: [PATCH] fix(deps)!: update pdf2json from 2.0.1 to 3.0.1 (#125) Fixes https://github.com/adrienjoly/npm-pdfreader/issues/124. BREAKING CHANGE: switch `pdfreader` from commonJS to ES Module, as pdf2json did. --- .eslintrc.json | 2 +- PdfReader.js | 16 +++++++--------- README.md | 6 +++--- Rule.js | 12 ++++++------ index.js | 24 ++++++++++++++++-------- lib/ColumnsParser.js | 6 ++---- lib/LOG.js | 11 +++++------ lib/SequentialParser.js | 4 +--- lib/TableParser.js | 4 +--- lib/parseColumns.js | 4 ++-- lib/parseTable.js | 21 +++++++-------------- package-lock.json | 24 ++++++++++++------------ package.json | 3 ++- parse.js | 6 ++++-- parseAsBuffer.js | 8 +++++--- test/test.js | 11 +++++++---- 16 files changed, 81 insertions(+), 81 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index cc47cd4..1b59724 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,4 +1,4 @@ { "extends": ["plugin:prettier/recommended"], - "parserOptions": { "ecmaVersion": 2020 } + "parserOptions": { "ecmaVersion": 2020, "sourceType": "module" } } diff --git a/PdfReader.js b/PdfReader.js index 7bacd74..98bb8dd 100644 --- a/PdfReader.js +++ b/PdfReader.js @@ -11,8 +11,8 @@ * **/ -var LOG = require("./lib/LOG.js"); -var PFParser = require("pdf2json/pdfparser"); // doc: https://github.com/modesty/pdf2json +import { log as LOG } from "./lib/LOG.js"; +import PDFParser from "pdf2json"; // doc: https://github.com/modesty/pdf2json function forEachItem(pdf, handler) { var pageNumber = 0; @@ -37,7 +37,7 @@ function forEachItem(pdf, handler) { handler(); } -function PdfReader(options) { +export function PdfReader(options) { LOG("PdfReader"); // only displayed if LOG.js was first loaded with `true` as init parameter this.options = options || {}; } @@ -49,9 +49,9 @@ PdfReader.prototype.parseFileItems = function (pdfFilePath, itemHandler) { itemHandler(null, { file: { path: pdfFilePath } }); var pdfParser; if (this.options.password) { - pdfParser = new PFParser(null, null, this.options.password); + pdfParser = new PDFParser(null, null, this.options.password); } else { - pdfParser = new PFParser(); + pdfParser = new PDFParser(); } pdfParser.on("pdfParser_dataError", itemHandler); @@ -69,9 +69,9 @@ PdfReader.prototype.parseBuffer = function (pdfBuffer, itemHandler) { itemHandler(null, { file: { buffer: pdfBuffer } }); var pdfParser; if (this.options.password) { - pdfParser = new PFParser(null, null, this.options.password); + pdfParser = new PDFParser(null, null, this.options.password); } else { - pdfParser = new PFParser(); + pdfParser = new PDFParser(); } pdfParser.on("pdfParser_dataError", itemHandler); @@ -81,5 +81,3 @@ PdfReader.prototype.parseBuffer = function (pdfBuffer, itemHandler) { var verbosity = this.options.debug ? 1 : 0; pdfParser.parseBuffer(pdfBuffer, verbosity); }; - -module.exports = PdfReader; diff --git a/README.md b/README.md index 380de5e..bd56d8a 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ It's up to your callback to process these items into a data structure of your ch For example: ```javascript -const { PdfReader } = require("pdfreader"); +import { PdfReader } from "pdfreader"; new PdfReader().parseFileItems("test/sample.pdf", (err, item) => { if (err) console.error("error:", err); @@ -75,8 +75,8 @@ new PdfReader({ password: "YOUR_PASSWORD" }).parseFileItems( As above, but reading from a buffer in memory rather than from a file referenced by path. For example: ```javascript -const fs = require("fs"); -const { PdfReader } = require("pdfreader"); +import fs from "fs"; +import { PdfReader } from "pdfreader"; fs.readFile("test/sample.pdf", (err, pdfBuffer) => { // pdfBuffer contains the file content diff --git a/Rule.js b/Rule.js index 803fa11..b8c5b27 100644 --- a/Rule.js +++ b/Rule.js @@ -4,13 +4,15 @@ * This content is released under the MIT License. **/ -var LOG = require("./lib/LOG.js"); +import { log as LOG } from "./lib/LOG.js"; +import { parseColumns } from "./lib/parseColumns.js"; +import { parseTable } from "./lib/parseTable.js"; /** * regexp: a regular expression which a PDF item's text must match in order to execute that rule. * => a Rule object exposes "accumulators": methods that defines the data extraction strategy of a rule. **/ -function Rule(regexp) { +export function Rule(regexp) { this.regexp = regexp; var self = this; // proxy accumulators methods @@ -180,11 +182,9 @@ Rule.addAccumulator("accumulateFromSameX", function () { /** * This accumulator will store a table by detecting its columns, given their names. **/ -Rule.addAccumulator("parseColumns", require("./lib/parseColumns.js")); +Rule.addAccumulator("parseColumns", parseColumns); /** * This accumulator will store a table by detecting its columns, given their count. **/ -Rule.addAccumulator("parseTable", require("./lib/parseTable.js")); - -module.exports = Rule; +Rule.addAccumulator("parseTable", parseTable); diff --git a/index.js b/index.js index 04c0164..ea38662 100644 --- a/index.js +++ b/index.js @@ -1,8 +1,16 @@ -exports.PdfReader = require("./PdfReader"); -exports.Rule = require("./Rule"); -exports.LOG = require("./lib/LOG.js"); -exports.parseTable = require("./lib/parseTable.js"); -exports.parseColumns = require("./lib/parseColumns.js"); -exports.SequentialParser = require("./lib/SequentialParser.js"); // experimental -exports.TableParser = require("./lib/TableParser.js"); -exports.ColumnsParser = require("./lib/ColumnsParser.js"); +export { PdfReader } from "./PdfReader.js"; +export { Rule } from "./Rule.js"; +export * as LOG from "./lib/LOG.js"; +import * as parseTableExports from "./lib/parseTable.js"; +export const parseTable = Object.assign( + parseTableExports.parseTable, + parseTableExports +); +import * as parseColumnsExports from "./lib/parseColumns.js"; +export const parseColumns = Object.assign( + parseColumnsExports.parseColumns, + parseColumnsExports +); +export { SequentialParser } from "./lib/SequentialParser.js"; // experimental +export { TableParser } from "./lib/TableParser.js"; +export { ColumnsParser } from "./lib/ColumnsParser.js"; diff --git a/lib/ColumnsParser.js b/lib/ColumnsParser.js index 97316b2..d0996c8 100644 --- a/lib/ColumnsParser.js +++ b/lib/ColumnsParser.js @@ -5,7 +5,7 @@ * This content is released under the MIT License. **/ -var LOG = require("./LOG.js"); +import { log as LOG } from "./LOG.js"; function getColumnIndex(cols, x) { var bestDist = null; @@ -20,7 +20,7 @@ function getColumnIndex(cols, x) { return i - 1; } -function ColumnsParser(colNames) { +export function ColumnsParser(colNames) { this.cols = []; var cols = this.cols, colNames = colNames.slice(), // clone (for parameter immutability) @@ -48,5 +48,3 @@ function ColumnsParser(colNames) { } }; } - -module.exports = ColumnsParser; diff --git a/lib/LOG.js b/lib/LOG.js index 72bb739..ffef8c5 100644 --- a/lib/LOG.js +++ b/lib/LOG.js @@ -4,7 +4,7 @@ * This content is released under the MIT License. **/ -var util = require("util"); +import util from "util"; var nullLog = function LOG() {}; @@ -17,11 +17,10 @@ var realLog = function LOG() { var LOG = nullLog; -module.exports = function () { +export function log() { LOG.apply(null, arguments); -}; +} -module.exports.toggle = function (enabled) { +export function toggle(enabled) { LOG = !enabled ? nullLog : realLog; - return module.exports; -}; +} diff --git a/lib/SequentialParser.js b/lib/SequentialParser.js index afc1fdd..3ed5f84 100644 --- a/lib/SequentialParser.js +++ b/lib/SequentialParser.js @@ -3,7 +3,7 @@ * Provides a list of parsed `fields`. * Calls `callback(error, this)` when all accumulators were processed, or when processing a null item. **/ -function SequentialParser(accumulators, callback) { +export function SequentialParser(accumulators, callback) { var step = 0; var fields = {}; return { @@ -32,5 +32,3 @@ function SequentialParser(accumulators, callback) { }, }; } - -module.exports = SequentialParser; diff --git a/lib/TableParser.js b/lib/TableParser.js index 614ae51..0ef369e 100644 --- a/lib/TableParser.js +++ b/lib/TableParser.js @@ -6,7 +6,7 @@ * This content is released under the MIT License. **/ -function TableParser() { +export function TableParser() { this.rows = {}; } @@ -114,5 +114,3 @@ function renderMatrix(matrix) { TableParser.prototype.renderMatrix = function () { return renderMatrix(this.getMatrix()); }; - -module.exports = TableParser; diff --git a/lib/parseColumns.js b/lib/parseColumns.js index 22f5c3c..112bfe4 100644 --- a/lib/parseColumns.js +++ b/lib/parseColumns.js @@ -6,9 +6,9 @@ * This content is released under the MIT License. **/ -var LOG = require("./LOG.js"); +import { log as LOG } from "./LOG.js"; -module.exports = function (/* columns */) { +export const parseColumns = function (/* columns */) { this.output = []; this.cols = Array.prototype.slice.apply(arguments); var colNames = this.cols, diff --git a/lib/parseTable.js b/lib/parseTable.js index 5d52fd2..cb5fa88 100644 --- a/lib/parseTable.js +++ b/lib/parseTable.js @@ -64,7 +64,7 @@ function fillTab(str) { return str.substr(0, 7); } -function renderTable(table) { +export function renderTable(table) { return (table || []) .map(function (row) { return (row || []).map(fillTab).join("\t"); @@ -72,7 +72,7 @@ function renderTable(table) { .join("\n"); } -function renderMatrix(matrix) { +export function renderMatrix(matrix) { return (matrix || []) .map(function (row) { return (row || []).map(joinCellCollisions("+")).join("\t"); @@ -80,7 +80,7 @@ function renderMatrix(matrix) { .join("\n"); } -function renderRows(rows) { +export function renderRows(rows) { return (rows || []) .map(function (row, rowId) { var cells = [rowId + ":"]; @@ -91,7 +91,7 @@ function renderRows(rows) { .join("\n"); } -function renderItems(items) { +export function renderItems(items) { return items .map(function (i) { return [i.y, i.x, i.text].join("\t"); @@ -113,7 +113,7 @@ function buildMatrix(rows, classifyColumn) { return matrix; } -function detectCollisions(matrix) { +export function detectCollisions(matrix) { var collisions = []; (matrix || []).map(function (row, rowN) { (row || []).map(function (cellItems, colN) { @@ -128,7 +128,7 @@ function detectCollisions(matrix) { return collisions; } -function makeAccumulator(nbRows, headerRow) { +export const parseTable = function makeAccumulator(nbRows, headerRow) { var rule = this, items = []; @@ -155,11 +155,4 @@ function makeAccumulator(nbRows, headerRow) { }); return accumulate; // then the same function will be run on all following items, until another rule is triggered -} - -module.exports = makeAccumulator; -module.exports.renderItems = renderItems; -module.exports.renderRows = renderRows; -module.exports.renderMatrix = renderMatrix; -module.exports.renderTable = renderTable; -module.exports.detectCollisions = detectCollisions; +}; diff --git a/package-lock.json b/package-lock.json index c85eaf9..545076b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.0-development", "license": "MIT", "dependencies": { - "pdf2json": "2.0.1" + "pdf2json": "3.0.1" }, "devDependencies": { "@semantic-release/changelog": "^6.0.1", @@ -6473,25 +6473,25 @@ } }, "node_modules/pdf2json": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-2.0.1.tgz", - "integrity": "sha512-gj1kZOV2cA+bHyyZiwNU8AVduT4L+7dQODtJZRev7/23TZ0Mz5ILVLsaCLX2p7A8cdBYyDq6zXEBUBvtXgpIZg==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-3.0.1.tgz", + "integrity": "sha512-1pNsakC8F+OuFS72U+ZI0u8J/voPYYDYHj/0B/7ywYUm3w0QurkDBy3pH35kzygjgmJJVHk2a9I6gBswFF8hQQ==", "bundleDependencies": [ "@xmldom/xmldom" ], "dependencies": { - "@xmldom/xmldom": "^0.7.5" + "@xmldom/xmldom": "^0.8.6" }, "bin": { "pdf2json": "bin/pdf2json" }, "engines": { "node": ">=14.18.0", - "npm": ">=6.14.15" + "npm": ">=8.12.1" } }, "node_modules/pdf2json/node_modules/@xmldom/xmldom": { - "version": "0.7.5", + "version": "0.8.6", "inBundle": true, "license": "MIT", "engines": { @@ -12651,15 +12651,15 @@ "dev": true }, "pdf2json": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-2.0.1.tgz", - "integrity": "sha512-gj1kZOV2cA+bHyyZiwNU8AVduT4L+7dQODtJZRev7/23TZ0Mz5ILVLsaCLX2p7A8cdBYyDq6zXEBUBvtXgpIZg==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-3.0.1.tgz", + "integrity": "sha512-1pNsakC8F+OuFS72U+ZI0u8J/voPYYDYHj/0B/7ywYUm3w0QurkDBy3pH35kzygjgmJJVHk2a9I6gBswFF8hQQ==", "requires": { - "@xmldom/xmldom": "^0.7.5" + "@xmldom/xmldom": "^0.8.6" }, "dependencies": { "@xmldom/xmldom": { - "version": "0.7.5", + "version": "0.8.6", "bundled": true } } diff --git a/package.json b/package.json index ec23c22..304bec3 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,6 @@ { "name": "pdfreader", + "type": "module", "version": "0.0.0-development", "description": "Read text and parse tables from PDF files. Supports tabular data with automatic column detection, and rule-based parsing.", "main": "index.js", @@ -38,7 +39,7 @@ }, "homepage": "https://github.com/adrienjoly/npm-pdfreader", "dependencies": { - "pdf2json": "2.0.1" + "pdf2json": "3.0.1" }, "devDependencies": { "@semantic-release/changelog": "^6.0.1", diff --git a/parse.js b/parse.js index 2d120bf..4a5548b 100644 --- a/parse.js +++ b/parse.js @@ -1,5 +1,7 @@ -var LOG = require("./lib/LOG.js").toggle(false); -var PdfReader = require("./index.js").PdfReader; +import { toggle } from "./lib/LOG.js"; +import { PdfReader } from "./index.js"; + +toggle(false); function printRawItems(filename, callback) { new PdfReader().parseFileItems(filename, function (err, item) { diff --git a/parseAsBuffer.js b/parseAsBuffer.js index 6d42306..d55bc8c 100644 --- a/parseAsBuffer.js +++ b/parseAsBuffer.js @@ -1,6 +1,8 @@ -var LOG = require("./lib/LOG.js").toggle(false); -var PdfReader = require("./index.js").PdfReader; -var fs = require("fs"); +import fs from "fs"; +import { toggle } from "./lib/LOG.js"; +import { PdfReader } from "./index.js"; + +toggle(false); function printRawItems(pdfBuffer, callback) { new PdfReader().parseBuffer(pdfBuffer, function (err, item) { diff --git a/test/test.js b/test/test.js index 53f466b..0928003 100644 --- a/test/test.js +++ b/test/test.js @@ -1,7 +1,10 @@ -const assert = require("assert"); -const test = require("ava"); -const LOG = require("../lib/LOG.js").toggle(false); -const lib = require("../"); +import assert from "assert"; +import test from "ava"; +import { toggle } from "../lib/LOG.js"; +import * as lib from "../index.js"; + +toggle(false); + const PdfReader = lib.PdfReader; const Rule = lib.Rule;