Skip to content

Commit

Permalink
used docpa to parse and traverse
Browse files — browse the repository at this point in the history
  • Loading branch information
coderosh committed Jan 14, 2022
1 parent 610aab8 commit 6194594
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 23 deletions.
6 changes: 2 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "jsonfromtable",
"version": "3.0.0",
"version": "3.1.0",
"description": "Convert html tables to object (or array)",
"main": "dist/index.js",
"scripts": {
Expand Down Expand Up @@ -37,9 +37,7 @@
},
"homepage": "https://github.com/coderosh/jsonfromtable#readme",
"dependencies": {
"css-select": "^4.2.0",
"dom-serializer": "^1.3.2",
"htmlparser2": "^7.2.0",
"docpa": "^1.0.0",
"node-fetch": "^2.6.1"
},
"gitHooks": {
Expand Down
30 changes: 15 additions & 15 deletions src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
import render from 'dom-serializer'
import { selectAll, selectOne } from 'css-select'
import { DomUtils, parseDocument } from 'htmlparser2'

const { getAttributeValue, textContent } = DomUtils
import Docpa from 'docpa'

function getRowWithColumns(
tableDoc: ReturnType<typeof parseDocument>,
tableDoc: Docpa,
selectors: [string, string],
shouldBeText: boolean,
trim: boolean
) {
const rowSelector = selectors[0] || 'tr'
const colSelector = selectors[1] || 'td,th'

return selectAll(rowSelector, tableDoc).map((tr) =>
selectAll(colSelector, tr).map((td) => {
const rowspan = (td && +getAttributeValue(td as any, 'rowspan')!) || 1
const colspan = (td && +getAttributeValue(td as any, 'colspan')!) || 1
const value =
(td && (shouldBeText ? textContent(td) : render(td.children))) || ''
return tableDoc.querySelectorAll(rowSelector).map((tr) => {
if (!tr) return []
return tr.querySelectorAll(colSelector).map((td) => {
if (!td) return { value: '', colspan: 1, rowspan: 1 }

const rowspan = +(td.getAttribute('rowspan') || 1) || 1
const colspan = +(td.getAttribute('colspan') || 1) || 1

const value = (shouldBeText ? td.textContent : td.innerHTML) || ''

return { value: trim ? value.trim() : value, colspan, rowspan }
})
)
})
}

interface ParseTableOptions {
Expand All @@ -39,8 +39,8 @@ function parseTable(html: string, options: ParseTableOptions) {
trim = true,
} = options

const document = parseDocument(html)
const table = selectOne(tableSelector, document)
const document = new Docpa(html)
const table = document.querySelector(tableSelector)

if (!table) throw new Error(`${tableSelector} not found in document.`)

Expand Down
17 changes: 13 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1247,10 +1247,10 @@ cross-spawn@^7.0.2, cross-spawn@^7.0.3:
shebang-command "^2.0.0"
which "^2.0.1"

css-select@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-4.2.0.tgz#ab28276d3afb00cc05e818bd33eb030f14f57895"
integrity sha512-6YVG6hsH9yIb/si3Th/is8Pex7qnVHO6t7q7U6TIUnkQASGbS8tnUDBftnPynLNnuUl/r2+PTd0ekiiq7R0zJw==
css-select@^4.2.1:
version "4.2.1"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-4.2.1.tgz#9e665d6ae4c7f9d65dbe69d0316e3221fb274cdd"
integrity sha512-/aUslKhzkTNCQUB2qTX84lVmfia9NyjP3WpDGtj/WxhwBzWBYUV3DgUpurHTme8UTPcPlAD1DJ+b0nN/t50zDQ==
dependencies:
boolbase "^1.0.0"
css-what "^5.1.0"
Expand Down Expand Up @@ -1364,6 +1364,15 @@ dir-glob@^3.0.1:
dependencies:
path-type "^4.0.0"

docpa@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/docpa/-/docpa-1.0.0.tgz#98a27c64f30777ac169e494f271b12bf55b3bbde"
integrity sha512-v1aSAg6uLEAES4IIuFqQxl7+DrCpWkBf/8IJJo0vSU7FLMU6lTSzvizPvx0TSFgmsu93klxGDGlRIBKkJ4JDxA==
dependencies:
css-select "^4.2.1"
dom-serializer "^1.3.2"
htmlparser2 "^7.2.0"

doctrine@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-2.1.0.tgz#5cd01fc101621b42c4cd7f5d1a66243716d3f39d"
Expand Down

0 comments on commit 6194594

Please sign in to comment.