feat: replace-next-paragraph #17

Open · wants to merge 3 commits into production

44 changes: 30 additions & 14 deletions src/createTokenizerParser.ts
@@ -1,4 +1,4 @@
import type { ParseData, ParserDef } from './types'
import { REPLACE_NEXT_PARAGRAPH, type ParseData, type ParserDef } from './types'
import { compileTokens, createParseData, isInline } from './utils'

/**
@@ -14,19 +14,19 @@ export function createTokenizerParser(parsers: ParserDef[]) {
const tokens: Map<string, ParserDef> = new Map(parsers.map((x) => [x.name, x]))
const tokenizer = compileTokens(tokens)

function* parseIter(str: string): IterableIterator<ParseData> {
function* parseIter(str: string, nextParagraph?: string): IterableIterator<ParseData> {
let i = 0
while (i < str.length) {
const val = parseNext(str, i)
if (val[1] !== i) {
yield createParseData(str.slice(i, val[1]), i, val[1])
const parsedData = parseNext(str, i, nextParagraph)
if (parsedData[1] !== i) {
yield createParseData(str.slice(i, parsedData[1]), i, parsedData[1])
}
i = val[2]
yield val
i = parsedData[2]
yield parsedData
}
}

function parseNext(src: string, startIndex: number): ParseData {
function parseNext(src: string, startIndex: number, nextParagraph?: string): ParseData {
tokenizer.lastIndex = -1
const match = tokenizer.exec(src.slice(startIndex))

@@ -51,6 +51,7 @@ export function createTokenizerParser(parsers: ParserDef[]) {
src,
length,
lastIndex,
nextParagraph,
...tokenizerResult,
})

@@ -60,11 +61,16 @@ export function createTokenizerParser(parsers: ParserDef[]) {
}

/** Parse a single Markdown paragraph into an HTML String. */
function parseParagraph(md: string): string {
return [...parseIter(md)]
.map(([x]) => x)
.flat(Infinity)
.join('')
function parseParagraph(
md: string,
nextParagraph?: string
): [Result: string, REPLACE_NEXT_PARAGRAPH?: symbol] {
const results = [...parseIter(md, nextParagraph)]
const paragraphContent = results.map(([x]) => x).join('')
const lastResult = results[results.length - 1]
return lastResult?.[3]?.[REPLACE_NEXT_PARAGRAPH]
? [paragraphContent, REPLACE_NEXT_PARAGRAPH]
: [paragraphContent]
}

/** Parse Markdown into an HTML String. */
@@ -97,9 +103,19 @@ export function createTokenizerParser(parsers: ParserDef[]) {
}, [])

let i = restitchedFencedBlocks.length
/** Content of the paragraph that follows the current one in the document (already parsed, since the loop runs in reverse), BEFORE wrapping in `<p></p>` */
let nextParagraph: string | undefined = undefined
while (i--) {
const part = restitchedFencedBlocks[i]
const p = parseParagraph(part)
const [p, replaceSymbol] = parseParagraph(part, nextParagraph)
if (
replaceSymbol === REPLACE_NEXT_PARAGRAPH &&
nextParagraph &&
result.endsWith(nextParagraph)
) {
result = result.slice(0, -nextParagraph.length)
}
nextParagraph = p
result = (p && (!p.startsWith('<') || isInline(p)) ? `<p>${p.trim()}</p>` : p) + result
}
return result.trim()
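
To make the control flow above easier to follow, here is a stripped-down, illustrative re-implementation of the reverse paragraph loop. It is not the actual module: the `parseParagraph` stand-in only understands single-item bullet lists, and the real `parse` additionally restitches fenced blocks and checks `isInline` before wrapping in `<p>`. The replace/merge handshake is the same.

```ts
// Illustrative sketch only: mirrors the shape of the diff above, with a fake
// parseParagraph that handles nothing except "- item" paragraphs.
const REPLACE_NEXT_PARAGRAPH = Symbol('Replace')

function parseParagraph(
  md: string,
  nextParagraph?: string
): [result: string, replace?: symbol] {
  if (md.startsWith('- ') && nextParagraph?.startsWith('<ul>')) {
    // Merge this list item into the already-parsed list that follows it.
    const merged = `<ul><li>${md.slice(2)}</li>${nextParagraph.slice(4)}`
    return [merged, REPLACE_NEXT_PARAGRAPH]
  }
  if (md.startsWith('- ')) return [`<ul><li>${md.slice(2)}</li></ul>`]
  return [md]
}

function parse(paragraphs: string[]): string {
  let result = ''
  let nextParagraph: string | undefined
  let i = paragraphs.length
  while (i--) {
    const [p, replace] = parseParagraph(paragraphs[i], nextParagraph)
    // When the parser signals REPLACE_NEXT_PARAGRAPH, drop the following
    // (already-emitted) paragraph before prepending the merged result.
    if (replace === REPLACE_NEXT_PARAGRAPH && nextParagraph && result.endsWith(nextParagraph)) {
      result = result.slice(0, -nextParagraph.length)
    }
    nextParagraph = p
    result = (p.startsWith('<') ? p : `<p>${p}</p>`) + result
  }
  return result
}

console.log(parse(['- One', '- Two']))
// -> '<ul><li>One</li><li>Two</li></ul>'
```
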
2 changes: 1 addition & 1 deletion src/parsers/boldItalicsStrikethrough.ts
@@ -6,6 +6,6 @@ export const bis: ParserDef = {
regex: /(?<t>\*\*?|__?|~~)(?<content>.*?)(?<ns>[^\\\n])\k<t>/,
handler: ({ t, content, ns }, { parseParagraph }) => {
const el = t === '~~' ? 's' : t.length === 1 ? 'em' : 'strong'
return wrap(el, parseParagraph((content ?? '') + ns))
return wrap(el, parseParagraph((content ?? '') + ns)[0])
},
}
2 changes: 1 addition & 1 deletion src/parsers/hashHeading.ts
@@ -4,5 +4,5 @@ import { wrap } from '../utils'
export const hashHeading: ParserDef = {
name: 'hashHeading',
regex: /^(?<level>#{1,6})\s*(?<txt>.+)(?:\n|$)/,
handler: ({ level, txt }, { parseParagraph }) => wrap(`h${level.length}`, parseParagraph(txt)),
handler: ({ level, txt }, { parseParagraph }) => wrap(`h${level.length}`, parseParagraph(txt)[0]),
}
35 changes: 24 additions & 11 deletions src/parsers/ol.ts
@@ -1,23 +1,36 @@
import type { ParserDef } from '../types'
import { wrap } from '../utils'
import { REPLACE_NEXT_PARAGRAPH } from '../types'
import { createParseData, wrap } from '../utils'

const trimItem = (x: string) => x.replace(/^\d+[.)]\s+/, '').trim()

function grabContentFromNextOl(nextOl: string): string {
const match = nextOl.match(/<ol(?:\sstart="\d+")?>([.\n\r\t\S\s]+)<\/ol>/)
return match ? match[1] : ''
}

export const ol: ParserDef = {
name: 'ol',
regex: /^(?<all>(?:\n?\d+[.)]\s+)[.\n\r\t\S\s]+)/,
handler: ({ all }, { parseParagraph }) => {
handler: ({ all }, { parseParagraph, nextParagraph }) => {
// prep nodes
const parts = all.split(/\n(?=\d)/g)
const spacedList = parts.some((item) => item.endsWith('\n'))
const [first, ...rest] = parts
const listNodes = [
wrap('li', parseParagraph(trimItem(first))[0]),
...rest.map((x) => wrap('li', parseParagraph(trimItem(x))[0])),
]
// calculate content
const shouldPrepend = nextParagraph?.endsWith('</li></ol>')
if (shouldPrepend) listNodes.push(grabContentFromNextOl(nextParagraph!))
// calculate attrs
const startsWith = first.match(/^\d+/)?.[0]
const pWrap = spacedList
? (str: string) => wrap('p', parseParagraph(str))
: (str: string) => parseParagraph(str)
return wrap(
'ol',
[wrap('li', pWrap(trimItem(first))), ...rest.map((x) => wrap('li', pWrap(trimItem(x))))],
{ start: startsWith == '1' ? null : startsWith }
)
const attrs = startsWith && startsWith !== '1' ? { start: startsWith } : undefined

return shouldPrepend
? createParseData(wrap('ol', listNodes, attrs), NaN, NaN, {
[REPLACE_NEXT_PARAGRAPH]: true,
})
: wrap('ol', listNodes, attrs)
},
}
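
A quick note on `grabContentFromNextOl`: it only pulls the inner `<li>` nodes out of the already-rendered list that follows, handling an optional `start` attribute. A self-contained check (the helper is copied from the diff above; the input strings are made-up examples):

```ts
// Copied from the ol parser above; example inputs are illustrative.
function grabContentFromNextOl(nextOl: string): string {
  const match = nextOl.match(/<ol(?:\sstart="\d+")?>([.\n\r\t\S\s]+)<\/ol>/)
  return match ? match[1] : ''
}

console.log(grabContentFromNextOl('<ol><li>Two</li></ol>'))            // '<li>Two</li>'
console.log(grabContentFromNextOl('<ol start="4"><li>Four</li></ol>')) // '<li>Four</li>'
console.log(grabContentFromNextOl('<p>not a list</p>'))                // ''
```
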
2 changes: 1 addition & 1 deletion src/parsers/quote.ts
@@ -12,6 +12,6 @@ export const quote: ParserDef = {
.split('\n')
.map((x) => x.slice(1).trim())
.join('\n')
)
)[0]
),
}
2 changes: 1 addition & 1 deletion src/parsers/table.ts
@@ -20,7 +20,7 @@ export const table: ParserDef = {
const html = cells.map((row, i) =>
wrap(
'tr',
row.map((x) => wrap(i === 0 && hasHeading ? 'th' : 'td', parseParagraph(x)))
row.map((x) => wrap(i === 0 && hasHeading ? 'th' : 'td', parseParagraph(x)[0]))
)
)
return createParseData(wrap('table', html), index, lastIndex + (tableParse.at(-1)?.[2] ?? 0), {
25 changes: 16 additions & 9 deletions src/parsers/ul.ts
@@ -1,15 +1,22 @@
import type { ParserDef } from '../types'
import { wrap } from '../utils'
import { REPLACE_NEXT_PARAGRAPH } from '../types'
import { createParseData, wrap } from '../utils'

export const ul: ParserDef = {
name: 'ul',
regex: /^(?<all>(?:\n?[*+-]\s+[.\n\r\t\S\s]+)+)/,
handler: ({ all }, { parseParagraph }) =>
wrap(
'ul',
all
.slice(1)
.split(/\n[*+-]/g)
.map((x) => wrap('li', parseParagraph(x.trim())))
),
handler: ({ all }, { parseParagraph, nextParagraph }) => {
const listNodes = all
.slice(1)
.split(/\n[*+-]/g)
.map((x) => wrap('li', parseParagraph(x.trim())[0]))
const shouldPrepend =
nextParagraph?.startsWith('<ul><li>') && nextParagraph?.endsWith('</li></ul>')
const content = shouldPrepend ? listNodes.join('') + nextParagraph!.slice(4, -5) : listNodes
return shouldPrepend
? createParseData(wrap('ul', content), NaN, NaN, {
[REPLACE_NEXT_PARAGRAPH]: true,
})
: wrap('ul', content)
},
}
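
For the `ul` variant, the magic numbers in `slice(4, -5)` just strip the outer tags of the already-rendered list so its items can be re-wrapped (the example string here is illustrative):

```ts
const nextParagraph = '<ul><li>Two</li></ul>'
// '<ul>' is 4 characters and '</ul>' is 5, so this leaves only the <li> nodes.
console.log(nextParagraph.slice(4, -5)) // '<li>Two</li>'
```
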
2 changes: 1 addition & 1 deletion src/parsers/underlineHeading.ts
@@ -5,5 +5,5 @@ export const underlineHeading: ParserDef = {
name: 'underlineHeading',
regex: /^(?<txt>[^\n]+)\n(?<line>-{3,}|={3,})(?:\n|$)/,
handler: ({ txt, line }, { parseParagraph }) =>
wrap(`h${line[0] === '=' ? '1' : '2'}`, parseParagraph(txt)),
wrap(`h${line[0] === '=' ? '1' : '2'}`, parseParagraph(txt)[0]),
}
14 changes: 10 additions & 4 deletions src/types.ts
@@ -1,12 +1,16 @@
type MaybeArray<T> = T | T[]
export const REPLACE_NEXT_PARAGRAPH = Symbol('Replace')

export type ParseData = [
result: MaybeArray<string>,
result: string,
/** Start index of match */
startIndex: number,
/** End index of match; this is useful if you end up parsing more than the tokenizer originally provided */
stopIndex: number,
/** any data you wish to forward can be included here for later parsers to use */
/**
* any data you wish to forward can be included here for later parsers to use
*
* If { [REPLACE_NEXT_PARAGRAPH]: true } is returned as part of this data, the paragraph that follows this one in the document (already parsed, since paragraphs are processed in reverse) is replaced by the result of this one
*/
data?: Record<string | symbol, unknown>
]

@@ -22,8 +26,10 @@ export type ParserFunction = (
length: number
/** index of the last char of match, (equal to index + length) */
lastIndex: number
/** Content of the paragraph that follows the one being parsed (already parsed, since paragraphs are processed in reverse), BEFORE wrapping in `<p></p>` */
nextParagraph: string | undefined
/** for recursive parsing of tokens */
parseParagraph: (str: string) => string
parseParagraph: (str: string) => [Result: string, REPLACE_NEXT_PARAGRAPH?: symbol]
parseNext: (str: string, start: number) => ParseData
parseIter: (str: string) => IterableIterator<ParseData>
parse: (str: string) => string
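
To make the new contract concrete, here is a minimal sketch of a custom parser opting into the merge by forwarding the flag through `createParseData`. The parser itself (`swallowNext`, its regex, and its behaviour) is hypothetical and not part of this PR; the imports assume the file sits alongside the other parsers in `src/parsers`:

```ts
import { REPLACE_NEXT_PARAGRAPH, type ParserDef } from '../types'
import { createParseData } from '../utils'

// Hypothetical parser: a paragraph consisting only of '+++' removes both
// itself and the already-parsed paragraph that follows it from the output.
export const swallowNext: ParserDef = {
  name: 'swallowNext',
  regex: /^(?<marker>\+{3})$/,
  handler: () =>
    createParseData('', NaN, NaN, { [REPLACE_NEXT_PARAGRAPH]: true }),
}
```

Returning the flag alone only removes the following paragraph from the output; the built-in `ol`/`ul` parsers additionally splice that paragraph's `<li>` nodes into their own result before signalling.
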
20 changes: 6 additions & 14 deletions test/list.test.ts
@@ -60,13 +60,13 @@ describe('lists', () => {
)
})

// test('Allows line breaking in lists — treat double line breaks as single in lists - ul', () => {
// expect(starkdown('- One\n\n- Two')).toEqual('<ul><li>One</li><li>Two</li></ul>')
// })
test('Allows line breaking in lists — treat double line breaks as single in lists - ul', () => {
expect(starkdown('- One\n\n- Two')).toEqual('<ul><li>One</li><li>Two</li></ul>')
})

// test('Allows line breaking in lists — treat double line breaks as single in lists - ol', () => {
// expect(starkdown('1. One\n\n2. Two')).toEqual('<ol><li>One</li><li>Two</li></ol>')
// })
test('Allows line breaking in lists — treat double line breaks as single in lists - ol', () => {
expect(starkdown('1. One\n\n2. Two')).toEqual('<ol><li>One</li><li>Two</li></ol>')
})

// [FUTURE FEATURE?]
// test('Allows line breaking in lists — treat double line breaks as single in lists & add `.wide` class - ul', () => {
@@ -116,12 +116,4 @@ describe('lists', () => {
'<ol><li>One<br />A<br />B</li><li>Two</li></ol>'
)
})

// Due to the way that we're dealing with newlines and paragraphs, this doesn't work at the moment but
// keeping this here to make sure that we can just uncomment it in the future
// test.only('parses an ordered list and adds paragraphs inbetween them if double spaced', () => {
// expect(starkdown('1) Ordered\n2) Lists\n\n4) Numbers are ignored')).toEqual(
// '<ol><li><p>Ordered<p></li><li><p>Lists<p></li><li><p>Numbers are ignored<p></li></ol>'
// )
// })
})
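
One case the enabled tests above do not cover is whether the `start` attribute survives the merge. If I am reading the `ol` handler correctly it should, along the lines of this hypothetical addition (the expected string is my reading of the diff, not a verified result):

```ts
// Hypothetical extra test for test/list.test.ts; not part of this PR.
test('keeps the start attribute when merging double-spaced ordered lists', () => {
  expect(starkdown('2. Two\n\n3. Three')).toEqual(
    '<ol start="2"><li>Two</li><li>Three</li></ol>'
  )
})
```
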