From 7ab5b3cf457701a9ff2ecb2f8846134b5ce4ba12 Mon Sep 17 00:00:00 2001
From: mesqueeb
Date: Thu, 22 Jun 2023 10:26:19 +0900
Subject: [PATCH 1/2] feat: implement lists with double line breaks in between

---
 src/createTokenizerParser.ts            | 44 +++++++++++++++++--------
 src/parsers/boldItalicsStrikethrough.ts |  2 +-
 src/parsers/hashHeading.ts              |  2 +-
 src/parsers/ol.ts                       | 35 +++++++++++++-------
 src/parsers/quote.ts                    |  2 +-
 src/parsers/table.ts                    |  2 +-
 src/parsers/ul.ts                       | 25 +++++++++-----
 src/parsers/underlineHeading.ts         |  2 +-
 src/types.ts                            | 14 +++++---
 test/list.test.ts                       |  8 -----
 10 files changed, 85 insertions(+), 51 deletions(-)

diff --git a/src/createTokenizerParser.ts b/src/createTokenizerParser.ts
index 5142529..c245b7d 100644
--- a/src/createTokenizerParser.ts
+++ b/src/createTokenizerParser.ts
@@ -1,4 +1,4 @@
-import type { ParseData, ParserDef } from './types'
+import { REPLACE_NEXT_PARAGRAPH, type ParseData, type ParserDef } from './types'
 import { compileTokens, createParseData, isInline } from './utils'
 
 /**
@@ -14,19 +14,19 @@ export function createTokenizerParser(parsers: ParserDef[]) {
   const tokens: Map<string, ParserDef> = new Map(parsers.map((x) => [x.name, x]))
   const tokenizer = compileTokens(tokens)
 
-  function* parseIter(str: string): IterableIterator<ParseData> {
+  function* parseIter(str: string, nextParagraph?: string): IterableIterator<ParseData> {
     let i = 0
     while (i < str.length) {
-      const val = parseNext(str, i)
-      if (val[1] !== i) {
-        yield createParseData(str.slice(i, val[1]), i, val[1])
+      const parsedData = parseNext(str, i, nextParagraph)
+      if (parsedData[1] !== i) {
+        yield createParseData(str.slice(i, parsedData[1]), i, parsedData[1])
       }
-      i = val[2]
-      yield val
+      i = parsedData[2]
+      yield parsedData
     }
   }
 
-  function parseNext(src: string, startIndex: number): ParseData {
+  function parseNext(src: string, startIndex: number, nextParagraph?: string): ParseData {
     tokenizer.lastIndex = -1
     const match = tokenizer.exec(src.slice(startIndex))
 
@@ -51,6 +51,7 @@ export function createTokenizerParser(parsers: ParserDef[]) {
       src,
       length,
      lastIndex,
+      nextParagraph,
       ...tokenizerResult,
     })
 
@@ -60,11 +61,16 @@ export function createTokenizerParser(parsers: ParserDef[]) {
   }
 
   /** Parse a single Markdown paragraph into an HTML String. */
-  function parseParagraph(md: string): string {
-    return [...parseIter(md)]
-      .map(([x]) => x)
-      .flat(Infinity)
-      .join('')
+  function parseParagraph(
+    md: string,
+    nextParagraph?: string
+  ): [Result: string, REPLACE_NEXT_PARAGRAPH?: symbol] {
+    const results = [...parseIter(md, nextParagraph)]
+    const paragraphContent = results.map(([x]) => x).join('')
+    const lastResult = results[results.length - 1]
+    return lastResult[3]?.[REPLACE_NEXT_PARAGRAPH]
+      ? [paragraphContent, REPLACE_NEXT_PARAGRAPH]
+      : [paragraphContent]
   }
 
   /** Parse Markdown into an HTML String. */
@@ -97,9 +103,19 @@ export function createTokenizerParser(parsers: ParserDef[]) {
     }, [])
 
     let i = restitchedFencedBlocks.length
+    /** The previous paragraph content BEFORE wrapping in `<p></p>` */
+    let nextParagraph: string | undefined = undefined
     while (i--) {
       const part = restitchedFencedBlocks[i]
-      const p = parseParagraph(part)
+      const [p, replaceSymbol] = parseParagraph(part, nextParagraph)
+      if (
+        replaceSymbol === REPLACE_NEXT_PARAGRAPH &&
+        nextParagraph &&
+        result.endsWith(nextParagraph)
+      ) {
+        result = result.slice(0, -nextParagraph.length)
+      }
+      nextParagraph = p
       result = (p && (!p.startsWith('<') || isInline(p)) ? `<p>${p.trim()}</p>` : p) + result
     }
     return result.trim()
diff --git a/src/parsers/boldItalicsStrikethrough.ts b/src/parsers/boldItalicsStrikethrough.ts
index 53ba166..d523f2b 100644
--- a/src/parsers/boldItalicsStrikethrough.ts
+++ b/src/parsers/boldItalicsStrikethrough.ts
@@ -6,6 +6,6 @@ export const bis: ParserDef = {
   regex: /(?<t>\*\*?|__?|~~)(?<content>.*?)(?<ns>[^\\\n])\k<t>/,
   handler: ({ t, content, ns }, { parseParagraph }) => {
     const el = t === '~~' ? 's' : t.length === 1 ? 'em' : 'strong'
-    return wrap(el, parseParagraph((content ?? '') + ns))
+    return wrap(el, parseParagraph((content ?? '') + ns)[0])
   },
 }
diff --git a/src/parsers/hashHeading.ts b/src/parsers/hashHeading.ts
index baa406a..cb98608 100644
--- a/src/parsers/hashHeading.ts
+++ b/src/parsers/hashHeading.ts
@@ -4,5 +4,5 @@ import { wrap } from '../utils'
 export const hashHeading: ParserDef = {
   name: 'hashHeading',
   regex: /^(?<level>#{1,6})\s*(?<txt>.+)(?:\n|$)/,
-  handler: ({ level, txt }, { parseParagraph }) => wrap(`h${level.length}`, parseParagraph(txt)),
+  handler: ({ level, txt }, { parseParagraph }) => wrap(`h${level.length}`, parseParagraph(txt)[0]),
 }
diff --git a/src/parsers/ol.ts b/src/parsers/ol.ts
index 1d4625b..20edd78 100644
--- a/src/parsers/ol.ts
+++ b/src/parsers/ol.ts
@@ -1,23 +1,36 @@
 import type { ParserDef } from '../types'
-import { wrap } from '../utils'
+import { REPLACE_NEXT_PARAGRAPH } from '../types'
+import { createParseData, wrap } from '../utils'
 
 const trimItem = (x: string) => x.replace(/^\d+[.)]\s+/, '').trim()
 
+function grabContentFromNextOl(nextOl: string): string {
+  const match = nextOl.match(/<ol.*?>([.\n\r\t\S\s]+)<\/ol>/)
+  return match ? match[1] : ''
+}
+
 export const ol: ParserDef = {
   name: 'ol',
   regex: /^(?<all>(?:\n?\d+[.)]\s+)[.\n\r\t\S\s]+)/,
-  handler: ({ all }, { parseParagraph }) => {
+  handler: ({ all }, { parseParagraph, nextParagraph }) => {
+    // prep nodes
     const parts = all.split(/\n(?=\d)/g)
-    const spacedList = parts.some((item) => item.endsWith('\n'))
     const [first, ...rest] = parts
+    const listNodes = [
+      wrap('li', parseParagraph(trimItem(first))[0]),
+      ...rest.map((x) => wrap('li', parseParagraph(trimItem(x))[0])),
+    ]
+    // calculate content
+    const shouldPrepend = nextParagraph?.endsWith('</ol>')
+    if (shouldPrepend) listNodes.push(grabContentFromNextOl(nextParagraph!))
+    // calculate attrs
     const startsWith = first.match(/^\d+/)?.[0]
-    const pWrap = spacedList
-      ? (str: string) => wrap('p', parseParagraph(str))
-      : (str: string) => parseParagraph(str)
-    return wrap(
-      'ol',
-      [wrap('li', pWrap(trimItem(first))), ...rest.map((x) => wrap('li', pWrap(trimItem(x))))],
-      { start: startsWith == '1' ? null : startsWith }
-    )
+    const attrs = startsWith && startsWith !== '1' ? { start: startsWith } : undefined
+
+    return shouldPrepend
+      ? createParseData(wrap('ol', listNodes, attrs), NaN, NaN, {
+          [REPLACE_NEXT_PARAGRAPH]: true,
+        })
+      : wrap('ol', listNodes, attrs)
   },
 }
diff --git a/src/parsers/quote.ts b/src/parsers/quote.ts
index 8ae4ea8..e974f78 100644
--- a/src/parsers/quote.ts
+++ b/src/parsers/quote.ts
@@ -12,6 +12,6 @@ export const quote: ParserDef = {
         .split('\n')
         .map((x) => x.slice(1).trim())
         .join('\n')
-      )
+      )[0]
     ),
 }
diff --git a/src/parsers/table.ts b/src/parsers/table.ts
index 3f8af1e..a443016 100644
--- a/src/parsers/table.ts
+++ b/src/parsers/table.ts
@@ -20,7 +20,7 @@ export const table: ParserDef = {
     const html = cells.map((row, i) =>
       wrap(
         'tr',
-        row.map((x) => wrap(i === 0 && hasHeading ? 'th' : 'td', parseParagraph(x)))
+        row.map((x) => wrap(i === 0 && hasHeading ? 'th' : 'td', parseParagraph(x)[0]))
       )
     )
     return createParseData(wrap('table', html), index, lastIndex + (tableParse.at(-1)?.[2] ?? 0), {
diff --git a/src/parsers/ul.ts b/src/parsers/ul.ts
index eab4268..8e8ac6b 100644
--- a/src/parsers/ul.ts
+++ b/src/parsers/ul.ts
@@ -1,15 +1,22 @@
 import type { ParserDef } from '../types'
-import { wrap } from '../utils'
+import { REPLACE_NEXT_PARAGRAPH } from '../types'
+import { createParseData, wrap } from '../utils'
 
 export const ul: ParserDef = {
   name: 'ul',
   regex: /^(?<all>(?:\n?[*+-]\s+[.\n\r\t\S\s]+)+)/,
-  handler: ({ all }, { parseParagraph }) =>
-    wrap(
-      'ul',
-      all
-        .slice(1)
-        .split(/\n[*+-]/g)
-        .map((x) => wrap('li', parseParagraph(x.trim())))
-    ),
+  handler: ({ all }, { parseParagraph, nextParagraph }) => {
+    const listNodes = all
+      .slice(1)
+      .split(/\n[*+-]/g)
+      .map((x) => wrap('li', parseParagraph(x.trim())[0]))
+    const shouldPrepend =
+      nextParagraph?.startsWith('<ul><li>') && nextParagraph?.endsWith('</li></ul>')
+    const content = shouldPrepend ? listNodes.join('') + nextParagraph!.slice(4, -5) : listNodes
+    return shouldPrepend
+      ? createParseData(wrap('ul', content), NaN, NaN, {
+          [REPLACE_NEXT_PARAGRAPH]: true,
+        })
+      : wrap('ul', content)
+  },
 }
diff --git a/src/parsers/underlineHeading.ts b/src/parsers/underlineHeading.ts
index 9fa5f96..fe97071 100644
--- a/src/parsers/underlineHeading.ts
+++ b/src/parsers/underlineHeading.ts
@@ -5,5 +5,5 @@ export const underlineHeading: ParserDef = {
   name: 'underlineHeading',
   regex: /^(?<txt>[^\n]+)\n(?<line>-{3,}|={3,})(?:\n|$)/,
   handler: ({ txt, line }, { parseParagraph }) =>
-    wrap(`h${line[0] === '=' ? '1' : '2'}`, parseParagraph(txt)),
+    wrap(`h${line[0] === '=' ? '1' : '2'}`, parseParagraph(txt)[0]),
 }
diff --git a/src/types.ts b/src/types.ts
index 97e55c9..5bb2e38 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,12 +1,16 @@
-type MaybeArray<T> = T | T[]
+export const REPLACE_NEXT_PARAGRAPH = Symbol('Replace')
 
 export type ParseData = [
-  result: MaybeArray<string>,
+  result: string,
   /** Start index of match */
   startIndex: number,
   /** end index of match, this is useful if your end up parsing more than the tokeniser originally provided */
   stopIndex: number,
-  /** any data you wish to forward can be included here for later parsers to use */
+  /**
+   * any data you wish to forward can be included here for later parsers to use
+   *
+   * If { [REPLACE_NEXT_PARAGRAPH]: true } is returned as part of this data, the previous paragraph will be replaced with the result of this one
+   */
   data?: Record<string, unknown>
 ]
 
@@ -22,8 +26,10 @@ export type ParserFunction = (
     length: number
     /** index of the last char of match, (equal to index + length) */
     lastIndex: number
+    /** The previous paragraph content BEFORE wrapping in `<p></p>` */
+    nextParagraph: string | undefined
     /** for recursive parsing of tokens */
-    parseParagraph: (str: string) => string
+    parseParagraph: (str: string) => [Result: string, REPLACE_NEXT_PARAGRAPH?: symbol]
     parseNext: (str: string, start: number) => ParseData
     parseIter: (str: string) => IterableIterator<ParseData>
     parse: (str: string) => string
diff --git a/test/list.test.ts b/test/list.test.ts
index 44f062a..2e5c489 100644
--- a/test/list.test.ts
+++ b/test/list.test.ts
@@ -116,12 +116,4 @@ describe('lists', () => {
       '<ol><li>One<p>A</p><p>B</p></li><li>Two</li></ol>'
     )
   })
-
-  // Due to the way that we're dealing with newlines and paragraphs, this doesn't work at the moment but
-  // keeping this here to make sure that we can just uncomment it in the future
-  // test.only('parses an ordered list and adds paragraphs inbetween them if double spaced', () => {
-  //   expect(starkdown('1) Ordered\n2) Lists\n\n4) Numbers are ignored')).toEqual(
-  //     '<ol><li><p>Ordered</p></li><li><p>Lists</p></li><li><p>Numbers are ignored</p></li></ol>'
-  //   )
-  // })
 })

From 98262e2ea3fbde6272a986015b77e21499c0702f Mon Sep 17 00:00:00 2001
From: mesqueeb
Date: Thu, 22 Jun 2023 10:33:20 +0900
Subject: [PATCH 2/2] Revert "fix: temporary comment out future feature"

This reverts commit c1c18bc8ba9977bd9d8a644ef1a4f12da19ea93a.
---
 test/list.test.ts | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/list.test.ts b/test/list.test.ts
index e8d4783..2e5c489 100644
--- a/test/list.test.ts
+++ b/test/list.test.ts
@@ -60,13 +60,13 @@ describe('lists', () => {
     )
   })
 
-  // test('Allows line breaking in lists — treat double line breaks as single in lists - ul', () => {
-  //   expect(starkdown('- One\n\n- Two')).toEqual('<ul><li>One</li><li>Two</li></ul>')
-  // })
+  test('Allows line breaking in lists — treat double line breaks as single in lists - ul', () => {
+    expect(starkdown('- One\n\n- Two')).toEqual('<ul><li>One</li><li>Two</li></ul>')
+  })
 
-  // test('Allows line breaking in lists — treat double line breaks as single in lists - ol', () => {
-  //   expect(starkdown('1. One\n\n2. Two')).toEqual('<ol><li>One</li><li>Two</li></ol>')
-  // })
+  test('Allows line breaking in lists — treat double line breaks as single in lists - ol', () => {
+    expect(starkdown('1. One\n\n2. Two')).toEqual('<ol><li>One</li><li>Two</li></ol>')
+  })
 
   // [FUTURE FEATURE?]
   // test('Allows line breaking in lists — treat double line breaks as single in lists & add `.wide` class - ul', () => {