Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search): clean markdown elements in search contents #2457

Merged
merged 13 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/plugins/search/component.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,13 @@ function doSearch(value) {

let html = '';
matches.forEach((post, i) => {
const content = post.content ? `...${post.content}...` : '';
const title = (post.title || '').replace(/<[^>]+>/g, '');
html += /* html */ `
<div class="matching-post" aria-label="search result ${i + 1}">
<a href="${post.url}" title="${title}">
<p class="title clamp-1">${post.title}</p>
<p class="content clamp-2">${post.content}</p>
<p class="content clamp-2">${content}</p>
</a>
</div>
`;
Expand Down
197 changes: 197 additions & 0 deletions src/plugins/search/markdown-to-txt.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/**
* This is a function to convert markdown to txt based on markedjs v13+.
* The escape/unescape functions are copied from [lodash](https://www.npmjs.com/package/lodash) instead of being imported, to reduce the size.
*/
import { marked } from 'marked';

// Lookup from each supported HTML entity to the character it represents.
const htmlUnescapes = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
};

// Global matcher used for the replacement pass. The optional `(0+)?` group
// also accepts zero-padded apostrophe entities such as `&#039;`.
const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39);/g;

// Flag-less copy used only for detection, so `.test()` keeps no `lastIndex`
// state between calls.
const reHasEscapedHtml = RegExp(reEscapedHtml.source);

/**
 * Convert the HTML entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#39;`
 * (including zero-padded forms like `&#039;`) back to their characters.
 *
 * @param {string} string Text that may contain HTML entities.
 * @returns {string} The text with entities replaced; `''` for falsy input.
 */
function unescape(string) {
  // Nothing to replace: hand back the input, normalising falsy values to ''.
  if (!string || !reHasEscapedHtml.test(string)) {
    return string || '';
  }

  // Zero-padded apostrophe entities have no table entry, hence the fallback.
  return string.replace(reEscapedHtml, entity => htmlUnescapes[entity] || "'");
}

// Lookup from each HTML-significant character to its entity.
const htmlEscapes = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

// Global matcher used for the replacement pass.
const reUnescapedHtml = /[&<>"']/g;

// Flag-less copy used only for detection, so `.test()` keeps no `lastIndex`
// state between calls.
const reHasUnescapedHtml = RegExp(reUnescapedHtml.source);

/**
 * Replace the characters `&`, `<`, `>`, `"` and `'` with their HTML entities.
 *
 * @param {string} string Text to escape.
 * @returns {string} The escaped text; `''` for falsy input.
 */
function escape(string) {
  const needsEscaping = Boolean(string) && reHasUnescapedHtml.test(string);
  if (needsEscaping) {
    return string.replace(reUnescapedHtml, chr => htmlEscapes[chr]);
  }

  // Nothing to escape: hand back the input, normalising falsy values to ''.
  return string || '';
}

/**
 * Strip the docsify helper markers `!>` and `?>` from a piece of text.
 *
 * Both calls use a string pattern, so only the FIRST occurrence of each
 * marker is removed; the text that followed the marker (including its leading
 * space) is left untouched.
 *
 * @param {string} string Text that may contain helper markers.
 * @returns {string} The text without the first `!>` and the first `?>`;
 *   falsy input is returned unchanged.
 */
function helpersCleanup(string) {
  if (!string) {
    return string;
  }

  const withoutBang = string.replace('!>', '');
  return withoutBang.replace('?>', '');
}

/**
 * Renderer object for marked that produces plain text instead of HTML.
 *
 * Every method receives a marked token and returns the text that should be
 * kept for it. `this.parser` is not set anywhere in this object — presumably
 * marked attaches it before rendering — so it is always accessed through
 * optional chaining and every method falls back to an empty string.
 */
const markdownToTxtRenderer = {
  /** Blank space between blocks contributes no text. */
  space() {
    return '';
  },

  /** Code block: keep the code itself, minus one trailing newline, escaped. */
  code({ text }) {
    const code = text.replace(/\n$/, '');
    return escape(code);
  },

  /** Blockquote: its children are block-level tokens, so use `parse`. */
  blockquote({ tokens }) {
    return this.parser?.parse(tokens) || '';
  },

  /** Raw HTML is dropped entirely. */
  html() {
    return '';
  },

  /**
   * Heading: its children are inline tokens (text, strong, codespan, ...),
   * so they must go through `parseInline`; the block-level `parse` does not
   * handle inline token types.
   */
  heading({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /** Horizontal rules contribute no text. */
  hr() {
    return '';
  },

  /**
   * List: render every item and separate the items with a single space so
   * the last word of one item does not fuse with the first word of the next.
   * Items are trimmed first because task items carry a leading space.
   */
  list(token) {
    return token.items
      .map(item => String(this.listitem?.(item) ?? '').trim())
      .filter(Boolean)
      .join(' ');
  },

  /**
   * List item: for task items the (empty) checkbox text plus a space is
   * prepended, then the item's own tokens are rendered.
   */
  listitem(item) {
    let itemBody = '';

    if (item.task) {
      const checkbox = this.checkbox?.({ checked: !!item.checked }) ?? '';

      if (item.loose) {
        // Loose items wrap their content in paragraphs, so the prefix has to
        // be injected into the first paragraph (and its first text child).
        const firstToken = item.tokens[0];
        if (firstToken?.type === 'paragraph') {
          firstToken.text = `${checkbox} ${firstToken.text}`;
          const firstInline = firstToken.tokens?.[0];
          if (firstInline?.type === 'text') {
            firstInline.text = `${checkbox} ${firstInline.text}`;
          }
        } else {
          item.tokens.unshift({
            type: 'text',
            raw: `${checkbox} `,
            text: `${checkbox} `,
          });
        }
      } else {
        itemBody += `${checkbox} `;
      }
    }

    // Fall back to '' so a missing parser never appends the text "undefined".
    itemBody += this.parser?.parse(item.tokens, !!item.loose) || '';

    return itemBody;
  },

  /** Checkboxes have no textual representation. */
  checkbox() {
    return '';
  },

  /** Paragraph: the rendered inline content, without any wrapper. */
  paragraph({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /**
   * Table: header row followed by the body rows. Cells within a row and rows
   * within the body are separated by a single space so neighbouring cell
   * contents stay distinct words.
   */
  table(token) {
    const renderRow = cells =>
      this.tablerow?.({
        text: cells.map(cell => this.tablecell?.(cell) ?? '').join(' '),
      }) ?? '';

    const header = renderRow(token.header);
    const body = token.rows.map(renderRow).join(' ');

    return `${header} ${body}`;
  },

  /** Table row: the already-rendered cell text, unchanged. */
  tablerow({ text }) {
    return text;
  },

  /** Table cell: the rendered inline content of the cell. */
  tablecell(token) {
    return this.parser?.parseInline(token.tokens) || '';
  },

  /**
   * Strong: render the nested inline tokens (like `em` and `del` do) so that
   * markdown inside the bold span is cleaned too; fall back to the token's
   * own text when there are no child tokens or no parser.
   */
  strong({ tokens, text }) {
    const rendered = tokens && this.parser?.parseInline(tokens);
    return rendered || text || '';
  },

  /** Emphasis: the rendered inline content. */
  em({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /** Inline code: its text as-is. */
  codespan({ text }) {
    return text;
  },

  /** Line break becomes a single space. */
  br() {
    return ' ';
  },

  /** Strikethrough: the rendered inline content. */
  del({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  /**
   * Link: keep the href and title next to the link text so they remain
   * searchable (images are handled the same way).
   * e.g. [filename](_media/example.js ':include :type=code :fragment=demo')
   * Result: filename _media/example.js :include :type=code :fragment=demo
   */
  link({ tokens, href, title }) {
    return `${this.parser?.parseInline(tokens) || ''} ${href || ''} ${title || ''}`;
  },

  /** Image: alt text, href and title separated by spaces. */
  image({ title, text, href }) {
    return `${text || ''} ${href || ''} ${title || ''}`;
  },

  /** Text: render child tokens when present, otherwise the token's text. */
  text(token) {
    return token.tokens
      ? this.parser?.parseInline(token.tokens) || ''
      : token.text || '';
  },
};
// Install the plain-text renderer on marked and keep the returned handle,
// which is what `markdownToTxt` parses with.
const _marked = marked.setOptions({ renderer: markdownToTxtRenderer });

/**
 * Convert a markdown string to plain text.
 *
 * Pipeline: render with the plain-text renderer, turn HTML entities back into
 * characters, strip docsify helper markers, then trim surrounding whitespace.
 *
 * @param {string} markdown Markdown source.
 * @returns {string} The plain-text content.
 */
export function markdownToTxt(markdown) {
  const rendered = _marked.parse(markdown);
  return helpersCleanup(unescape(rendered)).trim();
}

export default markdownToTxt;
11 changes: 6 additions & 5 deletions src/plugins/search/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
getAndRemoveConfig,
getAndRemoveDocsifyIgnoreConfig,
} from '../../core/render/utils.js';
import { markdownToTxt } from './markdown-to-txt.js';
import Dexie from 'dexie';

let INDEXES = {};
Expand Down Expand Up @@ -134,7 +135,7 @@ export function genIndex(path, content = '', router, depth, indexKey) {
index[slug] = {
slug,
title: path !== '/' ? path.slice(1) : 'Home Page',
body: token.text || '',
body: markdownToTxt(token.text || ''),
path: path,
indexKey: indexKey,
};
Expand All @@ -150,12 +151,12 @@ export function genIndex(path, content = '', router, depth, indexKey) {
token.text = getTableData(token);
token.text = getListData(token);

index[slug].body += '\n' + (token.text || '');
index[slug].body += '\n' + markdownToTxt(token.text || '');
} else {
token.text = getTableData(token);
token.text = getListData(token);

index[slug].body = token.text || '';
index[slug].body = markdownToTxt(token.text || '');
}

index[slug].path = path;
Expand Down Expand Up @@ -229,8 +230,8 @@ export function search(query) {
start = indexContent < 11 ? 0 : indexContent - 10;
end = start === 0 ? 100 : indexContent + keyword.length + 90;

if (postContent && end > postContent.length) {
end = postContent.length;
if (handlePostContent && end > handlePostContent.length) {
end = handlePostContent.length;
}

const matchContent =
Expand Down
101 changes: 101 additions & 0 deletions test/e2e/search.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -232,4 +232,105 @@ test.describe('Search Plugin Tests', () => {
await page.keyboard.press('z');
await expect(searchFieldElm).toBeFocused();
});
test('search result should remove markdown code block', async ({ page }) => {
  // Page under test: a paragraph directly followed by a fenced code block.
  const homepage = `
# Hello World

searchHere
\`\`\`js
console.log('Hello World');
\`\`\`
`;

  await docsifyInit({
    markdown: { homepage },
    scriptURLs: ['/dist/plugins/search.js'],
  });
  await page.locator('input[type=search]').fill('searchHere');

  // The fence markers (```js / ```) must be gone; the newline that follows
  // "searchHere" is kept.
  const resultText = await page
    .locator('.results-panel .content')
    .textContent();
  expect(resultText).toContain(
    "...searchHere\nconsole.log('Hello World');...",
  );
});

test('search result should remove file markdown and keep href attribution for files', async ({
  page,
}) => {
  // Page under test: an embedded file written with image syntax.
  const homepage = `
# Hello World
![filename](_media/example.js ':include :type=code :fragment=demo')
`;

  await docsifyInit({
    markdown: { homepage },
    scriptURLs: ['/dist/plugins/search.js'],
  });
  await page.locator('input[type=search]').fill('filename');

  // The image syntax is stripped, while alt text, href and title remain.
  const resultText = await page
    .locator('.results-panel .content')
    .textContent();
  expect(resultText).toContain(
    '...filename _media/example.js :include :type=code :fragment=demo...',
  );
});

test('search result should remove checkbox markdown and keep related values', async ({
  page,
}) => {
  // Page under test: a task list with checked and unchecked items.
  const homepage = `
# Hello World

- [ ] Task 1
- [x] SearchHere
- [ ] Task 3
`;

  await docsifyInit({
    markdown: { homepage },
    scriptURLs: ['/dist/plugins/search.js'],
  });
  await page.locator('input[type=search]').fill('SearchHere');

  // The checkbox syntax is stripped; only the item texts remain.
  const resultText = await page
    .locator('.results-panel .content')
    .textContent();
  expect(resultText).toContain('...Task 1 SearchHere Task 3...');
});

test('search result should remove docsify self helper markdown and keep related values', async ({
  page,
}) => {
  // Page under test: a paragraph using the docsify `!>` helper.
  const homepage = `
# Hello World

!> SearchHere to check it!

`;

  await docsifyInit({
    markdown: { homepage },
    scriptURLs: ['/dist/plugins/search.js'],
  });
  await page.locator('input[type=search]').fill('SearchHere');

  // The helper marker is stripped; the sentence itself remains.
  const resultText = await page
    .locator('.results-panel .content')
    .textContent();
  expect(resultText).toContain('...SearchHere to check it!...');
});
});
Loading