Skip to content

Commit

Permalink
Decode HTML entities for previews and search index
Browse files Browse the repository at this point in the history
fixes #15

We use the "he" library for this because the decoding has to happen at
build time in `extendPageData`, i.e. in a Node.js process that does not
have access to the browser facilities for converting HTML entities.

We don't simply strip the entities because, depending on the language,
they may represent important searchable characters (e.g. special
non-ASCII letters of that language).
  • Loading branch information
karottenreibe committed Apr 27, 2020
1 parent 948524c commit 143e244
Show file tree
Hide file tree
Showing 3 changed files with 915 additions and 1,252 deletions.
9 changes: 6 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const { path } = require('@vuepress/shared-utils')
const he = require('he');

module.exports = (options) => ({
extendPageData ($page) {
Expand All @@ -7,9 +8,11 @@ module.exports = (options) => ({
}
// _strippedContent does not contain the YAML frontmatter
const { html } = $page._context.markdown.render($page._strippedContent)
const text = html
.replace(/(<[^>]+>)+/g, " ") // remove HTML tags
.replace(/^\s*#\s/gm, "") // remove header anchors inserted by vuepress
const text = he.decode( // decode HTML entities like &quot;
html
.replace(/(<[^>]+>)+/g, " ") // remove HTML tags
.replace(/^\s*#\s/gm, "") // remove header anchors inserted by vuepress
)
$page.content = text
},
alias: {
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"license": "MIT",
"homepage": "https://github.com/z3by/vuepress-plugin-flexsearch#readme",
"devDependencies": {
"vuepress": "^1.4.1"
"vuepress": "^1.4.1",
"he": "^1.2.0"
},
"dependencies": {
"flexsearch": "nextapps-de/flexsearch"
Expand Down
Loading

0 comments on commit 143e244

Please sign in to comment.