-
-
Notifications
You must be signed in to change notification settings - Fork 758
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
489 additions
and
441 deletions.
There are no files selected for viewing
43 changes: 43 additions & 0 deletions
43
packages/core/src/plugins/html-deserializer/utils/collapse-white-space/collapseString.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import { TrimEndRule, TrimStartRule } from './types'; | ||
|
||
/**
 * Trim and/or collapse the white space in a string.
 *
 * The steps run in this order: leading trim, trailing-newline trim, then
 * collapsing.
 *
 * @param text - The string to process.
 * @param options - Optional rules; every field has a default.
 *   - `trimStart`: when `'all'`, all leading white space is removed first.
 *     Any other value performs no dedicated leading trim.
 *   - `trimEnd`: when `'single-newline'`, at most one trailing `\n` is
 *     removed. Any other value performs no dedicated trailing trim.
 *   - `shouldCollapseWhiteSpace`: when `false`, no collapsing is done at all.
 *   - `whiteSpaceIncludesNewlines`: when `true`, every run of white space
 *     (newlines included) becomes a single space. When `false`, only
 *     horizontal white space is collapsed, line breaks are kept, and
 *     horizontal white space at the start and end of each line is removed.
 * @returns The processed string.
 */
export const collapseString = (
  text: string,
  {
    trimStart = 'collapse',
    trimEnd = 'collapse',
    shouldCollapseWhiteSpace = true,
    whiteSpaceIncludesNewlines = true,
  }: {
    trimStart?: TrimStartRule;
    trimEnd?: TrimEndRule;
    shouldCollapseWhiteSpace?: boolean;
    whiteSpaceIncludesNewlines?: boolean;
  } = {}
) => {
  if (trimStart === 'all') {
    text = text.replace(/^\s+/, '');
  }

  if (trimEnd === 'single-newline') {
    // Strip at most one newline from the end
    text = text.replace(/\n$/, '');
  }

  if (shouldCollapseWhiteSpace) {
    /**
     * `replace` with a global regex is equivalent to `replaceAll` here and
     * does not require the ES2021 string library.
     */
    if (whiteSpaceIncludesNewlines) {
      text = text.replace(/\s+/g, ' ');
    } else {
      // Collapse horizontal whitespace
      text = text.replace(/[^\S\n\r]+/g, ' ');

      /**
       * Trim horizontal whitespace from the start and end of lines (behavior
       * of pre-line).
       */
      text = text.replace(/^[^\S\n\r]+/gm, '');
      text = text.replace(/[^\S\n\r]+$/gm, '');
    }
  }

  return text;
};
2 changes: 1 addition & 1 deletion
2
...rializer/utils/collapseWhiteSpace.spec.ts → ...se-white-space/collapseWhiteSpace.spec.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
packages/core/src/plugins/html-deserializer/utils/collapse-white-space/collapseWhiteSpace.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import { collapseWhiteSpaceElement } from './collapseWhiteSpaceElement'; | ||
import { CollapseWhiteSpaceState } from './types'; | ||
|
||
/**
 * Entrypoint. Deep-clones `element`, runs the white-space collapsing
 * traversal over the clone, and returns the clone. The traversal is only
 * ever handed the clone, never the element that was passed in.
 *
 * @param element - Root element to process.
 * @returns The processed deep clone of `element`.
 */
export const collapseWhiteSpace = (element: HTMLElement) => {
  const clonedElement = element.cloneNode(true) as HTMLElement;

  // Mutable state object, shared by every step of the traversal
  const state: CollapseWhiteSpaceState = {
    inlineFormattingContext: null,
    whiteSpaceRule: 'normal',
  };

  collapseWhiteSpaceElement(clonedElement, state);

  return clonedElement;
};
13 changes: 13 additions & 0 deletions
13
...re/src/plugins/html-deserializer/utils/collapse-white-space/collapseWhiteSpaceChildren.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import { collapseWhiteSpaceNode } from './collapseWhiteSpaceNode'; | ||
import { CollapseWhiteSpaceState } from './types'; | ||
|
||
/**
 * Run `collapseWhiteSpaceNode` on every child of `node`, in document order,
 * threading the same mutable `state` through each call.
 *
 * @param node - Parent whose children are processed.
 * @param state - Shared traversal state; may be mutated by the callees.
 */
export const collapseWhiteSpaceChildren = (
  node: Node,
  state: CollapseWhiteSpaceState
) => {
  // Snapshot the live NodeList before iterating
  const childNodes = Array.from(node.childNodes);

  for (const childNode of childNodes) {
    collapseWhiteSpaceNode(childNode, state);
  }
};
38 changes: 38 additions & 0 deletions
38
...ore/src/plugins/html-deserializer/utils/collapse-white-space/collapseWhiteSpaceElement.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import { collapseWhiteSpaceChildren } from './collapseWhiteSpaceChildren'; | ||
import { inferWhiteSpaceRule } from './inferWhiteSpaceRule'; | ||
import { isHtmlInlineElement } from './isHtmlInlineElement'; | ||
import { endInlineFormattingContext } from './stateTransforms'; | ||
import { CollapseWhiteSpaceState } from './types'; | ||
|
||
/**
 * Process an element and its descendants.
 *
 * If a white-space rule can be inferred for the element, it applies to the
 * element's subtree; the rule that was active before is restored on the way
 * out. Non-inline elements end the current inline formatting context both
 * before and after their children are processed.
 *
 * @param element - Element to process.
 * @param state - Shared traversal state; mutated in place.
 */
export const collapseWhiteSpaceElement = (
  element: HTMLElement,
  state: CollapseWhiteSpaceState
) => {
  const isInlineElement = isHtmlInlineElement(element);
  const previousWhiteSpaceRule = state.whiteSpaceRule;
  const inferredWhiteSpaceRule = inferWhiteSpaceRule(element);

  // A null result means the element keeps the rule already in effect
  if (inferredWhiteSpaceRule) {
    state.whiteSpaceRule = inferredWhiteSpaceRule;
  }

  /**
   * Note: We do not want to start an inline formatting context until we
   * encounter a text node.
   */

  // End any existing inline formatting context
  if (!isInlineElement) {
    endInlineFormattingContext(state);
  }

  collapseWhiteSpaceChildren(element, state);

  // Do not let inline formatting context break out of block elements
  if (!isInlineElement) {
    endInlineFormattingContext(state);
  }

  // Restore previous whiteSpaceRule
  state.whiteSpaceRule = previousWhiteSpaceRule;
};
23 changes: 23 additions & 0 deletions
23
...s/core/src/plugins/html-deserializer/utils/collapse-white-space/collapseWhiteSpaceNode.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import { isHtmlElement } from '../isHtmlElement'; | ||
import { isHtmlText } from '../isHtmlText'; | ||
import { collapseWhiteSpaceChildren } from './collapseWhiteSpaceChildren'; | ||
import { collapseWhiteSpaceElement } from './collapseWhiteSpaceElement'; | ||
import { collapseWhiteSpaceText } from './collapseWhiteSpaceText'; | ||
import { CollapseWhiteSpaceState } from './types'; | ||
|
||
/**
 * Dispatch a node to the matching handler: elements go to
 * `collapseWhiteSpaceElement`, text nodes go to `collapseWhiteSpaceText`, and
 * any other kind of node simply has its children processed.
 *
 * @param node - Node to process.
 * @param state - Shared traversal state; may be mutated by the handlers.
 */
export const collapseWhiteSpaceNode = (
  node: Node,
  state: CollapseWhiteSpaceState
) => {
  if (isHtmlElement(node)) {
    collapseWhiteSpaceElement(node as HTMLElement, state);
    return;
  }

  if (isHtmlText(node)) {
    collapseWhiteSpaceText(node as Text, state);
    return;
  }

  // Neither an element nor a text node: just descend into its children
  collapseWhiteSpaceChildren(node, state);
};
69 changes: 69 additions & 0 deletions
69
...s/core/src/plugins/html-deserializer/utils/collapse-white-space/collapseWhiteSpaceText.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import { collapseString } from './collapseString'; | ||
import { isLastNonEmptyTextOfInlineFormattingBlock } from './isLastNonEmptyTextOfInlineFormattingBlock'; | ||
import { upsertInlineFormattingContext } from './stateTransforms'; | ||
import { CollapseWhiteSpaceState, TrimEndRule, TrimStartRule } from './types'; | ||
|
||
/**
 * Collapse the white space of a single text node in place.
 *
 * The trimming and collapsing rules are derived from the white-space rule
 * currently in `state` and from the state of the inline formatting context.
 * The node's `textContent` is overwritten with the result.
 *
 * @param text - Text node to rewrite.
 * @param state - Shared traversal state; the inline formatting context may be
 *   created or updated, and its `lastHasTrailingWhiteSpace` flag is refreshed
 *   when collapsing is enabled.
 */
export const collapseWhiteSpaceText = (
  text: Text,
  state: CollapseWhiteSpaceState
) => {
  const textContent = text.textContent ?? '';
  const isWhiteSpaceOnly = textContent.trim() === '';

  // Do not start an inline formatting context with a whiteSpace-only text node
  if (state.inlineFormattingContext || !isWhiteSpaceOnly) {
    upsertInlineFormattingContext(state);
  }

  const { whiteSpaceRule } = state;

  /**
   * Note: Due to the way HTML strings are parsed in htmlStringToDOMNode, up to
   * one newline is already trimmed from the start of text nodes inside <pre>
   * elements. If we do so again here, we may remove too many newlines. This
   * only applies to actual <pre> elements, not elements with the white-space
   * CSS property.
   */
  const trimStart: TrimStartRule = (() => {
    if (whiteSpaceRule !== 'normal') return 'collapse';

    // Leading white space is dropped entirely when nothing visible precedes it
    if (
      !state.inlineFormattingContext ||
      state.inlineFormattingContext.atStart ||
      state.inlineFormattingContext.lastHasTrailingWhiteSpace
    )
      return 'all';

    return 'collapse';
  })();

  const trimEnd: TrimEndRule = (() => {
    if (whiteSpaceRule === 'normal') return 'collapse';
    if (isLastNonEmptyTextOfInlineFormattingBlock(text))
      return 'single-newline';
    return 'collapse';
  })();

  // Only `normal` and `pre-line` collapse runs of white space
  const shouldCollapseWhiteSpace: boolean = {
    normal: true,
    'actual-pre': false,
    pre: false,
    'pre-line': true,
  }[whiteSpaceRule];

  // `pre-line` keeps line breaks; only horizontal white space is collapsed
  const whiteSpaceIncludesNewlines = whiteSpaceRule !== 'pre-line';

  const collapsedTextContent = collapseString(textContent, {
    trimStart,
    trimEnd,
    shouldCollapseWhiteSpace,
    whiteSpaceIncludesNewlines,
  });

  // Record whether this text ends in a space, for the next text node's trimStart
  if (state.inlineFormattingContext && shouldCollapseWhiteSpace) {
    state.inlineFormattingContext.lastHasTrailingWhiteSpace =
      collapsedTextContent.endsWith(' ');
  }

  text.textContent = collapsedTextContent;
};
17 changes: 17 additions & 0 deletions
17
packages/core/src/plugins/html-deserializer/utils/collapse-white-space/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/** | ||
* @file Automatically generated by barrelsby. | ||
*/ | ||
|
||
export * from './collapseString'; | ||
export * from './collapseWhiteSpace'; | ||
export * from './collapseWhiteSpaceChildren'; | ||
export * from './collapseWhiteSpaceElement'; | ||
export * from './collapseWhiteSpaceNode'; | ||
export * from './collapseWhiteSpaceText'; | ||
export * from './inferWhiteSpaceRule'; | ||
export * from './inlineTagNames'; | ||
export * from './isHtmlBlockElement'; | ||
export * from './isHtmlInlineElement'; | ||
export * from './isLastNonEmptyTextOfInlineFormattingBlock'; | ||
export * from './stateTransforms'; | ||
export * from './types'; |
32 changes: 32 additions & 0 deletions
32
...ages/core/src/plugins/html-deserializer/utils/collapse-white-space/inferWhiteSpaceRule.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import { WhiteSpaceRule } from './types'; | ||
|
||
/**
 * Infer the white-space rule an element imposes on its content.
 *
 * Precedence:
 * 1. An inline `white-space` style of `normal`/`nowrap` (-> `'normal'`),
 *    `pre`/`pre-wrap`/`break-spaces` (-> `'pre'`) or `pre-line`
 *    (-> `'pre-line'`).
 * 2. A `PRE` tag name (-> `'actual-pre'`).
 * 3. An inline `white-space: initial` (-> `'normal'`).
 *
 * Only the element's own inline `style` is read.
 *
 * @param element - Element to inspect.
 * @returns The inferred rule, or `null` when none of the cases above apply.
 */
export const inferWhiteSpaceRule = (
  element: HTMLElement
): WhiteSpaceRule | null => {
  const whiteSpaceProperty = element.style.whiteSpace;

  switch (whiteSpaceProperty) {
    case 'normal':
    case 'nowrap': {
      return 'normal';
    }
    case 'pre':
    case 'pre-wrap':
    case 'break-spaces': {
      return 'pre';
    }
    case 'pre-line': {
      return 'pre-line';
    }
  }

  // Checked after the explicit styles above, but before `initial`
  if (element.tagName === 'PRE') {
    return 'actual-pre';
  }

  if (whiteSpaceProperty === 'initial') {
    return 'normal';
  }

  return null;
};
107 changes: 107 additions & 0 deletions
107
packages/core/src/plugins/html-deserializer/utils/collapse-white-space/inlineTagNames.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/**
 * Tag names of the standard elements whose default `display` value starts
 * with `inline`. Names are stored exactly as `tagName` reported them in
 * step 1 below, which is why `svg` and `math` are lowercase.
 *
 * # Methodology
 *
 * ## Step 1. Get the list of all standard tag names
 *
 * Go to https://developer.mozilla.org/en-US/docs/Web/HTML/Element and run the
 * following in the console to generate a JSON array of tag names:
 *
 * ```js
 * JSON.stringify(
 *   Array.from(document.querySelectorAll('article table td:first-child')).map((td) => {
 *     const body = document.createElement('body');
 *     body.innerHTML = td.textContent;
 *     return body.firstChild?.tagName;
 *   }).filter((tagName) => tagName)
 * );
 * ```
 *
 * Output (as of 2023-11-06):
 *
 * ```json
 * '["BASE","LINK","META","STYLE","TITLE","ADDRESS","ARTICLE","ASIDE","FOOTER","HEADER","H1","HGROUP","MAIN","NAV","SECTION","SEARCH","BLOCKQUOTE","DD","DIV","DL","DT","FIGCAPTION","FIGURE","HR","LI","MENU","OL","P","PRE","UL","A","ABBR","B","BDI","BDO","BR","CITE","CODE","DATA","DFN","EM","I","KBD","MARK","Q","RP","RT","RUBY","S","SAMP","SMALL","SPAN","STRONG","SUB","SUP","TIME","U","VAR","WBR","AREA","AUDIO","IMG","MAP","TRACK","VIDEO","EMBED","IFRAME","OBJECT","PICTURE","PORTAL","SOURCE","svg","math","CANVAS","NOSCRIPT","SCRIPT","DEL","INS","TABLE","BUTTON","DATALIST","FIELDSET","FORM","INPUT","LABEL","LEGEND","METER","OPTGROUP","OPTION","OUTPUT","PROGRESS","SELECT","TEXTAREA","DETAILS","DIALOG","SUMMARY","SLOT","TEMPLATE","ACRONYM","BIG","CENTER","CONTENT","DIR","FONT","IMG","MARQUEE","MENUITEM","NOBR","NOEMBED","NOFRAMES","PARAM","PLAINTEXT","RB","RTC","SHADOW","STRIKE","TT","XMP"]'
 * ```
 *
 * ## Step 2. For each tag name, determine the default browser style
 *
 * Open an empty HTML file in the browser and run the following in the console:
 *
 * ```js
 * const tagNames = JSON.parse(<JSON string from step 1>);
 *
 * JSON.stringify(
 *   tagNames.filter((tagName) => {
 *     const element = document.createElement(tagName);
 *     document.body.appendChild(element);
 *     const display = window.getComputedStyle(element).display;
 *     element.remove();
 *     return display.startsWith('inline');
 *   })
 * );
 * ```
 *
 * Place the result in the array below (accurate as of 2023-11-06). `IMG`
 * appears twice in the step 1 output; it is listed only once here.
 */

export const inlineTagNames = new Set([
  'A',
  'ABBR',
  'B',
  'BDI',
  'BDO',
  'BR',
  'CITE',
  'CODE',
  'DATA',
  'DFN',
  'EM',
  'I',
  'KBD',
  'MARK',
  'Q',
  'S',
  'SAMP',
  'SMALL',
  'SPAN',
  'STRONG',
  'SUB',
  'SUP',
  'TIME',
  'U',
  'VAR',
  'WBR',
  'IMG',
  'MAP',
  'TRACK',
  'VIDEO',
  'EMBED',
  'IFRAME',
  'OBJECT',
  'PICTURE',
  'PORTAL',
  'SOURCE',
  'svg',
  'math',
  'CANVAS',
  'DEL',
  'INS',
  'BUTTON',
  'INPUT',
  'LABEL',
  'METER',
  'OUTPUT',
  'PROGRESS',
  'SELECT',
  'TEXTAREA',
  'ACRONYM',
  'BIG',
  'CONTENT',
  'FONT',
  'MARQUEE',
  'MENUITEM',
  'NOBR',
  'SHADOW',
  'STRIKE',
  'TT',
]);
8 changes: 8 additions & 0 deletions
8
packages/core/src/plugins/html-deserializer/utils/collapse-white-space/isHtmlBlockElement.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import { isHtmlElement } from '../isHtmlElement'; | ||
import { isHtmlInlineElement } from './isHtmlInlineElement'; | ||
|
||
/**
 * Whether `node` is an HTML element that `isHtmlInlineElement` does not
 * classify as inline. Nodes that are not HTML elements are never block
 * elements.
 *
 * @param node - Node to test.
 * @returns `true` only for HTML elements that are not inline.
 */
export const isHtmlBlockElement = (node: Node): boolean =>
  isHtmlElement(node) && !isHtmlInlineElement(node as HTMLElement);
Oops, something went wrong.