Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Want to handle whitespace in pasted HTML according to HTML spec #2718

Closed
wants to merge 11 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,36 @@ describe('when deserializing all plugins', () => {
).toEqual(output.children);
});
});

describe('when stripWhitespace is true', () => {
  // Fixture taken from https://github.com/udecode/plate/issues/2713#issuecomment-1780118687
  // It contains two <p> blocks, a <pre>, a div with "white-space: pre" and a
  // div with "white-space: pre-line", each padded with newlines and spaces.
  const html = `<p>\n Hello world\n </p>\n\n <p>\n one two \n three\n</p>\n\n<pre>\nhello one two\nthree\nfour\n</pre>\n\n<div style="white-space: pre">\nhello one two\nthree\nfour\n</div>\n\n<div style="white-space: pre-line">\nhello one two\nthree\nfour\n</div>`;
  const element = getHtmlDocument(html).body;

  // One expected text node per block in the fixture, in document order.
  const expectedOutput = [
    'Hello world',
    'one two three',
    'hello one two\nthree\nfour',
    '\nhello one two\nthree\nfour',
    '\nhello one two\nthree\nfour',
  ].map((text) => ({ text }));

  it('should strip Whitespace by style', () => {
    const editor = createPlateEditor();
    const fragment = deserializeHtml(editor, {
      element,
      stripWhitespace: true,
    });

    expect(fragment).toEqual(expectedOutput);
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ export const deserializeHtml = <V extends Value>(
): EDescendant<V>[] => {
// for serializer
if (typeof element === 'string') {
element = htmlStringToDOMNode(element, stripWhitespace);
element = htmlStringToDOMNode(element);
}

const fragment = deserializeHtmlElement(editor, element) as EDescendant<V>[];
const fragment = deserializeHtmlElement(editor, element, stripWhitespace) as EDescendant<V>[];

return normalizeDescendantsToDocumentFragment(editor, {
descendants: fragment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import { deserializeHtmlNode } from './deserializeHtmlNode';
*/
export const deserializeHtmlElement = <V extends Value>(
editor: PlateEditor<V>,
element: HTMLElement
element: HTMLElement,
stripWhitespace = true
): DeserializeHtmlNodeReturnType<EDescendant<V>> => {
return deserializeHtmlNode(editor)(element);
return deserializeHtmlNode(editor, stripWhitespace)(element);
};
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@ import { isHtmlElement } from './isHtmlElement';
* Deserialize HTML element or child node.
*/
export const deserializeHtmlNode =
<V extends Value>(editor: PlateEditor<V>) =>
<V extends Value>(
editor: PlateEditor<V>,
stripWhitespace = true,
) =>
(
node: HTMLElement | ChildNode
): DeserializeHtmlNodeReturnType<EDescendant<V>> => {
const textNode = htmlTextNodeToString(node);
const textNode = htmlTextNodeToString(node, stripWhitespace);
if (textNode) return textNode;

if (!isHtmlElement(node)) return null;
Expand All @@ -27,13 +30,13 @@ export const deserializeHtmlNode =
if (breakLine) return breakLine;

// body
const fragment = htmlBodyToFragment(editor, node as HTMLElement);
const fragment = htmlBodyToFragment(editor, node as HTMLElement, stripWhitespace);
if (fragment) return fragment;

// element
const element = htmlElementToElement(editor, node as HTMLElement);
const element = htmlElementToElement(editor, node as HTMLElement, stripWhitespace);
if (element) return element;

// leaf
return htmlElementToLeaf(editor, node as HTMLElement);
return htmlElementToLeaf(editor, node as HTMLElement, stripWhitespace);
};
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ import { deserializeHtmlNode } from './deserializeHtmlNode';

export const deserializeHtmlNodeChildren = <V extends Value>(
editor: PlateEditor<V>,
node: HTMLElement | ChildNode
node: HTMLElement | ChildNode,
stripWhitespace: boolean
) =>
Array.from(node.childNodes).flatMap(
deserializeHtmlNode(editor)
deserializeHtmlNode(editor, stripWhitespace)
) as DeserializeHtmlChildren<EDescendant<V>>[];
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ jsx;
*/
export const htmlBodyToFragment = <V extends Value>(
editor: PlateEditor<V>,
element: HTMLElement
element: HTMLElement,
stripWhitespace = true
): EDescendant<V>[] | undefined => {
if (element.nodeName === 'BODY') {
return jsx(
'fragment',
{},
deserializeHtmlNodeChildren(editor, element)
deserializeHtmlNodeChildren(editor, element, stripWhitespace)
) as EDescendant<V>[];
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import { pipeDeserializeHtmlElement } from './pipeDeserializeHtmlElement';
*/
export const htmlElementToElement = <V extends Value>(
editor: PlateEditor<V>,
element: HTMLElement
element: HTMLElement,
stripWhitespace = true
) => {
const deserialized = pipeDeserializeHtmlElement(editor, element);

Expand All @@ -19,7 +20,7 @@ export const htmlElementToElement = <V extends Value>(

let descendants =
node.children ??
(deserializeHtmlNodeChildren(editor, element) as TDescendant[]);
(deserializeHtmlNodeChildren(editor, element, stripWhitespace) as TDescendant[]);
if (descendants.length === 0 || withoutChildren) {
descendants = [{ text: '' }];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ import { pipeDeserializeHtmlLeaf } from './pipeDeserializeHtmlLeaf';
*/
export const htmlElementToLeaf = <V extends Value>(
editor: PlateEditor<V>,
element: HTMLElement
element: HTMLElement,
stripWhitespace = true
) => {
const node = pipeDeserializeHtmlLeaf(editor, element);

return deserializeHtmlNodeChildren(editor, element).reduce(
return deserializeHtmlNodeChildren(editor, element, stripWhitespace).reduce(
(arr: TDescendant[], child) => {
if (!child) return arr;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,9 @@
*/
export const htmlStringToDOMNode = (
rawHtml: string,
stripWhitespace = true
) => {
const node = document.createElement('body');
node.innerHTML = rawHtml;

if (stripWhitespace) {
node.innerHTML = node.innerHTML.replaceAll(/(\r\n|[\t\n\r])/g, '');
}

return node;
};
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { htmlTextNodeToString } from './htmlTextNodeToString';

describe('htmlTextNodeToString', () => {

describe('when empty div element', () => {
it('should be undefined', () => {
const input = document.createElement('div');
Expand All @@ -24,7 +25,7 @@ describe('htmlTextNodeToString', () => {
const input = document.createTextNode('\n\n\n\n\n');
const output = null;

expect(htmlTextNodeToString(input)).toEqual(output);
expect(htmlTextNodeToString(input, false)).toEqual(output);
});
});

Expand All @@ -33,7 +34,46 @@ describe('htmlTextNodeToString', () => {
const input = document.createTextNode('\n\n\ntest\n\ntest\n\n');
const output = 'test\n\ntest';

expect(htmlTextNodeToString(input)).toEqual(output);
expect(htmlTextNodeToString(input, false)).toEqual(output);
});
});

describe('when htmlTextNodeToString', () => {
const text = `${'\n'} ${'\t'} hello ${'\n'}world!`;
const baseInput = (whiteSpace): Text => {
const textNode = document.createTextNode(text);
const parentDom = document.createElement('div');
parentDom.style.whiteSpace = whiteSpace;

parentDom.append(textNode);

return textNode;
};

it('white-space: normal', () => {
12joan marked this conversation as resolved.
Show resolved Hide resolved
expect(htmlTextNodeToString(baseInput('normal'))).toEqual(`hello world!`);
});

it('white-space: nowrap', () => {
expect(htmlTextNodeToString(baseInput('nowrap'))).toEqual(`hello world!`);
});

it('white-space: pre', () => {
expect(htmlTextNodeToString(baseInput('pre'))).toEqual(text);
});

it('white-space: pre-wrap', () => {
expect(htmlTextNodeToString(baseInput('pre-wrap'))).toEqual(text);
});

it('white-space: pre-line', () => {
expect(htmlTextNodeToString(baseInput('pre-line'))).toEqual(
`${'\n'} hello ${'\n'}world!`
);
});

it('white-space: break-spaces', () => {
expect(htmlTextNodeToString(baseInput('break-spaces'))).toEqual(text);
});
});
12joan marked this conversation as resolved.
Show resolved Hide resolved
});
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,99 @@
*/
import { isHtmlText } from './isHtmlText';

export const htmlTextNodeToString = (node: HTMLElement | ChildNode) => {
/**
 * Find the ancestor element whose white-space rule applies to `node`.
 *
 * Walks up from the direct parent and returns the first ancestor element that
 * is either a <pre> or carries an explicit inline `white-space` value other
 * than `inherit`. When no such ancestor exists, the direct parent is returned,
 * provided it is an element.
 *
 * Only inline styles (`element.style`) are inspected; computed styles are not.
 *
 * @param node - The node (in practice a text node) to resolve the rule for.
 * @returns The governing element, or `null` when the node has no parent or
 * its parent is not an element (e.g. a DocumentFragment), so callers never
 * receive an object without a `style` property from the fallback path.
 */
function findParentElementWithWhiteSpace(node: HTMLElement | ChildNode): HTMLElement | null {
  let ancestor: Node | null = node.parentNode;

  while (ancestor != null) {
    // Non-element ancestors (documents, fragments) have no inline style.
    if (ancestor.nodeType === Node.ELEMENT_NODE) {
      const element = ancestor as HTMLElement;

      // The <pre> default style is "white-space: pre;"
      if (element.nodeName === 'PRE') return element;

      const whiteSpace = element.style.whiteSpace;
      if (whiteSpace && whiteSpace !== 'inherit') {
        return element;
      }
    }

    ancestor = ancestor.parentNode;
  }

  // No ancestor sets white-space explicitly: fall back to the direct parent,
  // but only if it really is an element.
  const parent = node.parentNode;
  if (parent != null && parent.nodeType === Node.ELEMENT_NODE) {
    return parent as HTMLElement;
  }

  return null;
}

// See https://github.com/udecode/plate/pull/2718#discussion_r1375418430
// Remove the trailing \n of a string, but only when it is a lone newline:
// a string ending in two or more consecutive \n is returned unchanged.
const stripOneNewlineFromEnd = (str = ''): string => {
  const loneTrailingNewline = /((?<!\n)\n(?!\n))$/g;

  return str.replace(loneTrailingNewline, '');
};

// Remove the leading \n of a string, but only when it is a lone newline:
// a string starting with two or more consecutive \n is returned unchanged.
const stripOneNewlineFromStart = (str = ''): string => {
  const loneLeadingNewline = /^((?<!\n)\n(?!\n))/g;

  return str.replace(loneLeadingNewline, '');
};

/**
 * Normalize the whitespace of `node.textContent` in place, according to the
 * white-space rule of the element returned by findParentElementWithWhiteSpace.
 *
 * - No governing element, or empty text: the node is left untouched.
 * - Inside <pre>: a lone leading and a lone trailing \n are stripped.
 * - normal / nowrap / initial / unset (and no inline value): runs of HTML
 *   ASCII whitespace (tab, LF, FF, CR, space) collapse to one space and the
 *   leading/trailing space is removed. Other characters, including
 *   non-breaking spaces (U+00A0), are preserved.
 * - pre-line: runs of tabs/spaces collapse to one space; a lone trailing \n
 *   is stripped.
 * - pre: only a lone trailing \n is stripped.
 * - pre-wrap / break-spaces / anything else: text is kept as is.
 *
 * @param node - The text node to normalize; its `textContent` is mutated.
 */
const mergeWhitespace = (node: HTMLElement | ChildNode) => {
  const parentNode = findParentElementWithWhiteSpace(node);
  if (!parentNode) return;

  const text = node.textContent;
  // Nothing to normalize; also avoids rewriting textContent needlessly.
  if (!text) return;

  // The <pre> default style is "white-space: pre;"
  if (parentNode.nodeName === 'PRE') {
    // Strip a lone \n from the start and from the end of the text node.
    node.textContent = stripOneNewlineFromStart(stripOneNewlineFromEnd(text));
    return;
  }

  // An element without an inline white-space value is treated as "normal".
  const parentWhiteSpace = parentNode.style.whiteSpace || 'normal';

  switch (parentWhiteSpace) {
    case 'unset':
    case 'initial': // Browser's default styles.
    case 'normal':
    case 'nowrap': {
      // Collapse only HTML ASCII whitespace. `\s` and `trim()` would also
      // swallow U+00A0 (&nbsp;) and other Unicode spaces, which must survive.
      node.textContent = text
        .replace(/[\t\n\f\r ]+/g, ' ')
        .replace(/^ | $/g, '');
      break;
    }
    case 'pre-line': {
      // Newlines are significant: keep any leading \n, collapse tabs/spaces,
      // and strip a lone trailing \n.
      node.textContent = stripOneNewlineFromEnd(text.replace(/[\t ]+/g, ' '));
      break;
    }
    case 'pre': {
      // All whitespace is significant: keep any leading \n and strip only a
      // lone trailing \n.
      node.textContent = stripOneNewlineFromEnd(text);
      break;
    }
    case 'break-spaces':
    case 'pre-wrap':
    case 'revert': // "revert" and "revert-layer" are expected to be supported in the future.
    case 'revert-layer':
    default: {
      // Whitespace is preserved verbatim.
      break;
    }
  }
};

export const htmlTextNodeToString = (
node: HTMLElement | ChildNode,
stripWhitespace = true
12joan marked this conversation as resolved.
Show resolved Hide resolved
) => {
if (isHtmlText(node)) {
const trimmedText = node.textContent?.replace(/^\n+|\n+$/g, '') ?? '';

if (stripWhitespace) {
mergeWhitespace(node);
} else {
node.textContent = node.textContent?.replace(/^\n+|\n+$/g, '') ?? '';
}

const trimmedText = node.textContent ?? '';
return trimmedText.length > 0 ? trimmedText : null;
}
};
Loading