Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse white space according to the HTML spec #2729

Merged
merged 15 commits into from
Nov 8, 2023
6 changes: 6 additions & 0 deletions .changeset/clean-sloths-draw.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'@udecode/plate-core': patch
---

**This is a breaking change meant to be part of v25, hence the patch.**
On `deserializeHtml`, replace `stripWhitespace` with `collapseWhiteSpace`, defaulting to true. The `collapseWhiteSpace` option aims to parse white space in HTML according to the HTML specification, ensuring greater accuracy when pasting HTML from browsers.
5 changes: 5 additions & 0 deletions .changeset/mighty-ties-rescue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@udecode/plate-serializer-docx': patch
---

Before sending DOCX HTML to be deserialized, wrap it in a `<div>` with `white-space: pre-wrap` to prevent white space from being collapsed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { TrimEndRule, TrimStartRule } from './types';

/**
 * Trim and/or collapse white space in a string.
 *
 * Steps are applied in this order:
 * 1. `trimStart: 'all'` removes every leading white space character.
 * 2. `trimEnd: 'single-newline'` removes at most one trailing `\n`.
 * 3. When `shouldCollapseWhiteSpace` is set, runs of white space are replaced
 *    by a single space. If `whiteSpaceIncludesNewlines` is false, line breaks
 *    are kept and only horizontal white space is collapsed; horizontal white
 *    space touching the start or end of a line is then removed.
 *
 * Any other `trimStart` / `trimEnd` value (including the `'collapse'`
 * defaults) triggers no dedicated trimming step in this function.
 *
 * @param text - The string to process.
 * @returns The processed string.
 */
export const collapseString = (
  text: string,
  {
    trimStart = 'collapse',
    trimEnd = 'collapse',
    shouldCollapseWhiteSpace = true,
    whiteSpaceIncludesNewlines = true,
  }: {
    trimStart?: TrimStartRule;
    trimEnd?: TrimEndRule;
    shouldCollapseWhiteSpace?: boolean;
    whiteSpaceIncludesNewlines?: boolean;
  } = {}
) => {
  let result = trimStart === 'all' ? text.replace(/^\s+/, '') : text;

  // Strip at most one newline from the end.
  if (trimEnd === 'single-newline') {
    result = result.replace(/\n$/, '');
  }

  if (!shouldCollapseWhiteSpace) {
    return result;
  }

  // Newlines count as collapsible white space: squash every run to one space.
  if (whiteSpaceIncludesNewlines) {
    return result.replace(/\s+/g, ' ');
  }

  // Newlines are preserved (behavior of pre-line): collapse horizontal runs,
  // then drop horizontal white space at the start and end of each line.
  return result
    .replace(/[^\S\n\r]+/g, ' ')
    .replace(/^[^\S\n\r]+/gm, '')
    .replace(/[^\S\n\r]+$/gm, '');
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
import { htmlStringToDOMNode } from '../htmlStringToDOMNode';
import { collapseWhiteSpace } from './collapseWhiteSpace';

/**
 * Test helper: parses `input` into a DOM node, runs `collapseWhiteSpace` on
 * it, and asserts that the result's `innerHTML` equals `expected`.
 */
const expectCollapsedWhiteSpace = (input: string, expected: string) => {
  const actual = collapseWhiteSpace(htmlStringToDOMNode(input)).innerHTML;
  expect(actual).toEqual(expected);
};

/**
 * Specification-style tests for `collapseWhiteSpace`.
 *
 * Every case feeds an HTML string through `expectCollapsedWhiteSpace`, which
 * compares the `innerHTML` of the collapsed result with the expected markup.
 * Cases written as `expected = input` assert that the markup is left
 * untouched.
 */
describe('collapseWhiteSpace', () => {
  // No <pre> element and no inline `white-space` / `display` styles.
  describe('when there are no special block types or styles', () => {
    describe('when whitespace is already collapsed', () => {
      it('does not alter simple collapsed HTML', () => {
        const input = '<p>Hello world!</p>';
        const expected = input;
        expectCollapsedWhiteSpace(input, expected);
      });

      it('does not remove single space in text node in inline formatting context', () => {
        const input = '<p>Hello<strong> </strong>world</p>';
        const expected = input;
        expectCollapsedWhiteSpace(input, expected);
      });
    });

    describe('when whitespace is not collapsed', () => {
      it('removes whitespace between block elements', () => {
        const input =
          ' \n\n\n <p>Hello world!</p> \n\n <p>How are you?</p> \n ';
        const expected = '<p>Hello world!</p><p>How are you?</p>';
        expectCollapsedWhiteSpace(input, expected);
      });

      it('removes all whitespace at start of block element', () => {
        const input = '<div> \n\n <p>Hello world!</p></div>';
        const expected = '<div><p>Hello world!</p></div>';
        expectCollapsedWhiteSpace(input, expected);
      });

      // Trailing white space is collapsed to one space, not removed.
      it('collapses whitespace at end of block element', () => {
        const input = '<p>Hello world! \n\n\n</p>';
        const expected = '<p>Hello world! </p>';
        expectCollapsedWhiteSpace(input, expected);
      });

      it('collapses whitespace inside text nodes', () => {
        const input = '<p>Hello \n world!</p>';
        const expected = '<p>Hello world!</p>';
        expectCollapsedWhiteSpace(input, expected);
      });

      it('removes whitespace at start of inline formatting context', () => {
        const input = '<p> <strong> Hello</strong> world!</p>';
        const expected = '<p><strong>Hello</strong> world!</p>';
        expectCollapsedWhiteSpace(input, expected);
      });

      // The space after </strong> follows a trailing space inside <strong>,
      // so it is dropped; the trailing run before </p> becomes one space.
      it('collapses whitespace at end of inline formatting context', () => {
        const input = '<p><strong>Hello </strong> world! \n </p>';
        const expected = '<p><strong>Hello </strong>world! </p>';
        expectCollapsedWhiteSpace(input, expected);
      });

      it('span does not interrupt inline formatting context', () => {
        const input = '<div>Hello<span></span> world!</div>';
        const expected = '<div>Hello<span></span> world!</div>';
        expectCollapsedWhiteSpace(input, expected);
      });

      // Text after the nested block starts a new inline formatting context,
      // so its leading space is removed.
      it('div does interrupt inline formatting context', () => {
        const input = '<div>Hello<div></div> world!</div>';
        const expected = '<div>Hello<div></div>world!</div>';
        expectCollapsedWhiteSpace(input, expected);
      });

      it('tracks whitespace across multiple inline nodes (case 1)', () => {
        const input = '<p><strong>Hello </strong><em> world!</em></p>';
        const expected = '<p><strong>Hello </strong><em>world!</em></p>';
        expectCollapsedWhiteSpace(input, expected);
      });

      it('tracks whitespace across multiple inline nodes (case 2)', () => {
        const input = '<p><strong>Hello</strong><em> world!</em></p>';
        const expected = '<p><strong>Hello</strong><em> world!</em></p>';
        expectCollapsedWhiteSpace(input, expected);
      });
    });
  });

  describe('when inside an actual <pre> element', () => {
    it('preserves whitespace, including newlines, except for one newline at start and end', () => {
      const input = '<pre>\n\n\n one two\nthree \n \n\n</pre>';
      const expected = '<pre>\n\n one two\nthree \n \n</pre>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single newline at start of <pre> element', () => {
      const input = '<pre>\nhello world</pre>';
      const expected = '<pre>hello world</pre>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('does not remove single newline in first text node of <pre> element if not a direct child', () => {
      const input = '<pre><span>\nhello</span> world</pre>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single newline at end of <pre> element', () => {
      const input = '<pre>hello world\n</pre>';
      const expected = '<pre>hello world</pre>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single new line at the end of nested inline formatting context', () => {
      const input = '<pre><div>hello world\n</div>x\n</pre>';
      const expected = '<pre><div>hello world</div>x</pre>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('does not remove single newline at the start of nested blocks', () => {
      const input = '<pre><div>\nhello world</div></pre>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('does not remove single newline when not at the end of some inline formatting context', () => {
      const input = '<pre><span>hello world\n</span> </pre>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single newline at the end of inline formatting context: case 1', () => {
      const input = '<pre><span>hello world\n</span><span></span></pre>';
      const expected = '<pre><span>hello world</span><span></span></pre>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single newline at the end of inline formatting context: case 2', () => {
      const input = '<pre><span>hello world\n</span><div></div>x</pre>';
      const expected = '<pre><span>hello world</span><div></div>x</pre>';
      expectCollapsedWhiteSpace(input, expected);
    });
  });

  // Unlike a real <pre>, the leading newline is kept in these cases.
  describe('when `white-space: pre` is applied to a block element', () => {
    it('preserves whitespace, including newlines, except for one newline at end', () => {
      const input =
        '<div style="white-space: pre">\n one two\nthree \n \n\n\n\n</div>';
      const expected =
        '<div style="white-space: pre">\n one two\nthree \n \n\n\n</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single newline at end of top-level block', () => {
      const input = '<div style="white-space: pre">\nhello world\n</div>';
      const expected = '<div style="white-space: pre">\nhello world</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single new line at the end of nested inline formatting context', () => {
      const input =
        '<div style="white-space: pre"><div>hello world\n</div>\n</div>';
      const expected =
        '<div style="white-space: pre"><div>hello world</div></div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('does not remove single newline when not at the end of some inline formatting context', () => {
      const input =
        '<div style="white-space: pre"><span>hello world\n</span>x</div>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('removes single newline at the end of some inline formatting block', () => {
      const input =
        '<div style="white-space: pre"><span>hello world\n</span><span></span></div>';
      const expected =
        '<div style="white-space: pre"><span>hello world</span><span></span></div>';
      expectCollapsedWhiteSpace(input, expected);
    });
  });

  describe('when `white-space: pre` is applied to an inline element', () => {
    it('does not let trailing whitespace affect subsequent text nodes', () => {
      const input =
        '<span><strong style="white-space: pre">Hello </strong><em> world</em></span>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });
  });

  // TODO: Explore what happens when a pre-line contains inline elements.
  describe('when `white-space: pre-line` is applied to a block element', () => {
    it('collapses horizontal whitespace', () => {
      // The input must contain a run of horizontal white space (written with
      // an explicit `\t` so it survives reformatting); otherwise this case
      // would pass without any collapsing taking place.
      const input =
        '<div style="white-space: pre-line">Hello \t world!</div>';
      const expected = '<div style="white-space: pre-line">Hello world!</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    // Horizontal white space around each line break is removed while the
    // line breaks themselves are kept.
    it('preserves newlines, except for one newline at end', () => {
      const input =
        '<div style="white-space: pre-line">\n\n\n one two\nthree \n \n\n\n\n</div>';
      const expected =
        '<div style="white-space: pre-line">\n\n\none two\nthree\n\n\n\n</div>';
      expectCollapsedWhiteSpace(input, expected);
    });
  });

  // An inline-level `display` value on a <div> makes it behave like a <span>.
  describe('when a div is made inline using CSS', () => {
    it('inline div does not interrupt inline formatting context', () => {
      const input =
        '<div>Hello<div style="display: inline"></div> world!</div>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('inline-block div does not interrupt inline formatting context', () => {
      const input =
        '<div>Hello<div style="display: inline-block"></div> world!</div>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('inline-grid div does not interrupt inline formatting context', () => {
      const input =
        '<div>Hello<div style="display: inline-grid"></div> world!</div>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('inline-flex div does not interrupt inline formatting context', () => {
      const input =
        '<div>Hello<div style="display: inline-flex"></div> world!</div>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });

    it('inline flow div does not interrupt inline formatting context', () => {
      const input =
        '<div>Hello<div style="display: inline flow"></div> world!</div>';
      const expected = input;
      expectCollapsedWhiteSpace(input, expected);
    });
  });

  // A block-level `display` value on a <span> makes it behave like a <div>.
  describe('when a span is made block using CSS', () => {
    it('block span does interrupt inline formatting context', () => {
      const input =
        '<div>Hello<span style="display: block"></span> world!</div>';
      const expected =
        '<div>Hello<span style="display: block"></span>world!</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('flex span does interrupt inline formatting context', () => {
      const input =
        '<div>Hello<span style="display: flex"></span> world!</div>';
      const expected =
        '<div>Hello<span style="display: flex"></span>world!</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('grid span does interrupt inline formatting context', () => {
      const input =
        '<div>Hello<span style="display: grid"></span> world!</div>';
      const expected =
        '<div>Hello<span style="display: grid"></span>world!</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('table span does interrupt inline formatting context', () => {
      const input =
        '<div>Hello<span style="display: table"></span> world!</div>';
      const expected =
        '<div>Hello<span style="display: table"></span>world!</div>';
      expectCollapsedWhiteSpace(input, expected);
    });

    it('block flow span does interrupt inline formatting context', () => {
      const input =
        '<div>Hello<span style="display: block flow"></span> world!</div>';
      const expected =
        '<div>Hello<span style="display: block flow"></span>world!</div>';
      expectCollapsedWhiteSpace(input, expected);
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { collapseWhiteSpaceElement } from './collapseWhiteSpaceElement';
import { CollapseWhiteSpaceState } from './types';

/**
 * Entrypoint for white space collapsing.
 *
 * Deep-clones `element`, runs `collapseWhiteSpaceElement` on the clone with a
 * fresh state object, and returns the clone.
 *
 * @param element - Root element to process; it is cloned before processing.
 * @returns The processed deep clone of `element`.
 */
export const collapseWhiteSpace = (element: HTMLElement) => {
  const copy = element.cloneNode(true) as HTMLElement;

  // Mutable state object handed to the traversal; starts with no inline
  // formatting context and the 'normal' white space rule.
  const initialState: CollapseWhiteSpaceState = {
    inlineFormattingContext: null,
    whiteSpaceRule: 'normal',
  };

  collapseWhiteSpaceElement(copy, initialState);

  return copy;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { collapseWhiteSpaceNode } from './collapseWhiteSpaceNode';
import { CollapseWhiteSpaceState } from './types';

/**
 * Runs `collapseWhiteSpaceNode` on every child of `node`, in document order,
 * passing the same `state` object to each call.
 *
 * The child list is copied into an array first, so the set of nodes visited
 * is fixed before any child is processed.
 */
export const collapseWhiteSpaceChildren = (
  node: Node,
  state: CollapseWhiteSpaceState
) => {
  Array.from(node.childNodes).forEach((child) => {
    collapseWhiteSpaceNode(child, state);
  });
};
Loading
Loading