Skip to content

Commit

Permalink
add wip deserialization
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonas-C committed Jan 21, 2025
1 parent 723f815 commit 681ab36
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

exports[`articleContentConverter convert article that is a mix of inline and block object by wrapping the inline 1`] = `"<section><h2>Section 1</h2>Some text that slate wants to delete <em>blabla</em></section><section><h2>Section 2</h2></section><section><h2>Section 3</h2></section>"`;
exports[`articleContentConverter convert article that is a mix of inline and block object by wrapping the inline 1`] = `"<section><h2>Section 1</h2><p>Some text that slate wants to delete <em>blabla</em></p></section><section><h2>Section 2</h2></section><section><h2>Section 3</h2></section>"`;

exports[`articleContentConverter convert learningresource content 1`] = `
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import { inlineContentToEditorValue, blockContentToEditorValue } from "../deserializeFromHtml";
import { blockContentToHTML, inlineContentToHTML } from "../serializeToHtml";

// TODO: The snapshots this file generates are invalid because we don't support all embeds.

const contentHTML = `<section><h2>Lorem ipsum</h2></section>`;

const contentHTMLWithSections = `<section><h2>Section 1</h2></section><section><h2>Section 2</h2></section><section><h2>Section 3</h2></section>`;
Expand Down
139 changes: 136 additions & 3 deletions packages/editor/src/serialization/html/deserializeFromHtml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
*
*/

import { Node, type Descendant } from "slate";
import { jsx as slatejsx } from "slate-hyperscript";
import { Element, Node, Text, type Descendant } from "slate";
import type { SlateSerializer } from "../../types";
import { SECTION_ELEMENT_TYPE } from "../../plugins/section/sectionTypes";
import { PARAGRAPH_ELEMENT_TYPE } from "../../plugins/paragraph/paragraphTypes";
import { PARAGRAPH_ELEMENT_TYPE, type ParagraphElement } from "../../plugins/paragraph/paragraphTypes";
import { commonSerializers, extendedSerializers } from "./htmlSerializers";
import { LINK_ELEMENT_TYPE } from "../../plugins/link/linkTypes";

// TODO: This entire file should be refactored and reconsidered. Our current deserialization is too complex.

// TODO: This should be of noop type
const DEFAULT_NOOP: Descendant[] = [{ type: "paragraph", children: [{ text: "" }] }];
Expand Down Expand Up @@ -65,10 +69,139 @@ export const deserializeFromHtml = (html: string, rules: SlateSerializer[], noop

const document = new DOMParser().parseFromString(noop ? `<div data-noop="true">${html}</div>` : html, "text/html");
const nodes = Array.from(document.body.children).map(deserialize);
const normalizedNodes = nodes.map((n) => (Node.isNodeList(n) ? n[0] : n)).filter((n) => !!n);

const normalizedNodes = nodes.map((n) => convertFromHTML(Node.isNodeList(n) ? n[0] : n)).filter((n) => !!n);
return normalizedNodes;
};

/**
 * Element types that `convertFromHTML` treats as inline content.
 *
 * An element child whose type is listed here is grouped together with text nodes when
 * deciding whether a parent mixes block and inline children, and is moved into a wrapping
 * paragraph when it does.
 *
 * NOTE(review): the commented-out entries look like inline types that are not supported
 * by this package yet — confirm before enabling them.
 */
export const inlines: Element["type"][] = [
// TYPE_CONCEPT_INLINE,
// TYPE_FOOTNOTE,
LINK_ELEMENT_TYPE,
// TYPE_CONTENT_LINK,
// TYPE_MATHML,
// TYPE_SPAN,
// TYPE_COMMENT_INLINE,
];

/**
 * Element types that `addEmptyParagraphs` surrounds with empty paragraphs.
 *
 * Every candidate entry is currently commented out, so the list is empty and
 * `addEmptyParagraphs` inserts nothing.
 *
 * NOTE(review): the commented-out entries are presumably block types awaiting support in
 * this package — confirm before enabling them.
 */
export const blocks: Element["type"][] = [
// TYPE_ASIDE,
// TYPE_FRAMED_CONTENT,
// TYPE_CODEBLOCK,
// TYPE_DETAILS,
// TYPE_AUDIO,
// TYPE_EMBED_BRIGHTCOVE,
// TYPE_EMBED_ERROR,
// TYPE_EXTERNAL,
// TYPE_H5P,
// TYPE_IMAGE,
// TYPE_FILE,
// TYPE_RELATED,
// TYPE_TABLE,
// TYPE_PITCH,
// TYPE_GRID,
// TYPE_KEY_FIGURE,
// TYPE_CAMPAIGN_BLOCK,
];

/**
 * Pads the children of `node` with empty text nodes, in place.
 *
 * After the call every non-text child has a text node directly before it, and the child
 * list ends with a text node. An empty child list becomes a single empty text node.
 *
 * @param node Element whose `children` array is replaced with the padded list.
 */
const addEmptyTextNodes = (node: Element) => {
  const padded: Descendant[] = [];

  for (const child of node.children) {
    // For the first child `padded` is empty, so the lookup yields undefined, which is not
    // a text node: a leading non-text child therefore always receives a text node in front.
    const previous = padded[padded.length - 1];
    if (!Text.isText(child) && !Text.isText(previous)) {
      padded.push({ text: "" });
    }
    padded.push(child);
  }

  // Guarantee a trailing text node (this also covers an empty child list).
  if (!Text.isText(padded[padded.length - 1])) {
    padded.push({ text: "" });
  }

  node.children = padded;
};

/**
 * Pads the children of `node` with empty paragraphs around block elements, in place.
 *
 * An empty paragraph is inserted before a leading element listed in `blocks`, between two
 * consecutive elements listed in `blocks`, and after a trailing one. Children whose type
 * is not listed in `blocks` are copied through untouched.
 *
 * @param node Element whose `children` array is replaced with the padded list.
 */
const addEmptyParagraphs = (node: Element) => {
  // A fresh object per call so inserted paragraphs never share identity.
  // this used to be defaultParagraphBlock
  const emptyParagraph = (): Descendant => ({ type: "paragraph", children: [{ text: "" }] });
  const isListedBlock = (candidate: Descendant | undefined): boolean =>
    Element.isElement(candidate) && blocks.includes(candidate.type);

  const padded: Descendant[] = [];
  node.children.forEach((child, position) => {
    const needsLeadingParagraph = position === 0 || isListedBlock(padded[padded.length - 1]);
    if (isListedBlock(child) && needsLeadingParagraph) {
      padded.push(emptyParagraph());
    }
    padded.push(child);
  });

  if (isListedBlock(padded[padded.length - 1])) {
    padded.push(emptyParagraph());
  }

  node.children = padded;
};

/**
 * Normalizes a freshly deserialized Slate tree so that no element mixes block children
 * with inline/text children.
 *
 * The tree is walked depth-first and mutated in place:
 * - An element whose children are all blocks, or all text/inline nodes, keeps its
 *   children and is padded via `addEmptyParagraphs` / `addEmptyTextNodes`.
 * - An element with mixed children gets each run of consecutive text/inline nodes wrapped
 *   in a new paragraph. Runs consisting only of `""` or `" "` are dropped, as are
 *   paragraphs without children.
 *
 * @param root Root node to normalize, or `null` when deserialization produced nothing.
 * @returns The same (mutated) root node, or `undefined` when `root` is `null`.
 */
export function convertFromHTML(root: Descendant | null): Descendant | undefined {
  const wrapMixedChildren = (node: Descendant): Descendant => {
    if (!Element.isElement(node)) {
      return node;
    }

    const children = node.children;
    const blockChildren = children.filter((child) => Element.isElement(child) && !inlines.includes(child.type));
    const mixed = blockChildren.length > 0 && blockChildren.length !== children.length;

    if (!mixed) {
      node.children = children.map(wrapMixedChildren);
      if (blockChildren.length === 0 && children.length > 0) {
        addEmptyTextNodes(node);
      } else {
        addEmptyParagraphs(node);
      }
      return node;
    }

    const cleanNodes: Descendant[] = [];
    // Paragraph currently collecting a run of text/inline nodes; reset by every block child.
    let openWrapperBlock: ParagraphElement | undefined;
    for (const child of children) {
      if (Text.isText(child) || (Element.isElement(child) && inlines.includes(child.type))) {
        const content = Node.string(child);
        // Empty and single-space nodes between blocks carry no content, so they are skipped
        // instead of being wrapped in a paragraph of their own.
        if (content === "" || content === " ") {
          continue;
        }
        if (!openWrapperBlock) {
          openWrapperBlock = slatejsx("element", { type: "paragraph" }, []) as ParagraphElement;
          cleanNodes.push(openWrapperBlock);
        }
        openWrapperBlock.children.push(child);
      } else {
        openWrapperBlock = undefined;
        if (child.type === "paragraph" && child.children.length === 0) {
          continue;
        }
        cleanNodes.push(child);
      }
    }

    // Assign the cleaned children first and pad afterwards. Padding before the assignment
    // would be thrown away, because the assignment replaces `node.children` wholesale.
    node.children = cleanNodes.map(wrapMixedChildren);
    addEmptyParagraphs(node);
    return node;
  };

  return root ? wrapMixedChildren(root) : undefined;
}

// /**
// * Slate does not allow a block to contain both blocks and inline nodes, so this code checks if the original
// * html violates this constraint and wraps consecutive inline nodes in a paragraph.
Expand Down

0 comments on commit 681ab36

Please sign in to comment.