diff --git a/packages/download-notion/src/filterTree.ts b/packages/download-notion/src/filterTree.ts
new file mode 100644
index 0000000..f572c82
--- /dev/null
+++ b/packages/download-notion/src/filterTree.ts
@@ -0,0 +1,68 @@
+import { NotionObjectResponse, NotionObjectTree } from "notion-downloader"
+
+import { NotionDatabase } from "./NotionDatabase"
+import { getNotionObject } from "./NotionObjectUtils"
+import { NotionPage } from "./NotionPage"
+import { verbose } from "./log"
+
+/**
+ * Removes from `objectsTree` every page whose status differs from
+ * `expectedStatusTag`, together with everything below such a page.
+ * The tree is mutated in place through `tree.removeObject`.
+ *
+ * @param objectsTree - Tree to filter; it is walked with `traverse`.
+ * @param expectedStatusTag - Status a page must have to be kept. An empty
+ *   string or `"*"` disables filtering, so nothing is removed.
+ */
+export function filterTree(
+  objectsTree: NotionObjectTree,
+  expectedStatusTag: string
+): void {
+  // Loop-invariant: "" and "*" both mean "keep every page".
+  const statusFilterEnabled =
+    expectedStatusTag !== "" && expectedStatusTag !== "*"
+
+  function shouldFilterPageStatus(
+    notionObject: NotionDatabase | NotionPage
+  ): boolean {
+    return (
+      statusFilterEnabled &&
+      notionObject.object === "page" &&
+      notionObject.status !== expectedStatusTag
+    )
+  }
+
+  const nodeAction = (
+    objectResponse: NotionObjectResponse,
+    parentContext: { shouldRemove: boolean },
+    tree: NotionObjectTree
+  ) => {
+    // Propagate removal downwards: anything below a filtered node goes too.
+    if (parentContext.shouldRemove) {
+      verbose(
+        `Skipping [${objectResponse.object}] (${objectResponse.id}) because parent has been filtered`
+      )
+      tree.removeObject(objectResponse.id)
+      return { shouldRemove: true }
+    }
+
+    // TODO: We should filter databases as well (for wikis)
+    if (!statusFilterEnabled || objectResponse.object !== "page") {
+      return { shouldRemove: false }
+    }
+
+    // Build the wrapper once and reuse it for both the check and the log line.
+    const notionPage = getNotionObject(objectResponse) as NotionPage
+    if (!shouldFilterPageStatus(notionPage)) {
+      return { shouldRemove: false }
+    }
+
+    verbose(
+      `Skipping [${objectResponse.object}] (${objectResponse.id}) ${notionPage.title} because it has status ${notionPage.status}`
+    )
+    tree.removeObject(notionPage.id)
+    return { shouldRemove: true }
+  }
+
+  objectsTree.traverse(nodeAction, { shouldRemove: false })
+}
diff --git a/packages/download-notion/src/getFileTreeMap.ts b/packages/download-notion/src/getFileTreeMap.ts
index 01e30fa..c042ff7 100644
--- a/packages/download-notion/src/getFileTreeMap.ts
+++ 
b/packages/download-notion/src/getFileTreeMap.ts @@ -1,25 +1,17 @@ -import { - NotionObjectResponse, - NotionObjectTree, - NotionObjectTreeNode, -} from "notion-downloader" +import { NotionObjectResponse, NotionObjectTree } from "notion-downloader" import { FilesManager } from "./FilesManager" import { LayoutStrategy } from "./LayoutStrategy" -import { NotionDatabase } from "./NotionDatabase" import { NotionImage } from "./NotionImage" import { getNotionObject } from "./NotionObjectUtils" -import { NotionPage } from "./NotionPage" -import { NotionObjectsData } from "./objects_utils" -import { traverseTree } from "./traverseTree" -export async function getFileTreeMap( +export function getFileTreeMap( currentPath: string, objectsTree: NotionObjectTree, databaseIsRootLevel: boolean, layoutStrategy: LayoutStrategy, filesManager: FilesManager -): Promise { +) { const nodeAction = ( objectResponse: NotionObjectResponse, parentContext: { diff --git a/packages/download-notion/src/notionPull.ts b/packages/download-notion/src/notionPull.ts index 57a1b2f..7966011 100644 --- a/packages/download-notion/src/notionPull.ts +++ b/packages/download-notion/src/notionPull.ts @@ -224,15 +224,17 @@ export async function notionPull(options: NotionPullOptions): Promise { endGroup() group("Stage 2: Filtering pages...") - // await filterTree(objectsTree, objectsData, options.conversion.statusTag) - const allObjectsMap = objectsToObjectsMap(objectsData) + filterTree(objectsTree, options.conversion.statusTag) + + // TODO: allObjectsMap should not be needed, instead, getting the ancestors should be handled by the objectsTree + const allObjectsMap = objectsToObjectsMap(objectsTree.data) endGroup() group("Stage 3: Building paths...") // -------- FILES --------- - await getFileTreeMap( + getFileTreeMap( "", // Start context objectsTree, options.rootDbAsFolder, @@ -263,20 +265,14 @@ export async function notionPull(options: NotionPullOptions): Promise { ) : "" ) - // ----- Pages ---- - const pages 
= Object.values(objectsData.page).map( - (page) => new NotionPage(page) - ) + const pages = objectsTree.getPages().map((page) => new NotionPage(page)) - // ----- Databases ---- - const databases: NotionDatabase[] = Object.values(objectsData.database).map( - (db) => new NotionDatabase(db) - ) + const databases = objectsTree + .getDatabases() + .map((db) => new NotionDatabase(db)) // ----- Images ---- // TODO: If image belongs to a page that was filtered (E.g. because of status), this fails! - const imageBlocks = Object.values(objectsData.block).filter( - (block) => block.type === "image" - ) + const imageBlocks = objectsTree.getBlocks("image") // Process images saves them to the filesMap and also updates the markdown files await processImages({ diff --git a/packages/download-notion/src/traverseTree.ts b/packages/download-notion/src/traverseTree.ts deleted file mode 100644 index 41925e6..0000000 --- a/packages/download-notion/src/traverseTree.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { NotionObjectTreeNode } from "notion-downloader" - -import { NotionDatabase } from "./NotionDatabase" -import { NotionPage } from "./NotionPage" -import { NotionObjectsData } from "./objects_utils" - -export function getNotionObject( - objectData: NotionObjectsData, - currentID: string, - type: "page" | "database" -) { - if (type === "page") { - return getNotionPage(objectData, currentID) - } else if (type === "database") { - return getNotionDatabase(objectData, currentID) - } else { - throw new Error(`Unknown object type: ${type}`) - } -} - -export function getNotionPage( - objectsData: NotionObjectsData, - currentID: string -) { - const pageResponse = objectsData.page[currentID] - return new NotionPage(pageResponse) -} -export function getNotionDatabase( - objectsData: NotionObjectsData, - currentID: string -) { - const databaseResponse = objectsData.database[currentID] - return new NotionDatabase(databaseResponse) -} - -export async function traverseTree( - parentContext: T, - 
objectsTreeNode: NotionObjectTreeNode, - objectsData: NotionObjectsData, - nodeAction: (notionObject: NotionDatabase | NotionPage, parentContext: T) => T -): Promise { - if (objectsTreeNode.object == "block") { - // TODO: Handle block objects - return - } - - const notionObject = getNotionObject( - objectsData, - objectsTreeNode.id, - objectsTreeNode.object - ) - - // Execute the node action and get the new context - const newContext = nodeAction(notionObject, parentContext) - - for (const childObject of objectsTreeNode.children) { - await traverseTree(newContext, childObject, objectsData, nodeAction) - } -} diff --git a/packages/notion-downloader/src/index.ts b/packages/notion-downloader/src/index.ts index 40e5687..291d421 100644 --- a/packages/notion-downloader/src/index.ts +++ b/packages/notion-downloader/src/index.ts @@ -4,7 +4,12 @@ export { fetchNotionObjectTree, } from "./fetch-notion-object-tree" -export { NotionObjectTreeNode, BlockObjectTreeNode } from "./notion-object-tree" +export { + NotionObjectTreeNode, + BlockObjectTreeNode, + NotionObjectTree, + NotionObjectResponse, +} from "./notion-object-tree" export { cacheOptionsSchema } from "./schema" export { diff --git a/packages/notion-downloader/src/notion-object-tree.ts b/packages/notion-downloader/src/notion-object-tree.ts index 42eba0e..ccaa5fc 100644 --- a/packages/notion-downloader/src/notion-object-tree.ts +++ b/packages/notion-downloader/src/notion-object-tree.ts @@ -70,6 +70,29 @@ export class NotionObjectTree { } } + // TODO: Consider doing the get operations starting from a specific node + getPages(): PageObjectResponse[] { + return Object.values(this.data.page) + } + + getDatabases(): DatabaseObjectResponse[] { + return Object.values(this.data.database) + } + + getBlocks(): BlockObjectResponse[] + getBlocks( + type: T + ): Extract[] + getBlocks(type?: string): BlockObjectResponse[] { + if (type) { + return Object.values(this.data.block).filter( + (block): block is Extract => + block.type === 
type + ) + } + return Object.values(this.data.block) + } + traverse( nodeAction: ( objectResponse: NotionObjectResponse, @@ -80,6 +103,9 @@ export class NotionObjectTree { startNode: NotionObjectTreeNode = this.tree ) { const objectResponse = this.data[startNode.object][startNode.id] + if (!objectResponse) { + throw new Error(`Object response not found for id: ${startNode.id}`) + } const newContext = nodeAction(objectResponse, parentContext, this) for (const child of startNode.children) { @@ -145,6 +171,10 @@ export class NotionObjectTree { removeObject(id: string) { const node = this.getNodeById(id) if (!node) return + // First let children remove themselves, and then remove up to the start node + for (const child of node.children) { + this.removeObject(child.id) + } // Delete from data delete this.data[node.object][id] // Delete from tree @@ -158,9 +188,6 @@ export class NotionObjectTree { } // Delete from mapping this.idToNodeMap.delete(id) - for (const child of node.children) { - this.removeObject(child.id) - } } }