Skip to content

Commit efae761

Browse files
committed
Add more page content
Signed-off-by: Daishan Peng <[email protected]>
1 parent 9d2ea2a commit efae761

File tree

3 files changed

+209
-18
lines changed

3 files changed

+209
-18
lines changed

src/index.ts

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ import { Client } from "@notionhq/client";
22
import dotenv from "dotenv";
33
import { writeFile, mkdir } from "fs/promises";
44
import path from "path";
5-
import { SearchResponse } from "@notionhq/client/build/src/api-endpoints";
5+
import { PageObjectResponse, SearchResponse } from "@notionhq/client/build/src/api-endpoints";
6+
import { getPageContent } from "./page";
7+
import * as fs from "node:fs";
68

79
dotenv.config();
810

@@ -12,18 +14,25 @@ async function main() {
1214
});
1315

1416
// Function to write a page to a file
15-
async function writePageToFile(page: any, directory: string) {
16-
const pageId = page.id.replace(/-/g, '');
17-
const filePath = path.join(directory, `${pageId}.data`);
18-
await writeFile(filePath, JSON.stringify(page, null, 2));
19-
console.log(`Wrote page ${pageId} to ${filePath}`);
17+
/**
 * Renders one Notion page and writes it to `<directory>/<pageId>/<title>.md`.
 *
 * The file name is taken from the page's title property. The property is
 * looked up by its `type` rather than by key, because only the value's type
 * identifies it when the key is something other than "title" or "Name".
 * All rich-text segments of the title are joined so a partially formatted
 * title is not truncated to its first segment. When no usable title exists,
 * the page id is used as the file name.
 *
 * @param page      Page object as returned by the Notion search endpoint.
 * @param directory Root output directory; a sub-directory named after the
 *                  page id is created inside it.
 */
async function writePageToFile(page: PageObjectResponse, directory: string) {
  const pageId = page.id;
  const pageContent = await getPageContent(notion, pageId);

  const fileDir = path.join(directory, pageId);
  await mkdir(fileDir, { recursive: true });

  const titleProperty = Object.values(page.properties ?? {}).find(
    (property) => property.type === "title",
  );
  const rawTitle =
    titleProperty?.type === "title"
      ? titleProperty.title.map((segment) => segment.plain_text).join("")
      : "";

  // Path separators in a title would otherwise be interpreted as directories.
  const title = rawTitle.trim().replace(/[\/\\]/g, "-") || pageId;

  const filePath = path.join(fileDir, title + ".md");
  // Use the promise-based writer so the event loop is not blocked mid-sync.
  await writeFile(filePath, pageContent, "utf8");
}
2129

2230
// Function to fetch all pages
2331
async function fetchAllPages() {
2432
let pages: any[] = [];
2533
let cursor: string | undefined = undefined;
2634

35+
2736
while (true) {
2837
const response: SearchResponse = await notion.search({
2938
filter: {
@@ -47,15 +56,22 @@ async function main() {
4756

4857
// Fetch all pages
4958
const pages = await fetchAllPages();
59+
let metadata: Map<string, {
60+
url: string;
61+
}> = new Map();
5062

51-
// Define the output directory
5263
const outputDir = path.join(process.env.WORKSPACE_DIR!!, 'knowledge', 'integrations', 'notion');
53-
await mkdir(outputDir, { recursive: true }); // Ensure the directory exists
64+
await mkdir(outputDir, { recursive: true });
65+
66+
for (const page of pages) {
67+
await writePageToFile(page, outputDir);
68+
metadata.set(page.id, {
69+
url: page.url,
70+
})
71+
}
5472

55-
// Write all pages to files
56-
await Promise.all(
57-
pages.map((page) => writePageToFile(page, outputDir))
58-
);
73+
const metadataPath = path.join(outputDir, 'metadata.json');
74+
await writeFile(metadataPath, JSON.stringify(Object.fromEntries(metadata)), 'utf8');
5975

6076
console.log(`Finished writing ${pages.length} pages to ${outputDir}`);
6177
}

src/page.ts

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import { Client } from "@notionhq/client";
2+
import { BlockObjectResponse } from "@notionhq/client/build/src/api-endpoints";
3+
4+
5+
/**
 * Renders every child block of a Notion page or block as Markdown-like text.
 *
 * Children are fetched page by page until the API reports no more results,
 * so long pages are not cut off after the first response. Each rendered
 * block is terminated with a newline. Nested children are rendered
 * recursively with two extra spaces of indentation.
 *
 * @param client      Notion API client used for all requests.
 * @param id          Id of the page or block whose children are rendered.
 * @param indentation Number of spaces to prefix every rendered line with.
 * @returns The rendered text; an empty string when there are no children.
 */
export async function getPageContent(client: Client, id: string, indentation = 0): Promise<string> {
  // Explicit response type: without it the cursor loop makes the inferred
  // type of the response depend on itself.
  type ChildrenPage = Awaited<ReturnType<Client["blocks"]["children"]["list"]>>;

  let result = "";
  let cursor: string | undefined = undefined;

  do {
    const response: ChildrenPage = await client.blocks.children.list({
      block_id: id,
      start_cursor: cursor,
    });

    for (const b of response.results) {
      // Partial block objects carry no `type`, so there is nothing to render.
      if (!("type" in b)) {
        continue;
      }
      const block = b as BlockObjectResponse;

      // Tables are complicated, so we handle them completely separately
      if (block.type === "table") {
        result += `${await printTable(client, block)}\n`;
        continue;
      }

      const rendered = await printBlock(client, block, indentation);
      result += rendered.endsWith("\n") ? rendered : rendered + "\n";

      // Child pages are synced as their own documents and synced blocks are
      // resolved by printBlock, so neither is descended into here.
      if (block.has_children && block.type !== "child_page" && block.type !== "synced_block") {
        result += await getPageContent(client, block.id, indentation + 2);
      }
    }

    cursor = response.has_more ? (response.next_cursor ?? undefined) : undefined;
  } while (cursor !== undefined);

  return result;
}
23+
24+
/**
 * Renders a single Notion block as one (possibly multi-line) string.
 *
 * The output is prefixed with `indentation` spaces and every embedded line
 * break is followed by the same prefix, so multi-line blocks (code, quotes)
 * stay aligned. Block types without a case render as the bare prefix.
 *
 * Synced blocks have no text of their own: their content is the rendered
 * children of the source block (`synced_from`), or of the block itself when
 * it is the original. That content is returned as-is because
 * `getPageContent` already applies the indentation.
 *
 * @param client      Notion API client, needed only to resolve synced blocks.
 * @param b           The block to render.
 * @param indentation Number of spaces to prefix each line with.
 */
async function printBlock(client: Client, b: BlockObjectResponse, indentation: number): Promise<string> {
  const indent = indentation > 0 ? " ".repeat(indentation) : "";
  let result: string = indent;

  switch (b.type) {
    case "bookmark":
      if (b.bookmark.caption !== null && richTextArrayToString(b.bookmark.caption) !== "") {
        result += `Bookmark: ${b.bookmark.url} (${richTextArrayToString(b.bookmark.caption)})`;
      } else {
        result += `Bookmark: ${b.bookmark.url}`;
      }
      break;
    case "bulleted_list_item":
      result += `- ${richTextArrayToString(b.bulleted_list_item.rich_text)}`;
      break;
    case "callout":
      result += `> ${richTextArrayToString(b.callout.rich_text)}`;
      break;
    case "child_database":
      result += `Child Database: ${b.child_database.title}`;
      break;
    case "child_page":
      result += `Child Page: ${b.child_page.title}`;
      break;
    case "code":
      if (b.code.language !== null) {
        result += "```" + b.code.language + "\n";
      } else {
        result += "```\n";
      }
      result += richTextArrayToString(b.code.rich_text);
      result += "\n```";
      if (b.code.caption !== null && richTextArrayToString(b.code.caption) !== "") {
        result += `\n(${richTextArrayToString(b.code.caption)})`;
      }
      break;
    case "divider":
      result += "-------------------------------------";
      break;
    case "embed":
      result += `Embed: ${b.embed.url}`;
      break;
    case "equation":
      result += `Equation: ${b.equation.expression}`;
      break;
    case "file":
      result += fileToString("File", b.file);
      break;
    case "heading_1":
      result += `# ${richTextArrayToString(b.heading_1.rich_text)}`;
      break;
    case "heading_2":
      result += `## ${richTextArrayToString(b.heading_2.rich_text)}`;
      break;
    case "heading_3":
      result += `### ${richTextArrayToString(b.heading_3.rich_text)}`;
      break;
    case "image":
      result += fileToString("Image", b.image);
      break;
    case "link_preview":
      result += b.link_preview.url;
      break;
    case "numbered_list_item":
      result += `1. ${richTextArrayToString(b.numbered_list_item.rich_text)}`;
      break;
    case "paragraph":
      result += richTextArrayToString(b.paragraph.rich_text);
      break;
    case "pdf":
      result += fileToString("PDF", b.pdf);
      break;
    case "quote":
      result += "\"\"\"\n";
      result += richTextArrayToString(b.quote.rich_text);
      result += "\n\"\"\"";
      break;
    case "synced_block": {
      // Previously the fetched content was awaited and then thrown away, and
      // original synced blocks (synced_from === null) were never rendered.
      const sourceId = b.synced_block.synced_from?.block_id ?? b.id;
      return getPageContent(client, sourceId, indentation);
    }
    case "to_do":
      if (b.to_do.checked) {
        result += `[x] ${richTextArrayToString(b.to_do.rich_text)}`;
      } else {
        result += `[ ] ${richTextArrayToString(b.to_do.rich_text)}`;
      }
      break;
    case "toggle":
      result += `> ${richTextArrayToString(b.toggle.rich_text)}`;
      break;
    case "video":
      result += fileToString("Video", b.video);
      break;
  }

  // A string pattern would only replace the first line break; the global
  // regex indents every continuation line.
  return result.replace(/\n/g, "\n" + indent);
}
123+
124+
/**
 * Flattens a Notion rich-text array into its plain text.
 *
 * The segments of a rich-text array are contiguous pieces of one string,
 * split wherever formatting changes, so they are concatenated without a
 * separator. Inserting one would break words apart at formatting boundaries
 * and leave a trailing space.
 *
 * @param richTextArray Rich-text segments; `null`/`undefined` is treated as empty.
 * @returns The concatenated `plain_text` of all segments, or `""` when there are none.
 */
export function richTextArrayToString(
  richTextArray: ReadonlyArray<{ plain_text?: string | null }> | null | undefined,
): string {
  if (richTextArray == null) {
    return "";
  }
  return richTextArray.map((segment) => segment?.plain_text ?? "").join("");
}
131+
132+
/**
 * Renders a file-like block payload (file, image, PDF, video) as one line.
 *
 * A payload of type "file" is printed with its URL and expiry time; a payload
 * of type "external" is printed with its URL only. Any other type contributes
 * no URL text. A non-blank caption is appended in parentheses.
 *
 * @param prefix Label describing the kind of file, e.g. "Image".
 * @param file   The block's file payload (`b.file`, `b.image`, ...).
 */
function fileToString(prefix: any, file: any) {
  let result = "";
  if (file.type === "file") {
    result = `${prefix}: ${file.file.url} (expires ${file.file.expiry_time})`;
  } else if (file.type === "external") {
    result = `External ${prefix}: ${file.external.url}`;
  }

  // `!= null` also rejects undefined, which the strict check let through to
  // the rich-text helper. The caption is rendered once and reused.
  const caption = file.caption != null ? richTextArrayToString(file.caption).trim() : "";
  if (caption !== "") {
    result += ` (${caption})`;
  }
  return result;
}
144+
145+
/**
 * Renders a Notion table block as text, one line per table row.
 *
 * All rows are fetched (following pagination), rendered with
 * `printTableRow`, and joined with newlines. When the table has a column
 * header, the first row is rendered as the header row. The rendered text is
 * returned; previously the rows were rendered and discarded, so callers
 * received `undefined`.
 *
 * @param client Notion API client used to fetch the table's rows.
 * @param table  The table block (`type === "table"`).
 * @returns The rendered table without a trailing newline; `""` for an empty table.
 */
async function printTable(client: Client, table: any): Promise<string> {
  // Explicit response type: without it the cursor loop makes the inferred
  // type of the response depend on itself.
  type RowsPage = Awaited<ReturnType<Client["blocks"]["children"]["list"]>>;

  const rows: any[] = [];
  let cursor: string | undefined = undefined;

  do {
    const response: RowsPage = await client.blocks.children.list({
      block_id: table.id,
      start_cursor: cursor,
    });
    for (const child of response.results) {
      const row = (child as any).table_row;
      // Skip anything that is not a fully populated table row.
      if (row) {
        rows.push(row);
      }
    }
    cursor = response.has_more ? (response.next_cursor ?? undefined) : undefined;
  } while (cursor !== undefined);

  const hasColumnHeader = Boolean(table.table.has_column_header);
  return rows
    .map((row, index) => printTableRow(row, table.table.has_row_header, hasColumnHeader && index === 0))
    .join("\n");
}
158+
159+
/**
 * Renders one table row as a pipe-delimited Markdown table line.
 *
 * Cell text is trimmed, embedded line breaks become spaces and literal pipes
 * are escaped, so cell content cannot break the row apart. Empty cells are
 * never wrapped in bold markers.
 *
 * @param row       The `table_row` payload; its `cells` are rich-text arrays.
 * @param boldFirst When truthy, the first cell is bold (row header).
 * @param boldAll   When truthy, every cell is bold and a delimiter row with
 *                  one `---` cell per column is appended (column header).
 *                  Takes precedence over `boldFirst`.
 */
function printTableRow(row: any, boldFirst: any, boldAll: any): string {
  const cells: string[] = (row?.cells ?? []).map((cell: any) =>
    richTextArrayToString(cell)
      .trim()
      .replace(/\r?\n/g, " ")
      .replace(/\|/g, "\\|"),
  );

  const embolden = (text: string): string => (text === "" ? "" : `**${text}**`);

  let result = "|";
  cells.forEach((text, index) => {
    const isBold = boldAll || (boldFirst && index === 0);
    result += ` ${isBold ? embolden(text) : text} |`;
  });

  if (boldAll) {
    // One delimiter cell per column. A single run of dashes does not form a
    // valid header separator, and its length went negative for a row with
    // no cells.
    result += "\n|" + cells.map(() => " --- |").join("");
  }
  return result;
}

tool.gpt

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
Name: Sync Notion Pages
22
Description: Provides access to the Notion API (read-only)
3-
Credential: github.com/gptscript-ai/gateway-oauth2 as notion.read with NOTION_TOKEN as env and notion as integration
4-
Context: syncPages
3+
Credential: github.com/gptscript-ai/gateway-oauth2 as notion.sync-file with NOTION_TOKEN as env and notion as integration
4+
5+
#!/usr/bin/env npm --prefix ${GPTSCRIPT_TOOL_DIR} run start
56

6-
---
7-
Name: syncPages
8-
Description: Syncs Notion Pages
97

10-
#!/usr/bin/env npm run start

0 commit comments

Comments
 (0)