Skip to content

Commit

Permalink
fix: get internal links working
Browse files Browse the repository at this point in the history
  • Loading branch information
hatton committed Aug 9, 2022
1 parent c314bf8 commit bb31930
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 26 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"notion-download": "node dist/index.js",
"cmdhelp": "ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts",
"// test out with a private sample notion db": "",
"pull-test": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
"pull-test": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
"// test with a semi-stable/public site:": "",
"pull-sample": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --locales en,es,fr,de --log-level verbose",
"pull-sample-with-paths": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --img-output-path ./sample_img"
Expand Down
2 changes: 1 addition & 1 deletion src/HierarchicalNamedLayoutStrategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export class HierarchicalNamedLayoutStrategy extends LayoutStrategy {
context: string,
levelLabel: string
): string {
const path = context + "/" + sanitize(levelLabel);
const path = context + "/" + sanitize(levelLabel).replaceAll(" ", "-");

//console.log("Creating level " + path);
fs.mkdirSync(dirRoot + "/" + path, { recursive: true });
Expand Down
15 changes: 9 additions & 6 deletions src/NotionPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,15 @@ export class NotionPage {
}

public matchesLinkId(id: string): boolean {
return (
const match =
id === this.pageId || // from a link_to_page.pageId, which still has the dashes
id === this.pageId.replace(/-/g, "")
); // from inline links, which are lacking the dashes
id === this.pageId.replaceAll("-", ""); // from inline links, which are lacking the dashes

logDebug(
`matchedLinkId`,
`comparing pageId:${this.pageId} to id ${id} --> ${match.toString()}`
);
return match;
}

public get type(): PageType {
Expand Down Expand Up @@ -98,9 +103,7 @@ export class NotionPage {
public get slug(): string {
const explicitSlug = this.getPlainTextProperty("Slug", "");
if (explicitSlug) return explicitSlug;
return encodeURIComponent(
this.nameOrTitle.toLowerCase().replaceAll(" ", "-")
)
return encodeURIComponent(this.nameOrTitle.replaceAll(" ", "-"))
.replaceAll("%3A", "-")
.replaceAll("--", "-");
}
Expand Down
4 changes: 0 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ program
"-i, --img-output-path <string>",
"Path to directory where images will be stored. If this is not included, images will be placed in the same directory as the document that uses them, which then allows for localization of screenshots."
)
// .option(
// "-l, --internal-link-prefix <string>",
// "when converting a link from one page to another, prefix the with this path instead of the default, which is rooted at the markdown-output-path."
// )
.option(
"-p, --img-prefix-in-markdown <string>",
"When referencing an image from markdown, prefix with this path instead of the full img-output-path. Should be used only in conjunction with --img-output-path."
Expand Down
51 changes: 37 additions & 14 deletions src/links.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import { LayoutStrategy } from "./LayoutStrategy";
import { verbose, warning } from "./log";
import { error, verbose, warning } from "./log";
import { NotionPage } from "./NotionPage";

export function convertInternalLinks(
markdown: string,
pages: NotionPage[],
layoutStrategy: LayoutStrategy
): string {
//console.log(JSON.stringify(pages, null, 2));

return transformLinks(markdown, (url: string) => {
const convertHref = (url: string) => {
const p = pages.find(p => {
return p.matchesLinkId(url);
});
Expand All @@ -25,8 +23,29 @@ export function convertInternalLinks(
);

return url;
});
};
const convertLinkText = (text: string, url: string) => {
// In Notion, if you just add a link to a page without linking it to any text, then in Notion
// you see the name of the page as the text of the link. But when Notion gives us that same
// link, it uses "link_to_page" as the text. So we have to look up the name of the page in
// order to fix that.
if (text !== "link_to_page") {
return text;
}

const p = pages.find(p => {
return p.matchesLinkId(url);
});
if (p) {
return p.nameOrTitle;
} else {
error(`Encountered a link to page ${url} but could not find that page.`);
return "Problem Link";
}
};
return transformLinks(markdown, convertHref, convertLinkText);
}

// function convertInternalLinks(
// blocks: (
// | ListBlockChildrenResponse
Expand All @@ -48,27 +67,31 @@ export function convertInternalLinks(
// });
// }

function transformLinks(input: string, transform: (url: string) => string) {
function transformLinks(
pageMarkdown: string,
convertHref: (url: string) => string,
convertLinkText: (text: string, url: string) => string
) {
// Note: from notion (or notion-md?) we get slightly different hrefs depending on whether the link is "inline"
// (some other text has been turned into a link) or "raw".
// Raw links come in without a leading slash, e.g. [link_to_page](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
// Inline links come in with a leading slash, e.g. [pointer to the introduction](/4a6de8c0b90b444b8a7bd534d6ec71a4)
const linkRegExp = /\[([^\]]+)?\]\(\/?([^),^/]+)\)/g;
let output = input;
let output = pageMarkdown;
let match;

// The key to understanding this while loop is that linkRegExp (which has the "g" flag) is stateful:
// each exec() call resumes after the previous match and returns the next one, or null when none remain. https://stackoverflow.com/a/1520853/723299
verbose(`transformLinks ${input}`);
while ((match = linkRegExp.exec(input)) !== null) {
verbose(`transformLinks ${pageMarkdown}`);
while ((match = linkRegExp.exec(pageMarkdown)) !== null) {
const string = match[0];
const text = match[1] || "";
const url = match[2];

const replacement = transform(url);
const hrefFromNotion = match[2];
const text = convertLinkText(match[1] || "", hrefFromNotion);
const hrefForDocusaurus = convertHref(hrefFromNotion);

if (replacement) {
output = output.replace(string, `[${text}](${replacement})`);
if (hrefForDocusaurus) {
output = output.replace(string, `[${text}](${hrefForDocusaurus})`);
} else {
verbose(`Maybe problem with link ${JSON.stringify(match)}`);
}
Expand Down

0 comments on commit bb31930

Please sign in to comment.