From 710b527e938103ff515c4913d542359b2a58953a Mon Sep 17 00:00:00 2001 From: Eric Delord Date: Tue, 31 Mar 2020 13:20:30 +0200 Subject: [PATCH] api change --- README.md | 43 ++++++++++-------------------------- package-lock.json | 56 +++++++++++++++++++++++++++++++++-------------- package.json | 2 +- src/index.html | 10 --------- src/index.js | 38 ++++++++++++++++++++++---------- webpack.config.js | 2 +- 6 files changed, 78 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index 30edac3..3a1ce75 100644 --- a/README.md +++ b/README.md @@ -24,33 +24,9 @@ npm run build ## Running from Scriptable -Copy `dist/scriptable-pdfjs.html` into Scriptable Document Folder. +Copy the file `dist/scriptable-pdfjs.html` into Scriptable Document Folder. -```javascript -const fm = FileManager.iCloud(); -const wv = new WebView(); -const htmlFileUrl = fm.joinPath(fm.documentsDirectory(), "scriptable-pdfjs.html"); -await wv.loadFile(htmlFileUrl); - -/* - In the WebView your javascript will have access to the pdfjs global var. - pdfjs.pdfjsLib is the pdfjs module - pdfjs.getPDFText is a convenience wrapper - You have to pass the pdf file as a base64 string -*/ - -let javascript = 'pdfjs.getPDFText('; -javascript += '"' + fm.read(pdfFilePath).toBase64String() + '"'; -javascript += ');' - -let result = ""; -try { - result = await wv.evaluateJavaScript(javascript, true); -} catch (e) { - //... -} -//... -``` +and use [scriptable-pdfjs-demo](https://gist.github.com/flyingeek/70f5e09887f17dbfcd11a4b620a68b28) to play. ## Running from Shortcuts app @@ -71,27 +47,30 @@ try { } // use the same bookmark name as in the action above const filePath = fm.bookmarkedPath("ShortcutPDF"); -await fm. 
downloadFileFromiCloud(filePath); // works also for local file // We execute pdfjs in a WebView const wv = new WebView(); const htmlFileUrl = fm.joinPath(fm.documentsDirectory(), "scriptable-pdfjs.html"); +await fm.downloadFileFromiCloud(htmlFileUrl); await wv.loadFile(htmlFileUrl); -let javascript = 'pdfjs.getPDFText('; +let javascript = 'pdfjs.getTextFromBase64String('; javascript += '"' + fm.read(filePath).toBase64String() + '"'; -javascript += ', (pageText) => pageText.includes("(Long copy #1)")'; -javascript += ', true'; +//javascript += ', (pageText) => pageText.includes("(Long copy #1)")'; +//javascript += ', true'; javascript += ');' - +let result = ""; try { result = await wv.evaluateJavaScript(javascript, true); } catch (e) { result = ""; } -Script.setShortcutOutput(result); +return result; ``` For some reasons... (bug in shortcuts or scriptable ?) You cannot convert the PDF to Base64 and pass it as an argument to the script. You have to use the bookmark trick and make the base64 conversion in Scriptable. + +For files larger than 2.5 MB, you cannot run this script inline; you +have to modify the script to return its result through the clipboard. 
diff --git a/package-lock.json b/package-lock.json index 203b592..3eef5aa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2431,7 +2431,9 @@ } }, "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" }, "minipass": { "version": "2.9.0", @@ -2462,7 +2464,11 @@ }, "dependencies": { "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true, + "optional": true } } }, @@ -2612,12 +2618,16 @@ "requires": { "deep-extend": "^0.6.0", "ini": "~1.3.0", - "minimist": "^1.2.5", + "minimist": "^1.2.0", "strip-json-comments": "~2.0.1" }, "dependencies": { "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true, + "optional": true } } }, @@ -3559,7 +3569,10 @@ }, "dependencies": { "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true } } }, @@ -3609,11 +3622,13 @@ "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", "requires": { - "minimist": "^1.2.5" + "minimist": "^1.2.0" }, "dependencies": { "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": 
"sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" } } } @@ -3800,6 +3815,12 @@ "brace-expansion": "^1.1.7" } }, + "minimist": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", + "dev": true + }, "mississippi": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/mississippi/-/mississippi-3.0.0.tgz", @@ -3845,12 +3866,7 @@ "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", "dev": true, "requires": { - "minimist": "^1.2.5" - }, - "dependencies": { - "minimist": { - "version": "^1.2.5" - } + "minimist": "0.0.8" } }, "move-concurrently": { @@ -5836,7 +5852,10 @@ }, "dependencies": { "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true } } }, @@ -5894,11 +5913,14 @@ "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", "dev": true, "requires": { - "minimist": "^1.2.5" + "minimist": "^1.2.0" }, "dependencies": { "minimist": { - "version": "^1.2.5" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true } } }, @@ -6066,4 +6088,4 @@ } } } -} \ No newline at end of file +} diff --git a/package.json b/package.json index 4521711..b45e15b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scriptable-pdfjs", - "version": "1.0.0", + "version": "1.0.0-beta.1", "description": "converts a pdf to text in ios app scriptable app.", "main": "index.js", "scripts": { diff --git a/src/index.html b/src/index.html index e5221bc..e69de29 100644 --- a/src/index.html +++ b/src/index.html @@ 
-1,10 +0,0 @@ - - - - - scriptable-pdfjs - - - - - diff --git a/src/index.js b/src/index.js index 56d04f2..4541d44 100644 --- a/src/index.js +++ b/src/index.js @@ -13,13 +13,14 @@ if (typeof window !== 'undefined' && 'Worker' in window) { * returns the text of a page * @param pdf:pdfjs.Document * @param pageNo:number + * @param separator:string="" * @returns {Promise} */ -async function getPageText(pdf, pageNo) { +async function getPageText(pdf, pageNo, separator="") { // noinspection JSUnresolvedFunction const page = await pdf.getPage(pageNo); const tokenizedText = await page.getTextContent(); - return tokenizedText.items.map(token => token.str).join(""); + return tokenizedText.items.map(token => token.str).join(separator); } /** @@ -42,15 +43,17 @@ async function getPageText(pdf, pageNo) { * @param source: pdfjs.Document - the pdf document * @param matchFn:[function] - the match function * @param breakAfter:[boolean=false] - if true, stop the search after failure + * @param pageSeparator:string="" - the page separator + * @param tokenSeparator:string="" - the token separator * @returns {Promise} */ -async function extractPDFText(source, matchFn, breakAfter=false){ +export async function extractText(source, matchFn, breakAfter=false, pageSeparator="", tokenSeparator=""){ const pdfPages = []; let matchingPagesCount = 0; const pdf = await pdfjs.getDocument(source).promise; const maxPages = pdf.numPages; for (let pageNo = 1; pageNo <= maxPages; pageNo += 1) { - const pageText = await getPageText(pdf, pageNo); + const pageText = await getPageText(pdf, pageNo, tokenSeparator); if (matchFn) { if (matchFn(pageText, pageNo, pdf)) { pdfPages.push(pageText); @@ -62,23 +65,34 @@ async function extractPDFText(source, matchFn, breakAfter=false){ pdfPages.push(pageText); } } - return pdfPages.join(""); + return pdfPages.join(pageSeparator); } /** - * a wrapper to the extractPDFText with completion and error handling - * @param base64string:string - the pdf in base64 string 
format + * a convenience wrapper to extractText using Scriptable completion fn + * @param source:[string|object] - pdf source accepted by pdfjs.getDocument * @param matchFn:[function] - optional text matching function * @param breakAfter:[boolean=false] + * @param pageSeparator:string="" - the page separator + * @param tokenSeparator:string="" - the token separator */ -export function getPDFText(base64string, matchFn, breakAfter=false ) { - extractPDFText( - {data: atob(base64string)}, - matchFn, - breakAfter).then((text) => { +export function getText(source, matchFn, breakAfter=false, pageSeparator="", tokenSeparator = "" ) { + extractText(source, matchFn, breakAfter, pageSeparator, tokenSeparator) + .then((text) => { completion(text) }, (error) => { throw Error(error); }); } +/** + * a wrapper for getText when you need to use base64 string as source + * @param base64string:string - the pdf in base64 string format + * @param matchFn:[function] - optional text matching function + * @param breakAfter:[boolean=false] + * @param pageSeparator:string="" - the page separator + * @param tokenSeparator:string="" - the token separator + */ +export function getTextFromBase64String(base64string, matchFn, breakAfter=false, pageSeparator="", tokenSeparator = "") { + getText({data: atob(base64string)}, matchFn, breakAfter, pageSeparator, tokenSeparator); +} diff --git a/webpack.config.js b/webpack.config.js index f859f99..4c9b608 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -30,7 +30,7 @@ const config = { inlineSource: '.(js|css)$', filename: `${infos.name}.html`, }), - new HtmlWebpackInlineSourcePlugin(HtmlWebpackPlugin), + new HtmlWebpackInlineSourcePlugin(HtmlWebpackPlugin) ] };