From 30d6a0cfcdc19e7292a2369315c69b5ce0003e9c Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Thu, 21 Nov 2024 10:58:00 -0800 Subject: [PATCH] Refactor fetchText to use host.fetchText function (#886) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: migrate fetchText to host.fetchText function ✨ * feat: add resolveIssue option to GitHub parsing 🎉 * update tests * refactor: ♻️ restructure project & remove configs --- .../content/docs/getting-started/tutorial.md | 2 +- .../content/docs/guides/search-and-fetch.mdx | 6 +-- .../content/docs/reference/scripts/fetch.md | 16 ++++++-- packages/cli/src/run.ts | 5 ++- packages/core/src/fetch.ts | 38 +++++++++++++++++-- packages/core/src/github.ts | 7 +++- packages/core/src/promptcontext.ts | 3 ++ packages/core/src/types/prompt_template.d.ts | 34 ++++++++++++++++- packages/core/src/types/prompt_type.d.ts | 3 +- .../{fetch.genai.js => fetch.genai.mjs} | 9 ++++- packages/sample/genaisrc/lza_review.genai.js | 2 +- .../sample/genaisrc/style-tester.genai.mjs | 0 packages/vscode/tutorial.md | 2 +- slides/genaisrc/.gitattributes | 1 - slides/genaisrc/.gitignore | 4 -- slides/genaisrc/jsconfig.json | 17 --------- slides/genaisrc/tsconfig.json | 21 ---------- 17 files changed, 104 insertions(+), 66 deletions(-) rename packages/sample/genaisrc/{fetch.genai.js => fetch.genai.mjs} (50%) rename slides/genaisrc/style-tester.genai.js => packages/sample/genaisrc/style-tester.genai.mjs (100%) delete mode 100644 slides/genaisrc/.gitattributes delete mode 100644 slides/genaisrc/.gitignore delete mode 100644 slides/genaisrc/jsconfig.json delete mode 100644 slides/genaisrc/tsconfig.json diff --git a/docs/src/content/docs/getting-started/tutorial.md b/docs/src/content/docs/getting-started/tutorial.md index b04de6f672..44557882c0 100644 --- a/docs/src/content/docs/getting-started/tutorial.md +++ b/docs/src/content/docs/getting-started/tutorial.md @@ -201,7 +201,7 @@ defTool( "fetch", "Download 
text from a URL", { url: "https://..." }, - ({ url }) => fetchText(url) + ({ url }) => host.fetchText(url) ) $`Summarize https://raw.githubusercontent.com/microsoft/genaiscript/main/README.md in 1 sentence.` diff --git a/docs/src/content/docs/guides/search-and-fetch.mdx b/docs/src/content/docs/guides/search-and-fetch.mdx index cb9d553211..d723d8f644 100644 --- a/docs/src/content/docs/guides/search-and-fetch.mdx +++ b/docs/src/content/docs/guides/search-and-fetch.mdx @@ -30,16 +30,16 @@ You will need a [Bing Web Search API key](/genaiscript/reference/scripts/web-sea 3. Use the [`webSearch`](/genaiscript/reference/scripts/web-search/) function to search for information about the destination. If you don't have one, then you can search for the web pages manually and use the URLs directly - in the call to the `fetchText` function. + in the call to the `host.fetchText` function. ```js const parkinfo = await retrieval.webSearch("mt rainier things to do") ``` 4. `webSearch` returns a list of URLs. Use [`fetchText`](/genaiscript/reference/scripts/fetch/) to fetch the contents of the 1st URL. ```js - const parktext = await fetchText(parkinfo.webPages[0]) + const parktext = await host.fetchText(parkinfo.webPages[0]) ``` -5. `fetchText` returns a lot of formatting HTML tags, etc. +5. `host.fetchText` returns a lot of formatting HTML tags, etc. Use [`runPrompt`](/genaiscript/reference/scripts/inline-prompts/) to call the LLM to clean out the tags and just keep the text. ```js diff --git a/docs/src/content/docs/reference/scripts/fetch.md b/docs/src/content/docs/reference/scripts/fetch.md index 206ba259d2..e84e0fcf62 100644 --- a/docs/src/content/docs/reference/scripts/fetch.md +++ b/docs/src/content/docs/reference/scripts/fetch.md @@ -9,12 +9,20 @@ keywords: fetch API, fetchText, HTTP requests, scripts, API key The JavaScript `fetch` API is available; but we also provide a helper `fetchText` for issuing requests into a friendly format. 
-## `fetchText` +## `host.fetch` -Use `fetchText` to issue requests and download text from the internet. +The `host.fetch` function is a wrapper around the global `fetch` function which adds builtin proxy support and retry capabilities. + +```js +const response = await host.fetch("https://api.example.com", { retries: 3 }) +``` + +## `host.fetchText` + +Use `host.fetchText` to issue requests and download text from the internet. ```ts -const { text, file } = await fetchText("https://....") +const { text, file } = await host.fetchText("https://....") if (text) $`And also ${text}` def("FILE", file) @@ -23,7 +31,7 @@ def("FILE", file) fetchText will also resolve the contents of file in the current workspace if the url is a relative path. ```ts -const { file } = await fetchText("README.md") +const { file } = await host.fetchText("README.md") def("README", file) ``` diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index 8313919f25..ef8ef54a44 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -503,7 +503,10 @@ export async function runScript( let _ghInfo: GithubConnectionInfo = undefined const resolveGitHubInfo = async () => { if (!_ghInfo) - _ghInfo = await githubParseEnv(process.env, { issue: pullRequest }) + _ghInfo = await githubParseEnv(process.env, { + issue: pullRequest, + resolveIssue: true, + }) return _ghInfo } let adoInfo: AzureDevOpsEnv = undefined diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index c45aea975d..5ff5810e61 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -42,7 +42,13 @@ export async function createFetch( } = options || {} // We create a proxy based on Node.js environment variables. 
- const proxy = process.env.HTTPS_PROXY || process.env.HTTP_PROXY || process.env.https_proxy || process.env.http_proxy; + const proxy = + process.env.GENAISCRIPT_HTTPS_PROXY || + process.env.GENAISCRIPT_HTTP_PROXY || + process.env.HTTPS_PROXY || + process.env.HTTP_PROXY || + process.env.https_proxy || + process.env.http_proxy const agent = proxy ? new HttpsProxyAgent(proxy) : null // We enrich crossFetch with the proxy. @@ -89,6 +95,22 @@ export async function createFetch( return fetchRetry } +export async function fetch( + input: string | URL | globalThis.Request, + options?: FetchOptions & TraceOptions +): Promise { + const { retryOn, retries, retryDelay, maxDelay, trace, ...rest } = + options || {} + const f = await createFetch({ + retryOn, + retries, + retryDelay, + maxDelay, + trace, + }) + return f(input, rest) +} + /** * Fetches text content from a URL or file. * @@ -101,8 +123,10 @@ export async function createFetch( */ export async function fetchText( urlOrFile: string | WorkspaceFile, - fetchOptions?: FetchTextOptions + fetchOptions?: FetchTextOptions & TraceOptions ) { + const { retries, retryDelay, retryOn, maxDelay, trace, ...rest } = + fetchOptions || {} if (typeof urlOrFile === "string") { urlOrFile = { filename: urlOrFile, @@ -114,8 +138,14 @@ export async function fetchText( let status = 404 let text: string if (/^https?:\/\//i.test(url)) { - const fetch = await createFetch() - const resp = await fetch(url, fetchOptions) + const f = await createFetch({ + retries, + retryDelay, + retryOn, + maxDelay, + trace, + }) + const resp = await f(url, rest) ok = resp.ok status = resp.status if (ok) text = await resp.text() diff --git a/packages/core/src/github.ts b/packages/core/src/github.ts index 4a1b54de14..915f23627d 100644 --- a/packages/core/src/github.ts +++ b/packages/core/src/github.ts @@ -86,7 +86,9 @@ async function githubGetPullRequestNumber() { export async function githubParseEnv( env: Record, - options?: { issue?: number } & Partial> + 
options?: { issue?: number; resolveIssue?: boolean } & Partial< + Pick + > ): Promise { const res = githubFromEnv(env) try { @@ -110,8 +112,9 @@ export async function githubParseEnv( res.repo = repo res.owner = owner.login res.repository = res.owner + "/" + res.repo - if (isNaN(res.issue)) res.issue = await githubGetPullRequestNumber() } + if (isNaN(res.issue) && options?.resolveIssue) + res.issue = await githubGetPullRequestNumber() } catch (e) {} return Object.freeze(res) } diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index 4634391e24..8affe362c6 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -27,6 +27,7 @@ import { HTMLEscape } from "./html" import { hash } from "./crypto" import { resolveModelConnectionInfo } from "./models" import { DOCS_WEB_SEARCH_URL } from "./constants" +import { fetch, fetchText } from "./fetch" /** * Creates a prompt context for the given project, variables, trace, options, and model. @@ -212,6 +213,8 @@ export async function createPromptContext( // Define the host for executing commands, browsing, and other operations const promptHost: PromptHost = Object.freeze({ + fetch: (url, options) => fetch(url, {...(options || {}), trace }), + fetchText: (url, options) => fetchText(url, {...(options || {}), trace }), resolveLanguageModel: async (modelId) => { const { configuration } = await resolveModelConnectionInfo( { model: modelId }, diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 06076f590e..de71bf8549 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -2237,8 +2237,6 @@ interface Retrieval { ): Promise } -type FetchTextOptions = Omit - interface DataFilter { /** * The keys to select from the object. 
@@ -3242,11 +3240,43 @@ interface ContentSafetyHost { contentSafety(id?: ContentSafetyProvider): Promise } +type FetchOptions = RequestInit & { + retryOn?: number[] // HTTP status codes to retry on + retries?: number // Number of retry attempts + retryDelay?: number // Initial delay between retries + maxDelay?: number // Maximum delay between retries +} + +type FetchTextOptions = Omit + interface PromptHost extends ShellHost, UserInterfaceHost, LanguageModelHost, ContentSafetyHost { + /** + * A fetch wrapper with proxy, retry and timeout handling. + */ + fetch( + input: string | URL | globalThis.Request, + init?: FetchOptions + ): Promise + + /** + * A function that fetches text from a URL or a file + * @param url + * @param options + */ + fetchText( + url: string | WorkspaceFile, + options?: FetchTextOptions + ): Promise<{ + ok: boolean + status: number + text?: string + file?: WorkspaceFile + }> + /** * Opens a in-memory key-value cache for the given cache name. Entries are dropped when the cache grows too large. * @param cacheName diff --git a/packages/core/src/types/prompt_type.d.ts b/packages/core/src/types/prompt_type.d.ts index 8a9fc2e566..cc9d44b14e 100644 --- a/packages/core/src/types/prompt_type.d.ts +++ b/packages/core/src/types/prompt_type.d.ts @@ -222,8 +222,7 @@ declare var git: Git declare var tokenizers: Tokenizers /** - * Fetches a given URL and returns the response. 
- * @param url + * @deprecated use `host.fetchText` instead */ declare function fetchText( url: string | WorkspaceFile, diff --git a/packages/sample/genaisrc/fetch.genai.js b/packages/sample/genaisrc/fetch.genai.mjs similarity index 50% rename from packages/sample/genaisrc/fetch.genai.js rename to packages/sample/genaisrc/fetch.genai.mjs index ded696b053..926621d7c1 100644 --- a/packages/sample/genaisrc/fetch.genai.js +++ b/packages/sample/genaisrc/fetch.genai.mjs @@ -8,12 +8,17 @@ script({ /** * @type {any} */ -const res = await fetch( +const res = await host.fetch( "https://raw.githubusercontent.com/microsoft/genaiscript/main/package.json", { method: "GET" } ) const pkg = await res.json() +const { file: readme } = await host.fetchText( + "https://raw.githubusercontent.com/microsoft/genaiscript/refs/heads/main/README.md" +) + def("PACKAGE", YAML.stringify(pkg)) +def("README", readme) -$`Explain the purpose of the product described in PACKAGE. Mention its name.` +$`Explain the purpose of the product described in PACKAGE and README. 
Mention its name.` diff --git a/packages/sample/genaisrc/lza_review.genai.js b/packages/sample/genaisrc/lza_review.genai.js index 31ecbf05ea..c2f1f28d7f 100644 --- a/packages/sample/genaisrc/lza_review.genai.js +++ b/packages/sample/genaisrc/lza_review.genai.js @@ -30,7 +30,7 @@ for (const link of biceps) { const [, , p] = dependency if (p.includes("shared")) continue // ignore those shared files const dp = path.join(dirname, p) - const resp = await fetchText(dp) + const resp = await host.fetchText(dp) def("DEPS", resp.file, { lineNumbers: true }) } } diff --git a/slides/genaisrc/style-tester.genai.js b/packages/sample/genaisrc/style-tester.genai.mjs similarity index 100% rename from slides/genaisrc/style-tester.genai.js rename to packages/sample/genaisrc/style-tester.genai.mjs diff --git a/packages/vscode/tutorial.md b/packages/vscode/tutorial.md index b04de6f672..44557882c0 100644 --- a/packages/vscode/tutorial.md +++ b/packages/vscode/tutorial.md @@ -201,7 +201,7 @@ defTool( "fetch", "Download text from a URL", { url: "https://..." 
}, - ({ url }) => fetchText(url) + ({ url }) => host.fetchText(url) ) $`Summarize https://raw.githubusercontent.com/microsoft/genaiscript/main/README.md in 1 sentence.` diff --git a/slides/genaisrc/.gitattributes b/slides/genaisrc/.gitattributes deleted file mode 100644 index b89350c92b..0000000000 --- a/slides/genaisrc/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -genaiscript.d.ts -diff merge=ours linguist-generated \ No newline at end of file diff --git a/slides/genaisrc/.gitignore b/slides/genaisrc/.gitignore deleted file mode 100644 index 6641d96c08..0000000000 --- a/slides/genaisrc/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# auto-generated -genaiscript.d.ts -tsconfig.json -jsconfig.json \ No newline at end of file diff --git a/slides/genaisrc/jsconfig.json b/slides/genaisrc/jsconfig.json deleted file mode 100644 index 4c21c58ce6..0000000000 --- a/slides/genaisrc/jsconfig.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "compilerOptions": { - "lib": [ - "ES2022" - ], - "target": "ES2022", - "module": "ES2022", - "moduleDetection": "force", - "checkJs": true, - "allowJs": true, - "skipLibCheck": true - }, - "include": [ - "*.js", - "./genaiscript.d.ts" - ] -} \ No newline at end of file diff --git a/slides/genaisrc/tsconfig.json b/slides/genaisrc/tsconfig.json deleted file mode 100644 index 510eefe8f9..0000000000 --- a/slides/genaisrc/tsconfig.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "compilerOptions": { - "lib": [ - "ES2022" - ], - "target": "ES2023", - "module": "NodeNext", - "moduleDetection": "force", - "moduleResolution": "nodenext", - "checkJs": true, - "allowJs": true, - "skipLibCheck": true, - "noEmit": true, - "allowImportingTsExtensions": true - }, - "include": [ - "*.mjs", - "*.mts", - "./genaiscript.d.ts" - ] -} \ No newline at end of file