From 7378631a17310513fe7fec555d573409df104a27 Mon Sep 17 00:00:00 2001 From: Srujan Gurram <52039218+Royal-lobster@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:37:10 +0530 Subject: [PATCH 1/4] Fix error when storing embeddings in localStorage Fixes #95 Update `getContextVectorStore` to use `IndexedDB` instead of `localStorage` for storing embeddings. * **src/lib/getMatchedContent.ts** - Import `getFromIndexedDB` and `saveToIndexedDB` from `useStorage.ts`. - Replace `localStorage` usage with `IndexedDB` functions for storing and retrieving embeddings. * **src/hooks/useStorage.ts** - Add `saveToIndexedDB` function to save data to `IndexedDB`. - Add `getFromIndexedDB` function to retrieve data from `IndexedDB`. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/Royal-lobster/Syncia/issues/95?shareId=XXXX-XXXX-XXXX-XXXX). --- src/hooks/useStorage.ts | 66 ++++++++++++++++++++++++++++++++++++ src/lib/getMatchedContent.ts | 9 +++-- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/src/hooks/useStorage.ts b/src/hooks/useStorage.ts index 0ef96c05..5990bff4 100644 --- a/src/hooks/useStorage.ts +++ b/src/hooks/useStorage.ts @@ -104,3 +104,69 @@ export async function setStorage( return false } } + +/** + * Function to save data to IndexedDB + */ +export const saveToIndexedDB = async (key: string, data: any) => { + return new Promise((resolve, reject) => { + const request = indexedDB.open('SynciaDB', 1) + + request.onupgradeneeded = (event) => { + const db = (event.target as IDBOpenDBRequest).result + db.createObjectStore('embeddings') + } + + request.onsuccess = (event) => { + const db = (event.target as IDBOpenDBRequest).result + const transaction = db.transaction('embeddings', 'readwrite') + const store = transaction.objectStore('embeddings') + store.put(data, key) + + transaction.oncomplete = () => { + resolve() + } + + transaction.onerror = (event) => { + reject(event) + } + } + + request.onerror = (event) => { + reject(event) + } + }) +} + +/** + * Function to retrieve data from IndexedDB + */ +export const getFromIndexedDB = async (key: string) => { + return new Promise((resolve, reject) => { + const request = indexedDB.open('SynciaDB', 1) + + request.onupgradeneeded = (event) => { + const db = (event.target as IDBOpenDBRequest).result + db.createObjectStore('embeddings') + } + + request.onsuccess = (event) => { + const db = (event.target as IDBOpenDBRequest).result + const transaction = db.transaction('embeddings', 'readonly') + const store = transaction.objectStore('embeddings') + const getRequest = store.get(key) + + getRequest.onsuccess = () => { + resolve(getRequest.result) + } + + getRequest.onerror = (event) => { + reject(event) + } + } + + request.onerror = (event) => { + reject(event) + } + }) +} diff --git a/src/lib/getMatchedContent.ts b/src/lib/getMatchedContent.ts index 10f37804..746a0e81 100644 --- a/src/lib/getMatchedContent.ts +++ b/src/lib/getMatchedContent.ts @@ -2,6 +2,7 @@ import { OpenAIEmbeddings } from '@langchain/openai' import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter' import { MemoryVectorStore } from 'langchain/vectorstores/memory' import { createSHA256Hash } from './createSHA256Hash' +import { getFromIndexedDB, saveToIndexedDB } from '../hooks/useStorage' /** * This function is responsible for getting the matched content @@ -21,7 +22,7 @@ export const getMatchedContent = async ( /** * This function is responsible for getting the context vector store - * from the context. It caches the vector store in the local storage + * from the context. It caches the vector store in the IndexedDB * for faster retrieval */ const getContextVectorStore = async ( @@ -36,9 +37,7 @@ const getContextVectorStore = async ( }, }) const hashKey = `SYNCIA_STORE_EMBEDDINGS_${await createSHA256Hash(context)}` - const memoryVectors: [] | null = JSON.parse( - localStorage.getItem(hashKey) || 'null', - ) + const memoryVectors: [] | null = await getFromIndexedDB(hashKey) if (!memoryVectors) { const textSplitter = new RecursiveCharacterTextSplitter({ @@ -46,7 +45,7 @@ const getContextVectorStore = async ( }) const docs = await textSplitter.createDocuments([context]) const store = await MemoryVectorStore.fromDocuments(docs, embeddings) - localStorage.setItem(hashKey, JSON.stringify(store.memoryVectors)) + await saveToIndexedDB(hashKey, store.memoryVectors) return store } From de5a74cd1a982be0980def4a5b722c07c5590365 Mon Sep 17 00:00:00 2001 From: Srujan Gurram <52039218+Royal-lobster@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:41:02 +0530 Subject: [PATCH 2/4] Create tasty-ravens-tickle.md --- .changeset/tasty-ravens-tickle.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/tasty-ravens-tickle.md diff --git a/.changeset/tasty-ravens-tickle.md b/.changeset/tasty-ravens-tickle.md new file mode 100644 index 00000000..24cd49e3 --- /dev/null +++ b/.changeset/tasty-ravens-tickle.md @@ -0,0 +1,5 @@ +--- +"syncia": patch +--- + +Fix error when storing embeddings in localStorage From 6ec2e69d9183dfb0e4e1966dd80732934a228ce8 Mon Sep 17 00:00:00 2001 From: Srujan Gurram <52039218+Royal-lobster@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:51:27 +0530 Subject: [PATCH 3/4] Update .changeset/tasty-ravens-tickle.md Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .changeset/tasty-ravens-tickle.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.changeset/tasty-ravens-tickle.md b/.changeset/tasty-ravens-tickle.md index 24cd49e3..d1848972 100644 --- a/.changeset/tasty-ravens-tickle.md +++ b/.changeset/tasty-ravens-tickle.md @@ -2,4 +2,12 @@ "syncia": patch --- -Fix error when storing embeddings in localStorage +Fix storage quota exceeded error for embeddings + +Previously, the application would fail when trying to store large embeddings in localStorage due to quota limitations. This patch: + +- Transitions from localStorage to IndexedDB for storing embeddings +- Implements new functions `saveToIndexedDB` and `getFromIndexedDB` +- Resolves the "QuotaExceededError" when storing large datasets + +This change improves the application's ability to handle larger embeddings without storage constraints. From e3e1e870c4ac4634198ca835dc7c43ab0e209309 Mon Sep 17 00:00:00 2001 From: Srujan Gurram Date: Tue, 5 Nov 2024 10:59:45 +0530 Subject: [PATCH 4/4] Add support for 'indexedDB' in useStorage --- src/hooks/useStorage.ts | 24 ++++++++++++++---------- src/lib/getMatchedContent.ts | 15 +++------------ 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/hooks/useStorage.ts b/src/hooks/useStorage.ts index 5990bff4..0e70dbce 100644 --- a/src/hooks/useStorage.ts +++ b/src/hooks/useStorage.ts @@ -7,7 +7,7 @@ import { useRef, } from 'react' -export type StorageArea = 'sync' | 'local' +export type StorageArea = 'sync' | 'local' | 'indexedDB' // custom hook to set chrome local/sync storage // should also set a listener on this specific key @@ -75,6 +75,9 @@ export async function readStorage( key: string, area: StorageArea = 'local', ): Promise { + if (area === 'indexedDB') { + return getFromIndexedDB(key) + } try { const result = await chrome.storage[area].get(key) return result?.[key] @@ -96,6 +99,10 @@ export async function setStorage( value: T, area: StorageArea = 'local', ): Promise { + if (area === 'indexedDB') { + await saveToIndexedDB(key, value) + return true + } try { await chrome.storage[area].set({ [key]: value }) return true @@ -105,10 +112,10 @@ export async function setStorage( } } -/** - * Function to save data to IndexedDB - */ -export const saveToIndexedDB = async (key: string, data: any) => { +export const saveToIndexedDB = async ( + key: string, + data: T, +): Promise => { return new Promise((resolve, reject) => { const request = indexedDB.open('SynciaDB', 1) @@ -138,11 +145,8 @@ export const saveToIndexedDB = async (key: string, data: any) => { }) } -/** - * Function to retrieve data from IndexedDB - */ -export const getFromIndexedDB = async (key: string) => { - return new Promise((resolve, reject) => { +export const getFromIndexedDB = async (key: string): Promise => { + return new Promise((resolve, reject) => { const request = indexedDB.open('SynciaDB', 1) request.onupgradeneeded = (event) => { diff --git a/src/lib/getMatchedContent.ts b/src/lib/getMatchedContent.ts index 746a0e81..81e86031 100644 --- a/src/lib/getMatchedContent.ts +++ b/src/lib/getMatchedContent.ts @@ -2,12 +2,8 @@ import { OpenAIEmbeddings } from '@langchain/openai' import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter' import { MemoryVectorStore } from 'langchain/vectorstores/memory' import { createSHA256Hash } from './createSHA256Hash' -import { getFromIndexedDB, saveToIndexedDB } from '../hooks/useStorage' +import { readStorage, setStorage } from '../hooks/useStorage' -/** - * This function is responsible for getting the matched content - * from the context and query - */ export const getMatchedContent = async ( query: string, context: string, @@ -20,11 +16,6 @@ export const getMatchedContent = async ( return relevantDocs.map((doc) => doc.pageContent).join('\n') } -/** - * This function is responsible for getting the context vector store - * from the context. It caches the vector store in the IndexedDB - * for faster retrieval - */ const getContextVectorStore = async ( context: string, apiKey: string, @@ -37,7 +28,7 @@ const getContextVectorStore = async ( }, }) const hashKey = `SYNCIA_STORE_EMBEDDINGS_${await createSHA256Hash(context)}` - const memoryVectors: [] | null = await getFromIndexedDB(hashKey) + const memoryVectors = await readStorage<[]>(hashKey, 'indexedDB') if (!memoryVectors) { const textSplitter = new RecursiveCharacterTextSplitter({ @@ -45,7 +36,7 @@ const getContextVectorStore = async ( }) const docs = await textSplitter.createDocuments([context]) const store = await MemoryVectorStore.fromDocuments(docs, embeddings) - await saveToIndexedDB(hashKey, store.memoryVectors) + await setStorage(hashKey, store.memoryVectors, 'indexedDB') return store }