diff --git a/.changeset/tasty-ravens-tickle.md b/.changeset/tasty-ravens-tickle.md
new file mode 100644
index 00000000..d1848972
--- /dev/null
+++ b/.changeset/tasty-ravens-tickle.md
@@ -0,0 +1,13 @@
+---
+"syncia": patch
+---
+
+Fix storage quota exceeded error for embeddings
+
+Previously, the application would fail when trying to store large embeddings in localStorage due to quota limitations. This patch:
+
+- Transitions from localStorage to IndexedDB for storing embeddings
+- Implements new functions `saveToIndexedDB` and `getFromIndexedDB`
+- Resolves the "QuotaExceededError" when storing large datasets
+
+This change improves the application's ability to handle larger embeddings without storage constraints.
diff --git a/src/hooks/useStorage.ts b/src/hooks/useStorage.ts
index 0ef96c05..0e70dbce 100644
--- a/src/hooks/useStorage.ts
+++ b/src/hooks/useStorage.ts
@@ -7,7 +7,7 @@ import {
   useRef,
 } from 'react'
 
-export type StorageArea = 'sync' | 'local'
+export type StorageArea = 'sync' | 'local' | 'indexedDB'
 
 // custom hook to set chrome local/sync storage
 // should also set a listener on this specific key
@@ -75,6 +75,9 @@ export async function readStorage<T>(
   key: string,
   area: StorageArea = 'local',
 ): Promise<T | undefined> {
+  if (area === 'indexedDB') {
+    return getFromIndexedDB(key)
+  }
   try {
     const result = await chrome.storage[area].get(key)
     return result?.[key]
@@ -96,6 +99,10 @@ export async function setStorage<T>(
   key: string,
   value: T,
   area: StorageArea = 'local',
 ): Promise<boolean> {
+  if (area === 'indexedDB') {
+    await saveToIndexedDB(key, value)
+    return true
+  }
   try {
     await chrome.storage[area].set({ [key]: value })
     return true
@@ -104,3 +111,66 @@
     return false
   }
 }
+
+export const saveToIndexedDB = async <T>(
+  key: string,
+  data: T,
+): Promise<void> => {
+  return new Promise((resolve, reject) => {
+    const request = indexedDB.open('SynciaDB', 1)
+
+    request.onupgradeneeded = (event) => {
+      const db = (event.target as IDBOpenDBRequest).result
+      db.createObjectStore('embeddings')
+    }
+
+    request.onsuccess = (event) => {
+      const db = (event.target as IDBOpenDBRequest).result
+      const transaction = db.transaction('embeddings', 'readwrite')
+      const store = transaction.objectStore('embeddings')
+      store.put(data, key)
+
+      transaction.oncomplete = () => {
+        resolve()
+      }
+
+      transaction.onerror = (event) => {
+        reject(event)
+      }
+    }
+
+    request.onerror = (event) => {
+      reject(event)
+    }
+  })
+}
+
+export const getFromIndexedDB = async (key: string): Promise<any> => {
+  return new Promise((resolve, reject) => {
+    const request = indexedDB.open('SynciaDB', 1)
+
+    request.onupgradeneeded = (event) => {
+      const db = (event.target as IDBOpenDBRequest).result
+      db.createObjectStore('embeddings')
+    }
+
+    request.onsuccess = (event) => {
+      const db = (event.target as IDBOpenDBRequest).result
+      const transaction = db.transaction('embeddings', 'readonly')
+      const store = transaction.objectStore('embeddings')
+      const getRequest = store.get(key)
+
+      getRequest.onsuccess = () => {
+        resolve(getRequest.result)
+      }
+
+      getRequest.onerror = (event) => {
+        reject(event)
+      }
+    }
+
+    request.onerror = (event) => {
+      reject(event)
+    }
+  })
+}
diff --git a/src/lib/getMatchedContent.ts b/src/lib/getMatchedContent.ts
index 10f37804..81e86031 100644
--- a/src/lib/getMatchedContent.ts
+++ b/src/lib/getMatchedContent.ts
@@ -2,11 +2,8 @@ import { OpenAIEmbeddings } from '@langchain/openai'
 import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
 import { MemoryVectorStore } from 'langchain/vectorstores/memory'
 import { createSHA256Hash } from './createSHA256Hash'
+import { readStorage, setStorage } from '../hooks/useStorage'
 
-/**
- * This function is responsible for getting the matched content
- * from the context and query
- */
 export const getMatchedContent = async (
   query: string,
   context: string,
@@ -19,11 +16,6 @@
   return relevantDocs.map((doc) => doc.pageContent).join('\n')
 }
 
-/**
- * This function is responsible for getting the context vector store
- * from the context. It caches the vector store in the local storage
- * for faster retrieval
- */
 const getContextVectorStore = async (
   context: string,
   apiKey: string,
@@ -36,9 +28,7 @@
     },
   })
   const hashKey = `SYNCIA_STORE_EMBEDDINGS_${await createSHA256Hash(context)}`
-  const memoryVectors: [] | null = JSON.parse(
-    localStorage.getItem(hashKey) || 'null',
-  )
+  const memoryVectors = await readStorage<[]>(hashKey, 'indexedDB')
 
   if (!memoryVectors) {
     const textSplitter = new RecursiveCharacterTextSplitter({
@@ -46,7 +36,7 @@
     })
     const docs = await textSplitter.createDocuments([context])
     const store = await MemoryVectorStore.fromDocuments(docs, embeddings)
-    localStorage.setItem(hashKey, JSON.stringify(store.memoryVectors))
+    await setStorage(hashKey, store.memoryVectors, 'indexedDB')
     return store
   }
 