Skip to content

Commit

Permalink
Merge pull request #96 from Royal-lobster/fix-embeddings-error
Browse files Browse the repository at this point in the history
  • Loading branch information
Royal-lobster authored Nov 5, 2024
2 parents f0a0a71 + e3e1e87 commit 073d961
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 14 deletions.
13 changes: 13 additions & 0 deletions .changeset/tasty-ravens-tickle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
"syncia": patch
---

Fix storage quota exceeded error for embeddings

Previously, the application would fail when trying to store large embeddings in localStorage due to quota limitations. This patch:

- Transitions from localStorage to IndexedDB for storing embeddings
- Implements new functions `saveToIndexedDB` and `getFromIndexedDB`
- Resolves the "QuotaExceededError" when storing large datasets

This change lets the application store much larger embeddings, since IndexedDB's quota is far higher than localStorage's (IndexedDB is still subject to the browser's overall storage quota).
72 changes: 71 additions & 1 deletion src/hooks/useStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
useRef,
} from 'react'

export type StorageArea = 'sync' | 'local'
export type StorageArea = 'sync' | 'local' | 'indexedDB'

// custom hook to set chrome local/sync storage
// should also set a listener on this specific key
Expand Down Expand Up @@ -75,6 +75,9 @@ export async function readStorage<T>(
key: string,
area: StorageArea = 'local',
): Promise<T | undefined> {
if (area === 'indexedDB') {
return getFromIndexedDB<T>(key)
}
try {
const result = await chrome.storage[area].get(key)
return result?.[key]
Expand All @@ -96,6 +99,10 @@ export async function setStorage<T>(
value: T,
area: StorageArea = 'local',
): Promise<boolean> {
if (area === 'indexedDB') {
await saveToIndexedDB<T>(key, value)
return true
}
try {
await chrome.storage[area].set({ [key]: value })
return true
Expand All @@ -104,3 +111,66 @@ export async function setStorage<T>(
return false
}
}

/**
 * Stores `data` under `key` in the `embeddings` object store of the
 * `SynciaDB` IndexedDB database, creating the store on first open.
 *
 * The connection opened for the write is closed again once the transaction
 * finishes (successfully or not), so repeated calls do not accumulate open
 * connections.
 *
 * @param key - Out-of-line key the value is stored under.
 * @param data - Value to persist; it is handed to `IDBObjectStore.put` as-is.
 * @returns A promise that resolves after the write transaction has completed.
 * @throws Rejects with the underlying error when the database cannot be
 *   opened, the transaction errors or aborts (e.g. quota exceeded), or
 *   `put` throws synchronously.
 */
export const saveToIndexedDB = async <T>(
  key: string,
  data: T,
): Promise<void> => {
  return new Promise<void>((resolve, reject) => {
    const request = indexedDB.open('SynciaDB', 1)

    request.onupgradeneeded = () => {
      const db = request.result
      // Guard so a re-run of the upgrade path cannot throw ConstraintError.
      if (!db.objectStoreNames.contains('embeddings')) {
        db.createObjectStore('embeddings')
      }
    }

    request.onerror = () => {
      reject(request.error ?? new Error('Failed to open SynciaDB'))
    }

    request.onsuccess = () => {
      const db = request.result
      try {
        const transaction = db.transaction('embeddings', 'readwrite')

        // Shared by `error` and `abort`; closing/rejecting twice is harmless.
        const fail = () => {
          db.close()
          reject(
            transaction.error ??
              new Error(`Failed to write "${key}" to SynciaDB`),
          )
        }

        transaction.oncomplete = () => {
          db.close()
          resolve()
        }
        transaction.onerror = fail
        // Quota failures surface as an aborted transaction, not only `error`.
        transaction.onabort = fail

        transaction.objectStore('embeddings').put(data, key)
      } catch (error) {
        // `transaction()` / `put()` can throw synchronously; without this the
        // promise would never settle and the connection would stay open.
        db.close()
        reject(error)
      }
    }
  })
}

/**
 * Reads the value stored under `key` from the `embeddings` object store of
 * the `SynciaDB` IndexedDB database, creating the store on first open.
 *
 * The connection opened for the read is closed again once the transaction
 * finishes (successfully or not), so repeated calls do not accumulate open
 * connections.
 *
 * @param key - Key to look up.
 * @returns A promise resolving to the stored value. NOTE: when no entry
 *   exists for `key`, `IDBObjectStore.get` yields `undefined`, so callers
 *   should treat the result as possibly `undefined` despite the declared
 *   type (kept as `Promise<T>` for backward compatibility).
 * @throws Rejects with the underlying error when the database cannot be
 *   opened, the read request or transaction errors or aborts, or
 *   `transaction()` / `get()` throws synchronously.
 */
export const getFromIndexedDB = async <T>(key: string): Promise<T> => {
  return new Promise<T>((resolve, reject) => {
    const request = indexedDB.open('SynciaDB', 1)

    request.onupgradeneeded = () => {
      const db = request.result
      // Guard so a re-run of the upgrade path cannot throw ConstraintError.
      if (!db.objectStoreNames.contains('embeddings')) {
        db.createObjectStore('embeddings')
      }
    }

    request.onerror = () => {
      reject(request.error ?? new Error('Failed to open SynciaDB'))
    }

    request.onsuccess = () => {
      const db = request.result
      try {
        const transaction = db.transaction('embeddings', 'readonly')
        const getRequest = transaction.objectStore('embeddings').get(key)

        // Shared by every failure path; closing/rejecting twice is harmless.
        const fail = () => {
          db.close()
          reject(
            getRequest.error ??
              transaction.error ??
              new Error(`Failed to read "${key}" from SynciaDB`),
          )
        }

        getRequest.onsuccess = () => {
          resolve(getRequest.result as T)
        }
        getRequest.onerror = fail

        // Release the connection once the transaction is done either way.
        transaction.oncomplete = () => {
          db.close()
        }
        transaction.onerror = fail
        transaction.onabort = fail
      } catch (error) {
        // `transaction()` / `get()` can throw synchronously; without this the
        // promise would never settle and the connection would stay open.
        db.close()
        reject(error)
      }
    }
  })
}
16 changes: 3 additions & 13 deletions src/lib/getMatchedContent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ import { OpenAIEmbeddings } from '@langchain/openai'
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
import { MemoryVectorStore } from 'langchain/vectorstores/memory'
import { createSHA256Hash } from './createSHA256Hash'
import { readStorage, setStorage } from '../hooks/useStorage'

/**
* This function is responsible for getting the matched content
* from the context and query
*/
export const getMatchedContent = async (
query: string,
context: string,
Expand All @@ -19,11 +16,6 @@ export const getMatchedContent = async (
return relevantDocs.map((doc) => doc.pageContent).join('\n')
}

/**
* This function is responsible for getting the context vector store
* from the context. It caches the vector store in the local storage
* for faster retrieval
*/
const getContextVectorStore = async (
context: string,
apiKey: string,
Expand All @@ -36,17 +28,15 @@ const getContextVectorStore = async (
},
})
const hashKey = `SYNCIA_STORE_EMBEDDINGS_${await createSHA256Hash(context)}`
const memoryVectors: [] | null = JSON.parse(
localStorage.getItem(hashKey) || 'null',
)
const memoryVectors = await readStorage<[]>(hashKey, 'indexedDB')

if (!memoryVectors) {
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
})
const docs = await textSplitter.createDocuments([context])
const store = await MemoryVectorStore.fromDocuments(docs, embeddings)
localStorage.setItem(hashKey, JSON.stringify(store.memoryVectors))
await setStorage(hashKey, store.memoryVectors, 'indexedDB')
return store
}

Expand Down

0 comments on commit 073d961

Please sign in to comment.