From 5b1e3c646bb486341d17fc4f615565fd844c482d Mon Sep 17 00:00:00 2001
From: Srujan Gurram
Date: Sat, 25 Nov 2023 23:33:44 +0530
Subject: [PATCH] added cache system to vector store

---
 .../Settings/Sections/ChatSettings.tsx        |  2 +-
 src/components/Sidebar/auth/index.tsx         |  2 +-
 src/components/Sidebar/chat/ChatHistory.tsx   |  2 +-
 src/hooks/useChatCompletion.ts                | 19 +-------
 src/hooks/useStorage.ts                       |  1 +
 src/lib/createSHA256Hash.ts                   | 10 ++++
 src/{utils => lib}/generateReadableDate.ts    |  0
 src/lib/getMatchedContent.ts                  | 48 +++++++++++++++++++
 src/{utils => lib}/validApiKey.ts             |  0
 9 files changed, 64 insertions(+), 20 deletions(-)
 create mode 100644 src/lib/createSHA256Hash.ts
 rename src/{utils => lib}/generateReadableDate.ts (100%)
 create mode 100644 src/lib/getMatchedContent.ts
 rename src/{utils => lib}/validApiKey.ts (100%)

diff --git a/src/components/Settings/Sections/ChatSettings.tsx b/src/components/Settings/Sections/ChatSettings.tsx
index 75faef905..52b0e0442 100644
--- a/src/components/Settings/Sections/ChatSettings.tsx
+++ b/src/components/Settings/Sections/ChatSettings.tsx
@@ -2,7 +2,7 @@ import React from 'react'
 import SectionHeading from '../Elements/SectionHeading'
 import FieldWrapper from '../Elements/FieldWrapper'
 import { useSettings } from '../../../hooks/useSettings'
-import { validateApiKey } from '../../../utils/validApiKey'
+import { validateApiKey } from '../../../lib/validApiKey'
 import { AvailableModels, Mode } from '../../../config/settings'
 
 const ChatSettings = () => {
diff --git a/src/components/Sidebar/auth/index.tsx b/src/components/Sidebar/auth/index.tsx
index a68c8a258..b03595cd6 100644
--- a/src/components/Sidebar/auth/index.tsx
+++ b/src/components/Sidebar/auth/index.tsx
@@ -1,6 +1,6 @@
 import React, { useEffect } from 'react'
 import { useSettings } from '../../../hooks/useSettings'
-import { validateApiKey } from '../../../utils/validApiKey'
+import { validateApiKey } from '../../../lib/validApiKey'
 
 const Auth = () => {
   const [, setSettings] = useSettings()
diff --git a/src/components/Sidebar/chat/ChatHistory.tsx b/src/components/Sidebar/chat/ChatHistory.tsx
index a32ecc0c7..4ef0f6c6f 100644
--- a/src/components/Sidebar/chat/ChatHistory.tsx
+++ b/src/components/Sidebar/chat/ChatHistory.tsx
@@ -1,7 +1,7 @@
 import { useChatHistory } from '../../../hooks/useChatHistory'
 import * as DropdownMenu from '@radix-ui/react-dropdown-menu'
 import { RiAddLine, RiCloseCircleFill, RiTimeLine } from 'react-icons/ri'
-import { generateReadableRelativeDate } from '../../../utils/generateReadableDate'
+import { generateReadableRelativeDate } from '../../../lib/generateReadableDate'
 
 const ChatHistory = () => {
   const {
diff --git a/src/hooks/useChatCompletion.ts b/src/hooks/useChatCompletion.ts
index de852e612..49e7b3697 100644
--- a/src/hooks/useChatCompletion.ts
+++ b/src/hooks/useChatCompletion.ts
@@ -1,12 +1,10 @@
 import endent from 'endent'
 import { ChatOpenAI } from 'langchain/chat_models/openai'
-import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { AIMessage, HumanMessage, SystemMessage } from 'langchain/schema'
-import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
-import { MemoryVectorStore } from 'langchain/vectorstores/memory'
 import { useMemo, useState } from 'react'
 import { AvailableModels, Mode } from '../config/settings'
 import { ChatRole, useCurrentChat } from './useCurrentChat'
+import { getMatchedContent } from '../lib/getMatchedContent'
 
 interface UseChatCompletionProps {
   model: AvailableModels
@@ -80,20 +78,7 @@ export const useChatCompletion = ({
      */
     let matchedContext
     if (context) {
-      const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize: 1000,
-      })
-      const docs = await textSplitter.createDocuments([context])
-      const vectorStore = await MemoryVectorStore.fromDocuments(
-        docs,
-        new OpenAIEmbeddings({
-          openAIApiKey: apiKey,
-        }),
-      )
-      const retriever = vectorStore.asRetriever()
-      const relevantDocs = await
retriever.getRelevantDocuments(query)
-      console.log(relevantDocs)
-      matchedContext = relevantDocs.map((doc) => doc.pageContent).join('\n')
+      matchedContext = await getMatchedContent(query, context, apiKey)
     }
 
     const expandedQuery = matchedContext
diff --git a/src/hooks/useStorage.ts b/src/hooks/useStorage.ts
index b3f3a5a02..80061d888 100644
--- a/src/hooks/useStorage.ts
+++ b/src/hooks/useStorage.ts
@@ -18,6 +18,7 @@ export function useStorage(
 ): [T, SetValue] {
   const [storedValue, setStoredValue] = useAtom(atom)
 
+  // biome-ignore lint/correctness/useExhaustiveDependencies: This works fine. i don't want to change it.
   useEffect(() => {
     readStorage(key, area).then((res) => {
       if (res) setStoredValue(res)
diff --git a/src/lib/createSHA256Hash.ts b/src/lib/createSHA256Hash.ts
new file mode 100644
index 000000000..982aa1135
--- /dev/null
+++ b/src/lib/createSHA256Hash.ts
@@ -0,0 +1,19 @@
+/**
+ * Computes the SHA-256 digest of a string.
+ *
+ * The text is UTF-8 encoded with `TextEncoder` and hashed through the Web
+ * Crypto API (`crypto.subtle.digest`).
+ *
+ * @param content - Text to hash.
+ * @returns The digest as lowercase hex, two characters per byte.
+ */
+export const createSHA256Hash = async (content: string): Promise<string> => {
+  const encoder = new TextEncoder()
+  const data = encoder.encode(content)
+  const hashBuffer = await crypto.subtle.digest('SHA-256', data)
+  const hashArray = Array.from(new Uint8Array(hashBuffer))
+  const hashHex = hashArray
+    .map((byte) => byte.toString(16).padStart(2, '0'))
+    .join('')
+  return hashHex
+}
diff --git a/src/utils/generateReadableDate.ts b/src/lib/generateReadableDate.ts
similarity index 100%
rename from src/utils/generateReadableDate.ts
rename to src/lib/generateReadableDate.ts
diff --git a/src/lib/getMatchedContent.ts b/src/lib/getMatchedContent.ts
new file mode 100644
index 000000000..e0110c41d
--- /dev/null
+++ b/src/lib/getMatchedContent.ts
@@ -0,0 +1,89 @@
+import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
+import { MemoryVectorStore } from 'langchain/vectorstores/memory'
+import { createSHA256Hash } from './createSHA256Hash'
+
+/**
+ * Returns the parts of `context` that are most relevant to `query`.
+ *
+ * A vector store for the context is obtained from `getContextVectorStore`,
+ * queried through `asRetriever()`, and the `pageContent` of every returned
+ * document is joined with newlines.
+ *
+ * @param query - Text to search the context for.
+ * @param context - Full text the matches are taken from.
+ * @param apiKey - OpenAI API key passed to `OpenAIEmbeddings`.
+ * @returns The matched chunks joined by `\n`.
+ */
+export const getMatchedContent = async (
+  query: string,
+  context: string,
+  apiKey: string,
+): Promise<string> => {
+  const vectorStore = await getContextVectorStore(context, apiKey)
+  const retriever = vectorStore.asRetriever()
+  const relevantDocs = await retriever.getRelevantDocuments(query)
+  return relevantDocs.map((doc) => doc.pageContent).join('\n')
+}
+
+/**
+ * Reads cached memory vectors from local storage.
+ *
+ * @param hashKey - Storage key of the cache entry.
+ * @returns The stored vectors, or `null` when the entry is missing, cannot
+ * be read, or does not parse to an array.
+ */
+const readCachedVectors = (
+  hashKey: string,
+): MemoryVectorStore['memoryVectors'] | null => {
+  try {
+    const parsed: unknown = JSON.parse(localStorage.getItem(hashKey) ?? 'null')
+    return Array.isArray(parsed) ? parsed : null
+  } catch {
+    // Unreadable storage or corrupt JSON is treated as a cache miss.
+    return null
+  }
+}
+
+/**
+ * Builds the vector store for `context`, reusing cached embeddings when
+ * possible.
+ *
+ * Vectors are cached in local storage under
+ * `SYNCIA_STORE_EMBEDDINGS_<SHA-256 of context>`. On a cache miss the context
+ * is split into chunks (`chunkSize: 1000`), embedded, and the vectors are
+ * written back. Caching is best-effort: a failed cache read or write never
+ * fails the lookup.
+ *
+ * @param context - Text to embed.
+ * @param apiKey - OpenAI API key passed to `OpenAIEmbeddings`.
+ * @returns A `MemoryVectorStore` holding the vectors for `context`.
+ */
+const getContextVectorStore = async (
+  context: string,
+  apiKey: string,
+): Promise<MemoryVectorStore> => {
+  const embeddings = new OpenAIEmbeddings({ openAIApiKey: apiKey })
+  const hashKey = `SYNCIA_STORE_EMBEDDINGS_${await createSHA256Hash(context)}`
+
+  const cachedVectors = readCachedVectors(hashKey)
+  if (cachedVectors) {
+    const store = new MemoryVectorStore(embeddings)
+    store.memoryVectors = cachedVectors
+    return store
+  }
+
+  const textSplitter = new RecursiveCharacterTextSplitter({
+    chunkSize: 1000,
+  })
+  const docs = await textSplitter.createDocuments([context])
+  const store = await MemoryVectorStore.fromDocuments(docs, embeddings)
+  try {
+    localStorage.setItem(hashKey, JSON.stringify(store.memoryVectors))
+  } catch (error: unknown) {
+    // setItem throws once the storage quota is exceeded; the freshly built
+    // store is still usable, so only the caching step is skipped.
+    console.warn('Failed to cache embeddings', error)
+  }
+  return store
+}
diff --git a/src/utils/validApiKey.ts b/src/lib/validApiKey.ts
similarity index 100%
rename from src/utils/validApiKey.ts
rename to src/lib/validApiKey.ts