diff --git a/package.json b/package.json
index cb57ecc..2453a4a 100644
--- a/package.json
+++ b/package.json
@@ -101,10 +101,12 @@
   },
   "dependencies": {
     "@google/generative-ai": "^0.3.1",
+    "@langchain/google-genai": "^0.0.10",
     "@xenova/transformers": "^2.16.1",
     "diff": "^5.2.0",
     "elevenlabs": "^0.2.2",
     "inquirer": "^8.0.0",
+    "langchain": "^0.1.31",
     "node-record-lpcm16": "^1.0.1",
     "ollama": "^0.5.0",
     "openai": "^4.31.0",
diff --git a/src/experimental/langchain-test-gemini.js b/src/experimental/langchain-test-gemini.js
new file mode 100644
index 0000000..02fe195
--- /dev/null
+++ b/src/experimental/langchain-test-gemini.js
@@ -0,0 +1,105 @@
+const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
+const { TextLoader } = require('langchain/document_loaders/fs/text');
+const { DirectoryLoader } = require('langchain/document_loaders/fs/directory');
+
+const {
+  createStuffDocumentsChain,
+} = require('langchain/chains/combine_documents');
+const { ChatPromptTemplate } = require('@langchain/core/prompts');
+const { RecursiveCharacterTextSplitter } = require('langchain/text_splitter');
+const { MemoryVectorStore } = require('langchain/vectorstores/memory');
+const { createRetrievalChain } = require('langchain/chains/retrieval');
+const { GoogleGenerativeAIEmbeddings } = require('@langchain/google-genai');
+
+const config = require('../../config');
+const prompts = require('../prompt');
+
+const safetySettings = [
+  {
+    category: 'HARM_CATEGORY_HARASSMENT',
+    threshold: 'BLOCK_ONLY_HIGH',
+  },
+  {
+    category: 'HARM_CATEGORY_HATE_SPEECH',
+    threshold: 'BLOCK_ONLY_HIGH',
+  },
+  {
+    category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
+    threshold: 'BLOCK_ONLY_HIGH',
+  },
+  {
+    category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
+    threshold: 'BLOCK_ONLY_HIGH',
+  },
+];
+
+const model = new ChatGoogleGenerativeAI({
+  apiKey: config.geminiApiToken,
+  modelName: 'gemini-pro',
+  maxOutputTokens: 2048,
+  safetySettings,
+});
+
+const embeddings = new GoogleGenerativeAIEmbeddings({
+  apiKey: config.geminiApiToken,
+  modelName: 'embedding-001', // 768 dimensions
+  //taskType: TaskType.RETRIEVAL_DOCUMENT,
+  //title: "Document title",
+});
+
+go().catch(console.error);
+
+console.log(prompts);
+
+async function go() {
+  const prompt = ChatPromptTemplate.fromTemplate(
+    `${prompts.systemPrompt}. In addition, you might use the vocabulary, language, and style of the following context:
+    {context}
+    Now answer this: {input}`
+  );
+
+  const chain = await createStuffDocumentsChain({
+    llm: model,
+    prompt,
+  });
+  // const dir = 'transcripts';
+  // const files = fs.readdirSync(dir);
+
+  // for (const file of files) {
+  //   if (path.extname(file) === '.txt') {
+  //     const filePath = path.join(dir, file);
+  //     const loader = new TextLoader(filePath);
+  //     const docs = await loader.load();
+  //   }
+  // }
+
+  // const loader = new TextLoader('transcripts/_-AfhLQfb6w.txt');
+  // const docs = await loader.load();
+
+  const loader = new DirectoryLoader('transcripts', {
+    '.txt': (path) => new TextLoader(path),
+  });
+  const docs = await loader.load();
+
+  const splitter = new RecursiveCharacterTextSplitter({
+    chunkSize: 1000,
+    chunkOverlap: 20,
+  });
+  const splitDocs = await splitter.splitDocuments(docs);
+
+  const vectorstore = await MemoryVectorStore.fromDocuments(
+    splitDocs,
+    embeddings
+  );
+  const retriever = vectorstore.asRetriever({ k: 2 });
+  const retrievalChain = await createRetrievalChain({
+    combineDocsChain: chain,
+    retriever,
+  });
+  const response = await retrievalChain.invoke({
+    input:
+      'Hi MattGPT! Please introduce yourself for the audience here at the Bell House, Brooklyn. Tell us what you are here to do.',
+  });
+
+  console.log(response);
+}
diff --git a/src/experimental/langchain-test.js b/src/experimental/langchain-test.js
new file mode 100644
index 0000000..62962b7
--- /dev/null
+++ b/src/experimental/langchain-test.js
@@ -0,0 +1,44 @@
+const { OpenAI, ChatOpenAI, OpenAIEmbeddings } = require('@langchain/openai');
+// const { TextLoader } = require('langchain/document_loaders/fs/text');
+const { RecursiveCharacterTextSplitter } = require('langchain/text_splitter');
+const { HNSWLib } = require('langchain/vectorstores/hnswlib');
+
+const fs = require('fs');
+
+const config = require('../../config');
+
+const chatModel = new ChatOpenAI({
+  openAIApiKey: config.openAIApiToken,
+});
+
+const model = new OpenAI({
+  openAIApiKey: config.openAIApiToken,
+});
+
+const embeddings = new OpenAIEmbeddings({
+  openAIApiKey: config.openAIApiToken,
+});
+
+testChat().catch(console.error);
+
+async function testChat() {
+  // const loader = new TextLoader('transcripts/example.txt');
+  // const docs = await loader.load();
+
+  const text = fs.readFileSync('transcripts/example.txt', 'utf8');
+  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
+  const docs = await textSplitter.createDocuments([text]);
+
+  const vectorStore = await HNSWLib.fromDocuments(docs, embeddings);
+
+  // Initialize a retriever wrapper around the vector store
+  const retriever = vectorStore.asRetriever();
+
+  const references = await retriever.getRelevantDocuments(
+    'what is a raindobw?'
+  );
+  console.log(references);
+
+  //const response = await chatModel.invoke('How are you?');
+  //console.log(response);
+}