From fccf12dff686b7d104a395f01ae2ad7d36b442f4 Mon Sep 17 00:00:00 2001 From: Daniel Shiffman Date: Sat, 6 Apr 2024 09:58:20 -0400 Subject: [PATCH 1/2] path fix issue --- src/experimental/tts-piper-server.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/experimental/tts-piper-server.js b/src/experimental/tts-piper-server.js index e01f1c4..5d1fc2e 100644 --- a/src/experimental/tts-piper-server.js +++ b/src/experimental/tts-piper-server.js @@ -4,6 +4,7 @@ const fs = require('fs'); speak(`Hello! I'm MattGPT, ready to battle with Dan Shiffman!`); async function speak(txt) { + // export DYLD_LIBRARY_PATH=/System/Volumes/Data/Users/shiffman/Repos/codingtrain/pp/install/lib:$DYLD_LIBRARY_PATH // python3 -m piper.http_server --model en_GB-alan-medium.onnx --port 5001 const response = await fetch(`http://127.0.0.1:5001/?text=${txt}`); const blob = await response.blob(); From 24b782a6a100e92385bf8b39772a45bbd612b708 Mon Sep 17 00:00:00 2001 From: Daniel Shiffman Date: Sun, 7 Apr 2024 22:02:00 -0400 Subject: [PATCH 2/2] langchain RAG experiment --- .gitignore | 3 +- package.json | 2 + src/experimental/langchain-test-gemini.js | 105 ++++++++++++++++++++++ src/experimental/langchain-test.js | 44 +++++++++ 4 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 src/experimental/langchain-test-gemini.js create mode 100644 src/experimental/langchain-test.js diff --git a/.gitignore b/.gitignore index dea59f8..1035650 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ node_modules *.wav *.bin *.npy -config.js \ No newline at end of file +config.js +transcripts/ \ No newline at end of file diff --git a/package.json b/package.json index 8b172fe..79cc11b 100644 --- a/package.json +++ b/package.json @@ -91,10 +91,12 @@ }, "dependencies": { "@google/generative-ai": "^0.3.1", + "@langchain/google-genai": "^0.0.10", "@xenova/transformers": "^2.16.1", "diff": "^5.2.0", "elevenlabs": "^0.2.2", "inquirer": "^8.0.0", + "langchain": "^0.1.31", "node-record-lpcm16": 
"^1.0.1", "ollama": "^0.5.0", "openai": "^4.31.0", diff --git a/src/experimental/langchain-test-gemini.js b/src/experimental/langchain-test-gemini.js new file mode 100644 index 0000000..02fe195 --- /dev/null +++ b/src/experimental/langchain-test-gemini.js @@ -0,0 +1,105 @@ +const { ChatGoogleGenerativeAI } = require('@langchain/google-genai'); +const { TextLoader } = require('langchain/document_loaders/fs/text'); +const { DirectoryLoader } = require('langchain/document_loaders/fs/directory'); + +const { + createStuffDocumentsChain, +} = require('langchain/chains/combine_documents'); +const { ChatPromptTemplate } = require('@langchain/core/prompts'); +const { RecursiveCharacterTextSplitter } = require('langchain/text_splitter'); +const { MemoryVectorStore } = require('langchain/vectorstores/memory'); +const { createRetrievalChain } = require('langchain/chains/retrieval'); +const { GoogleGenerativeAIEmbeddings } = require('@langchain/google-genai'); + +const config = require('../../config'); +const prompts = require('../prompt'); + +const safetySettings = [ + { + category: 'HARM_CATEGORY_HARASSMENT', + threshold: 'BLOCK_ONLY_HIGH', + }, + { + category: 'HARM_CATEGORY_HATE_SPEECH', + threshold: 'BLOCK_ONLY_HIGH', + }, + { + category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', + threshold: 'BLOCK_ONLY_HIGH', + }, + { + category: 'HARM_CATEGORY_DANGEROUS_CONTENT', + threshold: 'BLOCK_ONLY_HIGH', + }, +]; + +const model = new ChatGoogleGenerativeAI({ + apiKey: config.geminiApiToken, + modelName: 'gemini-pro', + maxOutputTokens: 2048, + safetySettings, +}); + +const embeddings = new GoogleGenerativeAIEmbeddings({ + apiKey: config.geminiApiToken, + modelName: 'embedding-001', // 768 dimensions + //taskType: TaskType.RETRIEVAL_DOCUMENT, + //title: "Document title", +}); + +go(); + +console.log(prompts); + +async function go() { + const prompt = ChatPromptTemplate.fromTemplate( + `${prompts.systemPrompt}. 
In addition, you might use the vocabulary, language, and style of the following context: + {context} + Now answer this: {input}` + ); + + const chain = await createStuffDocumentsChain({ + llm: model, + prompt, + }); + // const dir = 'transcripts'; + // const files = fs.readdirSync(dir); + + // for (const file of files) { + // if (path.extname(file) === '.txt') { + // const filePath = path.join(dir, file); + // const loader = new TextLoader(filePath); + // const docs = await loader.load(); + // } + // } + + // const loader = new TextLoader('transcripts/_-AfhLQfb6w.txt'); + // const docs = await loader.load(); + + const loader = new DirectoryLoader('transcripts', { + '.txt': (path) => new TextLoader(path), + }); + const docs = await loader.load(); + + const splitter = new RecursiveCharacterTextSplitter({ + chunkSize: 1000, + chunkOverlap: 20, + }); + const splitDocs = await splitter.splitDocuments(docs); + + const vectorstore = await MemoryVectorStore.fromDocuments( + splitDocs, + embeddings + ); + const retriever = vectorstore.asRetriever({ k: 2 }); + const retrievalChain = await createRetrievalChain({ + combineDocsChain: chain, + retriever, + }); + const response = await retrievalChain.invoke({ + input: + 'Hi MattGPT! Please introduce yourself for the audience here at the Bell House, Brooklyn. 
Tell us what you are here to do.', + }); + + console.log(response); +} diff --git a/src/experimental/langchain-test.js b/src/experimental/langchain-test.js new file mode 100644 index 0000000..62962b7 --- /dev/null +++ b/src/experimental/langchain-test.js @@ -0,0 +1,44 @@ +const { OpenAI, ChatOpenAI, OpenAIEmbeddings } = require('@langchain/openai'); +// const { TextLoader } = require('langchain/document_loaders/fs/text'); +const { RecursiveCharacterTextSplitter } = require('langchain/text_splitter'); +const { HNSWLib } = require('langchain/vectorstores/hnswlib'); + +const fs = require('fs'); + +const config = require('../../config'); + +const chatModel = new ChatOpenAI({ + openAIApiKey: config.openAIApiToken, +}); + +const model = new OpenAI({ + openAIApiKey: config.openAIApiToken, +}); + +const embeddings = new OpenAIEmbeddings({ + openAIApiKey: config.openAIApiToken, +}); + +testChat(); + +async function testChat() { + // const loader = new TextLoader('transcripts/example.txt'); + // const docs = await loader.load(); + + const text = fs.readFileSync('transcripts/example.txt', 'utf8'); + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + + const vectorStore = await HNSWLib.fromDocuments(docs, embeddings); + + // Initialize a retriever wrapper around the vector store + const retriever = vectorStore.asRetriever(); + + const references = await retriever.getRelevantDocuments( + 'what is a rainbow?' + ); + console.log(references); + + //const response = await chatModel.invoke('How are you?'); + //console.log(response); +}