-
Notifications
You must be signed in to change notification settings - Fork 5
/
test.py
46 lines (36 loc) · 1.44 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from llama_index import ServiceContext
from llama_index.llms import Ollama
import chromadb
from llama_index.vector_stores import ChromaVectorStore
from llama_index import StorageContext
import re
def extract_urls(text):
url_pattern = re.compile(r'link: (https?://[^\s,]+)')
urls = re.findall(url_pattern, text)
return urls
from langchain_community.tools import DuckDuckGoSearchResults
search = DuckDuckGoSearchResults()
result = search.run("What is catch-up contribution?")
results = extract_urls(result)
parsed_results = []
for r in results:
parsed_results.append(r.replace("]",""))
print(parsed_results)
chroma_client = chromadb.PersistentClient()
chroma_collection = chroma_client.create_collection(name="ddg", get_or_create=True)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
from llama_index import VectorStoreIndex
from llama_index.readers import SimpleWebPageReader
documents = SimpleWebPageReader(html_to_text=True).load_data(
parsed_results
)
llm1 = Ollama(model="dolphin-mistral:latest")
service_context = ServiceContext.from_defaults(llm=llm1, embed_model='local')
index = VectorStoreIndex.from_documents(
documents, storage_context=storage_context, service_context=service_context
)
query_engine = index.as_query_engine()
response = query_engine.query("What is catch-up contribution?")
#print(response)
print(response.source_nodes)