Skip to content

Commit

Permalink
feat: Qdrant vectorstore (#163)
Browse files Browse the repository at this point in the history
Signed-off-by: Anush008 <[email protected]>
Co-authored-by: Michał Pstrąg <[email protected]>
  • Loading branch information
Anush008 and micpst authored Nov 15, 2024
1 parent 6e46d4a commit 95f7021
Show file tree
Hide file tree
Showing 23 changed files with 706 additions and 110 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,6 @@ dist/

# examples
chroma/
qdrant/

.aider*
2 changes: 2 additions & 0 deletions docs/api_reference/core/vector-stores.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
::: ragbits.core.vector_stores.in_memory.InMemoryVectorStore

::: ragbits.core.vector_stores.chroma.ChromaVectorStore

::: ragbits.core.vector_stores.qdrant.QdrantVectorStore
2 changes: 1 addition & 1 deletion examples/apps/documents_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ async def _handle_message(
if not self._documents_ingested:
yield self.NO_DOCUMENTS_INGESTED_MESSAGE
results = await self.document_search.search(message[-1])
prompt = RAGPrompt(QueryWithContext(query=message, context=[i.get_text_representation() for i in results]))
prompt = RAGPrompt(QueryWithContext(query=message, context=[i.text_representation for i in results]))
response = await self._llm.generate(prompt)
yield response.answer

Expand Down
10 changes: 5 additions & 5 deletions examples/document-search/chroma.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
1. Create a list of documents.
2. Initialize the `LiteLLMEmbeddings` class with the OpenAI `text-embedding-3-small` embedding model.
3. Initialize the `ChromaVectorStore` class with a `PersistentClient` instance and an index name.
3. Initialize the `ChromaVectorStore` class with an `EphemeralClient` instance and an index name.
4. Initialize the `DocumentSearch` class with the embedder and the vector store.
5. Ingest the documents into the `DocumentSearch` instance.
6. List all documents in the vector store.
Expand All @@ -33,7 +33,7 @@

import asyncio

from chromadb import PersistentClient
from chromadb import EphemeralClient

from ragbits.core.embeddings.litellm import LiteLLMEmbeddings
from ragbits.core.vector_stores.chroma import ChromaVectorStore
Expand Down Expand Up @@ -72,7 +72,7 @@ async def main() -> None:
model="text-embedding-3-small",
)
vector_store = ChromaVectorStore(
client=PersistentClient("./chroma"),
client=EphemeralClient(),
index_name="jokes",
)
document_search = DocumentSearch(
Expand All @@ -91,7 +91,7 @@ async def main() -> None:
query = "I'm boiling my water and I need a joke"
vector_store_kwargs = {
"k": 2,
"max_distance": None,
"max_distance": 0.6,
}
results = await document_search.search(
query,
Expand All @@ -100,7 +100,7 @@ async def main() -> None:

print()
print(f"Documents similar to: {query}")
print([element.get_text_representation() for element in results])
print([element.text_representation for element in results])


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion examples/document-search/chroma_otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ async def main() -> None:

print()
print(f"Documents similar to: {query}")
print([element.get_text_representation() for element in results])
print([element.text_representation for element in results])


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion examples/document-search/multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ async def main() -> None:
print("Results for 'Fluffy teady bear toy':")
for result in results:
document = await result.document_meta.fetch()
print(f"Type: {result.element_type}, Location: {document.local_path}, Text: {result.get_text_representation()}")
print(f"Type: {result.element_type}, Location: {document.local_path}, Text: {result.text_representation}")


if __name__ == "__main__":
Expand Down
107 changes: 107 additions & 0 deletions examples/document-search/qdrant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Ragbits Document Search Example: Qdrant
This example demonstrates how to use the `DocumentSearch` class to search for documents with a more advanced setup.
We will use the `LiteLLMEmbeddings` class to embed the documents and the query, the `QdrantVectorStore` class to store
the embeddings.
The script performs the following steps:
1. Create a list of documents.
2. Initialize the `LiteLLMEmbeddings` class with the OpenAI `text-embedding-3-small` embedding model.
3. Initialize the `QdrantVectorStore` class with an `AsyncQdrantClient` in-memory instance and an index name.
4. Initialize the `DocumentSearch` class with the embedder and the vector store.
5. Ingest the documents into the `DocumentSearch` instance.
6. List all documents in the vector store.
7. Search for documents using a query.
8. Print the list of all documents and the search results.
To run the script, execute the following command:
```bash
uv run examples/document-search/qdrant.py
```
"""

# /// script
# requires-python = ">=3.10"
# dependencies = [
# "ragbits-document-search",
# "ragbits-core[litellm,qdrant]",
# ]
# ///

import asyncio

from qdrant_client import AsyncQdrantClient

from ragbits.core.embeddings.litellm import LiteLLMEmbeddings
from ragbits.core.vector_stores.qdrant import QdrantVectorStore
from ragbits.document_search import DocumentSearch, SearchConfig
from ragbits.document_search.documents.document import DocumentMeta

# Sample corpus for the example: four short text documents built from string
# literals (three jokes plus one deliberately unrelated sentence). `main()`
# ingests this list into the vector store.
documents = [
DocumentMeta.create_text_document_from_literal(
"""
RIP boiled water. You will be mist.
"""
),
DocumentMeta.create_text_document_from_literal(
"""
Why doesn't James Bond fart in bed? Because it would blow his cover.
"""
),
DocumentMeta.create_text_document_from_literal(
"""
Why programmers don't like to swim? Because they're scared of the floating points.
"""
),
DocumentMeta.create_text_document_from_literal(
"""
This one is completely unrelated.
"""
),
]


async def main() -> None:
    """
    Run the example.

    Ingests the module-level `documents` into a `QdrantVectorStore` backed by an
    `AsyncQdrantClient(":memory:")` under the index name "jokes", prints the
    `content` metadata of every stored entry, then searches for a sample query
    with `k=2` and `max_distance=0.6` and prints the text representation of
    each result.
    """
    embedder = LiteLLMEmbeddings(model="text-embedding-3-small")
    qdrant_client = AsyncQdrantClient(":memory:")
    vector_store = QdrantVectorStore(client=qdrant_client, index_name="jokes")
    document_search = DocumentSearch(embedder=embedder, vector_store=vector_store)

    await document_search.ingest(documents)

    # Read everything back from the store to show what was ingested.
    stored_entries = await vector_store.list()

    print()
    print("All documents:")
    stored_contents = [entry.metadata["content"] for entry in stored_entries]
    print(stored_contents)

    query = "I'm boiling my water and I need a joke"
    search_config = SearchConfig(
        vector_store_kwargs={
            "k": 2,
            "max_distance": 0.6,
        }
    )
    hits = await document_search.search(query, config=search_config)

    print()
    print(f"Documents similar to: {query}")
    hit_texts = [hit.text_representation for hit in hits]
    print(hit_texts)


# Script entry point: run the async example on a fresh event loop.
if __name__ == "__main__":
asyncio.run(main())
3 changes: 3 additions & 0 deletions packages/ragbits-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ promptfoo = [
otel = [
"opentelemetry-api~=1.27.0",
]
qdrant = [
"qdrant-client~=1.12.1",
]

[tool.uv]
dev-dependencies = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ async def store(self, ids: list[str], metadatas: list[dict]) -> None:
ids: list of unique ids of the entries
metadatas: list of dicts with metadata.
"""
for _id, metadata in zip(ids, metadatas, strict=False):
for _id, metadata in zip(ids, metadatas, strict=True):
self._storage[_id] = metadata

@traceable
Expand Down
33 changes: 13 additions & 20 deletions packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,27 @@
import sys

from ..metadata_stores import get_metadata_store
from ..utils.config_handling import get_cls_from_config
from .base import VectorStore, VectorStoreEntry, VectorStoreOptions, WhereQuery
from .in_memory import InMemoryVectorStore
from ragbits.core.utils.config_handling import get_cls_from_config
from ragbits.core.vector_stores.base import VectorStore, VectorStoreEntry, VectorStoreOptions, WhereQuery
from ragbits.core.vector_stores.in_memory import InMemoryVectorStore

__all__ = ["InMemoryVectorStore", "VectorStore", "VectorStoreEntry", "WhereQuery"]
__all__ = ["InMemoryVectorStore", "VectorStore", "VectorStoreEntry", "VectorStoreOptions", "WhereQuery"]

module = sys.modules[__name__]


def get_vector_store(vector_store_config: dict) -> VectorStore:
def get_vector_store(config: dict) -> VectorStore:
"""
Initializes and returns a VectorStore object based on the provided configuration.
Args:
vector_store_config: A dictionary containing configuration details for the VectorStore.
config: A dictionary containing configuration details for the VectorStore.
Returns:
An instance of the specified VectorStore class, initialized with the provided config
(if any) or default arguments.
"""
vector_store_cls = get_cls_from_config(vector_store_config["type"], module)
config = vector_store_config.get("config", {})
if vector_store_config["type"].endswith("ChromaVectorStore"):
return vector_store_cls.from_config(config)

metadata_store_config = vector_store_config.get("metadata_store_config")
return vector_store_cls(
default_options=VectorStoreOptions(**config.get("default_options", {})),
metadata_store=get_metadata_store(metadata_store_config),
)
Raises:
KeyError: If the provided configuration does not contain a valid "type" key.
InvalidConfigurationError: If the provided configuration is invalid.
NotImplementedError: If the specified VectorStore class cannot be created from the provided configuration.
"""
vector_store_cls = get_cls_from_config(config["type"], sys.modules[__name__])
return vector_store_cls.from_config(config.get("config", {}))
18 changes: 17 additions & 1 deletion packages/ragbits-core/src/ragbits/core/vector_stores/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class VectorStoreEntry(BaseModel):
"""

id: str
key: str
vector: list[float]
content: str
metadata: dict


Expand Down Expand Up @@ -48,6 +48,22 @@ def __init__(
self._default_options = default_options or VectorStoreOptions()
self._metadata_store = metadata_store

@classmethod
def from_config(cls, config: dict) -> "VectorStore":
"""
Creates and returns an instance of the VectorStore class from the given configuration.
This base implementation does not build anything: it always raises, naming the class it
was called on. NOTE(review): concrete stores that support configuration are presumably
expected to override this method - confirm against the subclasses.
Args:
config: A dictionary containing the configuration for initializing the VectorStore instance.
Returns:
An initialized instance of the VectorStore class.
Raises:
NotImplementedError: Always, in this base implementation, because the class cannot be
created from the provided configuration.
"""
raise NotImplementedError(f"Cannot create class {cls.__name__} from config.")

@abstractmethod
async def store(self, entries: list[VectorStoreEntry]) -> None:
"""
Expand Down
Loading

0 comments on commit 95f7021

Please sign in to comment.