1
1
import hashlib
2
2
import os
3
3
import re
4
+ import uuid
4
5
from typing import Any , Callable , Dict , List , Literal , Optional , Tuple , Union
5
6
6
7
from IPython import get_ipython
@@ -135,7 +136,7 @@ def __init__(
135
136
- `client` (Optional, chromadb.Client) - the chromadb client. If key not provided, a
136
137
default client `chromadb.Client()` will be used. If you want to use other
137
138
vector db, extend this class and override the `retrieve_docs` function.
138
- ** Deprecated**: use `vector_db` instead.
139
+ *[ Deprecated]* use `vector_db` instead.
139
140
- `docs_path` (Optional, Union[str, List[str]]) - the path to the docs directory. It
140
141
can also be the path to a single file, the url to a single file or a list
141
142
of directories, files and urls. Default is None, which works only if the
@@ -149,7 +150,7 @@ def __init__(
149
150
By default, "extra_docs" is set to false, starting document IDs from zero.
150
151
This poses a risk as new documents might overwrite existing ones, potentially
151
152
causing unintended loss or alteration of data in the collection.
152
- ** Deprecated**: use `new_docs` when use `vector_db` instead of `client`.
153
+ *[ Deprecated]* use `new_docs` when using `vector_db` instead of `client`.
153
154
- `new_docs` (Optional, bool) - when True, only adds new documents to the collection;
154
155
when False, updates existing documents and adds new ones. Default is True.
155
156
Document id is used to determine if a document is new or existing. By default, the
@@ -172,7 +173,7 @@ def __init__(
172
173
models can be found at `https://www.sbert.net/docs/pretrained_models.html`.
173
174
The default model is a fast model. If you want to use a high performance model,
174
175
`all-mpnet-base-v2` is recommended.
175
- ** Deprecated**: no need when use `vector_db` instead of `client`.
176
+ *[ Deprecated]* not needed when using `vector_db` instead of `client`.
176
177
- `embedding_function` (Optional, Callable) - the embedding function for creating the
177
178
vector db. Default is None, SentenceTransformer with the given `embedding_model`
178
179
will be used. If you want to use OpenAI, Cohere, HuggingFace or other embedding
@@ -219,7 +220,7 @@ def __init__(
219
220
220
221
Example of overriding retrieve_docs - If you have set up a customized vector db, and it's
221
222
not compatible with chromadb, you can easily plug it in with the code below.
222
- ** Deprecated**: Use `vector_db` instead. You can extend VectorDB and pass it to the agent.
223
+ *[ Deprecated]* use `vector_db` instead. You can extend VectorDB and pass it to the agent.
223
224
```python
224
225
class MyRetrieveUserProxyAgent(RetrieveUserProxyAgent):
225
226
def query_vector_db(
@@ -365,7 +366,11 @@ def _init_db(self):
365
366
else :
366
367
all_docs_ids = set ()
367
368
368
- chunk_ids = [hashlib .blake2b (chunk .encode ("utf-8" )).hexdigest ()[:HASH_LENGTH ] for chunk in chunks ]
369
+ chunk_ids = (
370
+ [hashlib .blake2b (chunk .encode ("utf-8" )).hexdigest ()[:HASH_LENGTH ] for chunk in chunks ]
371
+ if not self ._vector_db .type == "qdrant"
372
+ else [str (uuid .UUID (hex = hashlib .md5 (chunk .encode ("utf-8" )).hexdigest ())) for chunk in chunks ]
373
+ )
369
374
chunk_ids_set = set (chunk_ids )
370
375
chunk_ids_set_idx = [chunk_ids .index (hash_value ) for hash_value in chunk_ids_set ]
371
376
docs = [
0 commit comments