"testing/python/vscode:/vscode.git/clone" did not exist on "cd9ec62e639bf3560b32777022e05a1c4db1a418"
Unverified commit 0bae789d, authored by Steven Kreitzer
Browse files

fix: support batching chromadb

parent 839efa44
@@ -15,6 +15,7 @@ from typing import List
 from sentence_transformers import SentenceTransformer
 from chromadb.utils import embedding_functions
+from chromadb.utils.batch_utils import create_batches
 from langchain_community.document_loaders import (
     WebBaseLoader,
@@ -331,9 +332,14 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
             embedding_function=app.state.sentence_transformer_ef,
         )
-        collection.add(
-            documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts]
-        )
+        for batch in create_batches(
+            api=CHROMA_CLIENT,
+            ids=[str(uuid.uuid1()) for _ in texts],
+            metadatas=metadatas,
+            documents=texts,
+        ):
+            collection.add(*batch)
         return True
     except Exception as e:
         log.exception(e)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment