# This uses the model defined in the Dockerfile ENV variable. If you don't use Docker or Docker-based deployments such as k8s, the default embedding model will be used (sentence-transformers/all-MiniLM-L6-v2).
RAG_TOP_K = int(os.environ.get("RAG_TOP_K", "5"))
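# A minimal sketch of the pattern the comment above describes, reusing the
# surrounding file's `os` import. The variable name RAG_EMBEDDING_MODEL is
# assumed here for illustration and may differ in the actual config: the value
# baked into the image via the Dockerfile ENV takes precedence, otherwise the
# stated default embedding model is used.
RAG_EMBEDDING_MODEL = os.environ.get(
    "RAG_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)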
...
...
@@ -461,10 +474,23 @@ if USE_CUDA.lower() == "true":