Commit d4b2578f authored by Timothy J. Baek
Browse files

feat: rag csv support

parent d6a1bf14
...@@ -13,7 +13,12 @@ import os, shutil ...@@ -13,7 +13,12 @@ import os, shutil
# from chromadb.utils import embedding_functions # from chromadb.utils import embedding_functions
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader from langchain_community.document_loaders import (
WebBaseLoader,
TextLoader,
PyPDFLoader,
CSVLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA from langchain.chains import RetrievalQA
...@@ -129,7 +134,7 @@ def store_doc( ...@@ -129,7 +134,7 @@ def store_doc(
): ):
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
if file.content_type not in ["application/pdf", "text/plain"]: if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
...@@ -152,6 +157,8 @@ def store_doc( ...@@ -152,6 +157,8 @@ def store_doc(
loader = PyPDFLoader(file_path) loader = PyPDFLoader(file_path)
elif file.content_type == "text/plain": elif file.content_type == "text/plain":
loader = TextLoader(file_path) loader = TextLoader(file_path)
elif file.content_type == "text/csv":
loader = CSVLoader(file_path)
data = loader.load() data = loader.load()
result = store_data_in_vector_db(data, collection_name) result = store_data_in_vector_db(data, collection_name)
......
...@@ -122,9 +122,8 @@ ...@@ -122,9 +122,8 @@
const file = inputFiles[0]; const file = inputFiles[0];
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
reader.readAsDataURL(file); reader.readAsDataURL(file);
} else if (['application/pdf', 'text/plain'].includes(file['type'])) { } else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
console.log(file); console.log(file);
// const hash = (await calculateSHA256(file)).substring(0, 63);
const res = await uploadDocToVectorDB(localStorage.token, '', file); const res = await uploadDocToVectorDB(localStorage.token, '', file);
if (res) { if (res) {
...@@ -241,9 +240,8 @@ ...@@ -241,9 +240,8 @@
const file = inputFiles[0]; const file = inputFiles[0];
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
reader.readAsDataURL(file); reader.readAsDataURL(file);
} else if (['application/pdf', 'text/plain'].includes(file['type'])) { } else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
console.log(file); console.log(file);
// const hash = (await calculateSHA256(file)).substring(0, 63);
const res = await uploadDocToVectorDB(localStorage.token, '', file); const res = await uploadDocToVectorDB(localStorage.token, '', file);
if (res) { if (res) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment