Commit 8bfda730 authored by Marclass
Browse files

add excel document support

parent 7eea3ef3
...@@ -23,6 +23,7 @@ from langchain_community.document_loaders import ( ...@@ -23,6 +23,7 @@ from langchain_community.document_loaders import (
UnstructuredMarkdownLoader, UnstructuredMarkdownLoader,
UnstructuredXMLLoader, UnstructuredXMLLoader,
UnstructuredRSTLoader, UnstructuredRSTLoader,
UnstructuredExcelLoader,
) )
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma from langchain_community.vectorstores import Chroma
...@@ -157,6 +158,9 @@ def store_doc( ...@@ -157,6 +158,9 @@ def store_doc(
] ]
docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document" docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
known_doc_ext=["doc","docx"] known_doc_ext=["doc","docx"]
excel_types=["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]
known_excel_ext=["xls", "xlsx"]
file_ext=file.filename.split(".")[-1].lower() file_ext=file.filename.split(".")[-1].lower()
known_type=True known_type=True
...@@ -179,6 +183,8 @@ def store_doc( ...@@ -179,6 +183,8 @@ def store_doc(
loader = Docx2txtLoader(file_path) loader = Docx2txtLoader(file_path)
elif file_ext=="csv": elif file_ext=="csv":
loader = CSVLoader(file_path) loader = CSVLoader(file_path)
elif (file.content_type in excel_types or file_ext in known_excel_ext):
loader = UnstructuredExcelLoader(file_path)
elif file_ext=="rst": elif file_ext=="rst":
loader = UnstructuredRSTLoader(file_path, mode="elements") loader = UnstructuredRSTLoader(file_path, mode="elements")
elif file_ext in text_xml: elif file_ext in text_xml:
......
...@@ -29,3 +29,8 @@ PyJWT ...@@ -29,3 +29,8 @@ PyJWT
pyjwt[crypto] pyjwt[crypto]
black black
pandas
openpyxl
pyxlsb
xlrd
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment