Commit d41179da authored by myhloli
Browse files

feat: update parse_doc function to support backend options and add environment...

feat: update parse_doc function to support backend options and add environment variable instructions
parent 8737ebb2
...@@ -230,4 +230,14 @@ if __name__ == '__main__': ...@@ -230,4 +230,14 @@ if __name__ == '__main__':
for doc_path in Path(pdf_files_dir).glob('*'): for doc_path in Path(pdf_files_dir).glob('*'):
if doc_path.suffix in pdf_suffixes + image_suffixes: if doc_path.suffix in pdf_suffixes + image_suffixes:
doc_path_list.append(doc_path) doc_path_list.append(doc_path)
parse_doc(doc_path_list, output_dir)
"""如果您由于网络问题无法下载模型,可以设置环境变量MINERU_MODEL_SOURCE为modelscope使用免代理仓库下载模型"""
# os.environ['MINERU_MODEL_SOURCE'] = "modelscope"
"""Use pipeline mode if your environment does not support VLM"""
parse_doc(doc_path_list, output_dir, backend="pipeline")
"""To enable VLM mode, change the backend to 'vlm-xxx'"""
# parse_doc(doc_path_list, output_dir, backend="vlm-transformers") # more general.
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-engine") # faster(engine).
# parse_doc(doc_path_list, output_dir, backend="vlm-sglang-client", server_url="http://127.0.0.1:30000") # faster(client).
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment