Unverified Commit ece7f8d5 authored by Kaiwen Liu's avatar Kaiwen Liu Committed by GitHub
Browse files

Merge pull request #6 from opendatalab/dev

Dev
parents 98362a6e 702b6ac9
from flask import render_template, Response
from flask_restful import Resource
class ReactAppView(Resource):
    """Resource whose GET handler serves the rendered ``index.html`` template."""

    def get(self):
        """Render ``index.html`` and return it as a ``text/html`` response."""
        # Build a custom response object around the rendered page.
        return Response(render_template('index.html'), mimetype='text/html')
......@@ -11,8 +11,8 @@ def is_pdf(filename, file):
:return: 如果文件是PDF格式,则返回True,否则返回False
"""
# 检查文件扩展名 https://arxiv.org/pdf/2405.08702 pdf链接可能存在不带扩展名的情况,先注释
if not filename.endswith('.pdf'):
return False
# if not filename.endswith('.pdf'):
# return False
# 检查MIME类型
mime_type, _ = mimetypes.guess_type(filename)
......
......@@ -11,6 +11,8 @@ BaseConfig: &base
JWT_ACCESS_TOKEN_EXPIRES: 3600
PDF_UPLOAD_FOLDER: "upload_pdf"
PDF_ANALYSIS_FOLDER: "analysis_pdf"
# 前端项目打包的路径
REACT_APP_DIST: "../../web/dist/"
# 开发配置
DevelopmentConfig:
......
......@@ -8,7 +8,7 @@ fast-langdetect==0.2.0
wordninja>=2.0.0
scikit-learn>=1.0.2
pdfminer.six==20231228
unimernet==0.1.6
unimernet==0.2.1
matplotlib
ultralytics
paddleocr==2.7.3
......
......@@ -17,3 +17,4 @@ pyopenssl==24.0.0
struct-eqtable==0.1.0
pytest-cov
beautifulsoup4
coverage
\ No newline at end of file
......@@ -9,4 +9,6 @@ pydantic>=2.7.2,<2.8.0
PyMuPDF>=1.24.9
scikit-learn>=1.0.2
wordninja>=2.0.0
torch>=2.2.2,<=2.3.1
transformers
# The requirements.txt must ensure that only necessary external dependencies are introduced. If there are new dependencies to add, please contact the project administrator.
......@@ -36,7 +36,7 @@ if __name__ == '__main__':
"paddlepaddle==3.0.0b1;platform_system=='Linux'",
"paddlepaddle==2.6.1;platform_system=='Windows' or platform_system=='Darwin'",
],
"full": ["unimernet==0.1.6", # 0.1.6版本大幅裁剪依赖包范围,推荐使用此版本
"full": ["unimernet==0.2.1", # unimernet升级0.2.1
"matplotlib<=3.9.0;platform_system=='Windows'", # 3.9.1及之后不提供windows的预编译包,避免一些没有编译环境的windows设备安装失败
"matplotlib;platform_system=='Linux' or platform_system=='Darwin'", # linux 和 macos 不应限制matplotlib的最高版本,以避免无法更新导致的一些bug
"ultralytics", # yolov8,公式检测
......
"""
clean coverage
"""
import os
import shutil
def delete_file(path):
    """Delete the file or directory tree at ``path`` if it exists.

    A regular file is removed with ``os.remove``; a directory is removed
    recursively with ``shutil.rmtree``. A path that does not exist is left
    untouched. Outcomes are reported on stdout instead of being raised.

    Args:
        path: Filesystem path of the file or directory to delete.
    """
    # Nothing to delete. Guarding on "does not exist" and then testing
    # isfile() would make the file branch unreachable, so return early.
    if not os.path.exists(path):
        return

    if os.path.isfile(path):
        try:
            os.remove(path)
            print(f"File '{path}' deleted.")
        except OSError as e:
            # Filesystem failures (permissions, busy file, ...) are OSError.
            print(f"Error deleting file '{path}': {e}")
    elif os.path.isdir(path):
        try:
            shutil.rmtree(path)
            print(f"Directory '{path}' and its contents deleted.")
        except OSError as e:
            print(f"Error deleting directory '{path}': {e}")
if __name__ == "__main__":
    # Script entry point: remove the "htmlcov/" directory.
    delete_file("htmlcov/")
    # Deleting the ".coverage" file is currently disabled.
    #delete_file(".coverage")
......@@ -2,7 +2,7 @@
get cov
"""
from bs4 import BeautifulSoup
import shutil
def get_covrage():
"""get covrage"""
# 发送请求获取网页内容
......
#!/bin/bash
# 定义最大重试次数
max_retries=5
retry_count=0
while true; do
# prepare env
source activate MinerU
pip install -r requirements-qa.txt
pip install magic-pdf[full]==0.7.0b1 --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
#python -m pip install -r requirements-qa.txt
python -m pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
exit_code=$?
if [ $exit_code -eq 0 ]; then
echo "test.sh 成功执行!"
......@@ -21,6 +19,6 @@ while true; do
exit 1
fi
echo "test.sh 执行失败 (退出码: $exit_code)。尝试第 $retry_count 次重试..."
sleep 5 # 等待 5 秒后重试
sleep 5
fi
done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment