Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
0fc2c291
"vscode:/vscode.git/clone" did not exist on "e836fc63f34c78487faf88433ceb04eee9fadc8f"
Commit
0fc2c291
authored
Sep 05, 2024
by
houlinfeng
Browse files
feat: mineru_web
parent
aac91094
Changes
31
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
208 additions
and
0 deletions
+208
-0
projects/web_api/web_api/common/__init__.py
projects/web_api/web_api/common/__init__.py
+0
-0
projects/web_api/web_api/common/custom_response.py
projects/web_api/web_api/common/custom_response.py
+23
-0
projects/web_api/web_api/common/error_types.py
projects/web_api/web_api/common/error_types.py
+45
-0
projects/web_api/web_api/common/ext.py
projects/web_api/web_api/common/ext.py
+80
-0
projects/web_api/web_api/common/import_models.py
projects/web_api/web_api/common/import_models.py
+1
-0
projects/web_api/web_api/common/logger.py
projects/web_api/web_api/common/logger.py
+19
-0
projects/web_api/web_api/common/web_hook.py
projects/web_api/web_api/common/web_hook.py
+9
-0
projects/web_api/web_api/config/__init__.py
projects/web_api/web_api/config/__init__.py
+0
-0
projects/web_api/web_api/config/config.yaml
projects/web_api/web_api/config/config.yaml
+31
-0
projects/web_api/web_api/config/mineru_web.db
projects/web_api/web_api/config/mineru_web.db
+0
-0
projects/web_api/web_api/static/__init__.py
projects/web_api/web_api/static/__init__.py
+0
-0
No files found.
projects/web_api/web_api/common/__init__.py
0 → 100644
View file @
0fc2c291
projects/web_api/web_api/common/custom_response.py
0 → 100644
View file @
0fc2c291
from
flask
import
jsonify
class ResponseCode:
    """Business-level status codes and default message used in API response envelopes."""

    # Request handled successfully.
    SUCCESS = 200
    # Request rejected because of invalid parameters.
    # NOTE: the name is misspelled but is kept because callers may reference it.
    PARAM_WARING = 400
    # Correctly spelled alias; prefer this name in new code.
    PARAM_WARNING = PARAM_WARING
    # Default message placed in the envelope.
    MESSAGE = "success"
def generate_response(data=None, code=ResponseCode.SUCCESS, msg=ResponseCode.MESSAGE, **kwargs):
    """
    Build the JSON envelope returned by API endpoints.

    The envelope contains ``code``, ``success``, ``data`` and ``msg`` plus any
    extra keyword arguments. The HTTP status of the response is always 200;
    the business outcome is carried by ``code`` / ``success`` in the body.

    Because ``msg`` defaults to ``ResponseCode.MESSAGE``, the ``'fail'``
    fallback is only used when a caller explicitly passes a falsy ``msg``
    together with a non-200 ``code``.

    :param data: payload placed under the ``data`` key
    :param code: business status code; 200 means success
    :param msg: message text; a falsy value is replaced by 'success' or 'fail'
    :param kwargs: extra top-level keys merged into the envelope
    :return: the response object produced by ``jsonify``
    """
    succeeded = bool(code == 200)
    if not msg:
        msg = 'success' if succeeded else 'fail'

    envelope = dict(code=code, success=succeeded, data=data, msg=msg, **kwargs)
    response = jsonify(envelope)
    # Transport-level status stays 200 regardless of the business code.
    response.status_code = 200
    return response
projects/web_api/web_api/common/error_types.py
0 → 100644
View file @
0fc2c291
import
json
from
flask
import
request
from
werkzeug.exceptions
import
HTTPException
class ApiException(HTTPException):
    """Base class for API errors whose body and headers are produced as JSON."""

    # Class-level defaults; each can be overridden per instance in __init__.
    code = 500
    msg = 'Sorry, we made a mistake Σ(っ °Д °;)っ'
    msgZH = ""
    error_code = 999

    def __init__(self, msg=None, msgZH=None, code=None, error_code=None, headers=None):
        """
        Override the class defaults with any truthy values supplied.

        :param msg: error message; also passed to the parent as its first argument
        :param msgZH: Chinese error message
        :param code: status code stored on the instance
        :param error_code: application-specific error code
        :param headers: accepted for signature compatibility; not used here
        """
        overrides = (
            ('code', code),
            ('msg', msg),
            ('msgZH', msgZH),
            ('error_code', error_code),
        )
        for attr_name, value in overrides:
            # Falsy values (None, 0, "") leave the class default in place.
            if value:
                setattr(self, attr_name, value)
        super().__init__(msg, None)

    @staticmethod
    def get_error_url():
        """Return the current request's method and path (query string removed), e.g. ``GET /x``."""
        path_only = str(request.full_path).partition('?')[0]
        return request.method + ' ' + path_only

    def get_body(self, environ=None, scope=None):
        """
        Serialize the error details as a JSON string.

        :param environ: unused
        :param scope: unused
        :return: JSON text with the keys ``msg``, ``error_code`` and ``request``
        """
        payload = {
            'msg': self.msg,
            'error_code': self.error_code,
            'request': self.get_error_url(),
        }
        return json.dumps(payload)

    def get_headers(self, environ=None, scope=None):
        """
        Return the headers for the error response.

        :param environ: unused
        :param scope: unused
        :return: a list holding the JSON ``Content-Type`` header
        """
        return [("Content-Type", "application/json")]
\ No newline at end of file
projects/web_api/web_api/common/ext.py
0 → 100644
View file @
0fc2c291
import
hashlib
import
mimetypes
def is_pdf(filename, file):
    """
    Check whether an uploaded file is a PDF.

    Three checks are applied in order and all must pass:

    1. the file name ends with ``.pdf`` (case-insensitive);
    2. the MIME type guessed from the file name is ``application/pdf``;
    3. the content starts with the ``%PDF-`` magic bytes.

    For sources whose name may lack an extension (for example a download URL
    such as https://arxiv.org/pdf/2405.08702) use a content-only check instead.

    :param filename: name of the file; a missing or empty name yields False
    :param file: binary file-like object supporting ``read`` and ``seek``;
        when its content is inspected it is left rewound to offset 0
    :return: True if the file is a PDF, otherwise False
    """
    # Extension check; lower-cased so that names like "REPORT.PDF" are accepted.
    if not filename or not filename.lower().endswith('.pdf'):
        return False

    # MIME type derived from the file name.
    mime_type, _ = mimetypes.guess_type(filename)
    if mime_type != 'application/pdf':
        return False

    # Content check: always read the header from the very start of the stream,
    # regardless of where a previous reader left the position.
    file.seek(0)
    header = file.read(5)
    file.seek(0)
    return header.startswith(b'%PDF-')
def url_is_pdf(file):
    """
    Check whether a file is a PDF by looking at its content only.

    Reads the first five bytes, rewinds the stream to offset 0 and compares
    the bytes read against the ``%PDF-`` signature.

    :param file: binary file-like object supporting ``read`` and ``seek``
    :return: True if the bytes read start with ``%PDF-``, otherwise False
    """
    signature = b'%PDF-'
    header = file.read(5)
    file.seek(0)
    return header.startswith(signature)
def calculate_file_hash(file, algorithm='sha256'):
    """
    Compute the hex digest of a file object's content.

    The content is consumed from the current position in 64 KB chunks, and the
    stream is rewound to offset 0 before returning.

    :param file: binary file-like object supporting ``read`` and ``seek``
    :param algorithm: name of a ``hashlib`` constructor, e.g. 'sha256', 'md5', 'sha1'
    :return: hexadecimal digest string
    """
    digest = getattr(hashlib, algorithm)()
    chunk_size = 64 * 1024

    while True:
        chunk = file.read(chunk_size)
        if len(chunk) == 0:
            break
        digest.update(chunk)

    file.seek(0)
    return digest.hexdigest()
def singleton_func(cls):
    """
    Decorator that makes ``cls`` produce a single shared instance.

    The first call builds the instance with the arguments given; later calls
    return that same object and ignore their arguments.

    :param cls: the class (or other callable) to wrap
    :return: a function that returns the shared instance
    """
    created = {}

    def _singleton(*args, **kwargs):
        if cls in created:
            return created[cls]
        obj = cls(*args, **kwargs)
        created[cls] = obj
        return obj

    return _singleton
projects/web_api/web_api/common/import_models.py
0 → 100644
View file @
0fc2c291
from
api.analysis.models
import
*
\ No newline at end of file
projects/web_api/web_api/common/logger.py
0 → 100644
View file @
0fc2c291
import
os
from
loguru
import
logger
from
pathlib
import
Path
from
datetime
import
datetime
def setup_log(config):
    """
    Register a dated log file as a loguru sink.

    The file lives in a ``log`` directory two levels above this module and is
    named ``log_<YYYY-MM-DD>.log`` using the date at call time. The directory
    is created when it does not exist.

    :param config: object exposing ``get``; its "LOG_LEVEL" value is used as the sink level
    :return: None
    """
    log_dir = os.path.join(Path(__file__).parent.parent, "log")
    log_dir_path = Path(log_dir)
    if not log_dir_path.exists():
        log_dir_path.mkdir(parents=True, exist_ok=True)

    level = config.get("LOG_LEVEL")
    file_name = datetime.now().strftime('log_%Y-%m-%d.log')
    sink = os.path.join(log_dir, file_name)

    logger.add(
        sink,
        rotation='00:00',
        encoding='utf-8',
        level=level,
        enqueue=True,
    )
projects/web_api/web_api/common/web_hook.py
0 → 100644
View file @
0fc2c291
def before_request():
    """
    Hook intended to run before a request is handled.

    Currently a placeholder that performs no work.

    :return: always None
    """
    return None
def after_request(response):
    """
    Add CORS headers to an outgoing response.

    Allows any origin and the ``Content-Type`` and ``Authorization`` request
    headers. The headers are appended with ``headers.add``.

    :param response: response object whose ``headers`` supports ``add(name, value)``
    :return: the same response object
    """
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Headers', 'Content-Type,Authorization'),
    )
    for name, value in cors_headers:
        response.headers.add(name, value)
    return response
projects/web_api/web_api/config/__init__.py
0 → 100644
View file @
0fc2c291
projects/web_api/web_api/config/config.yaml
0 → 100644
View file @
0fc2c291
# 基本配置
BaseConfig
:
&base
DEBUG
:
false
PORT
:
5559
LOG_LEVEL
:
"
DEBUG"
SQLALCHEMY_TRACK_MODIFICATIONS
:
true
SQLALCHEMY_DATABASE_URI
:
"
"
PROPAGATE_EXCEPTIONS
:
true
SECRET_KEY
:
"
#$%^&**$##*(*^%%$**((&"
JWT_SECRET_KEY
:
"
#$%^&**$##*(*^%%$**((&"
JWT_ACCESS_TOKEN_EXPIRES
:
3600
PDF_UPLOAD_FOLDER
:
"
upload_pdf"
PDF_ANALYSIS_FOLDER
:
"
analysis_pdf"
# 开发配置
DevelopmentConfig
:
<<
:
*base
database
:
type
:
sqlite
path
:
config/mineru_web.db
# 生产配置
ProductionConfig
:
<<
:
*base
# 测试配置
TestingConfig
:
<<
:
*base
# 当前使用配置
CurrentConfig
:
"
DevelopmentConfig"
projects/web_api/web_api/config/mineru_web.db
0 → 100644
View file @
0fc2c291
File added
projects/web_api/web_api/static/__init__.py
0 → 100644
View file @
0fc2c291
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment