Commit 0fc2c291 authored by houlinfeng's avatar houlinfeng
Browse files

feat: mineru_web

parent aac91094
from flask import jsonify
class ResponseCode:
SUCCESS = 200
PARAM_WARING = 400
MESSAGE = "success"
def generate_response(data=None, code=ResponseCode.SUCCESS, msg=ResponseCode.MESSAGE, **kwargs):
"""
自定义响应
:param code:状态码
:param data:返回数据
:param msg:返回消息
:param kwargs:
:return:
"""
msg = msg or 'success' if code == 200 else msg or 'fail'
success = True if code == 200 else False
res = jsonify(dict(code=code, success=success, data=data, msg=msg, **kwargs))
res.status_code = 200
return res
import json
from flask import request
from werkzeug.exceptions import HTTPException
class ApiException(HTTPException):
"""API错误基类"""
code = 500
msg = 'Sorry, we made a mistake Σ(っ °Д °;)っ'
msgZH = ""
error_code = 999
def __init__(self, msg=None, msgZH=None, code=None, error_code=None, headers=None):
if code:
self.code = code
if msg:
self.msg = msg
if msgZH:
self.msgZH = msgZH
if error_code:
self.error_code = error_code
super(ApiException, self).__init__(msg, None)
@staticmethod
def get_error_url():
"""获取出错路由和请求方式"""
method = request.method
full_path = str(request.full_path)
main_path = full_path.split('?')[0]
res = method + ' ' + main_path
return res
def get_body(self, environ=None, scope=None):
"""异常返回信息"""
body = dict(
msg=self.msg,
error_code=self.error_code,
request=self.get_error_url()
)
text = json.dumps(body)
return text
def get_headers(self, environ=None, scope=None):
"""异常返回格式"""
return [("Content-Type", "application/json")]
\ No newline at end of file
import hashlib
import mimetypes
def is_pdf(filename, file):
"""
判断文件是否为PDF格式。
:param filename: 文件名
:param file: 文件对象
:return: 如果文件是PDF格式,则返回True,否则返回False
"""
# 检查文件扩展名 https://arxiv.org/pdf/2405.08702 pdf链接可能存在不带扩展名的情况,先注释
if not filename.endswith('.pdf'):
return False
# 检查MIME类型
mime_type, _ = mimetypes.guess_type(filename)
print(mime_type)
if mime_type != 'application/pdf':
return False
# 可选:读取文件的前几KB内容并检查MIME类型
# 这一步是可选的,用于更严格的检查
# if not mimetypes.guess_type(filename, strict=False)[0] == 'application/pdf':
# return False
# 检查文件内容
file_start = file.read(5)
file.seek(0)
if not file_start.startswith(b'%PDF-'):
return False
return True
def url_is_pdf(file):
"""
判断文件是否为PDF格式。
:param file: 文件对象
:return: 如果文件是PDF格式,则返回True,否则返回False
"""
# 检查文件内容
file_start = file.read(5)
file.seek(0)
if not file_start.startswith(b'%PDF-'):
return False
return True
def calculate_file_hash(file, algorithm='sha256'):
"""
计算给定文件的哈希值。
:param file: 文件对象
:param algorithm: 哈希算法的名字,如:'sha256', 'md5', 'sha1'等
:return: 文件的哈希值
"""
hash_func = getattr(hashlib, algorithm)()
block_size = 65536 # 64KB chunks
# with open(file_path, 'rb') as file:
buffer = file.read(block_size)
while len(buffer) > 0:
hash_func.update(buffer)
buffer = file.read(block_size)
file.seek(0)
return hash_func.hexdigest()
def singleton_func(cls):
instance = {}
def _singleton(*args, **kwargs):
if cls not in instance:
instance[cls] = cls(*args, **kwargs)
return instance[cls]
return _singleton
from api.analysis.models import *
\ No newline at end of file
import os
from loguru import logger
from pathlib import Path
from datetime import datetime
def setup_log(config):
"""
Setup logging
:param config: config file
:return:
"""
log_path = os.path.join(Path(__file__).parent.parent, "log")
if not Path(log_path).exists():
Path(log_path).mkdir(parents=True, exist_ok=True)
log_level = config.get("LOG_LEVEL")
log_name = f'log_{datetime.now().strftime("%Y-%m-%d")}.log'
log_file_path = os.path.join(log_path, log_name)
logger.add(str(log_file_path), rotation='00:00', encoding='utf-8', level=log_level, enqueue=True)
def before_request():
return None
def after_request(response):
response.headers.add('Access-Control-Allow-Origin', '*')
response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
return response
# 基本配置
BaseConfig: &base
DEBUG: false
PORT: 5559
LOG_LEVEL: "DEBUG"
SQLALCHEMY_TRACK_MODIFICATIONS: true
SQLALCHEMY_DATABASE_URI: ""
PROPAGATE_EXCEPTIONS: true
SECRET_KEY: "#$%^&**$##*(*^%%$**((&"
JWT_SECRET_KEY: "#$%^&**$##*(*^%%$**((&"
JWT_ACCESS_TOKEN_EXPIRES: 3600
PDF_UPLOAD_FOLDER: "upload_pdf"
PDF_ANALYSIS_FOLDER: "analysis_pdf"
# 开发配置
DevelopmentConfig:
<<: *base
database:
type: sqlite
path: config/mineru_web.db
# 生产配置
ProductionConfig:
<<: *base
# 测试配置
TestingConfig:
<<: *base
# 当前使用配置
CurrentConfig: "DevelopmentConfig"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment