Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
8ca9eb32
Unverified
Commit
8ca9eb32
authored
Nov 11, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Nov 11, 2024
Browse files
Delete docs/download_models_hf.py
parent
07f9fd00
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
70 deletions
+0
-70
docs/download_models_hf.py
docs/download_models_hf.py
+0
-70
No files found.
docs/download_models_hf.py
deleted
100644 → 0
View file @
07f9fd00
import
json
import
os
import
requests
from
huggingface_hub
import
snapshot_download
def download_json(url, timeout=30):
    """Download a JSON document over HTTP and return it decoded.

    Args:
        url: Address of the JSON document to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without a timeout a stalled
            connection would block the script indefinitely.

    Returns:
        The object produced by ``response.json()``.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Turn 4xx/5xx replies into an exception instead of parsing an error page.
    response.raise_for_status()
    return response.json()
def download_and_modify_json(url, local_filename, modifications):
    """Create or refresh a local JSON config file and apply overrides to it.

    If ``local_filename`` exists and its ``config_version`` is not below
    ``'1.0.0'``, the existing content is kept. Otherwise (file missing, or
    version missing/too old) the content is downloaded from ``url`` via
    ``download_json``. In both cases every entry of ``modifications`` is then
    written into the data and the result is saved back to ``local_filename``.

    Args:
        url: Location of the template JSON used when a download is needed.
        local_filename: Path of the config file to read and (over)write.
        modifications: Mapping of top-level keys to the values to set.

    Returns:
        None. The result is written to ``local_filename`` as UTF-8 JSON.
    """
    if os.path.exists(local_filename):
        # Read with the same encoding used for writing below, and close the
        # handle deterministically instead of leaking it.
        with open(local_filename, encoding='utf-8') as f:
            data = json.load(f)
        # NOTE: this is a plain (lexicographic) string comparison; a missing
        # version is treated as '0.0.0' and therefore triggers a re-download.
        if data.get('config_version', '0.0.0') < '1.0.0':
            data = download_json(url)
    else:
        data = download_json(url)

    # Apply the overrides on top of whatever content was selected.
    data.update(modifications)

    # Save the modified content.
    with open(local_filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
if __name__ == '__main__':
    # Resolve the destination of the config file up front; this only expands
    # the home directory and touches neither the network nor the disk.
    config_file = os.path.join(os.path.expanduser('~'), 'magic-pdf.json')
    json_url = 'https://github.com/opendatalab/MinerU/raw/dev/magic-pdf.template.json'

    # Download only the listed model folders from the PDF-Extract-Kit repo.
    model_dir = snapshot_download(
        'opendatalab/PDF-Extract-Kit-1.0',
        allow_patterns=[
            "models/Layout/LayoutLMv3/*",
            "models/Layout/YOLO/*",
            "models/MFD/YOLO/*",
            "models/MFR/unimernet_small/*",
            "models/TabRec/TableMaster/*",
            "models/TabRec/StructEqTable/*",
        ],
    )

    # Download the layoutreader files matching the JSON/safetensors patterns.
    layoutreader_model_dir = snapshot_download(
        'hantian/layoutreader',
        allow_patterns=["*.json", "*.safetensors"],
    )

    # The config entry points at the "models" sub-folder of the snapshot.
    model_dir = f'{model_dir}/models'
    print(f'model_dir is: {model_dir}')
    print(f'layoutreader_model_dir is: {layoutreader_model_dir}')

    # Write both download locations into the user's config file.
    download_and_modify_json(
        json_url,
        config_file,
        {
            'models-dir': model_dir,
            'layoutreader-model-dir': layoutreader_model_dir,
        },
    )
    print(f'The configuration file has been configured successfully, the path is: {config_file}')
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment