Unverified Commit c479245e authored by icecraft's avatar icecraft Committed by GitHub
Browse files

feat: manager docs with sphinx (#737)



* feat: manager docs with sphinx

* fix: readthedocs configure

* feat: support multiple language

* fix: add .readthedocs.yaml

* fix: requirements.txt path

---------
Co-authored-by: icecraft <xurui1@pjlab.org.cn>
parent b9631f30
import json
import os
import requests
import json
from modelscope import snapshot_download
......@@ -27,13 +28,13 @@ def download_and_modify_json(url, local_filename, modifications):
if __name__ == '__main__':
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
layoutreader_model_dir = snapshot_download('ppaanngggg/layoutreader')
model_dir = model_dir + "/models"
print(f"model_dir is: {model_dir}")
print(f"layoutreader_model_dir is: {layoutreader_model_dir}")
model_dir = model_dir + '/models'
print(f'model_dir is: {model_dir}')
print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
json_url = 'https://gitee.com/myhloli/MinerU/raw/master/magic-pdf.template.json'
config_file_name = "magic-pdf.json"
home_dir = os.path.expanduser("~")
config_file_name = 'magic-pdf.json'
home_dir = os.path.expanduser('~')
config_file = os.path.join(home_dir, config_file_name)
json_mods = {
......@@ -42,5 +43,4 @@ if __name__ == '__main__':
}
download_and_modify_json(json_url, config_file, json_mods)
print(f"The configuration file has been configured successfully, the path is: {config_file}")
print(f'The configuration file has been configured successfully, the path is: {config_file}')
import json
import os
import requests
import json
from huggingface_hub import snapshot_download
......@@ -27,13 +28,13 @@ def download_and_modify_json(url, local_filename, modifications):
if __name__ == '__main__':
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
layoutreader_model_dir = snapshot_download('hantian/layoutreader')
model_dir = model_dir + "/models"
print(f"model_dir is: {model_dir}")
print(f"layoutreader_model_dir is: {layoutreader_model_dir}")
model_dir = model_dir + '/models'
print(f'model_dir is: {model_dir}')
print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
config_file_name = "magic-pdf.json"
home_dir = os.path.expanduser("~")
config_file_name = 'magic-pdf.json'
home_dir = os.path.expanduser('~')
config_file = os.path.join(home_dir, config_file_name)
json_mods = {
......@@ -42,5 +43,4 @@ if __name__ == '__main__':
}
download_and_modify_json(json_url, config_file, json_mods)
print(f"The configuration file has been configured successfully, the path is: {config_file}")
print(f'The configuration file has been configured successfully, the path is: {config_file}')
Model downloads are divided into initial downloads and updates to the model directory. Please refer to the corresponding documentation for instructions on how to proceed.
# Initial download of model files
### 1. Download the Model from Hugging Face
Use a Python Script to Download Model Files from Hugging Face
```bash
pip install huggingface_hub
wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py
python download_models_hf.py
```
The Python script will automatically download the model files and configure the model directory in the configuration file.
The configuration file can be found in the user directory, with the filename `magic-pdf.json`.
......@@ -18,7 +20,7 @@ The configuration file can be found in the user directory, with the filename `ma
## 1. Models downloaded via Git LFS
>Due to feedback from some users that downloading model files using git lfs was incomplete or resulted in corrupted model files, this method is no longer recommended.
> Some users reported that model files downloaded using git lfs were incomplete or corrupted, so this method is no longer recommended.
If you previously downloaded model files via git lfs, you can navigate to the previous download directory and use the `git pull` command to update the model.
......
......@@ -25,23 +25,23 @@ python download_models.py
python脚本会自动下载模型文件并配置好配置文件中的模型目录
配置文件可以在用户目录中找到,文件名为`magic-pdf.json`
> windows的用户目录为 "C:\\Users\\用户名", linux用户目录为 "/home/用户名", macOS用户目录为 "/Users/用户名"
> windows的用户目录为 "C:\\Users\\用户名", linux用户目录为 "/home/用户名", macOS用户目录为 "/Users/用户名"
# 此前下载过模型,如何更新
## 1. 通过git lfs下载过模型
>由于部分用户反馈通过git lfs下载模型文件遇到下载不全和模型文件损坏情况,现已不推荐使用该方式下载。
> 由于部分用户反馈通过git lfs下载模型文件遇到下载不全和模型文件损坏情况,现已不推荐使用该方式下载。
如此前通过 git lfs 下载过模型文件,可以进入到之前的下载目录中,通过`git pull`命令更新模型。
> 0.9.x及以后版本由于新增layout排序模型,且该模型和此前的模型不在同一仓库,不能通过`git pull`命令更新,需要单独下载。
>
>```
>from modelscope import snapshot_download
>snapshot_download('ppaanngggg/layoutreader')
>```
>
> ```
> from modelscope import snapshot_download
> snapshot_download('ppaanngggg/layoutreader')
> ```
## 2. 通过 Hugging Face 或 Model Scope 下载过模型
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment