Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
1fa55b76
Commit
1fa55b76
authored
Jun 15, 2025
by
myhloli
Browse files
Merge remote-tracking branch 'origin/dev' into dev
parents
98b8c4a9
f1997b49
Changes
24
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1290 additions
and
0 deletions
+1290
-0
projects/mcp/src/mineru/examples.py
projects/mcp/src/mineru/examples.py
+76
-0
projects/mcp/src/mineru/language.py
projects/mcp/src/mineru/language.py
+106
-0
projects/mcp/src/mineru/server.py
projects/mcp/src/mineru/server.py
+1060
-0
signatures/version1/cla.json
signatures/version1/cla.json
+48
-0
No files found.
projects/mcp/src/mineru/examples.py
0 → 100644
View file @
1fa55b76
"""演示如何使用 MinerU File转Markdown客户端的示例。"""
import
os
import
asyncio
from
mcp.client
import
MCPClient
async def convert_file_url_example():
    """Show how to convert remote files, given by URL, through the MCP client.

    Calls the ``convert_file_url`` tool twice against a client created for
    ``http://localhost:8000``: first with a single URL, then with several
    URLs packed into one multi-line string. Each result is printed.
    """
    mcp_client = MCPClient("http://localhost:8000")

    # Single URL conversion.
    single_result = await mcp_client.call(
        "convert_file_url",
        url="https://example.com/sample.pdf",
        enable_ocr=True,
    )
    print(f"转换结果: {single_result}")

    # Several URLs passed in one call as a newline-separated string.
    url_batch = """
    https://example.com/doc1.pdf
    https://example.com/doc2.pdf
    """
    batch_result = await mcp_client.call(
        "convert_file_url",
        url=url_batch,
        enable_ocr=True,
    )
    print(f"多个转换结果: {batch_result}")
async def convert_file_file_example():
    """Show how to convert a local file through the MCP client.

    The sample file is looked up at ``<root>/test_files/test.pdf`` where
    ``<root>`` is three directory levels above the directory containing
    this script. The ``convert_file_file`` tool result is printed.
    """
    mcp_client = MCPClient("http://localhost:8000")

    # Directory holding this script, as an absolute path.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Walk three levels up from the script directory to reach the root.
    project_root = script_dir
    for _ in range(3):
        project_root = os.path.dirname(project_root)

    sample_pdf = os.path.join(project_root, "test_files", "test.pdf")

    # Convert the single local file.
    conversion = await mcp_client.call(
        "convert_file_file",
        file_path=sample_pdf,
        enable_ocr=True,
    )
    print(f"文件转换结果: {conversion}")
async def get_api_status_example():
    """Show how to read the status and usage-help resources.

    Fetches the ``status://api`` and ``help://usage`` resources from a
    client created for ``http://localhost:8000`` and prints them; only the
    first 100 characters of the help text are shown.
    """
    mcp_client = MCPClient("http://localhost:8000")

    # API status resource.
    api_status = await mcp_client.get_resource("status://api")
    print(f"API 状态: {api_status}")

    # Usage help resource; print only the first 100 characters.
    usage_help = await mcp_client.get_resource("help://usage")
    preview = usage_help[:100]
    print(f"使用帮助: {preview}...")
async def main():
    """Run the example coroutines.

    When the ``MINERU_API_KEY`` environment variable is set (non-empty),
    the URL and local-file conversion examples run first. Otherwise a
    warning is printed and those two are skipped. The API status example
    runs last in both cases.
    """
    print("运行 File 到 Markdown 转换示例...")

    api_key_present = bool(os.environ.get("MINERU_API_KEY"))
    if api_key_present:
        # Key available: run the conversion examples.
        await convert_file_url_example()
        await convert_file_file_example()
    else:
        # No key: explain how to set it and skip the conversion examples.
        print("警告: MINERU_API_KEY 环境变量未设置。")
        print("使用以下命令设置: export MINERU_API_KEY=your_api_key")
        print("跳过需要 API 访问的示例...")

    # The status example runs regardless of whether a key is configured.
    await get_api_status_example()
# Run the examples only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
projects/mcp/src/mineru/language.py
0 → 100644
View file @
1fa55b76
"""MinerU支持的语言列表。"""
from
typing
import
Dict
,
List
# Supported languages.
#
# Each entry has three string fields:
#   name        - display name in Chinese
#   description - display name in English
#   code        - identifier used as the key of LANGUAGES_DICT; not always
#                 an ISO 639 code (e.g. "german", "japan", "chinese_cht")
#
# Fixes relative to the previous revision:
#   * "Ukranian" -> "Ukrainian" (spelling).
#   * The Avar ("ava") entry was listed twice; the duplicate is removed.
#   * "bho" (Bhojpuri) carried the Chinese name for Bengali.
#   * "gom" (Goan Konkani) carried the Chinese name for Bulgarian, which
#     also collided with the real "bg" entry.
# Codes are intentionally left untouched.
LANGUAGES: List[Dict[str, str]] = [
    {"name": "中文", "description": "Chinese & English", "code": "ch"},
    {"name": "英文", "description": "English", "code": "en"},
    {"name": "法文", "description": "French", "code": "fr"},
    {"name": "德文", "description": "German", "code": "german"},
    {"name": "日文", "description": "Japanese", "code": "japan"},
    {"name": "韩文", "description": "Korean", "code": "korean"},
    {"name": "中文繁体", "description": "Chinese Traditional", "code": "chinese_cht"},
    {"name": "意大利文", "description": "Italian", "code": "it"},
    {"name": "西班牙文", "description": "Spanish", "code": "es"},
    {"name": "葡萄牙文", "description": "Portuguese", "code": "pt"},
    {"name": "俄罗斯文", "description": "Russian", "code": "ru"},
    {"name": "阿拉伯文", "description": "Arabic", "code": "ar"},
    {"name": "印地文", "description": "Hindi", "code": "hi"},
    {"name": "维吾尔", "description": "Uyghur", "code": "ug"},
    {"name": "波斯文", "description": "Persian", "code": "fa"},
    {"name": "乌尔都文", "description": "Urdu", "code": "ur"},
    {"name": "塞尔维亚文(latin)", "description": "Serbian(latin)", "code": "rs_latin"},
    {"name": "欧西坦文", "description": "Occitan", "code": "oc"},
    {"name": "马拉地文", "description": "Marathi", "code": "mr"},
    {"name": "尼泊尔文", "description": "Nepali", "code": "ne"},
    {"name": "塞尔维亚文(cyrillic)", "description": "Serbian(cyrillic)", "code": "rs_cyrillic"},
    {"name": "毛利文", "description": "Maori", "code": "mi"},
    {"name": "马来文", "description": "Malay", "code": "ms"},
    {"name": "马耳他文", "description": "Maltese", "code": "mt"},
    {"name": "荷兰文", "description": "Dutch", "code": "nl"},
    {"name": "挪威文", "description": "Norwegian", "code": "no"},
    {"name": "波兰文", "description": "Polish", "code": "pl"},
    {"name": "罗马尼亚文", "description": "Romanian", "code": "ro"},
    {"name": "斯洛伐克文", "description": "Slovak", "code": "sk"},
    {"name": "斯洛文尼亚文", "description": "Slovenian", "code": "sl"},
    {"name": "阿尔巴尼亚文", "description": "Albanian", "code": "sq"},
    {"name": "瑞典文", "description": "Swedish", "code": "sv"},
    {"name": "西瓦希里文", "description": "Swahili", "code": "sw"},
    {"name": "塔加洛文", "description": "Tagalog", "code": "tl"},
    {"name": "土耳其文", "description": "Turkish", "code": "tr"},
    {"name": "乌兹别克文", "description": "Uzbek", "code": "uz"},
    {"name": "越南文", "description": "Vietnamese", "code": "vi"},
    {"name": "蒙古文", "description": "Mongolian", "code": "mn"},
    {"name": "车臣文", "description": "Chechen", "code": "che"},
    {"name": "哈里亚纳语", "description": "Haryanvi", "code": "bgc"},
    {"name": "保加利亚文", "description": "Bulgarian", "code": "bg"},
    {"name": "乌克兰文", "description": "Ukrainian", "code": "uk"},
    {"name": "白俄罗斯文", "description": "Belarusian", "code": "be"},
    {"name": "泰卢固文", "description": "Telugu", "code": "te"},
    {"name": "阿巴扎文", "description": "Abaza", "code": "abq"},
    {"name": "泰米尔文", "description": "Tamil", "code": "ta"},
    {"name": "南非荷兰文", "description": "Afrikaans", "code": "af"},
    {"name": "阿塞拜疆文", "description": "Azerbaijani", "code": "az"},
    {"name": "波斯尼亚文", "description": "Bosnian", "code": "bs"},
    {"name": "捷克文", "description": "Czech", "code": "cs"},
    {"name": "威尔士文", "description": "Welsh", "code": "cy"},
    {"name": "丹麦文", "description": "Danish", "code": "da"},
    {"name": "爱沙尼亚文", "description": "Estonian", "code": "et"},
    {"name": "爱尔兰文", "description": "Irish", "code": "ga"},
    {"name": "克罗地亚文", "description": "Croatian", "code": "hr"},
    {"name": "匈牙利文", "description": "Hungarian", "code": "hu"},
    {"name": "印尼文", "description": "Indonesian", "code": "id"},
    {"name": "冰岛文", "description": "Icelandic", "code": "is"},
    {"name": "库尔德文", "description": "Kurdish", "code": "ku"},
    {"name": "立陶宛文", "description": "Lithuanian", "code": "lt"},
    {"name": "拉脱维亚文", "description": "Latvian", "code": "lv"},
    {"name": "达尔瓦文", "description": "Dargwa", "code": "dar"},
    {"name": "因古什文", "description": "Ingush", "code": "inh"},
    {"name": "拉克文", "description": "Lak", "code": "lbe"},
    {"name": "莱兹甘文", "description": "Lezghian", "code": "lez"},
    {"name": "塔巴萨兰文", "description": "Tabassaran", "code": "tab"},
    {"name": "比尔哈文", "description": "Bihari", "code": "bh"},
    {"name": "迈蒂利文", "description": "Maithili", "code": "mai"},
    {"name": "昂加文", "description": "Angika", "code": "ang"},
    {"name": "博杰普尔文", "description": "Bhojpuri", "code": "bho"},
    {"name": "摩揭陀文", "description": "Magahi", "code": "mah"},
    {"name": "那格浦尔文", "description": "Nagpur", "code": "sck"},
    {"name": "尼瓦尔文", "description": "Newari", "code": "new"},
    {"name": "果阿孔卡尼文", "description": "Goan Konkani", "code": "gom"},
    {"name": "梵文", "description": "Sanskrit", "code": "sa"},
    {"name": "阿瓦尔文", "description": "Avar", "code": "ava"},
    {"name": "阿迪赫文", "description": "Adyghe", "code": "ady"},
    {"name": "巴利文", "description": "Pali", "code": "pi"},
    {"name": "拉丁文", "description": "Latin", "code": "la"},
]

# Map each language code to its entry for constant-time lookup. The values
# are the same dict objects that are stored in LANGUAGES.
LANGUAGES_DICT: Dict[str, Dict[str, str]] = {lang["code"]: lang for lang in LANGUAGES}
def get_language_list() -> List[Dict[str, str]]:
    """Return all supported language entries.

    Returns:
        The module-level ``LANGUAGES`` list itself (not a copy); each item
        is a dict with ``name``, ``description`` and ``code`` keys.
    """
    supported_languages = LANGUAGES
    return supported_languages
def get_language_by_code(code: str) -> Dict[str, str]:
    """Look up a language entry by its code.

    Args:
        code: Language code to look up in ``LANGUAGES_DICT``.

    Returns:
        The matching entry from ``LANGUAGES_DICT``. When the code is not
        present, a new placeholder entry is returned whose ``code`` field
        echoes the requested value.
    """
    try:
        return LANGUAGES_DICT[code]
    except KeyError:
        # Unknown code: hand back a placeholder instead of raising.
        return {"name": "未知", "description": "Unknown", "code": code}
projects/mcp/src/mineru/server.py
0 → 100644
View file @
1fa55b76
This diff is collapsed.
Click to expand it.
signatures/version1/cla.json
View file @
1fa55b76
...
...
@@ -263,6 +263,54 @@
"created_at"
:
"2025-04-30T09:25:31Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2411
},
{
"name"
:
"seedclaimer"
,
"id"
:
86753366
,
"comment_id"
:
2916194375
,
"created_at"
:
"2025-05-28T12:50:25Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2536
},
{
"name"
:
"liuzhenghua"
,
"id"
:
11787325
,
"comment_id"
:
2921092605
,
"created_at"
:
"2025-05-30T02:57:07Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2550
},
{
"name"
:
"PairZhu"
,
"id"
:
47098840
,
"comment_id"
:
2938149702
,
"created_at"
:
"2025-06-04T02:39:39Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2566
},
{
"name"
:
"AdrianWangs"
,
"id"
:
72337244
,
"comment_id"
:
2943818300
,
"created_at"
:
"2025-06-05T11:30:42Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2578
},
{
"name"
:
"YanzhenHuang"
,
"id"
:
86364920
,
"comment_id"
:
2968974232
,
"created_at"
:
"2025-06-13T04:17:08Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2620
},
{
"name"
:
"Ar-Hyk"
,
"id"
:
55748412
,
"comment_id"
:
2970512136
,
"created_at"
:
"2025-06-13T14:02:16Z"
,
"repoId"
:
765083837
,
"pullRequestNo"
:
2634
}
]
}
\ No newline at end of file
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment