Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhougaofeng
magic_pdf
Commits
78340ac2
Commit
78340ac2
authored
Oct 25, 2024
by
zhougaofeng
Browse files
Update doc_analyze_by_custom_model.py
parent
8b230796
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
5 deletions
+6
-5
magic_pdf/model/doc_analyze_by_custom_model.py
magic_pdf/model/doc_analyze_by_custom_model.py
+6
-5
No files found.
magic_pdf/model/doc_analyze_by_custom_model.py
View file @
78340ac2
...
@@ -95,7 +95,7 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
...
@@ -95,7 +95,7 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
else
:
else
:
logger
.
error
(
"Not allow model_name!"
)
logger
.
error
(
"Not allow model_name!"
)
exit
(
1
)
exit
(
1
)
model_init_cost
=
time
.
time
()
-
model_init_start
model_init_cost
=
round
(
time
.
time
()
-
model_init_start
,
2
)
logger
.
info
(
f
"model init cost:
{
model_init_cost
}
"
)
logger
.
info
(
f
"model init cost:
{
model_init_cost
}
"
)
else
:
else
:
logger
.
error
(
"use_inside_model is False, not allow to use inside model"
)
logger
.
error
(
"use_inside_model is False, not allow to use inside model"
)
...
@@ -104,11 +104,12 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
...
@@ -104,11 +104,12 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
return
custom_model
return
custom_model
def
doc_analyze
(
pdf_bytes
:
bytes
,
ocr
:
bool
=
False
,
show_log
:
bool
=
False
,
def
doc_analyze
(
model
,
pdf_bytes
:
bytes
,
ocr
:
bool
=
False
,
show_log
:
bool
=
False
,
start_page_id
=
0
,
end_page_id
=
None
):
start_page_id
=
0
,
end_page_id
=
None
):
model_manager
=
ModelSingleton
()
# model_manager = ModelSingleton()
custom_model
=
model_manager
.
get_model
(
ocr
,
show_log
)
# custom_model = model_manager.get_model(ocr, show_log)
custom_model
=
model
images
=
load_images_from_pdf
(
pdf_bytes
)
images
=
load_images_from_pdf
(
pdf_bytes
)
...
@@ -133,7 +134,7 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
...
@@ -133,7 +134,7 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
page_info
=
{
"page_no"
:
index
,
"height"
:
page_height
,
"width"
:
page_width
}
page_info
=
{
"page_no"
:
index
,
"height"
:
page_height
,
"width"
:
page_width
}
page_dict
=
{
"layout_dets"
:
result
,
"page_info"
:
page_info
}
page_dict
=
{
"layout_dets"
:
result
,
"page_info"
:
page_info
}
model_json
.
append
(
page_dict
)
model_json
.
append
(
page_dict
)
doc_analyze_cost
=
time
.
time
()
-
doc_analyze_start
doc_analyze_cost
=
round
(
time
.
time
()
-
doc_analyze_start
,
2
)
logger
.
info
(
f
"文件分析提取截图共耗时:
{
doc_analyze_cost
}
"
)
logger
.
info
(
f
"文件分析提取截图共耗时:
{
doc_analyze_cost
}
"
)
# logger.info(f'model_json:\n{model_json}')
# logger.info(f'model_json:\n{model_json}')
return
model_json
return
model_json
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment