"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "d1504a94bfeff72cc4fb6957e1f09da7ad811de6"
Unverified Commit e6da37dd authored by Xiaomeng Zhao's avatar Xiaomeng Zhao Committed by GitHub
Browse files

Merge pull request #1099 from myhloli/dev

refactor(magic_pdf): remove unused functions and simplify code
parents 79b58a1e 6a22b5ab
import collections
def get_main_text_font(pdf_docs):
font_names = collections.Counter()
for page in pdf_docs:
blocks = page.get_text('dict')['blocks']
if blocks is not None:
for block in blocks:
lines = block.get('lines')
if lines is not None:
for line in lines:
span_font = [(span['font'], len(span['text'])) for span in line['spans'] if
'font' in span and len(span['text']) > 0]
if span_font:
# main_text_font应该用基于字数最多的字体而不是span级别的统计
# font_names.append(font_name for font_name in span_font)
# block_fonts.append(font_name for font_name in span_font)
for font, count in span_font:
font_names[font] += count
main_text_font = font_names.most_common(1)[0][0]
return main_text_font
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment