From cbba27b4f5b5eff17e4bd9472f0bc09b7357b488 Mon Sep 17 00:00:00 2001 From: myhloli Date: Wed, 28 May 2025 20:17:26 +0800 Subject: [PATCH] refactor: reorganize project structure and update import paths --- mineru/__init__.py | 1 + mineru/backend/pipeline/batch_analyze.py | 214 + .../pipeline/doc_analyze_by_custom_model.py | 235 + mineru/backend/pipeline/magic_model.py | 771 + mineru/backend/pipeline/model_init.py | 190 + mineru/backend/pipeline/model_list.py | 6 + mineru/backend/vlm/token_to_middle_json.py | 10 +- mineru/backend/vlm/vlm_analyze.py | 2 +- mineru/model/__init__.py | 1 + mineru/model/layout/__init__.py | 1 + mineru/model/layout/doclayout_yolo.py | 64 + mineru/model/mfd/__init__.py | 1 + mineru/model/mfd/yolo_v8.py | 33 + mineru/model/mfr/__init__.py | 1 + mineru/model/mfr/unimernet/Unimernet.py | 135 + mineru/model/mfr/unimernet/__init__.py | 0 .../mfr/unimernet/unimernet_hf/__init__.py | 13 + .../unimernet_hf/modeling_unimernet.py | 490 + .../unimernet_hf/unimer_mbart/__init__.py | 8 + .../configuration_unimer_mbart.py | 163 + .../unimer_mbart/modeling_unimer_mbart.py | 2351 ++ .../unimer_mbart/tokenization_unimer_mbart.py | 0 .../unimernet_hf/unimer_swin/__init__.py | 9 + .../unimer_swin/configuration_unimer_swin.py | 132 + .../image_processing_unimer_swin.py | 132 + .../unimer_swin/modeling_unimer_swin.py | 1084 + mineru/model/ocr/__init__.py | 1 + .../model/ocr/paddleocr2pytorch/__init__.py | 1 + .../ocr/paddleocr2pytorch/pytorch_paddle.py | 199 + .../paddleocr2pytorch/pytorchocr/__init__.py | 0 .../pytorchocr/base_ocr_v20.py | 39 + .../pytorchocr/data/__init__.py | 8 + .../pytorchocr/data/imaug/__init__.py | 48 + .../pytorchocr/data/imaug/operators.py | 418 + .../pytorchocr/modeling/__init__.py | 0 .../modeling/architectures/__init__.py | 25 + .../modeling/architectures/base_model.py | 105 + .../pytorchocr/modeling/backbones/__init__.py | 63 + .../modeling/backbones/det_mobilenet_v3.py | 269 + .../modeling/backbones/rec_hgnet.py | 290 + .../modeling/backbones/rec_lcnetv3.py | 516 + .../modeling/backbones/rec_mobilenet_v3.py | 136 + .../modeling/backbones/rec_mv1_enhance.py | 234 + .../modeling/backbones/rec_pphgnetv2.py | 810 + .../modeling/backbones/rec_svtrnet.py | 638 + .../pytorchocr/modeling/common.py | 76 + .../pytorchocr/modeling/heads/__init__.py | 43 + .../pytorchocr/modeling/heads/cls_head.py | 23 + .../pytorchocr/modeling/heads/det_db_head.py | 109 + .../pytorchocr/modeling/heads/rec_ctc_head.py | 54 + .../modeling/heads/rec_multi_head.py | 58 + .../pytorchocr/modeling/necks/__init__.py | 29 + .../pytorchocr/modeling/necks/db_fpn.py | 456 + .../pytorchocr/modeling/necks/intracl.py | 117 + .../pytorchocr/modeling/necks/rnn.py | 241 + .../pytorchocr/postprocess/__init__.py | 33 + .../pytorchocr/postprocess/cls_postprocess.py | 20 + .../pytorchocr/postprocess/db_postprocess.py | 179 + .../pytorchocr/postprocess/rec_postprocess.py | 690 + .../pytorchocr/utils/__init__.py | 0 .../utils/resources/arch_config.yaml | 476 + .../utils/resources/dict/arabic_dict.txt | 162 + .../utils/resources/dict/chinese_cht_dict.txt | 8421 +++++++ .../utils/resources/dict/cyrillic_dict.txt | 163 + .../utils/resources/dict/devanagari_dict.txt | 167 + .../utils/resources/dict/en_dict.txt | 95 + .../utils/resources/dict/japan_dict.txt | 4399 ++++ .../utils/resources/dict/ka_dict.txt | 153 + .../utils/resources/dict/korean_dict.txt | 3688 ++++ .../utils/resources/dict/latin_dict.txt | 185 + .../utils/resources/dict/ppocr_keys_v1.txt | 6623 ++++++ 
.../utils/resources/dict/ppocrv4_doc_dict.txt | 15629 +++++++++++++ .../utils/resources/dict/ppocrv5_dict.txt | 18383 ++++++++++++++++ .../utils/resources/dict/ta_dict.txt | 128 + .../utils/resources/dict/te_dict.txt | 151 + .../utils/resources/models_config.yml | 65 + .../ocr/paddleocr2pytorch/tools/__init__.py | 1 + .../paddleocr2pytorch/tools/infer/__init__.py | 1 + .../tools/infer/predict_cls.py | 106 + .../tools/infer/predict_det.py | 217 + .../tools/infer/predict_rec.py | 446 + .../tools/infer/predict_system.py | 104 + .../tools/infer/pytorchocr_utility.py | 227 + mineru/model/reading_order/__init__.py | 1 + mineru/model/reading_order/layout_reader.py | 125 + mineru/model/reading_order/xycut.py | 242 + mineru/model/table/__init__.py | 1 + mineru/model/table/rapid_table.py | 79 + mineru/{libs => utils}/boxbase.py | 0 mineru/{libs => utils}/cut_image.py | 0 mineru/{libs => utils}/draw_bbox.py | 0 mineru/{libs => utils}/enum_class.py | 0 mineru/{libs => utils}/hash_utils.py | 0 mineru/{libs => utils}/magic_model.py | 0 mineru/utils/model_utils.py | 323 + mineru/utils/ocr_utils.py | 401 + mineru/{libs => utils}/pdf_image_tools.py | 4 +- mineru/{libs => }/version.py | 0 98 files changed, 73133 insertions(+), 8 deletions(-) create mode 100644 mineru/__init__.py create mode 100644 mineru/backend/pipeline/batch_analyze.py create mode 100644 mineru/backend/pipeline/doc_analyze_by_custom_model.py create mode 100644 mineru/backend/pipeline/magic_model.py create mode 100644 mineru/backend/pipeline/model_init.py create mode 100644 mineru/backend/pipeline/model_list.py create mode 100644 mineru/model/layout/__init__.py create mode 100644 mineru/model/layout/doclayout_yolo.py create mode 100644 mineru/model/mfd/__init__.py create mode 100644 mineru/model/mfd/yolo_v8.py create mode 100644 mineru/model/mfr/__init__.py create mode 100644 mineru/model/mfr/unimernet/Unimernet.py create mode 100644 mineru/model/mfr/unimernet/__init__.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/__init__.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py create mode 100644 mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py create mode 100644 mineru/model/ocr/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/__init__.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py create mode 100644 
mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt create mode 100644 
mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt create mode 100644 mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml create mode 100644 mineru/model/ocr/paddleocr2pytorch/tools/__init__.py create mode 100644 mineru/model/ocr/paddleocr2pytorch/tools/infer/__init__.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_cls.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_det.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_rec.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_system.py create mode 100755 mineru/model/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py create mode 100644 mineru/model/reading_order/__init__.py create mode 100644 mineru/model/reading_order/layout_reader.py create mode 100644 mineru/model/reading_order/xycut.py create mode 100644 mineru/model/table/__init__.py create mode 100644 mineru/model/table/rapid_table.py rename mineru/{libs => utils}/boxbase.py (100%) rename mineru/{libs => utils}/cut_image.py (100%) rename mineru/{libs => utils}/draw_bbox.py (100%) rename mineru/{libs => utils}/enum_class.py (100%) rename mineru/{libs => utils}/hash_utils.py (100%) rename mineru/{libs => utils}/magic_model.py (100%) create mode 100644 mineru/utils/model_utils.py create mode 100644 mineru/utils/ocr_utils.py rename mineru/{libs => utils}/pdf_image_tools.py (95%) rename mineru/{libs => }/version.py (100%) diff --git a/mineru/__init__.py b/mineru/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. 
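[Editor's note] For reviewers, a minimal sketch of how downstream import paths change under this reorganization (mineru/libs/* moves to mineru/utils/*, version.py moves to the package root). The specific modules shown mirror the renames in the diffstat and the hunks in token_to_middle_json.py and vlm_analyze.py later in this patch; other call sites follow the same pattern.

```python
# Illustrative only: import paths before vs. after this patch.

# Before:
# from mineru.libs.hash_utils import str_md5
# from mineru.libs.pdf_image_tools import load_images_from_pdf
# from mineru.libs.version import __version__

# After:
from mineru.utils.hash_utils import str_md5
from mineru.utils.pdf_image_tools import load_images_from_pdf
from mineru.version import __version__
```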
diff --git a/mineru/backend/pipeline/batch_analyze.py b/mineru/backend/pipeline/batch_analyze.py new file mode 100644 index 00000000..eb56d434 --- /dev/null +++ b/mineru/backend/pipeline/batch_analyze.py @@ -0,0 +1,214 @@ +import cv2 +from loguru import logger +from tqdm import tqdm + +from .model_init import AtomModelSingleton +from ...utils.model_utils import crop_img, get_res_list_from_layout_res, get_coords_and_area +from ...utils.ocr_utils import get_adjusted_mfdetrec_res, get_ocr_result_list + +YOLO_LAYOUT_BASE_BATCH_SIZE = 1 +MFD_BASE_BATCH_SIZE = 1 +MFR_BASE_BATCH_SIZE = 16 + + +class BatchAnalyze: + def __init__(self, model_manager, batch_ratio: int, formula_enable, table_enable): + self.batch_ratio = batch_ratio + self.formula_enable = formula_enable + self.table_enable = table_enable + self.model_manager = model_manager + + def __call__(self, images_with_extra_info: list) -> list: + if len(images_with_extra_info) == 0: + return [] + + images_layout_res = [] + + self.model = self.model_manager.get_model( + lang=None, + formula_enable=self.formula_enable, + table_enable=self.table_enable, + ) + atom_model_manager = AtomModelSingleton() + + images = [image for image, _, _ in images_with_extra_info] + + # doclayout_yolo + layout_images = [] + for image_index, image in enumerate(images): + layout_images.append(image) + + + images_layout_res += self.model.layout_model.batch_predict( + layout_images, YOLO_LAYOUT_BASE_BATCH_SIZE + ) + + if self.formula_enable: + # 公式检测 + images_mfd_res = self.model.mfd_model.batch_predict( + images, MFD_BASE_BATCH_SIZE + ) + + # 公式识别 + images_formula_list = self.model.mfr_model.batch_predict( + images_mfd_res, + images, + batch_size=self.batch_ratio * MFR_BASE_BATCH_SIZE, + ) + mfr_count = 0 + for image_index in range(len(images)): + images_layout_res[image_index] += images_formula_list[image_index] + mfr_count += len(images_formula_list[image_index]) + + # 清理显存 + # clean_vram(self.model.device, vram_threshold=8) + + ocr_res_list_all_page = [] + table_res_list_all_page = [] + for index in range(len(images)): + _, ocr_enable, _lang = images_with_extra_info[index] + layout_res = images_layout_res[index] + np_array_img = images[index] + + ocr_res_list, table_res_list, single_page_mfdetrec_res = ( + get_res_list_from_layout_res(layout_res) + ) + + ocr_res_list_all_page.append({'ocr_res_list':ocr_res_list, + 'lang':_lang, + 'ocr_enable':ocr_enable, + 'np_array_img':np_array_img, + 'single_page_mfdetrec_res':single_page_mfdetrec_res, + 'layout_res':layout_res, + }) + + for table_res in table_res_list: + table_img, _ = crop_img(table_res, np_array_img) + table_res_list_all_page.append({'table_res':table_res, + 'lang':_lang, + 'table_img':table_img, + }) + + # 文本框检测 + + for ocr_res_list_dict in tqdm(ocr_res_list_all_page, desc="OCR-det Predict"): + # Process each area that requires OCR processing + _lang = ocr_res_list_dict['lang'] + # Get OCR results for this language's images + ocr_model = atom_model_manager.get_atom_model( + atom_model_name='ocr', + det_db_box_thresh=0.3, + lang=_lang + ) + for res in ocr_res_list_dict['ocr_res_list']: + new_image, useful_list = crop_img( + res, ocr_res_list_dict['np_array_img'], crop_paste_x=50, crop_paste_y=50 + ) + adjusted_mfdetrec_res = get_adjusted_mfdetrec_res( + ocr_res_list_dict['single_page_mfdetrec_res'], useful_list + ) + + # OCR-det + new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR) + ocr_res = ocr_model.ocr( + new_image, mfd_res=adjusted_mfdetrec_res, rec=False + )[0] + + # Integration results + if 
ocr_res: + ocr_result_list = get_ocr_result_list(ocr_res, useful_list, ocr_res_list_dict['ocr_enable'], new_image, _lang) + + if res["category_id"] == 3: + # ocr_result_list中所有bbox的面积之和 + ocr_res_area = sum(get_coords_and_area(ocr_res_item)[4] for ocr_res_item in ocr_result_list if 'poly' in ocr_res_item) + # 求ocr_res_area和res的面积的比值 + res_area = get_coords_and_area(res)[4] + if res_area > 0: + ratio = ocr_res_area / res_area + if ratio > 0.25: + res["category_id"] = 1 + else: + continue + + ocr_res_list_dict['layout_res'].extend(ocr_result_list) + + # 表格识别 table recognition + if self.table_enable: + for table_res_dict in tqdm(table_res_list_all_page, desc="Table Predict"): + _lang = table_res_dict['lang'] + table_model = atom_model_manager.get_atom_model( + atom_model_name='table', + device='cpu', + lang=_lang, + table_sub_model_name='slanet_plus' + ) + html_code, table_cell_bboxes, logic_points, elapse = table_model.predict(table_res_dict['table_img']) + # 判断是否返回正常 + if html_code: + expected_ending = html_code.strip().endswith( + '' + ) or html_code.strip().endswith('') + if expected_ending: + table_res_dict['table_res']['html'] = html_code + else: + logger.warning( + 'table recognition processing fails, not found expected HTML table end' + ) + else: + logger.warning( + 'table recognition processing fails, not get html return' + ) + + # Create dictionaries to store items by language + need_ocr_lists_by_lang = {} # Dict of lists for each language + img_crop_lists_by_lang = {} # Dict of lists for each language + + for layout_res in images_layout_res: + for layout_res_item in layout_res: + if layout_res_item['category_id'] in [15]: + if 'np_img' in layout_res_item and 'lang' in layout_res_item: + lang = layout_res_item['lang'] + + # Initialize lists for this language if not exist + if lang not in need_ocr_lists_by_lang: + need_ocr_lists_by_lang[lang] = [] + img_crop_lists_by_lang[lang] = [] + + # Add to the appropriate language-specific lists + need_ocr_lists_by_lang[lang].append(layout_res_item) + img_crop_lists_by_lang[lang].append(layout_res_item['np_img']) + + # Remove the fields after adding to lists + layout_res_item.pop('np_img') + layout_res_item.pop('lang') + + if len(img_crop_lists_by_lang) > 0: + + # Process OCR by language + total_processed = 0 + + # Process each language separately + for lang, img_crop_list in img_crop_lists_by_lang.items(): + if len(img_crop_list) > 0: + # Get OCR results for this language's images + + ocr_model = atom_model_manager.get_atom_model( + atom_model_name='ocr', + det_db_box_thresh=0.3, + lang=lang + ) + ocr_res_list = ocr_model.ocr(img_crop_list, det=False, tqdm_enable=True)[0] + + # Verify we have matching counts + assert len(ocr_res_list) == len( + need_ocr_lists_by_lang[lang]), f'ocr_res_list: {len(ocr_res_list)}, need_ocr_list: {len(need_ocr_lists_by_lang[lang])} for lang: {lang}' + + # Process OCR results for this language + for index, layout_res_item in enumerate(need_ocr_lists_by_lang[lang]): + ocr_text, ocr_score = ocr_res_list[index] + layout_res_item['text'] = ocr_text + layout_res_item['score'] = float(f"{ocr_score:.3f}") + + total_processed += len(img_crop_list) + + return images_layout_res diff --git a/mineru/backend/pipeline/doc_analyze_by_custom_model.py b/mineru/backend/pipeline/doc_analyze_by_custom_model.py new file mode 100644 index 00000000..6e2fb977 --- /dev/null +++ b/mineru/backend/pipeline/doc_analyze_by_custom_model.py @@ -0,0 +1,235 @@ +import os +import time +import numpy as np +import torch +from 
mineru.backend.pipeline.model_init import MineruPipelineModel + +os.environ['FLAGS_npu_jit_compile'] = '0' # 关闭paddle的jit编译 +os.environ['FLAGS_use_stride_kernel'] = '0' +os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 让mps可以fallback +os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 + + +from loguru import logger + +from ...utils.model_utils import get_vram, clean_memory +from magic_pdf.libs.config_reader import (get_device, get_formula_config, + get_layout_config, + get_local_models_dir, + get_table_recog_config) + +class ModelSingleton: + _instance = None + _models = {} + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def get_model( + self, + lang=None, + formula_enable=None, + table_enable=None, + ): + key = (lang, formula_enable, table_enable) + if key not in self._models: + self._models[key] = custom_model_init( + lang=lang, + formula_enable=formula_enable, + table_enable=table_enable, + ) + return self._models[key] + + +def custom_model_init( + lang=None, + formula_enable=None, + table_enable=None, +): + model_init_start = time.time() + # 从配置文件读取model-dir和device + local_models_dir = get_local_models_dir() + device = get_device() + + formula_config = get_formula_config() + if formula_enable is not None: + formula_config['enable'] = formula_enable + + table_config = get_table_recog_config() + if table_enable is not None: + table_config['enable'] = table_enable + + model_input = { + 'models_dir': local_models_dir, + 'device': device, + 'table_config': table_config, + 'formula_config': formula_config, + 'lang': lang, + } + + custom_model = MineruPipelineModel(**model_input) + + model_init_cost = time.time() - model_init_start + logger.info(f'model init cost: {model_init_cost}') + + return custom_model + +def doc_analyze( + dataset: Dataset, + ocr: bool = False, + start_page_id=0, + end_page_id=None, + lang=None, + formula_enable=None, + table_enable=None, +): + end_page_id = ( + end_page_id + if end_page_id is not None and end_page_id >= 0 + else len(dataset) - 1 + ) + + MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100)) + images = [] + page_wh_list = [] + for index in range(len(dataset)): + if start_page_id <= index <= end_page_id: + page_data = dataset.get_page(index) + img_dict = page_data.get_image() + images.append(img_dict['img']) + page_wh_list.append((img_dict['width'], img_dict['height'])) + + images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(images))] + + if len(images) >= MIN_BATCH_INFERENCE_SIZE: + batch_size = MIN_BATCH_INFERENCE_SIZE + batch_images = [images_with_extra_info[i:i+batch_size] for i in range(0, len(images_with_extra_info), batch_size)] + else: + batch_images = [images_with_extra_info] + + results = [] + processed_images_count = 0 + for index, batch_image in enumerate(batch_images): + processed_images_count += len(batch_image) + logger.info(f'Batch {index + 1}/{len(batch_images)}: {processed_images_count} pages/{len(images_with_extra_info)} pages') + result = may_batch_image_analyze(batch_image, formula_enable, table_enable) + results.extend(result) + + model_json = [] + for index in range(len(dataset)): + if start_page_id <= index <= end_page_id: + result = results.pop(0) + page_width, page_height = page_wh_list.pop(0) + else: + result = [] + page_height = 0 + page_width = 0 + + page_info = {'page_no': index, 'width': page_width, 'height': page_height} + page_dict = {'layout_dets': result, 
'page_info': page_info} + model_json.append(page_dict) + + return model_json + +def batch_doc_analyze( + datasets: list[Dataset], + parse_method: str = 'auto', + lang=None, + formula_enable=None, + table_enable=None, +): + MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100)) + batch_size = MIN_BATCH_INFERENCE_SIZE + page_wh_list = [] + + images_with_extra_info = [] + for dataset in datasets: + + ocr = False + if parse_method == 'auto': + if dataset.classify() == 'txt': + ocr = False + elif dataset.classify() == 'ocr': + ocr = True + elif parse_method == 'ocr': + ocr = True + elif parse_method == 'txt': + ocr = False + + _lang = dataset._lang + + for index in range(len(dataset)): + page_data = dataset.get_page(index) + img_dict = page_data.get_image() + page_wh_list.append((img_dict['width'], img_dict['height'])) + images_with_extra_info.append((img_dict['img'], ocr, _lang)) + + batch_images = [images_with_extra_info[i:i+batch_size] for i in range(0, len(images_with_extra_info), batch_size)] + results = [] + processed_images_count = 0 + for index, batch_image in enumerate(batch_images): + processed_images_count += len(batch_image) + logger.info(f'Batch {index + 1}/{len(batch_images)}: {processed_images_count} pages/{len(images_with_extra_info)} pages') + result = may_batch_image_analyze(batch_image, formula_enable, table_enable) + results.extend(result) + + infer_results = [] + for index in range(len(datasets)): + dataset = datasets[index] + model_json = [] + for i in range(len(dataset)): + result = results.pop(0) + page_width, page_height = page_wh_list.pop(0) + page_info = {'page_no': i, 'width': page_width, 'height': page_height} + page_dict = {'layout_dets': result, 'page_info': page_info} + model_json.append(page_dict) + infer_results.append(model_json) + return infer_results + + +def may_batch_image_analyze( + images_with_extra_info: list[(np.ndarray, bool, str)], + formula_enable=None, + table_enable=None): + # os.environ['CUDA_VISIBLE_DEVICES'] = str(idx) + + from .batch_analyze import BatchAnalyze + + model_manager = ModelSingleton() + + batch_ratio = 1 + device = get_device() + + if str(device).startswith('npu'): + import torch_npu + if torch_npu.npu.is_available(): + torch.npu.set_compile_mode(jit_compile=False) + + if str(device).startswith('npu') or str(device).startswith('cuda'): + vram = get_vram(device) + if vram is not None: + gpu_memory = int(os.getenv('VIRTUAL_VRAM_SIZE', round(vram))) + if gpu_memory >= 16: + batch_ratio = 16 + elif gpu_memory >= 12: + batch_ratio = 8 + elif gpu_memory >= 8: + batch_ratio = 4 + elif gpu_memory >= 6: + batch_ratio = 2 + else: + batch_ratio = 1 + logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}') + else: + # Default batch_ratio when VRAM can't be determined + batch_ratio = 1 + logger.info(f'Could not determine GPU memory, using default batch_ratio: {batch_ratio}') + + batch_model = BatchAnalyze(model_manager, batch_ratio, formula_enable, table_enable) + results = batch_model(images_with_extra_info) + + clean_memory(get_device()) + + return results \ No newline at end of file diff --git a/mineru/backend/pipeline/magic_model.py b/mineru/backend/pipeline/magic_model.py new file mode 100644 index 00000000..b5922d35 --- /dev/null +++ b/mineru/backend/pipeline/magic_model.py @@ -0,0 +1,771 @@ +import enum + +from magic_pdf.config.model_block_type import ModelBlockTypeEnum +from magic_pdf.config.ocr_content_type import CategoryId, ContentType +from magic_pdf.data.dataset import Dataset +from 
magic_pdf.libs.boxbase import (_is_in, bbox_distance, bbox_relative_pos, + calculate_iou) +from magic_pdf.libs.coordinate_transform import get_scale_ratio +from magic_pdf.pre_proc.remove_bbox_overlap import _remove_overlap_between_bbox + +CAPATION_OVERLAP_AREA_RATIO = 0.6 +MERGE_BOX_OVERLAP_AREA_RATIO = 1.1 + + +class PosRelationEnum(enum.Enum): + LEFT = 'left' + RIGHT = 'right' + UP = 'up' + BOTTOM = 'bottom' + ALL = 'all' + + +class MagicModel: + """每个函数没有得到元素的时候返回空list.""" + + def __fix_axis(self): + for model_page_info in self.__model_list: + need_remove_list = [] + page_no = model_page_info['page_info']['page_no'] + horizontal_scale_ratio, vertical_scale_ratio = get_scale_ratio( + model_page_info, self.__docs.get_page(page_no) + ) + layout_dets = model_page_info['layout_dets'] + for layout_det in layout_dets: + + if layout_det.get('bbox') is not None: + # 兼容直接输出bbox的模型数据,如paddle + x0, y0, x1, y1 = layout_det['bbox'] + else: + # 兼容直接输出poly的模型数据,如xxx + x0, y0, _, _, x1, y1, _, _ = layout_det['poly'] + + bbox = [ + int(x0 / horizontal_scale_ratio), + int(y0 / vertical_scale_ratio), + int(x1 / horizontal_scale_ratio), + int(y1 / vertical_scale_ratio), + ] + layout_det['bbox'] = bbox + # 删除高度或者宽度小于等于0的spans + if bbox[2] - bbox[0] <= 0 or bbox[3] - bbox[1] <= 0: + need_remove_list.append(layout_det) + for need_remove in need_remove_list: + layout_dets.remove(need_remove) + + def __fix_by_remove_low_confidence(self): + for model_page_info in self.__model_list: + need_remove_list = [] + layout_dets = model_page_info['layout_dets'] + for layout_det in layout_dets: + if layout_det['score'] <= 0.05: + need_remove_list.append(layout_det) + else: + continue + for need_remove in need_remove_list: + layout_dets.remove(need_remove) + + def __fix_by_remove_high_iou_and_low_confidence(self): + for model_page_info in self.__model_list: + need_remove_list = [] + layout_dets = model_page_info['layout_dets'] + for layout_det1 in layout_dets: + for layout_det2 in layout_dets: + if layout_det1 == layout_det2: + continue + if layout_det1['category_id'] in [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + ] and layout_det2['category_id'] in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + if ( + calculate_iou(layout_det1['bbox'], layout_det2['bbox']) + > 0.9 + ): + if layout_det1['score'] < layout_det2['score']: + layout_det_need_remove = layout_det1 + else: + layout_det_need_remove = layout_det2 + + if layout_det_need_remove not in need_remove_list: + need_remove_list.append(layout_det_need_remove) + else: + continue + else: + continue + for need_remove in need_remove_list: + layout_dets.remove(need_remove) + + def __init__(self, model_list: list, docs: Dataset): + self.__model_list = model_list + self.__docs = docs + """为所有模型数据添加bbox信息(缩放,poly->bbox)""" + self.__fix_axis() + """删除置信度特别低的模型数据(<0.05),提高质量""" + self.__fix_by_remove_low_confidence() + """删除高iou(>0.9)数据中置信度较低的那个""" + self.__fix_by_remove_high_iou_and_low_confidence() + self.__fix_footnote() + + def _bbox_distance(self, bbox1, bbox2): + left, right, bottom, top = bbox_relative_pos(bbox1, bbox2) + flags = [left, right, bottom, top] + count = sum([1 if v else 0 for v in flags]) + if count > 1: + return float('inf') + if left or right: + l1 = bbox1[3] - bbox1[1] + l2 = bbox2[3] - bbox2[1] + else: + l1 = bbox1[2] - bbox1[0] + l2 = bbox2[2] - bbox2[0] + + if l2 > l1 and (l2 - l1) / l1 > 0.3: + return float('inf') + + return bbox_distance(bbox1, bbox2) + + def __fix_footnote(self): + # 3: figure, 5: table, 7: footnote + for model_page_info in self.__model_list: + 
footnotes = [] + figures = [] + tables = [] + + for obj in model_page_info['layout_dets']: + if obj['category_id'] == 7: + footnotes.append(obj) + elif obj['category_id'] == 3: + figures.append(obj) + elif obj['category_id'] == 5: + tables.append(obj) + if len(footnotes) * len(figures) == 0: + continue + dis_figure_footnote = {} + dis_table_footnote = {} + + for i in range(len(footnotes)): + for j in range(len(figures)): + pos_flag_count = sum( + list( + map( + lambda x: 1 if x else 0, + bbox_relative_pos( + footnotes[i]['bbox'], figures[j]['bbox'] + ), + ) + ) + ) + if pos_flag_count > 1: + continue + dis_figure_footnote[i] = min( + self._bbox_distance(figures[j]['bbox'], footnotes[i]['bbox']), + dis_figure_footnote.get(i, float('inf')), + ) + for i in range(len(footnotes)): + for j in range(len(tables)): + pos_flag_count = sum( + list( + map( + lambda x: 1 if x else 0, + bbox_relative_pos( + footnotes[i]['bbox'], tables[j]['bbox'] + ), + ) + ) + ) + if pos_flag_count > 1: + continue + + dis_table_footnote[i] = min( + self._bbox_distance(tables[j]['bbox'], footnotes[i]['bbox']), + dis_table_footnote.get(i, float('inf')), + ) + for i in range(len(footnotes)): + if i not in dis_figure_footnote: + continue + if dis_table_footnote.get(i, float('inf')) > dis_figure_footnote[i]: + footnotes[i]['category_id'] = CategoryId.ImageFootnote + + def __reduct_overlap(self, bboxes): + N = len(bboxes) + keep = [True] * N + for i in range(N): + for j in range(N): + if i == j: + continue + if _is_in(bboxes[i]['bbox'], bboxes[j]['bbox']): + keep[i] = False + return [bboxes[i] for i in range(N) if keep[i]] + + def __tie_up_category_by_distance_v2( + self, + page_no: int, + subject_category_id: int, + object_category_id: int, + priority_pos: PosRelationEnum, + ): + """_summary_ + + Args: + page_no (int): _description_ + subject_category_id (int): _description_ + object_category_id (int): _description_ + priority_pos (PosRelationEnum): _description_ + + Returns: + _type_: _description_ + """ + AXIS_MULPLICITY = 0.5 + subjects = self.__reduct_overlap( + list( + map( + lambda x: {'bbox': x['bbox'], 'score': x['score']}, + filter( + lambda x: x['category_id'] == subject_category_id, + self.__model_list[page_no]['layout_dets'], + ), + ) + ) + ) + + objects = self.__reduct_overlap( + list( + map( + lambda x: {'bbox': x['bbox'], 'score': x['score']}, + filter( + lambda x: x['category_id'] == object_category_id, + self.__model_list[page_no]['layout_dets'], + ), + ) + ) + ) + M = len(objects) + + subjects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) + objects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) + + sub_obj_map_h = {i: [] for i in range(len(subjects))} + + dis_by_directions = { + 'top': [[-1, float('inf')]] * M, + 'bottom': [[-1, float('inf')]] * M, + 'left': [[-1, float('inf')]] * M, + 'right': [[-1, float('inf')]] * M, + } + + for i, obj in enumerate(objects): + l_x_axis, l_y_axis = ( + obj['bbox'][2] - obj['bbox'][0], + obj['bbox'][3] - obj['bbox'][1], + ) + axis_unit = min(l_x_axis, l_y_axis) + for j, sub in enumerate(subjects): + + bbox1, bbox2, _ = _remove_overlap_between_bbox( + objects[i]['bbox'], subjects[j]['bbox'] + ) + left, right, bottom, top = bbox_relative_pos(bbox1, bbox2) + flags = [left, right, bottom, top] + if sum([1 if v else 0 for v in flags]) > 1: + continue + + if left: + if dis_by_directions['left'][i][1] > bbox_distance( + obj['bbox'], sub['bbox'] + ): + dis_by_directions['left'][i] = [ + j, + bbox_distance(obj['bbox'], sub['bbox']), + ] + if right: + if 
dis_by_directions['right'][i][1] > bbox_distance( + obj['bbox'], sub['bbox'] + ): + dis_by_directions['right'][i] = [ + j, + bbox_distance(obj['bbox'], sub['bbox']), + ] + if bottom: + if dis_by_directions['bottom'][i][1] > bbox_distance( + obj['bbox'], sub['bbox'] + ): + dis_by_directions['bottom'][i] = [ + j, + bbox_distance(obj['bbox'], sub['bbox']), + ] + if top: + if dis_by_directions['top'][i][1] > bbox_distance( + obj['bbox'], sub['bbox'] + ): + dis_by_directions['top'][i] = [ + j, + bbox_distance(obj['bbox'], sub['bbox']), + ] + + if ( + dis_by_directions['top'][i][1] != float('inf') + and dis_by_directions['bottom'][i][1] != float('inf') + and priority_pos in (PosRelationEnum.BOTTOM, PosRelationEnum.UP) + ): + RATIO = 3 + if ( + abs( + dis_by_directions['top'][i][1] + - dis_by_directions['bottom'][i][1] + ) + < RATIO * axis_unit + ): + + if priority_pos == PosRelationEnum.BOTTOM: + sub_obj_map_h[dis_by_directions['bottom'][i][0]].append(i) + else: + sub_obj_map_h[dis_by_directions['top'][i][0]].append(i) + continue + + if dis_by_directions['left'][i][1] != float('inf') or dis_by_directions[ + 'right' + ][i][1] != float('inf'): + if dis_by_directions['left'][i][1] != float( + 'inf' + ) and dis_by_directions['right'][i][1] != float('inf'): + if AXIS_MULPLICITY * axis_unit >= abs( + dis_by_directions['left'][i][1] + - dis_by_directions['right'][i][1] + ): + left_sub_bbox = subjects[dis_by_directions['left'][i][0]][ + 'bbox' + ] + right_sub_bbox = subjects[dis_by_directions['right'][i][0]][ + 'bbox' + ] + + left_sub_bbox_y_axis = left_sub_bbox[3] - left_sub_bbox[1] + right_sub_bbox_y_axis = right_sub_bbox[3] - right_sub_bbox[1] + + if ( + abs(left_sub_bbox_y_axis - l_y_axis) + + dis_by_directions['left'][i][0] + > abs(right_sub_bbox_y_axis - l_y_axis) + + dis_by_directions['right'][i][0] + ): + left_or_right = dis_by_directions['right'][i] + else: + left_or_right = dis_by_directions['left'][i] + else: + left_or_right = dis_by_directions['left'][i] + if left_or_right[1] > dis_by_directions['right'][i][1]: + left_or_right = dis_by_directions['right'][i] + else: + left_or_right = dis_by_directions['left'][i] + if left_or_right[1] == float('inf'): + left_or_right = dis_by_directions['right'][i] + else: + left_or_right = [-1, float('inf')] + + if dis_by_directions['top'][i][1] != float('inf') or dis_by_directions[ + 'bottom' + ][i][1] != float('inf'): + if dis_by_directions['top'][i][1] != float('inf') and dis_by_directions[ + 'bottom' + ][i][1] != float('inf'): + if AXIS_MULPLICITY * axis_unit >= abs( + dis_by_directions['top'][i][1] + - dis_by_directions['bottom'][i][1] + ): + top_bottom = subjects[dis_by_directions['bottom'][i][0]]['bbox'] + bottom_top = subjects[dis_by_directions['top'][i][0]]['bbox'] + + top_bottom_x_axis = top_bottom[2] - top_bottom[0] + bottom_top_x_axis = bottom_top[2] - bottom_top[0] + if ( + abs(top_bottom_x_axis - l_x_axis) + + dis_by_directions['bottom'][i][1] + > abs(bottom_top_x_axis - l_x_axis) + + dis_by_directions['top'][i][1] + ): + top_or_bottom = dis_by_directions['top'][i] + else: + top_or_bottom = dis_by_directions['bottom'][i] + else: + top_or_bottom = dis_by_directions['top'][i] + if top_or_bottom[1] > dis_by_directions['bottom'][i][1]: + top_or_bottom = dis_by_directions['bottom'][i] + else: + top_or_bottom = dis_by_directions['top'][i] + if top_or_bottom[1] == float('inf'): + top_or_bottom = dis_by_directions['bottom'][i] + else: + top_or_bottom = [-1, float('inf')] + + if left_or_right[1] != float('inf') or top_or_bottom[1] != float('inf'): + if 
left_or_right[1] != float('inf') and top_or_bottom[1] != float( + 'inf' + ): + if AXIS_MULPLICITY * axis_unit >= abs( + left_or_right[1] - top_or_bottom[1] + ): + y_axis_bbox = subjects[left_or_right[0]]['bbox'] + x_axis_bbox = subjects[top_or_bottom[0]]['bbox'] + + if ( + abs((x_axis_bbox[2] - x_axis_bbox[0]) - l_x_axis) / l_x_axis + > abs((y_axis_bbox[3] - y_axis_bbox[1]) - l_y_axis) + / l_y_axis + ): + sub_obj_map_h[left_or_right[0]].append(i) + else: + sub_obj_map_h[top_or_bottom[0]].append(i) + else: + if left_or_right[1] > top_or_bottom[1]: + sub_obj_map_h[top_or_bottom[0]].append(i) + else: + sub_obj_map_h[left_or_right[0]].append(i) + else: + if left_or_right[1] != float('inf'): + sub_obj_map_h[left_or_right[0]].append(i) + else: + sub_obj_map_h[top_or_bottom[0]].append(i) + ret = [] + for i in sub_obj_map_h.keys(): + ret.append( + { + 'sub_bbox': { + 'bbox': subjects[i]['bbox'], + 'score': subjects[i]['score'], + }, + 'obj_bboxes': [ + {'score': objects[j]['score'], 'bbox': objects[j]['bbox']} + for j in sub_obj_map_h[i] + ], + 'sub_idx': i, + } + ) + return ret + + + def __tie_up_category_by_distance_v3( + self, + page_no: int, + subject_category_id: int, + object_category_id: int, + priority_pos: PosRelationEnum, + ): + subjects = self.__reduct_overlap( + list( + map( + lambda x: {'bbox': x['bbox'], 'score': x['score']}, + filter( + lambda x: x['category_id'] == subject_category_id, + self.__model_list[page_no]['layout_dets'], + ), + ) + ) + ) + objects = self.__reduct_overlap( + list( + map( + lambda x: {'bbox': x['bbox'], 'score': x['score']}, + filter( + lambda x: x['category_id'] == object_category_id, + self.__model_list[page_no]['layout_dets'], + ), + ) + ) + ) + + ret = [] + N, M = len(subjects), len(objects) + subjects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) + objects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) + + OBJ_IDX_OFFSET = 10000 + SUB_BIT_KIND, OBJ_BIT_KIND = 0, 1 + + all_boxes_with_idx = [(i, SUB_BIT_KIND, sub['bbox'][0], sub['bbox'][1]) for i, sub in enumerate(subjects)] + [(i + OBJ_IDX_OFFSET , OBJ_BIT_KIND, obj['bbox'][0], obj['bbox'][1]) for i, obj in enumerate(objects)] + seen_idx = set() + seen_sub_idx = set() + + while N > len(seen_sub_idx): + candidates = [] + for idx, kind, x0, y0 in all_boxes_with_idx: + if idx in seen_idx: + continue + candidates.append((idx, kind, x0, y0)) + + if len(candidates) == 0: + break + left_x = min([v[2] for v in candidates]) + top_y = min([v[3] for v in candidates]) + + candidates.sort(key=lambda x: (x[2]-left_x) ** 2 + (x[3] - top_y) ** 2) + + + fst_idx, fst_kind, left_x, top_y = candidates[0] + candidates.sort(key=lambda x: (x[2] - left_x) ** 2 + (x[3] - top_y)**2) + nxt = None + + for i in range(1, len(candidates)): + if candidates[i][1] ^ fst_kind == 1: + nxt = candidates[i] + break + if nxt is None: + break + + if fst_kind == SUB_BIT_KIND: + sub_idx, obj_idx = fst_idx, nxt[0] - OBJ_IDX_OFFSET + + else: + sub_idx, obj_idx = nxt[0], fst_idx - OBJ_IDX_OFFSET + + pair_dis = bbox_distance(subjects[sub_idx]['bbox'], objects[obj_idx]['bbox']) + nearest_dis = float('inf') + for i in range(N): + if i in seen_idx or i == sub_idx:continue + nearest_dis = min(nearest_dis, bbox_distance(subjects[i]['bbox'], objects[obj_idx]['bbox'])) + + if pair_dis >= 3*nearest_dis: + seen_idx.add(sub_idx) + continue + + seen_idx.add(sub_idx) + seen_idx.add(obj_idx + OBJ_IDX_OFFSET) + seen_sub_idx.add(sub_idx) + + ret.append( + { + 'sub_bbox': { + 'bbox': subjects[sub_idx]['bbox'], + 'score': 
subjects[sub_idx]['score'], + }, + 'obj_bboxes': [ + {'score': objects[obj_idx]['score'], 'bbox': objects[obj_idx]['bbox']} + ], + 'sub_idx': sub_idx, + } + ) + + for i in range(len(objects)): + j = i + OBJ_IDX_OFFSET + if j in seen_idx: + continue + seen_idx.add(j) + nearest_dis, nearest_sub_idx = float('inf'), -1 + for k in range(len(subjects)): + dis = bbox_distance(objects[i]['bbox'], subjects[k]['bbox']) + if dis < nearest_dis: + nearest_dis = dis + nearest_sub_idx = k + + for k in range(len(subjects)): + if k != nearest_sub_idx: continue + if k in seen_sub_idx: + for kk in range(len(ret)): + if ret[kk]['sub_idx'] == k: + ret[kk]['obj_bboxes'].append({'score': objects[i]['score'], 'bbox': objects[i]['bbox']}) + break + else: + ret.append( + { + 'sub_bbox': { + 'bbox': subjects[k]['bbox'], + 'score': subjects[k]['score'], + }, + 'obj_bboxes': [ + {'score': objects[i]['score'], 'bbox': objects[i]['bbox']} + ], + 'sub_idx': k, + } + ) + seen_sub_idx.add(k) + seen_idx.add(k) + + + for i in range(len(subjects)): + if i in seen_sub_idx: + continue + ret.append( + { + 'sub_bbox': { + 'bbox': subjects[i]['bbox'], + 'score': subjects[i]['score'], + }, + 'obj_bboxes': [], + 'sub_idx': i, + } + ) + + + return ret + + + def get_imgs_v2(self, page_no: int): + with_captions = self.__tie_up_category_by_distance_v3( + page_no, 3, 4, PosRelationEnum.BOTTOM + ) + with_footnotes = self.__tie_up_category_by_distance_v3( + page_no, 3, CategoryId.ImageFootnote, PosRelationEnum.ALL + ) + ret = [] + for v in with_captions: + record = { + 'image_body': v['sub_bbox'], + 'image_caption_list': v['obj_bboxes'], + } + filter_idx = v['sub_idx'] + d = next(filter(lambda x: x['sub_idx'] == filter_idx, with_footnotes)) + record['image_footnote_list'] = d['obj_bboxes'] + ret.append(record) + return ret + + def get_tables_v2(self, page_no: int) -> list: + with_captions = self.__tie_up_category_by_distance_v3( + page_no, 5, 6, PosRelationEnum.UP + ) + with_footnotes = self.__tie_up_category_by_distance_v3( + page_no, 5, 7, PosRelationEnum.ALL + ) + ret = [] + for v in with_captions: + record = { + 'table_body': v['sub_bbox'], + 'table_caption_list': v['obj_bboxes'], + } + filter_idx = v['sub_idx'] + d = next(filter(lambda x: x['sub_idx'] == filter_idx, with_footnotes)) + record['table_footnote_list'] = d['obj_bboxes'] + ret.append(record) + return ret + + def get_imgs(self, page_no: int): + return self.get_imgs_v2(page_no) + + def get_tables( + self, page_no: int + ) -> list: # 3个坐标, caption, table主体,table-note + return self.get_tables_v2(page_no) + + def get_equations(self, page_no: int) -> list: # 有坐标,也有字 + inline_equations = self.__get_blocks_by_type( + ModelBlockTypeEnum.EMBEDDING.value, page_no, ['latex'] + ) + interline_equations = self.__get_blocks_by_type( + ModelBlockTypeEnum.ISOLATED.value, page_no, ['latex'] + ) + interline_equations_blocks = self.__get_blocks_by_type( + ModelBlockTypeEnum.ISOLATE_FORMULA.value, page_no + ) + return inline_equations, interline_equations, interline_equations_blocks + + def get_discarded(self, page_no: int) -> list: # 自研模型,只有坐标 + blocks = self.__get_blocks_by_type(ModelBlockTypeEnum.ABANDON.value, page_no) + return blocks + + def get_text_blocks(self, page_no: int) -> list: # 自研模型搞的,只有坐标,没有字 + blocks = self.__get_blocks_by_type(ModelBlockTypeEnum.PLAIN_TEXT.value, page_no) + return blocks + + def get_title_blocks(self, page_no: int) -> list: # 自研模型,只有坐标,没字 + blocks = self.__get_blocks_by_type(ModelBlockTypeEnum.TITLE.value, page_no) + return blocks + + def get_ocr_text(self, 
page_no: int) -> list: # paddle 搞的,有字也有坐标 + text_spans = [] + model_page_info = self.__model_list[page_no] + layout_dets = model_page_info['layout_dets'] + for layout_det in layout_dets: + if layout_det['category_id'] == '15': + span = { + 'bbox': layout_det['bbox'], + 'content': layout_det['text'], + } + text_spans.append(span) + return text_spans + + def get_all_spans(self, page_no: int) -> list: + + def remove_duplicate_spans(spans): + new_spans = [] + for span in spans: + if not any(span == existing_span for existing_span in new_spans): + new_spans.append(span) + return new_spans + + all_spans = [] + model_page_info = self.__model_list[page_no] + layout_dets = model_page_info['layout_dets'] + allow_category_id_list = [3, 5, 13, 14, 15] + """当成span拼接的""" + # 3: 'image', # 图片 + # 5: 'table', # 表格 + # 13: 'inline_equation', # 行内公式 + # 14: 'interline_equation', # 行间公式 + # 15: 'text', # ocr识别文本 + for layout_det in layout_dets: + category_id = layout_det['category_id'] + if category_id in allow_category_id_list: + span = {'bbox': layout_det['bbox'], 'score': layout_det['score']} + if category_id == 3: + span['type'] = ContentType.Image + elif category_id == 5: + # 获取table模型结果 + latex = layout_det.get('latex', None) + html = layout_det.get('html', None) + if latex: + span['latex'] = latex + elif html: + span['html'] = html + span['type'] = ContentType.Table + elif category_id == 13: + span['content'] = layout_det['latex'] + span['type'] = ContentType.InlineEquation + elif category_id == 14: + span['content'] = layout_det['latex'] + span['type'] = ContentType.InterlineEquation + elif category_id == 15: + span['content'] = layout_det['text'] + span['type'] = ContentType.Text + all_spans.append(span) + return remove_duplicate_spans(all_spans) + + def get_page_size(self, page_no: int): # 获取页面宽高 + # 获取当前页的page对象 + page = self.__docs.get_page(page_no).get_page_info() + # 获取当前页的宽高 + page_w = page.w + page_h = page.h + return page_w, page_h + + def __get_blocks_by_type( + self, type: int, page_no: int, extra_col: list[str] = [] + ) -> list: + blocks = [] + for page_dict in self.__model_list: + layout_dets = page_dict.get('layout_dets', []) + page_info = page_dict.get('page_info', {}) + page_number = page_info.get('page_no', -1) + if page_no != page_number: + continue + for item in layout_dets: + category_id = item.get('category_id', -1) + bbox = item.get('bbox', None) + + if category_id == type: + block = { + 'bbox': bbox, + 'score': item.get('score'), + } + for col in extra_col: + block[col] = item.get(col, None) + blocks.append(block) + return blocks + + def get_model_list(self, page_no): + return self.__model_list[page_no] diff --git a/mineru/backend/pipeline/model_init.py b/mineru/backend/pipeline/model_init.py new file mode 100644 index 00000000..a7481021 --- /dev/null +++ b/mineru/backend/pipeline/model_init.py @@ -0,0 +1,190 @@ +import os + +import torch +from loguru import logger + +from .model_list import AtomicModel +from ...model.layout.doclayout_yolo import DocLayoutYOLOModel +from ...model.mfd.yolo_v8 import YOLOv8MFDModel +from ...model.mfr.unimernet.Unimernet import UnimernetModel +from ...model.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR +from ...model.table.rapid_table import RapidTableModel + +doclayout_yolo = "Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt" +yolo_v8_mfd = "MFD/YOLO/yolo_v8_ft.pt" +unimernet_small = "MFR/unimernet_hf_small_2503" + + +def table_model_init(lang=None): + atom_model_manager = AtomModelSingleton() + ocr_engine = 
atom_model_manager.get_atom_model( + atom_model_name='ocr', + det_db_box_thresh=0.5, + det_db_unclip_ratio=1.6, + lang=lang + ) + table_model = RapidTableModel(ocr_engine) + return table_model + + +def mfd_model_init(weight, device='cpu'): + if str(device).startswith('npu'): + device = torch.device(device) + mfd_model = YOLOv8MFDModel(weight, device) + return mfd_model + + +def mfr_model_init(weight_dir, device='cpu'): + mfr_model = UnimernetModel(weight_dir, device) + return mfr_model + + +def doclayout_yolo_model_init(weight, device='cpu'): + if str(device).startswith('npu'): + device = torch.device(device) + model = DocLayoutYOLOModel(weight, device) + return model + +def ocr_model_init(det_db_box_thresh=0.3, + lang=None, + use_dilation=True, + det_db_unclip_ratio=1.8, + ): + if lang is not None and lang != '': + model = PytorchPaddleOCR( + det_db_box_thresh=det_db_box_thresh, + lang=lang, + use_dilation=use_dilation, + det_db_unclip_ratio=det_db_unclip_ratio, + ) + else: + model = PytorchPaddleOCR( + det_db_box_thresh=det_db_box_thresh, + use_dilation=use_dilation, + det_db_unclip_ratio=det_db_unclip_ratio, + ) + return model + + +class AtomModelSingleton: + _instance = None + _models = {} + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def get_atom_model(self, atom_model_name: str, **kwargs): + + lang = kwargs.get('lang', None) + table_model_name = kwargs.get('table_model_name', None) + + if atom_model_name in [AtomicModel.OCR]: + key = (atom_model_name, lang) + elif atom_model_name in [AtomicModel.Table]: + key = (atom_model_name, table_model_name, lang) + else: + key = atom_model_name + + if key not in self._models: + self._models[key] = atom_model_init(model_name=atom_model_name, **kwargs) + return self._models[key] + +def atom_model_init(model_name: str, **kwargs): + atom_model = None + if model_name == AtomicModel.Layout: + atom_model = doclayout_yolo_model_init( + kwargs.get('doclayout_yolo_weights'), + kwargs.get('device') + ) + elif model_name == AtomicModel.MFD: + atom_model = mfd_model_init( + kwargs.get('mfd_weights'), + kwargs.get('device') + ) + elif model_name == AtomicModel.MFR: + atom_model = mfr_model_init( + kwargs.get('mfr_weight_dir'), + kwargs.get('device') + ) + elif model_name == AtomicModel.OCR: + atom_model = ocr_model_init( + kwargs.get('det_db_box_thresh'), + kwargs.get('lang'), + ) + elif model_name == AtomicModel.Table: + atom_model = table_model_init( + kwargs.get('lang'), + ) + else: + logger.error('model name not allow') + exit(1) + + if atom_model is None: + logger.error('model init failed') + exit(1) + else: + return atom_model + + +class MineruPipelineModel: + def __init__(self, **kwargs): + self.formula_config = kwargs.get('formula_config') + self.apply_formula = self.formula_config.get('enable', True) + self.table_config = kwargs.get('table_config') + self.apply_table = self.table_config.get('enable', True) + self.lang = kwargs.get('lang', None) + self.device = kwargs.get('device', 'cpu') + logger.info( + 'DocAnalysis init, this may take some times......' 
+ ) + atom_model_manager = AtomModelSingleton() + models_dir = kwargs.get('models_dir', "") + if not models_dir: + logger.error("can't found models_dir, please set models_dir") + exit(1) + + if self.apply_formula: + # 初始化公式检测模型 + self.mfd_model = atom_model_manager.get_atom_model( + atom_model_name=AtomicModel.MFD, + mfd_weights=str( + os.path.join(models_dir, yolo_v8_mfd) + ), + device=self.device, + ) + + # 初始化公式解析模型 + mfr_weight_dir = str( + os.path.join(models_dir, unimernet_small) + ) + + self.mfr_model = atom_model_manager.get_atom_model( + atom_model_name=AtomicModel.MFR, + mfr_weight_dir=mfr_weight_dir, + device=self.device, + ) + + # 初始化layout模型 + self.layout_model = atom_model_manager.get_atom_model( + atom_model_name=AtomicModel.Layout, + doclayout_yolo_weights=str( + os.path.join(models_dir, doclayout_yolo) + ), + device=self.device, + ) + # 初始化ocr + self.ocr_model = atom_model_manager.get_atom_model( + atom_model_name=AtomicModel.OCR, + det_db_box_thresh=0.3, + lang=self.lang + ) + # init table model + if self.apply_table: + self.table_model = atom_model_manager.get_atom_model( + atom_model_name=AtomicModel.Table, + lang=self.lang, + ) + + logger.info('DocAnalysis init done!') \ No newline at end of file diff --git a/mineru/backend/pipeline/model_list.py b/mineru/backend/pipeline/model_list.py new file mode 100644 index 00000000..3676ae67 --- /dev/null +++ b/mineru/backend/pipeline/model_list.py @@ -0,0 +1,6 @@ +class AtomicModel: + Layout = "layout" + MFD = "mfd" + MFR = "mfr" + OCR = "ocr" + Table = "table" diff --git a/mineru/backend/vlm/token_to_middle_json.py b/mineru/backend/vlm/token_to_middle_json.py index bcad39f8..05f83f17 100644 --- a/mineru/backend/vlm/token_to_middle_json.py +++ b/mineru/backend/vlm/token_to_middle_json.py @@ -1,10 +1,10 @@ import re -from ...libs.cut_image import cut_image_and_table -from ...libs.enum_class import BlockType, ContentType -from ...libs.hash_utils import str_md5 -from ...libs.magic_model import fix_two_layer_blocks -from ...libs.version import __version__ +from mineru.utils.cut_image import cut_image_and_table +from mineru.utils.enum_class import BlockType, ContentType +from mineru.utils.hash_utils import str_md5 +from mineru.utils.magic_model import fix_two_layer_blocks +from mineru.version import __version__ def token_to_page_info(token, image_dict, page, image_writer, page_index) -> dict: diff --git a/mineru/backend/vlm/vlm_analyze.py b/mineru/backend/vlm/vlm_analyze.py index 7496139b..bd15c506 100644 --- a/mineru/backend/vlm/vlm_analyze.py +++ b/mineru/backend/vlm/vlm_analyze.py @@ -4,7 +4,7 @@ import time from loguru import logger from ...data.data_reader_writer import DataWriter -from ...libs.pdf_image_tools import load_images_from_pdf +from mineru.utils.pdf_image_tools import load_images_from_pdf from .base_predictor import BasePredictor from .predictor import get_predictor from .token_to_middle_json import result_to_middle_json diff --git a/mineru/model/__init__.py b/mineru/model/__init__.py index e69de29b..1e17167c 100644 --- a/mineru/model/__init__.py +++ b/mineru/model/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/model/layout/__init__.py b/mineru/model/layout/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/layout/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. 
diff --git a/mineru/model/layout/doclayout_yolo.py b/mineru/model/layout/doclayout_yolo.py new file mode 100644 index 00000000..2c7a23a3 --- /dev/null +++ b/mineru/model/layout/doclayout_yolo.py @@ -0,0 +1,64 @@ +from doclayout_yolo import YOLOv10 +from tqdm import tqdm + + +class DocLayoutYOLOModel(object): + def __init__(self, weight, device): + self.model = YOLOv10(weight) + self.device = device + + def predict(self, image): + layout_res = [] + doclayout_yolo_res = self.model.predict( + image, + imgsz=1280, + conf=0.10, + iou=0.45, + verbose=False, device=self.device + )[0] + for xyxy, conf, cla in zip( + doclayout_yolo_res.boxes.xyxy.cpu(), + doclayout_yolo_res.boxes.conf.cpu(), + doclayout_yolo_res.boxes.cls.cpu(), + ): + xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] + new_item = { + "category_id": int(cla.item()), + "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], + "score": round(float(conf.item()), 3), + } + layout_res.append(new_item) + return layout_res + + def batch_predict(self, images: list, batch_size: int) -> list: + images_layout_res = [] + # for index in range(0, len(images), batch_size): + for index in tqdm(range(0, len(images), batch_size), desc="Layout Predict"): + doclayout_yolo_res = [ + image_res.cpu() + for image_res in self.model.predict( + images[index : index + batch_size], + imgsz=1280, + conf=0.10, + iou=0.45, + verbose=False, + device=self.device, + ) + ] + for image_res in doclayout_yolo_res: + layout_res = [] + for xyxy, conf, cla in zip( + image_res.boxes.xyxy, + image_res.boxes.conf, + image_res.boxes.cls, + ): + xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] + new_item = { + "category_id": int(cla.item()), + "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], + "score": round(float(conf.item()), 3), + } + layout_res.append(new_item) + images_layout_res.append(layout_res) + + return images_layout_res diff --git a/mineru/model/mfd/__init__.py b/mineru/model/mfd/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/mfd/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/model/mfd/yolo_v8.py b/mineru/model/mfd/yolo_v8.py new file mode 100644 index 00000000..23d230d0 --- /dev/null +++ b/mineru/model/mfd/yolo_v8.py @@ -0,0 +1,33 @@ +from tqdm import tqdm +from ultralytics import YOLO + + +class YOLOv8MFDModel(object): + def __init__(self, weight, device="cpu"): + self.mfd_model = YOLO(weight) + self.device = device + + def predict(self, image): + mfd_res = self.mfd_model.predict( + image, imgsz=1888, conf=0.25, iou=0.45, verbose=False, device=self.device + )[0] + return mfd_res + + def batch_predict(self, images: list, batch_size: int) -> list: + images_mfd_res = [] + # for index in range(0, len(images), batch_size): + for index in tqdm(range(0, len(images), batch_size), desc="MFD Predict"): + mfd_res = [ + image_res.cpu() + for image_res in self.mfd_model.predict( + images[index : index + batch_size], + imgsz=1888, + conf=0.25, + iou=0.45, + verbose=False, + device=self.device, + ) + ] + for image_res in mfd_res: + images_mfd_res.append(image_res) + return images_mfd_res diff --git a/mineru/model/mfr/__init__.py b/mineru/model/mfr/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/mfr/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. 
diff --git a/mineru/model/mfr/unimernet/Unimernet.py b/mineru/model/mfr/unimernet/Unimernet.py new file mode 100644 index 00000000..6c3a1e1d --- /dev/null +++ b/mineru/model/mfr/unimernet/Unimernet.py @@ -0,0 +1,135 @@ +import torch +from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm + + +class MathDataset(Dataset): + def __init__(self, image_paths, transform=None): + self.image_paths = image_paths + self.transform = transform + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + raw_image = self.image_paths[idx] + if self.transform: + image = self.transform(raw_image) + return image + + +class UnimernetModel(object): + def __init__(self, weight_dir, cfg_path, _device_="cpu"): + from .unimernet_hf import UnimernetModel + if _device_.startswith("mps"): + self.model = UnimernetModel.from_pretrained(weight_dir, attn_implementation="eager") + else: + self.model = UnimernetModel.from_pretrained(weight_dir) + self.device = _device_ + self.model.to(_device_) + if not _device_.startswith("cpu"): + self.model = self.model.to(dtype=torch.float16) + self.model.eval() + + def predict(self, mfd_res, image): + formula_list = [] + mf_image_list = [] + for xyxy, conf, cla in zip( + mfd_res.boxes.xyxy.cpu(), mfd_res.boxes.conf.cpu(), mfd_res.boxes.cls.cpu() + ): + xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] + new_item = { + "category_id": 13 + int(cla.item()), + "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], + "score": round(float(conf.item()), 2), + "latex": "", + } + formula_list.append(new_item) + bbox_img = image[ymin:ymax, xmin:xmax] + mf_image_list.append(bbox_img) + + dataset = MathDataset(mf_image_list, transform=self.model.transform) + dataloader = DataLoader(dataset, batch_size=32, num_workers=0) + mfr_res = [] + for mf_img in dataloader: + mf_img = mf_img.to(dtype=self.model.dtype) + mf_img = mf_img.to(self.device) + with torch.no_grad(): + output = self.model.generate({"image": mf_img}) + mfr_res.extend(output["fixed_str"]) + for res, latex in zip(formula_list, mfr_res): + res["latex"] = latex + return formula_list + + def batch_predict(self, images_mfd_res: list, images: list, batch_size: int = 64) -> list: + images_formula_list = [] + mf_image_list = [] + backfill_list = [] + image_info = [] # Store (area, original_index, image) tuples + + # Collect images with their original indices + for image_index in range(len(images_mfd_res)): + mfd_res = images_mfd_res[image_index] + np_array_image = images[image_index] + formula_list = [] + + for idx, (xyxy, conf, cla) in enumerate(zip( + mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls + )): + xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] + new_item = { + "category_id": 13 + int(cla.item()), + "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], + "score": round(float(conf.item()), 2), + "latex": "", + } + formula_list.append(new_item) + bbox_img = np_array_image[ymin:ymax, xmin:xmax] + area = (xmax - xmin) * (ymax - ymin) + + curr_idx = len(mf_image_list) + image_info.append((area, curr_idx, bbox_img)) + mf_image_list.append(bbox_img) + + images_formula_list.append(formula_list) + backfill_list += formula_list + + # Stable sort by area + image_info.sort(key=lambda x: x[0]) # sort by area + sorted_indices = [x[1] for x in image_info] + sorted_images = [x[2] for x in image_info] + + # Create mapping for results + index_mapping = {new_idx: old_idx for new_idx, old_idx in enumerate(sorted_indices)} + + # Create dataset with sorted images + dataset = 
MathDataset(sorted_images, transform=self.model.transform) + dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0) + + # Process batches and store results + mfr_res = [] + # for mf_img in dataloader: + + with tqdm(total=len(sorted_images), desc="MFR Predict") as pbar: + for index, mf_img in enumerate(dataloader): + mf_img = mf_img.to(dtype=self.model.dtype) + mf_img = mf_img.to(self.device) + with torch.no_grad(): + output = self.model.generate({"image": mf_img}) + mfr_res.extend(output["fixed_str"]) + + # 更新进度条,每次增加batch_size,但要注意最后一个batch可能不足batch_size + current_batch_size = min(batch_size, len(sorted_images) - index * batch_size) + pbar.update(current_batch_size) + + # Restore original order + unsorted_results = [""] * len(mfr_res) + for new_idx, latex in enumerate(mfr_res): + original_idx = index_mapping[new_idx] + unsorted_results[original_idx] = latex + + # Fill results back + for res, latex in zip(backfill_list, unsorted_results): + res["latex"] = latex + + return images_formula_list diff --git a/mineru/model/mfr/unimernet/__init__.py b/mineru/model/mfr/unimernet/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mineru/model/mfr/unimernet/unimernet_hf/__init__.py b/mineru/model/mfr/unimernet/unimernet_hf/__init__.py new file mode 100644 index 00000000..772dcfa3 --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/__init__.py @@ -0,0 +1,13 @@ +from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor +from .unimer_mbart import UnimerMBartConfig, UnimerMBartModel, UnimerMBartForCausalLM +from .modeling_unimernet import UnimernetModel + +__all__ = [ + "UnimerSwinConfig", + "UnimerSwinModel", + "UnimerSwinImageProcessor", + "UnimerMBartConfig", + "UnimerMBartModel", + "UnimerMBartForCausalLM", + "UnimernetModel", +] diff --git a/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py b/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py new file mode 100644 index 00000000..a4a9bbb9 --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py @@ -0,0 +1,490 @@ +import os +import re +import warnings +from typing import Optional + +import torch +from ftfy import fix_text +from loguru import logger + +from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig, PreTrainedModel +from transformers import VisionEncoderDecoderConfig, VisionEncoderDecoderModel +from transformers.models.vision_encoder_decoder.modeling_vision_encoder_decoder import logger as base_model_logger + +from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor +from .unimer_mbart import UnimerMBartConfig, UnimerMBartForCausalLM + +AutoConfig.register(UnimerSwinConfig.model_type, UnimerSwinConfig) +AutoConfig.register(UnimerMBartConfig.model_type, UnimerMBartConfig) +AutoModel.register(UnimerSwinConfig, UnimerSwinModel) +AutoModelForCausalLM.register(UnimerMBartConfig, UnimerMBartForCausalLM) + + +# TODO: rewrite tokenizer +class TokenizerWrapper: + def __init__(self, tokenizer): + self.tokenizer = tokenizer + self.pad_token_id = self.tokenizer.pad_token_id + self.bos_token_id = self.tokenizer.bos_token_id + self.eos_token_id = self.tokenizer.eos_token_id + + def __len__(self): + return len(self.tokenizer) + + def tokenize(self, text, **kwargs): + return self.tokenizer( + text, + return_token_type_ids=False, + return_tensors="pt", + padding="longest", + truncation=True, + **kwargs, + ) + + def token2str(self, tokens) -> list: + generated_text = 
self.tokenizer.batch_decode(tokens, skip_special_tokens=True) + generated_text = [fix_text(text) for text in generated_text] + return generated_text + + def detokenize(self, tokens): + toks = [self.tokenizer.convert_ids_to_tokens(tok) for tok in tokens] + for b in range(len(toks)): + for i in reversed(range(len(toks[b]))): + if toks[b][i] is None: + toks[b][i] = '' + toks[b][i] = toks[b][i].replace('Ġ', ' ').strip() + if toks[b][i] in ([self.tokenizer.bos_token, self.tokenizer.eos_token, self.tokenizer.pad_token]): + del toks[b][i] + return toks + + +LEFT_PATTERN = re.compile(r'(\\left)(\S*)') +RIGHT_PATTERN = re.compile(r'(\\right)(\S*)') +LEFT_COUNT_PATTERN = re.compile(r'\\left(?![a-zA-Z])') +RIGHT_COUNT_PATTERN = re.compile(r'\\right(?![a-zA-Z])') +LEFT_RIGHT_REMOVE_PATTERN = re.compile(r'\\left\.?|\\right\.?') + +def fix_latex_left_right(s): + """ + 修复LaTeX中的\\left和\\right命令 + 1. 确保它们后面跟有效分隔符 + 2. 平衡\\left和\\right的数量 + """ + # 白名单分隔符 + valid_delims_list = [r'(', r')', r'[', r']', r'{', r'}', r'/', r'|', + r'\{', r'\}', r'\lceil', r'\rceil', r'\lfloor', + r'\rfloor', r'\backslash', r'\uparrow', r'\downarrow', + r'\Uparrow', r'\Downarrow', r'\|', r'\.'] + + # 为\left后缺失有效分隔符的情况添加点 + def fix_delim(match, is_left=True): + cmd = match.group(1) # \left 或 \right + rest = match.group(2) if len(match.groups()) > 1 else "" + if not rest or rest not in valid_delims_list: + return cmd + "." + return match.group(0) + + # 使用更精确的模式匹配\left和\right命令 + # 确保它们是独立的命令,不是其他命令的一部分 + # 使用预编译正则和统一回调函数 + s = LEFT_PATTERN.sub(lambda m: fix_delim(m, True), s) + s = RIGHT_PATTERN.sub(lambda m: fix_delim(m, False), s) + + # 更精确地计算\left和\right的数量 + left_count = len(LEFT_COUNT_PATTERN.findall(s)) # 不匹配\lefteqn等 + right_count = len(RIGHT_COUNT_PATTERN.findall(s)) # 不匹配\rightarrow等 + + if left_count == right_count: + # 如果数量相等,检查是否在同一组 + return fix_left_right_pairs(s) + else: + # 如果数量不等,移除所有\left和\right + # logger.debug(f"latex:{s}") + # logger.warning(f"left_count: {left_count}, right_count: {right_count}") + return LEFT_RIGHT_REMOVE_PATTERN.sub('', s) + + +def fix_left_right_pairs(latex_formula): + """ + 检测并修复LaTeX公式中\\left和\\right不在同一组的情况 + + Args: + latex_formula (str): 输入的LaTeX公式 + + Returns: + str: 修复后的LaTeX公式 + """ + # 用于跟踪花括号嵌套层级 + brace_stack = [] + # 用于存储\left信息: (位置, 深度, 分隔符) + left_stack = [] + # 存储需要调整的\right信息: (开始位置, 结束位置, 目标位置) + adjustments = [] + + i = 0 + while i < len(latex_formula): + # 检查是否是转义字符 + if i > 0 and latex_formula[i - 1] == '\\': + backslash_count = 0 + j = i - 1 + while j >= 0 and latex_formula[j] == '\\': + backslash_count += 1 + j -= 1 + + if backslash_count % 2 == 1: + i += 1 + continue + + # 检测\left命令 + if i + 5 < len(latex_formula) and latex_formula[i:i + 5] == "\\left" and i + 5 < len(latex_formula): + delimiter = latex_formula[i + 5] + left_stack.append((i, len(brace_stack), delimiter)) + i += 6 # 跳过\left和分隔符 + continue + + # 检测\right命令 + elif i + 6 < len(latex_formula) and latex_formula[i:i + 6] == "\\right" and i + 6 < len(latex_formula): + delimiter = latex_formula[i + 6] + + if left_stack: + left_pos, left_depth, left_delim = left_stack.pop() + + # 如果\left和\right不在同一花括号深度 + if left_depth != len(brace_stack): + # 找到\left所在花括号组的结束位置 + target_pos = find_group_end(latex_formula, left_pos, left_depth) + if target_pos != -1: + # 记录需要移动的\right + adjustments.append((i, i + 7, target_pos)) + + i += 7 # 跳过\right和分隔符 + continue + + # 处理花括号 + if latex_formula[i] == '{': + brace_stack.append(i) + elif latex_formula[i] == '}': + if brace_stack: + brace_stack.pop() + + i += 1 + + # 
Apply the adjustments, processing from back to front so earlier indices stay valid
+    if not adjustments:
+        return latex_formula
+
+    result = list(latex_formula)
+    adjustments.sort(reverse=True, key=lambda x: x[0])
+
+    for start, end, target in adjustments:
+        # Extract the \right part
+        right_part = result[start:end]
+        # Remove it from its original position
+        del result[start:end]
+        # Insert it at the target position
+        result.insert(target, ''.join(right_part))
+
+    return ''.join(result)
+
+
+def find_group_end(text, pos, depth):
+    """Find the end position of the brace group at the given depth."""
+    current_depth = depth
+    i = pos
+
+    while i < len(text):
+        if text[i] == '{' and (i == 0 or not is_escaped(text, i)):
+            current_depth += 1
+        elif text[i] == '}' and (i == 0 or not is_escaped(text, i)):
+            current_depth -= 1
+            if current_depth < depth:
+                return i
+        i += 1
+
+    return -1  # no matching end position found
+
+
+def is_escaped(text, pos):
+    """Check whether the character at pos is escaped."""
+    backslash_count = 0
+    j = pos - 1
+    while j >= 0 and text[j] == '\\':
+        backslash_count += 1
+        j -= 1
+
+    return backslash_count % 2 == 1
+
+
+def fix_unbalanced_braces(latex_formula):
+    """
+    Check whether the curly braces in a LaTeX formula are balanced and delete any brace that cannot be paired.
+
+    Args:
+        latex_formula (str): Input LaTeX formula
+
+    Returns:
+        str: LaTeX formula with all unpaired braces removed
+    """
+    stack = []  # indices of open braces
+    unmatched = set()  # indices of unmatched braces
+    i = 0
+
+    while i < len(latex_formula):
+        # Check whether this brace is escaped
+        if latex_formula[i] in ['{', '}']:
+            # Count the consecutive backslashes in front of it
+            backslash_count = 0
+            j = i - 1
+            while j >= 0 and latex_formula[j] == '\\':
+                backslash_count += 1
+                j -= 1
+
+            # An odd number of backslashes means the brace is escaped and takes no part in matching
+            if backslash_count % 2 == 1:
+                i += 1
+                continue
+
+            # Otherwise the brace takes part in matching
+            if latex_formula[i] == '{':
+                stack.append(i)
+            else:  # latex_formula[i] == '}'
+                if stack:  # there is a matching open brace
+                    stack.pop()
+                else:  # no matching open brace
+                    unmatched.add(i)
+
+        i += 1
+
+    # Any open braces left on the stack are unmatched
+    unmatched.update(stack)
+
+    # Build a new string with the unmatched braces removed
+    return ''.join(char for i, char in enumerate(latex_formula) if i not in unmatched)
+
+
+def process_latex(input_string):
+    """
+    Handle backslashes in a LaTeX formula:
+    1. If \ is followed by a special character (#$%&~_^\\{}) or a space, keep it unchanged
+    2. If \ is followed by two letters, keep it unchanged
+    3. Otherwise, insert a space after \
+
+    Args:
+        input_string (str): Input LaTeX formula
+
+    Returns:
+        str: Processed LaTeX formula
+    """
+
+    def replace_func(match):
+        # The character following the backslash
+        next_char = match.group(1)
+
+        # Keep special characters and whitespace unchanged
+        if next_char in "#$%&~_^|\\{} \t\n\r\v\f":
+            return match.group(0)
+
+        # If it is a letter, look at the character after it
+        if 'a' <= next_char <= 'z' or 'A' <= next_char <= 'Z':
+            pos = match.start() + 2  # position right after \x
+            if pos < len(input_string) and ('a' <= input_string[pos] <= 'z' or 'A' <= input_string[pos] <= 'Z'):
+                # The next character is also a letter, keep unchanged
+                return match.group(0)
+
+        # Otherwise insert a space after the backslash
+        return '\\' + ' ' + next_char
+
+    # Match a backslash followed by any single character
+    pattern = r'\\(.)'
+
+    return re.sub(pattern, replace_func, input_string)
+
+# Common math environments supported by KaTeX/MathJax
+ENV_TYPES = ['array', 'matrix', 'pmatrix', 'bmatrix', 'vmatrix',
+             'Bmatrix', 'Vmatrix', 'cases', 'aligned', 'gathered']
+ENV_BEGIN_PATTERNS = {env: re.compile(r'\\begin\{' + env + r'\}') for env in ENV_TYPES}
+ENV_END_PATTERNS = {env: re.compile(r'\\end\{' + env + r'\}') for env in ENV_TYPES}
+ENV_FORMAT_PATTERNS = {env: re.compile(r'\\begin\{' + env + r'\}\{([^}]*)\}') for env in ENV_TYPES}
+
+def fix_latex_environments(s):
+    """
+    Check that the \\begin and \\end of LaTeX environments (such as array) are matched:
+    1. if a \\begin tag is missing, prepend one at the start
+    2. if an \\end tag is missing, append one at the end
+    """
+    for env in ENV_TYPES:
+        begin_count = len(ENV_BEGIN_PATTERNS[env].findall(s))
+        end_count = len(ENV_END_PATTERNS[env].findall(s))
+
+        if begin_count != end_count:
+            if end_count > begin_count:
+                format_match = ENV_FORMAT_PATTERNS[env].search(s)
+                default_format = '{c}' if env == 'array' else ''
+                format_str = '{' + format_match.group(1) + '}' if format_match else default_format
+
+                missing_count = end_count - begin_count
+                begin_command = '\\begin{' + env + '}' + format_str + ' '
+                s = begin_command * missing_count + s
+            else:
+                missing_count = begin_count - end_count
+                s = s + (' \\end{' + env + '}') * missing_count
+
+    return s
+
+
+UP_PATTERN = re.compile(r'\\up([a-zA-Z]+)')
+COMMANDS_TO_REMOVE_PATTERN = re.compile(
+    r'\\(?:lefteqn|boldmath|ensuremath|centering|textsubscript|sides|textsl|textcent|emph|protect|null)')
+REPLACEMENTS_PATTERNS = {
+    re.compile(r'\\underbar'): r'\\underline',
+    re.compile(r'\\Bar'): r'\\hat',
+    re.compile(r'\\Hat'): r'\\hat',
+    re.compile(r'\\Tilde'): r'\\tilde',
+    re.compile(r'\\slash'): r'/',
+    re.compile(r'\\textperthousand'): r'‰',
+    re.compile(r'\\sun'): r'☉',
+    re.compile(r'\\textunderscore'): r'\\_',
+    re.compile(r'\\fint'): r'⨏',
+    re.compile(r'\\up '): r'\\ ',
+    re.compile(r'\\vline = '): r'\\models ',
+    re.compile(r'\\vDash '): r'\\models ',
+    re.compile(r'\\sq \\sqcup '): r'\\square ',
+}
+QQUAD_PATTERN = re.compile(r'\\qquad(?!\s)')
+
+def latex_rm_whitespace(s: str):
+    """Remove unnecessary whitespace from LaTeX code."""
+    s = fix_unbalanced_braces(s)
+    s = fix_latex_left_right(s)
+    s = fix_latex_environments(s)
+
+    # Use the precompiled regular expressions
+    s = UP_PATTERN.sub(
+        lambda m: m.group(0) if m.group(1) in ["arrow", "downarrow", "lus", "silon"] else f"\\{m.group(1)}", s
+    )
+    s = COMMANDS_TO_REMOVE_PATTERN.sub('', s)
+
+    # Apply all replacement patterns
+    for pattern, replacement in REPLACEMENTS_PATTERNS.items():
+        s = pattern.sub(replacement, s)
+
+    # Normalize backslashes and spaces in the LaTeX string
+    s = process_latex(s)
+
+    # Make sure \qquad is followed by a space
+    s = QQUAD_PATTERN.sub(r'\\qquad ', s)
+
+    return s
+
+
+class UnimernetModel(VisionEncoderDecoderModel):
+    def __init__(
+        self,
+        config: Optional[PretrainedConfig] = None,
+        encoder: Optional[PreTrainedModel] = None,
+        decoder: Optional[PreTrainedModel] = None,
+    ):
+        # VisionEncoderDecoderModel's config-check logging has a bug; disable its logger temporarily.
+ base_model_logger.disabled = True + try: + super().__init__(config, encoder, decoder) + finally: + base_model_logger.disabled = False + + if not config or not hasattr(config, "_name_or_path"): + raise RuntimeError("config._name_or_path is required by UnimernetModel.") + + model_path = config._name_or_path + self.transform = UnimerSwinImageProcessor() + self.tokenizer = TokenizerWrapper(AutoTokenizer.from_pretrained(model_path)) + self._post_check() + + def _post_check(self): + tokenizer = self.tokenizer + + if tokenizer.tokenizer.model_max_length != self.config.decoder.max_position_embeddings: + warnings.warn( + f"decoder.max_position_embeddings={self.config.decoder.max_position_embeddings}," + + f" but tokenizer.model_max_length={tokenizer.tokenizer.model_max_length}, will set" + + f" tokenizer.model_max_length to {self.config.decoder.max_position_embeddings}.") + tokenizer.tokenizer.model_max_length = self.config.decoder.max_position_embeddings + + assert self.config.decoder.vocab_size == len(tokenizer) + assert self.config.decoder_start_token_id == tokenizer.bos_token_id + assert self.config.pad_token_id == tokenizer.pad_token_id + + @classmethod + def from_checkpoint(cls, model_path: str, model_filename: str = "pytorch_model.pth", state_dict_strip_prefix="model.model."): + config = VisionEncoderDecoderConfig.from_pretrained(model_path) + config._name_or_path = model_path + config.encoder = UnimerSwinConfig(**vars(config.encoder)) + config.decoder = UnimerMBartConfig(**vars(config.decoder)) + + encoder = UnimerSwinModel(config.encoder) + decoder = UnimerMBartForCausalLM(config.decoder) + model = cls(config, encoder, decoder) + + # load model weights + model_file_path = os.path.join(model_path, model_filename) + checkpoint = torch.load(model_file_path, map_location="cpu", weights_only=True) + state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint + if not state_dict: + raise RuntimeError("state_dict is empty.") + if state_dict_strip_prefix: + state_dict = { + k[len(state_dict_strip_prefix):] if k.startswith(state_dict_strip_prefix) else k: v + for k, v in state_dict.items() + } + missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False) + if len(unexpected_keys) > 0: + warnings.warn("Unexpected key(s) in state_dict: {}.".format(", ".join(f'"{k}"' for k in unexpected_keys))) + if len(missing_keys) > 0: + raise RuntimeError("Missing key(s) in state_dict: {}.".format(", ".join(f'"{k}"' for k in missing_keys))) + return model + + def forward_bak(self, samples): + pixel_values, text = samples["image"], samples["text_input"] + + text_inputs = self.tokenizer.tokenize(text).to(pixel_values.device) + decoder_input_ids, decoder_attention_mask = text_inputs["input_ids"], text_inputs["attention_mask"] + + num_channels = pixel_values.shape[1] + if num_channels == 1: + pixel_values = pixel_values.repeat(1, 3, 1, 1) + + labels = decoder_input_ids * 1 + labels = labels.masked_fill(labels == self.tokenizer.pad_token_id, -100) + + loss = self.model( + pixel_values=pixel_values, + decoder_input_ids=decoder_input_ids[:, :-1], + decoder_attention_mask=decoder_attention_mask[:, :-1], + labels=labels[:, 1:], + ).loss + return {"loss": loss} + + def generate(self, samples, do_sample: bool = False, temperature: float = 0.2, top_p: float = 0.95): + pixel_values = samples["image"] + num_channels = pixel_values.shape[1] + if num_channels == 1: + pixel_values = pixel_values.repeat(1, 3, 1, 1) + + kwargs = {} + if do_sample: + kwargs["temperature"] = temperature + 
kwargs["top_p"] = top_p + + outputs = super().generate( + pixel_values=pixel_values, + max_new_tokens=self.tokenizer.tokenizer.model_max_length, # required + decoder_start_token_id=self.tokenizer.tokenizer.bos_token_id, + do_sample=do_sample, + **kwargs, + ) + + outputs = outputs[:, 1:].cpu().numpy() + pred_tokens = self.tokenizer.detokenize(outputs) + pred_str = self.tokenizer.token2str(outputs) + fixed_str = [latex_rm_whitespace(s) for s in pred_str] + return {"pred_ids": outputs, "pred_tokens": pred_tokens, "pred_str": pred_str, "fixed_str": fixed_str} + diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py new file mode 100644 index 00000000..155a786b --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py @@ -0,0 +1,8 @@ +from .configuration_unimer_mbart import UnimerMBartConfig +from .modeling_unimer_mbart import UnimerMBartModel, UnimerMBartForCausalLM + +__all__ = [ + "UnimerMBartConfig", + "UnimerMBartModel", + "UnimerMBartForCausalLM", +] diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py new file mode 100644 index 00000000..eef4a57d --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py @@ -0,0 +1,163 @@ +# coding=utf-8 +# Copyright 2021, The Facebook AI Research Team and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""UnimerMBART model configuration""" + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + + +class UnimerMBartConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`MBartModel`]. It is used to instantiate an MBART + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the MBART + [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 50265): + Vocabulary size of the MBART model. Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`MBartModel`] or [`TFMBartModel`]. + d_model (`int`, *optional*, defaults to 1024): + Dimensionality of the layers and the pooler layer. + qk_squeeze (`int`, *optional*, defaults to 2): + Squeeze ratio for query/key's output dimension. See the [UniMERNet paper](https://arxiv.org/abs/2404.15254). 
+ Squeeze Attention maps the query and key to a lower-dimensional space without excessive loss of information, + thereby accelerating the computation of attention. + encoder_layers (`int`, *optional*, defaults to 12): + Number of encoder layers. + decoder_layers (`int`, *optional*, defaults to 12): + Number of decoder layers. + encoder_attention_heads (`int`, *optional*, defaults to 16): + Number of attention heads for each attention layer in the Transformer encoder. + decoder_attention_heads (`int`, *optional*, defaults to 16): + Number of attention heads for each attention layer in the Transformer decoder. + decoder_ffn_dim (`int`, *optional*, defaults to 4096): + Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. + encoder_ffn_dim (`int`, *optional*, defaults to 4096): + Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. + activation_function (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"silu"` and `"gelu_new"` are supported. + dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + activation_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for activations inside the fully connected layer. + classifier_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for classifier. + max_position_embeddings (`int`, *optional*, defaults to 1024): + The maximum sequence length that this model might ever be used with. Typically set this to something large + just in case (e.g., 512 or 1024 or 2048). + init_std (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + encoder_layerdrop (`float`, *optional*, defaults to 0.0): + The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) + for more details. + decoder_layerdrop (`float`, *optional*, defaults to 0.0): + The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) + for more details. + scale_embedding (`bool`, *optional*, defaults to `False`): + Scale embeddings by diving by sqrt(d_model). + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models) + forced_eos_token_id (`int`, *optional*, defaults to 2): + The id of the token to force as the last generated token when `max_length` is reached. Usually set to + `eos_token_id`. 
+ + Example: + + ```python + >>> from transformers import MBartConfig, MBartModel + + >>> # Initializing a MBART facebook/mbart-large-cc25 style configuration + >>> configuration = MBartConfig() + + >>> # Initializing a model (with random weights) from the facebook/mbart-large-cc25 style configuration + >>> model = MBartModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "unimer-mbart" + keys_to_ignore_at_inference = ["past_key_values"] + attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"} + + def __init__( + self, + vocab_size=50265, + max_position_embeddings=1024, + encoder_layers=12, + encoder_ffn_dim=4096, + encoder_attention_heads=16, + decoder_layers=12, + decoder_ffn_dim=4096, + decoder_attention_heads=16, + encoder_layerdrop=0.0, + decoder_layerdrop=0.0, + use_cache=True, + is_encoder_decoder=True, + activation_function="gelu", + d_model=1024, + qk_squeeze=2, + dropout=0.1, + attention_dropout=0.0, + activation_dropout=0.0, + init_std=0.02, + classifier_dropout=0.0, + scale_embedding=False, + pad_token_id=1, + bos_token_id=0, + eos_token_id=2, + forced_eos_token_id=2, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.d_model = d_model + self.qk_squeeze = qk_squeeze + self.encoder_ffn_dim = encoder_ffn_dim + self.encoder_layers = encoder_layers + self.encoder_attention_heads = encoder_attention_heads + self.decoder_ffn_dim = decoder_ffn_dim + self.decoder_layers = decoder_layers + self.decoder_attention_heads = decoder_attention_heads + self.dropout = dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.activation_function = activation_function + self.init_std = init_std + self.encoder_layerdrop = encoder_layerdrop + self.decoder_layerdrop = decoder_layerdrop + self.classifier_dropout = classifier_dropout + self.use_cache = use_cache + self.num_hidden_layers = encoder_layers + self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + is_encoder_decoder=is_encoder_decoder, + forced_eos_token_id=forced_eos_token_id, + **kwargs, + ) diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py new file mode 100644 index 00000000..08a5a049 --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py @@ -0,0 +1,2351 @@ +# coding=utf-8 +# Copyright 2021, The Facebook AI Research Team and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch UnimerMBART model.""" + +import copy +import math +from dataclasses import dataclass +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch import nn +from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss + +from transformers.activations import ACT2FN +from transformers.modeling_attn_mask_utils import ( + _prepare_4d_attention_mask, + _prepare_4d_attention_mask_for_sdpa, + _prepare_4d_causal_attention_mask, + _prepare_4d_causal_attention_mask_for_sdpa, +) +from transformers.modeling_outputs import ( + BaseModelOutput, + BaseModelOutputWithPastAndCrossAttentions, + CausalLMOutputWithCrossAttentions, + Seq2SeqLMOutput, + Seq2SeqModelOutput, + Seq2SeqQuestionAnsweringModelOutput, + Seq2SeqSequenceClassifierOutput, +) +from transformers import GenerationMixin, PreTrainedModel +from transformers.utils import ( + add_code_sample_docstrings, + add_end_docstrings, + add_start_docstrings, + add_start_docstrings_to_model_forward, + is_flash_attn_2_available, + is_flash_attn_greater_or_equal_2_10, + logging, + replace_return_docstrings, +) +from .configuration_unimer_mbart import UnimerMBartConfig + + +if is_flash_attn_2_available(): + from flash_attn import flash_attn_func, flash_attn_varlen_func + from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa + + +logger = logging.get_logger(__name__) + +_CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25" +_CONFIG_FOR_DOC = "MBartConfig" + +# Base model docstring +_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024] + + +# Copied from transformers.models.llama.modeling_llama._get_unpad_data +def _get_unpad_data(attention_mask): + seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32) + indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten() + max_seqlen_in_batch = seqlens_in_batch.max().item() + cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0)) + return ( + indices, + cu_seqlens, + max_seqlen_in_batch, + ) + + +def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int): + """ + Shift input ids one token to the right, and wrap the last non pad token (the token) Note that MBart does not + have a single `decoder_start_token_id` in contrast to other Bart-like models. + """ + prev_output_tokens = input_ids.clone() + + if pad_token_id is None: + raise ValueError("self.model.config.pad_token_id has to be defined.") + # replace possible -100 values in labels by `pad_token_id` + prev_output_tokens.masked_fill_(prev_output_tokens == -100, pad_token_id) + + index_of_eos = (prev_output_tokens.ne(pad_token_id).sum(dim=1) - 1).unsqueeze(-1) + decoder_start_tokens = prev_output_tokens.gather(1, index_of_eos).squeeze() + prev_output_tokens[:, 1:] = prev_output_tokens[:, :-1].clone() + prev_output_tokens[:, 0] = decoder_start_tokens + + return prev_output_tokens + +@dataclass +class CausalLMOutputWithCrossAttentionsAndCounting(CausalLMOutputWithCrossAttentions): + """ + Base class for causal language model (or autoregressive) outputs. + + Args: + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): + Language modeling loss (for next-token prediction). + logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): + Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
+ hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + cross_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Cross attentions weights after the attention softmax, used to compute the weighted average in the + cross-attention heads. + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `torch.FloatTensor` tuples of length `config.n_layers`, with each tuple containing the cached key, + value states of the self-attention and the cross-attention layers if model is used in encoder-decoder + setting. Only relevant if `config.is_decoder = True`. + + Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see + `past_key_values` input) to speed up sequential decoding. + counting: + Counting + """ + counting: Optional[torch.FloatTensor] = None + +# Copied from transformers.models.bart.modeling_bart.BartLearnedPositionalEmbedding with Bart->MBart +class UnimerMBartLearnedPositionalEmbedding(nn.Embedding): + """ + This module learns positional embeddings up to a fixed maximum size. + """ + + def __init__(self, num_embeddings: int, embedding_dim: int): + # MBart is set up so that if padding_idx is specified then offset the embedding ids by 2 + # and adjust num_embeddings appropriately. Other models don't have this hack + self.offset = 2 + super().__init__(num_embeddings + self.offset, embedding_dim) + + def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0): + """`input_ids' shape is expected to be [bsz x seqlen].""" + + bsz, seq_len = input_ids.shape[:2] + positions = torch.arange( + past_key_values_length, past_key_values_length + seq_len, dtype=torch.long, device=self.weight.device + ).expand(bsz, -1) + + return super().forward(positions + self.offset) + + +# Copied from transformers.models.bart.modeling_bart.BartScaledWordEmbedding with Bart->MBart +class UnimerMBartScaledWordEmbedding(nn.Embedding): + """ + This module overrides nn.Embeddings' forward by multiplying with embeddings scale. 
+ """ + + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int, embed_scale: Optional[float] = 1.0): + super().__init__(num_embeddings, embedding_dim, padding_idx) + self.embed_scale = embed_scale + + def forward(self, input_ids: torch.Tensor): + return super().forward(input_ids) * self.embed_scale + + +# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->MBart +class UnimerMBartAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper, with qk_squeeze""" + + def __init__( + self, + embed_dim: int, + num_heads: int, + dropout: float = 0.0, + is_decoder: bool = False, + bias: bool = True, + is_causal: bool = False, + *, + config: UnimerMBartConfig, + ): + super().__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = embed_dim // num_heads + self.config = config + + if (self.head_dim * num_heads) != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}" + f" and `num_heads`: {num_heads})." + ) + + self.squeeze_dim = embed_dim // config.qk_squeeze + self.squeeze_head_dim = self.squeeze_dim // num_heads + self.scaling = self.squeeze_head_dim**-0.5 + self.is_decoder = is_decoder + self.is_causal = is_causal + + self.q_proj = nn.Linear(embed_dim, self.squeeze_dim, bias=bias) + self.k_proj = nn.Linear(embed_dim, self.squeeze_dim, bias=bias) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias) + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias) + + def _shape_qk(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, self.squeeze_head_dim).transpose(1, 2).contiguous() + + def _shape_v(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous() + + def forward( + self, + hidden_states: torch.Tensor, + key_value_states: Optional[torch.Tensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + attention_mask: Optional[torch.Tensor] = None, + layer_head_mask: Optional[torch.Tensor] = None, + output_attentions: bool = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + + # if key_value_states are provided this layer is used as a cross-attention layer + # for the decoder + is_cross_attention = key_value_states is not None + + bsz, tgt_len, _ = hidden_states.size() + + # get query proj + query_states = self.q_proj(hidden_states) * self.scaling + # get key, value proj + # `past_key_value[0].shape[2] == key_value_states.shape[1]` + # is checking that the `sequence_length` of the `past_key_value` is the same as + # the provided `key_value_states` to support prefix tuning + if ( + is_cross_attention + and past_key_value is not None + and past_key_value[0].shape[2] == key_value_states.shape[1] + ): + # reuse k,v, cross_attentions + key_states = past_key_value[0] + value_states = past_key_value[1] + elif is_cross_attention: + # cross_attentions + key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz) + value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz) + elif past_key_value is not None: + # reuse k, v, self_attention + key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = 
torch.cat([past_key_value[1], value_states], dim=2) + else: + # self_attention + key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) + + if self.is_decoder: + # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. + # Further calls to cross_attention layer can then reuse all cross-attention + # key/value_states (first "if" case) + # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of + # all previous decoder key/value_states. Further calls to uni-directional self-attention + # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) + # if encoder bi-directional self-attention `past_key_value` is always `None` + past_key_value = (key_states, value_states) + + proj_shape = (bsz * self.num_heads, -1, self.squeeze_head_dim) + value_shape = (bsz * self.num_heads, -1, self.head_dim) + query_states = self._shape_qk(query_states, tgt_len, bsz).view(*proj_shape) + key_states = key_states.reshape(*proj_shape) + value_states = value_states.reshape(*value_shape) + + src_len = key_states.size(1) + attn_weights = torch.bmm(query_states, key_states.transpose(1, 2)) + + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + raise ValueError( + f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" + f" {attn_weights.size()}" + ) + + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, tgt_len, src_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}" + ) + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + attn_weights = nn.functional.softmax(attn_weights, dim=-1) + + if layer_head_mask is not None: + if layer_head_mask.size() != (self.num_heads,): + raise ValueError( + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" + f" {layer_head_mask.size()}" + ) + attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if output_attentions: + # this operation is a bit awkward, but it's required to + # make sure that attn_weights keeps its gradient. + # In order to do so, attn_weights have to be reshaped + # twice and have to be reused in the following + attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len) + else: + attn_weights_reshaped = None + + attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.bmm(attn_probs, value_states) + + if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) + attn_output = attn_output.transpose(1, 2) + + # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be + # partitioned across GPUs when using tensor-parallelism. 
+ attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim) + + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights_reshaped, past_key_value + + +# Copied from transformers.models.bart.modeling_bart.BartFlashAttention2 with Bart->MBart +class UnimerMBartFlashAttention2(UnimerMBartAttention): + """ + MBart flash attention module. This module inherits from `MBartSqueezeAttention` as the weights of the module stays + untouched. The only required change would be on the forward pass where it needs to correctly call the public API of + flash attention and deal with padding tokens in case the input contains any of them. + """ + + # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2.__init__ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1. + # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0. + # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left). + self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10() + + # def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + # return tensor.view(bsz, seq_len, self.num_heads, self.head_dim) + + def _shape_qk(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, self.squeeze_head_dim) + + def _shape_v(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, self.head_dim) + + def forward( + self, + hidden_states: torch.Tensor, + key_value_states: Optional[torch.Tensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + attention_mask: Optional[torch.Tensor] = None, + layer_head_mask: Optional[torch.Tensor] = None, + output_attentions: bool = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + # MBartFlashAttention2 attention does not support output_attentions + if output_attentions: + raise ValueError("MBartFlashAttention2 attention does not support output_attentions") + + # if key_value_states are provided this layer is used as a cross-attention layer + # for the decoder + is_cross_attention = key_value_states is not None + + bsz, q_len, _ = hidden_states.size() + + # get query proj + query_states = self._shape_qk(self.q_proj(hidden_states), -1, bsz) + + # get key, value proj + # `past_key_value[0].shape[2] == key_value_states.shape[1]` + # is checking that the `sequence_length` of the `past_key_value` is the same as + # the provided `key_value_states` to support prefix tuning + if ( + is_cross_attention + and past_key_value is not None + and past_key_value[0].shape[2] == key_value_states.shape[1] + ): + # reuse k,v, cross_attentions + key_states = past_key_value[0].transpose(1, 2) + value_states = past_key_value[1].transpose(1, 2) + elif is_cross_attention: + # cross_attentions + key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz) + value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz) + elif past_key_value is not None: + # reuse k, v, self_attention + key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) + 
key_states = torch.cat([past_key_value[0].transpose(1, 2), key_states], dim=1) + value_states = torch.cat([past_key_value[1].transpose(1, 2), value_states], dim=1) + else: + # self_attention + key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) + + if self.is_decoder: + # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. + # Further calls to cross_attention layer can then reuse all cross-attention + # key/value_states (first "if" case) + # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of + # all previous decoder key/value_states. Further calls to uni-directional self-attention + # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) + # if encoder bi-directional self-attention `past_key_value` is always `None` + past_key_value = (key_states.transpose(1, 2), value_states.transpose(1, 2)) + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + + # In PEFT, usually we cast the layer norms in float32 for training stability reasons + # therefore the input hidden states gets silently casted in float32. Hence, we need + # cast them back in the correct dtype just to be sure everything works as expected. + # This might slowdown training & inference so it is recommended to not cast the LayerNorms + # in fp32. (LlamaRMSNorm handles it correctly) + + input_dtype = query_states.dtype + if input_dtype == torch.float32: + if torch.is_autocast_enabled(): + target_dtype = torch.get_autocast_gpu_dtype() + # Handle the case where the model is quantized + elif hasattr(self.config, "_pre_quantization_dtype"): + target_dtype = self.config._pre_quantization_dtype + else: + target_dtype = self.q_proj.weight.dtype + + logger.warning_once( + f"The input hidden states seems to be silently casted in float32, this might be related to" + f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in" + f" {target_dtype}." + ) + + query_states = query_states.to(target_dtype) + key_states = key_states.to(target_dtype) + value_states = value_states.to(target_dtype) + + attn_output = self._flash_attention_forward( + query_states, key_states, value_states, attention_mask, q_len, dropout=self.dropout + ) + + attn_output = attn_output.reshape(bsz, q_len, -1) + attn_output = self.out_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._flash_attention_forward + def _flash_attention_forward( + self, query_states, key_states, value_states, attention_mask, query_length, dropout=0.0, softmax_scale=None + ): + """ + Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token + first unpad the input, then computes the attention scores and pad the final attention scores. 
+ + Args: + query_states (`torch.Tensor`): + Input query states to be passed to Flash Attention API + key_states (`torch.Tensor`): + Input key states to be passed to Flash Attention API + value_states (`torch.Tensor`): + Input value states to be passed to Flash Attention API + attention_mask (`torch.Tensor`): + The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the + position of padding tokens and 1 for the position of non-padding tokens. + dropout (`float`): + Attention dropout + softmax_scale (`float`, *optional*): + The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim) + """ + if not self._flash_attn_uses_top_left_mask: + causal = self.is_causal + else: + # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in LlamaFlashAttention2 __init__. + causal = self.is_causal and query_length != 1 + + # Contains at least one padding token in the sequence + if attention_mask is not None: + batch_size = query_states.shape[0] + + query_states, key_states, value_states, indices_q, cu_seq_lens, max_seq_lens = self._upad_input( + query_states, key_states, value_states, attention_mask, query_length + ) + + cu_seqlens_q, cu_seqlens_k = cu_seq_lens + max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens + + attn_output_unpad = flash_attn_varlen_func( + query_states, + key_states, + value_states, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_in_batch_q, + max_seqlen_k=max_seqlen_in_batch_k, + dropout_p=dropout, + softmax_scale=softmax_scale, + causal=causal, + ) + + attn_output = pad_input(attn_output_unpad, indices_q, batch_size, query_length) + else: + attn_output = flash_attn_func( + query_states, key_states, value_states, dropout, softmax_scale=softmax_scale, causal=causal + ) + + return attn_output + + # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._upad_input + def _upad_input(self, query_layer, key_layer, value_layer, attention_mask, query_length): + indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask) + batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape + + key_layer = index_first_axis( + key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k + ) + value_layer = index_first_axis( + value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k + ) + if query_length == kv_seq_len: + query_layer = index_first_axis( + query_layer.reshape(batch_size * kv_seq_len, self.num_heads, head_dim), indices_k + ) + cu_seqlens_q = cu_seqlens_k + max_seqlen_in_batch_q = max_seqlen_in_batch_k + indices_q = indices_k + elif query_length == 1: + max_seqlen_in_batch_q = 1 + cu_seqlens_q = torch.arange( + batch_size + 1, dtype=torch.int32, device=query_layer.device + ) # There is a memcpy here, that is very bad. + indices_q = cu_seqlens_q[:-1] + query_layer = query_layer.squeeze(1) + else: + # The -q_len: slice assumes left padding. 
+ attention_mask = attention_mask[:, -query_length:] + query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(query_layer, attention_mask) + + return ( + query_layer, + key_layer, + value_layer, + indices_q, + (cu_seqlens_q, cu_seqlens_k), + (max_seqlen_in_batch_q, max_seqlen_in_batch_k), + ) + +class UnimerMBartSdpaAttention(UnimerMBartAttention): + def forward( + self, + hidden_states: torch.Tensor, + key_value_states: Optional[torch.Tensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + attention_mask: Optional[torch.Tensor] = None, + layer_head_mask: Optional[torch.Tensor] = None, + output_attentions: bool = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + if output_attentions or layer_head_mask is not None: + # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented. + logger.warning( + "BartModel is using BartSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention" + ' implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.' + ) + return super().forward( + hidden_states, + key_value_states=key_value_states, + past_key_value=past_key_value, + attention_mask=attention_mask, + layer_head_mask=layer_head_mask, + output_attentions=output_attentions, + ) + + # if key_value_states are provided this layer is used as a cross-attention layer + # for the decoder + is_cross_attention = key_value_states is not None + + bsz, tgt_len, _ = hidden_states.size() + + # get query proj + query_states = self.q_proj(hidden_states) + # get key, value proj + # `past_key_value[0].shape[2] == key_value_states.shape[1]` + # is checking that the `sequence_length` of the `past_key_value` is the same as + # the provided `key_value_states` to support prefix tuning + if ( + is_cross_attention + and past_key_value is not None + and past_key_value[0].shape[2] == key_value_states.shape[1] + ): + # reuse k,v, cross_attentions + key_states = past_key_value[0] + value_states = past_key_value[1] + elif is_cross_attention: + # cross_attentions + key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz) + value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz) + elif past_key_value is not None: + # reuse k, v, self_attention + key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = torch.cat([past_key_value[1], value_states], dim=2) + else: + # self_attention + key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) + value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) + + if self.is_decoder: + # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. + # Further calls to cross_attention layer can then reuse all cross-attention + # key/value_states (first "if" case) + # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of + # all previous decoder key/value_states. 
Further calls to uni-directional self-attention + # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) + # if encoder bi-directional self-attention `past_key_value` is always `None` + past_key_value = (key_states, value_states) + + query_states = self._shape_qk(query_states, tgt_len, bsz) + + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. + is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False + + # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, + # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=attention_mask, + dropout_p=self.dropout if self.training else 0.0, + is_causal=is_causal, + ) + + if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.transpose(1, 2) + + # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be + # partitioned across GPUs when using tensor-parallelism. + attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim) + + attn_output = self.out_proj(attn_output) + + return attn_output, None, past_key_value + +UNIMER_MBART_ATTENTION_CLASSES = { + "eager": UnimerMBartAttention, + "flash_attention_2": UnimerMBartFlashAttention2, + "sdpa": UnimerMBartSdpaAttention, +} + + +class UnimerMBartEncoderLayer(nn.Module): + def __init__(self, config: UnimerMBartConfig): + super().__init__() + self.embed_dim = config.d_model + + self.self_attn = UNIMER_MBART_ATTENTION_CLASSES[config._attn_implementation]( + embed_dim=self.embed_dim, + num_heads=config.encoder_attention_heads, + dropout=config.attention_dropout, + config=config, + ) + self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim) + self.dropout = config.dropout + self.activation_fn = ACT2FN[config.activation_function] + self.activation_dropout = config.activation_dropout + self.fc1 = nn.Linear(self.embed_dim, config.encoder_ffn_dim) + self.fc2 = nn.Linear(config.encoder_ffn_dim, self.embed_dim) + self.final_layer_norm = nn.LayerNorm(self.embed_dim) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + layer_head_mask: torch.Tensor, + output_attentions: bool = False, + ) -> torch.Tensor: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size + `(encoder_attention_heads,)`. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. 
See `attentions` under + returned tensors for more detail. + """ + residual = hidden_states + hidden_states = self.self_attn_layer_norm(hidden_states) + hidden_states, attn_weights, _ = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + layer_head_mask=layer_head_mask, + output_attentions=output_attentions, + ) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + hidden_states = residual + hidden_states + + residual = hidden_states + hidden_states = self.final_layer_norm(hidden_states) + hidden_states = self.activation_fn(self.fc1(hidden_states)) + hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training) + hidden_states = self.fc2(hidden_states) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + hidden_states = residual + hidden_states + + if hidden_states.dtype == torch.float16 and ( + torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any() + ): + clamp_value = torch.finfo(hidden_states.dtype).max - 1000 + hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value) + + outputs = (hidden_states,) + + if output_attentions: + outputs += (attn_weights,) + + return outputs + + +class UnimerMBartDecoderLayer(nn.Module): + def __init__(self, config: UnimerMBartConfig): + super().__init__() + self.embed_dim = config.d_model + + self.self_attn = UNIMER_MBART_ATTENTION_CLASSES[config._attn_implementation]( + embed_dim=self.embed_dim, + num_heads=config.decoder_attention_heads, + dropout=config.attention_dropout, + is_decoder=True, + is_causal=True, + config=config, + ) + self.dropout = config.dropout + self.activation_fn = ACT2FN[config.activation_function] + self.activation_dropout = config.activation_dropout + + self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim) + self.encoder_attn = UNIMER_MBART_ATTENTION_CLASSES[config._attn_implementation]( + self.embed_dim, + config.decoder_attention_heads, + dropout=config.attention_dropout, + is_decoder=True, + config=config, + ) + self.encoder_attn_layer_norm = nn.LayerNorm(self.embed_dim) + self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim) + self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim) + self.final_layer_norm = nn.LayerNorm(self.embed_dim) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + encoder_hidden_states: Optional[torch.Tensor] = None, + encoder_attention_mask: Optional[torch.Tensor] = None, + layer_head_mask: Optional[torch.Tensor] = None, + cross_attn_layer_head_mask: Optional[torch.Tensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = True, + ) -> torch.Tensor: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + encoder_hidden_states (`torch.FloatTensor`): + cross attention input to the layer of shape `(batch, seq_len, embed_dim)` + encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size + `(encoder_attention_heads,)`. 
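`UnimerMBartEncoderLayer.forward` above is a pre-norm residual block: LayerNorm is applied before each sub-layer, the residual is added after it, and half-precision activations are clamped to stay finite. A compressed sketch of that wiring in plain PyTorch; `nn.MultiheadAttention`, ReLU and every size here are stand-ins, not the attention classes or the configured activation used in this file:

```python
import torch
import torch.nn as nn

class PreNormBlock(nn.Module):
    """Pre-norm residual wiring in the style of the encoder layer (all sizes hypothetical)."""

    def __init__(self, d_model=64, n_heads=4, ffn_dim=256, dropout=0.1):
        super().__init__()
        self.attn_norm = nn.LayerNorm(d_model)
        self.attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.ffn_norm = nn.LayerNorm(d_model)
        self.fc1 = nn.Linear(d_model, ffn_dim)
        self.fc2 = nn.Linear(ffn_dim, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, key_padding_mask=None):
        # Self-attention sub-layer: normalise first, add the residual afterwards.
        residual = x
        h = self.attn_norm(x)
        h, _ = self.attn(h, h, h, key_padding_mask=key_padding_mask, need_weights=False)
        x = residual + self.dropout(h)

        # Feed-forward sub-layer with the same pre-norm + residual pattern.
        residual = x
        h = self.fc2(self.dropout(torch.relu(self.fc1(self.ffn_norm(x)))))
        x = residual + self.dropout(h)

        # Keep fp16 activations finite, mirroring the clamp in the layer above.
        if x.dtype == torch.float16:
            limit = torch.finfo(x.dtype).max - 1000
            x = x.clamp(min=-limit, max=limit)
        return x

print(PreNormBlock()(torch.randn(2, 7, 64)).shape)  # torch.Size([2, 7, 64])
```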
+ cross_attn_layer_head_mask (`torch.FloatTensor`): mask for cross-attention heads in a given layer of + size `(decoder_attention_heads,)`. + past_key_value (`Tuple(torch.FloatTensor)`): cached past key and value projection states + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + """ + residual = hidden_states + hidden_states = self.self_attn_layer_norm(hidden_states) + + # Self Attention + # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 + self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None + # add present self-attn cache to positions 1,2 of present_key_value tuple + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + past_key_value=self_attn_past_key_value, + attention_mask=attention_mask, + layer_head_mask=layer_head_mask, + output_attentions=output_attentions, + ) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + hidden_states = residual + hidden_states + + # Cross-Attention Block + cross_attn_present_key_value = None + cross_attn_weights = None + if encoder_hidden_states is not None: + residual = hidden_states + hidden_states = self.encoder_attn_layer_norm(hidden_states) + + # cross_attn cached key/values tuple is at positions 3,4 of present_key_value tuple + cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None + hidden_states, cross_attn_weights, cross_attn_present_key_value = self.encoder_attn( + hidden_states=hidden_states, + key_value_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + layer_head_mask=cross_attn_layer_head_mask, + past_key_value=cross_attn_past_key_value, + output_attentions=output_attentions, + ) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + hidden_states = residual + hidden_states + + # add cross-attn to positions 3,4 of present_key_value tuple + present_key_value = present_key_value + cross_attn_present_key_value + + # Fully Connected + residual = hidden_states + hidden_states = self.final_layer_norm(hidden_states) + hidden_states = self.activation_fn(self.fc1(hidden_states)) + hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training) + hidden_states = self.fc2(hidden_states) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + hidden_states = residual + hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights, cross_attn_weights) + + if use_cache: + outputs += (present_key_value,) + + return outputs + + +# Copied from transformers.models.bart.modeling_bart.BartClassificationHead with Bart->MBart +class UnimerMBartClassificationHead(nn.Module): + """Head for sentence-level classification tasks.""" + + def __init__( + self, + input_dim: int, + inner_dim: int, + num_classes: int, + pooler_dropout: float, + ): + super().__init__() + self.dense = nn.Linear(input_dim, inner_dim) + self.dropout = nn.Dropout(p=pooler_dropout) + self.out_proj = nn.Linear(inner_dim, num_classes) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.dropout(hidden_states) + hidden_states = self.dense(hidden_states) + hidden_states = torch.tanh(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = 
self.out_proj(hidden_states) + return hidden_states + + +class UnimerMBartPreTrainedModel(PreTrainedModel): + config_class = UnimerMBartConfig + base_model_prefix = "model" + supports_gradient_checkpointing = True + _no_split_modules = ["MBartDecoderLayer", "MBartSqueezeAttention"] + _supports_flash_attn_2 = True + _supports_sdpa = True + + def _init_weights(self, module): + std = self.config.init_std + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=std) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Embedding): + module.weight.data.normal_(mean=0.0, std=std) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + + @property + def dummy_inputs(self): + pad_token = self.config.pad_token_id + input_ids = torch.tensor([[0, 6, 10, 4, 2], [0, 8, 12, 2, pad_token]], device=self.device) + dummy_inputs = { + "attention_mask": input_ids.ne(pad_token), + "input_ids": input_ids, + } + return dummy_inputs + + +MBART_START_DOCSTRING = r""" + This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage + and behavior. + + Parameters: + config ([`MBartConfig`]): + Model configuration class with all the parameters of the model. Initializing with a config file does not + load the weights associated with the model, only the configuration. Check out the + [`~PreTrainedModel.from_pretrained`] method to load the model weights. +""" + +MBART_GENERATION_EXAMPLE = r""" + Translation example: + + ```python + >>> from transformers import AutoTokenizer, MBartForConditionalGeneration + + >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro") + >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-en-ro") + + >>> example_english_phrase = "42 is the answer" + >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") + + >>> # Translate + >>> generated_ids = model.generate(**inputs, num_beams=4, max_length=5) + >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + '42 este răspuns' + ``` + + Mask filling example: + + ```python + >>> from transformers import AutoTokenizer, MBartForConditionalGeneration + + >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") + >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25") + + >>> # de_DE is the language symbol id for German + >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" + + >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt")["input_ids"] + >>> logits = model(input_ids).logits + + >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() + >>> probs = logits[0, masked_index].softmax(dim=0) + >>> values, predictions = probs.topk(5) + + >>> tokenizer.decode(predictions).split() + ['nett', 'sehr', 'ganz', 'nicht', 'so'] + ``` +""" + +MBART_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. 
Padding will be ignored by default should you provide + it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + MBart uses a specific language id token as the starting token for `decoder_input_ids` generation that + varies according to source and target language, *e.g.* 25004 for *en_XX*, and 25003 for *de_DE*. If + `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see + `past_key_values`). + + For translation and summarization training, `decoder_input_ids` should be provided. If no + `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right + for denoising pre-training following the paper. + decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + decoder_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0, + 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + encoder_outputs (`tuple(tuple(torch.FloatTensor)`, *optional*): + Tuple consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: `attentions`) + `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) is a sequence of + hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder. + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape + `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape + `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. 
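The `past_key_values` layout documented here, and filled in by the attention code above, is one 4-tuple per decoder layer: self-attention key/value at positions 0-1 (one extra time step per generated token) and cross-attention key/value at positions 2-3 (computed once from the encoder output and then reused). A small sketch of the shapes during incremental decoding, with made-up sizes:

```python
import torch

bsz, num_heads, head_dim = 2, 4, 16
enc_len = 12        # encoder sequence length, fixed while decoding
cur_len = 5         # tokens decoded so far

# One decoder layer's entry: (self_k, self_v, cross_k, cross_v)
layer_cache = (
    torch.zeros(bsz, num_heads, cur_len, head_dim),  # self-attn keys, grow each step
    torch.zeros(bsz, num_heads, cur_len, head_dim),  # self-attn values, grow each step
    torch.zeros(bsz, num_heads, enc_len, head_dim),  # cross-attn keys, computed once
    torch.zeros(bsz, num_heads, enc_len, head_dim),  # cross-attn values, computed once
)

# Next step: project only the newest token and append it, as the
# `torch.cat([past_key_value[0], key_states], dim=2)` branch does.
new_k = torch.zeros(bsz, num_heads, 1, head_dim)
print(torch.cat([layer_cache[0], new_k], dim=2).shape)  # torch.Size([2, 4, 6, 16])
```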
+ + Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention + blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. + + If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that + don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all + `decoder_input_ids` of shape `(batch_size, sequence_length)`. + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert `input_ids` indices into associated vectors + than the model's internal embedding lookup matrix. + decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded + representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be + input (see `past_key_values`). This is useful if you want more control over how to convert + `decoder_input_ids` indices into associated vectors than the model's internal embedding lookup matrix. + + If `decoder_input_ids` and `decoder_inputs_embeds` are both unset, `decoder_inputs_embeds` takes the value + of `inputs_embeds`. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see + `past_key_values`). + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +class UnimerMBartEncoder(UnimerMBartPreTrainedModel): + """ + Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a + [`UnimerMBartEncoderLayer`].
+ + Args: + config: MBartConfig + embed_tokens (nn.Embedding): output embedding + """ + + def __init__(self, config: UnimerMBartConfig, embed_tokens: Optional[nn.Embedding] = None): + super().__init__(config) + + self.dropout = config.dropout + self.layerdrop = config.encoder_layerdrop + + embed_dim = config.d_model + self.padding_idx = config.pad_token_id + self.max_source_positions = config.max_position_embeddings + embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0 + + self.embed_tokens = UnimerMBartScaledWordEmbedding( + config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale + ) + + if embed_tokens is not None: + self.embed_tokens.weight = embed_tokens.weight + + self.embed_positions = UnimerMBartLearnedPositionalEmbedding( + config.max_position_embeddings, + embed_dim, + ) + self.layers = nn.ModuleList([UnimerMBartEncoderLayer(config) for _ in range(config.encoder_layers)]) + self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" + self._use_sdpa = config._attn_implementation == "sdpa" + self.layernorm_embedding = nn.LayerNorm(embed_dim) + self.layer_norm = nn.LayerNorm(config.d_model) + + self.gradient_checkpointing = False + # Initialize weights and apply final processing + self.post_init() + + def _backward_compatibility_gradient_checkpointing(self): + # Override to not delete the attribute from the config + if self.supports_gradient_checkpointing and getattr(self.config, "gradient_checkpointing", False): + self.gradient_checkpointing_enable() + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + head_mask: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutput]: + r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you + provide it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert `input_ids` indices into associated vectors + than the model's internal embedding lookup matrix. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. 
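The encoder initialised above embeds tokens with a word embedding scaled by `sqrt(d_model)` when `config.scale_embedding` is set, adds learned positional embeddings, then applies `layernorm_embedding` and dropout before the layer stack. A stripped-down sketch of that input pipeline using plain `nn.Embedding` modules (the scaled-embedding and positional classes of this file are replaced by stand-ins, and the sizes are invented):

```python
import math
import torch
import torch.nn as nn

vocab_size, d_model, max_positions, pad_id = 1000, 64, 512, 1

embed_scale = math.sqrt(d_model)                # 1.0 when config.scale_embedding is False
tok_emb = nn.Embedding(vocab_size, d_model, padding_idx=pad_id)
pos_emb = nn.Embedding(max_positions, d_model)  # learned positions
norm = nn.LayerNorm(d_model)
drop = nn.Dropout(0.1)

input_ids = torch.randint(2, vocab_size, (2, 9))
positions = torch.arange(input_ids.shape[1]).unsqueeze(0)

hidden = tok_emb(input_ids) * embed_scale + pos_emb(positions)
hidden = drop(norm(hidden))
print(hidden.shape)  # torch.Size([2, 9, 64])
```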
+ output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") + elif input_ids is not None: + input = input_ids + input_shape = input.shape + input_ids = input_ids.view(-1, input_shape[-1]) + elif inputs_embeds is not None: + input = inputs_embeds[:, :, -1] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + + embed_pos = self.embed_positions(input) + + hidden_states = inputs_embeds + embed_pos.to(inputs_embeds.device) + hidden_states = self.layernorm_embedding(hidden_states) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + + # expand attention_mask + if attention_mask is not None: + # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + if self._use_flash_attention_2: + attention_mask = attention_mask if 0 in attention_mask else None + elif self._use_sdpa and head_mask is None and not output_attentions: + # output_attentions=True & head_mask can not be supported when using SDPA, fall back to + # the manual implementation that requires a 4D causal mask in all cases. + # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + attention_mask = _prepare_4d_attention_mask_for_sdpa(attention_mask, inputs_embeds.dtype) + else: + # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype) + + encoder_states = () if output_hidden_states else None + all_attentions = () if output_attentions else None + + # check if head_mask has a correct number of layers specified if desired + if head_mask is not None: + if head_mask.size()[0] != len(self.layers): + raise ValueError( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
+ ) + for idx, encoder_layer in enumerate(self.layers): + if output_hidden_states: + encoder_states = encoder_states + (hidden_states,) + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + to_drop = False + if self.training: + dropout_probability = torch.rand([]) + if dropout_probability < self.layerdrop: # skip the layer + to_drop = True + + if to_drop: + layer_outputs = (None, None) + else: + if self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + encoder_layer.__call__, + hidden_states, + attention_mask, + (head_mask[idx] if head_mask is not None else None), + output_attentions, + ) + else: + layer_outputs = encoder_layer( + hidden_states, + attention_mask, + layer_head_mask=(head_mask[idx] if head_mask is not None else None), + output_attentions=output_attentions, + ) + + hidden_states = layer_outputs[0] + + if output_attentions: + all_attentions = all_attentions + (layer_outputs[1],) + + hidden_states = self.layer_norm(hidden_states) + + if output_hidden_states: + encoder_states = encoder_states + (hidden_states,) + + if not return_dict: + return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None) + return BaseModelOutput( + last_hidden_state=hidden_states, hidden_states=encoder_states, attentions=all_attentions + ) + + +class UnimerMBartDecoder(UnimerMBartPreTrainedModel): + """ + Transformer decoder consisting of *config.decoder_layers* layers. Each layer is a [`MBartDecoderLayer`] + + Args: + config: MBartConfig + embed_tokens (nn.Embedding): output embedding + """ + + def __init__(self, config: UnimerMBartConfig, embed_tokens: Optional[nn.Embedding] = None): + super().__init__(config) + self.dropout = config.dropout + self.layerdrop = config.decoder_layerdrop + self.padding_idx = config.pad_token_id + self.max_target_positions = config.max_position_embeddings + embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0 + + self.embed_tokens = UnimerMBartScaledWordEmbedding( + config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale + ) + + if embed_tokens is not None: + self.embed_tokens.weight = embed_tokens.weight + + self.embed_positions = UnimerMBartLearnedPositionalEmbedding( + config.max_position_embeddings, + config.d_model, + ) + self.layers = nn.ModuleList([UnimerMBartDecoderLayer(config) for _ in range(config.decoder_layers)]) + self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" + self._use_sdpa = config._attn_implementation == "sdpa" + self.layernorm_embedding = nn.LayerNorm(config.d_model) + self.layer_norm = nn.LayerNorm(config.d_model) + + self.gradient_checkpointing = False + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.embed_tokens + + def set_input_embeddings(self, value): + self.embed_tokens = value + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + count_pred: Optional[torch.FloatTensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.Tensor] = None, + cross_attn_head_mask: Optional[torch.Tensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + 
return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]: + r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you + provide it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*): + Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention + of the decoder. + encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*): + Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values + selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the cross-attention modules in the decoder to avoid performing + cross-attention on hidden heads. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of + shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of + shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. + + Contains pre-computed hidden-states (key and values in the self-attention blocks and in the + cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. + + If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those + that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. + This is useful if you want more control over how to convert `input_ids` indices into associated vectors + than the model's internal embedding lookup matrix. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. 
+ output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time") + elif input_ids is not None: + input = input_ids + input_shape = input.size() + input_ids = input_ids.view(-1, input_shape[-1]) + elif inputs_embeds is not None: + input_shape = inputs_embeds.size()[:-1] + input = inputs_embeds[:, :, -1] + else: + raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds") + + # past_key_values_length + past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + + if self._use_flash_attention_2: + # 2d mask is passed through the layers + attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None + elif self._use_sdpa and not output_attentions and cross_attn_head_mask is None: + # output_attentions=True & cross_attn_head_mask can not be supported when using SDPA, and we fall back on + # the manual implementation that requires a 4D causal mask in all cases. + attention_mask = _prepare_4d_causal_attention_mask_for_sdpa( + attention_mask, + input_shape, + inputs_embeds, + past_key_values_length, + ) + else: + # 4d mask is passed through the layers + attention_mask = _prepare_4d_causal_attention_mask( + attention_mask, input_shape, inputs_embeds, past_key_values_length + ) + + # expand encoder attention mask + if encoder_hidden_states is not None and encoder_attention_mask is not None: + if self._use_flash_attention_2: + encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None + elif self._use_sdpa and cross_attn_head_mask is None and not output_attentions: + # output_attentions=True & cross_attn_head_mask can not be supported when using SDPA, and we fall back on + # the manual implementation that requires a 4D causal mask in all cases. 
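The branches above (and the matching ones in the encoder) turn a 2-D `[batch, seq_len]` padding mask into the 4-D additive mask that the eager and SDPA paths consume: zeros where attention is allowed and the dtype minimum where it is masked. A hand-written version of that expansion, shown only to illustrate what the private `_prepare_4d_attention_mask*` helpers produce:

```python
import torch

def expand_padding_mask(mask_2d: torch.Tensor, dtype: torch.dtype, tgt_len: int) -> torch.Tensor:
    """[bsz, src_len] with 1 = keep, 0 = pad  ->  [bsz, 1, tgt_len, src_len] additive mask."""
    bsz, src_len = mask_2d.shape
    expanded = mask_2d[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype)
    # 0 where the position may be attended to, dtype-min where it is padding.
    return (1.0 - expanded) * torch.finfo(dtype).min

mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])
print(expand_padding_mask(mask, torch.float32, tgt_len=3).shape)  # torch.Size([2, 1, 3, 4])
```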
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa( + encoder_attention_mask, + inputs_embeds.dtype, + tgt_len=input_shape[-1], + ) + else: + # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + encoder_attention_mask = _prepare_4d_attention_mask( + encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1] + ) + + # embed positions + positions = self.embed_positions(input, past_key_values_length) + + hidden_states = inputs_embeds + positions.to(inputs_embeds.device) + + # TODO: add counting context weight to hidden_states + if count_pred is not None: + count_context_weight = self.counting_context_weight(count_pred) + hidden_states = hidden_states + 0.5 * count_context_weight.unsqueeze(1) + + hidden_states = self.layernorm_embedding(hidden_states) + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning_once( + "`use_cache=True` is incompatible with gradient checkpointing`. Setting `use_cache=False`..." + ) + use_cache = False + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None + next_decoder_cache = () if use_cache else None + + # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired + for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): + if attn_mask is not None: + if attn_mask.size()[0] != len(self.layers): + raise ValueError( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {attn_mask.size()[0]}." 
+ ) + for idx, decoder_layer in enumerate(self.layers): + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + if output_hidden_states: + all_hidden_states += (hidden_states,) + if self.training: + dropout_probability = torch.rand([]) + if dropout_probability < self.layerdrop: + continue + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + decoder_layer.__call__, + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + head_mask[idx] if head_mask is not None else None, + cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None, + None, + output_attentions, + use_cache, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + layer_head_mask=(head_mask[idx] if head_mask is not None else None), + cross_attn_layer_head_mask=( + cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None + ), + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[3 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + if encoder_hidden_states is not None: + all_cross_attentions += (layer_outputs[2],) + + hidden_states = self.layer_norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_cross_attentions] + if v is not None + ) + return BaseModelOutputWithPastAndCrossAttentions( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + cross_attentions=all_cross_attentions, + ) + + +@add_start_docstrings( + "The bare MBART Model outputting raw hidden-states without any specific head on top.", + MBART_START_DOCSTRING, +) +class UnimerMBartModel(UnimerMBartPreTrainedModel): + _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"] + + def __init__(self, config: UnimerMBartConfig): + super().__init__(config) + + padding_idx, vocab_size = config.pad_token_id, config.vocab_size + self.shared = nn.Embedding(vocab_size, config.d_model, padding_idx) + + self.encoder = UnimerMBartEncoder(config, self.shared) + self.decoder = UnimerMBartDecoder(config, self.shared) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.shared + + def set_input_embeddings(self, value): + self.shared = value + self.encoder.embed_tokens = self.shared + self.decoder.embed_tokens = self.shared + + def get_encoder(self): + return self.encoder + + def get_decoder(self): + return self.decoder + + def _tie_weights(self): + if self.config.tie_word_embeddings: + self._tie_or_clone_weights(self.encoder.embed_tokens, self.get_input_embeddings()) + self._tie_or_clone_weights(self.decoder.embed_tokens, self.get_input_embeddings()) + + @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) + @add_code_sample_docstrings( + checkpoint=_CHECKPOINT_FOR_DOC, + output_type=Seq2SeqModelOutput, + 
config_class=_CONFIG_FOR_DOC, + expected_output=_EXPECTED_OUTPUT_SHAPE, + ) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + decoder_attention_mask: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.Tensor] = None, + decoder_head_mask: Optional[torch.Tensor] = None, + cross_attn_head_mask: Optional[torch.Tensor] = None, + encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + decoder_inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Seq2SeqModelOutput, Tuple[torch.FloatTensor]]: + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # different to other models, MBart automatically creates decoder_input_ids from + # input_ids if no decoder_input_ids are provided + if decoder_input_ids is None and decoder_inputs_embeds is None: + decoder_input_ids = shift_tokens_right(input_ids, self.config.pad_token_id) + + if encoder_outputs is None: + encoder_outputs = self.encoder( + input_ids=input_ids, + attention_mask=attention_mask, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True + elif return_dict and not isinstance(encoder_outputs, BaseModelOutput): + encoder_outputs = BaseModelOutput( + last_hidden_state=encoder_outputs[0], + hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None, + attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None, + ) + + # decoder outputs consists of (dec_features, past_key_value, dec_hidden, dec_attn) + decoder_outputs = self.decoder( + input_ids=decoder_input_ids, + attention_mask=decoder_attention_mask, + encoder_hidden_states=encoder_outputs[0], + encoder_attention_mask=attention_mask, + head_mask=decoder_head_mask, + cross_attn_head_mask=cross_attn_head_mask, + past_key_values=past_key_values, + inputs_embeds=decoder_inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + if not return_dict: + return decoder_outputs + encoder_outputs + + return Seq2SeqModelOutput( + last_hidden_state=decoder_outputs.last_hidden_state, + past_key_values=decoder_outputs.past_key_values, + decoder_hidden_states=decoder_outputs.hidden_states, + decoder_attentions=decoder_outputs.attentions, + cross_attentions=decoder_outputs.cross_attentions, + encoder_last_hidden_state=encoder_outputs.last_hidden_state, + encoder_hidden_states=encoder_outputs.hidden_states, + encoder_attentions=encoder_outputs.attentions, + ) + + +@add_start_docstrings( + "The MBART Model with a language modeling head. 
Can be used for summarization, after fine-tuning the pretrained models.", + MBART_START_DOCSTRING, +) +class UnimerMBartForConditionalGeneration(UnimerMBartPreTrainedModel, GenerationMixin): + base_model_prefix = "model" + _keys_to_ignore_on_load_missing = ["final_logits_bias"] + _tied_weights_keys = ["model.encoder.embed_tokens.weight", "model.decoder.embed_tokens.weight", "lm_head.weight"] + + def __init__(self, config: UnimerMBartConfig): + super().__init__(config) + self.model = UnimerMBartModel(config) + self.register_buffer("final_logits_bias", torch.zeros((1, self.model.shared.num_embeddings))) + self.lm_head = nn.Linear(config.d_model, self.model.shared.num_embeddings, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_encoder(self): + return self.model.get_encoder() + + def get_decoder(self): + return self.model.get_decoder() + + def resize_token_embeddings(self, new_num_tokens: int, pad_to_multiple_of: Optional[int] = None) -> nn.Embedding: + new_embeddings = super().resize_token_embeddings(new_num_tokens, pad_to_multiple_of) + self._resize_final_logits_bias(new_embeddings.weight.shape[0]) + return new_embeddings + + def _resize_final_logits_bias(self, new_num_tokens: int) -> None: + old_num_tokens = self.final_logits_bias.shape[-1] + if new_num_tokens <= old_num_tokens: + new_bias = self.final_logits_bias[:, :new_num_tokens] + else: + extra_bias = torch.zeros((1, new_num_tokens - old_num_tokens), device=self.final_logits_bias.device) + new_bias = torch.cat([self.final_logits_bias, extra_bias], dim=1) + self.register_buffer("final_logits_bias", new_bias) + + def get_output_embeddings(self): + return self.lm_head + + def set_output_embeddings(self, new_embeddings): + self.lm_head = new_embeddings + + @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) + @add_end_docstrings(MBART_GENERATION_EXAMPLE) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + decoder_attention_mask: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.Tensor] = None, + decoder_head_mask: Optional[torch.Tensor] = None, + cross_attn_head_mask: Optional[torch.Tensor] = None, + encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + decoder_inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Seq2SeqLMOutput, Tuple[torch.FloatTensor]]: + r""" + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. 
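When `labels` are supplied, the conditional-generation forward just below builds `decoder_input_ids` by shifting the labels right, projects the decoder output through `lm_head` plus `final_logits_bias`, and computes a token-level cross-entropy in which positions labelled `-100` are ignored, as the docstring above states. A tiny sketch of that loss arithmetic with random logits:

```python
import torch
from torch.nn import CrossEntropyLoss

vocab_size = 50
lm_logits = torch.randn(2, 7, vocab_size)  # lm_head output + final_logits_bias, (batch, seq, vocab)
labels = torch.randint(0, vocab_size, (2, 7))
labels[:, -2:] = -100                       # padded positions, ignored by the loss

loss_fct = CrossEntropyLoss()               # ignore_index defaults to -100
loss = loss_fct(lm_logits.view(-1, vocab_size), labels.view(-1))
print(loss.item())
```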
+ + Returns: + + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if labels is not None: + if use_cache: + logger.warning("The `use_cache` argument is changed to `False` since `labels` is provided.") + use_cache = False + if decoder_input_ids is None and decoder_inputs_embeds is None: + decoder_input_ids = shift_tokens_right(labels, self.config.pad_token_id) + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + encoder_outputs=encoder_outputs, + decoder_attention_mask=decoder_attention_mask, + head_mask=head_mask, + decoder_head_mask=decoder_head_mask, + cross_attn_head_mask=cross_attn_head_mask, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + decoder_inputs_embeds=decoder_inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + lm_logits = self.lm_head(outputs[0]) + self.final_logits_bias + + masked_lm_loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + masked_lm_loss = loss_fct(lm_logits.view(-1, self.config.vocab_size), labels.view(-1)) + + if not return_dict: + output = (lm_logits,) + outputs[1:] + return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output + + return Seq2SeqLMOutput( + loss=masked_lm_loss, + logits=lm_logits, + past_key_values=outputs.past_key_values, + decoder_hidden_states=outputs.decoder_hidden_states, + decoder_attentions=outputs.decoder_attentions, + cross_attentions=outputs.cross_attentions, + encoder_last_hidden_state=outputs.encoder_last_hidden_state, + encoder_hidden_states=outputs.encoder_hidden_states, + encoder_attentions=outputs.encoder_attentions, + ) + + def prepare_inputs_for_generation( + self, + decoder_input_ids, + past_key_values=None, + attention_mask=None, + head_mask=None, + decoder_head_mask=None, + cross_attn_head_mask=None, + use_cache=None, + encoder_outputs=None, + **kwargs, + ): + # cut decoder_input_ids if past is used + if past_key_values is not None: + past_length = past_key_values[0][0].shape[2] + + # Some generation methods already pass only the last input ID + if decoder_input_ids.shape[1] > past_length: + remove_prefix_length = past_length + else: + # Default to old behavior: keep only final ID + remove_prefix_length = decoder_input_ids.shape[1] - 1 + + decoder_input_ids = decoder_input_ids[:, remove_prefix_length:] + + return { + "input_ids": None, # encoder_outputs is defined. 
input_ids not needed + "encoder_outputs": encoder_outputs, + "past_key_values": past_key_values, + "decoder_input_ids": decoder_input_ids, + "attention_mask": attention_mask, + "head_mask": head_mask, + "decoder_head_mask": decoder_head_mask, + "cross_attn_head_mask": cross_attn_head_mask, + "use_cache": use_cache, # change this to avoid caching (presumably for debugging) + } + + def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor): + return shift_tokens_right(labels, self.config.pad_token_id) + + @staticmethod + def _reorder_cache(past_key_values, beam_idx): + reordered_past = () + for layer_past in past_key_values: + # cached cross_attention states don't have to be reordered -> they are always the same + reordered_past += ( + tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past[:2]) + + layer_past[2:], + ) + return reordered_past + + +@add_start_docstrings( + """ + MBart model with a sequence classification/head on top (a linear layer on top of the pooled output) e.g. for GLUE + tasks. + """, + MBART_START_DOCSTRING, +) +class UnimerMBartForSequenceClassification(UnimerMBartPreTrainedModel): + _tied_weights_keys = ["model.encoder.embed_tokens.weight", "model.decoder.embed_tokens.weight"] + + def __init__(self, config: UnimerMBartConfig, **kwargs): + super().__init__(config, **kwargs) + self.model = UnimerMBartModel(config) + self.classification_head = UnimerMBartClassificationHead( + config.d_model, + config.d_model, + config.num_labels, + config.classifier_dropout, + ) + + # Initialize weights and apply final processing + self.post_init() + + @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) + @add_code_sample_docstrings( + checkpoint=_CHECKPOINT_FOR_DOC, + output_type=Seq2SeqSequenceClassifierOutput, + config_class=_CONFIG_FOR_DOC, + ) + # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + decoder_attention_mask: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.Tensor] = None, + decoder_head_mask: Optional[torch.Tensor] = None, + cross_attn_head_mask: Optional[torch.Tensor] = None, + encoder_outputs: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + decoder_inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, Seq2SeqSequenceClassifierOutput]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
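`_reorder_cache` above is the hook beam search uses to keep the cache aligned with the surviving beams: the self-attention key/value tensors (positions 0-1 of each layer tuple) are gathered along the batch dimension with `beam_idx`, while the cross-attention entries are carried over unchanged. The same operation on a single made-up layer tuple:

```python
import torch

bsz, num_heads, head_dim, enc_len, cur_len = 3, 4, 16, 10, 6
layer_past = (
    torch.randn(bsz, num_heads, cur_len, head_dim),  # self-attn keys
    torch.randn(bsz, num_heads, cur_len, head_dim),  # self-attn values
    torch.randn(bsz, num_heads, enc_len, head_dim),  # cross-attn keys (not reordered)
    torch.randn(bsz, num_heads, enc_len, head_dim),  # cross-attn values (not reordered)
)
beam_idx = torch.tensor([2, 0, 0])  # beams that survived this step

reordered = tuple(p.index_select(0, beam_idx) for p in layer_past[:2]) + layer_past[2:]
print([tuple(t.shape) for t in reordered])
```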
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + if labels is not None: + use_cache = False + + if input_ids is None and inputs_embeds is not None: + raise NotImplementedError( + f"Passing input embeddings is currently not supported for {self.__class__.__name__}" + ) + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + head_mask=head_mask, + decoder_head_mask=decoder_head_mask, + cross_attn_head_mask=cross_attn_head_mask, + encoder_outputs=encoder_outputs, + inputs_embeds=inputs_embeds, + decoder_inputs_embeds=decoder_inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + hidden_states = outputs[0] # last hidden state + + eos_mask = input_ids.eq(self.config.eos_token_id).to(hidden_states.device) + + if len(torch.unique_consecutive(eos_mask.sum(1))) > 1: + raise ValueError("All examples must have the same number of tokens.") + sentence_representation = hidden_states[eos_mask, :].view(hidden_states.size(0), -1, hidden_states.size(-1))[ + :, -1, : + ] + logits = self.classification_head(sentence_representation) + + loss = None + if labels is not None: + labels = labels.to(logits.device) + if self.config.problem_type is None: + if self.config.num_labels == 1: + self.config.problem_type = "regression" + elif self.config.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): + self.config.problem_type = "single_label_classification" + else: + self.config.problem_type = "multi_label_classification" + + if self.config.problem_type == "regression": + loss_fct = MSELoss() + if self.config.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) + elif self.config.problem_type == "single_label_classification": + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1)) + elif self.config.problem_type == "multi_label_classification": + loss_fct = BCEWithLogitsLoss() + loss = loss_fct(logits, labels) + if not return_dict: + output = (logits,) + outputs[1:] + return ((loss,) + output) if loss is not None else output + + return Seq2SeqSequenceClassifierOutput( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + decoder_hidden_states=outputs.decoder_hidden_states, + decoder_attentions=outputs.decoder_attentions, + cross_attentions=outputs.cross_attentions, + encoder_last_hidden_state=outputs.encoder_last_hidden_state, + encoder_hidden_states=outputs.encoder_hidden_states, + encoder_attentions=outputs.encoder_attentions, + ) + + +@add_start_docstrings( + """ + MBART Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear + layer on top of the hidden-states output to compute `span start logits` and `span end logits`). 
+ """, + MBART_START_DOCSTRING, +) +class UnimerMBartForQuestionAnswering(UnimerMBartPreTrainedModel): + _tied_weights_keys = ["model.encoder.embed_tokens.weight", "model.decoder.embed_tokens.weight"] + + def __init__(self, config): + super().__init__(config) + + config.num_labels = 2 + self.num_labels = config.num_labels + + self.model = UnimerMBartModel(config) + self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) + + # Initialize weights and apply final processing + self.post_init() + + @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) + @add_code_sample_docstrings( + checkpoint=_CHECKPOINT_FOR_DOC, + output_type=Seq2SeqQuestionAnsweringModelOutput, + config_class=_CONFIG_FOR_DOC, + ) + # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward + def forward( + self, + input_ids: torch.Tensor = None, + attention_mask: Optional[torch.Tensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + decoder_attention_mask: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.Tensor] = None, + decoder_head_mask: Optional[torch.Tensor] = None, + cross_attn_head_mask: Optional[torch.Tensor] = None, + encoder_outputs: Optional[List[torch.FloatTensor]] = None, + start_positions: Optional[torch.LongTensor] = None, + end_positions: Optional[torch.LongTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + decoder_inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, Seq2SeqQuestionAnsweringModelOutput]: + r""" + start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for position (index) of the start of the labelled span for computing the token classification loss. + Positions are clamped to the length of the sequence (*sequence_length*). Position outside of the sequence + are not taken into account for computing the loss. + end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for position (index) of the end of the labelled span for computing the token classification loss. + Positions are clamped to the length of the sequence (*sequence_length*). Position outside of the sequence + are not taken into account for computing the loss. 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + if start_positions is not None and end_positions is not None: + use_cache = False + + outputs = self.model( + input_ids, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + head_mask=head_mask, + decoder_head_mask=decoder_head_mask, + cross_attn_head_mask=cross_attn_head_mask, + encoder_outputs=encoder_outputs, + inputs_embeds=inputs_embeds, + decoder_inputs_embeds=decoder_inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = outputs[0] + + logits = self.qa_outputs(sequence_output) + start_logits, end_logits = logits.split(1, dim=-1) + start_logits = start_logits.squeeze(-1).contiguous() + end_logits = end_logits.squeeze(-1).contiguous() + + total_loss = None + if start_positions is not None and end_positions is not None: + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + ignored_index = start_logits.size(1) + start_positions = start_positions.clamp(0, ignored_index) + end_positions = end_positions.clamp(0, ignored_index) + + loss_fct = CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + total_loss = (start_loss + end_loss) / 2 + + if not return_dict: + output = ( + start_logits, + end_logits, + ) + outputs[1:] + return ((total_loss,) + output) if total_loss is not None else output + + return Seq2SeqQuestionAnsweringModelOutput( + loss=total_loss, + start_logits=start_logits, + end_logits=end_logits, + past_key_values=outputs.past_key_values, + decoder_hidden_states=outputs.decoder_hidden_states, + decoder_attentions=outputs.decoder_attentions, + cross_attentions=outputs.cross_attentions, + encoder_last_hidden_state=outputs.encoder_last_hidden_state, + encoder_hidden_states=outputs.encoder_hidden_states, + encoder_attentions=outputs.encoder_attentions, + ) + + +# Copied from transformers.models.bart.modeling_bart.BartDecoderWrapper with Bart->MBart +class UnimerMBartDecoderWrapper(UnimerMBartPreTrainedModel): + """ + This wrapper class is a helper class to correctly load pretrained checkpoints when the causal language model is + used in combination with the [`EncoderDecoderModel`] framework. 
+ """ + + def __init__(self, config): + super().__init__(config) + self.decoder = UnimerMBartDecoder(config) + + def forward(self, *args, **kwargs): + return self.decoder(*args, **kwargs) + + +# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-base->facebook/mbart-large-cc25 +class UnimerMBartForCausalLM(UnimerMBartPreTrainedModel, GenerationMixin): + _tied_weights_keys = ["lm_head.weight"] + + def __init__(self, config): + config = copy.deepcopy(config) + config.is_decoder = True + config.is_encoder_decoder = False + super().__init__(config) + self.model = UnimerMBartDecoderWrapper(config) + + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.decoder.embed_tokens + + def set_input_embeddings(self, value): + self.model.decoder.embed_tokens = value + + def get_output_embeddings(self): + return self.lm_head + + def set_output_embeddings(self, new_embeddings): + self.lm_head = new_embeddings + + def set_decoder(self, decoder): + self.model.decoder = decoder + + def get_decoder(self): + return self.model.decoder + + @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentionsAndCounting, config_class=_CONFIG_FOR_DOC) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + head_mask: Optional[torch.Tensor] = None, + cross_attn_head_mask: Optional[torch.Tensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + count_gt: Optional[torch.LongTensor] = None, + ) -> Union[Tuple, CausalLMOutputWithCrossAttentions]: + r""" + Args: + input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): + Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you + provide it. + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention + if the model is configured as a decoder. + encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used + in the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`: + head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the attention modules. 
Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): + Mask to nullify selected heads of the cross-attention modules. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): + Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of + shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of + shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. The two additional + tensors are only required when the model is used as a decoder in a Sequence to Sequence model. + + Contains pre-computed hidden-states (key and values in the self-attention blocks and in the + cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. + + If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those + that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors + for more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + + Returns: + + Example: + + ```python + >>> from transformers import AutoTokenizer, MBartForCausalLM + + >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25") + >>> model = MBartForCausalLM.from_pretrained("facebook/mbart-large-cc25", add_cross_attention=False) + >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." 
+ >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") + >>> outputs = model(**inputs) + + >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True + ```""" + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + count_pred = None + + # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) + outputs = self.model.decoder( + input_ids=input_ids, + attention_mask=attention_mask, + count_pred=count_pred, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + head_mask=head_mask, + cross_attn_head_mask=cross_attn_head_mask, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + logits = self.lm_head(outputs[0]) + + loss = None + if labels is not None: + labels = labels.to(logits.device) + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithCrossAttentionsAndCounting( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + cross_attentions=outputs.cross_attentions, + counting=count_pred, + ) + + def prepare_inputs_for_generation( + self, input_ids, past_key_values=None, attention_mask=None, use_cache=None, **kwargs + ): + # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly + if attention_mask is None: + attention_mask = input_ids.new_ones(input_ids.shape) + + if past_key_values: + past_length = past_key_values[0][0].shape[2] + + # Some generation methods already pass only the last input ID + if input_ids.shape[1] > past_length: + remove_prefix_length = past_length + else: + # Default to old behavior: keep only final ID + remove_prefix_length = input_ids.shape[1] - 1 + + input_ids = input_ids[:, remove_prefix_length:] + # first step, decoder_cached_states are empty + return { + "input_ids": input_ids, # encoder_outputs is defined. 
input_ids not needed + "attention_mask": attention_mask, + "past_key_values": past_key_values, + "use_cache": use_cache, + } + + @staticmethod + def _reorder_cache(past_key_values, beam_idx): + reordered_past = () + for layer_past in past_key_values: + reordered_past += ( + tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past), + ) + return reordered_past diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py new file mode 100644 index 00000000..e69de29b diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py new file mode 100644 index 00000000..0b91b3be --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py @@ -0,0 +1,9 @@ +from .configuration_unimer_swin import UnimerSwinConfig +from .modeling_unimer_swin import UnimerSwinModel +from .image_processing_unimer_swin import UnimerSwinImageProcessor + +__all__ = [ + "UnimerSwinConfig", + "UnimerSwinModel", + "UnimerSwinImageProcessor", +] diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py new file mode 100644 index 00000000..6c577e7c --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py @@ -0,0 +1,132 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Donut Swin Transformer model configuration""" + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + + +class UnimerSwinConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`UnimerSwinModel`]. It is used to instantiate a + Donut model according to the specified arguments, defining the model architecture. Instantiating a configuration + with the defaults will yield a similar configuration to that of the Donut + [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + image_size (`int`, *optional*, defaults to 224): + The size (resolution) of each image. + patch_size (`int`, *optional*, defaults to 4): + The size (resolution) of each patch. + num_channels (`int`, *optional*, defaults to 3): + The number of input channels. + embed_dim (`int`, *optional*, defaults to 96): + Dimensionality of patch embedding. + depths (`list(int)`, *optional*, defaults to `[2, 2, 6, 2]`): + Depth of each layer in the Transformer encoder. 
+ num_heads (`list(int)`, *optional*, defaults to `[3, 6, 12, 24]`): + Number of attention heads in each layer of the Transformer encoder. + window_size (`int`, *optional*, defaults to 7): + Size of windows. + mlp_ratio (`float`, *optional*, defaults to 4.0): + Ratio of MLP hidden dimensionality to embedding dimensionality. + qkv_bias (`bool`, *optional*, defaults to `True`): + Whether or not a learnable bias should be added to the queries, keys and values. + hidden_dropout_prob (`float`, *optional*, defaults to 0.0): + The dropout probability for all fully connected layers in the embeddings and encoder. + attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + drop_path_rate (`float`, *optional*, defaults to 0.1): + Stochastic depth rate. + hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder. If string, `"gelu"`, `"relu"`, + `"selu"` and `"gelu_new"` are supported. + use_absolute_embeddings (`bool`, *optional*, defaults to `False`): + Whether or not to add absolute position embeddings to the patch embeddings. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + layer_norm_eps (`float`, *optional*, defaults to 1e-05): + The epsilon used by the layer normalization layers. + + Example: + + ```python + >>> from transformers import UnimerSwinConfig, UnimerSwinModel + + >>> # Initializing a Donut naver-clova-ix/donut-base style configuration + >>> configuration = UnimerSwinConfig() + + >>> # Randomly initializing a model from the naver-clova-ix/donut-base style configuration + >>> model = UnimerSwinModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "unimer-swin" + + attribute_map = { + "num_attention_heads": "num_heads", + "num_hidden_layers": "num_layers", + } + + def __init__( + self, + image_size=224, + patch_size=4, + num_channels=3, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4.0, + qkv_bias=True, + hidden_dropout_prob=0.0, + attention_probs_dropout_prob=0.0, + drop_path_rate=0.1, + hidden_act="gelu", + use_absolute_embeddings=False, + initializer_range=0.02, + layer_norm_eps=1e-5, + **kwargs, + ): + super().__init__(**kwargs) + + self.image_size = image_size + self.patch_size = patch_size + self.num_channels = num_channels + self.embed_dim = embed_dim + self.depths = depths + self.num_layers = len(depths) + self.num_heads = num_heads + self.window_size = window_size + self.mlp_ratio = mlp_ratio + self.qkv_bias = qkv_bias + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.drop_path_rate = drop_path_rate + self.hidden_act = hidden_act + self.use_absolute_embeddings = use_absolute_embeddings + self.layer_norm_eps = layer_norm_eps + self.initializer_range = initializer_range + # we set the hidden_size attribute in order to make Swin work with VisionEncoderDecoderModel + # this indicates the channel dimension after the last stage of the model + self.hidden_size = int(embed_dim * 2 ** (len(depths) - 1)) diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py new file mode 100644 index 00000000..a16d2433 --- 
/dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py @@ -0,0 +1,132 @@ +from transformers.image_processing_utils import BaseImageProcessor +import numpy as np +import cv2 +import albumentations as alb +from albumentations.pytorch import ToTensorV2 + + +# TODO: dereference cv2 if possible +class UnimerSwinImageProcessor(BaseImageProcessor): + def __init__( + self, + image_size = (192, 672), + ): + self.input_size = [int(_) for _ in image_size] + assert len(self.input_size) == 2 + + self.transform = alb.Compose( + [ + alb.ToGray(), + alb.Normalize((0.7931, 0.7931, 0.7931), (0.1738, 0.1738, 0.1738)), + # alb.Sharpen() + ToTensorV2(), + ] + ) + + def __call__(self, item): + image = self.prepare_input(item) + return self.transform(image=image)['image'][:1] + + @staticmethod + def crop_margin_numpy(img: np.ndarray) -> np.ndarray: + """Crop margins of image using NumPy operations""" + # Convert to grayscale if it's a color image + if len(img.shape) == 3 and img.shape[2] == 3: + gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + else: + gray = img.copy() + + # Normalize and threshold + if gray.max() == gray.min(): + return img + + normalized = (((gray - gray.min()) / (gray.max() - gray.min())) * 255).astype(np.uint8) + binary = 255 * (normalized < 200).astype(np.uint8) + + # Find bounding box + coords = cv2.findNonZero(binary) # Find all non-zero points (text) + x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box + + # Return cropped image + return img[y:y + h, x:x + w] + + def prepare_input(self, img, random_padding: bool = False): + """ + Convert PIL Image or numpy array to properly sized and padded image after: + - crop margins + - resize while maintaining aspect ratio + - pad to target size + """ + if img is None: + return None + + # try: + # img = self.crop_margin_numpy(img) + # except Exception: + # # might throw an error for broken files + # return None + + if img.shape[0] == 0 or img.shape[1] == 0: + return None + + # Get current dimensions + h, w = img.shape[:2] + target_h, target_w = self.input_size + + # Calculate scale to preserve aspect ratio (equivalent to resize + thumbnail) + scale = min(target_h / h, target_w / w) + + # Calculate new dimensions + new_h, new_w = int(h * scale), int(w * scale) + + # Resize the image while preserving aspect ratio + resized_img = cv2.resize(img, (new_w, new_h)) + + # Calculate padding values using the existing method + delta_width = target_w - new_w + delta_height = target_h - new_h + + pad_width, pad_height = self._get_padding_values(new_w, new_h, random_padding) + + # Apply padding (convert PIL padding format to OpenCV format) + padding_color = [0, 0, 0] if len(img.shape) == 3 else [0] + + padded_img = cv2.copyMakeBorder( + resized_img, + pad_height, # top + delta_height - pad_height, # bottom + pad_width, # left + delta_width - pad_width, # right + cv2.BORDER_CONSTANT, + value=padding_color + ) + + return padded_img + + def _calculate_padding(self, new_w, new_h, random_padding): + """Calculate padding values for PIL images""" + delta_width = self.input_size[1] - new_w + delta_height = self.input_size[0] - new_h + + pad_width, pad_height = self._get_padding_values(new_w, new_h, random_padding) + + return ( + pad_width, + pad_height, + delta_width - pad_width, + delta_height - pad_height, + ) + + def _get_padding_values(self, new_w, new_h, random_padding): + """Get padding values based on image dimensions and padding strategy""" + delta_width = self.input_size[1] - new_w + delta_height 
= self.input_size[0] - new_h + + if random_padding: + pad_width = np.random.randint(low=0, high=delta_width + 1) + pad_height = np.random.randint(low=0, high=delta_height + 1) + else: + pad_width = delta_width // 2 + pad_height = delta_height // 2 + + return pad_width, pad_height diff --git a/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py new file mode 100644 index 00000000..1b808e8b --- /dev/null +++ b/mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py @@ -0,0 +1,1084 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch UnimerSwin Transformer model. + +This implementation is identical to a regular Swin Transformer, without final layer norm on top of the final hidden +states.""" + +import collections.abc +import math +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import torch +import torch.utils.checkpoint +from torch import nn + +from transformers.activations import ACT2FN +from transformers.modeling_utils import PreTrainedModel +from transformers.pytorch_utils import find_pruneable_heads_and_indices, meshgrid, prune_linear_layer +from transformers.utils import ( + ModelOutput, + add_code_sample_docstrings, + add_start_docstrings, + add_start_docstrings_to_model_forward, + logging, + torch_int, +) +from .configuration_unimer_swin import UnimerSwinConfig + + +logger = logging.get_logger(__name__) + +# General docstring +_CONFIG_FOR_DOC = "UnimerSwinConfig" + +# Base docstring +_CHECKPOINT_FOR_DOC = "https://huggingface.co/naver-clova-ix/donut-base" +_EXPECTED_OUTPUT_SHAPE = [1, 49, 768] + + +@dataclass +# Copied from transformers.models.swin.modeling_swin.SwinEncoderOutput with Swin->UnimerSwin +class UnimerSwinEncoderOutput(ModelOutput): + """ + UnimerSwin encoder's outputs, with potential hidden states and attentions. + + Args: + last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of + shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. 
+ reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of + shape `(batch_size, hidden_size, height, width)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to + include the spatial dimensions. + """ + + last_hidden_state: torch.FloatTensor = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + + +@dataclass +# Copied from transformers.models.swin.modeling_swin.SwinModelOutput with Swin->UnimerSwin +class UnimerSwinModelOutput(ModelOutput): + """ + UnimerSwin model's outputs that also contains a pooling of the last hidden states. + + Args: + last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed): + Average pooling of the last layer hidden-state. + hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of + shape `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of + shape `(batch_size, hidden_size, height, width)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to + include the spatial dimensions. + """ + + last_hidden_state: torch.FloatTensor = None + pooler_output: Optional[torch.FloatTensor] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + + +# Copied from transformers.models.swin.modeling_swin.window_partition +def window_partition(input_feature, window_size): + """ + Partitions the given input into windows. 
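+ `input_feature` is expected to have shape `(batch_size, height, width, num_channels)`; when both spatial dimensions are multiples of `window_size`, the returned tensor has shape `(num_windows * batch_size, window_size, window_size, num_channels)` with `num_windows = (height // window_size) * (width // window_size)`.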
+ """ + batch_size, height, width, num_channels = input_feature.shape + input_feature = input_feature.view( + batch_size, height // window_size, window_size, width // window_size, window_size, num_channels + ) + windows = input_feature.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, num_channels) + return windows + + +# Copied from transformers.models.swin.modeling_swin.window_reverse +def window_reverse(windows, window_size, height, width): + """ + Merges windows to produce higher resolution features. + """ + num_channels = windows.shape[-1] + windows = windows.view(-1, height // window_size, width // window_size, window_size, window_size, num_channels) + windows = windows.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, height, width, num_channels) + return windows + + +# Copied from transformers.models.swin.modeling_swin.SwinEmbeddings with Swin->UnimerSwin +class UnimerSwinEmbeddings(nn.Module): + """ + Construct the patch and position embeddings. Optionally, also the mask token. + """ + + def __init__(self, config, use_mask_token=False): + super().__init__() + + self.patch_embeddings = UnimerSwinPatchEmbeddings(config) + num_patches = self.patch_embeddings.num_patches + self.patch_grid = self.patch_embeddings.grid_size + self.mask_token = nn.Parameter(torch.zeros(1, 1, config.embed_dim)) if use_mask_token else None + + if config.use_absolute_embeddings: + self.position_embeddings = nn.Parameter(torch.zeros(1, num_patches + 1, config.embed_dim)) + else: + self.position_embeddings = None + + ### code added. ### + if config.use_2d_embeddings: + self.row_embeddings = nn.Parameter(torch.zeros(1, self.patch_grid[0] + 1, config.embed_dim)) + self.column_embeddings = nn.Parameter(torch.zeros(1, self.patch_grid[1] + 1, config.embed_dim)) + else: + self.row_embeddings = None + self.column_embeddings = None + ###### + + self.norm = nn.LayerNorm(config.embed_dim) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: int, width: int) -> torch.Tensor: + """ + This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher + resolution images. 
+ + Source: + https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174 + """ + + num_patches = embeddings.shape[1] - 1 + num_positions = self.position_embeddings.shape[1] - 1 + if num_patches == num_positions and height == width: + return self.position_embeddings + class_pos_embed = self.position_embeddings[:, 0] + patch_pos_embed = self.position_embeddings[:, 1:] + dim = embeddings.shape[-1] + h0 = height // self.config.patch_size + w0 = width // self.config.patch_size + # we add a small number to avoid floating point error in the interpolation + # see discussion at https://github.com/facebookresearch/dino/issues/8 + h0, w0 = h0 + 0.1, w0 + 0.1 + patch_pos_embed = patch_pos_embed.reshape(1, int(math.sqrt(num_positions)), int(math.sqrt(num_positions)), dim) + patch_pos_embed = patch_pos_embed.permute(0, 3, 1, 2) + patch_pos_embed = nn.functional.interpolate( + patch_pos_embed, + scale_factor=(h0 / math.sqrt(num_positions), w0 / math.sqrt(num_positions)), + mode="bicubic", + align_corners=False, + ) + patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) + return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1) + + def forward( + self, + pixel_values: Optional[torch.FloatTensor], + bool_masked_pos: Optional[torch.BoolTensor] = None, + interpolate_pos_encoding: bool = False, + ) -> Tuple[torch.Tensor]: + _, num_channels, height, width = pixel_values.shape + embeddings, output_dimensions = self.patch_embeddings(pixel_values) + embeddings = self.norm(embeddings) + batch_size, seq_len, _ = embeddings.size() + + if bool_masked_pos is not None: + mask_tokens = self.mask_token.expand(batch_size, seq_len, -1) + # replace the masked visual tokens by mask_tokens + mask = bool_masked_pos.unsqueeze(-1).type_as(mask_tokens) + embeddings = embeddings * (1.0 - mask) + mask_tokens * mask + + if self.position_embeddings is not None: + # if interpolate_pos_encoding: + # embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width) + # else: + # embeddings = embeddings + self.position_embeddings + embeddings = embeddings + self.position_embeddings[:, :seq_len, :] # code edited. + + ### code added. ### + if self.row_embeddings is not None and self.column_embeddings is not None: + # Repeat the x position embeddings across the y axis like 0, 1, 2, 3, 0, 1, 2, 3, ... 
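+ # For example, with output_dimensions == (2, 3) the row indices become [0, 0, 0, 1, 1, 1]
+ # (repeat_interleave over the width) and the column indices become [0, 1, 2, 0, 1, 2]
+ # (repeat over the height), so the patch at grid cell (r, c) receives row_embeddings[r] + column_embeddings[c].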
+ row_embeddings = self.row_embeddings[:, :output_dimensions[0], :].repeat_interleave(output_dimensions[1], dim=1) + column_embeddings = self.column_embeddings[:, :output_dimensions[1], :].repeat(1, output_dimensions[0], 1) + embeddings = embeddings + row_embeddings + column_embeddings + ###### + + embeddings = self.dropout(embeddings) + + return embeddings, output_dimensions + +class StemLayer(nn.Module): + r""" Stem layer of InternImage + Args: + in_chans (int): number of input channels + out_chans (int): number of output channels + act_layer (str): activation layer + norm_layer (str): normalization layer + """ + + def __init__(self, in_chans=3, out_chans=96, act_layer=nn.GELU, norm_layer='BN'): + super().__init__() + self.conv1 = nn.Conv2d(in_chans, out_chans // 2, kernel_size=3, stride=2, padding=1) + self.norm1 = self.build_norm_layer(out_chans // 2, norm_layer) + self.act = act_layer() + self.conv2 = nn.Conv2d(out_chans // 2, out_chans, kernel_size=3, stride=2, padding=1) + + def build_norm_layer(self, dim, norm_layer): + layers = [] + if norm_layer == 'BN': + layers.append(nn.BatchNorm2d(dim)) + else: + raise NotImplementedError(f'build_norm_layer does not support {norm_layer}') + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.norm1(x) + x = self.act(x) + x = self.conv2(x) + return x + +# Copied from transformers.models.swin.modeling_swin.SwinPatchEmbeddings with Swin->UnimerSwin +class UnimerSwinPatchEmbeddings(nn.Module): + """ + This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial + `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a + Transformer. + """ + + def __init__(self, config): + super().__init__() + image_size, patch_size = config.image_size, config.patch_size + num_channels, hidden_size = config.num_channels, config.embed_dim + image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size) + patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size) + num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) + self.image_size = image_size + self.patch_size = patch_size + self.num_channels = num_channels + self.num_patches = num_patches + self.grid_size = (image_size[0] // patch_size[0], image_size[1] // patch_size[1]) + + ### code edited. 
### + # self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size) + self.projection = StemLayer(in_chans=num_channels, out_chans=hidden_size) + ### + + def maybe_pad(self, pixel_values, height, width): + if width % self.patch_size[1] != 0: + pad_values = (0, self.patch_size[1] - width % self.patch_size[1]) + pixel_values = nn.functional.pad(pixel_values, pad_values) + if height % self.patch_size[0] != 0: + pad_values = (0, 0, 0, self.patch_size[0] - height % self.patch_size[0]) + pixel_values = nn.functional.pad(pixel_values, pad_values) + return pixel_values + + def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor, Tuple[int]]: + _, num_channels, height, width = pixel_values.shape + # pad the input to be divisible by self.patch_size, if needed + pixel_values = self.maybe_pad(pixel_values, height, width) + embeddings = self.projection(pixel_values) + _, _, height, width = embeddings.shape + output_dimensions = (height, width) + embeddings = embeddings.flatten(2).transpose(1, 2) + + return embeddings, output_dimensions + + +# Copied from transformers.models.swin.modeling_swin.SwinPatchMerging +class UnimerSwinPatchMerging(nn.Module): + """ + Patch Merging Layer. + + Args: + input_resolution (`Tuple[int]`): + Resolution of input feature. + dim (`int`): + Number of input channels. + norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`): + Normalization layer class. + """ + + def __init__(self, input_resolution: Tuple[int], dim: int, norm_layer: nn.Module = nn.LayerNorm) -> None: + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def maybe_pad(self, input_feature, height, width): + should_pad = (height % 2 == 1) or (width % 2 == 1) + if should_pad: + pad_values = (0, 0, 0, width % 2, 0, height % 2) + input_feature = nn.functional.pad(input_feature, pad_values) + + return input_feature + + def forward(self, input_feature: torch.Tensor, input_dimensions: Tuple[int, int]) -> torch.Tensor: + height, width = input_dimensions + # `dim` is height * width + batch_size, dim, num_channels = input_feature.shape + + input_feature = input_feature.view(batch_size, height, width, num_channels) + # pad input to be disible by width and height, if needed + input_feature = self.maybe_pad(input_feature, height, width) + # [batch_size, height/2, width/2, num_channels] + input_feature_0 = input_feature[:, 0::2, 0::2, :] + # [batch_size, height/2, width/2, num_channels] + input_feature_1 = input_feature[:, 1::2, 0::2, :] + # [batch_size, height/2, width/2, num_channels] + input_feature_2 = input_feature[:, 0::2, 1::2, :] + # [batch_size, height/2, width/2, num_channels] + input_feature_3 = input_feature[:, 1::2, 1::2, :] + # batch_size height/2 width/2 4*num_channels + input_feature = torch.cat([input_feature_0, input_feature_1, input_feature_2, input_feature_3], -1) + input_feature = input_feature.view(batch_size, -1, 4 * num_channels) # batch_size height/2*width/2 4*C + + input_feature = self.norm(input_feature) + input_feature = self.reduction(input_feature) + + return input_feature + + +# Copied from transformers.models.beit.modeling_beit.drop_path +def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor: + """ + Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
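+ In training mode each sample's path is zeroed with probability `drop_prob` and the surviving samples are rescaled by `1 / (1 - drop_prob)`, so the expected value of the output equals the input; in eval mode, or when `drop_prob == 0.0`, the input is returned unchanged.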
+ + Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks, + however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the + layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the + argument. + """ + if drop_prob == 0.0 or not training: + return input + keep_prob = 1 - drop_prob + shape = (input.shape[0],) + (1,) * (input.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device) + random_tensor.floor_() # binarize + output = input.div(keep_prob) * random_tensor + return output + + +# Copied from transformers.models.swin.modeling_swin.SwinDropPath +class UnimerSwinDropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" + + def __init__(self, drop_prob: Optional[float] = None) -> None: + super().__init__() + self.drop_prob = drop_prob + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + return drop_path(hidden_states, self.drop_prob, self.training) + + def extra_repr(self) -> str: + return "p={}".format(self.drop_prob) + + +# Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->UnimerSwin +class UnimerSwinSelfAttention(nn.Module): + def __init__(self, config, dim, num_heads, window_size): + super().__init__() + if dim % num_heads != 0: + raise ValueError( + f"The hidden size ({dim}) is not a multiple of the number of attention heads ({num_heads})" + ) + + self.num_attention_heads = num_heads + self.attention_head_size = int(dim / num_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + self.window_size = ( + window_size if isinstance(window_size, collections.abc.Iterable) else (window_size, window_size) + ) + + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1), num_heads) + ) + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(meshgrid([coords_h, coords_w], indexing="ij")) + coords_flatten = torch.flatten(coords, 1) + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] + relative_coords = relative_coords.permute(1, 2, 0).contiguous() + relative_coords[:, :, 0] += self.window_size[0] - 1 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) + self.register_buffer("relative_position_index", relative_position_index) + + self.query = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias) + self.key = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias) + self.value = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.FloatTensor] = None, + head_mask: 
Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor]: + batch_size, dim, num_channels = hidden_states.shape + mixed_query_layer = self.query(hidden_states) + + key_layer = self.transpose_for_scores(self.key(hidden_states)) + value_layer = self.transpose_for_scores(self.value(hidden_states)) + query_layer = self.transpose_for_scores(mixed_query_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)] + relative_position_bias = relative_position_bias.view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1 + ) + + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() + attention_scores = attention_scores + relative_position_bias.unsqueeze(0) + + if attention_mask is not None: + # Apply the attention mask (precomputed for all layers in UnimerSwinModel forward() function) + mask_shape = attention_mask.shape[0] + attention_scores = attention_scores.view( + batch_size // mask_shape, mask_shape, self.num_attention_heads, dim, dim + ) + attention_scores = attention_scores + attention_mask.unsqueeze(1).unsqueeze(0) + attention_scores = attention_scores.view(-1, self.num_attention_heads, dim, dim) + + # Normalize the attention scores to probabilities. + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper.
+ attention_probs = self.dropout(attention_probs) + + # Mask heads if we want to + if head_mask is not None: + attention_probs = attention_probs * head_mask + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(new_context_layer_shape) + + outputs = (context_layer, attention_probs) if output_attentions else (context_layer,) + + return outputs + + +# Copied from transformers.models.swin.modeling_swin.SwinSelfOutput +class UnimerSwinSelfOutput(nn.Module): + def __init__(self, config, dim): + super().__init__() + self.dense = nn.Linear(dim, dim) + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor: + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + + return hidden_states + + +# Copied from transformers.models.swin.modeling_swin.SwinAttention with Swin->UnimerSwin +class UnimerSwinAttention(nn.Module): + def __init__(self, config, dim, num_heads, window_size): + super().__init__() + self.self = UnimerSwinSelfAttention(config, dim, num_heads, window_size) + self.output = UnimerSwinSelfOutput(config, dim) + self.pruned_heads = set() + + def prune_heads(self, heads): + if len(heads) == 0: + return + heads, index = find_pruneable_heads_and_indices( + heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads + ) + + # Prune linear layers + self.self.query = prune_linear_layer(self.self.query, index) + self.self.key = prune_linear_layer(self.self.key, index) + self.self.value = prune_linear_layer(self.self.value, index) + self.output.dense = prune_linear_layer(self.output.dense, index, dim=1) + + # Update hyper params and store pruned heads + self.self.num_attention_heads = self.self.num_attention_heads - len(heads) + self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads + self.pruned_heads = self.pruned_heads.union(heads) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.FloatTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor]: + self_outputs = self.self(hidden_states, attention_mask, head_mask, output_attentions) + attention_output = self.output(self_outputs[0], hidden_states) + outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them + return outputs + + +# Copied from transformers.models.swin.modeling_swin.SwinIntermediate +class UnimerSwinIntermediate(nn.Module): + def __init__(self, config, dim): + super().__init__() + self.dense = nn.Linear(dim, int(config.mlp_ratio * dim)) + if isinstance(config.hidden_act, str): + self.intermediate_act_fn = ACT2FN[config.hidden_act] + else: + self.intermediate_act_fn = config.hidden_act + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +# Copied from transformers.models.swin.modeling_swin.SwinOutput +class UnimerSwinOutput(nn.Module): + def __init__(self, config, dim): + super().__init__() + self.dense = nn.Linear(int(config.mlp_ratio * dim), dim) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + 
hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + return hidden_states + + +class ConvEnhance(nn.Module): + """Depth-wise convolution to get the positional information. + """ + def __init__(self, config, dim, k=3): + super(ConvEnhance, self).__init__() + self.proj = nn.Conv2d(dim, + dim, + (k,k), + (1,1), + (k // 2,k // 2), + groups=dim) + self.act_fn = ACT2FN[config.hidden_act] + + def forward(self, x, size: Tuple[int, int]): + B, N, C = x.shape + H, W = size + assert N == H * W + + feat = x.transpose(1, 2).view(B, C, H, W) + feat = self.proj(feat) + feat = self.act_fn(feat) + feat = feat.flatten(2).transpose(1, 2) + + x = x + feat + return x + + +# Copied from transformers.models.swin.modeling_swin.SwinLayer with Swin->UnimerSwin +class UnimerSwinLayer(nn.Module): + def __init__(self, config, dim, input_resolution, num_heads, shift_size=0): + super().__init__() + self.chunk_size_feed_forward = config.chunk_size_feed_forward + self.shift_size = shift_size + self.window_size = config.window_size + self.input_resolution = input_resolution + self.layernorm_before = nn.LayerNorm(dim, eps=config.layer_norm_eps) + + self.ce = nn.ModuleList([ConvEnhance(config, dim=dim, k=3), + ConvEnhance(config, dim=dim, k=3)]) + + self.attention = UnimerSwinAttention(config, dim, num_heads, window_size=self.window_size) + self.drop_path = UnimerSwinDropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity() + self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps) + self.intermediate = UnimerSwinIntermediate(config, dim) + self.output = UnimerSwinOutput(config, dim) + + def set_shift_and_window_size(self, input_resolution): + if min(input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = torch_int(0) + self.window_size = ( + torch.min(torch.tensor(input_resolution)) if torch.jit.is_tracing() else min(input_resolution) + ) + + def get_attn_mask(self, height, width, dtype, device): + if self.shift_size > 0: + # calculate attention mask for SW-MSA + img_mask = torch.zeros((1, height, width, 1), dtype=dtype, device=device) + height_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + width_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + count = 0 + for height_slice in height_slices: + for width_slice in width_slices: + img_mask[:, height_slice, width_slice, :] = count + count += 1 + + mask_windows = window_partition(img_mask, self.window_size) + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + else: + attn_mask = None + return attn_mask + + def maybe_pad(self, hidden_states, height, width): + pad_right = (self.window_size - width % self.window_size) % self.window_size + pad_bottom = (self.window_size - height % self.window_size) % self.window_size + pad_values = (0, 0, 0, pad_right, 0, pad_bottom) + hidden_states = nn.functional.pad(hidden_states, pad_values) + return hidden_states, pad_values + + def forward( + self, + hidden_states: torch.Tensor, + input_dimensions: Tuple[int, int], + head_mask: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = False, + always_partition: Optional[bool] 
= False, + ) -> Tuple[torch.Tensor, torch.Tensor]: + if not always_partition: + self.set_shift_and_window_size(input_dimensions) + else: + pass + height, width = input_dimensions + batch_size, _, channels = hidden_states.size() + + + + hidden_states = self.ce[0](hidden_states, input_dimensions) + shortcut = hidden_states + + + hidden_states = self.layernorm_before(hidden_states) + hidden_states = hidden_states.view(batch_size, height, width, channels) + + # pad hidden_states to multiples of window size + hidden_states, pad_values = self.maybe_pad(hidden_states, height, width) + + _, height_pad, width_pad, _ = hidden_states.shape + # cyclic shift + if self.shift_size > 0: + shifted_hidden_states = torch.roll(hidden_states, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_hidden_states = hidden_states + + # partition windows + hidden_states_windows = window_partition(shifted_hidden_states, self.window_size) + hidden_states_windows = hidden_states_windows.view(-1, self.window_size * self.window_size, channels) + attn_mask = self.get_attn_mask( + height_pad, width_pad, dtype=hidden_states.dtype, device=hidden_states_windows.device + ) + + attention_outputs = self.attention( + hidden_states_windows, attn_mask, head_mask, output_attentions=output_attentions + ) + + attention_output = attention_outputs[0] + + attention_windows = attention_output.view(-1, self.window_size, self.window_size, channels) + shifted_windows = window_reverse(attention_windows, self.window_size, height_pad, width_pad) + + # reverse cyclic shift + if self.shift_size > 0: + attention_windows = torch.roll(shifted_windows, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + attention_windows = shifted_windows + + was_padded = pad_values[3] > 0 or pad_values[5] > 0 + if was_padded: + attention_windows = attention_windows[:, :height, :width, :].contiguous() + + attention_windows = attention_windows.view(batch_size, height * width, channels) + + hidden_states = shortcut + self.drop_path(attention_windows) + + + + hidden_states = self.ce[1](hidden_states, input_dimensions) + layer_output = self.layernorm_after(hidden_states) + layer_output = self.intermediate(layer_output) + layer_output = hidden_states + self.output(layer_output) + + layer_outputs = (layer_output, attention_outputs[1]) if output_attentions else (layer_output,) + return layer_outputs + + +# Copied from transformers.models.swin.modeling_swin.SwinStage with Swin->UnimerSwin +class UnimerSwinStage(nn.Module): + def __init__(self, config, dim, input_resolution, depth, num_heads, drop_path, downsample): + super().__init__() + self.config = config + self.dim = dim + self.blocks = nn.ModuleList( + [ + UnimerSwinLayer( + config=config, + dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + shift_size=0, + ) + for i in range(depth) + ] + ) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, dim=dim, norm_layer=nn.LayerNorm) + else: + self.downsample = None + + self.pointing = False + + def forward( + self, + hidden_states: torch.Tensor, + input_dimensions: Tuple[int, int], + head_mask: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = False, + always_partition: Optional[bool] = False, + ) -> Tuple[torch.Tensor]: + height, width = input_dimensions + for i, layer_module in enumerate(self.blocks): + layer_head_mask = head_mask[i] if head_mask is not None else None + + layer_outputs = layer_module( + hidden_states, input_dimensions, 
layer_head_mask, output_attentions, always_partition + ) + + hidden_states = layer_outputs[0] + + hidden_states_before_downsampling = hidden_states + if self.downsample is not None: + height_downsampled, width_downsampled = (height + 1) // 2, (width + 1) // 2 + output_dimensions = (height, width, height_downsampled, width_downsampled) + hidden_states = self.downsample(hidden_states_before_downsampling, input_dimensions) + else: + output_dimensions = (height, width, height, width) + + stage_outputs = (hidden_states, hidden_states_before_downsampling, output_dimensions) + + if output_attentions: + stage_outputs += layer_outputs[1:] + return stage_outputs + + +# Copied from transformers.models.swin.modeling_swin.SwinEncoder with Swin->UnimerSwin +class UnimerSwinEncoder(nn.Module): + def __init__(self, config, grid_size): + super().__init__() + self.num_layers = len(config.depths) + self.config = config + dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths))] + self.layers = nn.ModuleList( + [ + UnimerSwinStage( + config=config, + dim=int(config.embed_dim * 2**i_layer), + input_resolution=(grid_size[0] // (2**i_layer), grid_size[1] // (2**i_layer)), + depth=config.depths[i_layer], + num_heads=config.num_heads[i_layer], + drop_path=dpr[sum(config.depths[:i_layer]) : sum(config.depths[: i_layer + 1])], + downsample=UnimerSwinPatchMerging if (i_layer < self.num_layers - 1) else None, + ) + for i_layer in range(self.num_layers) + ] + ) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + input_dimensions: Tuple[int, int], + head_mask: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = False, + output_hidden_states: Optional[bool] = False, + output_hidden_states_before_downsampling: Optional[bool] = False, + always_partition: Optional[bool] = False, + return_dict: Optional[bool] = True, + ) -> Union[Tuple, UnimerSwinEncoderOutput]: + all_hidden_states = () if output_hidden_states else None + all_reshaped_hidden_states = () if output_hidden_states else None + all_self_attentions = () if output_attentions else None + + if output_hidden_states: + batch_size, _, hidden_size = hidden_states.shape + # rearrange b (h w) c -> b c h w + reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size) + reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2) + all_hidden_states += (hidden_states,) + all_reshaped_hidden_states += (reshaped_hidden_state,) + + for i, layer_module in enumerate(self.layers): + layer_head_mask = head_mask[i] if head_mask is not None else None + + if self.gradient_checkpointing and self.training: + layer_outputs = self._gradient_checkpointing_func( + layer_module.__call__, + hidden_states, + input_dimensions, + layer_head_mask, + output_attentions, + always_partition, + ) + else: + layer_outputs = layer_module( + hidden_states, input_dimensions, layer_head_mask, output_attentions, always_partition + ) + + hidden_states = layer_outputs[0] + hidden_states_before_downsampling = layer_outputs[1] + output_dimensions = layer_outputs[2] + + input_dimensions = (output_dimensions[-2], output_dimensions[-1]) + + if output_hidden_states and output_hidden_states_before_downsampling: + batch_size, _, hidden_size = hidden_states_before_downsampling.shape + # rearrange b (h w) c -> b c h w + # here we use the original (not downsampled) height and width + reshaped_hidden_state = hidden_states_before_downsampling.view( + batch_size, *(output_dimensions[0], 
output_dimensions[1]), hidden_size + ) + reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2) + all_hidden_states += (hidden_states_before_downsampling,) + all_reshaped_hidden_states += (reshaped_hidden_state,) + elif output_hidden_states and not output_hidden_states_before_downsampling: + batch_size, _, hidden_size = hidden_states.shape + # rearrange b (h w) c -> b c h w + reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size) + reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2) + all_hidden_states += (hidden_states,) + all_reshaped_hidden_states += (reshaped_hidden_state,) + + if output_attentions: + all_self_attentions += layer_outputs[3:] + + if not return_dict: + return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None) + + return UnimerSwinEncoderOutput( + last_hidden_state=hidden_states, + hidden_states=all_hidden_states, + attentions=all_self_attentions, + reshaped_hidden_states=all_reshaped_hidden_states, + ) + + +# Copied from transformers.models.swin.modeling_swin.SwinPreTrainedModel with Swin->UnimerSwin +class UnimerSwinPreTrainedModel(PreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. + """ + + config_class = UnimerSwinConfig + base_model_prefix = "unimer-swin" + main_input_name = "pixel_values" + supports_gradient_checkpointing = True + _no_split_modules = ["UnimerSwinStage"] + + def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, (nn.Linear, nn.Conv2d)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + +SWIN_START_DOCSTRING = r""" + This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use + it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and + behavior. + + Parameters: + config ([`UnimerSwinConfig`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. +""" + +SWIN_INPUTS_DOCSTRING = r""" + Args: + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See + [`DonutImageProcessor.__call__`] for details. + head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*): + Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. 
+ interpolate_pos_encoding (`bool`, *optional*, defaults to `False`): + Whether to interpolate the pre-trained position encodings. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. +""" + + +@add_start_docstrings( + "The bare UnimerSwin Model transformer outputting raw hidden-states without any specific head on top.", + SWIN_START_DOCSTRING, +) +class UnimerSwinModel(UnimerSwinPreTrainedModel): + def __init__(self, config, add_pooling_layer=True, use_mask_token=False): + super().__init__(config) + self.config = config + self.num_layers = len(config.depths) + self.num_features = int(config.embed_dim * 2 ** (self.num_layers - 1)) + + self.embeddings = UnimerSwinEmbeddings(config, use_mask_token=use_mask_token) + self.encoder = UnimerSwinEncoder(config, self.embeddings.patch_grid) + self.pooler = nn.AdaptiveAvgPool1d(1) if add_pooling_layer else None + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.embeddings.patch_embeddings + + def _prune_heads(self, heads_to_prune): + """ + Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base + class PreTrainedModel + """ + for layer, heads in heads_to_prune.items(): + self.encoder.layer[layer].attention.prune_heads(heads) + + @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING) + @add_code_sample_docstrings( + checkpoint=_CHECKPOINT_FOR_DOC, + output_type=UnimerSwinModelOutput, + config_class=_CONFIG_FOR_DOC, + modality="vision", + expected_output=_EXPECTED_OUTPUT_SHAPE, + ) + def forward( + self, + pixel_values: Optional[torch.FloatTensor] = None, + bool_masked_pos: Optional[torch.BoolTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + interpolate_pos_encoding: bool = False, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, UnimerSwinModelOutput]: + r""" + bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`): + Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). 
+ """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if pixel_values is None: + raise ValueError("You have to specify pixel_values") + + # Prepare head mask if needed + # 1.0 in head_mask indicate we keep the head + # attention_probs has shape bsz x n_heads x N x N + # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] + # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length] + head_mask = self.get_head_mask(head_mask, len(self.config.depths)) + + embedding_output, input_dimensions = self.embeddings( + pixel_values, bool_masked_pos=bool_masked_pos, interpolate_pos_encoding=interpolate_pos_encoding + ) + + encoder_outputs = self.encoder( + embedding_output, + input_dimensions, + head_mask=head_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = encoder_outputs[0] + + pooled_output = None + if self.pooler is not None: + pooled_output = self.pooler(sequence_output.transpose(1, 2)) + pooled_output = torch.flatten(pooled_output, 1) + + if not return_dict: + output = (sequence_output, pooled_output) + encoder_outputs[1:] + + return output + + return UnimerSwinModelOutput( + last_hidden_state=sequence_output, + pooler_output=pooled_output, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + reshaped_hidden_states=encoder_outputs.reshaped_hidden_states, + ) diff --git a/mineru/model/ocr/__init__.py b/mineru/model/ocr/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/ocr/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/model/ocr/paddleocr2pytorch/__init__.py b/mineru/model/ocr/paddleocr2pytorch/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py b/mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py new file mode 100644 index 00000000..b2662b56 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py @@ -0,0 +1,199 @@ +# Copyright (c) Opendatalab. All rights reserved. 
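+#
+# PytorchPaddleOCR: a PyTorch port of the PaddleOCR text detection/recognition
+# pipeline. The constructor maps the requested `lang` to a det/rec model pair and
+# a character dict via models_config.yml, and ocr() runs detection only,
+# recognition only, or the full TextSystem pipeline depending on the det/rec flags.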
+import copy +import os.path +import warnings +from pathlib import Path + +import cv2 +import numpy as np +import yaml +from loguru import logger + +from magic_pdf.libs.config_reader import get_device, get_local_models_dir +from ....utils.ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image +from .tools.infer.predict_system import TextSystem +from .tools.infer import pytorchocr_utility as utility +import argparse + + +latin_lang = [ + 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', # noqa: E126 + 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl', + 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv', + 'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german' +] +arabic_lang = ['ar', 'fa', 'ug', 'ur'] +cyrillic_lang = [ + 'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126 + 'dar', 'inh', 'che', 'lbe', 'lez', 'tab' +] +devanagari_lang = [ + 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126 + 'sa', 'bgc' +] + + +def get_model_params(lang, config): + if lang in config['lang']: + params = config['lang'][lang] + det = params.get('det') + rec = params.get('rec') + dict_file = params.get('dict') + return det, rec, dict_file + else: + raise Exception (f'Language {lang} not supported') + + +root_dir = Path(__file__).resolve().parent + + +class PytorchPaddleOCR(TextSystem): + def __init__(self, *args, **kwargs): + parser = utility.init_args() + args = parser.parse_args(args) + + self.lang = kwargs.get('lang', 'ch') + + device = get_device() + if device == 'cpu' and self.lang in ['ch', 'ch_server']: + logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.") + self.lang = 'ch_lite' + + if self.lang in latin_lang: + self.lang = 'latin' + elif self.lang in arabic_lang: + self.lang = 'arabic' + elif self.lang in cyrillic_lang: + self.lang = 'cyrillic' + elif self.lang in devanagari_lang: + self.lang = 'devanagari' + else: + pass + + models_config_path = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'models_config.yml') + with open(models_config_path) as file: + config = yaml.safe_load(file) + det, rec, dict_file = get_model_params(self.lang, config) + ocr_models_dir = os.path.join(get_local_models_dir(), 'OCR', 'paddleocr_torch') + kwargs['det_model_path'] = os.path.join(ocr_models_dir, det) + kwargs['rec_model_path'] = os.path.join(ocr_models_dir, rec) + kwargs['rec_char_dict_path'] = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'dict', dict_file) + # kwargs['rec_batch_num'] = 8 + + kwargs['device'] = device + + default_args = vars(args) + default_args.update(kwargs) + args = argparse.Namespace(**default_args) + + super().__init__(args) + + def ocr(self, + img, + det=True, + rec=True, + mfd_res=None, + tqdm_enable=False, + ): + assert isinstance(img, (np.ndarray, list, str, bytes)) + if isinstance(img, list) and det == True: + logger.error('When input a list of images, det must be false') + exit(0) + img = check_img(img) + imgs = [img] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=RuntimeWarning) + if det and rec: + ocr_res = [] + for img in imgs: + img = preprocess_image(img) + dt_boxes, rec_res = self.__call__(img, mfd_res=mfd_res) + if not dt_boxes and not rec_res: + ocr_res.append(None) + continue + tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] + 
                    ocr_res.append(tmp_res)
+                return ocr_res
+            elif det and not rec:
+                ocr_res = []
+                for img in imgs:
+                    img = preprocess_image(img)
+                    dt_boxes, elapse = self.text_detector(img)
+                    # logger.debug("dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse))
+                    if dt_boxes is None:
+                        ocr_res.append(None)
+                        continue
+                    dt_boxes = sorted_boxes(dt_boxes)
+                    # merge_det_boxes and update_det_boxes both convert each poly to a bbox and back,
+                    # so strongly tilted text boxes need to be filtered out here
+                    dt_boxes = merge_det_boxes(dt_boxes)
+                    if mfd_res:
+                        dt_boxes = update_det_boxes(dt_boxes, mfd_res)
+                    tmp_res = [box.tolist() for box in dt_boxes]
+                    ocr_res.append(tmp_res)
+                return ocr_res
+            elif not det and rec:
+                ocr_res = []
+                for img in imgs:
+                    if not isinstance(img, list):
+                        img = preprocess_image(img)
+                        img = [img]
+                    rec_res, elapse = self.text_recognizer(img, tqdm_enable=tqdm_enable)
+                    # logger.debug("rec_res num : {}, elapsed : {}".format(len(rec_res), elapse))
+                    ocr_res.append(rec_res)
+                return ocr_res
+
+    def __call__(self, img, mfd_res=None):
+
+        if img is None:
+            logger.debug("no valid image provided")
+            return None, None
+
+        ori_im = img.copy()
+        dt_boxes, elapse = self.text_detector(img)
+
+        if dt_boxes is None:
+            logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
+            return None, None
+        else:
+            pass
+            # logger.debug("dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse))
+        img_crop_list = []
+
+        dt_boxes = sorted_boxes(dt_boxes)
+
+        # merge_det_boxes and update_det_boxes both convert each poly to a bbox and back,
+        # so strongly tilted text boxes need to be filtered out here
+        dt_boxes = merge_det_boxes(dt_boxes)
+
+        if mfd_res:
+            dt_boxes = update_det_boxes(dt_boxes, mfd_res)
+
+        for bno in range(len(dt_boxes)):
+            tmp_box = copy.deepcopy(dt_boxes[bno])
+            img_crop = get_rotate_crop_image(ori_im, tmp_box)
+            img_crop_list.append(img_crop)
+
+        rec_res, elapse = self.text_recognizer(img_crop_list)
+        # logger.debug("rec_res num : {}, elapsed : {}".format(len(rec_res), elapse))
+
+        filter_boxes, filter_rec_res = [], []
+        for box, rec_result in zip(dt_boxes, rec_res):
+            text, score = rec_result
+            if score >= self.drop_score:
+                filter_boxes.append(box)
+                filter_rec_res.append(rec_result)
+
+        return filter_boxes, filter_rec_res
+
+if __name__ == '__main__':
+    pytorch_paddle_ocr = PytorchPaddleOCR()
+    img = cv2.imread("/Users/myhloli/Downloads/screenshot-20250326-194348.png")
+    dt_boxes, rec_res = pytorch_paddle_ocr(img)
+    ocr_res = []
+    if not dt_boxes and not rec_res:
+        ocr_res.append(None)
+    else:
+        tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
+        ocr_res.append(tmp_res)
+    print(ocr_res)
+
+
diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/__init__.py
new file mode 100755
index 00000000..e69de29b
diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
new file mode 100755
index 00000000..c169d20d
--- /dev/null
+++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
@@ -0,0 +1,39 @@
+import os
+import torch
+from .modeling.architectures.base_model import BaseModel
+
+class BaseOCRV20:
+    def __init__(self, config, **kwargs):
+        self.config = config
+        self.build_net(**kwargs)
+        self.net.eval()
+
+
+    def build_net(self, **kwargs):
+        self.net = BaseModel(self.config, **kwargs)
+
+    def read_pytorch_weights(self, weights_path):
+        if not os.path.exists(weights_path):
+            raise FileNotFoundError('{} does not exist.'.format(weights_path))
+        weights = torch.load(weights_path)
+        return weights
+
+    def get_out_channels(self, weights):
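+        """Infer the output channel count from the last tensor in a state dict.
+
+        A 2-D final ``*.weight`` tensor (e.g. a fully connected head) stores the
+        channel count in dim 1; otherwise the first dimension is used.
+        """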
if list(weights.keys())[-1].endswith('.weight') and len(list(weights.values())[-1].shape) == 2: + out_channels = list(weights.values())[-1].numpy().shape[1] + else: + out_channels = list(weights.values())[-1].numpy().shape[0] + return out_channels + + def load_state_dict(self, weights): + self.net.load_state_dict(weights) + # print('weights is loaded.') + + def load_pytorch_weights(self, weights_path): + self.net.load_state_dict(torch.load(weights_path, weights_only=True)) + # print('model is loaded: {}'.format(weights_path)) + + def inference(self, inputs): + with torch.no_grad(): + infer = self.net(inputs) + return infer diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py new file mode 100755 index 00000000..9eef2969 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from .imaug import transform, create_operators + + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py new file mode 100755 index 00000000..13abd674 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py @@ -0,0 +1,48 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +# from .iaa_augment import IaaAugment +# from .make_border_map import MakeBorderMap +# from .make_shrink_map import MakeShrinkMap +# from .random_crop_data import EastRandomCropData, PSERandomCrop + +# from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg +# from .randaugment import RandAugment +from .operators import * +# from .label_ops import * + +# from .east_process import * +# from .sast_process import * +# from .gen_table_mask import * + +def transform(data, ops=None): + """ transform """ + if ops is None: + ops = [] + for op in ops: + data = op(data) + if data is None: + return None + return data + + +def create_operators(op_param_list, global_config=None): + """ + create operators based on the config + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(op_param_list, list), ('operator config should be a list') + ops = [] + for operator in op_param_list: + assert isinstance(operator, + dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + if global_config is not None: + param.update(global_config) + op = eval(op_name)(**param) + ops.append(op) + return ops \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py new file mode 100755 index 00000000..daa67a25 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py @@ -0,0 +1,418 @@ +""" +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import sys +import six +import cv2 +import numpy as np + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, img_mode='RGB', channel_first=False, **kwargs): + self.img_mode = img_mode + self.channel_first = channel_first + + def __call__(self, data): + img = data['image'] + if six.PY2: + assert type(img) is str and len( + img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len( + img) > 0, "invalid input 'img' in DecodeImage" + img = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(img, 1) + if img is None: + return None + if self.img_mode == 'GRAY': + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + elif self.img_mode == 'RGB': + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + data['image'] = img + return data + + +class NRTRDecodeImage(object): + """ decode image """ + + def __init__(self, img_mode='RGB', channel_first=False, **kwargs): + self.img_mode = img_mode + self.channel_first = channel_first + + def __call__(self, data): + img = data['image'] + if six.PY2: + assert type(img) is str and len( + img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len( + img) > 0, "invalid input 'img' in DecodeImage" + img = np.frombuffer(img, dtype='uint8') + + img = cv2.imdecode(img, 1) + + if img is None: + return None + if self.img_mode == 'GRAY': + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + elif self.img_mode == 'RGB': + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if self.channel_first: + img = img.transpose((2, 0, 1)) + data['image'] = img + return data + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): + if isinstance(scale, str): + scale = eval(scale) + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, data): + img = data['image'] + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + assert isinstance(img, + np.ndarray), "invalid input 'img' in NormalizeImage" + data['image'] = ( + img.astype('float32') * self.scale - self.mean) / self.std + return data + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self, **kwargs): + pass + + def __call__(self, data): + img = data['image'] + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + data['image'] = img.transpose((2, 0, 1)) + 
return data + + +class Fasttext(object): + def __init__(self, path="None", **kwargs): + import fasttext + self.fast_model = fasttext.load_model(path) + + def __call__(self, data): + label = data['label'] + fast_label = self.fast_model[label] + data['fast_label'] = fast_label + return data + + +class KeepKeys(object): + def __init__(self, keep_keys, **kwargs): + self.keep_keys = keep_keys + + def __call__(self, data): + data_list = [] + for key in self.keep_keys: + data_list.append(data[key]) + return data_list + + +class Resize(object): + def __init__(self, size=(640, 640), **kwargs): + self.size = size + + def resize_image(self, img): + resize_h, resize_w = self.size + ori_h, ori_w = img.shape[:2] # (h, w, c) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + return img, [ratio_h, ratio_w] + + def __call__(self, data): + img = data['image'] + text_polys = data['polys'] + + img_resize, [ratio_h, ratio_w] = self.resize_image(img) + new_boxes = [] + for box in text_polys: + new_box = [] + for cord in box: + new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) + new_boxes.append(new_box) + data['image'] = img_resize + data['polys'] = np.array(new_boxes, dtype=np.float32) + return data + + +class DetResizeForTest(object): + def __init__(self, **kwargs): + super(DetResizeForTest, self).__init__() + self.resize_type = 0 + if 'image_shape' in kwargs: + self.image_shape = kwargs['image_shape'] + self.resize_type = 1 + elif 'limit_side_len' in kwargs: + self.limit_side_len = kwargs['limit_side_len'] + self.limit_type = kwargs.get('limit_type', 'min') + elif 'resize_long' in kwargs: + self.resize_type = 2 + self.resize_long = kwargs.get('resize_long', 960) + else: + self.limit_side_len = 736 + self.limit_type = 'min' + + def __call__(self, data): + img = data['image'] + src_h, src_w, _ = img.shape + + if self.resize_type == 0: + # img, shape = self.resize_image_type0(img) + img, [ratio_h, ratio_w] = self.resize_image_type0(img) + elif self.resize_type == 2: + img, [ratio_h, ratio_w] = self.resize_image_type2(img) + else: + # img, shape = self.resize_image_type1(img) + img, [ratio_h, ratio_w] = self.resize_image_type1(img) + data['image'] = img + data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) + return data + + def resize_image_type1(self, img): + resize_h, resize_w = self.image_shape + ori_h, ori_w = img.shape[:2] # (h, w, c) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + # return img, np.array([ori_h, ori_w]) + return img, [ratio_h, ratio_w] + + def resize_image_type0(self, img): + """ + resize image to a size multiple of 32 which is required by the network + args: + img(array): array with shape [h, w, c] + return(tuple): + img, (ratio_h, ratio_w) + """ + limit_side_len = self.limit_side_len + h, w, c = img.shape + + # limit the max side + if self.limit_type == 'max': + if max(h, w) > limit_side_len: + if h > w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. + elif self.limit_type == 'min': + if min(h, w) < limit_side_len: + if h < w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. 
+ elif self.limit_type == 'resize_long': + ratio = float(limit_side_len) / max(h, w) + else: + raise Exception('not support limit type, image ') + resize_h = int(h * ratio) + resize_w = int(w * ratio) + + resize_h = max(int(round(resize_h / 32) * 32), 32) + resize_w = max(int(round(resize_w / 32) * 32), 32) + + try: + if int(resize_w) <= 0 or int(resize_h) <= 0: + return None, (None, None) + img = cv2.resize(img, (int(resize_w), int(resize_h))) + except: + print(img.shape, resize_w, resize_h) + sys.exit(0) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return img, [ratio_h, ratio_w] + + def resize_image_type2(self, img): + h, w, _ = img.shape + + resize_w = w + resize_h = h + + if resize_h > resize_w: + ratio = float(self.resize_long) / resize_h + else: + ratio = float(self.resize_long) / resize_w + + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + img = cv2.resize(img, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + + return img, [ratio_h, ratio_w] + + +class E2EResizeForTest(object): + def __init__(self, **kwargs): + super(E2EResizeForTest, self).__init__() + self.max_side_len = kwargs['max_side_len'] + self.valid_set = kwargs['valid_set'] + + def __call__(self, data): + img = data['image'] + src_h, src_w, _ = img.shape + if self.valid_set == 'totaltext': + im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( + img, max_side_len=self.max_side_len) + else: + im_resized, (ratio_h, ratio_w) = self.resize_image( + img, max_side_len=self.max_side_len) + data['image'] = im_resized + data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) + return data + + def resize_image_for_totaltext(self, im, max_side_len=512): + + h, w, _ = im.shape + resize_w = w + resize_h = h + ratio = 1.25 + if h * ratio > max_side_len: + ratio = float(max_side_len) / resize_h + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return im, (ratio_h, ratio_w) + + def resize_image(self, im, max_side_len=512): + """ + resize image to a size multiple of max_stride which is required by the network + :param im: the resized image + :param max_side_len: limit of max image size to avoid out of memory in gpu + :return: the resized image and the resize ratio + """ + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # Fix the longer side + if resize_h > resize_w: + ratio = float(max_side_len) / resize_h + else: + ratio = float(max_side_len) / resize_w + + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + + return im, (ratio_h, ratio_w) + + +class KieResize(object): + def __init__(self, **kwargs): + super(KieResize, self).__init__() + self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ + 'img_scale'][1] + + def __call__(self, data): + img = data['image'] + points = data['points'] 
+ src_h, src_w, _ = img.shape + im_resized, scale_factor, [ratio_h, ratio_w + ], [new_h, new_w] = self.resize_image(img) + resize_points = self.resize_boxes(img, points, scale_factor) + data['ori_image'] = img + data['ori_boxes'] = points + data['points'] = resize_points + data['image'] = im_resized + data['shape'] = np.array([new_h, new_w]) + return data + + def resize_image(self, img): + norm_img = np.zeros([1024, 1024, 3], dtype='float32') + scale = [512, 1024] + h, w = img.shape[:2] + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( + scale_factor) + 0.5) + max_stride = 32 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(img, (resize_w, resize_h)) + new_h, new_w = im.shape[:2] + w_scale = new_w / w + h_scale = new_h / h + scale_factor = np.array( + [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) + norm_img[:new_h, :new_w, :] = im + return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] + + def resize_boxes(self, im, points, scale_factor): + points = points * scale_factor + img_shape = im.shape[:2] + points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) + points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) + return points diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py new file mode 100644 index 00000000..7ad5eb47 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +__all__ = ["build_model"] + + +def build_model(config, **kwargs): + from .base_model import BaseModel + + config = copy.deepcopy(config) + module_class = BaseModel(config, **kwargs) + return module_class diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py new file mode 100644 index 00000000..e7f7ce49 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py @@ -0,0 +1,105 @@ +from torch import nn + +from ..backbones import build_backbone +from ..heads import build_head +from ..necks import build_neck + + +class BaseModel(nn.Module): + def __init__(self, config, **kwargs): + """ + the module for OCR. + args: + config (dict): the super parameters for module. 
+        """
+        super(BaseModel, self).__init__()
+
+        in_channels = config.get("in_channels", 3)
+        model_type = config["model_type"]
+        # build backbone; the backbone is needed for det, rec and cls
+        if "Backbone" not in config or config["Backbone"] is None:
+            self.use_backbone = False
+        else:
+            self.use_backbone = True
+            config["Backbone"]["in_channels"] = in_channels
+            self.backbone = build_backbone(config["Backbone"], model_type)
+            in_channels = self.backbone.out_channels
+
+        # build neck
+        # for rec, neck can be cnn,rnn or reshape(None)
+        # for det, neck can be FPN, BIFPN and so on.
+        # for cls, neck should be none
+        if "Neck" not in config or config["Neck"] is None:
+            self.use_neck = False
+        else:
+            self.use_neck = True
+            config["Neck"]["in_channels"] = in_channels
+            self.neck = build_neck(config["Neck"])
+            in_channels = self.neck.out_channels
+
+        # build head; the head is needed for det, rec and cls
+        if "Head" not in config or config["Head"] is None:
+            self.use_head = False
+        else:
+            self.use_head = True
+            config["Head"]["in_channels"] = in_channels
+            self.head = build_head(config["Head"], **kwargs)
+
+        self.return_all_feats = config.get("return_all_feats", False)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        # weight initialization
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out")
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.ones_(m.weight)
+                nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.ConvTranspose2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out")
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+
+    def forward(self, x):
+        y = dict()
+        if self.use_backbone:
+            x = self.backbone(x)
+        if isinstance(x, dict):
+            y.update(x)
+        else:
+            y["backbone_out"] = x
+        final_name = "backbone_out"
+        if self.use_neck:
+            x = self.neck(x)
+            if isinstance(x, dict):
+                y.update(x)
+            else:
+                y["neck_out"] = x
+            final_name = "neck_out"
+        if self.use_head:
+            x = self.head(x)
+            # for multi head, save ctc neck out for udml
+            if isinstance(x, dict) and "ctc_neck" in x.keys():
+                y["neck_out"] = x["ctc_neck"]
+                y["head_out"] = x
+            elif isinstance(x, dict):
+                y.update(x)
+            else:
+                y["head_out"] = x
+        if self.return_all_feats:
+            if self.training:
+                return y
+            elif isinstance(x, dict):
+                return x
+            else:
+                return {final_name: x}
+        else:
+            return x
diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
new file mode 100644
index 00000000..7f437a23
--- /dev/null
+++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
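+
+# build_backbone() is a small factory: it looks up the "name" field of a backbone
+# config in the per-model-type support list below and instantiates that class with
+# the remaining config entries as keyword arguments.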
+ +__all__ = ["build_backbone"] + + +def build_backbone(config, model_type): + if model_type == "det": + from .det_mobilenet_v3 import MobileNetV3 + from .rec_hgnet import PPHGNet_small + from .rec_lcnetv3 import PPLCNetV3 + + support_dict = [ + "MobileNetV3", + "ResNet", + "ResNet_vd", + "ResNet_SAST", + "PPLCNetV3", + "PPHGNet_small", + ] + elif model_type == "rec" or model_type == "cls": + from .rec_hgnet import PPHGNet_small + from .rec_lcnetv3 import PPLCNetV3 + from .rec_mobilenet_v3 import MobileNetV3 + from .rec_svtrnet import SVTRNet + from .rec_mv1_enhance import MobileNetV1Enhance + from .rec_pphgnetv2 import PPHGNetV2_B4 + support_dict = [ + "MobileNetV1Enhance", + "MobileNetV3", + "ResNet", + "ResNetFPN", + "MTB", + "ResNet31", + "SVTRNet", + "ViTSTR", + "DenseNet", + "PPLCNetV3", + "PPHGNet_small", + "PPHGNetV2_B4", + ] + else: + raise NotImplementedError + + module_name = config.pop("name") + assert module_name in support_dict, Exception( + "when model typs is {}, backbone only support {}".format( + model_type, support_dict + ) + ) + module_class = eval(module_name)(**config) + return module_class diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py new file mode 100644 index 00000000..03511599 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py @@ -0,0 +1,269 @@ +from torch import nn + +from ..common import Activation + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + groups=1, + if_act=True, + act=None, + name=None, + ): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=False, + ) + + self.bn = nn.BatchNorm2d( + out_channels, + ) + if self.if_act: + self.act = Activation(act_type=act, inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + x = self.act(x) + return x + + +class SEModule(nn.Module): + def __init__(self, in_channels, reduction=4, name=""): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv1 = nn.Conv2d( + in_channels=in_channels, + out_channels=in_channels // reduction, + kernel_size=1, + stride=1, + padding=0, + bias=True, + ) + self.relu1 = Activation(act_type="relu", inplace=True) + self.conv2 = nn.Conv2d( + in_channels=in_channels // reduction, + out_channels=in_channels, + kernel_size=1, + stride=1, + padding=0, + bias=True, + ) + self.hard_sigmoid = Activation(act_type="hard_sigmoid", inplace=True) + + def forward(self, inputs): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = self.relu1(outputs) + outputs = self.conv2(outputs) + outputs = self.hard_sigmoid(outputs) + outputs = inputs * outputs + return outputs + + +class ResidualUnit(nn.Module): + def __init__( + self, + in_channels, + mid_channels, + out_channels, + kernel_size, + stride, + use_se, + act=None, + name="", + ): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_channels == out_channels + self.if_se = 
use_se + + self.expand_conv = ConvBNLayer( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + "_expand", + ) + self.bottleneck_conv = ConvBNLayer( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=int((kernel_size - 1) // 2), + groups=mid_channels, + if_act=True, + act=act, + name=name + "_depthwise", + ) + if self.if_se: + self.mid_se = SEModule(mid_channels, name=name + "_se") + self.linear_conv = ConvBNLayer( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + if_act=False, + act=None, + name=name + "_linear", + ) + + def forward(self, inputs): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = inputs + x + return x + + +class MobileNetV3(nn.Module): + def __init__( + self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs + ): + """ + the MobilenetV3 backbone network for detection module. + Args: + params(dict): the super parameters for build network + """ + super(MobileNetV3, self).__init__() + + self.disable_se = disable_se + + if model_name == "large": + cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], + [3, 240, 80, False, "hard_swish", 2], + [3, 200, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 480, 112, True, "hard_swish", 1], + [3, 672, 112, True, "hard_swish", 1], + [5, 672, 160, True, "hard_swish", 2], + [5, 960, 160, True, "hard_swish", 1], + [5, 960, 160, True, "hard_swish", 1], + ] + cls_ch_squeeze = 960 + elif model_name == "small": + cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, "relu", 2], + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], + [5, 96, 40, True, "hard_swish", 2], + [5, 240, 40, True, "hard_swish", 1], + [5, 240, 40, True, "hard_swish", 1], + [5, 120, 48, True, "hard_swish", 1], + [5, 144, 48, True, "hard_swish", 1], + [5, 288, 96, True, "hard_swish", 2], + [5, 576, 96, True, "hard_swish", 1], + [5, 576, 96, True, "hard_swish", 1], + ] + cls_ch_squeeze = 576 + else: + raise NotImplementedError( + "mode[" + model_name + "_model] is not implemented!" 
+ ) + + supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] + assert ( + scale in supported_scale + ), "supported scale are {} but input scale is {}".format(supported_scale, scale) + inplanes = 16 + # conv1 + self.conv = ConvBNLayer( + in_channels=in_channels, + out_channels=make_divisible(inplanes * scale), + kernel_size=3, + stride=2, + padding=1, + groups=1, + if_act=True, + act="hard_swish", + name="conv1", + ) + + self.stages = nn.ModuleList() + self.out_channels = [] + block_list = [] + i = 0 + inplanes = make_divisible(inplanes * scale) + for k, exp, c, se, nl, s in cfg: + se = se and not self.disable_se + if s == 2 and i > 2: + self.out_channels.append(inplanes) + self.stages.append(nn.Sequential(*block_list)) + block_list = [] + block_list.append( + ResidualUnit( + in_channels=inplanes, + mid_channels=make_divisible(scale * exp), + out_channels=make_divisible(scale * c), + kernel_size=k, + stride=s, + use_se=se, + act=nl, + name="conv" + str(i + 2), + ) + ) + inplanes = make_divisible(scale * c) + i += 1 + block_list.append( + ConvBNLayer( + in_channels=inplanes, + out_channels=make_divisible(scale * cls_ch_squeeze), + kernel_size=1, + stride=1, + padding=0, + groups=1, + if_act=True, + act="hard_swish", + name="conv_last", + ) + ) + self.stages.append(nn.Sequential(*block_list)) + self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) + # for i, stage in enumerate(self.stages): + # self.add_sublayer(sublayer=stage, name="stage{}".format(i)) + + def forward(self, x): + x = self.conv(x) + out_list = [] + for stage in self.stages: + x = stage(x) + out_list.append(x) + return out_list diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py new file mode 100644 index 00000000..c1515a71 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py @@ -0,0 +1,290 @@ +import torch +import torch.nn.functional as F +from torch import nn + + +class ConvBNAct(nn.Module): + def __init__( + self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True + ): + super().__init__() + self.use_act = use_act + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + padding=(kernel_size - 1) // 2, + groups=groups, + bias=False, + ) + self.bn = nn.BatchNorm2d(out_channels) + if self.use_act: + self.act = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.use_act: + x = self.act(x) + return x + + +class ESEModule(nn.Module): + def __init__(self, channels): + super().__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv = nn.Conv2d( + in_channels=channels, + out_channels=channels, + kernel_size=1, + stride=1, + padding=0, + ) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv(x) + x = self.sigmoid(x) + return x * identity + + +class HG_Block(nn.Module): + def __init__( + self, + in_channels, + mid_channels, + out_channels, + layer_num, + identity=False, + ): + super().__init__() + self.identity = identity + + self.layers = nn.ModuleList() + self.layers.append( + ConvBNAct( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=3, + stride=1, + ) + ) + for _ in range(layer_num - 1): + self.layers.append( + ConvBNAct( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=3, + stride=1, + ) + ) + + # feature aggregation + total_channels = in_channels + layer_num * mid_channels + 
self.aggregation_conv = ConvBNAct( + in_channels=total_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + ) + self.att = ESEModule(out_channels) + + def forward(self, x): + identity = x + output = [] + output.append(x) + for layer in self.layers: + x = layer(x) + output.append(x) + x = torch.cat(output, dim=1) + x = self.aggregation_conv(x) + x = self.att(x) + if self.identity: + x += identity + return x + + +class HG_Stage(nn.Module): + def __init__( + self, + in_channels, + mid_channels, + out_channels, + block_num, + layer_num, + downsample=True, + stride=[2, 1], + ): + super().__init__() + self.downsample = downsample + if downsample: + self.downsample = ConvBNAct( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=stride, + groups=in_channels, + use_act=False, + ) + + blocks_list = [] + blocks_list.append( + HG_Block(in_channels, mid_channels, out_channels, layer_num, identity=False) + ) + for _ in range(block_num - 1): + blocks_list.append( + HG_Block( + out_channels, mid_channels, out_channels, layer_num, identity=True + ) + ) + self.blocks = nn.Sequential(*blocks_list) + + def forward(self, x): + if self.downsample: + x = self.downsample(x) + x = self.blocks(x) + return x + + +class PPHGNet(nn.Module): + """ + PPHGNet + Args: + stem_channels: list. Stem channel list of PPHGNet. + stage_config: dict. The configuration of each stage of PPHGNet. such as the number of channels, stride, etc. + layer_num: int. Number of layers of HG_Block. + use_last_conv: boolean. Whether to use a 1x1 convolutional layer before the classification layer. + class_expand: int=2048. Number of channels for the last 1x1 convolutional layer. + dropout_prob: float. Parameters of dropout, 0.0 means dropout is not used. + class_num: int=1000. The number of classes. + Returns: + model: nn.Layer. Specific PPHGNet model depends on args. 
+ """ + + def __init__( + self, + stem_channels, + stage_config, + layer_num, + in_channels=3, + det=False, + out_indices=None, + ): + super().__init__() + self.det = det + self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3] + + # stem + stem_channels.insert(0, in_channels) + self.stem = nn.Sequential( + *[ + ConvBNAct( + in_channels=stem_channels[i], + out_channels=stem_channels[i + 1], + kernel_size=3, + stride=2 if i == 0 else 1, + ) + for i in range(len(stem_channels) - 1) + ] + ) + + if self.det: + self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + # stages + self.stages = nn.ModuleList() + self.out_channels = [] + for block_id, k in enumerate(stage_config): + ( + in_channels, + mid_channels, + out_channels, + block_num, + downsample, + stride, + ) = stage_config[k] + self.stages.append( + HG_Stage( + in_channels, + mid_channels, + out_channels, + block_num, + layer_num, + downsample, + stride, + ) + ) + if block_id in self.out_indices: + self.out_channels.append(out_channels) + + if not self.det: + self.out_channels = stage_config["stage4"][2] + + self._init_weights() + + def _init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.stem(x) + if self.det: + x = self.pool(x) + + out = [] + for i, stage in enumerate(self.stages): + x = stage(x) + if self.det and i in self.out_indices: + out.append(x) + if self.det: + return out + + if self.training: + x = F.adaptive_avg_pool2d(x, [1, 40]) + else: + x = F.avg_pool2d(x, [3, 2]) + return x + + +def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs): + """ + PPHGNet_small + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPHGNet_small` model depends on args. + """ + stage_config_det = { + # in_channels, mid_channels, out_channels, blocks, downsample + "stage1": [128, 128, 256, 1, False, 2], + "stage2": [256, 160, 512, 1, True, 2], + "stage3": [512, 192, 768, 2, True, 2], + "stage4": [768, 224, 1024, 1, True, 2], + } + + stage_config_rec = { + # in_channels, mid_channels, out_channels, blocks, downsample + "stage1": [128, 128, 256, 1, True, [2, 1]], + "stage2": [256, 160, 512, 1, True, [1, 2]], + "stage3": [512, 192, 768, 2, True, [2, 1]], + "stage4": [768, 224, 1024, 1, True, [2, 1]], + } + + model = PPHGNet( + stem_channels=[64, 64, 128], + stage_config=stage_config_det if det else stage_config_rec, + layer_num=6, + det=det, + **kwargs + ) + return model diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py new file mode 100644 index 00000000..e2bd4572 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py @@ -0,0 +1,516 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +import torch +import torch.nn.functional as F +from torch import nn + +from ..common import Activation + +NET_CONFIG_det = { + "blocks2": + # k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [ + [3, 128, 256, 2, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + ], + "blocks6": [ + [5, 256, 512, 2, True], + [5, 512, 512, 1, True], + [5, 512, 512, 1, False], + [5, 512, 512, 1, False], + ], +} + +NET_CONFIG_rec = { + "blocks2": + # k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]], + "blocks5": [ + [3, 128, 256, (1, 2), False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + ], + "blocks6": [ + [5, 256, 512, (2, 1), True], + [5, 512, 512, 1, True], + [5, 512, 512, (2, 1), False], + [5, 512, 512, 1, False], + ], +} + + +def make_divisible(v, divisor=16, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class LearnableAffineBlock(nn.Module): + def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1): + super().__init__() + self.scale = nn.Parameter(torch.Tensor([scale_value])) + self.bias = nn.Parameter(torch.Tensor([bias_value])) + + def forward(self, x): + return self.scale * x + self.bias + + +class ConvBNLayer(nn.Module): + def __init__( + self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0 + ): + super().__init__() + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + bias=False, + ) + + self.bn = nn.BatchNorm2d( + out_channels, + ) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class Act(nn.Module): + def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1): + super().__init__() + if act == "hswish": + self.act = nn.Hardswish(inplace=True) + else: + assert act == "relu" + self.act = Activation(act) + self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) + + def forward(self, x): + return self.lab(self.act(x)) + + +class LearnableRepLayer(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + groups=1, + num_conv_branches=1, + lr_mult=1.0, + lab_lr=0.1, + ): + super().__init__() + self.is_repped = False + self.groups = groups + self.stride = stride + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + self.num_conv_branches = num_conv_branches + self.padding = (kernel_size - 1) // 2 + + self.identity = ( + nn.BatchNorm2d( + num_features=in_channels, + ) + if out_channels == 
in_channels and stride == 1 + else None + ) + + self.conv_kxk = nn.ModuleList( + [ + ConvBNLayer( + in_channels, + out_channels, + kernel_size, + stride, + groups=groups, + lr_mult=lr_mult, + ) + for _ in range(self.num_conv_branches) + ] + ) + + self.conv_1x1 = ( + ConvBNLayer( + in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult + ) + if kernel_size > 1 + else None + ) + + self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) + self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr) + + def forward(self, x): + # for export + if self.is_repped: + out = self.lab(self.reparam_conv(x)) + if self.stride != 2: + out = self.act(out) + return out + + out = 0 + if self.identity is not None: + out += self.identity(x) + + if self.conv_1x1 is not None: + out += self.conv_1x1(x) + + for conv in self.conv_kxk: + out += conv(x) + + out = self.lab(out) + if self.stride != 2: + out = self.act(out) + return out + + def rep(self): + if self.is_repped: + return + kernel, bias = self._get_kernel_bias() + self.reparam_conv = nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + groups=self.groups, + ) + self.reparam_conv.weight.data = kernel + self.reparam_conv.bias.data = bias + self.is_repped = True + + def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad): + if not isinstance(kernel1x1, torch.Tensor): + return 0 + else: + return nn.functional.pad(kernel1x1, [pad, pad, pad, pad]) + + def _get_kernel_bias(self): + kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1) + kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk( + kernel_conv_1x1, self.kernel_size // 2 + ) + + kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity) + + kernel_conv_kxk = 0 + bias_conv_kxk = 0 + for conv in self.conv_kxk: + kernel, bias = self._fuse_bn_tensor(conv) + kernel_conv_kxk += kernel + bias_conv_kxk += bias + + kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity + bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity + return kernel_reparam, bias_reparam + + def _fuse_bn_tensor(self, branch): + if not branch: + return 0, 0 + elif isinstance(branch, ConvBNLayer): + kernel = branch.conv.weight + running_mean = branch.bn._mean + running_var = branch.bn._variance + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn._epsilon + else: + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, "id_tensor"): + input_dim = self.in_channels // self.groups + kernel_value = torch.zeros( + (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + dtype=branch.weight.dtype, + ) + for i in range(self.in_channels): + kernel_value[ + i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + ] = 1 + self.id_tensor = kernel_value + kernel = self.id_tensor + running_mean = branch._mean + running_var = branch._variance + gamma = branch.weight + beta = branch.bias + eps = branch._epsilon + std = (running_var + eps).sqrt() + t = (gamma / std).reshape((-1, 1, 1, 1)) + return kernel * t, beta - running_mean * gamma / std + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=4, lr_mult=1.0): + super().__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv1 = nn.Conv2d( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + 
) + self.hardsigmoid = nn.Hardsigmoid(inplace=True) + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = identity * x + return x + + +class LCNetV3Block(nn.Module): + def __init__( + self, + in_channels, + out_channels, + stride, + dw_size, + use_se=False, + conv_kxk_num=4, + lr_mult=1.0, + lab_lr=0.1, + ): + super().__init__() + self.use_se = use_se + self.dw_conv = LearnableRepLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=dw_size, + stride=stride, + groups=in_channels, + num_conv_branches=conv_kxk_num, + lr_mult=lr_mult, + lab_lr=lab_lr, + ) + if use_se: + self.se = SELayer(in_channels, lr_mult=lr_mult) + self.pw_conv = LearnableRepLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + num_conv_branches=conv_kxk_num, + lr_mult=lr_mult, + lab_lr=lab_lr, + ) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class PPLCNetV3(nn.Module): + def __init__( + self, + scale=1.0, + conv_kxk_num=4, + lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + lab_lr=0.1, + det=False, + **kwargs + ): + super().__init__() + self.scale = scale + self.lr_mult_list = lr_mult_list + self.det = det + + self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec + + assert isinstance( + self.lr_mult_list, (list, tuple) + ), "lr_mult_list should be in (list, tuple) but got {}".format( + type(self.lr_mult_list) + ) + assert ( + len(self.lr_mult_list) == 6 + ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list)) + + self.conv1 = ConvBNLayer( + in_channels=3, + out_channels=make_divisible(16 * scale), + kernel_size=3, + stride=2, + lr_mult=self.lr_mult_list[0], + ) + + self.blocks2 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[1], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"]) + ] + ) + + self.blocks3 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[2], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"]) + ] + ) + + self.blocks4 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[3], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"]) + ] + ) + + self.blocks5 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[4], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"]) + ] + ) + + self.blocks6 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[5], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"]) 
+ ] + ) + self.out_channels = make_divisible(512 * scale) + + if self.det: + mv_c = [16, 24, 56, 480] + self.out_channels = [ + make_divisible(self.net_config["blocks3"][-1][2] * scale), + make_divisible(self.net_config["blocks4"][-1][2] * scale), + make_divisible(self.net_config["blocks5"][-1][2] * scale), + make_divisible(self.net_config["blocks6"][-1][2] * scale), + ] + + self.layer_list = nn.ModuleList( + [ + nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0), + nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0), + nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0), + nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0), + ] + ) + self.out_channels = [ + int(mv_c[0] * scale), + int(mv_c[1] * scale), + int(mv_c[2] * scale), + int(mv_c[3] * scale), + ] + + def forward(self, x): + out_list = [] + x = self.conv1(x) + x = self.blocks2(x) + x = self.blocks3(x) + out_list.append(x) + x = self.blocks4(x) + out_list.append(x) + x = self.blocks5(x) + out_list.append(x) + x = self.blocks6(x) + out_list.append(x) + + if self.det: + out_list[0] = self.layer_list[0](out_list[0]) + out_list[1] = self.layer_list[1](out_list[1]) + out_list[2] = self.layer_list[2](out_list[2]) + out_list[3] = self.layer_list[3](out_list[3]) + return out_list + + if self.training: + x = F.adaptive_avg_pool2d(x, [1, 40]) + else: + x = F.avg_pool2d(x, [3, 2]) + return x diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py new file mode 100644 index 00000000..d284a6d4 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py @@ -0,0 +1,136 @@ +from torch import nn + +from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible + + +class MobileNetV3(nn.Module): + def __init__( + self, + in_channels=3, + model_name="small", + scale=0.5, + large_stride=None, + small_stride=None, + **kwargs + ): + super(MobileNetV3, self).__init__() + if small_stride is None: + small_stride = [2, 2, 2, 2] + if large_stride is None: + large_stride = [1, 2, 2, 2] + + assert isinstance( + large_stride, list + ), "large_stride type must " "be list but got {}".format(type(large_stride)) + assert isinstance( + small_stride, list + ), "small_stride type must " "be list but got {}".format(type(small_stride)) + assert ( + len(large_stride) == 4 + ), "large_stride length must be " "4 but got {}".format(len(large_stride)) + assert ( + len(small_stride) == 4 + ), "small_stride length must be " "4 but got {}".format(len(small_stride)) + + if model_name == "large": + cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, "relu", large_stride[0]], + [3, 64, 24, False, "relu", (large_stride[1], 1)], + [3, 72, 24, False, "relu", 1], + [5, 72, 40, True, "relu", (large_stride[2], 1)], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], + [3, 240, 80, False, "hard_swish", 1], + [3, 200, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 480, 112, True, "hard_swish", 1], + [3, 672, 112, True, "hard_swish", 1], + [5, 672, 160, True, "hard_swish", (large_stride[3], 1)], + [5, 960, 160, True, "hard_swish", 1], + [5, 960, 160, True, "hard_swish", 1], + ] + cls_ch_squeeze = 960 + elif model_name == "small": + cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, "relu", (small_stride[0], 1)], + [3, 72, 24, False, "relu", (small_stride[1], 1)], + [3, 88, 24, 
False, "relu", 1], + [5, 96, 40, True, "hard_swish", (small_stride[2], 1)], + [5, 240, 40, True, "hard_swish", 1], + [5, 240, 40, True, "hard_swish", 1], + [5, 120, 48, True, "hard_swish", 1], + [5, 144, 48, True, "hard_swish", 1], + [5, 288, 96, True, "hard_swish", (small_stride[3], 1)], + [5, 576, 96, True, "hard_swish", 1], + [5, 576, 96, True, "hard_swish", 1], + ] + cls_ch_squeeze = 576 + else: + raise NotImplementedError( + "mode[" + model_name + "_model] is not implemented!" + ) + + supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] + assert ( + scale in supported_scale + ), "supported scales are {} but input scale is {}".format( + supported_scale, scale + ) + + inplanes = 16 + # conv1 + self.conv1 = ConvBNLayer( + in_channels=in_channels, + out_channels=make_divisible(inplanes * scale), + kernel_size=3, + stride=2, + padding=1, + groups=1, + if_act=True, + act="hard_swish", + name="conv1", + ) + i = 0 + block_list = [] + inplanes = make_divisible(inplanes * scale) + for k, exp, c, se, nl, s in cfg: + block_list.append( + ResidualUnit( + in_channels=inplanes, + mid_channels=make_divisible(scale * exp), + out_channels=make_divisible(scale * c), + kernel_size=k, + stride=s, + use_se=se, + act=nl, + name="conv" + str(i + 2), + ) + ) + inplanes = make_divisible(scale * c) + i += 1 + self.blocks = nn.Sequential(*block_list) + + self.conv2 = ConvBNLayer( + in_channels=inplanes, + out_channels=make_divisible(scale * cls_ch_squeeze), + kernel_size=1, + stride=1, + padding=0, + groups=1, + if_act=True, + act="hard_swish", + name="conv_last", + ) + + self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) + self.out_channels = make_divisible(scale * cls_ch_squeeze) + + def forward(self, x): + x = self.conv1(x) + x = self.blocks(x) + x = self.conv2(x) + x = self.pool(x) + return x diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py new file mode 100644 index 00000000..447c48f6 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py @@ -0,0 +1,234 @@ +import os, sys +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..common import Activation + + +class ConvBNLayer(nn.Module): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + act='hard_swish'): + super(ConvBNLayer, self).__init__() + self.act = act + self._conv = nn.Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + bias=False) + + self._batch_norm = nn.BatchNorm2d( + num_filters, + ) + if self.act is not None: + self._act = Activation(act_type=act, inplace=True) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + if self.act is not None: + y = self._act(y) + return y + + +class DepthwiseSeparable(nn.Module): + def __init__(self, + num_channels, + num_filters1, + num_filters2, + num_groups, + stride, + scale, + dw_size=3, + padding=1, + use_se=False): + super(DepthwiseSeparable, self).__init__() + self.use_se = use_se + self._depthwise_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=int(num_filters1 * scale), + filter_size=dw_size, + stride=stride, + padding=padding, + num_groups=int(num_groups * scale)) + if use_se: + self._se = SEModule(int(num_filters1 * scale)) + self._pointwise_conv = ConvBNLayer( + 
num_channels=int(num_filters1 * scale), + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) + + def forward(self, inputs): + y = self._depthwise_conv(inputs) + if self.use_se: + y = self._se(y) + y = self._pointwise_conv(y) + return y + + +class MobileNetV1Enhance(nn.Module): + def __init__(self, + in_channels=3, + scale=0.5, + last_conv_stride=1, + last_pool_type='max', + **kwargs): + super().__init__() + self.scale = scale + self.block_list = [] + + self.conv1 = ConvBNLayer( + num_channels=in_channels, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) + + conv2_1 = DepthwiseSeparable( + num_channels=int(32 * scale), + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale) + self.block_list.append(conv2_1) + + conv2_2 = DepthwiseSeparable( + num_channels=int(64 * scale), + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=1, + scale=scale) + self.block_list.append(conv2_2) + + conv3_1 = DepthwiseSeparable( + num_channels=int(128 * scale), + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale) + self.block_list.append(conv3_1) + + conv3_2 = DepthwiseSeparable( + num_channels=int(128 * scale), + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=(2, 1), + scale=scale) + self.block_list.append(conv3_2) + + conv4_1 = DepthwiseSeparable( + num_channels=int(256 * scale), + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale) + self.block_list.append(conv4_1) + + conv4_2 = DepthwiseSeparable( + num_channels=int(256 * scale), + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=(2, 1), + scale=scale) + self.block_list.append(conv4_2) + + for _ in range(5): + conv5 = DepthwiseSeparable( + num_channels=int(512 * scale), + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + dw_size=5, + padding=2, + scale=scale, + use_se=False) + self.block_list.append(conv5) + + conv5_6 = DepthwiseSeparable( + num_channels=int(512 * scale), + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=(2, 1), + dw_size=5, + padding=2, + scale=scale, + use_se=True) + self.block_list.append(conv5_6) + + conv6 = DepthwiseSeparable( + num_channels=int(1024 * scale), + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=last_conv_stride, + dw_size=5, + padding=2, + use_se=True, + scale=scale) + self.block_list.append(conv6) + + self.block_list = nn.Sequential(*self.block_list) + if last_pool_type == 'avg': + self.pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0) + else: + self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) + self.out_channels = int(1024 * scale) + + def forward(self, inputs): + y = self.conv1(inputs) + y = self.block_list(y) + y = self.pool(y) + return y + +def hardsigmoid(x): + return F.relu6(x + 3., inplace=True) / 6. 
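+# Hard-sigmoid gate used by SEModule below: relu6(x + 3) / 6, i.e. clamp((x + 3) / 6, 0, 1),
+# a piecewise-linear approximation of sigmoid (equivalent to torch.nn.Hardsigmoid).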
+ +class SEModule(nn.Module): + def __init__(self, channel, reduction=4): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv1 = nn.Conv2d( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + bias=True) + self.conv2 = nn.Conv2d( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + bias=True) + + def forward(self, inputs): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = F.relu(outputs) + outputs = self.conv2(outputs) + outputs = hardsigmoid(outputs) + x = torch.mul(inputs, outputs) + + return x diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py new file mode 100644 index 00000000..390ca4c6 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py @@ -0,0 +1,810 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class AdaptiveAvgPool2D(nn.AdaptiveAvgPool2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + if isinstance(self.output_size, int) and self.output_size == 1: + self._gap = True + elif ( + isinstance(self.output_size, tuple) + and self.output_size[0] == 1 + and self.output_size[1] == 1 + ): + self._gap = True + else: + self._gap = False + + def forward(self, x): + if self._gap: + # Global Average Pooling + N, C, _, _ = x.shape + x_mean = torch.mean(x, dim=[2, 3]) + x_mean = torch.reshape(x_mean, [N, C, 1, 1]) + return x_mean + else: + return F.adaptive_avg_pool2d( + x, + output_size=self.output_size + ) + +class LearnableAffineBlock(nn.Module): + """ + Create a learnable affine block module. This module can significantly improve accuracy on smaller models. + + Args: + scale_value (float): The initial value of the scale parameter, default is 1.0. + bias_value (float): The initial value of the bias parameter, default is 0.0. + lr_mult (float): The learning rate multiplier, default is 1.0. + lab_lr (float): The learning rate, default is 0.01. + """ + + def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.01): + super().__init__() + self.scale = nn.Parameter(torch.Tensor([scale_value])) + self.bias = nn.Parameter(torch.Tensor([bias_value])) + + def forward(self, x): + return self.scale * x + self.bias + + +class ConvBNAct(nn.Module): + """ + ConvBNAct is a combination of convolution and batchnorm layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_size (int): Size of the convolution kernel. Defaults to 3. + stride (int): Stride of the convolution. Defaults to 1. + padding (int/str): Padding or padding type for the convolution. Defaults to 1. + groups (int): Number of groups for the convolution. Defaults to 1. + use_act: (bool): Whether to use activation function. Defaults to True. + use_lab (bool): Whether to use the LAB operation. Defaults to False. + lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. 
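+        Note:
+            When padding is not a string, the layer ignores the passed value and applies
+            (kernel_size - 1) // 2 ("same"-style) padding; string values such as "same"
+            are forwarded to nn.Conv2d unchanged.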
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + groups=1, + use_act=True, + use_lab=False, + lr_mult=1.0, + ): + super().__init__() + self.use_act = use_act + self.use_lab = use_lab + + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + padding=padding if isinstance(padding, str) else (kernel_size - 1) // 2, + # padding=(kernel_size - 1) // 2, + groups=groups, + bias=False, + ) + self.bn = nn.BatchNorm2d( + out_channels, + ) + if self.use_act: + self.act = nn.ReLU() + if self.use_lab: + self.lab = LearnableAffineBlock(lr_mult=lr_mult) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.use_act: + x = self.act(x) + if self.use_lab: + x = self.lab(x) + return x + + +class LightConvBNAct(nn.Module): + """ + LightConvBNAct is a combination of pw and dw layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + kernel_size (int): Size of the depth-wise convolution kernel. + use_lab (bool): Whether to use the LAB operation. Defaults to False. + lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + use_lab=False, + lr_mult=1.0, + **kwargs, + ): + super().__init__() + self.conv1 = ConvBNAct( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + use_act=False, + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.conv2 = ConvBNAct( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=kernel_size, + groups=out_channels, + use_act=True, + use_lab=use_lab, + lr_mult=lr_mult, + ) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + return x + + +class CustomMaxPool2d(nn.Module): + def __init__( + self, + kernel_size, + stride=None, + padding=0, + dilation=1, + return_indices=False, + ceil_mode=False, + data_format="NCHW", + ): + super(CustomMaxPool2d, self).__init__() + self.kernel_size = kernel_size if isinstance(kernel_size, (tuple, list)) else (kernel_size, kernel_size) + self.stride = stride if stride is not None else self.kernel_size + self.stride = self.stride if isinstance(self.stride, (tuple, list)) else (self.stride, self.stride) + self.dilation = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation) + self.return_indices = return_indices + self.ceil_mode = ceil_mode + self.padding_mode = padding + + # 当padding不是"same"时使用标准MaxPool2d + if padding != "same": + self.padding = padding if isinstance(padding, (tuple, list)) else (padding, padding) + self.pool = nn.MaxPool2d( + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + return_indices=self.return_indices, + ceil_mode=self.ceil_mode + ) + + def forward(self, x): + # 处理same padding + if self.padding_mode == "same": + input_height, input_width = x.size(2), x.size(3) + + # 计算期望的输出尺寸 + out_height = math.ceil(input_height / self.stride[0]) + out_width = math.ceil(input_width / self.stride[1]) + + # 计算需要的padding + pad_height = max((out_height - 1) * self.stride[0] + self.kernel_size[0] - input_height, 0) + pad_width = max((out_width - 1) * self.stride[1] + self.kernel_size[1] - input_width, 0) + + # 将padding分配到两边 + pad_top = pad_height // 2 + pad_bottom = pad_height - pad_top + pad_left = pad_width // 2 + pad_right = pad_width - pad_left + + # 应用padding + x = F.pad(x, (pad_left, pad_right, pad_top, pad_bottom)) + + # 使用标准max_pool2d函数 + if self.return_indices: + return 
F.max_pool2d_with_indices( + x, + kernel_size=self.kernel_size, + stride=self.stride, + padding=0, # 已经手动pad过了 + dilation=self.dilation, + ceil_mode=self.ceil_mode + ) + else: + return F.max_pool2d( + x, + kernel_size=self.kernel_size, + stride=self.stride, + padding=0, # 已经手动pad过了 + dilation=self.dilation, + ceil_mode=self.ceil_mode + ) + else: + # 使用预定义的MaxPool2d + return self.pool(x) + +class StemBlock(nn.Module): + """ + StemBlock for PP-HGNetV2. + + Args: + in_channels (int): Number of input channels. + mid_channels (int): Number of middle channels. + out_channels (int): Number of output channels. + use_lab (bool): Whether to use the LAB operation. Defaults to False. + lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. + """ + + def __init__( + self, + in_channels, + mid_channels, + out_channels, + use_lab=False, + lr_mult=1.0, + text_rec=False, + ): + super().__init__() + self.stem1 = ConvBNAct( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=3, + stride=2, + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.stem2a = ConvBNAct( + in_channels=mid_channels, + out_channels=mid_channels // 2, + kernel_size=2, + stride=1, + padding="same", + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.stem2b = ConvBNAct( + in_channels=mid_channels // 2, + out_channels=mid_channels, + kernel_size=2, + stride=1, + padding="same", + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.stem3 = ConvBNAct( + in_channels=mid_channels * 2, + out_channels=mid_channels, + kernel_size=3, + stride=1 if text_rec else 2, + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.stem4 = ConvBNAct( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.pool = CustomMaxPool2d( + kernel_size=2, stride=1, ceil_mode=True, padding="same" + ) + # self.pool = nn.MaxPool2d( + # kernel_size=2, stride=1, ceil_mode=True, padding=1 + # ) + + def forward(self, x): + x = self.stem1(x) + x2 = self.stem2a(x) + x2 = self.stem2b(x2) + x1 = self.pool(x) + + # if x1.shape[2:] != x2.shape[2:]: + # x1 = F.interpolate(x1, size=x2.shape[2:], mode='bilinear', align_corners=False) + + x = torch.cat([x1, x2], 1) + x = self.stem3(x) + x = self.stem4(x) + + return x + + +class HGV2_Block(nn.Module): + """ + HGV2_Block, the basic unit that constitutes the HGV2_Stage. + + Args: + in_channels (int): Number of input channels. + mid_channels (int): Number of middle channels. + out_channels (int): Number of output channels. + kernel_size (int): Size of the convolution kernel. Defaults to 3. + layer_num (int): Number of layers in the HGV2 block. Defaults to 6. + stride (int): Stride of the convolution. Defaults to 1. + padding (int/str): Padding or padding type for the convolution. Defaults to 1. + groups (int): Number of groups for the convolution. Defaults to 1. + use_act (bool): Whether to use activation function. Defaults to True. + use_lab (bool): Whether to use the LAB operation. Defaults to False. + lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. 
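+        identity (bool): Whether to add a residual connection around the block. Defaults to False.
+        light_block (bool): Use LightConvBNAct (pw + dw) instead of ConvBNAct for the inner layers. Defaults to True.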
+ """ + + def __init__( + self, + in_channels, + mid_channels, + out_channels, + kernel_size=3, + layer_num=6, + identity=False, + light_block=True, + use_lab=False, + lr_mult=1.0, + ): + super().__init__() + self.identity = identity + + self.layers = nn.ModuleList() + block_type = "LightConvBNAct" if light_block else "ConvBNAct" + for i in range(layer_num): + self.layers.append( + eval(block_type)( + in_channels=in_channels if i == 0 else mid_channels, + out_channels=mid_channels, + stride=1, + kernel_size=kernel_size, + use_lab=use_lab, + lr_mult=lr_mult, + ) + ) + # feature aggregation + total_channels = in_channels + layer_num * mid_channels + self.aggregation_squeeze_conv = ConvBNAct( + in_channels=total_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + use_lab=use_lab, + lr_mult=lr_mult, + ) + self.aggregation_excitation_conv = ConvBNAct( + in_channels=out_channels // 2, + out_channels=out_channels, + kernel_size=1, + stride=1, + use_lab=use_lab, + lr_mult=lr_mult, + ) + + def forward(self, x): + identity = x + output = [] + output.append(x) + for layer in self.layers: + x = layer(x) + output.append(x) + x = torch.cat(output, dim=1) + x = self.aggregation_squeeze_conv(x) + x = self.aggregation_excitation_conv(x) + if self.identity: + x += identity + return x + + +class HGV2_Stage(nn.Module): + """ + HGV2_Stage, the basic unit that constitutes the PPHGNetV2. + + Args: + in_channels (int): Number of input channels. + mid_channels (int): Number of middle channels. + out_channels (int): Number of output channels. + block_num (int): Number of blocks in the HGV2 stage. + layer_num (int): Number of layers in the HGV2 block. Defaults to 6. + is_downsample (bool): Whether to use downsampling operation. Defaults to False. + light_block (bool): Whether to use light block. Defaults to True. + kernel_size (int): Size of the convolution kernel. Defaults to 3. + use_lab (bool, optional): Whether to use the LAB operation. Defaults to False. + lr_mult (float, optional): Learning rate multiplier for the layer. Defaults to 1.0. 
+ """ + + def __init__( + self, + in_channels, + mid_channels, + out_channels, + block_num, + layer_num=6, + is_downsample=True, + light_block=True, + kernel_size=3, + use_lab=False, + stride=2, + lr_mult=1.0, + ): + + super().__init__() + self.is_downsample = is_downsample + if self.is_downsample: + self.downsample = ConvBNAct( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=stride, + groups=in_channels, + use_act=False, + use_lab=use_lab, + lr_mult=lr_mult, + ) + + blocks_list = [] + for i in range(block_num): + blocks_list.append( + HGV2_Block( + in_channels=in_channels if i == 0 else out_channels, + mid_channels=mid_channels, + out_channels=out_channels, + kernel_size=kernel_size, + layer_num=layer_num, + identity=False if i == 0 else True, + light_block=light_block, + use_lab=use_lab, + lr_mult=lr_mult, + ) + ) + self.blocks = nn.Sequential(*blocks_list) + + def forward(self, x): + if self.is_downsample: + x = self.downsample(x) + x = self.blocks(x) + return x + + +class DropoutInferDownscale(nn.Module): + """ + 实现与Paddle的mode="downscale_in_infer"等效的Dropout + 训练模式:out = input * mask(直接应用掩码,不进行放大) + 推理模式:out = input * (1.0 - p)(在推理时按概率缩小) + """ + + def __init__(self, p=0.5): + super().__init__() + self.p = p + + def forward(self, x): + if self.training: + # 训练时:应用随机mask但不放大 + return F.dropout(x, self.p, training=True) * (1.0 - self.p) + else: + # 推理时:按照dropout概率缩小输出 + return x * (1.0 - self.p) + +class PPHGNetV2(nn.Module): + """ + PPHGNetV2 + + Args: + stage_config (dict): Config for PPHGNetV2 stages. such as the number of channels, stride, etc. + stem_channels: (list): Number of channels of the stem of the PPHGNetV2. + use_lab (bool): Whether to use the LAB operation. Defaults to False. + use_last_conv (bool): Whether to use the last conv layer as the output channel. Defaults to True. + class_expand (int): Number of channels for the last 1x1 convolutional layer. + drop_prob (float): Dropout probability for the last 1x1 convolutional layer. Defaults to 0.0. + class_num (int): The number of classes for the classification layer. Defaults to 1000. + lr_mult_list (list): Learning rate multiplier for the stages. Defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. + Returns: + model: nn.Layer. Specific PPHGNetV2 model depends on args. 
+ """ + + def __init__( + self, + stage_config, + stem_channels=[3, 32, 64], + use_lab=False, + use_last_conv=True, + class_expand=2048, + dropout_prob=0.0, + class_num=1000, + lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], + det=False, + text_rec=False, + out_indices=None, + **kwargs, + ): + super().__init__() + self.det = det + self.text_rec = text_rec + self.use_lab = use_lab + self.use_last_conv = use_last_conv + self.class_expand = class_expand + self.class_num = class_num + self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3] + self.out_channels = [] + + # stem + self.stem = StemBlock( + in_channels=stem_channels[0], + mid_channels=stem_channels[1], + out_channels=stem_channels[2], + use_lab=use_lab, + lr_mult=lr_mult_list[0], + text_rec=text_rec, + ) + + # stages + self.stages = nn.ModuleList() + for i, k in enumerate(stage_config): + ( + in_channels, + mid_channels, + out_channels, + block_num, + is_downsample, + light_block, + kernel_size, + layer_num, + stride, + ) = stage_config[k] + self.stages.append( + HGV2_Stage( + in_channels, + mid_channels, + out_channels, + block_num, + layer_num, + is_downsample, + light_block, + kernel_size, + use_lab, + stride, + lr_mult=lr_mult_list[i + 1], + ) + ) + if i in self.out_indices: + self.out_channels.append(out_channels) + if not self.det: + self.out_channels = stage_config["stage4"][2] + + self.avg_pool = AdaptiveAvgPool2D(1) + + if self.use_last_conv: + self.last_conv = nn.Conv2d( + in_channels=out_channels, + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias=False, + ) + self.act = nn.ReLU() + if self.use_lab: + self.lab = LearnableAffineBlock() + self.dropout = DropoutInferDownscale(p=dropout_prob) + + self.flatten = nn.Flatten(start_dim=1, end_dim=-1) + if not self.det: + self.fc = nn.Linear( + self.class_expand if self.use_last_conv else out_channels, + self.class_num, + ) + + self._init_weights() + + def _init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.stem(x) + out = [] + for i, stage in enumerate(self.stages): + x = stage(x) + if self.det and i in self.out_indices: + out.append(x) + if self.det: + return out + + if self.text_rec: + if self.training: + x = F.adaptive_avg_pool2d(x, [1, 40]) + else: + x = F.avg_pool2d(x, [3, 2]) + return x + + +def PPHGNetV2_B0(pretrained=False, use_ssld=False, **kwargs): + """ + PPHGNetV2_B0 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B0` model depends on args. + """ + stage_config = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [16, 16, 64, 1, False, False, 3, 3], + "stage2": [64, 32, 256, 1, True, False, 3, 3], + "stage3": [256, 64, 512, 2, True, True, 5, 3], + "stage4": [512, 128, 1024, 1, True, True, 5, 3], + } + + model = PPHGNetV2( + stem_channels=[3, 16, 16], stage_config=stage_config, use_lab=True, **kwargs + ) + return model + + +def PPHGNetV2_B1(pretrained=False, use_ssld=False, **kwargs): + """ + PPHGNetV2_B1 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. 
+ If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B1` model depends on args. + """ + stage_config = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [32, 32, 64, 1, False, False, 3, 3], + "stage2": [64, 48, 256, 1, True, False, 3, 3], + "stage3": [256, 96, 512, 2, True, True, 5, 3], + "stage4": [512, 192, 1024, 1, True, True, 5, 3], + } + + model = PPHGNetV2( + stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs + ) + return model + + +def PPHGNetV2_B2(pretrained=False, use_ssld=False, **kwargs): + """ + PPHGNetV2_B2 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B2` model depends on args. + """ + stage_config = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [32, 32, 96, 1, False, False, 3, 4], + "stage2": [96, 64, 384, 1, True, False, 3, 4], + "stage3": [384, 128, 768, 3, True, True, 5, 4], + "stage4": [768, 256, 1536, 1, True, True, 5, 4], + } + + model = PPHGNetV2( + stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs + ) + return model + + +def PPHGNetV2_B3(pretrained=False, use_ssld=False, **kwargs): + """ + PPHGNetV2_B3 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B3` model depends on args. + """ + stage_config = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [32, 32, 128, 1, False, False, 3, 5], + "stage2": [128, 64, 512, 1, True, False, 3, 5], + "stage3": [512, 128, 1024, 3, True, True, 5, 5], + "stage4": [1024, 256, 2048, 1, True, True, 5, 5], + } + + model = PPHGNetV2( + stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs + ) + return model + + +def PPHGNetV2_B4(pretrained=False, use_ssld=False, det=False, text_rec=False, **kwargs): + """ + PPHGNetV2_B4 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B4` model depends on args. 
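+        Example (construction only; a minimal sketch, weights and preprocessing come from the
+        surrounding predict_det / predict_rec tooling):
+            >>> det_backbone = PPHGNetV2_B4(det=True)        # multi-scale features for detection
+            >>> rec_backbone = PPHGNetV2_B4(text_rec=True)   # pooled features for recognition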
+ """ + stage_config_rec = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride + "stage1": [48, 48, 128, 1, True, False, 3, 6, [2, 1]], + "stage2": [128, 96, 512, 1, True, False, 3, 6, [1, 2]], + "stage3": [512, 192, 1024, 3, True, True, 5, 6, [2, 1]], + "stage4": [1024, 384, 2048, 1, True, True, 5, 6, [2, 1]], + } + + stage_config_det = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [48, 48, 128, 1, False, False, 3, 6, 2], + "stage2": [128, 96, 512, 1, True, False, 3, 6, 2], + "stage3": [512, 192, 1024, 3, True, True, 5, 6, 2], + "stage4": [1024, 384, 2048, 1, True, True, 5, 6, 2], + } + model = PPHGNetV2( + stem_channels=[3, 32, 48], + stage_config=stage_config_det if det else stage_config_rec, + use_lab=False, + det=det, + text_rec=text_rec, + **kwargs, + ) + return model + + +def PPHGNetV2_B5(pretrained=False, use_ssld=False, **kwargs): + """ + PPHGNetV2_B5 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B5` model depends on args. + """ + stage_config = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [64, 64, 128, 1, False, False, 3, 6], + "stage2": [128, 128, 512, 2, True, False, 3, 6], + "stage3": [512, 256, 1024, 5, True, True, 5, 6], + "stage4": [1024, 512, 2048, 2, True, True, 5, 6], + } + + model = PPHGNetV2( + stem_channels=[3, 32, 64], stage_config=stage_config, use_lab=False, **kwargs + ) + return model + + +def PPHGNetV2_B6(pretrained=False, use_ssld=False, **kwargs): + """ + PPHGNetV2_B6 + Args: + pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld (bool) Whether using ssld pretrained model when pretrained is True. + Returns: + model: nn.Layer. Specific `PPHGNetV2_B6` model depends on args. + """ + stage_config = { + # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num + "stage1": [96, 96, 192, 2, False, False, 3, 6], + "stage2": [192, 192, 512, 3, True, False, 3, 6], + "stage3": [512, 384, 1024, 6, True, True, 5, 6], + "stage4": [1024, 768, 2048, 3, True, True, 5, 6], + } + + model = PPHGNetV2( + stem_channels=[3, 48, 96], stage_config=stage_config, use_lab=False, **kwargs + ) + return model diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py new file mode 100644 index 00000000..3a117736 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py @@ -0,0 +1,638 @@ +import numpy as np +import torch +from torch import nn + +from ..common import Activation + + +def drop_path(x, drop_prob=0.0, training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... 
+ """ + if drop_prob == 0.0 or not training: + return x + keep_prob = torch.as_tensor(1 - drop_prob) + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype) + random_tensor = torch.floor(random_tensor) # binarize + output = x.divide(keep_prob) * random_tensor + return output + + +class ConvBNLayer(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=0, + bias_attr=False, + groups=1, + act="gelu", + ): + super().__init__() + self.conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=bias_attr, + ) + self.norm = nn.BatchNorm2d(out_channels) + self.act = Activation(act_type=act, inplace=True) + + def forward(self, inputs): + out = self.conv(inputs) + out = self.norm(out) + out = self.act(out) + return out + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class Identity(nn.Module): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer="gelu", + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = Activation(act_type=act_layer, inplace=True) + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class ConvMixer(nn.Module): + def __init__( + self, + dim, + num_heads=8, + HW=[8, 25], + local_k=[3, 3], + ): + super().__init__() + self.HW = HW + self.dim = dim + self.local_mixer = nn.Conv2d( + dim, + dim, + local_k, + 1, + [local_k[0] // 2, local_k[1] // 2], + groups=num_heads, + ) + + def forward(self, x): + h = self.HW[0] + w = self.HW[1] + x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w]) + x = self.local_mixer(x) + x = x.flatten(2).permute(0, 2, 1) + return x + + +class Attention(nn.Module): + def __init__( + self, + dim, + num_heads=8, + mixer="Global", + HW=[8, 25], + local_k=[7, 11], + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.HW = HW + if HW is not None: + H = HW[0] + W = HW[1] + self.N = H * W + self.C = dim + if mixer == "Local" and HW is not None: + hk = local_k[0] + wk = local_k[1] + mask = torch.ones(H * W, H + hk - 1, W + wk - 1, dtype=torch.float32) + for h in range(0, H): + for w in range(0, W): + mask[h * W + w, h : h + hk, w : w + wk] = 0.0 + mask_paddle = mask[:, hk // 2 : H + hk // 2, wk // 2 : W + wk // 2].flatten( + 1 + ) + mask_inf = torch.full( + [H * W, H * W], fill_value=float("-Inf"), dtype=torch.float32 + ) + mask = torch.where(mask_paddle < 1, mask_paddle, mask_inf) + self.mask = 
mask.unsqueeze(0).unsqueeze(1) + # self.mask = mask[None, None, :] + self.mixer = mixer + + def forward(self, x): + if self.HW is not None: + N = self.N + C = self.C + else: + _, N, C = x.shape + qkv = self.qkv(x) + qkv = qkv.reshape((-1, N, 3, self.num_heads, C // self.num_heads)).permute( + 2, 0, 3, 1, 4 + ) + q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] + + attn = q.matmul(k.permute(0, 1, 3, 2)) + if self.mixer == "Local": + attn += self.mask + attn = nn.functional.softmax(attn, dim=-1) + attn = self.attn_drop(attn) + + x = (attn.matmul(v)).permute(0, 2, 1, 3).reshape((-1, N, C)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Module): + def __init__( + self, + dim, + num_heads, + mixer="Global", + local_mixer=[7, 11], + HW=None, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer="gelu", + norm_layer="nn.LayerNorm", + epsilon=1e-6, + prenorm=True, + ): + super().__init__() + if isinstance(norm_layer, str): + self.norm1 = eval(norm_layer)(dim, eps=epsilon) + else: + self.norm1 = norm_layer(dim) + if mixer == "Global" or mixer == "Local": + self.mixer = Attention( + dim, + num_heads=num_heads, + mixer=mixer, + HW=HW, + local_k=local_mixer, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + elif mixer == "Conv": + self.mixer = ConvMixer(dim, num_heads=num_heads, HW=HW, local_k=local_mixer) + else: + raise TypeError("The mixer must be one of [Global, Local, Conv]") + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity() + if isinstance(norm_layer, str): + self.norm2 = eval(norm_layer)(dim, eps=epsilon) + else: + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp_ratio = mlp_ratio + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + self.prenorm = prenorm + + def forward(self, x): + if self.prenorm: + x = self.norm1(x + self.drop_path(self.mixer(x))) + x = self.norm2(x + self.drop_path(self.mlp(x))) + else: + x = x + self.drop_path(self.mixer(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding""" + + def __init__( + self, + img_size=[32, 100], + in_channels=3, + embed_dim=768, + sub_num=2, + patch_size=[4, 4], + mode="pope", + ): + super().__init__() + num_patches = (img_size[1] // (2**sub_num)) * (img_size[0] // (2**sub_num)) + self.img_size = img_size + self.num_patches = num_patches + self.embed_dim = embed_dim + self.norm = None + if mode == "pope": + if sub_num == 2: + self.proj = nn.Sequential( + ConvBNLayer( + in_channels=in_channels, + out_channels=embed_dim // 2, + kernel_size=3, + stride=2, + padding=1, + act="gelu", + bias_attr=True, + ), + ConvBNLayer( + in_channels=embed_dim // 2, + out_channels=embed_dim, + kernel_size=3, + stride=2, + padding=1, + act="gelu", + bias_attr=True, + ), + ) + if sub_num == 3: + self.proj = nn.Sequential( + ConvBNLayer( + in_channels=in_channels, + out_channels=embed_dim // 4, + kernel_size=3, + stride=2, + padding=1, + act="gelu", + bias_attr=True, + ), + ConvBNLayer( + in_channels=embed_dim // 4, + out_channels=embed_dim // 2, + kernel_size=3, + stride=2, + padding=1, + act="gelu", + bias_attr=True, + ), + ConvBNLayer( + in_channels=embed_dim // 2, + out_channels=embed_dim, + kernel_size=3, + stride=2, + padding=1, + act="gelu", + bias_attr=True, + ), + ) + elif mode == "linear": + self.proj = nn.Conv2d( + 1, embed_dim, 
kernel_size=patch_size, stride=patch_size + ) + self.num_patches = ( + img_size[0] // patch_size[0] * img_size[1] // patch_size[1] + ) + + def forward(self, x): + B, C, H, W = x.shape + assert ( + H == self.img_size[0] and W == self.img_size[1] + ), "Input image size ({}*{}) doesn't match model ({}*{}).".format( + H, W, self.img_size[0], self.img_size[1] + ) + x = self.proj(x).flatten(2).permute(0, 2, 1) + return x + + +class SubSample(nn.Module): + def __init__( + self, + in_channels, + out_channels, + types="Pool", + stride=[2, 1], + sub_norm="nn.LayerNorm", + act=None, + ): + super().__init__() + self.types = types + if types == "Pool": + self.avgpool = nn.AvgPool2d( + kernel_size=[3, 5], stride=stride, padding=[1, 2] + ) + self.maxpool = nn.MaxPool2d( + kernel_size=[3, 5], stride=stride, padding=[1, 2] + ) + self.proj = nn.Linear(in_channels, out_channels) + else: + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + ) + self.norm = eval(sub_norm)(out_channels) + if act is not None: + self.act = act() + else: + self.act = None + + def forward(self, x): + if self.types == "Pool": + x1 = self.avgpool(x) + x2 = self.maxpool(x) + x = (x1 + x2) * 0.5 + out = self.proj(x.flatten(2).permute(0, 2, 1)) + else: + x = self.conv(x) + out = x.flatten(2).permute(0, 2, 1) + out = self.norm(out) + if self.act is not None: + out = self.act(out) + + return out + + +class SVTRNet(nn.Module): + def __init__( + self, + img_size=[32, 100], + in_channels=3, + embed_dim=[64, 128, 256], + depth=[3, 6, 3], + num_heads=[2, 4, 8], + mixer=["Local"] * 6 + ["Global"] * 6, # Local atten, Global atten, Conv + local_mixer=[[7, 11], [7, 11], [7, 11]], + patch_merging="Conv", # Conv, Pool, None + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + last_drop=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_layer="nn.LayerNorm", + sub_norm="nn.LayerNorm", + epsilon=1e-6, + out_channels=192, + out_char_num=25, + block_unit="Block", + act="gelu", + last_stage=True, + sub_num=2, + prenorm=True, + use_lenhead=False, + **kwargs + ): + super().__init__() + self.img_size = img_size + self.embed_dim = embed_dim + self.out_channels = out_channels + self.prenorm = prenorm + patch_merging = ( + None + if patch_merging != "Conv" and patch_merging != "Pool" + else patch_merging + ) + self.patch_embed = PatchEmbed( + img_size=img_size, + in_channels=in_channels, + embed_dim=embed_dim[0], + sub_num=sub_num, + ) + num_patches = self.patch_embed.num_patches + self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)] + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim[0])) + self.pos_drop = nn.Dropout(p=drop_rate) + Block_unit = eval(block_unit) + + dpr = np.linspace(0, drop_path_rate, sum(depth)) + self.blocks1 = nn.ModuleList( + [ + Block_unit( + dim=embed_dim[0], + num_heads=num_heads[0], + mixer=mixer[0 : depth[0]][i], + HW=self.HW, + local_mixer=local_mixer[0], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer=act, + attn_drop=attn_drop_rate, + drop_path=dpr[0 : depth[0]][i], + norm_layer=norm_layer, + epsilon=epsilon, + prenorm=prenorm, + ) + for i in range(depth[0]) + ] + ) + if patch_merging is not None: + self.sub_sample1 = SubSample( + embed_dim[0], + embed_dim[1], + sub_norm=sub_norm, + stride=[2, 1], + types=patch_merging, + ) + HW = [self.HW[0] // 2, self.HW[1]] + else: + HW = self.HW + self.patch_merging = patch_merging + self.blocks2 = nn.ModuleList( + [ + Block_unit( + 
dim=embed_dim[1], + num_heads=num_heads[1], + mixer=mixer[depth[0] : depth[0] + depth[1]][i], + HW=HW, + local_mixer=local_mixer[1], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer=act, + attn_drop=attn_drop_rate, + drop_path=dpr[depth[0] : depth[0] + depth[1]][i], + norm_layer=norm_layer, + epsilon=epsilon, + prenorm=prenorm, + ) + for i in range(depth[1]) + ] + ) + if patch_merging is not None: + self.sub_sample2 = SubSample( + embed_dim[1], + embed_dim[2], + sub_norm=sub_norm, + stride=[2, 1], + types=patch_merging, + ) + HW = [self.HW[0] // 4, self.HW[1]] + else: + HW = self.HW + self.blocks3 = nn.ModuleList( + [ + Block_unit( + dim=embed_dim[2], + num_heads=num_heads[2], + mixer=mixer[depth[0] + depth[1] :][i], + HW=HW, + local_mixer=local_mixer[2], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer=act, + attn_drop=attn_drop_rate, + drop_path=dpr[depth[0] + depth[1] :][i], + norm_layer=norm_layer, + epsilon=epsilon, + prenorm=prenorm, + ) + for i in range(depth[2]) + ] + ) + self.last_stage = last_stage + if last_stage: + self.avg_pool = nn.AdaptiveAvgPool2d([1, out_char_num]) + self.last_conv = nn.Conv2d( + in_channels=embed_dim[2], + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False, + ) + self.hardswish = Activation("hard_swish", inplace=True) # nn.Hardswish() + # self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer") + self.dropout = nn.Dropout(p=last_drop) + if not prenorm: + self.norm = eval(norm_layer)(embed_dim[-1], eps=epsilon) + self.use_lenhead = use_lenhead + if use_lenhead: + self.len_conv = nn.Linear(embed_dim[2], self.out_channels) + self.hardswish_len = Activation( + "hard_swish", inplace=True + ) # nn.Hardswish() + self.dropout_len = nn.Dropout(p=last_drop) + + torch.nn.init.xavier_normal_(self.pos_embed) + self.apply(self._init_weights) + + def _init_weights(self, m): + # weight initialization + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.ConvTranspose2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + def forward_features(self, x): + x = self.patch_embed(x) + x = x + self.pos_embed + x = self.pos_drop(x) + for blk in self.blocks1: + x = blk(x) + if self.patch_merging is not None: + x = self.sub_sample1( + x.permute(0, 2, 1).reshape( + [-1, self.embed_dim[0], self.HW[0], self.HW[1]] + ) + ) + for blk in self.blocks2: + x = blk(x) + if self.patch_merging is not None: + x = self.sub_sample2( + x.permute(0, 2, 1).reshape( + [-1, self.embed_dim[1], self.HW[0] // 2, self.HW[1]] + ) + ) + for blk in self.blocks3: + x = blk(x) + if not self.prenorm: + x = self.norm(x) + return x + + def forward(self, x): + x = self.forward_features(x) + if self.use_lenhead: + len_x = self.len_conv(x.mean(1)) + len_x = self.dropout_len(self.hardswish_len(len_x)) + if self.last_stage: + if self.patch_merging is not None: + h = self.HW[0] // 4 + else: + h = self.HW[0] + x = self.avg_pool( + x.permute(0, 2, 1).reshape([-1, self.embed_dim[2], h, self.HW[1]]) + ) + x = self.last_conv(x) + x = 
self.hardswish(x) + x = self.dropout(x) + if self.use_lenhead: + return x, len_x + return x diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py new file mode 100644 index 00000000..ec1b30cc --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py @@ -0,0 +1,76 @@ +import torch +import torch.nn.functional as F +from torch import nn + + +class Hswish(nn.Module): + def __init__(self, inplace=True): + super(Hswish, self).__init__() + self.inplace = inplace + + def forward(self, x): + return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0 + + +# out = max(0, min(1, slop*x+offset)) +# paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) +class Hsigmoid(nn.Module): + def __init__(self, inplace=True): + super(Hsigmoid, self).__init__() + self.inplace = inplace + + def forward(self, x): + # torch: F.relu6(x + 3., inplace=self.inplace) / 6. + # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6. + return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0 + + +class GELU(nn.Module): + def __init__(self, inplace=True): + super(GELU, self).__init__() + self.inplace = inplace + + def forward(self, x): + return torch.nn.functional.gelu(x) + + +class Swish(nn.Module): + def __init__(self, inplace=True): + super(Swish, self).__init__() + self.inplace = inplace + + def forward(self, x): + if self.inplace: + x.mul_(torch.sigmoid(x)) + return x + else: + return x * torch.sigmoid(x) + + +class Activation(nn.Module): + def __init__(self, act_type, inplace=True): + super(Activation, self).__init__() + act_type = act_type.lower() + if act_type == "relu": + self.act = nn.ReLU(inplace=inplace) + elif act_type == "relu6": + self.act = nn.ReLU6(inplace=inplace) + elif act_type == "sigmoid": + raise NotImplementedError + elif act_type == "hard_sigmoid": + self.act = Hsigmoid( + inplace + ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# + elif act_type == "hard_swish" or act_type == "hswish": + self.act = Hswish(inplace=inplace) + elif act_type == "leakyrelu": + self.act = nn.LeakyReLU(inplace=inplace) + elif act_type == "gelu": + self.act = GELU(inplace=inplace) + elif act_type == "swish": + self.act = Swish(inplace=inplace) + else: + raise NotImplementedError + + def forward(self, inputs): + return self.act(inputs) diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py new file mode 100644 index 00000000..00428c43 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py @@ -0,0 +1,43 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
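+# build_head maps the "name" field of a head config to one of the supported
+# det / rec / cls head classes listed in support_dict below.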
+ +__all__ = ["build_head"] + + +def build_head(config, **kwargs): + # det head + from .det_db_head import DBHead, PFHeadLocal + + # rec head + from .rec_ctc_head import CTCHead + from .rec_multi_head import MultiHead + + # cls head + from .cls_head import ClsHead + + support_dict = [ + "DBHead", + "CTCHead", + "ClsHead", + "MultiHead", + "PFHeadLocal", + ] + + module_name = config.pop("name") + char_num = config.pop("char_num", 6625) + assert module_name in support_dict, Exception( + "head only support {}".format(support_dict) + ) + module_class = eval(module_name)(**config, **kwargs) + return module_class diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py new file mode 100644 index 00000000..9353b9eb --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py @@ -0,0 +1,23 @@ +import torch +import torch.nn.functional as F +from torch import nn + + +class ClsHead(nn.Module): + """ + Class orientation + Args: + params(dict): super parameters for build Class network + """ + + def __init__(self, in_channels, class_dim, **kwargs): + super(ClsHead, self).__init__() + self.pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Linear(in_channels, class_dim, bias=True) + + def forward(self, x): + x = self.pool(x) + x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) + x = self.fc(x) + x = F.softmax(x, dim=1) + return x diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py new file mode 100644 index 00000000..7c119683 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py @@ -0,0 +1,109 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from ..common import Activation +from ..backbones.det_mobilenet_v3 import ConvBNLayer + +class Head(nn.Module): + def __init__(self, in_channels, **kwargs): + super(Head, self).__init__() + self.conv1 = nn.Conv2d( + in_channels=in_channels, + out_channels=in_channels // 4, + kernel_size=3, + padding=1, + bias=False) + self.conv_bn1 = nn.BatchNorm2d( + in_channels // 4) + self.relu1 = Activation(act_type='relu') + + self.conv2 = nn.ConvTranspose2d( + in_channels=in_channels // 4, + out_channels=in_channels // 4, + kernel_size=2, + stride=2) + self.conv_bn2 = nn.BatchNorm2d( + in_channels // 4) + self.relu2 = Activation(act_type='relu') + + self.conv3 = nn.ConvTranspose2d( + in_channels=in_channels // 4, + out_channels=1, + kernel_size=2, + stride=2) + + def forward(self, x, return_f=False): + x = self.conv1(x) + x = self.conv_bn1(x) + x = self.relu1(x) + x = self.conv2(x) + x = self.conv_bn2(x) + x = self.relu2(x) + if return_f is True: + f = x + x = self.conv3(x) + x = torch.sigmoid(x) + if return_f is True: + return x, f + return x + + +class DBHead(nn.Module): + """ + Differentiable Binarization (DB) for text detection: + see https://arxiv.org/abs/1911.08947 + args: + params(dict): super parameters for build DB network + """ + + def __init__(self, in_channels, k=50, **kwargs): + super(DBHead, self).__init__() + self.k = k + binarize_name_list = [ + 'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48', + 'conv2d_transpose_1', 'binarize' + ] + thresh_name_list = [ + 'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', + 'conv2d_transpose_3', 'thresh' + ] + self.binarize = Head(in_channels, **kwargs)# binarize_name_list) + 
self.thresh = Head(in_channels, **kwargs)#thresh_name_list) + + def step_function(self, x, y): + return torch.reciprocal(1 + torch.exp(-self.k * (x - y))) + + def forward(self, x): + shrink_maps = self.binarize(x) + return {'maps': shrink_maps} + + +class LocalModule(nn.Module): + def __init__(self, in_c, mid_c, use_distance=True): + super(self.__class__, self).__init__() + self.last_3 = ConvBNLayer(in_c + 1, mid_c, 3, 1, 1, act='relu') + self.last_1 = nn.Conv2d(mid_c, 1, 1, 1, 0) + + def forward(self, x, init_map, distance_map): + outf = torch.cat([init_map, x], dim=1) + # last Conv + out = self.last_1(self.last_3(outf)) + return out + +class PFHeadLocal(DBHead): + def __init__(self, in_channels, k=50, mode='small', **kwargs): + super(PFHeadLocal, self).__init__(in_channels, k, **kwargs) + self.mode = mode + + self.up_conv = nn.Upsample(scale_factor=2, mode="nearest") + if self.mode == 'large': + self.cbn_layer = LocalModule(in_channels // 4, in_channels // 4) + elif self.mode == 'small': + self.cbn_layer = LocalModule(in_channels // 4, in_channels // 8) + + def forward(self, x, targets=None): + shrink_maps, f = self.binarize(x, return_f=True) + base_maps = shrink_maps + cbn_maps = self.cbn_layer(self.up_conv(f), shrink_maps, None) + cbn_maps = F.sigmoid(cbn_maps) + return {'maps': 0.5 * (base_maps + cbn_maps), 'cbn_maps': cbn_maps} \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py new file mode 100644 index 00000000..42e2fabb --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py @@ -0,0 +1,54 @@ +import torch.nn.functional as F +from torch import nn + + +class CTCHead(nn.Module): + def __init__( + self, + in_channels, + out_channels=6625, + fc_decay=0.0004, + mid_channels=None, + return_feats=False, + **kwargs + ): + super(CTCHead, self).__init__() + if mid_channels is None: + self.fc = nn.Linear( + in_channels, + out_channels, + bias=True, + ) + else: + self.fc1 = nn.Linear( + in_channels, + mid_channels, + bias=True, + ) + self.fc2 = nn.Linear( + mid_channels, + out_channels, + bias=True, + ) + + self.out_channels = out_channels + self.mid_channels = mid_channels + self.return_feats = return_feats + + def forward(self, x, labels=None): + if self.mid_channels is None: + predicts = self.fc(x) + else: + x = self.fc1(x) + predicts = self.fc2(x) + + if self.return_feats: + result = (x, predicts) + else: + result = predicts + + if not self.training: + predicts = F.softmax(predicts, dim=2) + result = predicts + + return result diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py new file mode 100644 index 00000000..a4807cbb --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py @@ -0,0 +1,58 @@ +from torch import nn + +from ..necks.rnn import Im2Seq, SequenceEncoder +from .rec_ctc_head import CTCHead + + +class FCTranspose(nn.Module): + def __init__(self, in_channels, out_channels, only_transpose=False): + super().__init__() + self.only_transpose = only_transpose + if not self.only_transpose: + self.fc = nn.Linear(in_channels, out_channels, bias=False) + + def forward(self, x): + if self.only_transpose: + return x.permute([0, 2, 1]) + else: + return self.fc(x.permute([0, 2, 1])) + + +class MultiHead(nn.Module): + def __init__(self, in_channels, 
out_channels_list, **kwargs): + super().__init__() + self.head_list = kwargs.pop("head_list") + + self.gtc_head = "sar" + assert len(self.head_list) >= 2 + for idx, head_name in enumerate(self.head_list): + name = list(head_name)[0] + if name == "SARHead": + pass + + elif name == "NRTRHead": + pass + elif name == "CTCHead": + # ctc neck + self.encoder_reshape = Im2Seq(in_channels) + neck_args = self.head_list[idx][name]["Neck"] + encoder_type = neck_args.pop("name") + self.ctc_encoder = SequenceEncoder( + in_channels=in_channels, encoder_type=encoder_type, **neck_args + ) + # ctc head + head_args = self.head_list[idx][name].get("Head", {}) + if head_args is None: + head_args = {} + + self.ctc_head = CTCHead( + in_channels=self.ctc_encoder.out_channels, + out_channels=out_channels_list["CTCLabelDecode"], + **head_args, + ) + else: + raise NotImplementedError(f"{name} is not supported in MultiHead yet") + + def forward(self, x, data=None): + ctc_encoder = self.ctc_encoder(x) + return self.ctc_head(ctc_encoder) diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py new file mode 100644 index 00000000..bbe85bc6 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ["build_neck"] + + +def build_neck(config): + from .db_fpn import DBFPN, LKPAN, RSEFPN + from .rnn import SequenceEncoder + + support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"] + + module_name = config.pop("name") + assert module_name in support_dict, Exception( + "neck only support {}".format(support_dict) + ) + module_class = eval(module_name)(**config) + return module_class diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py new file mode 100644 index 00000000..9c8460a2 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py @@ -0,0 +1,456 @@ +import torch +import torch.nn.functional as F +from torch import nn + +from ..backbones.det_mobilenet_v3 import SEModule +from ..necks.intracl import IntraCLBlock + + +def hard_swish(x, inplace=True): + return x * F.relu6(x + 3.0, inplace=inplace) / 6.0 + + +class DSConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + padding, + stride=1, + groups=None, + if_act=True, + act="relu", + **kwargs + ): + super(DSConv, self).__init__() + if groups == None: + groups = in_channels + self.if_act = if_act + self.act = act + self.conv1 = nn.Conv2d( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=False, + ) + + self.bn1 = nn.BatchNorm2d(in_channels) + + self.conv2 = nn.Conv2d( + in_channels=in_channels, + out_channels=int(in_channels * 4), + kernel_size=1, + stride=1, + bias=False, + ) + + self.bn2 = nn.BatchNorm2d(int(in_channels * 4)) + + self.conv3 = nn.Conv2d( + in_channels=int(in_channels * 4), + out_channels=out_channels, + kernel_size=1, + stride=1, + bias=False, + ) + self._c = [in_channels, out_channels] + if in_channels != out_channels: + self.conv_end = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + bias=False, + ) + + def forward(self, inputs): + x = self.conv1(inputs) + x = self.bn1(x) + + x = self.conv2(x) + x = self.bn2(x) + if self.if_act: + if self.act == "relu": + x = F.relu(x) + elif self.act == "hardswish": + x = hard_swish(x) + else: + print( + "The activation function({}) is selected incorrectly.".format( + self.act + ) + ) + exit() + + x = self.conv3(x) + if self._c[0] != self._c[1]: + x = x + self.conv_end(inputs) + return x + + +class DBFPN(nn.Module): + def __init__(self, in_channels, out_channels, use_asf=False, **kwargs): + super(DBFPN, self).__init__() + self.out_channels = out_channels + self.use_asf = use_asf + + self.in2_conv = nn.Conv2d( + in_channels=in_channels[0], + out_channels=self.out_channels, + kernel_size=1, + bias=False, + ) + self.in3_conv = nn.Conv2d( + in_channels=in_channels[1], + out_channels=self.out_channels, + kernel_size=1, + bias=False, + ) + self.in4_conv = nn.Conv2d( + in_channels=in_channels[2], + out_channels=self.out_channels, + kernel_size=1, + bias=False, + ) + self.in5_conv = nn.Conv2d( + in_channels=in_channels[3], + out_channels=self.out_channels, + kernel_size=1, + bias=False, + ) + self.p5_conv = nn.Conv2d( + in_channels=self.out_channels, + out_channels=self.out_channels // 4, + kernel_size=3, + padding=1, + bias=False, + ) + self.p4_conv = nn.Conv2d( + in_channels=self.out_channels, + out_channels=self.out_channels // 4, + kernel_size=3, + padding=1, + bias=False, + ) + self.p3_conv = nn.Conv2d( + in_channels=self.out_channels, + 
out_channels=self.out_channels // 4, + kernel_size=3, + padding=1, + bias=False, + ) + self.p2_conv = nn.Conv2d( + in_channels=self.out_channels, + out_channels=self.out_channels // 4, + kernel_size=3, + padding=1, + bias=False, + ) + + if self.use_asf is True: + self.asf = ASFBlock(self.out_channels, self.out_channels // 4) + + def forward(self, x): + c2, c3, c4, c5 = x + + in5 = self.in5_conv(c5) + in4 = self.in4_conv(c4) + in3 = self.in3_conv(c3) + in2 = self.in2_conv(c2) + + out4 = in4 + F.interpolate( + in5, + scale_factor=2, + mode="nearest", + ) # align_mode=1) # 1/16 + out3 = in3 + F.interpolate( + out4, + scale_factor=2, + mode="nearest", + ) # align_mode=1) # 1/8 + out2 = in2 + F.interpolate( + out3, + scale_factor=2, + mode="nearest", + ) # align_mode=1) # 1/4 + + p5 = self.p5_conv(in5) + p4 = self.p4_conv(out4) + p3 = self.p3_conv(out3) + p2 = self.p2_conv(out2) + p5 = F.interpolate( + p5, + scale_factor=8, + mode="nearest", + ) # align_mode=1) + p4 = F.interpolate( + p4, + scale_factor=4, + mode="nearest", + ) # align_mode=1) + p3 = F.interpolate( + p3, + scale_factor=2, + mode="nearest", + ) # align_mode=1) + + fuse = torch.cat([p5, p4, p3, p2], dim=1) + + if self.use_asf is True: + fuse = self.asf(fuse, [p5, p4, p3, p2]) + + return fuse + + +class RSELayer(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, shortcut=True): + super(RSELayer, self).__init__() + self.out_channels = out_channels + self.in_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=self.out_channels, + kernel_size=kernel_size, + padding=int(kernel_size // 2), + bias=False, + ) + self.se_block = SEModule(self.out_channels) + self.shortcut = shortcut + + def forward(self, ins): + x = self.in_conv(ins) + if self.shortcut: + out = x + self.se_block(x) + else: + out = self.se_block(x) + return out + + +class RSEFPN(nn.Module): + def __init__(self, in_channels, out_channels, shortcut=True, **kwargs): + super(RSEFPN, self).__init__() + self.out_channels = out_channels + self.ins_conv = nn.ModuleList() + self.inp_conv = nn.ModuleList() + self.intracl = False + if "intracl" in kwargs.keys() and kwargs["intracl"] is True: + self.intracl = kwargs["intracl"] + self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + + for i in range(len(in_channels)): + self.ins_conv.append( + RSELayer(in_channels[i], out_channels, kernel_size=1, shortcut=shortcut) + ) + self.inp_conv.append( + RSELayer( + out_channels, out_channels // 4, kernel_size=3, shortcut=shortcut + ) + ) + + def forward(self, x): + c2, c3, c4, c5 = x + + in5 = self.ins_conv[3](c5) + in4 = self.ins_conv[2](c4) + in3 = self.ins_conv[1](c3) + in2 = self.ins_conv[0](c2) + + out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest") # 1/16 + out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest") # 1/8 + out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest") # 1/4 + + p5 = self.inp_conv[3](in5) + p4 = self.inp_conv[2](out4) + p3 = self.inp_conv[1](out3) + p2 = self.inp_conv[0](out2) + + if self.intracl is True: + p5 = self.incl4(p5) + p4 = self.incl3(p4) + p3 = self.incl2(p3) + p2 = self.incl1(p2) + + p5 = F.interpolate(p5, scale_factor=8, mode="nearest") + p4 = F.interpolate(p4, scale_factor=4, mode="nearest") + p3 = F.interpolate(p3, scale_factor=2, mode="nearest") + + fuse = torch.cat([p5, p4, 
p3, p2], dim=1) + return fuse + + +class LKPAN(nn.Module): + def __init__(self, in_channels, out_channels, mode="large", **kwargs): + super(LKPAN, self).__init__() + self.out_channels = out_channels + + self.ins_conv = nn.ModuleList() + self.inp_conv = nn.ModuleList() + # pan head + self.pan_head_conv = nn.ModuleList() + self.pan_lat_conv = nn.ModuleList() + + if mode.lower() == "lite": + p_layer = DSConv + elif mode.lower() == "large": + p_layer = nn.Conv2d + else: + raise ValueError( + "mode can only be one of ['lite', 'large'], but received {}".format( + mode + ) + ) + + for i in range(len(in_channels)): + self.ins_conv.append( + nn.Conv2d( + in_channels=in_channels[i], + out_channels=self.out_channels, + kernel_size=1, + bias=False, + ) + ) + + self.inp_conv.append( + p_layer( + in_channels=self.out_channels, + out_channels=self.out_channels // 4, + kernel_size=9, + padding=4, + bias=False, + ) + ) + + if i > 0: + self.pan_head_conv.append( + nn.Conv2d( + in_channels=self.out_channels // 4, + out_channels=self.out_channels // 4, + kernel_size=3, + padding=1, + stride=2, + bias=False, + ) + ) + self.pan_lat_conv.append( + p_layer( + in_channels=self.out_channels // 4, + out_channels=self.out_channels // 4, + kernel_size=9, + padding=4, + bias=False, + ) + ) + self.intracl = False + if "intracl" in kwargs.keys() and kwargs["intracl"] is True: + self.intracl = kwargs["intracl"] + self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) + + def forward(self, x): + c2, c3, c4, c5 = x + + in5 = self.ins_conv[3](c5) + in4 = self.ins_conv[2](c4) + in3 = self.ins_conv[1](c3) + in2 = self.ins_conv[0](c2) + + out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest") # 1/16 + out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest") # 1/8 + out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest") # 1/4 + + f5 = self.inp_conv[3](in5) + f4 = self.inp_conv[2](out4) + f3 = self.inp_conv[1](out3) + f2 = self.inp_conv[0](out2) + + pan3 = f3 + self.pan_head_conv[0](f2) + pan4 = f4 + self.pan_head_conv[1](pan3) + pan5 = f5 + self.pan_head_conv[2](pan4) + + p2 = self.pan_lat_conv[0](f2) + p3 = self.pan_lat_conv[1](pan3) + p4 = self.pan_lat_conv[2](pan4) + p5 = self.pan_lat_conv[3](pan5) + + if self.intracl is True: + p5 = self.incl4(p5) + p4 = self.incl3(p4) + p3 = self.incl2(p3) + p2 = self.incl1(p2) + + p5 = F.interpolate(p5, scale_factor=8, mode="nearest") + p4 = F.interpolate(p4, scale_factor=4, mode="nearest") + p3 = F.interpolate(p3, scale_factor=2, mode="nearest") + + fuse = torch.cat([p5, p4, p3, p2], dim=1) + return fuse + + +class ASFBlock(nn.Module): + """ + This code is refered from: + https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py + """ + + def __init__(self, in_channels, inter_channels, out_features_num=4): + """ + Adaptive Scale Fusion (ASF) block of DBNet++ + Args: + in_channels: the number of channels in the input data + inter_channels: the number of middle channels + out_features_num: the number of fused stages + """ + super(ASFBlock, self).__init__() + self.in_channels = in_channels + self.inter_channels = inter_channels + self.out_features_num = out_features_num + self.conv = nn.Conv2d(in_channels, inter_channels, 3, padding=1) + + self.spatial_scale = nn.Sequential( + # Nx1xHxW + nn.Conv2d( + in_channels=1, + out_channels=1, + 
kernel_size=3, + bias=False, + padding=1, + ), + nn.ReLU(), + nn.Conv2d( + in_channels=1, + out_channels=1, + kernel_size=1, + bias=False, + ), + nn.Sigmoid(), + ) + + self.channel_scale = nn.Sequential( + nn.Conv2d( + in_channels=inter_channels, + out_channels=out_features_num, + kernel_size=1, + bias=False, + ), + nn.Sigmoid(), + ) + + def forward(self, fuse_features, features_list): + fuse_features = self.conv(fuse_features) + spatial_x = torch.mean(fuse_features, dim=1, keepdim=True) + attention_scores = self.spatial_scale(spatial_x) + fuse_features + attention_scores = self.channel_scale(attention_scores) + assert len(features_list) == self.out_features_num + + out_list = [] + for i in range(self.out_features_num): + out_list.append(attention_scores[:, i : i + 1] * features_list[i]) + return torch.cat(out_list, dim=1) diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py new file mode 100644 index 00000000..0ba85fa8 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py @@ -0,0 +1,117 @@ +from torch import nn + + +class IntraCLBlock(nn.Module): + def __init__(self, in_channels=96, reduce_factor=4): + super(IntraCLBlock, self).__init__() + self.channels = in_channels + self.rf = reduce_factor + self.conv1x1_reduce_channel = nn.Conv2d( + self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0 + ) + self.conv1x1_return_channel = nn.Conv2d( + self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0 + ) + + self.v_layer_7x1 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(7, 1), + stride=(1, 1), + padding=(3, 0), + ) + self.v_layer_5x1 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(5, 1), + stride=(1, 1), + padding=(2, 0), + ) + self.v_layer_3x1 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(3, 1), + stride=(1, 1), + padding=(1, 0), + ) + + self.q_layer_1x7 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(1, 7), + stride=(1, 1), + padding=(0, 3), + ) + self.q_layer_1x5 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(1, 5), + stride=(1, 1), + padding=(0, 2), + ) + self.q_layer_1x3 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(1, 3), + stride=(1, 1), + padding=(0, 1), + ) + + # base + self.c_layer_7x7 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(7, 7), + stride=(1, 1), + padding=(3, 3), + ) + self.c_layer_5x5 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(5, 5), + stride=(1, 1), + padding=(2, 2), + ) + self.c_layer_3x3 = nn.Conv2d( + self.channels // self.rf, + self.channels // self.rf, + kernel_size=(3, 3), + stride=(1, 1), + padding=(1, 1), + ) + + self.bn = nn.BatchNorm2d(self.channels) + self.relu = nn.ReLU() + + def forward(self, x): + x_new = self.conv1x1_reduce_channel(x) + + x_7_c = self.c_layer_7x7(x_new) + x_7_v = self.v_layer_7x1(x_new) + x_7_q = self.q_layer_1x7(x_new) + x_7 = x_7_c + x_7_v + x_7_q + + x_5_c = self.c_layer_5x5(x_7) + x_5_v = self.v_layer_5x1(x_7) + x_5_q = self.q_layer_1x5(x_7) + x_5 = x_5_c + x_5_v + x_5_q + + x_3_c = self.c_layer_3x3(x_5) + x_3_v = self.v_layer_3x1(x_5) + x_3_q = self.q_layer_1x3(x_5) + x_3 = x_3_c + x_3_v + x_3_q + + x_relation = self.conv1x1_return_channel(x_3) + + x_relation 
= self.bn(x_relation) + x_relation = self.relu(x_relation) + + return x + x_relation + + +def build_intraclblock_list(num_block): + IntraCLBlock_list = nn.ModuleList() + for i in range(num_block): + IntraCLBlock_list.append(IntraCLBlock()) + + return IntraCLBlock_list diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py new file mode 100644 index 00000000..79c8af30 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py @@ -0,0 +1,241 @@ +import torch +from torch import nn + +from ..backbones.rec_svtrnet import Block, ConvBNLayer + + +class Im2Seq(nn.Module): + def __init__(self, in_channels, **kwargs): + super().__init__() + self.out_channels = in_channels + + # def forward(self, x): + # B, C, H, W = x.shape + # # assert H == 1 + # x = x.squeeze(dim=2) + # # x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels) + # x = x.permute(0, 2, 1) + # return x + + def forward(self, x): + B, C, H, W = x.shape + # 处理四维张量,将空间维度展平为序列 + if H == 1: + # 原来的处理逻辑,适用于H=1的情况 + x = x.squeeze(dim=2) + x = x.permute(0, 2, 1) # (B, W, C) + else: + # 处理H不为1的情况 + x = x.permute(0, 2, 3, 1) # (B, H, W, C) + x = x.reshape(B, H * W, C) # (B, H*W, C) + + return x + +class EncoderWithRNN_(nn.Module): + def __init__(self, in_channels, hidden_size): + super(EncoderWithRNN_, self).__init__() + self.out_channels = hidden_size * 2 + self.rnn1 = nn.LSTM( + in_channels, + hidden_size, + bidirectional=False, + batch_first=True, + num_layers=2, + ) + self.rnn2 = nn.LSTM( + in_channels, + hidden_size, + bidirectional=False, + batch_first=True, + num_layers=2, + ) + + def forward(self, x): + self.rnn1.flatten_parameters() + self.rnn2.flatten_parameters() + out1, h1 = self.rnn1(x) + out2, h2 = self.rnn2(torch.flip(x, [1])) + return torch.cat([out1, torch.flip(out2, [1])], 2) + + +class EncoderWithRNN(nn.Module): + def __init__(self, in_channels, hidden_size): + super(EncoderWithRNN, self).__init__() + self.out_channels = hidden_size * 2 + self.lstm = nn.LSTM( + in_channels, hidden_size, num_layers=2, batch_first=True, bidirectional=True + ) # batch_first:=True + + def forward(self, x): + x, _ = self.lstm(x) + return x + + +class EncoderWithFC(nn.Module): + def __init__(self, in_channels, hidden_size): + super(EncoderWithFC, self).__init__() + self.out_channels = hidden_size + self.fc = nn.Linear( + in_channels, + hidden_size, + bias=True, + ) + + def forward(self, x): + x = self.fc(x) + return x + + +class EncoderWithSVTR(nn.Module): + def __init__( + self, + in_channels, + dims=64, # XS + depth=2, + hidden_dims=120, + use_guide=False, + num_heads=8, + qkv_bias=True, + mlp_ratio=2.0, + drop_rate=0.1, + kernel_size=[3, 3], + attn_drop_rate=0.1, + drop_path=0.0, + qk_scale=None, + ): + super(EncoderWithSVTR, self).__init__() + self.depth = depth + self.use_guide = use_guide + self.conv1 = ConvBNLayer( + in_channels, + in_channels // 8, + kernel_size=kernel_size, + padding=[kernel_size[0] // 2, kernel_size[1] // 2], + act="swish", + ) + self.conv2 = ConvBNLayer( + in_channels // 8, hidden_dims, kernel_size=1, act="swish" + ) + + self.svtr_block = nn.ModuleList( + [ + Block( + dim=hidden_dims, + num_heads=num_heads, + mixer="Global", + HW=None, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer="swish", + attn_drop=attn_drop_rate, + drop_path=drop_path, + norm_layer="nn.LayerNorm", + epsilon=1e-05, + prenorm=False, + ) + for i in range(depth) + ] + ) + 
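+        # post-norm over the flattened SVTR token sequence, then a 1x1 ConvBNLayer projects hidden_dims back to in_channels before the shortcut fusion below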
self.norm = nn.LayerNorm(hidden_dims, eps=1e-6) + self.conv3 = ConvBNLayer(hidden_dims, in_channels, kernel_size=1, act="swish") + # last conv-nxn, the input is concat of input tensor and conv3 output tensor + self.conv4 = ConvBNLayer( + 2 * in_channels, in_channels // 8, padding=1, act="swish" + ) + + self.conv1x1 = ConvBNLayer(in_channels // 8, dims, kernel_size=1, act="swish") + self.out_channels = dims + self.apply(self._init_weights) + + def _init_weights(self, m): + # weight initialization + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.ConvTranspose2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + def forward(self, x): + # for use guide + if self.use_guide: + z = x.clone() + z.stop_gradient = True + else: + z = x + # for short cut + h = z + # reduce dim + z = self.conv1(z) + z = self.conv2(z) + # SVTR global block + B, C, H, W = z.shape + z = z.flatten(2).permute(0, 2, 1) + + for blk in self.svtr_block: + z = blk(z) + + z = self.norm(z) + # last stage + z = z.reshape([-1, H, W, C]).permute(0, 3, 1, 2) + z = self.conv3(z) + z = torch.cat((h, z), dim=1) + z = self.conv1x1(self.conv4(z)) + + return z + + +class SequenceEncoder(nn.Module): + def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs): + super(SequenceEncoder, self).__init__() + self.encoder_reshape = Im2Seq(in_channels) + self.out_channels = self.encoder_reshape.out_channels + self.encoder_type = encoder_type + if encoder_type == "reshape": + self.only_reshape = True + else: + support_encoder_dict = { + "reshape": Im2Seq, + "fc": EncoderWithFC, + "rnn": EncoderWithRNN, + "svtr": EncoderWithSVTR, + } + assert encoder_type in support_encoder_dict, "{} must in {}".format( + encoder_type, support_encoder_dict.keys() + ) + + if encoder_type == "svtr": + self.encoder = support_encoder_dict[encoder_type]( + self.encoder_reshape.out_channels, **kwargs + ) + else: + self.encoder = support_encoder_dict[encoder_type]( + self.encoder_reshape.out_channels, hidden_size + ) + self.out_channels = self.encoder.out_channels + self.only_reshape = False + + def forward(self, x): + if self.encoder_type != "svtr": + x = self.encoder_reshape(x) + if not self.only_reshape: + x = self.encoder(x) + return x + else: + x = self.encoder(x) + x = self.encoder_reshape(x) + return x diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py new file mode 100755 index 00000000..40603ade --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py @@ -0,0 +1,33 @@ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy + +__all__ = ['build_post_process'] + + +def build_post_process(config, global_config=None): + from .db_postprocess import DBPostProcess + from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, TableLabelDecode, \ + NRTRLabelDecode, SARLabelDecode, ViTSTRLabelDecode, RFLLabelDecode + from .cls_postprocess import 
ClsPostProcess + from .rec_postprocess import CANLabelDecode + + support_dict = [ + 'DBPostProcess', 'CTCLabelDecode', + 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', + 'TableLabelDecode', 'NRTRLabelDecode', 'SARLabelDecode', + 'ViTSTRLabelDecode','CANLabelDecode', 'RFLLabelDecode' + ] + + config = copy.deepcopy(config) + module_name = config.pop('name') + if global_config is not None: + config.update(global_config) + assert module_name in support_dict, Exception( + 'post process only support {}, but got {}'.format(support_dict, module_name)) + module_class = eval(module_name)(**config) + return module_class \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py new file mode 100755 index 00000000..c9c6affc --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py @@ -0,0 +1,20 @@ +import torch + + +class ClsPostProcess(object): + """ Convert between text-label and text-index """ + + def __init__(self, label_list, **kwargs): + super(ClsPostProcess, self).__init__() + self.label_list = label_list + + def __call__(self, preds, label=None, *args, **kwargs): + if isinstance(preds, torch.Tensor): + preds = preds.cpu().numpy() + pred_idxs = preds.argmax(axis=1) + decode_out = [(self.label_list[idx], preds[i, idx]) + for i, idx in enumerate(pred_idxs)] + if label is None: + return decode_out + label = [(self.label_list[idx], 1.0) for idx in label] + return decode_out, label \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py new file mode 100755 index 00000000..309f7f3f --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py @@ -0,0 +1,179 @@ +""" +This code is refered from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import cv2 +import torch +from shapely.geometry import Polygon +import pyclipper + + +class DBPostProcess(object): + """ + The post process for Differentiable Binarization (DB). 
+ """ + + def __init__(self, + thresh=0.3, + box_thresh=0.7, + max_candidates=1000, + unclip_ratio=2.0, + use_dilation=False, + score_mode="fast", + **kwargs): + self.thresh = thresh + self.box_thresh = box_thresh + self.max_candidates = max_candidates + self.unclip_ratio = unclip_ratio + self.min_size = 3 + self.score_mode = score_mode + assert score_mode in [ + "slow", "fast" + ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) + + self.dilation_kernel = None if not use_dilation else np.array( + [[1, 1], [1, 1]]) + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, + cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + + boxes = [] + scores = [] + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + if self.score_mode == "fast": + score = self.box_score_fast(pred, points.reshape(-1, 2)) + else: + score = self.box_score_slow(pred, contour) + if self.box_thresh > score: + continue + + box = self.unclip(points).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + + box[:, 0] = np.clip( + np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes.append(box.astype(np.int16)) + scores.append(score) + return np.array(boxes, dtype=np.int16), scores + + def unclip(self, box): + unclip_ratio = self.unclip_ratio + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + ''' + box_score_fast: use bbox mean score as the mean score + ''' + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int64), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int64), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int64), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int64), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def box_score_slow(self, bitmap, contour): + ''' + box_score_slow: use polyon mean score as the mean score + ''' + h, w = bitmap.shape[:2] + 
contour = contour.copy() + contour = np.reshape(contour, (-1, 2)) + + xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) + xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) + ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) + ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + + contour[:, 0] = contour[:, 0] - xmin + contour[:, 1] = contour[:, 1] - ymin + + cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, outs_dict, shape_list): + pred = outs_dict['maps'] + if isinstance(pred, torch.Tensor): + pred = pred.cpu().numpy() + pred = pred[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] + if self.dilation_kernel is not None: + mask = cv2.dilate( + np.array(segmentation[batch_index]).astype(np.uint8), + self.dilation_kernel) + else: + mask = segmentation[batch_index] + boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, + src_w, src_h) + + boxes_batch.append({'points': boxes}) + return boxes_batch \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py new file mode 100755 index 00000000..c83fe5c3 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py @@ -0,0 +1,690 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch + + +class BaseRecLabelDecode(object): + """ Convert between text-label and text-index """ + + def __init__(self, + character_dict_path=None, + use_space_char=False): + + self.beg_str = "sos" + self.end_str = "eos" + + self.character_str = [] + if character_dict_path is None: + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + else: + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n").strip("\r\n") + self.character_str.append(line) + if use_space_char: + self.character_str.append(" ") + dict_character = list(self.character_str) + + dict_character = self.add_special_char(dict_character) + self.dict = {} + for i, char in enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def add_special_char(self, dict_character): + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
""" + result_list = [] + ignored_tokens = self.get_ignored_tokens() + batch_size = len(text_index) + for batch_idx in range(batch_size): + char_list = [] + conf_list = [] + for idx in range(len(text_index[batch_idx])): + if text_index[batch_idx][idx] in ignored_tokens: + continue + if is_remove_duplicate: + # only for predict + if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ + batch_idx][idx]: + continue + char_list.append(self.character[int(text_index[batch_idx][ + idx])]) + if text_prob is not None: + conf_list.append(text_prob[batch_idx][idx]) + else: + conf_list.append(1) + text = ''.join(char_list) + result_list.append((text, np.mean(conf_list))) + return result_list + + def get_ignored_tokens(self): + return [0] # for ctc blank + + +class CTCLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, + character_dict_path=None, + use_space_char=False, + **kwargs): + super(CTCLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def __call__(self, preds, label=None, *args, **kwargs): + if isinstance(preds, torch.Tensor): + preds = preds.numpy() + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) + + if label is None: + return text + label = self.decode(label) + return text, label + + def add_special_char(self, dict_character): + dict_character = ['blank'] + dict_character + return dict_character + + +class NRTRLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=True, **kwargs): + super(NRTRLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def __call__(self, preds, label=None, *args, **kwargs): + + if len(preds) == 2: + preds_id = preds[0] + preds_prob = preds[1] + if isinstance(preds_id, torch.Tensor): + preds_id = preds_id.numpy() + if isinstance(preds_prob, torch.Tensor): + preds_prob = preds_prob.numpy() + if preds_id[0][0] == 2: + preds_idx = preds_id[:, 1:] + preds_prob = preds_prob[:, 1:] + else: + preds_idx = preds_id + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + if label is None: + return text + label = self.decode(label[:, 1:]) + else: + if isinstance(preds, torch.Tensor): + preds = preds.numpy() + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + if label is None: + return text + label = self.decode(label[:, 1:]) + return text, label + + def add_special_char(self, dict_character): + dict_character = ['blank', '', '', ''] + dict_character + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
""" + result_list = [] + batch_size = len(text_index) + for batch_idx in range(batch_size): + char_list = [] + conf_list = [] + for idx in range(len(text_index[batch_idx])): + try: + char_idx = self.character[int(text_index[batch_idx][idx])] + except: + continue + if char_idx == '': # end + break + char_list.append(char_idx) + if text_prob is not None: + conf_list.append(text_prob[batch_idx][idx]) + else: + conf_list.append(1) + text = ''.join(char_list) + result_list.append((text.lower(), np.mean(conf_list).tolist())) + return result_list + +class ViTSTRLabelDecode(NRTRLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(ViTSTRLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def __call__(self, preds, label=None, *args, **kwargs): + if isinstance(preds, torch.Tensor): + preds = preds[:, 1:].numpy() + else: + preds = preds[:, 1:] + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + if label is None: + return text + label = self.decode(label[:, 1:]) + return text, label + + def add_special_char(self, dict_character): + dict_character = ['', ''] + dict_character + return dict_character + + +class AttnLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, + character_dict_path=None, + use_space_char=False, + **kwargs): + super(AttnLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def add_special_char(self, dict_character): + self.beg_str = "sos" + self.end_str = "eos" + dict_character = dict_character + dict_character = [self.beg_str] + dict_character + [self.end_str] + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
""" + result_list = [] + ignored_tokens = self.get_ignored_tokens() + [beg_idx, end_idx] = self.get_ignored_tokens() + batch_size = len(text_index) + for batch_idx in range(batch_size): + char_list = [] + conf_list = [] + for idx in range(len(text_index[batch_idx])): + if text_index[batch_idx][idx] in ignored_tokens: + continue + if int(text_index[batch_idx][idx]) == int(end_idx): + break + if is_remove_duplicate: + # only for predict + if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ + batch_idx][idx]: + continue + char_list.append(self.character[int(text_index[batch_idx][ + idx])]) + if text_prob is not None: + conf_list.append(text_prob[batch_idx][idx]) + else: + conf_list.append(1) + text = ''.join(char_list) + result_list.append((text, np.mean(conf_list))) + return result_list + + def __call__(self, preds, label=None, *args, **kwargs): + """ + text = self.decode(text) + if label is None: + return text + else: + label = self.decode(label, is_remove_duplicate=False) + return text, label + """ + if isinstance(preds, torch.Tensor): + preds = preds.cpu().numpy() + + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + if label is None: + return text + label = self.decode(label, is_remove_duplicate=False) + return text, label + + def get_ignored_tokens(self): + beg_idx = self.get_beg_end_flag_idx("beg") + end_idx = self.get_beg_end_flag_idx("end") + return [beg_idx, end_idx] + + def get_beg_end_flag_idx(self, beg_or_end): + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "unsupport type %s in get_beg_end_flag_idx" \ + % beg_or_end + return idx + + +class RFLLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(RFLLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def add_special_char(self, dict_character): + self.beg_str = "sos" + self.end_str = "eos" + dict_character = dict_character + dict_character = [self.beg_str] + dict_character + [self.end_str] + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
""" + result_list = [] + ignored_tokens = self.get_ignored_tokens() + [beg_idx, end_idx] = self.get_ignored_tokens() + batch_size = len(text_index) + for batch_idx in range(batch_size): + char_list = [] + conf_list = [] + for idx in range(len(text_index[batch_idx])): + if text_index[batch_idx][idx] in ignored_tokens: + continue + if int(text_index[batch_idx][idx]) == int(end_idx): + break + if is_remove_duplicate: + # only for predict + if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ + batch_idx][idx]: + continue + char_list.append(self.character[int(text_index[batch_idx][ + idx])]) + if text_prob is not None: + conf_list.append(text_prob[batch_idx][idx]) + else: + conf_list.append(1) + text = ''.join(char_list) + result_list.append((text, np.mean(conf_list).tolist())) + return result_list + + def __call__(self, preds, label=None, *args, **kwargs): + # if seq_outputs is not None: + if isinstance(preds, tuple) or isinstance(preds, list): + cnt_outputs, seq_outputs = preds + if isinstance(seq_outputs, torch.Tensor): + seq_outputs = seq_outputs.numpy() + preds_idx = seq_outputs.argmax(axis=2) + preds_prob = seq_outputs.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + + if label is None: + return text + label = self.decode(label, is_remove_duplicate=False) + return text, label + + else: + cnt_outputs = preds + if isinstance(cnt_outputs, torch.Tensor): + cnt_outputs = cnt_outputs.numpy() + cnt_length = [] + for lens in cnt_outputs: + length = round(np.sum(lens)) + cnt_length.append(length) + if label is None: + return cnt_length + label = self.decode(label, is_remove_duplicate=False) + length = [len(res[0]) for res in label] + return cnt_length, length + + def get_ignored_tokens(self): + beg_idx = self.get_beg_end_flag_idx("beg") + end_idx = self.get_beg_end_flag_idx("end") + return [beg_idx, end_idx] + + def get_beg_end_flag_idx(self, beg_or_end): + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "unsupport type %s in get_beg_end_flag_idx" \ + % beg_or_end + return idx + + +class SRNLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, + character_dict_path=None, + use_space_char=False, + **kwargs): + self.max_text_length = kwargs.get('max_text_length', 25) + super(SRNLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def __call__(self, preds, label=None, *args, **kwargs): + pred = preds['predict'] + char_num = len(self.character_str) + 2 + if isinstance(pred, torch.Tensor): + pred = pred.numpy() + pred = np.reshape(pred, [-1, char_num]) + + preds_idx = np.argmax(pred, axis=1) + preds_prob = np.max(pred, axis=1) + + preds_idx = np.reshape(preds_idx, [-1, self.max_text_length]) + + preds_prob = np.reshape(preds_prob, [-1, self.max_text_length]) + + text = self.decode(preds_idx, preds_prob) + + if label is None: + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + return text + label = self.decode(label) + return text, label + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
""" + result_list = [] + ignored_tokens = self.get_ignored_tokens() + batch_size = len(text_index) + + for batch_idx in range(batch_size): + char_list = [] + conf_list = [] + for idx in range(len(text_index[batch_idx])): + if text_index[batch_idx][idx] in ignored_tokens: + continue + if is_remove_duplicate: + # only for predict + if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ + batch_idx][idx]: + continue + char_list.append(self.character[int(text_index[batch_idx][ + idx])]) + if text_prob is not None: + conf_list.append(text_prob[batch_idx][idx]) + else: + conf_list.append(1) + + text = ''.join(char_list) + result_list.append((text, np.mean(conf_list))) + return result_list + + def add_special_char(self, dict_character): + dict_character = dict_character + [self.beg_str, self.end_str] + return dict_character + + def get_ignored_tokens(self): + beg_idx = self.get_beg_end_flag_idx("beg") + end_idx = self.get_beg_end_flag_idx("end") + return [beg_idx, end_idx] + + def get_beg_end_flag_idx(self, beg_or_end): + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "unsupport type %s in get_beg_end_flag_idx" \ + % beg_or_end + return idx + + +class TableLabelDecode(object): + """ """ + + def __init__(self, + character_dict_path, + **kwargs): + list_character, list_elem = self.load_char_elem_dict(character_dict_path) + list_character = self.add_special_char(list_character) + list_elem = self.add_special_char(list_elem) + self.dict_character = {} + self.dict_idx_character = {} + for i, char in enumerate(list_character): + self.dict_idx_character[i] = char + self.dict_character[char] = i + self.dict_elem = {} + self.dict_idx_elem = {} + for i, elem in enumerate(list_elem): + self.dict_idx_elem[i] = elem + self.dict_elem[elem] = i + + def load_char_elem_dict(self, character_dict_path): + list_character = [] + list_elem = [] + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split("\t") + character_num = int(substr[0]) + elem_num = int(substr[1]) + for cno in range(1, 1 + character_num): + character = lines[cno].decode('utf-8').strip("\n").strip("\r\n") + list_character.append(character) + for eno in range(1 + character_num, 1 + character_num + elem_num): + elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n") + list_elem.append(elem) + return list_character, list_elem + + def add_special_char(self, list_character): + self.beg_str = "sos" + self.end_str = "eos" + list_character = [self.beg_str] + list_character + [self.end_str] + return list_character + + def __call__(self, preds): + structure_probs = preds['structure_probs'] + loc_preds = preds['loc_preds'] + if isinstance(structure_probs,torch.Tensor): + structure_probs = structure_probs.numpy() + if isinstance(loc_preds,torch.Tensor): + loc_preds = loc_preds.numpy() + structure_idx = structure_probs.argmax(axis=2) + structure_probs = structure_probs.max(axis=2) + structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode(structure_idx, + structure_probs, 'elem') + res_html_code_list = [] + res_loc_list = [] + batch_num = len(structure_str) + for bno in range(batch_num): + res_loc = [] + for sno in range(len(structure_str[bno])): + text = structure_str[bno][sno] + if text in ['', ' 0 and tmp_elem_idx == end_idx: + break + if tmp_elem_idx in ignored_tokens: + continue + + char_list.append(current_dict[tmp_elem_idx]) + 
elem_pos_list.append(idx) + score_list.append(structure_probs[batch_idx, idx]) + elem_idx_list.append(tmp_elem_idx) + result_list.append(char_list) + result_pos_list.append(elem_pos_list) + result_score_list.append(score_list) + result_elem_idx_list.append(elem_idx_list) + return result_list, result_pos_list, result_score_list, result_elem_idx_list + + def get_ignored_tokens(self, char_or_elem): + beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem) + end_idx = self.get_beg_end_flag_idx("end", char_or_elem) + return [beg_idx, end_idx] + + def get_beg_end_flag_idx(self, beg_or_end, char_or_elem): + if char_or_elem == "char": + if beg_or_end == "beg": + idx = self.dict_character[self.beg_str] + elif beg_or_end == "end": + idx = self.dict_character[self.end_str] + else: + assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \ + % beg_or_end + elif char_or_elem == "elem": + if beg_or_end == "beg": + idx = self.dict_elem[self.beg_str] + elif beg_or_end == "end": + idx = self.dict_elem[self.end_str] + else: + assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \ + % beg_or_end + else: + assert False, "Unsupport type %s in char_or_elem" \ + % char_or_elem + return idx + + +class SARLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(SARLabelDecode, self).__init__(character_dict_path, + use_space_char) + + self.rm_symbol = kwargs.get('rm_symbol', False) + + def add_special_char(self, dict_character): + beg_end_str = "" + unknown_str = "" + padding_str = "" + dict_character = dict_character + [unknown_str] + self.unknown_idx = len(dict_character) - 1 + dict_character = dict_character + [beg_end_str] + self.start_idx = len(dict_character) - 1 + self.end_idx = len(dict_character) - 1 + dict_character = dict_character + [padding_str] + self.padding_idx = len(dict_character) - 1 + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
""" + result_list = [] + ignored_tokens = self.get_ignored_tokens() + + batch_size = len(text_index) + for batch_idx in range(batch_size): + char_list = [] + conf_list = [] + for idx in range(len(text_index[batch_idx])): + if text_index[batch_idx][idx] in ignored_tokens: + continue + if int(text_index[batch_idx][idx]) == int(self.end_idx): + if text_prob is None and idx == 0: + continue + else: + break + if is_remove_duplicate: + # only for predict + if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ + batch_idx][idx]: + continue + char_list.append(self.character[int(text_index[batch_idx][ + idx])]) + if text_prob is not None: + conf_list.append(text_prob[batch_idx][idx]) + else: + conf_list.append(1) + text = ''.join(char_list) + if self.rm_symbol: + comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') + text = text.lower() + text = comp.sub('', text) + result_list.append((text, np.mean(conf_list).tolist())) + return result_list + + def __call__(self, preds, label=None, *args, **kwargs): + if isinstance(preds, torch.Tensor): + preds = preds.cpu().numpy() + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) + + if label is None: + return text + label = self.decode(label, is_remove_duplicate=False) + return text, label + + def get_ignored_tokens(self): + return [self.padding_idx] + + +class CANLabelDecode(BaseRecLabelDecode): + """ Convert between latex-symbol and symbol-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(CANLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def decode(self, text_index, preds_prob=None): + result_list = [] + batch_size = len(text_index) + for batch_idx in range(batch_size): + seq_end = text_index[batch_idx].argmin(0) + idx_list = text_index[batch_idx][:seq_end].tolist() + symbol_list = [self.character[idx] for idx in idx_list] + probs = [] + if preds_prob is not None: + probs = preds_prob[batch_idx][:len(symbol_list)].tolist() + + result_list.append([' '.join(symbol_list), probs]) + return result_list + + def __call__(self, preds, label=None, *args, **kwargs): + pred_prob, _, _, _ = preds + preds_idx = pred_prob.argmax(axis=2) + + text = self.decode(preds_idx) + if label is None: + return text + label = self.decode(label) + return text, label \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml new file mode 100644 index 00000000..2dd3b633 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml @@ -0,0 +1,476 @@ +ch_ptocr_mobile_v2.0_cls_infer: + model_type: cls + algorithm: CLS + Transform: + Backbone: + name: MobileNetV3 + scale: 0.35 + model_name: small + Neck: + Head: + name: ClsHead + class_dim: 2 + +Multilingual_PP-OCRv3_det_infer: + model_type: det + algorithm: DB + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: RSEFPN + out_channels: 96 + shortcut: True + Head: + name: DBHead + k: 50 + +en_PP-OCRv3_det_infer: + model_type: det + algorithm: DB + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + 
name: RSEFPN + out_channels: 96 + shortcut: True + Head: + name: DBHead + k: 50 + +ch_PP-OCRv3_det_infer: + model_type: det + algorithm: DB + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + disable_se: True + Neck: + name: RSEFPN + out_channels: 96 + shortcut: True + Head: + name: DBHead + k: 50 + +en_PP-OCRv4_rec_infer: + model_type: rec + algorithm: SVTR_LCNet + Transform: + Backbone: + name: PPLCNetV3 + scale: 0.95 + Head: + name: MultiHead + out_channels_list: + CTCLabelDecode: 97 #'blank' + ...(62) + ' ' + head_list: + - CTCHead: + Neck: + name: svtr + dims: 120 + depth: 2 + hidden_dims: 120 + kernel_size: [ 1, 3 ] + use_guide: True + Head: + fc_decay: 0.00001 + - NRTRHead: + nrtr_dim: 384 + max_text_length: 25 + +ch_PP-OCRv4_det_infer: + model_type: det + algorithm: DB + Transform: null + Backbone: + name: PPLCNetV3 + scale: 0.75 + det: True + Neck: + name: RSEFPN + out_channels: 96 + shortcut: True + Head: + name: DBHead + k: 50 + +ch_PP-OCRv5_det_infer: + model_type: det + algorithm: DB + Transform: null + Backbone: + name: PPLCNetV3 + scale: 0.75 + det: True + Neck: + name: RSEFPN + out_channels: 96 + shortcut: True + Head: + name: DBHead + k: 50 + +ch_PP-OCRv4_det_server_infer: + model_type: det + algorithm: DB + Transform: null + Backbone: + name: PPHGNet_small + det: True + Neck: + name: LKPAN + out_channels: 256 + intracl: true + Head: + name: PFHeadLocal + k: 50 + mode: "large" + +ch_PP-OCRv4_rec_infer: + model_type: rec + algorithm: SVTR_LCNet + Transform: + Backbone: + name: PPLCNetV3 + scale: 0.95 + Head: + name: MultiHead + out_channels_list: + CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' + head_list: + - CTCHead: + Neck: + name: svtr + dims: 120 + depth: 2 + hidden_dims: 120 + kernel_size: [ 1, 3 ] + use_guide: True + Head: + fc_decay: 0.00001 + - NRTRHead: + nrtr_dim: 384 + max_text_length: 25 + +ch_PP-OCRv4_rec_server_infer: + model_type: rec + algorithm: SVTR_HGNet + Transform: + Backbone: + name: PPHGNet_small + Head: + name: MultiHead + out_channels_list: + CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' + head_list: + - CTCHead: + Neck: + name: svtr + dims: 120 + depth: 2 + hidden_dims: 120 + kernel_size: [ 1, 3 ] + use_guide: True + Head: + fc_decay: 0.00001 + - NRTRHead: + nrtr_dim: 384 + max_text_length: 25 + +ch_PP-OCRv4_rec_server_doc_infer: + model_type: rec + algorithm: SVTR_HGNet + Transform: + Backbone: + name: PPHGNet_small + Head: + name: MultiHead + out_channels_list: + CTCLabelDecode: 15631 + head_list: + - CTCHead: + Neck: + name: svtr + dims: 120 + depth: 2 + hidden_dims: 120 + kernel_size: [ 1, 3 ] + use_guide: True + Head: + fc_decay: 0.00001 + - NRTRHead: + nrtr_dim: 384 + max_text_length: 25 + +ch_PP-OCRv5_rec_server_infer: + model_type: rec + algorithm: SVTR_HGNet + Transform: + Backbone: + name: PPHGNetV2_B4 + text_rec: True + Head: + name: MultiHead + out_channels_list: + CTCLabelDecode: 18385 + head_list: + - CTCHead: + Neck: + name: svtr + dims: 120 + depth: 2 + hidden_dims: 120 + kernel_size: [ 1, 3 ] + use_guide: True + Head: + fc_decay: 0.00001 + - NRTRHead: + nrtr_dim: 384 + max_text_length: 25 + +ch_PP-OCRv5_rec_infer: + model_type: rec + algorithm: SVTR_HGNet + Transform: + Backbone: + name: PPLCNetV3 + scale: 0.95 + Head: + name: MultiHead + out_channels_list: + CTCLabelDecode: 18385 + head_list: + - CTCHead: + Neck: + name: svtr + dims: 120 + depth: 2 + hidden_dims: 120 + kernel_size: [ 1, 3 ] + use_guide: True + Head: + fc_decay: 0.00001 + - NRTRHead: + nrtr_dim: 384 + max_text_length: 25 + 
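+# Note: the language-specific *_PP-OCRv3_rec_infer entries below share one SVTR recognition recipe;
+# the commented out_channels hints appear to equal the matching dict file's line count plus two
+# (one CTC 'blank' entry plus the appended space character).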
+chinese_cht_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [1, 2] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 8423 + fc_decay: 0.00001 + +latin_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 187 + fc_decay: 0.00001 + +cyrillic_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 165 + fc_decay: 0.00001 + +arabic_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 164 + fc_decay: 0.00001 + +korean_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 3690 + fc_decay: 0.00001 + +japan_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 4401 + fc_decay: 0.00001 + +ta_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 130 + fc_decay: 0.00001 + +te_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 153 + fc_decay: 0.00001 + +ka_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# out_channels: 155 + fc_decay: 0.00001 + +devanagari_PP-OCRv3_rec_infer: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [ 1, 2 ] + last_pool_type: avg + Neck: + name: SequenceEncoder + encoder_type: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + name: CTCHead +# 
out_channels: 169 + fc_decay: 0.00001 + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt new file mode 100644 index 00000000..e97abf39 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt @@ -0,0 +1,162 @@ + +! +# +$ +% +& +' +( ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +É +é +ء +آ +أ +ؤ +إ +ئ +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +ز +س +ش +ص +ض +ط +ظ +ع +غ +ف +ق +ك +ل +م +ن +ه +و +ى +ي +ً +ٌ +ٍ +َ +ُ +ِ +ّ +ْ +ٓ +ٔ +ٰ +ٱ +ٹ +پ +چ +ڈ +ڑ +ژ +ک +ڭ +گ +ں +ھ +ۀ +ہ +ۂ +ۃ +ۆ +ۇ +ۈ +ۋ +ی +ې +ے +ۓ +ە +١ +٢ +٣ +٤ +٥ +٦ +٧ +٨ +٩ diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt new file mode 100644 index 00000000..cc1aa472 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt @@ -0,0 +1,8421 @@ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +¥ +® +° +± +² +´ +· +» +É +Ë +Ó +× +Ü +à +á +ä +è +é +ì +í +ò +ó +÷ +ú +ü +ā +ē +ī +ō +ū +ǐ +ǒ +ɔ +ɡ +ʌ +ˋ +Λ +Ο +Φ +Ω +α +β +ε +θ +μ +π +З +И +Й +П +Я +г +— +‖ +‘ +’ +“ +” +• +… +‧ +′ +″ +※ +℃ +№ +™ +Ⅱ +Ⅲ +Ⅳ +← +↑ +→ +↓ +⇋ +∈ +∑ +√ +∞ +∣ +∧ +∩ +∫ +∶ +≈ +≠ +≤ +≥ +⊙ +⊥ +① +② +③ +④ +⑧ +⑴ +⑵ +⑶ +─ +│ +┅ +┌ +├ +█ +▎ +▏ +▕ +■ +□ +▪ +▲ +△ +▼ +◆ +◇ +○ +◎ +● +◥ +★ +☆ +❋ +❤ +  +、 +。 +〇 +〉 +《 +》 +「 +」 +『 +』 +【 +】 +〔 +〕 +〖 +〗 +の +サ +シ +ジ +マ +ㄱ +ㆍ +㎏ +㎡ +㐂 +㐱 +㙟 +㴪 +㸃 +䖝 +䝉 +䰾 +䲁 +一 +丁 +七 +丄 +丈 +三 +上 +下 +丌 +不 +与 +丏 +丐 +丑 +且 +丕 +世 +丘 +丙 +丞 +丟 +両 +並 +丨 +丫 +中 +丰 +串 +丶 +丸 +丹 +主 +丼 +丿 +乂 +乃 +久 +么 +之 +乍 +乎 +乏 +乒 +乓 +乖 +乗 +乘 +乙 +乚 +乜 +九 +乞 +也 +乩 +乭 +乳 +乸 +乹 +乾 +亀 +亂 +亅 +了 +予 +亊 +事 +二 +亍 +云 +互 +亓 +五 +井 +亘 +些 +亜 +亞 +亟 +亠 +亡 +亢 +交 +亥 +亦 +亨 +享 +京 +亭 +亮 +亰 +亳 +亶 +亹 +人 +亻 +什 +仁 +仂 +仃 +仄 +仇 +仉 +今 +介 +仍 +仏 +仔 +仕 +他 +仗 +付 +仙 +仛 +仝 +仞 +仟 +仡 +代 +令 +以 +仨 +仫 +仮 +仰 +仲 +仳 +仵 +件 +仺 +任 +仼 +份 +仿 +企 +伃 +伈 +伉 +伊 +伋 +伍 +伎 +伏 +伐 +休 +伕 +伙 +伝 +伢 +伯 +估 +伱 +伴 +伶 +伷 +伸 +伺 +似 +伽 +伾 +佀 +佁 +佃 +但 +佇 +佈 +佉 +佋 +位 +低 +住 +佐 +佑 +体 +佔 +何 +佗 +佘 +余 +佚 +佛 +作 +佝 +佞 +佟 +你 +佣 +佤 +佧 +佩 +佬 +佯 +佰 +佳 +併 +佶 +佹 +佺 +佼 +佾 +使 +侁 +侃 +侄 +侅 +來 +侈 +侊 +例 +侍 +侏 +侑 +侖 +侗 +侘 +侚 +供 +依 +侞 +価 +侮 +侯 +侵 +侶 +侷 +侹 +便 +俁 +係 +促 +俄 +俅 +俊 +俋 +俌 +俍 +俎 +俏 +俐 +俑 +俗 +俘 +俚 +俛 +保 +俞 +俟 +俠 +信 +俬 +修 +俯 +俱 +俳 +俴 +俵 +俶 +俸 +俺 +俽 +俾 +倆 +倈 +倉 +個 +倌 +倍 +們 +倒 +倓 +倔 +倖 +倗 +倘 +候 +倚 +倜 +倞 +借 +倡 +倢 +倣 +値 +倦 +倧 +倩 +倪 +倫 +倬 +倭 +倮 +倻 +值 +偁 +偃 +假 +偈 +偉 +偊 +偌 +偍 +偎 +偏 +偓 +偕 +做 +停 +健 +偪 +偲 +側 +偵 +偶 +偷 +偸 +偽 +傀 +傃 +傅 +傈 +傉 +傍 +傑 +傒 +傕 +傖 +傘 +備 +傜 +傢 +傣 +催 +傭 +傲 +傳 +債 +傷 +傻 +傾 +僅 +僉 +僊 +働 +像 +僑 +僔 +僕 +僖 +僙 +僚 +僜 +僡 +僧 +僩 +僭 +僮 +僰 +僱 +僳 +僴 +僵 +價 +僻 +儀 +儁 +儂 +億 +儆 +儇 +儈 +儉 +儋 +儐 +儒 +儔 +儕 +儘 +儚 +儞 +償 +儡 +儥 +儦 +優 +儫 +儱 +儲 +儷 +儺 +儻 +儼 +兀 +允 +元 +兄 +充 +兆 +先 +光 +克 +兌 +免 +児 +兒 +兔 +兕 +兗 +兜 +入 +內 +全 +兩 +兪 +八 +公 +六 +兮 +共 +兵 +其 +具 +典 +兼 +兿 +冀 +冂 +円 +冇 +冉 +冊 +再 +冏 +冑 +冒 +冕 +冖 +冗 +冚 +冠 +冢 +冤 +冥 +冧 +冨 +冪 +冫 +冬 +冮 +冰 +冴 +冶 +冷 +冼 +冽 +凃 +凄 +准 +凈 +凋 +凌 +凍 +凖 +凜 +凝 +凞 +几 +凡 +処 +凪 +凬 +凰 +凱 +凳 +凵 +凶 +凸 +凹 +出 +函 +刀 +刁 +刂 +刃 +刄 +分 +切 +刈 +刊 +刎 +刑 +划 +列 +初 +判 +別 +刦 +刧 +刨 +利 +刪 +刮 +到 +制 +刷 +券 +刺 +刻 +刼 +剁 +剃 +則 +削 +剋 +剌 +前 +剎 +剏 +剔 +剖 +剛 +剝 
+剡 +剣 +剩 +剪 +剮 +副 +割 +創 +剿 +劃 +劄 +劇 +劈 +劉 +劊 +劌 +劍 +劑 +劔 +力 +功 +加 +劣 +助 +努 +劫 +劬 +劭 +劵 +効 +劼 +劾 +勁 +勃 +勅 +勇 +勉 +勐 +勑 +勒 +勔 +動 +勖 +勗 +勘 +務 +勛 +勝 +勞 +募 +勢 +勣 +勤 +勦 +勰 +勱 +勲 +勳 +勵 +勷 +勸 +勺 +勻 +勾 +勿 +匂 +匄 +包 +匆 +匈 +匋 +匍 +匏 +匐 +匕 +化 +北 +匙 +匚 +匝 +匠 +匡 +匣 +匪 +匯 +匱 +匸 +匹 +匾 +匿 +區 +十 +千 +卅 +升 +午 +卉 +半 +卋 +卍 +卐 +卑 +卒 +卓 +協 +南 +博 +卜 +卞 +卟 +占 +卡 +卣 +卦 +卧 +卩 +卬 +卮 +卯 +印 +危 +卲 +即 +卵 +卷 +卸 +卹 +卺 +卻 +卽 +卿 +厄 +厓 +厔 +厙 +厚 +厝 +原 +厥 +厭 +厰 +厲 +厴 +厶 +去 +參 +叄 +又 +叉 +及 +友 +反 +収 +叔 +叕 +取 +受 +叛 +叟 +叡 +叢 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +司 +叻 +叼 +吁 +吃 +各 +吆 +合 +吉 +吊 +吋 +同 +名 +后 +吏 +吐 +向 +吒 +吔 +吖 +君 +吝 +吞 +吟 +吠 +吡 +吥 +否 +吧 +吩 +含 +吮 +吱 +吲 +吳 +吵 +吶 +吸 +吹 +吻 +吼 +吾 +呀 +呂 +呃 +呈 +呉 +告 +呋 +呎 +呢 +呤 +呦 +周 +呱 +味 +呵 +呷 +呸 +呼 +命 +呾 +咀 +咁 +咂 +咄 +咅 +咆 +咋 +和 +咎 +咑 +咒 +咔 +咕 +咖 +咗 +咘 +咚 +咟 +咤 +咥 +咧 +咨 +咩 +咪 +咫 +咬 +咭 +咯 +咱 +咲 +咳 +咸 +咻 +咼 +咽 +咾 +咿 +哀 +品 +哂 +哄 +哆 +哇 +哈 +哉 +哌 +哎 +哏 +哐 +哖 +哚 +哞 +員 +哥 +哦 +哨 +哩 +哪 +哭 +哮 +哱 +哲 +哺 +哼 +唃 +唄 +唆 +唇 +唉 +唏 +唐 +唑 +唔 +唘 +唧 +唫 +唬 +唭 +售 +唯 +唱 +唳 +唵 +唷 +唸 +唻 +唾 +啁 +啃 +啄 +商 +啉 +啊 +啍 +問 +啓 +啖 +啚 +啜 +啞 +啟 +啡 +啣 +啤 +啥 +啦 +啪 +啫 +啯 +啰 +啱 +啲 +啵 +啶 +啷 +啻 +啼 +啾 +喀 +喂 +喃 +善 +喆 +喇 +喈 +喉 +喊 +喋 +喏 +喔 +喘 +喙 +喚 +喜 +喝 +喢 +喦 +喧 +喪 +喫 +喬 +單 +喰 +喱 +喲 +喳 +喵 +喹 +喻 +喼 +嗄 +嗅 +嗆 +嗇 +嗊 +嗎 +嗑 +嗒 +嗓 +嗔 +嗖 +嗚 +嗜 +嗝 +嗞 +嗡 +嗢 +嗣 +嗦 +嗨 +嗩 +嗪 +嗮 +嗯 +嗲 +嗶 +嗹 +嗽 +嘀 +嘅 +嘆 +嘉 +嘌 +嘍 +嘎 +嘏 +嘔 +嘗 +嘚 +嘛 +嘜 +嘞 +嘟 +嘢 +嘣 +嘥 +嘧 +嘩 +嘬 +嘮 +嘯 +嘰 +嘲 +嘴 +嘶 +嘸 +嘹 +嘻 +嘿 +噁 +噌 +噍 +噏 +噓 +噗 +噝 +噠 +噢 +噤 +噥 +噦 +器 +噩 +噪 +噬 +噯 +噰 +噲 +噴 +噶 +噸 +噹 +噻 +嚇 +嚈 +嚎 +嚏 +嚐 +嚒 +嚓 +嚕 +嚗 +嚙 +嚞 +嚟 +嚤 +嚦 +嚧 +嚨 +嚩 +嚮 +嚳 +嚴 +嚶 +嚷 +嚼 +嚿 +囀 +囂 +囃 +囉 +囊 +囍 +囑 +囒 +囓 +囗 +囚 +四 +囝 +回 +因 +囡 +団 +囤 +囧 +囪 +囮 +囯 +困 +囲 +図 +囶 +囷 +囹 +固 +囿 +圂 +圃 +圄 +圈 +圉 +國 +圍 +圏 +園 +圓 +圖 +圗 +團 +圜 +土 +圧 +在 +圩 +圪 +圭 +圯 +地 +圳 +圻 +圾 +址 +均 +坊 +坋 +坌 +坍 +坎 +坐 +坑 +坖 +坡 +坣 +坤 +坦 +坨 +坩 +坪 +坫 +坬 +坭 +坮 +坯 +坳 +坵 +坶 +坷 +坻 +垂 +垃 +垈 +型 +垍 +垓 +垕 +垚 +垛 +垞 +垟 +垠 +垢 +垣 +垮 +垯 +垰 +垵 +垸 +垻 +垿 +埃 +埅 +埇 +埈 +埋 +埌 +城 +埏 +埒 +埔 +埕 +埗 +埜 +域 +埠 +埡 +埤 +埧 +埨 +埪 +埭 +埮 +埴 +埵 +執 +培 +基 +埻 +埼 +堀 +堂 +堃 +堅 +堆 +堇 +堈 +堉 +堊 +堍 +堖 +堝 +堡 +堤 +堦 +堪 +堮 +堯 +堰 +報 +場 +堵 +堷 +堺 +塀 +塅 +塆 +塊 +塋 +塌 +塍 +塏 +塑 +塔 +塗 +塘 +塙 +塜 +塞 +塡 +塢 +塤 +塨 +塩 +填 +塬 +塭 +塰 +塱 +塲 +塵 +塹 +塽 +塾 +墀 +境 +墅 +墉 +墊 +墎 +墓 +増 +墘 +墜 +增 +墟 +墡 +墣 +墨 +墩 +墫 +墬 +墮 +墱 +墳 +墺 +墼 +墾 +壁 +壄 +壆 +壇 +壋 +壌 +壎 +壐 +壑 +壓 +壔 +壕 +壘 +壙 +壞 +壟 +壠 +壢 +壤 +壩 +士 +壬 +壯 +壱 +壴 +壹 +壺 +壽 +夀 +夆 +変 +夊 +夋 +夌 +夏 +夔 +夕 +外 +夙 +多 +夜 +夠 +夢 +夤 +夥 +大 +天 +太 +夫 +夬 +夭 +央 +夯 +失 +夷 +夾 +奀 +奄 +奇 +奈 +奉 +奎 +奏 +奐 +契 +奓 +奔 +奕 +套 +奘 +奚 +奠 +奢 +奣 +奧 +奩 +奪 +奫 +奭 +奮 +女 +奴 +奶 +她 +好 +妀 +妁 +如 +妃 +妄 +妊 +妍 +妏 +妑 +妒 +妓 +妖 +妙 +妝 +妞 +妠 +妤 +妥 +妧 +妨 +妭 +妮 +妯 +妲 +妳 +妸 +妹 +妺 +妻 +妾 +姀 +姁 +姃 +姆 +姈 +姉 +姊 +始 +姌 +姍 +姐 +姑 +姒 +姓 +委 +姚 +姜 +姝 +姣 +姥 +姦 +姨 +姪 +姫 +姬 +姮 +姵 +姶 +姸 +姻 +姿 +威 +娃 +娉 +娋 +娌 +娍 +娎 +娑 +娖 +娘 +娛 +娜 +娟 +娠 +娣 +娥 +娩 +娫 +娳 +娶 +娸 +娼 +娽 +婀 +婁 +婆 +婉 +婊 +婑 +婕 +婚 +婢 +婦 +婧 +婪 +婭 +婯 +婷 +婺 +婻 +婼 +婿 +媃 +媄 +媊 +媐 +媒 +媓 +媖 +媗 +媚 +媛 +媜 +媞 +媧 +媭 +媯 +媲 +媳 +媺 +媼 +媽 +媾 +媿 +嫁 +嫂 +嫄 +嫈 +嫉 +嫌 +嫖 +嫘 +嫚 +嫡 +嫣 +嫦 +嫩 +嫪 +嫲 +嫳 +嫵 +嫺 +嫻 +嬅 +嬈 +嬉 +嬋 +嬌 +嬗 +嬛 +嬝 +嬡 +嬤 +嬨 +嬪 +嬬 +嬭 +嬰 +嬴 +嬸 +嬾 +嬿 +孀 +孃 +孆 +孋 +孌 +子 +孑 +孔 +孕 +孖 +字 +存 +孚 +孛 +孜 +孝 +孟 +孢 +季 +孤 +孩 +孫 +孬 +孮 +孰 +孳 +孵 +學 +孺 +孻 +孽 +孿 +宀 +它 +宅 +宇 +守 +安 +宋 +完 +宍 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +実 +客 +宣 +室 +宥 +宦 +宧 +宮 +宰 +害 +宴 +宵 +家 +宸 +容 +宿 +寀 +寁 +寂 +寄 +寅 +密 +寇 +寈 +寊 +富 +寐 +寒 +寓 +寔 +寕 +寖 +寗 +寘 +寛 +寜 +寞 +察 +寡 +寢 +寤 +寥 +實 +寧 +寨 +審 +寫 +寬 +寮 +寯 +寰 +寳 +寵 +寶 +寸 +寺 +対 +封 +専 +尃 +射 +將 +專 +尉 +尊 +尋 +對 +導 +小 +尐 +少 +尓 +尕 +尖 +尗 +尙 +尚 +尢 +尤 +尨 +尪 +尬 +就 +尷 +尹 +尺 +尻 +尼 +尾 +尿 +局 +屁 +屄 +居 +屆 +屇 +屈 +屋 +屌 +屍 +屎 +屏 +屐 +屑 +屓 +展 +屚 +屜 +屠 +屢 +層 +履 +屬 +屭 +屯 +山 +屹 +屺 +屻 +岀 +岈 +岌 +岐 +岑 +岔 +岡 +岢 +岣 +岧 +岩 +岪 +岫 +岬 +岰 +岱 +岳 +岵 +岷 +岸 +岻 +峁 +峅 +峇 +峋 +峍 +峒 +峘 +峙 +峚 +峠 +峨 +峩 +峪 +峭 +峯 +峰 +峴 +島 +峻 +峼 +峽 +崁 +崆 +崇 +崈 +崋 +崍 +崎 +崐 +崑 +崒 +崔 +崖 +崗 +崘 +崙 +崚 +崛 +崞 +崟 
+崠 +崢 +崤 +崧 +崩 +崬 +崮 +崱 +崴 +崵 +崶 +崽 +嵇 +嵊 +嵋 +嵌 +嵎 +嵐 +嵒 +嵕 +嵖 +嵗 +嵙 +嵛 +嵜 +嵨 +嵩 +嵬 +嵮 +嵯 +嵰 +嵴 +嵻 +嵿 +嶁 +嶂 +嶃 +嶄 +嶇 +嶋 +嶌 +嶍 +嶒 +嶔 +嶗 +嶝 +嶠 +嶢 +嶦 +嶧 +嶪 +嶬 +嶰 +嶲 +嶴 +嶷 +嶸 +嶺 +嶼 +嶽 +巂 +巄 +巆 +巋 +巌 +巍 +巎 +巑 +巒 +巔 +巖 +巘 +巛 +川 +州 +巡 +巢 +工 +左 +巧 +巨 +巫 +差 +巰 +己 +已 +巳 +巴 +巶 +巷 +巻 +巽 +巾 +巿 +市 +布 +帆 +希 +帑 +帔 +帕 +帖 +帘 +帙 +帚 +帛 +帝 +帡 +帢 +帥 +師 +席 +帯 +帰 +帳 +帶 +帷 +常 +帽 +幀 +幃 +幄 +幅 +幌 +幔 +幕 +幗 +幚 +幛 +幟 +幡 +幢 +幣 +幪 +幫 +干 +平 +年 +幵 +幷 +幸 +幹 +幺 +幻 +幼 +幽 +幾 +庀 +庁 +広 +庇 +床 +序 +底 +庖 +店 +庚 +府 +庠 +庢 +庥 +度 +座 +庫 +庭 +庲 +庵 +庶 +康 +庸 +庹 +庼 +庾 +廁 +廂 +廄 +廆 +廈 +廉 +廊 +廋 +廌 +廍 +廑 +廓 +廔 +廕 +廖 +廙 +廚 +廝 +廞 +廟 +廠 +廡 +廢 +廣 +廧 +廨 +廩 +廬 +廰 +廱 +廳 +延 +廷 +廸 +建 +廻 +廼 +廿 +弁 +弄 +弅 +弇 +弈 +弉 +弊 +弋 +弍 +式 +弐 +弒 +弓 +弔 +引 +弖 +弗 +弘 +弛 +弟 +弢 +弦 +弧 +弨 +弩 +弭 +弱 +張 +強 +弸 +弼 +弾 +彀 +彄 +彅 +彆 +彈 +彊 +彌 +彎 +彐 +彔 +彖 +彗 +彘 +彙 +彜 +彞 +彠 +彡 +形 +彣 +彤 +彥 +彧 +彩 +彪 +彫 +彬 +彭 +彰 +影 +彳 +彷 +役 +彼 +彿 +往 +征 +徂 +待 +徇 +很 +徉 +徊 +律 +後 +徐 +徑 +徒 +得 +徘 +徙 +徜 +從 +徠 +御 +徧 +徨 +復 +循 +徫 +徬 +徭 +微 +徳 +徴 +徵 +德 +徸 +徹 +徽 +心 +忄 +必 +忉 +忌 +忍 +忐 +忑 +忒 +志 +忘 +忙 +応 +忝 +忞 +忠 +快 +忬 +忯 +忱 +忳 +念 +忻 +忽 +忿 +怍 +怎 +怒 +怕 +怖 +怙 +怛 +思 +怠 +怡 +急 +怦 +性 +怨 +怪 +怯 +怵 +恁 +恂 +恃 +恆 +恊 +恍 +恐 +恕 +恙 +恢 +恣 +恤 +恥 +恨 +恩 +恪 +恬 +恭 +息 +恰 +恵 +恿 +悄 +悅 +悆 +悉 +悌 +悍 +悔 +悖 +悚 +悛 +悝 +悞 +悟 +悠 +患 +悧 +您 +悪 +悰 +悲 +悳 +悵 +悶 +悸 +悼 +情 +惆 +惇 +惑 +惔 +惕 +惘 +惚 +惜 +惟 +惠 +惡 +惣 +惦 +惰 +惱 +惲 +想 +惶 +惹 +惺 +愁 +愃 +愆 +愈 +愉 +愍 +意 +愐 +愒 +愔 +愕 +愚 +愛 +愜 +感 +愣 +愧 +愨 +愫 +愭 +愴 +愷 +愼 +愾 +愿 +慄 +慈 +態 +慌 +慎 +慕 +慘 +慚 +慜 +慟 +慢 +慣 +慥 +慧 +慨 +慮 +慰 +慳 +慵 +慶 +慷 +慾 +憂 +憊 +憋 +憍 +憎 +憐 +憑 +憓 +憕 +憙 +憚 +憤 +憧 +憨 +憩 +憫 +憬 +憲 +憶 +憺 +憻 +憾 +懂 +懃 +懇 +懈 +應 +懋 +懌 +懍 +懐 +懣 +懦 +懮 +懲 +懵 +懶 +懷 +懸 +懺 +懼 +懽 +懾 +懿 +戀 +戇 +戈 +戊 +戌 +戍 +戎 +成 +我 +戒 +戔 +戕 +或 +戙 +戚 +戛 +戟 +戡 +戢 +戥 +戦 +戩 +截 +戮 +戰 +戱 +戲 +戳 +戴 +戶 +戸 +戻 +戽 +戾 +房 +所 +扁 +扆 +扇 +扈 +扉 +手 +扌 +才 +扎 +扒 +打 +扔 +托 +扙 +扛 +扞 +扣 +扥 +扦 +扭 +扮 +扯 +扳 +扶 +批 +扼 +找 +承 +技 +抃 +抄 +抇 +抉 +把 +抑 +抒 +抓 +投 +抖 +抗 +折 +抦 +披 +抬 +抱 +抵 +抹 +抻 +押 +抽 +抿 +拂 +拆 +拇 +拈 +拉 +拋 +拌 +拍 +拎 +拏 +拐 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +拚 +招 +拜 +拝 +拡 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拷 +拺 +拼 +拽 +拾 +拿 +持 +指 +按 +挎 +挑 +挖 +挙 +挨 +挪 +挫 +振 +挲 +挵 +挹 +挺 +挻 +挾 +捂 +捆 +捉 +捌 +捍 +捎 +捏 +捐 +捒 +捕 +捜 +捦 +捧 +捨 +捩 +捫 +捭 +捱 +捲 +捶 +捷 +捺 +捻 +掀 +掂 +掃 +掄 +掇 +授 +掉 +掌 +掏 +掐 +排 +掖 +掘 +掙 +掛 +掞 +掟 +掠 +採 +探 +掣 +接 +控 +推 +掩 +措 +掬 +掰 +掾 +揀 +揄 +揆 +揉 +揍 +描 +提 +插 +揔 +揖 +揚 +換 +握 +揪 +揭 +揮 +援 +揸 +揺 +損 +搏 +搐 +搓 +搔 +搖 +搗 +搜 +搞 +搠 +搢 +搪 +搬 +搭 +搳 +搴 +搵 +搶 +搽 +搾 +摂 +摒 +摔 +摘 +摜 +摞 +摟 +摠 +摧 +摩 +摭 +摯 +摳 +摴 +摵 +摶 +摸 +摹 +摺 +摻 +摽 +撃 +撇 +撈 +撐 +撒 +撓 +撕 +撖 +撙 +撚 +撞 +撣 +撤 +撥 +撩 +撫 +撬 +播 +撮 +撰 +撲 +撳 +撻 +撼 +撾 +撿 +擀 +擁 +擂 +擅 +擇 +擊 +擋 +操 +擎 +擒 +擔 +擘 +據 +擠 +擢 +擥 +擦 +擬 +擯 +擰 +擱 +擲 +擴 +擷 +擺 +擼 +擾 +攀 +攏 +攔 +攖 +攘 +攜 +攝 +攞 +攢 +攣 +攤 +攪 +攫 +攬 +支 +攴 +攵 +收 +攷 +攸 +改 +攻 +攽 +放 +政 +故 +效 +敍 +敎 +敏 +救 +敔 +敕 +敖 +敗 +敘 +教 +敝 +敞 +敟 +敢 +散 +敦 +敫 +敬 +敭 +敲 +整 +敵 +敷 +數 +敻 +敾 +斂 +斃 +文 +斌 +斎 +斐 +斑 +斕 +斖 +斗 +料 +斛 +斜 +斝 +斟 +斡 +斤 +斥 +斧 +斬 +斯 +新 +斷 +方 +於 +施 +斿 +旁 +旂 +旃 +旄 +旅 +旉 +旋 +旌 +旎 +族 +旖 +旗 +旙 +旛 +旡 +既 +日 +旦 +旨 +早 +旬 +旭 +旱 +旲 +旳 +旺 +旻 +旼 +旽 +旾 +旿 +昀 +昂 +昃 +昆 +昇 +昉 +昊 +昌 +昍 +明 +昏 +昐 +易 +昔 +昕 +昚 +昛 +昜 +昝 +昞 +星 +映 +昡 +昣 +昤 +春 +昧 +昨 +昪 +昫 +昭 +是 +昰 +昱 +昴 +昵 +昶 +昺 +晁 +時 +晃 +晈 +晉 +晊 +晏 +晗 +晙 +晚 +晛 +晝 +晞 +晟 +晤 +晦 +晧 +晨 +晩 +晪 +晫 +晭 +普 +景 +晰 +晳 +晴 +晶 +晷 +晸 +智 +晾 +暃 +暄 +暅 +暇 +暈 +暉 +暊 +暌 +暎 +暏 +暐 +暑 +暕 +暖 +暗 +暘 +暝 +暟 +暠 +暢 +暦 +暨 +暫 +暮 +暱 +暲 +暴 +暸 +暹 +暻 +暾 +曄 +曅 +曆 +曇 +曉 +曌 +曔 +曖 +曙 +曜 +曝 +曠 +曦 +曧 +曨 +曩 +曬 +曮 +曰 +曲 +曳 +更 +曶 +曷 +書 +曹 +曺 +曼 +曽 +曾 +替 +最 +會 +月 +有 +朊 +朋 +服 +朏 +朐 +朓 +朔 +朕 +朖 +朗 +望 +朝 +期 +朦 +朧 +木 +未 +末 +本 +札 +朱 +朴 +朵 +朶 +朽 +朿 +杁 +杉 +杋 +杌 +李 +杏 +材 +村 +杓 +杖 +杙 +杜 +杞 +束 +杠 +杣 +杤 +杧 +杬 +杭 +杯 +東 +杲 +杳 +杴 +杵 +杷 +杻 +杼 +松 +板 +极 +枇 +枉 +枋 +枏 +析 +枕 +枖 +林 +枚 +枛 +果 +枝 +枠 +枡 +枯 +枰 +枱 +枲 +枳 +架 +枷 +枸 +枹 +枼 +柁 +柃 +柄 +柉 +柊 +柎 +柏 +某 +柑 +柒 +染 +柔 +柘 +柚 +柜 +柝 +柞 +柟 +查 +柩 +柬 +柯 +柰 +柱 +柳 +柴 +柵 +柶 +柷 +査 +柾 +柿 +栃 +栄 +栐 +栒 +栓 
+栜 +栝 +栞 +校 +栢 +栨 +栩 +株 +栲 +栴 +核 +根 +栻 +格 +栽 +桀 +桁 +桂 +桃 +桄 +桅 +框 +案 +桉 +桌 +桎 +桐 +桑 +桓 +桔 +桕 +桖 +桙 +桜 +桝 +桫 +桱 +桲 +桴 +桶 +桷 +桼 +桿 +梀 +梁 +梂 +梃 +梅 +梆 +梉 +梏 +梓 +梔 +梗 +梘 +條 +梟 +梠 +梢 +梣 +梧 +梨 +梫 +梭 +梯 +械 +梱 +梳 +梵 +梶 +梽 +棄 +棆 +棉 +棋 +棍 +棐 +棒 +棓 +棕 +棖 +棗 +棘 +棚 +棛 +棟 +棠 +棡 +棣 +棧 +棨 +棩 +棪 +棫 +森 +棱 +棲 +棵 +棶 +棹 +棺 +棻 +棼 +棽 +椅 +椆 +椇 +椋 +植 +椎 +椏 +椒 +椙 +椥 +椪 +椰 +椲 +椴 +椵 +椹 +椽 +椿 +楂 +楊 +楓 +楔 +楗 +楙 +楚 +楝 +楞 +楠 +楡 +楢 +楣 +楤 +楦 +楧 +楨 +楫 +業 +楮 +楯 +楳 +極 +楷 +楸 +楹 +楽 +楿 +概 +榆 +榊 +榍 +榎 +榑 +榔 +榕 +榖 +榗 +榘 +榛 +榜 +榞 +榢 +榣 +榤 +榦 +榧 +榨 +榫 +榭 +榮 +榲 +榴 +榷 +榻 +榿 +槀 +槁 +槃 +槊 +構 +槌 +槍 +槎 +槐 +槓 +槔 +槗 +様 +槙 +槤 +槩 +槭 +槰 +槱 +槲 +槳 +槺 +槻 +槼 +槽 +槿 +樀 +樁 +樂 +樅 +樆 +樊 +樋 +樑 +樓 +樗 +樘 +標 +樞 +樟 +模 +樣 +樨 +権 +樫 +樵 +樸 +樹 +樺 +樻 +樽 +樾 +橄 +橇 +橈 +橋 +橐 +橒 +橓 +橘 +橙 +橚 +機 +橡 +橢 +橪 +橫 +橿 +檀 +檄 +檇 +檉 +檊 +檎 +檐 +檔 +檗 +檜 +檞 +檠 +檡 +檢 +檣 +檦 +檨 +檫 +檬 +檯 +檳 +檵 +檸 +檻 +檽 +櫂 +櫃 +櫆 +櫈 +櫓 +櫚 +櫛 +櫞 +櫟 +櫥 +櫨 +櫪 +櫱 +櫸 +櫻 +櫾 +櫿 +欄 +欉 +權 +欏 +欒 +欖 +欞 +欠 +次 +欣 +欥 +欲 +欸 +欹 +欺 +欽 +款 +歆 +歇 +歉 +歊 +歌 +歎 +歐 +歓 +歙 +歛 +歡 +止 +正 +此 +步 +武 +歧 +歩 +歪 +歲 +歳 +歴 +歷 +歸 +歹 +死 +歿 +殂 +殃 +殄 +殆 +殉 +殊 +殑 +殖 +殘 +殛 +殞 +殟 +殤 +殭 +殮 +殯 +殲 +殳 +段 +殷 +殺 +殻 +殼 +殿 +毀 +毅 +毆 +毉 +毋 +毌 +母 +毎 +每 +毐 +毒 +毓 +比 +毖 +毗 +毘 +毛 +毫 +毬 +毯 +毴 +毸 +毽 +毿 +氂 +氈 +氍 +氏 +氐 +民 +氓 +氖 +気 +氘 +氙 +氚 +氛 +氟 +氣 +氦 +氧 +氨 +氪 +氫 +氬 +氮 +氯 +氰 +水 +氵 +氷 +永 +氹 +氻 +氽 +氾 +汀 +汁 +求 +汊 +汎 +汐 +汕 +汗 +汛 +汜 +汝 +汞 +江 +池 +污 +汧 +汨 +汩 +汪 +汭 +汰 +汲 +汴 +汶 +決 +汽 +汾 +沁 +沂 +沃 +沄 +沅 +沆 +沇 +沈 +沉 +沌 +沍 +沏 +沐 +沒 +沓 +沔 +沖 +沘 +沙 +沚 +沛 +沜 +沢 +沨 +沫 +沭 +沮 +沯 +沱 +河 +沸 +油 +沺 +治 +沼 +沽 +沾 +沿 +況 +泂 +泄 +泆 +泇 +泉 +泊 +泌 +泐 +泓 +泔 +法 +泖 +泗 +泚 +泛 +泠 +泡 +波 +泣 +泥 +泩 +泫 +泮 +泯 +泰 +泱 +泳 +泵 +洄 +洋 +洌 +洎 +洗 +洙 +洛 +洞 +洢 +洣 +洤 +津 +洨 +洩 +洪 +洮 +洱 +洲 +洳 +洵 +洸 +洹 +洺 +活 +洽 +派 +流 +浄 +浙 +浚 +浛 +浜 +浞 +浟 +浠 +浡 +浣 +浤 +浥 +浦 +浩 +浪 +浮 +浯 +浴 +浵 +海 +浸 +浹 +涅 +涇 +消 +涉 +涌 +涎 +涑 +涓 +涔 +涕 +涙 +涪 +涫 +涮 +涯 +液 +涵 +涸 +涼 +涿 +淄 +淅 +淆 +淇 +淋 +淌 +淍 +淎 +淏 +淑 +淓 +淖 +淘 +淙 +淚 +淛 +淝 +淞 +淠 +淡 +淤 +淥 +淦 +淨 +淩 +淪 +淫 +淬 +淮 +淯 +淰 +深 +淳 +淵 +淶 +混 +淸 +淹 +淺 +添 +淼 +淽 +渃 +清 +済 +渉 +渋 +渕 +渙 +渚 +減 +渝 +渟 +渠 +渡 +渣 +渤 +渥 +渦 +渫 +測 +渭 +港 +渲 +渴 +游 +渺 +渼 +渽 +渾 +湃 +湄 +湉 +湊 +湍 +湓 +湔 +湖 +湘 +湛 +湜 +湞 +湟 +湣 +湥 +湧 +湫 +湮 +湯 +湳 +湴 +湼 +満 +溁 +溇 +溈 +溉 +溋 +溎 +溏 +源 +準 +溙 +溜 +溝 +溟 +溢 +溥 +溦 +溧 +溪 +溫 +溯 +溱 +溲 +溴 +溵 +溶 +溺 +溼 +滀 +滁 +滂 +滄 +滅 +滇 +滈 +滉 +滋 +滌 +滎 +滏 +滑 +滓 +滔 +滕 +滘 +滙 +滝 +滬 +滯 +滲 +滴 +滷 +滸 +滹 +滻 +滽 +滾 +滿 +漁 +漂 +漆 +漇 +漈 +漎 +漏 +漓 +演 +漕 +漚 +漠 +漢 +漣 +漩 +漪 +漫 +漬 +漯 +漱 +漲 +漳 +漴 +漵 +漷 +漸 +漼 +漾 +漿 +潁 +潑 +潔 +潘 +潛 +潞 +潟 +潢 +潤 +潭 +潮 +潯 +潰 +潲 +潺 +潼 +潽 +潾 +潿 +澀 +澁 +澂 +澄 +澆 +澇 +澈 +澉 +澋 +澌 +澍 +澎 +澔 +澗 +澠 +澡 +澣 +澤 +澥 +澧 +澪 +澮 +澯 +澱 +澳 +澶 +澹 +澻 +激 +濁 +濂 +濃 +濉 +濊 +濋 +濕 +濘 +濙 +濛 +濞 +濟 +濠 +濡 +濤 +濫 +濬 +濮 +濯 +濰 +濱 +濲 +濶 +濺 +濼 +濾 +瀁 +瀅 +瀆 +瀉 +瀍 +瀏 +瀑 +瀔 +瀕 +瀘 +瀚 +瀛 +瀝 +瀞 +瀟 +瀠 +瀣 +瀦 +瀧 +瀨 +瀬 +瀰 +瀲 +瀴 +瀶 +瀹 +瀾 +灃 +灊 +灌 +灑 +灘 +灝 +灞 +灡 +灣 +灤 +灧 +火 +灰 +灴 +灸 +灼 +災 +炁 +炅 +炆 +炊 +炎 +炒 +炔 +炕 +炘 +炙 +炟 +炣 +炤 +炫 +炬 +炭 +炮 +炯 +炱 +炲 +炳 +炷 +炸 +為 +炻 +烈 +烉 +烊 +烋 +烏 +烒 +烔 +烘 +烙 +烜 +烝 +烤 +烯 +烱 +烴 +烷 +烹 +烺 +烽 +焃 +焄 +焉 +焊 +焌 +焓 +焗 +焙 +焚 +焜 +焞 +無 +焦 +焯 +焰 +焱 +焴 +然 +焻 +焼 +焿 +煇 +煉 +煊 +煌 +煎 +煐 +煒 +煔 +煕 +煖 +煙 +煚 +煜 +煞 +煠 +煤 +煥 +煦 +照 +煨 +煩 +煬 +煮 +煲 +煳 +煵 +煶 +煸 +煽 +熄 +熅 +熇 +熈 +熊 +熏 +熒 +熔 +熖 +熗 +熘 +熙 +熜 +熟 +熠 +熤 +熥 +熨 +熬 +熯 +熱 +熲 +熳 +熵 +熹 +熺 +熼 +熾 +熿 +燁 +燃 +燄 +燈 +燉 +燊 +燎 +燏 +燐 +燒 +燔 +燕 +燘 +燙 +燚 +燜 +燝 +營 +燥 +燦 +燧 +燫 +燬 +燭 +燮 +燴 +燹 +燻 +燼 +燾 +燿 +爀 +爆 +爌 +爍 +爐 +爔 +爚 +爛 +爝 +爨 +爪 +爬 +爭 +爯 +爰 +爲 +爵 +父 +爸 +爹 +爺 +爻 +爽 +爾 +爿 +牁 +牂 +牆 +片 +版 +牌 +牒 +牕 +牖 +牘 +牙 +牛 +牝 +牟 +牠 +牡 +牢 +牧 +物 +牯 +牲 +特 +牻 +牼 +牽 +犀 +犁 +犂 +犇 +犍 +犎 +犖 +犛 +犢 +犧 +犨 +犬 +犯 +犰 +犴 +犽 +狀 +狂 +狄 +狍 +狎 +狐 +狒 +狓 +狗 +狙 +狛 +狟 +狠 +狡 +狦 +狨 +狩 +狳 +狶 +狷 +狸 +狹 +狻 +狼 +猁 +猄 +猇 +猊 +猗 +猙 +猛 +猜 +猝 +猞 +猢 +猥 +猨 +猩 +猳 +猴 +猶 +猷 +猺 +猻 +猾 +猿 +獁 +獃 +獄 +獅 +獇 +獎 +獏 +獐 +獒 +獠 +獢 +獣 +獨 +獬 +獮 +獯 +獰 +獲 +獴 +獵 +獷 +獸 +獺 +獻 +獼 +獾 +玀 +玄 +玆 +率 +玉 +王 +玎 +玏 +玓 +玕 +玖 +玗 +玘 +玙 +玟 +玠 +玡 +玢 +玥 +玧 +玨 
+玩 +玫 +玭 +玲 +玳 +玶 +玷 +玹 +玻 +玾 +珀 +珂 +珅 +珈 +珉 +珊 +珌 +珍 +珎 +珏 +珖 +珙 +珝 +珞 +珠 +珡 +珣 +珤 +珥 +珦 +珧 +珩 +珪 +班 +珮 +珵 +珹 +珺 +珽 +現 +琁 +球 +琄 +琅 +理 +琇 +琉 +琊 +琍 +琎 +琚 +琛 +琡 +琢 +琤 +琥 +琦 +琨 +琪 +琬 +琮 +琯 +琰 +琱 +琳 +琴 +琵 +琶 +琹 +琺 +琿 +瑀 +瑁 +瑂 +瑄 +瑅 +瑆 +瑈 +瑊 +瑋 +瑑 +瑒 +瑕 +瑗 +瑙 +瑚 +瑛 +瑜 +瑝 +瑞 +瑟 +瑠 +瑢 +瑣 +瑤 +瑥 +瑧 +瑨 +瑩 +瑪 +瑭 +瑯 +瑰 +瑱 +瑳 +瑴 +瑺 +瑾 +璀 +璁 +璃 +璄 +璆 +璇 +璈 +璉 +璋 +璌 +璐 +璕 +璘 +璙 +璚 +璜 +璞 +璟 +璠 +璡 +璣 +璥 +璦 +璧 +璨 +璩 +璪 +璫 +璬 +璮 +環 +璱 +璵 +璸 +璹 +璽 +璿 +瓈 +瓊 +瓌 +瓏 +瓑 +瓔 +瓖 +瓘 +瓚 +瓛 +瓜 +瓞 +瓠 +瓢 +瓣 +瓤 +瓦 +瓮 +瓴 +瓶 +瓷 +瓿 +甂 +甄 +甌 +甍 +甑 +甕 +甘 +甙 +甚 +甜 +生 +甡 +產 +産 +甥 +甦 +用 +甩 +甪 +甫 +甬 +甯 +田 +由 +甲 +申 +男 +甸 +甹 +町 +甾 +畀 +畇 +畈 +畊 +畋 +界 +畎 +畏 +畐 +畑 +畔 +留 +畜 +畝 +畠 +畢 +略 +畦 +畧 +番 +畫 +畬 +畯 +異 +畲 +畳 +畵 +當 +畷 +畸 +畹 +畿 +疃 +疆 +疇 +疊 +疋 +疌 +疍 +疏 +疑 +疒 +疕 +疙 +疚 +疝 +疣 +疤 +疥 +疫 +疲 +疳 +疵 +疸 +疹 +疼 +疽 +疾 +痂 +病 +症 +痊 +痍 +痔 +痕 +痘 +痙 +痛 +痞 +痟 +痠 +痢 +痣 +痤 +痧 +痩 +痰 +痱 +痲 +痴 +痹 +痺 +痿 +瘀 +瘁 +瘊 +瘋 +瘍 +瘓 +瘙 +瘜 +瘞 +瘟 +瘠 +瘡 +瘢 +瘤 +瘦 +瘧 +瘩 +瘰 +瘴 +瘺 +癀 +療 +癆 +癇 +癌 +癒 +癖 +癘 +癜 +癟 +癡 +癢 +癤 +癥 +癩 +癬 +癭 +癮 +癯 +癰 +癱 +癲 +癸 +発 +登 +發 +白 +百 +皂 +的 +皆 +皇 +皈 +皋 +皎 +皐 +皓 +皖 +皙 +皚 +皛 +皝 +皞 +皮 +皰 +皴 +皷 +皸 +皺 +皿 +盂 +盃 +盅 +盆 +盈 +益 +盋 +盌 +盎 +盒 +盔 +盛 +盜 +盞 +盟 +盡 +監 +盤 +盥 +盦 +盧 +盨 +盩 +盪 +盫 +目 +盯 +盱 +盲 +直 +盷 +相 +盹 +盺 +盼 +盾 +眀 +省 +眉 +看 +県 +眙 +眛 +眜 +眞 +真 +眠 +眥 +眨 +眩 +眭 +眯 +眵 +眶 +眷 +眸 +眺 +眼 +眾 +着 +睇 +睛 +睜 +睞 +睡 +睢 +督 +睥 +睦 +睨 +睪 +睫 +睭 +睹 +睺 +睽 +睾 +睿 +瞄 +瞅 +瞋 +瞌 +瞎 +瞑 +瞓 +瞞 +瞢 +瞥 +瞧 +瞪 +瞫 +瞬 +瞭 +瞰 +瞳 +瞻 +瞼 +瞽 +瞿 +矇 +矍 +矗 +矚 +矛 +矜 +矞 +矢 +矣 +知 +矧 +矩 +短 +矮 +矯 +石 +矸 +矽 +砂 +砋 +砌 +砍 +砒 +研 +砝 +砢 +砥 +砦 +砧 +砩 +砫 +砭 +砮 +砯 +砰 +砲 +砳 +破 +砵 +砷 +砸 +砼 +硂 +硃 +硅 +硇 +硏 +硐 +硒 +硓 +硚 +硜 +硝 +硤 +硨 +硫 +硬 +硭 +硯 +硼 +碁 +碇 +碉 +碌 +碎 +碑 +碓 +碕 +碗 +碘 +碚 +碟 +碡 +碣 +碧 +碩 +碪 +碭 +碰 +碲 +碳 +碴 +碶 +碸 +確 +碻 +碼 +碽 +碾 +磁 +磅 +磊 +磋 +磐 +磔 +磕 +磘 +磙 +磚 +磜 +磡 +磨 +磪 +磬 +磯 +磱 +磲 +磵 +磷 +磺 +磻 +磾 +礁 +礄 +礎 +礐 +礑 +礒 +礙 +礠 +礦 +礪 +礫 +礬 +礮 +礱 +礴 +示 +礻 +礽 +社 +祀 +祁 +祂 +祆 +祇 +祈 +祉 +祋 +祏 +祐 +祓 +祕 +祖 +祗 +祙 +祚 +祛 +祜 +祝 +神 +祟 +祠 +祥 +祧 +票 +祭 +祹 +祺 +祼 +祿 +禁 +禃 +禇 +禍 +禎 +福 +禑 +禓 +禔 +禕 +禘 +禛 +禟 +禠 +禤 +禦 +禧 +禨 +禩 +禪 +禮 +禰 +禱 +禵 +禹 +禺 +禼 +禽 +禾 +禿 +秀 +私 +秈 +秉 +秋 +科 +秒 +秕 +秘 +租 +秠 +秣 +秤 +秦 +秧 +秩 +秭 +秳 +秸 +移 +稀 +稅 +稈 +稉 +程 +稍 +稑 +稔 +稗 +稘 +稙 +稚 +稜 +稞 +稟 +稠 +種 +稱 +稲 +稷 +稹 +稺 +稻 +稼 +稽 +稾 +稿 +穀 +穂 +穆 +穈 +穉 +穌 +積 +穎 +穗 +穟 +穠 +穡 +穢 +穣 +穩 +穫 +穰 +穴 +穵 +究 +穹 +空 +穿 +突 +窄 +窅 +窈 +窋 +窒 +窕 +窖 +窗 +窘 +窟 +窠 +窣 +窨 +窩 +窪 +窮 +窯 +窰 +窶 +窺 +窿 +竄 +竅 +竇 +竈 +竊 +立 +竑 +站 +竜 +竟 +章 +竣 +童 +竦 +竩 +竭 +端 +競 +竹 +竺 +竻 +竿 +笄 +笆 +笈 +笏 +笑 +笘 +笙 +笛 +笞 +笠 +笥 +符 +笨 +笩 +笪 +第 +笭 +笮 +笯 +笱 +笳 +笹 +筅 +筆 +等 +筊 +筋 +筌 +筍 +筏 +筐 +筒 +答 +策 +筘 +筠 +筥 +筦 +筧 +筬 +筭 +筱 +筲 +筳 +筵 +筶 +筷 +筻 +箆 +箇 +箋 +箍 +箏 +箐 +箑 +箒 +箔 +箕 +算 +箜 +管 +箬 +箭 +箱 +箴 +箸 +節 +篁 +範 +篆 +篇 +築 +篊 +篋 +篌 +篔 +篙 +篝 +篠 +篡 +篤 +篥 +篦 +篩 +篪 +篭 +篯 +篳 +篷 +簀 +簃 +簇 +簉 +簋 +簍 +簑 +簕 +簗 +簞 +簠 +簡 +簧 +簪 +簫 +簷 +簸 +簹 +簺 +簽 +簾 +簿 +籀 +籃 +籌 +籍 +籐 +籙 +籛 +籜 +籝 +籟 +籠 +籣 +籤 +籥 +籪 +籬 +籮 +籲 +米 +籽 +籾 +粄 +粉 +粍 +粑 +粒 +粕 +粗 +粘 +粟 +粢 +粥 +粦 +粧 +粩 +粱 +粲 +粳 +粵 +粹 +粼 +粽 +精 +粿 +糀 +糅 +糊 +糌 +糍 +糎 +糕 +糖 +糙 +糜 +糝 +糞 +糟 +糠 +糢 +糧 +糬 +糯 +糰 +糴 +糶 +糸 +糹 +糺 +系 +糾 +紀 +紂 +約 +紅 +紆 +紇 +紈 +紉 +紊 +紋 +納 +紐 +紑 +紓 +純 +紕 +紗 +紘 +紙 +級 +紛 +紜 +紝 +紞 +素 +紡 +索 +紫 +紮 +累 +細 +紱 +紲 +紳 +紵 +紹 +紺 +紿 +終 +絃 +組 +絆 +経 +絎 +結 +絕 +絛 +絜 +絞 +絡 +絢 +給 +絨 +絪 +絮 +統 +絲 +絳 +絵 +絶 +絹 +絺 +綁 +綃 +綈 +綉 +綎 +綏 +經 +綖 +継 +続 +綜 +綝 +綞 +綠 +綢 +綣 +綦 +綧 +綫 +綬 +維 +綮 +綰 +綱 +網 +綳 +綴 +綸 +綺 +綻 +綽 +綾 +綿 +緁 +緃 +緄 +緈 +緊 +緋 +総 +緑 +緒 +緖 +緘 +線 +緜 +緝 +緞 +締 +緡 +緣 +緤 +編 +緩 +緬 +緯 +緱 +緲 +練 +緹 +緻 +縂 +縄 +縈 +縉 +縊 +縕 +縛 +縝 +縞 +縠 +縡 +縣 +縤 +縫 +縮 +縯 +縱 +縴 +縵 +縷 +縹 +縻 +總 +績 +繁 +繃 +繆 +繇 +繒 +織 +繕 +繖 +繙 +繚 +繞 +繡 +繩 +繪 +繫 +繭 +繰 +繳 +繹 +繻 +繼 +繽 +繾 +纁 +纂 +纈 +續 +纍 +纏 +纓 +纔 +纕 +纖 +纘 +纛 +纜 +缐 +缶 +缸 +缺 +缽 +罃 +罄 +罅 +罈 +罉 +罌 +罍 +罐 +罔 +罕 +罘 +罟 +罡 +罨 +罩 +罪 +置 +罰 +罱 +署 +罳 +罵 +罶 +罷 +罹 +罽 +羂 +羅 +羆 +羈 +羊 +羋 +羌 +美 +羔 +羕 +羗 +羙 +羚 +羞 +羡 +羣 +群 +羥 +羧 +羨 +義 +羯 +羰 +羱 +羲 +羸 +羹 +羽 +羿 +翀 +翁 +翂 +翃 +翅 +翊 
+翌 +翎 +翏 +習 +翔 +翕 +翙 +翜 +翟 +翠 +翡 +翥 +翦 +翩 +翬 +翮 +翰 +翱 +翳 +翹 +翻 +翼 +耀 +老 +考 +耄 +者 +耆 +而 +耍 +耎 +耐 +耑 +耒 +耔 +耕 +耗 +耘 +耙 +耜 +耦 +耨 +耬 +耳 +耵 +耶 +耷 +耽 +耿 +聃 +聆 +聊 +聒 +聖 +聘 +聚 +聞 +聟 +聨 +聯 +聰 +聱 +聲 +聳 +聴 +聶 +職 +聽 +聾 +聿 +肄 +肅 +肆 +肇 +肉 +肋 +肌 +肏 +肖 +肘 +肚 +肛 +肜 +肝 +肟 +股 +肢 +肥 +肩 +肪 +肫 +肯 +肱 +育 +肸 +肹 +肺 +肼 +肽 +胂 +胃 +胄 +胅 +胇 +胊 +背 +胍 +胎 +胖 +胗 +胙 +胚 +胛 +胝 +胞 +胡 +胤 +胥 +胬 +胭 +胰 +胱 +胳 +胴 +胸 +胺 +胼 +能 +脂 +脅 +脆 +脇 +脈 +脊 +脒 +脖 +脘 +脛 +脣 +脩 +脫 +脬 +脭 +脯 +脲 +脳 +脷 +脹 +脾 +腆 +腈 +腊 +腋 +腌 +腎 +腐 +腑 +腓 +腔 +腕 +腥 +腦 +腧 +腩 +腫 +腮 +腰 +腱 +腳 +腴 +腸 +腹 +腺 +腿 +膀 +膂 +膈 +膊 +膏 +膚 +膛 +膜 +膝 +膠 +膣 +膥 +膦 +膨 +膩 +膮 +膳 +膺 +膽 +膾 +膿 +臀 +臂 +臃 +臆 +臉 +臊 +臍 +臏 +臘 +臚 +臞 +臟 +臠 +臣 +臧 +臨 +自 +臭 +臯 +至 +致 +臺 +臻 +臼 +臾 +舂 +舅 +與 +興 +舉 +舊 +舌 +舍 +舎 +舒 +舔 +舖 +舘 +舛 +舜 +舞 +舟 +舢 +舥 +舨 +舩 +航 +舫 +般 +舲 +舵 +舶 +舷 +舸 +船 +舺 +艅 +艇 +艉 +艋 +艎 +艏 +艔 +艘 +艙 +艚 +艦 +艮 +良 +艱 +色 +艶 +艷 +艸 +艽 +艾 +艿 +芃 +芊 +芋 +芍 +芎 +芑 +芒 +芘 +芙 +芛 +芝 +芡 +芥 +芨 +芩 +芪 +芫 +芬 +芭 +芮 +芯 +花 +芳 +芴 +芷 +芸 +芹 +芻 +芽 +芾 +苄 +苅 +苑 +苒 +苓 +苔 +苕 +苗 +苛 +苜 +苝 +苞 +苟 +苡 +苣 +苤 +若 +苦 +苧 +苪 +苫 +苯 +英 +苳 +苴 +苷 +苺 +苻 +苼 +苾 +茀 +茁 +茂 +范 +茄 +茅 +茆 +茇 +茈 +茉 +茌 +茗 +茘 +茚 +茛 +茜 +茝 +茨 +茫 +茬 +茭 +茮 +茯 +茱 +茲 +茴 +茵 +茶 +茷 +茸 +茹 +茺 +茼 +荀 +荃 +荅 +荇 +草 +荊 +荎 +荏 +荒 +荔 +荖 +荘 +荳 +荷 +荸 +荻 +荼 +荽 +莆 +莉 +莊 +莎 +莒 +莓 +莕 +莖 +莘 +莙 +莛 +莜 +莞 +莠 +莢 +莧 +莨 +莩 +莪 +莫 +莽 +莿 +菀 +菁 +菅 +菇 +菈 +菉 +菊 +菌 +菍 +菏 +菑 +菓 +菔 +菖 +菘 +菜 +菝 +菟 +菠 +菡 +菥 +菩 +菪 +菫 +華 +菰 +菱 +菲 +菴 +菶 +菸 +菹 +菺 +菼 +菽 +菾 +萁 +萃 +萄 +萇 +萊 +萌 +萍 +萎 +萐 +萘 +萜 +萠 +萡 +萣 +萩 +萬 +萭 +萱 +萵 +萸 +萹 +萼 +落 +葃 +葆 +葉 +葊 +葎 +葑 +葒 +著 +葙 +葚 +葛 +葜 +葝 +葡 +董 +葦 +葩 +葫 +葬 +葭 +葯 +葰 +葳 +葵 +葶 +葷 +葺 +蒂 +蒄 +蒍 +蒎 +蒐 +蒓 +蒔 +蒗 +蒙 +蒜 +蒞 +蒟 +蒡 +蒢 +蒤 +蒧 +蒨 +蒭 +蒯 +蒲 +蒴 +蒸 +蒹 +蒺 +蒻 +蒼 +蒽 +蒾 +蒿 +蓀 +蓁 +蓂 +蓄 +蓆 +蓉 +蓋 +蓍 +蓑 +蓓 +蓖 +蓘 +蓚 +蓧 +蓨 +蓪 +蓬 +蓭 +蓮 +蓯 +蓳 +蓼 +蓽 +蓿 +蔆 +蔎 +蔑 +蔓 +蔔 +蔕 +蔗 +蔘 +蔚 +蔝 +蔞 +蔡 +蔣 +蔥 +蔦 +蔬 +蔭 +蔴 +蔵 +蔻 +蔽 +蕁 +蕃 +蕅 +蕈 +蕉 +蕊 +蕎 +蕑 +蕒 +蕖 +蕘 +蕙 +蕚 +蕟 +蕡 +蕢 +蕤 +蕨 +蕩 +蕪 +蕭 +蕷 +蕹 +蕺 +蕻 +蕾 +薀 +薄 +薆 +薇 +薈 +薊 +薌 +薏 +薐 +薑 +薔 +薗 +薘 +薙 +薛 +薜 +薞 +薟 +薡 +薦 +薨 +薩 +薪 +薫 +薬 +薯 +薰 +薲 +薷 +薸 +薹 +薺 +薾 +薿 +藁 +藉 +藍 +藎 +藏 +藐 +藔 +藕 +藜 +藝 +藟 +藤 +藥 +藦 +藨 +藩 +藪 +藶 +藸 +藹 +藺 +藻 +藿 +蘂 +蘄 +蘅 +蘆 +蘇 +蘊 +蘋 +蘐 +蘑 +蘓 +蘗 +蘘 +蘚 +蘞 +蘢 +蘧 +蘩 +蘭 +蘵 +蘶 +蘸 +蘼 +蘿 +虉 +虎 +虐 +虓 +虔 +處 +虖 +虛 +虜 +虞 +號 +虢 +虧 +虨 +虯 +虱 +虵 +虹 +虺 +虻 +蚆 +蚊 +蚋 +蚌 +蚍 +蚓 +蚖 +蚜 +蚝 +蚡 +蚢 +蚣 +蚤 +蚧 +蚨 +蚩 +蚪 +蚯 +蚱 +蚴 +蚵 +蚶 +蚺 +蚼 +蛀 +蛄 +蛇 +蛉 +蛋 +蛍 +蛐 +蛑 +蛔 +蛙 +蛛 +蛞 +蛟 +蛤 +蛭 +蛯 +蛸 +蛹 +蛺 +蛻 +蛾 +蜀 +蜂 +蜃 +蜆 +蜇 +蜈 +蜉 +蜊 +蜍 +蜑 +蜒 +蜓 +蜘 +蜚 +蜛 +蜜 +蜞 +蜢 +蜣 +蜥 +蜨 +蜮 +蜯 +蜱 +蜴 +蜷 +蜻 +蜾 +蜿 +蝀 +蝌 +蝍 +蝎 +蝓 +蝕 +蝗 +蝘 +蝙 +蝚 +蝟 +蝠 +蝣 +蝤 +蝦 +蝨 +蝮 +蝯 +蝰 +蝲 +蝴 +蝶 +蝸 +蝽 +螂 +螃 +螄 +螅 +螈 +螋 +融 +螐 +螔 +螞 +螟 +螠 +螢 +螣 +螥 +螫 +螭 +螯 +螳 +螶 +螺 +螻 +螽 +螾 +蟀 +蟄 +蟅 +蟆 +蟊 +蟋 +蟌 +蟎 +蟑 +蟒 +蟜 +蟠 +蟥 +蟪 +蟫 +蟬 +蟯 +蟲 +蟳 +蟴 +蟶 +蟹 +蟻 +蟾 +蠂 +蠃 +蠄 +蠅 +蠆 +蠊 +蠋 +蠍 +蠐 +蠑 +蠓 +蠔 +蠕 +蠖 +蠘 +蠙 +蠟 +蠡 +蠢 +蠣 +蠱 +蠲 +蠵 +蠶 +蠷 +蠹 +蠻 +血 +衂 +衆 +行 +衍 +衎 +術 +衕 +衖 +街 +衙 +衚 +衛 +衜 +衝 +衞 +衡 +衢 +衣 +表 +衩 +衫 +衰 +衲 +衷 +衽 +衾 +衿 +袁 +袂 +袈 +袋 +袍 +袓 +袖 +袛 +袞 +袤 +袪 +被 +袱 +袴 +袾 +裁 +裂 +裊 +裎 +裒 +裔 +裕 +裖 +裘 +裙 +補 +裝 +裟 +裡 +裨 +裬 +裱 +裳 +裴 +裵 +裸 +裹 +製 +裾 +裿 +褀 +褂 +複 +褌 +褍 +褎 +褐 +褒 +褓 +褔 +褘 +褙 +褚 +褞 +褥 +褧 +褪 +褫 +褭 +褲 +褶 +褸 +褻 +襄 +襌 +襖 +襞 +襟 +襠 +襤 +襦 +襪 +襯 +襲 +襴 +襶 +襻 +襾 +西 +要 +覃 +覆 +覇 +覈 +見 +覌 +規 +覓 +視 +覚 +覡 +覦 +覧 +親 +覬 +覲 +観 +覺 +覽 +覿 +觀 +角 +觔 +觙 +觚 +觜 +解 +觭 +觱 +觴 +觶 +觸 +觿 +言 +訁 +訂 +訃 +訇 +計 +訊 +訌 +討 +訏 +訐 +訒 +訓 +訔 +訕 +訖 +託 +記 +訛 +訝 +訟 +訣 +訥 +訪 +設 +許 +訴 +訶 +診 +註 +証 +訾 +詁 +詆 +詈 +詐 +詒 +詔 +評 +詛 +詞 +詠 +詡 +詢 +詣 +詥 +試 +詧 +詩 +詫 +詭 +詮 +詰 +話 +該 +詳 +詵 +詹 +詼 +誄 +誅 +誇 +誌 +認 +誒 +誓 +誕 +誘 +語 +誠 +誡 +誣 +誤 +誥 +誦 +誨 +說 +説 +読 +誰 +課 +誴 +誹 +誼 +誾 +調 +談 +請 +諍 +諏 +諒 +論 +諗 +諜 +諟 +諠 +諡 +諤 +諦 +諧 +諪 +諫 +諭 +諮 +諱 +諲 +諳 +諴 +諶 +諷 +諸 +諺 +諼 +諾 +謀 +謁 +謂 +謄 +謇 +謊 +謌 +謎 +謏 +謐 +謔 +謖 +謗 +謙 +謚 +講 +謜 +謝 +謠 +謢 +謤 +謨 +謩 +謫 +謬 +謳 +謹 +謾 +證 +譏 +譓 +譔 +識 +譙 +譚 +譜 +譞 +警 +譫 +譬 +譭 +譯 +議 +譲 +譳 +譴 +護 +譽 +譿 +讀 +讃 +變 +讌 +讎 +讓 +讖 +讙 +讚 +讜 +讞 +谷 +谿 +豁 +豆 +豇 +豈 
+豉 +豊 +豌 +豎 +豐 +豔 +豕 +豚 +象 +豢 +豨 +豪 +豫 +豬 +豳 +豸 +豹 +豺 +豿 +貂 +貅 +貉 +貊 +貌 +貐 +貒 +貓 +貔 +貘 +貝 +貞 +負 +財 +貢 +貤 +貧 +貨 +販 +貪 +貫 +責 +貭 +貮 +貯 +貲 +貳 +貴 +貶 +買 +貸 +貺 +費 +貼 +貽 +貿 +賀 +賁 +賂 +賃 +賄 +資 +賈 +賊 +賑 +賒 +賓 +賔 +賕 +賚 +賜 +賞 +賠 +賡 +賢 +賣 +賤 +賦 +賨 +質 +賬 +賭 +賴 +賹 +賺 +賻 +購 +賽 +賾 +贄 +贅 +贇 +贈 +贊 +贌 +贍 +贏 +贓 +贔 +贖 +贛 +赤 +赦 +赧 +赫 +赬 +赭 +走 +赳 +赴 +起 +趁 +超 +越 +趐 +趕 +趖 +趙 +趟 +趣 +趨 +足 +趴 +趵 +趺 +趼 +趾 +跅 +跆 +跋 +跌 +跏 +跑 +跖 +跗 +跛 +距 +跟 +跡 +跣 +跤 +跨 +跩 +跪 +路 +跳 +踎 +踏 +踐 +踝 +踞 +踢 +踩 +踰 +踴 +踹 +踺 +蹂 +蹄 +蹇 +蹈 +蹉 +蹊 +蹋 +蹕 +蹙 +蹟 +蹠 +蹤 +蹦 +蹬 +蹭 +蹯 +蹲 +蹴 +蹶 +蹺 +蹻 +蹼 +躁 +躂 +躄 +躉 +躋 +躍 +躑 +躒 +躔 +躝 +躪 +身 +躬 +躰 +躲 +躺 +軀 +車 +軋 +軌 +軍 +軎 +軒 +軔 +軛 +軟 +転 +軫 +軲 +軸 +軹 +軺 +軻 +軼 +軽 +軾 +較 +輄 +輅 +載 +輋 +輒 +輓 +輔 +輕 +輛 +輝 +輞 +輟 +輥 +輦 +輩 +輪 +輬 +輭 +輯 +輶 +輸 +輻 +輾 +輿 +轀 +轂 +轄 +轅 +轆 +轉 +轍 +轎 +轘 +轝 +轟 +轤 +辛 +辜 +辟 +辣 +辦 +辧 +辨 +辭 +辮 +辯 +辰 +辱 +農 +辵 +辺 +辻 +込 +迂 +迄 +迅 +迎 +近 +返 +迢 +迤 +迥 +迦 +迪 +迫 +迭 +迮 +述 +迴 +迵 +迷 +迸 +迺 +追 +退 +送 +逃 +逄 +逅 +逆 +逈 +逋 +逌 +逍 +逎 +透 +逐 +逑 +途 +逕 +逖 +逗 +這 +通 +逛 +逝 +逞 +速 +造 +逢 +連 +逤 +逨 +逮 +逯 +進 +逴 +逵 +逸 +逹 +逺 +逼 +逾 +遁 +遂 +遄 +遇 +遊 +運 +遍 +過 +遏 +遐 +遒 +道 +達 +違 +遘 +遙 +遛 +遜 +遞 +遠 +遢 +遣 +遨 +適 +遭 +遮 +遯 +遲 +遴 +遵 +遶 +遷 +選 +遹 +遺 +遼 +避 +邀 +邁 +邂 +邃 +還 +邇 +邈 +邉 +邊 +邋 +邏 +邑 +邕 +邗 +邙 +邛 +邠 +邡 +邢 +那 +邦 +邨 +邪 +邯 +邰 +邱 +邲 +邳 +邴 +邵 +邸 +邽 +邾 +郁 +郃 +郄 +郅 +郇 +郊 +郋 +郎 +郗 +郛 +郜 +郝 +郞 +郟 +郡 +郢 +郤 +部 +郪 +郫 +郭 +郯 +郳 +郴 +郵 +郷 +都 +郾 +郿 +鄂 +鄃 +鄄 +鄆 +鄉 +鄋 +鄑 +鄒 +鄔 +鄖 +鄗 +鄘 +鄙 +鄚 +鄜 +鄞 +鄠 +鄢 +鄣 +鄤 +鄧 +鄩 +鄫 +鄭 +鄯 +鄰 +鄱 +鄲 +鄳 +鄴 +鄺 +酃 +酆 +酈 +酉 +酊 +酋 +酌 +配 +酎 +酏 +酐 +酒 +酔 +酗 +酚 +酞 +酡 +酢 +酣 +酥 +酩 +酪 +酬 +酮 +酯 +酰 +酴 +酵 +酶 +酷 +酸 +酺 +酼 +醁 +醂 +醃 +醅 +醇 +醉 +醋 +醌 +醍 +醐 +醒 +醚 +醛 +醜 +醞 +醢 +醣 +醪 +醫 +醬 +醮 +醯 +醴 +醺 +醾 +醿 +釀 +釁 +釆 +采 +釉 +釋 +里 +重 +野 +量 +釐 +金 +釒 +釓 +釔 +釕 +釗 +釘 +釙 +釚 +釜 +針 +釣 +釤 +釦 +釧 +釩 +釪 +釭 +釴 +釵 +釷 +釹 +釺 +鈀 +鈁 +鈄 +鈇 +鈈 +鈉 +鈊 +鈍 +鈏 +鈐 +鈑 +鈔 +鈕 +鈖 +鈞 +鈢 +鈣 +鈥 +鈦 +鈫 +鈮 +鈰 +鈳 +鈴 +鈷 +鈸 +鈹 +鈺 +鈾 +鈿 +鉀 +鉄 +鉅 +鉆 +鉈 +鉉 +鉋 +鉌 +鉍 +鉏 +鉑 +鉓 +鉗 +鉚 +鉛 +鉞 +鉟 +鉤 +鉦 +鉬 +鉭 +鉲 +鉶 +鉷 +鉸 +鉻 +鉾 +鉿 +銀 +銂 +銃 +銅 +銋 +銍 +銑 +銓 +銕 +銖 +銘 +銚 +銜 +銠 +銣 +銥 +銦 +銨 +銩 +銪 +銫 +銬 +銭 +銱 +銲 +銳 +銶 +銷 +銹 +銻 +銼 +銾 +鋁 +鋅 +鋆 +鋇 +鋌 +鋏 +鋐 +鋒 +鋕 +鋗 +鋙 +鋡 +鋤 +鋥 +鋦 +鋨 +鋪 +鋮 +鋯 +鋰 +鋱 +鋳 +鋶 +鋸 +鋹 +鋼 +錀 +錄 +錏 +錐 +錒 +錕 +錘 +錚 +錞 +錟 +錠 +錡 +錢 +錦 +錨 +錫 +錬 +錮 +錯 +錳 +錶 +錸 +錻 +鍀 +鍇 +鍈 +鍉 +鍊 +鍋 +鍍 +鍏 +鍔 +鍘 +鍛 +鍝 +鍟 +鍠 +鍥 +鍩 +鍬 +鍱 +鍳 +鍵 +鍶 +鍷 +鍺 +鍼 +鍾 +鎂 +鎅 +鎊 +鎌 +鎏 +鎓 +鎔 +鎖 +鎗 +鎘 +鎚 +鎛 +鎢 +鎣 +鎦 +鎧 +鎪 +鎬 +鎭 +鎮 +鎰 +鎳 +鎵 +鎻 +鏃 +鏇 +鏈 +鏊 +鏌 +鏐 +鏑 +鏓 +鏖 +鏗 +鏘 +鏜 +鏝 +鏞 +鏟 +鏡 +鏢 +鏤 +鏦 +鏳 +鏴 +鏵 +鏷 +鏻 +鏽 +鐃 +鐇 +鐈 +鐓 +鐔 +鐘 +鐙 +鐠 +鐡 +鐤 +鐦 +鐧 +鐫 +鐬 +鐭 +鐮 +鐲 +鐳 +鐵 +鐸 +鐺 +鐽 +鐿 +鑀 +鑁 +鑂 +鑄 +鑅 +鑊 +鑌 +鑑 +鑒 +鑛 +鑠 +鑣 +鑨 +鑪 +鑫 +鑭 +鑰 +鑲 +鑴 +鑷 +鑼 +鑽 +鑾 +鑿 +長 +門 +閂 +閃 +閆 +閉 +開 +閎 +閏 +閑 +閒 +間 +閔 +閘 +閜 +閞 +閟 +関 +閣 +閥 +閦 +閨 +閩 +閬 +閭 +閰 +閱 +閶 +閹 +閻 +閼 +閾 +閿 +闆 +闇 +闈 +闊 +闋 +闌 +闍 +闐 +闓 +闔 +闕 +闖 +闘 +關 +闞 +闡 +闢 +闥 +阜 +阝 +阡 +阪 +阭 +阮 +阯 +阱 +防 +阻 +阿 +陀 +陁 +陂 +附 +陋 +陌 +降 +限 +陔 +陘 +陛 +陜 +陝 +陞 +陟 +陡 +院 +陣 +除 +陪 +陬 +陰 +陲 +陳 +陵 +陶 +陷 +陸 +険 +陽 +隄 +隅 +隆 +隈 +隊 +隋 +隍 +階 +隔 +隕 +隗 +隘 +隙 +際 +障 +隣 +隧 +隨 +險 +隰 +隱 +隲 +隳 +隴 +隷 +隸 +隹 +隻 +隼 +雀 +雁 +雄 +雅 +集 +雇 +雉 +雋 +雌 +雍 +雎 +雑 +雒 +雕 +雖 +雙 +雛 +雜 +雝 +雞 +離 +難 +雨 +雩 +雪 +雫 +雯 +雱 +雲 +零 +雷 +雹 +電 +需 +霄 +霅 +霆 +震 +霈 +霉 +霊 +霍 +霎 +霏 +霑 +霓 +霖 +霙 +霜 +霞 +霤 +霧 +霨 +霰 +露 +霶 +霸 +霹 +霽 +霾 +靁 +靂 +靄 +靈 +靉 +靑 +青 +靖 +靚 +靛 +靜 +非 +靠 +靡 +面 +革 +靫 +靬 +靭 +靳 +靴 +靶 +靺 +靼 +鞅 +鞆 +鞋 +鞍 +鞏 +鞘 +鞞 +鞠 +鞣 +鞥 +鞦 +鞨 +鞭 +鞮 +鞴 +韁 +韃 +韆 +韋 +韌 +韑 +韓 +韙 +韜 +韞 +韠 +韡 +韭 +韮 +音 +韶 +韺 +韻 +韾 +響 +頁 +頂 +頃 +項 +順 +須 +頊 +頌 +頍 +頎 +頏 +預 +頑 +頒 +頓 +頔 +頗 +領 +頜 +頠 +頡 +頤 +頦 +頫 +頭 +頰 +頴 +頵 +頷 +頸 +頹 +頻 +頼 +顆 +題 +額 +顎 +顏 +顒 +顓 +顔 +顕 +顗 +願 +顙 +顛 +類 +顥 +顧 +顫 +顯 +顰 +顱 +顳 +顴 +風 +颮 +颯 +颱 +颶 +颺 +颼 +飄 +飆 +飈 +飛 +食 +飠 +飡 +飢 +飥 +飩 +飪 +飫 +飬 +飭 +飮 +飯 +飲 +飴 +飼 +飽 +飾 +餃 +餄 +餅 +餉 +養 +餌 +餎 +餐 +餒 +餓 +餗 +餘 +餚 +餛 +餞 +餠 +餡 +館 +餮 +餵 +餺 +餾 +餿 +饃 +饅 +饋 +饌 +饑 +饒 +饕 +饗 +饞 +饟 +饢 +首 +馗 +馘 +香 +馛 +馥 +馦 +馨 +馬 +馭 +馮 +馯 +馱 +馳 +馴 +馼 +駁 
+駄 +駅 +駆 +駐 +駑 +駒 +駔 +駕 +駘 +駙 +駛 +駝 +駟 +駢 +駭 +駰 +駱 +駿 +騁 +騂 +騄 +騅 +騋 +騎 +騏 +験 +騖 +騙 +騤 +騨 +騫 +騭 +騮 +騰 +騶 +騷 +騾 +驁 +驃 +驄 +驅 +驊 +驌 +驍 +驎 +驒 +驕 +驗 +驚 +驛 +驟 +驢 +驤 +驥 +驩 +驪 +骨 +骯 +骰 +骶 +骷 +骸 +骼 +髀 +髂 +髎 +髏 +髑 +髒 +髓 +體 +高 +髙 +髡 +髦 +髪 +髭 +髮 +髯 +髲 +髷 +髹 +髻 +鬃 +鬄 +鬅 +鬆 +鬍 +鬚 +鬟 +鬢 +鬣 +鬥 +鬧 +鬨 +鬩 +鬪 +鬬 +鬮 +鬯 +鬱 +鬲 +鬹 +鬻 +鬼 +魁 +魂 +魃 +魄 +魅 +魈 +魋 +魍 +魎 +魏 +魔 +魕 +魘 +魚 +魛 +魞 +魟 +魣 +魨 +魩 +魮 +魯 +魴 +魷 +鮀 +鮁 +鮃 +鮄 +鮊 +鮋 +鮍 +鮐 +鮑 +鮒 +鮓 +鮗 +鮜 +鮟 +鮠 +鮡 +鮣 +鮨 +鮪 +鮫 +鮭 +鮮 +鮰 +鮸 +鮹 +鮻 +鯀 +鯁 +鯃 +鯇 +鯉 +鯊 +鯏 +鯒 +鯓 +鯔 +鯕 +鯖 +鯗 +鯙 +鯛 +鯡 +鯢 +鯤 +鯧 +鯨 +鯪 +鯭 +鯮 +鯰 +鯶 +鯷 +鯻 +鯽 +鯿 +鰂 +鰃 +鰆 +鰈 +鰉 +鰍 +鰏 +鰒 +鰓 +鰕 +鰗 +鰛 +鰜 +鰟 +鰣 +鰤 +鰧 +鰨 +鰩 +鰭 +鰮 +鰱 +鰲 +鰳 +鰶 +鰷 +鰹 +鰺 +鰻 +鰼 +鰾 +鱀 +鱂 +鱅 +鱇 +鱈 +鱉 +鱊 +鱒 +鱓 +鱔 +鱖 +鱗 +鱘 +鱚 +鱝 +鱟 +鱠 +鱣 +鱥 +鱧 +鱨 +鱬 +鱮 +鱰 +鱲 +鱵 +鱷 +鱸 +鱺 +鱻 +鳥 +鳧 +鳩 +鳯 +鳰 +鳳 +鳴 +鳶 +鳽 +鴆 +鴇 +鴉 +鴒 +鴓 +鴕 +鴗 +鴛 +鴝 +鴞 +鴟 +鴡 +鴣 +鴦 +鴨 +鴫 +鴯 +鴰 +鴴 +鴻 +鴿 +鵂 +鵄 +鵎 +鵐 +鵑 +鵒 +鵓 +鵙 +鵜 +鵝 +鵞 +鵟 +鵠 +鵡 +鵪 +鵬 +鵯 +鵰 +鵲 +鵵 +鵼 +鵾 +鶆 +鶇 +鶉 +鶏 +鶒 +鶓 +鶘 +鶚 +鶡 +鶥 +鶩 +鶬 +鶯 +鶲 +鶴 +鶹 +鶺 +鶻 +鶼 +鶿 +鷂 +鷄 +鷉 +鷎 +鷓 +鷗 +鷙 +鷚 +鷟 +鷥 +鷦 +鷫 +鷯 +鷲 +鷳 +鷸 +鷹 +鷺 +鸊 +鸌 +鸐 +鸑 +鸕 +鸘 +鸚 +鸛 +鸜 +鸝 +鸞 +鹮 +鹵 +鹹 +鹼 +鹽 +鹿 +麂 +麅 +麇 +麈 +麊 +麋 +麐 +麒 +麓 +麗 +麝 +麞 +麟 +麥 +麩 +麪 +麯 +麴 +麵 +麹 +麺 +麻 +麼 +麽 +麾 +麿 +黁 +黃 +黇 +黌 +黍 +黎 +黏 +黐 +黑 +黒 +黔 +默 +黙 +黛 +黜 +黝 +點 +黟 +黥 +黧 +黨 +黯 +黴 +黶 +黻 +黼 +黽 +黿 +鼂 +鼇 +鼈 +鼉 +鼎 +鼐 +鼒 +鼓 +鼕 +鼙 +鼠 +鼢 +鼩 +鼬 +鼯 +鼱 +鼴 +鼷 +鼻 +鼽 +鼾 +齊 +齋 +齒 +齕 +齡 +齣 +齦 +齧 +齲 +齶 +龍 +龎 +龐 +龑 +龔 +龕 +龜 +龝 +龠 +龢 +郎 +凉 +﹑ +﹗ +﹝ +﹞ +﹢ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +A +B +C +D +E +F +G +H +I +K +L +M +N +O +P +R +S +T +U +V +W +Y +Z +[ +] +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +r +s +t +u +z +{ +| +} +~ +¥ +𣇉 + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt new file mode 100644 index 00000000..2b6f6649 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt @@ -0,0 +1,163 @@ + +! +# +$ +% +& +' +( ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +É +é +Ё +Є +І +Ј +Љ +Ў +А +Б +В +Г +Д +Е +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +ё +ђ +є +і +ј +љ +њ +ћ +ў +џ +Ґ +ґ diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt new file mode 100644 index 00000000..f5592306 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt @@ -0,0 +1,167 @@ + +! +# +$ +% +& +' +( ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +? 
+@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +É +é +ँ +ं +ः +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +ऩ +प +फ +ब +भ +म +य +र +ऱ +ल +ळ +व +श +ष +स +ह +़ +ा +ि +ी +ु +ू +ृ +ॅ +े +ै +ॉ +ो +ौ +् +॒ +क़ +ख़ +ग़ +ज़ +ड़ +ढ़ +फ़ +ॠ +। +० +१ +२ +३ +४ +५ +६ +७ +८ +९ +॰ diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt new file mode 100644 index 00000000..7677d31b --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt @@ -0,0 +1,95 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt new file mode 100644 index 00000000..339d4b89 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt @@ -0,0 +1,4399 @@ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +] +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +© +° +² +´ +½ +Á +Ä +Å +Ç +È +É +Í +Ó +Ö +× +Ü +ß +à +á +â +ã +ä +å +æ +ç +è +é +ê +ë +í +ð +ñ +ò +ó +ô +õ +ö +ø +ú +û +ü +ý +ā +ă +ą +ć +Č +č +đ +ē +ė +ę +ğ +ī +ı +Ł +ł +ń +ň +ō +ř +Ş +ş +Š +š +ţ +ū +ż +Ž +ž +Ș +ș +ț +Δ +α +λ +μ +φ +Г +О +а +в +л +о +р +с +т +я +ồ +​ +— +― +’ +“ +” +… +℃ +→ +∇ +− +■ +☆ +  +、 +。 +々 +〆 +〈 +〉 +「 +」 +『 +』 +〔 +〕 +〜 +ぁ +あ +ぃ +い +う +ぇ +え +ぉ +お +か +が +き +ぎ +く +ぐ +け +げ +こ +ご +さ +ざ +し +じ +す +ず +せ +ぜ +そ +ぞ +た +だ +ち +ぢ +っ +つ +づ +て +で +と +ど +な +に +ぬ +ね +の +は +ば +ぱ +ひ +び +ぴ +ふ +ぶ +ぷ +へ +べ +ぺ +ほ +ぼ +ぽ +ま +み +む +め +も +ゃ +や +ゅ +ゆ +ょ +よ +ら +り +る +れ +ろ +わ +ゑ +を +ん +ゝ +ゞ +ァ +ア +ィ +イ +ゥ +ウ +ェ +エ +ォ +オ +カ +ガ +キ +ギ +ク +グ +ケ +ゲ +コ +ゴ +サ +ザ +シ +ジ +ス +ズ +セ +ゼ +ソ +ゾ +タ +ダ +チ +ヂ +ッ +ツ +ヅ +テ +デ +ト +ド +ナ +ニ +ヌ +ネ +ノ +ハ +バ +パ +ヒ +ビ +ピ +フ +ブ +プ +ヘ +ベ +ペ +ホ +ボ +ポ +マ +ミ +ム +メ +モ +ャ +ヤ +ュ +ユ +ョ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヰ +ン +ヴ +ヵ +ヶ +・ +ー +㈱ +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丑 +且 +世 +丘 +丙 +丞 +両 +並 +中 +串 +丸 +丹 +主 +丼 +丿 +乃 +久 +之 +乎 +乏 +乗 +乘 +乙 +九 +乞 +也 +乱 +乳 +乾 +亀 +了 +予 +争 +事 +二 +于 +互 +五 +井 +亘 +亙 +些 +亜 +亟 +亡 +交 +亥 +亦 +亨 +享 +京 +亭 +亮 +人 +什 +仁 +仇 +今 +介 +仍 +仏 +仔 +仕 +他 +仗 +付 +仙 +代 +令 +以 +仮 +仰 +仲 +件 +任 +企 +伊 +伍 +伎 +伏 +伐 +休 +会 +伝 +伯 +估 +伴 +伶 +伸 +伺 +似 +伽 +佃 +但 +位 +低 +住 +佐 +佑 +体 +何 +余 +佚 +佛 +作 +佩 +佳 +併 +佶 +使 +侈 +例 +侍 +侏 +侑 +侘 +供 +依 +侠 +価 +侮 +侯 +侵 +侶 +便 +係 +促 +俄 +俊 +俔 +俗 +俘 +保 +信 +俣 +俤 +修 +俯 +俳 +俵 +俸 +俺 +倉 +個 +倍 +倒 +候 +借 +倣 +値 +倫 +倭 +倶 +倹 +偃 +假 +偈 +偉 +偏 +偐 +偕 +停 +健 +側 +偵 +偶 +偽 +傀 +傅 +傍 +傑 +傘 +備 +催 +傭 +傲 +傳 +債 +傷 +傾 +僊 +働 +像 +僑 +僕 +僚 +僧 +僭 +僮 +儀 +億 +儇 +儒 +儛 +償 +儡 +優 +儲 +儺 +儼 +兀 +允 +元 +兄 +充 +兆 +先 +光 +克 +兌 +免 +兎 +児 +党 +兜 +入 +全 +八 +公 +六 +共 +兵 +其 +具 +典 +兼 +内 +円 +冊 +再 +冑 +冒 +冗 +写 +冠 +冤 +冥 +冨 +冬 +冲 +决 +冶 +冷 +准 +凉 +凋 +凌 +凍 +凛 +凝 +凞 +几 +凡 +処 +凪 +凰 +凱 +凶 +凸 +凹 +出 +函 +刀 +刃 +分 +切 +刈 +刊 +刎 +刑 +列 +初 +判 +別 +利 +刪 +到 +制 +刷 +券 +刹 +刺 +刻 +剃 +則 +削 +剋 +前 +剖 +剛 +剣 +剤 +剥 +剪 +副 +剰 +割 +創 +剽 +劇 +劉 +劔 +力 +功 +加 +劣 +助 +努 +劫 +劭 +励 +労 +効 +劾 +勃 +勅 +勇 +勉 +勒 +動 +勘 +務 +勝 +募 +勢 +勤 +勧 +勲 +勺 +勾 +勿 +匁 +匂 +包 +匏 +化 +北 +匙 
+匝 +匠 +匡 +匣 +匯 +匲 +匹 +区 +医 +匿 +十 +千 +升 +午 +卉 +半 +卍 +卑 +卒 +卓 +協 +南 +単 +博 +卜 +占 +卦 +卯 +印 +危 +即 +却 +卵 +卸 +卿 +厄 +厚 +原 +厠 +厨 +厩 +厭 +厳 +去 +参 +又 +叉 +及 +友 +双 +反 +収 +叔 +取 +受 +叙 +叛 +叟 +叡 +叢 +口 +古 +句 +叩 +只 +叫 +召 +可 +台 +叱 +史 +右 +叶 +号 +司 +吃 +各 +合 +吉 +吊 +同 +名 +后 +吏 +吐 +向 +君 +吝 +吟 +吠 +否 +含 +吸 +吹 +吻 +吽 +吾 +呂 +呆 +呈 +呉 +告 +呑 +周 +呪 +呰 +味 +呼 +命 +咀 +咄 +咋 +和 +咒 +咫 +咲 +咳 +咸 +哀 +品 +哇 +哉 +員 +哨 +哩 +哭 +哲 +哺 +唄 +唆 +唇 +唐 +唖 +唯 +唱 +唳 +唸 +唾 +啄 +商 +問 +啓 +啼 +善 +喋 +喚 +喜 +喝 +喧 +喩 +喪 +喫 +喬 +單 +喰 +営 +嗅 +嗇 +嗔 +嗚 +嗜 +嗣 +嘆 +嘉 +嘗 +嘘 +嘩 +嘯 +嘱 +嘲 +嘴 +噂 +噌 +噛 +器 +噴 +噺 +嚆 +嚢 +囀 +囃 +囉 +囚 +四 +回 +因 +団 +困 +囲 +図 +固 +国 +圀 +圃 +國 +圏 +園 +圓 +團 +圜 +土 +圧 +在 +圭 +地 +址 +坂 +均 +坊 +坐 +坑 +坡 +坤 +坦 +坪 +垂 +型 +垢 +垣 +埃 +埋 +城 +埒 +埔 +域 +埠 +埴 +埵 +執 +培 +基 +埼 +堀 +堂 +堅 +堆 +堕 +堤 +堪 +堯 +堰 +報 +場 +堵 +堺 +塀 +塁 +塊 +塑 +塔 +塗 +塘 +塙 +塚 +塞 +塩 +填 +塵 +塾 +境 +墉 +墓 +増 +墜 +墟 +墨 +墳 +墺 +墻 +墾 +壁 +壇 +壊 +壌 +壕 +士 +壬 +壮 +声 +壱 +売 +壷 +壹 +壺 +壽 +変 +夏 +夕 +外 +夙 +多 +夜 +夢 +夥 +大 +天 +太 +夫 +夬 +夭 +央 +失 +夷 +夾 +奄 +奇 +奈 +奉 +奎 +奏 +契 +奔 +奕 +套 +奘 +奠 +奢 +奥 +奨 +奪 +奮 +女 +奴 +奸 +好 +如 +妃 +妄 +妊 +妍 +妓 +妖 +妙 +妥 +妨 +妬 +妲 +妹 +妻 +妾 +姉 +始 +姐 +姓 +委 +姚 +姜 +姞 +姥 +姦 +姨 +姪 +姫 +姶 +姻 +姿 +威 +娑 +娘 +娟 +娠 +娩 +娯 +娼 +婆 +婉 +婚 +婢 +婦 +婬 +婿 +媄 +媒 +媓 +媚 +媛 +媞 +媽 +嫁 +嫄 +嫉 +嫌 +嫐 +嫗 +嫡 +嬉 +嬌 +嬢 +嬪 +嬬 +嬾 +孁 +子 +孔 +字 +存 +孚 +孝 +孟 +季 +孤 +学 +孫 +孵 +學 +宅 +宇 +守 +安 +宋 +完 +宍 +宏 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +実 +客 +宣 +室 +宥 +宮 +宰 +害 +宴 +宵 +家 +宸 +容 +宿 +寂 +寄 +寅 +密 +寇 +富 +寒 +寓 +寔 +寛 +寝 +察 +寡 +實 +寧 +審 +寮 +寵 +寶 +寸 +寺 +対 +寿 +封 +専 +射 +将 +尉 +尊 +尋 +對 +導 +小 +少 +尖 +尚 +尤 +尪 +尭 +就 +尹 +尺 +尻 +尼 +尽 +尾 +尿 +局 +居 +屈 +届 +屋 +屍 +屎 +屏 +屑 +屓 +展 +属 +屠 +層 +履 +屯 +山 +岐 +岑 +岡 +岩 +岫 +岬 +岳 +岷 +岸 +峠 +峡 +峨 +峯 +峰 +島 +峻 +崇 +崋 +崎 +崑 +崖 +崗 +崛 +崩 +嵌 +嵐 +嵩 +嵯 +嶂 +嶋 +嶠 +嶺 +嶼 +嶽 +巀 +巌 +巒 +巖 +川 +州 +巡 +巣 +工 +左 +巧 +巨 +巫 +差 +己 +巳 +巴 +巷 +巻 +巽 +巾 +市 +布 +帆 +希 +帖 +帚 +帛 +帝 +帥 +師 +席 +帯 +帰 +帳 +帷 +常 +帽 +幄 +幅 +幇 +幌 +幔 +幕 +幟 +幡 +幢 +幣 +干 +平 +年 +并 +幸 +幹 +幻 +幼 +幽 +幾 +庁 +広 +庄 +庇 +床 +序 +底 +庖 +店 +庚 +府 +度 +座 +庫 +庭 +庵 +庶 +康 +庸 +廂 +廃 +廉 +廊 +廓 +廟 +廠 +廣 +廬 +延 +廷 +建 +廻 +廼 +廿 +弁 +弄 +弉 +弊 +弌 +式 +弐 +弓 +弔 +引 +弖 +弗 +弘 +弛 +弟 +弥 +弦 +弧 +弱 +張 +強 +弼 +弾 +彈 +彊 +彌 +彎 +当 +彗 +彙 +彝 +形 +彦 +彩 +彫 +彬 +彭 +彰 +影 +彷 +役 +彼 +往 +征 +徂 +径 +待 +律 +後 +徐 +徑 +徒 +従 +得 +徠 +御 +徧 +徨 +復 +循 +徭 +微 +徳 +徴 +德 +徹 +徽 +心 +必 +忉 +忌 +忍 +志 +忘 +忙 +応 +忠 +快 +忯 +念 +忻 +忽 +忿 +怒 +怖 +思 +怠 +怡 +急 +性 +怨 +怪 +怯 +恂 +恋 +恐 +恒 +恕 +恣 +恤 +恥 +恨 +恩 +恬 +恭 +息 +恵 +悉 +悌 +悍 +悔 +悟 +悠 +患 +悦 +悩 +悪 +悲 +悼 +情 +惇 +惑 +惚 +惜 +惟 +惠 +惣 +惧 +惨 +惰 +想 +惹 +惺 +愈 +愉 +愍 +意 +愔 +愚 +愛 +感 +愷 +愿 +慈 +態 +慌 +慎 +慕 +慢 +慣 +慧 +慨 +慮 +慰 +慶 +憂 +憎 +憐 +憑 +憙 +憤 +憧 +憩 +憬 +憲 +憶 +憾 +懇 +應 +懌 +懐 +懲 +懸 +懺 +懽 +懿 +戈 +戊 +戌 +戎 +成 +我 +戒 +戔 +或 +戚 +戟 +戦 +截 +戮 +戯 +戴 +戸 +戻 +房 +所 +扁 +扇 +扈 +扉 +手 +才 +打 +払 +托 +扮 +扱 +扶 +批 +承 +技 +抄 +把 +抑 +抓 +投 +抗 +折 +抜 +択 +披 +抱 +抵 +抹 +押 +抽 +担 +拇 +拈 +拉 +拍 +拏 +拐 +拒 +拓 +拘 +拙 +招 +拝 +拠 +拡 +括 +拭 +拳 +拵 +拶 +拾 +拿 +持 +挂 +指 +按 +挑 +挙 +挟 +挨 +振 +挺 +挽 +挿 +捉 +捕 +捗 +捜 +捧 +捨 +据 +捺 +捻 +掃 +掄 +授 +掌 +排 +掖 +掘 +掛 +掟 +採 +探 +掣 +接 +控 +推 +掩 +措 +掬 +掲 +掴 +掻 +掾 +揃 +揄 +揆 +揉 +描 +提 +揖 +揚 +換 +握 +揮 +援 +揶 +揺 +損 +搦 +搬 +搭 +携 +搾 +摂 +摘 +摩 +摸 +摺 +撃 +撒 +撞 +撤 +撥 +撫 +播 +撮 +撰 +撲 +撹 +擁 +操 +擔 +擦 +擬 +擾 +攘 +攝 +攣 +支 +收 +改 +攻 +放 +政 +故 +敏 +救 +敗 +教 +敢 +散 +敦 +敬 +数 +整 +敵 +敷 +斂 +文 +斉 +斎 +斐 +斑 +斗 +料 +斜 +斟 +斤 +斥 +斧 +斬 +断 +斯 +新 +方 +於 +施 +旁 +旅 +旋 +旌 +族 +旗 +旛 +无 +旡 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旺 +旻 +昂 +昆 +昇 +昉 +昌 +明 +昏 +易 +昔 +星 +映 +春 +昧 +昨 +昪 +昭 +是 +昵 +昼 +晁 +時 +晃 +晋 +晏 +晒 +晟 +晦 +晧 +晩 +普 +景 +晴 +晶 +智 +暁 +暇 +暈 +暉 +暑 +暖 +暗 +暘 +暢 +暦 +暫 +暮 +暲 +暴 +暹 +暾 +曄 +曇 +曉 +曖 +曙 +曜 +曝 +曠 +曰 +曲 +曳 +更 +書 +曹 +曼 +曽 +曾 +替 +最 +會 +月 +有 +朋 +服 +朏 +朔 +朕 +朗 +望 +朝 +期 +朧 +木 +未 +末 +本 +札 +朱 +朴 +机 +朽 +杁 +杉 +李 +杏 +材 +村 +杓 +杖 +杜 +杞 +束 +条 +杢 +杣 +来 +杭 +杮 +杯 +東 +杲 +杵 +杷 +杼 +松 +板 +枅 +枇 +析 +枓 +枕 +林 +枚 +果 +枝 +枠 +枡 +枢 +枯 +枳 +架 +柄 +柊 +柏 +某 +柑 +染 +柔 +柘 +柚 +柯 +柱 +柳 +柴 +柵 +査 +柾 +柿 +栂 +栃 +栄 +栖 +栗 +校 +株 +栲 +栴 +核 +根 +栻 +格 +栽 +桁 +桂 +桃 +框 
+案 +桐 +桑 +桓 +桔 +桜 +桝 +桟 +桧 +桴 +桶 +桾 +梁 +梅 +梆 +梓 +梔 +梗 +梛 +條 +梟 +梢 +梧 +梨 +械 +梱 +梲 +梵 +梶 +棄 +棋 +棒 +棗 +棘 +棚 +棟 +棠 +森 +棲 +棹 +棺 +椀 +椅 +椋 +植 +椎 +椏 +椒 +椙 +検 +椥 +椹 +椿 +楊 +楓 +楕 +楚 +楞 +楠 +楡 +楢 +楨 +楪 +楫 +業 +楮 +楯 +楳 +極 +楷 +楼 +楽 +概 +榊 +榎 +榕 +榛 +榜 +榮 +榱 +榴 +槃 +槇 +槊 +構 +槌 +槍 +槐 +様 +槙 +槻 +槽 +槿 +樂 +樋 +樓 +樗 +標 +樟 +模 +権 +横 +樫 +樵 +樹 +樺 +樽 +橇 +橋 +橘 +機 +橿 +檀 +檄 +檎 +檐 +檗 +檜 +檣 +檥 +檬 +檮 +檸 +檻 +櫃 +櫓 +櫛 +櫟 +櫨 +櫻 +欄 +欅 +欠 +次 +欣 +欧 +欲 +欺 +欽 +款 +歌 +歎 +歓 +止 +正 +此 +武 +歩 +歪 +歯 +歳 +歴 +死 +殆 +殉 +殊 +残 +殖 +殯 +殴 +段 +殷 +殺 +殻 +殿 +毀 +毅 +母 +毎 +毒 +比 +毘 +毛 +毫 +毬 +氈 +氏 +民 +気 +水 +氷 +永 +氾 +汀 +汁 +求 +汎 +汐 +汗 +汚 +汝 +江 +池 +汪 +汰 +汲 +決 +汽 +沂 +沃 +沅 +沆 +沈 +沌 +沐 +沓 +沖 +沙 +没 +沢 +沱 +河 +沸 +油 +治 +沼 +沽 +沿 +況 +泉 +泊 +泌 +法 +泗 +泡 +波 +泣 +泥 +注 +泯 +泰 +泳 +洋 +洒 +洗 +洛 +洞 +津 +洩 +洪 +洲 +洸 +洹 +活 +洽 +派 +流 +浄 +浅 +浙 +浚 +浜 +浣 +浦 +浩 +浪 +浮 +浴 +海 +浸 +涅 +消 +涌 +涙 +涛 +涯 +液 +涵 +涼 +淀 +淄 +淆 +淇 +淋 +淑 +淘 +淡 +淤 +淨 +淫 +深 +淳 +淵 +混 +淹 +添 +清 +済 +渉 +渋 +渓 +渕 +渚 +減 +渟 +渠 +渡 +渤 +渥 +渦 +温 +渫 +測 +港 +游 +渾 +湊 +湖 +湘 +湛 +湧 +湫 +湯 +湾 +湿 +満 +源 +準 +溜 +溝 +溢 +溥 +溪 +溶 +溺 +滄 +滅 +滋 +滌 +滑 +滕 +滝 +滞 +滴 +滸 +滹 +滿 +漁 +漂 +漆 +漉 +漏 +漑 +演 +漕 +漠 +漢 +漣 +漫 +漬 +漱 +漸 +漿 +潅 +潔 +潙 +潜 +潟 +潤 +潭 +潮 +潰 +潴 +澁 +澂 +澄 +澎 +澗 +澤 +澪 +澱 +澳 +激 +濁 +濃 +濟 +濠 +濡 +濤 +濫 +濯 +濱 +濾 +瀉 +瀋 +瀑 +瀕 +瀞 +瀟 +瀧 +瀬 +瀾 +灌 +灑 +灘 +火 +灯 +灰 +灸 +災 +炉 +炊 +炎 +炒 +炭 +炮 +炷 +点 +為 +烈 +烏 +烙 +烝 +烹 +焔 +焙 +焚 +無 +焦 +然 +焼 +煇 +煉 +煌 +煎 +煕 +煙 +煤 +煥 +照 +煩 +煬 +煮 +煽 +熈 +熊 +熙 +熟 +熨 +熱 +熹 +熾 +燃 +燈 +燎 +燔 +燕 +燗 +燥 +燭 +燻 +爆 +爐 +爪 +爬 +爲 +爵 +父 +爺 +爼 +爽 +爾 +片 +版 +牌 +牒 +牘 +牙 +牛 +牝 +牟 +牡 +牢 +牧 +物 +牲 +特 +牽 +犂 +犠 +犬 +犯 +状 +狂 +狄 +狐 +狗 +狙 +狛 +狡 +狩 +独 +狭 +狷 +狸 +狼 +猊 +猛 +猟 +猥 +猨 +猩 +猪 +猫 +献 +猴 +猶 +猷 +猾 +猿 +獄 +獅 +獏 +獣 +獲 +玄 +玅 +率 +玉 +王 +玖 +玩 +玲 +珀 +珂 +珈 +珉 +珊 +珍 +珎 +珞 +珠 +珣 +珥 +珪 +班 +現 +球 +理 +琉 +琢 +琥 +琦 +琮 +琲 +琳 +琴 +琵 +琶 +瑁 +瑋 +瑙 +瑚 +瑛 +瑜 +瑞 +瑠 +瑤 +瑩 +瑪 +瑳 +瑾 +璃 +璋 +璜 +璞 +璧 +璨 +環 +璵 +璽 +璿 +瓊 +瓔 +瓜 +瓢 +瓦 +瓶 +甍 +甑 +甕 +甘 +甚 +甞 +生 +産 +甥 +用 +甫 +田 +由 +甲 +申 +男 +町 +画 +界 +畏 +畑 +畔 +留 +畜 +畝 +畠 +畢 +略 +番 +異 +畳 +當 +畷 +畸 +畺 +畿 +疆 +疇 +疋 +疎 +疏 +疑 +疫 +疱 +疲 +疹 +疼 +疾 +病 +症 +痒 +痔 +痕 +痘 +痙 +痛 +痢 +痩 +痴 +痺 +瘍 +瘡 +瘧 +療 +癇 +癌 +癒 +癖 +癡 +癪 +発 +登 +白 +百 +的 +皆 +皇 +皋 +皐 +皓 +皮 +皺 +皿 +盂 +盃 +盆 +盈 +益 +盒 +盗 +盛 +盞 +盟 +盡 +監 +盤 +盥 +盧 +目 +盲 +直 +相 +盾 +省 +眉 +看 +県 +眞 +真 +眠 +眷 +眺 +眼 +着 +睡 +督 +睦 +睨 +睿 +瞋 +瞑 +瞞 +瞬 +瞭 +瞰 +瞳 +瞻 +瞼 +瞿 +矍 +矛 +矜 +矢 +知 +矧 +矩 +短 +矮 +矯 +石 +砂 +砌 +研 +砕 +砥 +砦 +砧 +砲 +破 +砺 +硝 +硫 +硬 +硯 +碁 +碇 +碌 +碑 +碓 +碕 +碗 +碣 +碧 +碩 +確 +碾 +磁 +磐 +磔 +磧 +磨 +磬 +磯 +礁 +礎 +礒 +礙 +礫 +礬 +示 +礼 +社 +祀 +祁 +祇 +祈 +祉 +祐 +祓 +祕 +祖 +祗 +祚 +祝 +神 +祟 +祠 +祢 +祥 +票 +祭 +祷 +祺 +禁 +禄 +禅 +禊 +禍 +禎 +福 +禔 +禖 +禛 +禦 +禧 +禮 +禰 +禹 +禽 +禿 +秀 +私 +秋 +科 +秒 +秘 +租 +秤 +秦 +秩 +称 +移 +稀 +程 +税 +稔 +稗 +稙 +稚 +稜 +稠 +種 +稱 +稲 +稷 +稻 +稼 +稽 +稿 +穀 +穂 +穆 +積 +穎 +穏 +穗 +穜 +穢 +穣 +穫 +穴 +究 +空 +突 +窃 +窄 +窒 +窓 +窟 +窠 +窩 +窪 +窮 +窯 +竃 +竄 +竈 +立 +站 +竜 +竝 +竟 +章 +童 +竪 +竭 +端 +竴 +競 +竹 +竺 +竽 +竿 +笄 +笈 +笏 +笑 +笙 +笛 +笞 +笠 +笥 +符 +第 +笹 +筅 +筆 +筇 +筈 +等 +筋 +筌 +筍 +筏 +筐 +筑 +筒 +答 +策 +筝 +筥 +筧 +筬 +筮 +筯 +筰 +筵 +箆 +箇 +箋 +箏 +箒 +箔 +箕 +算 +箙 +箜 +管 +箪 +箭 +箱 +箸 +節 +篁 +範 +篆 +篇 +築 +篋 +篌 +篝 +篠 +篤 +篥 +篦 +篩 +篭 +篳 +篷 +簀 +簒 +簡 +簧 +簪 +簫 +簺 +簾 +簿 +籀 +籃 +籌 +籍 +籐 +籟 +籠 +籤 +籬 +米 +籾 +粂 +粉 +粋 +粒 +粕 +粗 +粘 +粛 +粟 +粥 +粧 +粮 +粳 +精 +糊 +糖 +糜 +糞 +糟 +糠 +糧 +糯 +糸 +糺 +系 +糾 +紀 +約 +紅 +紋 +納 +紐 +純 +紗 +紘 +紙 +級 +紛 +素 +紡 +索 +紫 +紬 +累 +細 +紳 +紵 +紹 +紺 +絁 +終 +絃 +組 +絅 +経 +結 +絖 +絞 +絡 +絣 +給 +統 +絲 +絵 +絶 +絹 +絽 +綏 +經 +継 +続 +綜 +綟 +綬 +維 +綱 +網 +綴 +綸 +綺 +綽 +綾 +綿 +緊 +緋 +総 +緑 +緒 +線 +締 +緥 +編 +緩 +緬 +緯 +練 +緻 +縁 +縄 +縅 +縒 +縛 +縞 +縢 +縣 +縦 +縫 +縮 +縹 +總 +績 +繁 +繊 +繋 +繍 +織 +繕 +繝 +繦 +繧 +繰 +繹 +繼 +纂 +纈 +纏 +纐 +纒 +纛 +缶 +罔 +罠 +罧 +罪 +置 +罰 +署 +罵 +罷 +罹 +羂 +羅 +羆 +羇 +羈 +羊 +羌 +美 +群 +羨 +義 +羯 +羲 +羹 +羽 +翁 +翅 +翌 +習 +翔 +翛 +翠 +翡 +翫 +翰 +翺 +翻 +翼 +耀 +老 +考 +者 +耆 +而 +耐 +耕 +耗 +耨 +耳 +耶 +耽 +聊 +聖 +聘 +聚 +聞 +聟 +聡 +聨 +聯 +聰 +聲 +聴 +職 +聾 +肄 +肆 +肇 +肉 +肋 +肌 +肖 +肘 +肛 +肝 +股 +肢 +肥 +肩 +肪 +肯 +肱 +育 +肴 +肺 +胃 +胆 +背 +胎 +胖 +胚 +胝 +胞 +胡 +胤 +胱 +胴 +胸 
+能 +脂 +脅 +脆 +脇 +脈 +脊 +脚 +脛 +脩 +脱 +脳 +腋 +腎 +腐 +腑 +腔 +腕 +腫 +腰 +腱 +腸 +腹 +腺 +腿 +膀 +膏 +膚 +膜 +膝 +膠 +膣 +膨 +膩 +膳 +膵 +膾 +膿 +臂 +臆 +臈 +臍 +臓 +臘 +臚 +臣 +臥 +臨 +自 +臭 +至 +致 +臺 +臼 +舂 +舅 +與 +興 +舌 +舍 +舎 +舒 +舖 +舗 +舘 +舜 +舞 +舟 +舩 +航 +般 +舳 +舶 +船 +艇 +艘 +艦 +艮 +良 +色 +艶 +芋 +芒 +芙 +芝 +芥 +芦 +芬 +芭 +芯 +花 +芳 +芸 +芹 +芻 +芽 +芿 +苅 +苑 +苔 +苗 +苛 +苞 +苡 +若 +苦 +苧 +苫 +英 +苴 +苻 +茂 +范 +茄 +茅 +茎 +茗 +茘 +茜 +茨 +茲 +茵 +茶 +茸 +茹 +草 +荊 +荏 +荒 +荘 +荷 +荻 +荼 +莞 +莪 +莫 +莬 +莱 +莵 +莽 +菅 +菊 +菌 +菓 +菖 +菘 +菜 +菟 +菩 +菫 +華 +菱 +菴 +萄 +萊 +萌 +萍 +萎 +萠 +萩 +萬 +萱 +落 +葉 +著 +葛 +葡 +董 +葦 +葩 +葬 +葭 +葱 +葵 +葺 +蒋 +蒐 +蒔 +蒙 +蒟 +蒡 +蒲 +蒸 +蒻 +蒼 +蒿 +蓄 +蓆 +蓉 +蓋 +蓑 +蓬 +蓮 +蓼 +蔀 +蔑 +蔓 +蔚 +蔡 +蔦 +蔬 +蔭 +蔵 +蔽 +蕃 +蕉 +蕊 +蕎 +蕨 +蕩 +蕪 +蕭 +蕾 +薄 +薇 +薊 +薔 +薗 +薙 +薛 +薦 +薨 +薩 +薪 +薫 +薬 +薭 +薮 +藁 +藉 +藍 +藏 +藐 +藝 +藤 +藩 +藪 +藷 +藹 +藺 +藻 +蘂 +蘆 +蘇 +蘊 +蘭 +虎 +虐 +虔 +虚 +虜 +虞 +號 +虫 +虹 +虻 +蚊 +蚕 +蛇 +蛉 +蛍 +蛎 +蛙 +蛛 +蛟 +蛤 +蛭 +蛮 +蛸 +蛹 +蛾 +蜀 +蜂 +蜃 +蜆 +蜊 +蜘 +蜜 +蜷 +蜻 +蝉 +蝋 +蝕 +蝙 +蝠 +蝦 +蝶 +蝿 +螂 +融 +螣 +螺 +蟄 +蟇 +蟠 +蟷 +蟹 +蟻 +蠢 +蠣 +血 +衆 +行 +衍 +衒 +術 +街 +衙 +衛 +衝 +衞 +衡 +衢 +衣 +表 +衫 +衰 +衵 +衷 +衽 +衾 +衿 +袁 +袈 +袋 +袍 +袒 +袖 +袙 +袞 +袢 +被 +袰 +袱 +袴 +袷 +袿 +裁 +裂 +裃 +装 +裏 +裔 +裕 +裘 +裙 +補 +裟 +裡 +裲 +裳 +裴 +裸 +裹 +製 +裾 +褂 +褄 +複 +褌 +褐 +褒 +褥 +褪 +褶 +褻 +襄 +襖 +襞 +襟 +襠 +襦 +襪 +襲 +襴 +襷 +西 +要 +覆 +覇 +覈 +見 +規 +視 +覗 +覚 +覧 +親 +覲 +観 +覺 +觀 +角 +解 +触 +言 +訂 +計 +討 +訓 +託 +記 +訛 +訟 +訢 +訥 +訪 +設 +許 +訳 +訴 +訶 +診 +註 +証 +詐 +詔 +評 +詛 +詞 +詠 +詢 +詣 +試 +詩 +詫 +詮 +詰 +話 +該 +詳 +誄 +誅 +誇 +誉 +誌 +認 +誓 +誕 +誘 +語 +誠 +誡 +誣 +誤 +誥 +誦 +説 +読 +誰 +課 +誼 +誾 +調 +談 +請 +諌 +諍 +諏 +諒 +論 +諚 +諜 +諟 +諡 +諦 +諧 +諫 +諭 +諮 +諱 +諶 +諷 +諸 +諺 +諾 +謀 +謄 +謌 +謎 +謗 +謙 +謚 +講 +謝 +謡 +謫 +謬 +謹 +證 +識 +譚 +譛 +譜 +警 +譬 +譯 +議 +譲 +譴 +護 +讀 +讃 +讐 +讒 +谷 +谿 +豅 +豆 +豊 +豎 +豐 +豚 +象 +豪 +豫 +豹 +貌 +貝 +貞 +負 +財 +貢 +貧 +貨 +販 +貪 +貫 +責 +貯 +貰 +貴 +買 +貸 +費 +貼 +貿 +賀 +賁 +賂 +賃 +賄 +資 +賈 +賊 +賎 +賑 +賓 +賛 +賜 +賞 +賠 +賢 +賣 +賤 +賦 +質 +賭 +購 +賽 +贄 +贅 +贈 +贋 +贔 +贖 +赤 +赦 +走 +赴 +起 +超 +越 +趙 +趣 +足 +趺 +趾 +跋 +跏 +距 +跡 +跨 +跪 +路 +跳 +践 +踊 +踏 +踐 +踞 +踪 +踵 +蹄 +蹉 +蹊 +蹟 +蹲 +蹴 +躅 +躇 +躊 +躍 +躑 +躙 +躪 +身 +躬 +躯 +躰 +車 +軋 +軌 +軍 +軒 +軟 +転 +軸 +軻 +軽 +軾 +較 +載 +輌 +輔 +輜 +輝 +輦 +輩 +輪 +輯 +輸 +輿 +轄 +轍 +轟 +轢 +辛 +辞 +辟 +辥 +辦 +辨 +辰 +辱 +農 +辺 +辻 +込 +迂 +迅 +迎 +近 +返 +迢 +迦 +迪 +迫 +迭 +述 +迷 +迹 +追 +退 +送 +逃 +逅 +逆 +逍 +透 +逐 +逓 +途 +逕 +逗 +這 +通 +逝 +逞 +速 +造 +逢 +連 +逮 +週 +進 +逸 +逼 +遁 +遂 +遅 +遇 +遊 +運 +遍 +過 +遐 +道 +達 +違 +遙 +遜 +遠 +遡 +遣 +遥 +適 +遭 +遮 +遯 +遵 +遷 +選 +遺 +遼 +避 +邀 +邁 +邂 +邃 +還 +邇 +邉 +邊 +邑 +那 +邦 +邨 +邪 +邯 +邵 +邸 +郁 +郊 +郎 +郡 +郢 +部 +郭 +郴 +郵 +郷 +都 +鄂 +鄙 +鄭 +鄰 +鄲 +酉 +酋 +酌 +配 +酎 +酒 +酔 +酢 +酥 +酪 +酬 +酵 +酷 +酸 +醍 +醐 +醒 +醗 +醜 +醤 +醪 +醵 +醸 +采 +釈 +釉 +釋 +里 +重 +野 +量 +釐 +金 +釘 +釜 +針 +釣 +釧 +釿 +鈍 +鈎 +鈐 +鈔 +鈞 +鈦 +鈴 +鈷 +鈸 +鈿 +鉄 +鉇 +鉉 +鉋 +鉛 +鉢 +鉤 +鉦 +鉱 +鉾 +銀 +銃 +銅 +銈 +銑 +銕 +銘 +銚 +銜 +銭 +鋏 +鋒 +鋤 +鋭 +鋲 +鋳 +鋸 +鋺 +鋼 +錆 +錍 +錐 +錘 +錠 +錣 +錦 +錫 +錬 +錯 +録 +錵 +鍋 +鍍 +鍑 +鍔 +鍛 +鍬 +鍮 +鍵 +鍼 +鍾 +鎌 +鎖 +鎗 +鎚 +鎧 +鎬 +鎮 +鎰 +鎹 +鏃 +鏑 +鏡 +鐃 +鐇 +鐐 +鐔 +鐘 +鐙 +鐚 +鐡 +鐵 +鐸 +鑁 +鑊 +鑑 +鑒 +鑚 +鑠 +鑢 +鑰 +鑵 +鑷 +鑼 +鑽 +鑿 +長 +門 +閃 +閇 +閉 +開 +閏 +閑 +間 +閔 +閘 +関 +閣 +閤 +閥 +閦 +閨 +閬 +閲 +閻 +閼 +閾 +闇 +闍 +闔 +闕 +闘 +關 +闡 +闢 +闥 +阜 +阪 +阮 +阯 +防 +阻 +阿 +陀 +陂 +附 +陌 +降 +限 +陛 +陞 +院 +陣 +除 +陥 +陪 +陬 +陰 +陳 +陵 +陶 +陸 +険 +陽 +隅 +隆 +隈 +隊 +隋 +階 +随 +隔 +際 +障 +隠 +隣 +隧 +隷 +隻 +隼 +雀 +雁 +雄 +雅 +集 +雇 +雉 +雊 +雋 +雌 +雍 +雑 +雖 +雙 +雛 +離 +難 +雨 +雪 +雫 +雰 +雲 +零 +雷 +雹 +電 +需 +震 +霊 +霍 +霖 +霜 +霞 +霧 +霰 +露 +靈 +青 +靖 +静 +靜 +非 +面 +革 +靫 +靭 +靱 +靴 +靺 +鞁 +鞄 +鞆 +鞋 +鞍 +鞏 +鞘 +鞠 +鞨 +鞭 +韋 +韓 +韜 +韮 +音 +韶 +韻 +響 +頁 +頂 +頃 +項 +順 +須 +頌 +預 +頑 +頒 +頓 +領 +頚 +頬 +頭 +頴 +頸 +頻 +頼 +顆 +題 +額 +顎 +顔 +顕 +顗 +願 +顛 +類 +顧 +顯 +風 +飛 +食 +飢 +飩 +飫 +飯 +飲 +飴 +飼 +飽 +飾 +餃 +餅 +餉 +養 +餌 +餐 +餓 +餘 +餝 +餡 +館 +饂 +饅 +饉 +饋 +饌 +饒 +饗 +首 +馗 +香 +馨 +馬 +馳 +馴 +駄 +駅 +駆 +駈 +駐 +駒 +駕 +駝 +駿 +騁 +騎 +騏 +騒 +験 +騙 +騨 +騰 +驕 +驚 +驛 +驢 +骨 +骸 +髄 +體 +高 +髙 +髢 +髪 +髭 +髮 +髷 +髻 +鬘 +鬚 +鬢 +鬨 +鬯 +鬱 +鬼 +魁 +魂 +魄 +魅 +魏 +魔 +魚 +魯 +鮎 +鮑 +鮒 +鮪 +鮫 +鮭 +鮮 +鯉 +鯔 +鯖 +鯛 +鯨 +鯰 +鯱 +鰐 +鰒 +鰭 +鰯 +鰰 +鰹 +鰻 +鱈 +鱒 +鱗 +鱧 +鳥 +鳩 +鳰 +鳳 +鳴 +鳶 +鴈 +鴉 +鴎 +鴛 +鴟 +鴦 +鴨 +鴫 +鴻 +鵄 +鵜 +鵞 +鵡 
+鵬 +鵲 +鵺 +鶉 +鶏 +鶯 +鶴 +鷄 +鷙 +鷲 +鷹 +鷺 +鸚 +鸞 +鹸 +鹽 +鹿 +麁 +麒 +麓 +麗 +麝 +麞 +麟 +麦 +麩 +麹 +麺 +麻 +麾 +麿 +黄 +黌 +黍 +黒 +黙 +黛 +黠 +鼈 +鼉 +鼎 +鼓 +鼠 +鼻 +齊 +齋 +齟 +齢 +齬 +龍 +龕 +龗 +! +# +% +& +( +) ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; += +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +R +S +T +U +V +W +X +Z +a +c +d +e +f +h +i +j +k +l +m +n +o +p +r +s +t +u +y +z +~ +・ + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt new file mode 100644 index 00000000..d506b691 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt @@ -0,0 +1,153 @@ +k +a +_ +i +m +g +/ +1 +2 +I +L +S +V +R +C +0 +v +l +6 +4 +8 +. +j +p +ಗ +ು +ಣ +ಪ +ಡ +ಿ +ಸ +ಲ +ಾ +ದ +್ +7 +5 +3 +ವ +ಷ +ಬ +ಹ +ೆ +9 +ಅ +ಳ +ನ +ರ +ಉ +ಕ +ಎ +ೇ +ಂ +ೈ +ೊ +ೀ +ಯ +ೋ +ತ +ಶ +ಭ +ಧ +ಚ +ಜ +ೂ +ಮ +ಒ +ೃ +ಥ +ಇ +ಟ +ಖ +ಆ +ಞ +ಫ +- +ಢ +ಊ +ಓ +ಐ +ಃ +ಘ +ಝ +ೌ +ಠ +ಛ +ಔ +ಏ +ಈ +ಋ +೨ +೦ +೧ +೮ +೯ +೪ +, +೫ +೭ +೩ +೬ +ಙ +s +c +e +n +w +o +u +t +d +E +A +T +B +Z +N +G +O +q +z +r +x +P +K +M +J +U +D +f +F +h +b +W +Y +y +H +X +Q +' +# +& +! +@ +$ +: +% +é +É +( +? ++ + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt new file mode 100644 index 00000000..a13899f1 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt @@ -0,0 +1,3688 @@ +! +" +# +$ +% +& +' +* ++ +- +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +© +° +² +½ +Á +Ä +Å +Ç +É +Í +Î +Ó +Ö +× +Ü +ß +à +á +â +ã +ä +å +æ +ç +è +é +ê +ë +ì +í +î +ï +ð +ñ +ò +ó +ô +õ +ö +ø +ú +û +ü +ý +ā +ă +ą +ć +Č +č +đ +ē +ė +ę +ě +ğ +ī +İ +ı +Ł +ł +ń +ň +ō +ř +Ş +ş +Š +š +ţ +ū +ź +ż +Ž +ž +Ș +ș +Α +Δ +α +λ +φ +Г +О +а +в +л +о +р +с +т +я +​ +’ +“ +” +→ +∇ +∼ +「 +」 +ア +カ +グ +ニ +ラ +ン +ㄱ +ㄴ +ㄷ +ㄸ +ㄹ +ㅂ +ㅅ +ㅆ +ㅇ +ㅈ +ㅊ +ㅋ +ㅌ +ㅎ +ㅓ +ㅜ +ㅣ +一 +丁 +七 +三 +上 +下 +不 +丑 +世 +丘 +丞 +中 +丸 +丹 +主 +乃 +久 +之 +乎 +乘 +九 +也 +乳 +乾 +事 +二 +云 +互 +五 +井 +亞 +亡 +交 +亥 +亨 +享 +京 +亭 +人 +仁 +今 +他 +仙 +代 +令 +以 +仰 +仲 +件 +任 +企 +伊 +伍 +伎 +伏 +伐 +休 +伯 +伴 +伸 +佃 +佈 +位 +低 +住 +佐 +何 +佛 +作 +使 +來 +供 +依 +侯 +侵 +侶 +便 +俗 +保 +俠 +信 +修 +俱 +俳 +倉 +個 +倍 +倒 +候 +借 +値 +倫 +倭 +假 +偈 +偉 +偏 +停 +偶 +傅 +傑 +傳 +傷 +傾 +像 +僞 +僥 +僧 +價 +儀 +儉 +儒 +優 +儼 +兀 +允 +元 +兆 +先 +光 +克 +兒 +入 +內 +全 +八 +公 +六 +共 +兵 +其 +具 +典 +兼 +再 +冠 +冥 +冶 +准 +凞 +凡 +凱 +出 +函 +刀 +分 +刊 +刑 +列 +初 +判 +別 +利 +到 +制 +券 +刺 +刻 +則 +前 +剛 +副 +創 +劃 +劑 +力 +功 +加 +劣 +助 +劫 +勇 +動 +務 +勝 +勢 +勳 +勸 +匈 +化 +北 +匠 +區 +十 +千 +午 +半 +卍 +卑 +卒 +卓 +南 +博 +卜 +占 +卦 +印 +危 +卵 +卷 +卽 +卿 +厄 +原 +厦 +去 +參 +又 +叉 +友 +反 +叔 +受 +口 +古 +句 +可 +台 +史 +右 +司 +各 +合 +吉 +同 +名 +后 +吏 +吐 +君 +吠 +吳 +呂 +告 +周 +味 +呵 +命 +和 +咳 +咸 +咽 +哀 +品 +哨 +哮 +哲 +唐 +唯 +唱 +商 +問 +啼 +善 +喆 +喉 +喜 +喩 +喪 +嘗 +器 +嚴 +囊 +四 +回 +因 +困 +固 +圈 +國 +圍 +園 +圓 +圖 +團 +土 +在 +地 +均 +坊 +坐 +坑 +坵 +型 +垢 +城 +域 +埴 +執 +培 +基 +堂 +堅 +堆 +堤 +堯 +報 +場 +塔 +塚 +塞 +塵 +境 +墜 +墟 +墨 +墳 +墾 +壁 +壇 +壓 +壤 +士 +壬 +壯 +壺 +壽 +夏 +夕 +外 +多 +夜 +夢 +大 +天 +太 +夫 +央 +失 +夷 +奄 +奇 +奉 +奎 +奏 +契 +奔 +奮 +女 +奴 +好 +如 +妄 +妊 +妖 +妙 +始 +姑 +姓 +姚 +姜 +威 +婆 +婚 +婦 +媒 +媚 +子 +孔 +字 +存 +孝 +孟 +季 +孤 +孫 +學 +孺 +宇 +守 +安 +宋 +宗 +官 +宙 +定 +客 +宣 +室 +宮 +害 +家 +容 +寂 +寃 +寄 +寅 +密 +寇 +富 +寒 +寓 +實 +審 +寫 +寬 +寶 +寸 +寺 +封 +將 +專 +尊 +對 +小 +少 +尙 +尹 +尼 +尿 +局 +居 +屈 +屋 +屍 +屎 +屛 +層 +屬 +山 +岐 +岡 +岩 +岳 +岸 +峙 +峰 +島 +峻 +峽 +崇 +崔 +崖 +崩 +嶋 +巖 +川 +州 +巢 +工 +左 +巧 +巨 +巫 +差 +己 +巷 +市 +布 +帝 +師 +帶 +常 +帽 +幕 +干 +平 +年 +幹 +幻 +幼 +幽 +庇 +序 +店 +府 
+度 +座 +庫 +庭 +康 +廟 +廣 +廳 +延 +廷 +建 +廻 +弁 +式 +弑 +弓 +引 +弘 +弟 +弱 +張 +强 +弼 +彌 +彛 +形 +彬 +影 +役 +彼 +彿 +往 +征 +待 +律 +後 +徐 +徑 +得 +從 +循 +微 +德 +徹 +心 +必 +忌 +忍 +志 +忠 +思 +怡 +急 +性 +恐 +恒 +恨 +恩 +悅 +悖 +患 +悲 +情 +惑 +惟 +惠 +惡 +想 +惺 +愁 +意 +愚 +愛 +感 +愼 +慈 +態 +慕 +慣 +慧 +慾 +憂 +憤 +憺 +應 +懸 +戎 +成 +我 +戟 +戮 +戰 +戴 +戶 +房 +所 +手 +才 +打 +批 +承 +技 +抄 +把 +抗 +抱 +抽 +拇 +拓 +拘 +拙 +拜 +拾 +持 +指 +捌 +捨 +捿 +授 +掌 +排 +接 +推 +提 +揚 +揭 +援 +損 +搗 +摩 +播 +操 +擒 +擔 +擘 +據 +擧 +攘 +攝 +攬 +支 +改 +攻 +放 +政 +故 +敍 +敎 +救 +敗 +散 +敬 +整 +數 +文 +斗 +料 +斛 +斜 +斧 +斯 +新 +斷 +方 +於 +施 +旋 +族 +旗 +日 +旨 +早 +旱 +昌 +明 +易 +昔 +星 +春 +昧 +昭 +是 +時 +晉 +晋 +晩 +普 +景 +晴 +晶 +智 +暈 +暑 +暗 +暘 +曉 +曜 +曠 +曦 +曰 +曲 +書 +曹 +曼 +曾 +最 +會 +月 +有 +朋 +服 +望 +朝 +期 +木 +未 +末 +本 +朱 +朴 +李 +材 +村 +杖 +杜 +杞 +杭 +杯 +東 +松 +板 +林 +果 +枝 +枯 +枰 +枾 +柏 +柑 +柱 +栗 +校 +栢 +核 +根 +格 +桀 +桂 +案 +桎 +桑 +桓 +桔 +梁 +梏 +梓 +梗 +條 +梨 +梵 +棗 +棟 +森 +植 +椒 +楊 +楓 +楚 +業 +楮 +極 +榮 +槃 +槍 +樂 +樓 +樗 +樣 +樸 +樹 +樺 +樽 +橄 +橋 +橘 +機 +橡 +檀 +檎 +權 +欌 +欖 +次 +欲 +歌 +歐 +止 +正 +此 +步 +武 +歲 +歸 +死 +殖 +段 +殷 +殺 +殿 +毅 +母 +毒 +比 +毛 +氏 +民 +氣 +水 +永 +求 +汎 +汗 +江 +池 +沅 +沒 +沖 +沙 +沛 +河 +油 +治 +沼 +沿 +泉 +泊 +法 +泗 +泡 +波 +注 +泰 +洋 +洙 +洛 +洞 +津 +洲 +活 +派 +流 +浅 +浦 +浮 +浴 +海 +涅 +涇 +消 +涌 +液 +淑 +淡 +淨 +淫 +深 +淳 +淵 +淸 +渠 +渡 +游 +渾 +湖 +湯 +源 +溪 +溫 +溶 +滄 +滅 +滋 +滯 +滿 +漁 +漆 +漢 +漫 +漸 +潑 +潤 +潭 +澄 +澎 +澤 +澳 +澹 +濁 +濕 +濟 +濤 +濯 +瀋 +瀝 +灣 +火 +灰 +灸 +災 +炎 +炭 +点 +烈 +烏 +烙 +焚 +無 +焦 +然 +煌 +煎 +照 +煬 +煮 +熟 +熱 +燁 +燈 +燔 +燕 +燥 +燧 +燮 +爲 +爵 +父 +片 +版 +牌 +牛 +牝 +牟 +牡 +物 +特 +犧 +犬 +狀 +狗 +猥 +猩 +猪 +獨 +獵 +獸 +獻 +玄 +玉 +王 +玲 +珍 +珠 +珪 +班 +現 +球 +理 +琴 +瑞 +瑟 +瑪 +璃 +璋 +璽 +瓜 +瓦 +甑 +甘 +生 +産 +用 +甫 +田 +由 +甲 +申 +男 +界 +畏 +留 +畜 +畢 +略 +番 +異 +畵 +當 +畸 +疏 +疑 +疫 +疹 +疼 +病 +症 +痔 +痛 +痺 +瘀 +瘍 +瘡 +療 +癌 +癖 +登 +發 +白 +百 +的 +皆 +皇 +皮 +盂 +盆 +益 +盛 +盜 +盟 +盡 +盤 +盧 +目 +直 +相 +省 +看 +眞 +眼 +睡 +督 +瞋 +矢 +矣 +知 +短 +石 +破 +碍 +碑 +磁 +磨 +磬 +示 +社 +祇 +祖 +祝 +神 +祥 +祭 +祺 +禁 +禅 +禍 +福 +禦 +禪 +禮 +禹 +禽 +禾 +秀 +私 +秉 +秋 +科 +秘 +秤 +秦 +秩 +移 +稀 +稗 +種 +稱 +稷 +稼 +稽 +穀 +穆 +積 +空 +窮 +竅 +立 +章 +童 +竭 +端 +竹 +笑 +符 +第 +筆 +等 +筍 +答 +策 +箋 +箕 +管 +箱 +節 +篇 +簡 +米 +粉 +粘 +粥 +精 +糖 +糞 +系 +紀 +紂 +約 +紅 +紋 +純 +紙 +級 +素 +索 +紫 +紬 +累 +細 +紳 +終 +組 +結 +絡 +統 +絲 +絶 +絹 +經 +綠 +維 +綱 +網 +綸 +綽 +緖 +線 +緣 +緯 +縣 +縱 +總 +織 +繡 +繩 +繪 +繭 +纂 +續 +罕 +置 +罰 +羅 +羊 +美 +群 +義 +羽 +翁 +習 +翟 +老 +考 +者 +而 +耐 +耕 +耳 +聃 +聖 +聞 +聰 +聲 +職 +肇 +肉 +肖 +肝 +股 +肥 +育 +肺 +胃 +胎 +胚 +胞 +胡 +胥 +能 +脂 +脈 +脚 +脛 +脣 +脩 +脫 +脯 +脾 +腋 +腎 +腫 +腸 +腹 +膜 +膠 +膨 +膽 +臆 +臟 +臣 +臥 +臨 +自 +至 +致 +臺 +臼 +臾 +與 +興 +舊 +舌 +舍 +舒 +舜 +舟 +般 +船 +艦 +良 +色 +芋 +花 +芳 +芽 +苑 +苔 +苕 +苛 +苞 +若 +苦 +英 +茂 +茵 +茶 +茹 +荀 +荇 +草 +荒 +荷 +莊 +莫 +菊 +菌 +菜 +菩 +菫 +華 +菴 +菽 +萊 +萍 +萬 +落 +葉 +著 +葛 +董 +葬 +蒙 +蒜 +蒲 +蒸 +蒿 +蓮 +蔓 +蔘 +蔡 +蔬 +蕃 +蕉 +蕓 +薄 +薑 +薛 +薩 +薪 +薺 +藏 +藝 +藤 +藥 +藩 +藻 +蘆 +蘇 +蘊 +蘚 +蘭 +虎 +處 +虛 +虞 +虹 +蜀 +蜂 +蜜 +蝕 +蝶 +融 +蟬 +蟲 +蠶 +蠻 +血 +衆 +行 +術 +衛 +衡 +衣 +表 +袁 +裔 +裕 +裙 +補 +製 +複 +襄 +西 +要 +見 +視 +親 +覺 +觀 +角 +解 +言 +訂 +訊 +訓 +託 +記 +訣 +設 +診 +註 +評 +詩 +話 +詵 +誅 +誌 +認 +誕 +語 +誠 +誤 +誥 +誦 +說 +調 +談 +諍 +論 +諡 +諫 +諭 +諸 +謙 +講 +謝 +謠 +證 +識 +譚 +譜 +譯 +議 +護 +讀 +變 +谷 +豆 +豊 +豚 +象 +豪 +豫 +貝 +貞 +財 +貧 +貨 +貪 +貫 +貴 +貸 +費 +資 +賊 +賓 +賞 +賢 +賣 +賦 +質 +贍 +赤 +赫 +走 +起 +超 +越 +趙 +趣 +趨 +足 +趾 +跋 +跡 +路 +踏 +蹟 +身 +躬 +車 +軍 +軒 +軟 +載 +輓 +輕 +輪 +輯 +輸 +輻 +輿 +轅 +轉 +辨 +辭 +辯 +辰 +農 +近 +迦 +述 +追 +逆 +透 +逐 +通 +逝 +造 +逢 +連 +進 +逵 +遂 +遊 +運 +遍 +過 +道 +達 +遠 +遡 +適 +遷 +選 +遺 +遽 +還 +邊 +邑 +那 +邪 +郞 +郡 +部 +都 +鄒 +鄕 +鄭 +鄲 +配 +酒 +酸 +醉 +醫 +醯 +釋 +里 +重 +野 +量 +釐 +金 +針 +鈍 +鈴 +鉞 +銀 +銅 +銘 +鋼 +錄 +錢 +錦 +鎭 +鏡 +鐘 +鐵 +鑑 +鑛 +長 +門 +閃 +開 +間 +閔 +閣 +閥 +閭 +閻 +闕 +關 +阪 +防 +阿 +陀 +降 +限 +陝 +院 +陰 +陳 +陵 +陶 +陸 +陽 +隆 +隊 +隋 +階 +際 +障 +隣 +隨 +隱 +隷 +雀 +雄 +雅 +集 +雇 +雌 +雖 +雙 +雜 +離 +難 +雨 +雪 +雲 +電 +霜 +露 +靈 +靑 +靖 +靜 +非 +面 +革 +靴 +鞏 +韓 +音 +韶 +韻 +順 +須 +頊 +頌 +領 +頭 +顔 +願 +顚 +類 +顯 +風 +飛 +食 +飢 +飮 +飯 +飾 +養 +餓 +餘 +首 +香 +馨 +馬 +駒 +騫 +騷 +驕 +骨 +骸 +髓 +體 +高 +髥 +髮 +鬪 +鬱 +鬼 +魏 +魔 +魚 +魯 +鮮 +鰍 +鰐 +鳥 +鳧 +鳳 +鴨 +鵲 +鶴 +鷄 +鷹 +鹽 +鹿 +麗 +麥 +麻 +黃 +黑 +默 +點 +黨 +鼎 +齊 +齋 +齒 +龍 +龜 +가 +각 +간 +갇 +갈 +갉 +감 +갑 +값 +갓 +갔 
+강 +갖 +갗 +같 +갚 +갛 +개 +객 +갠 +갤 +갬 +갭 +갯 +갰 +갱 +갸 +걀 +걔 +걘 +거 +걱 +건 +걷 +걸 +검 +겁 +것 +겄 +겅 +겆 +겉 +겊 +겋 +게 +겐 +겔 +겟 +겠 +겡 +겨 +격 +겪 +견 +결 +겸 +겹 +겻 +겼 +경 +곁 +계 +곕 +곗 +고 +곡 +곤 +곧 +골 +곪 +곬 +곯 +곰 +곱 +곳 +공 +곶 +과 +곽 +관 +괄 +괌 +광 +괘 +괜 +괭 +괴 +괸 +굉 +교 +구 +국 +군 +굳 +굴 +굵 +굶 +굼 +굽 +굿 +궁 +궂 +궈 +권 +궐 +궜 +궝 +궤 +귀 +귄 +귈 +귓 +규 +균 +귤 +그 +극 +근 +글 +긁 +금 +급 +긋 +긍 +기 +긴 +길 +김 +깁 +깃 +깅 +깊 +까 +깍 +깎 +깐 +깔 +깜 +깝 +깟 +깡 +깥 +깨 +깬 +깰 +깻 +깼 +깽 +꺄 +꺼 +꺽 +꺾 +껀 +껄 +껌 +껍 +껏 +껐 +껑 +께 +껴 +꼈 +꼍 +꼐 +꼬 +꼭 +꼴 +꼼 +꼽 +꼿 +꽁 +꽂 +꽃 +꽉 +꽝 +꽤 +꽥 +꾀 +꾜 +꾸 +꾹 +꾼 +꿀 +꿇 +꿈 +꿉 +꿋 +꿍 +꿎 +꿔 +꿨 +꿩 +꿰 +꿴 +뀄 +뀌 +뀐 +뀔 +뀜 +뀝 +끄 +끈 +끊 +끌 +끓 +끔 +끕 +끗 +끙 +끝 +끼 +끽 +낀 +낄 +낌 +낍 +낏 +낑 +나 +낙 +낚 +난 +낟 +날 +낡 +남 +납 +낫 +났 +낭 +낮 +낯 +낱 +낳 +내 +낵 +낸 +낼 +냄 +냅 +냇 +냈 +냉 +냐 +냔 +냘 +냥 +너 +넉 +넋 +넌 +널 +넓 +넘 +넙 +넛 +넜 +넝 +넣 +네 +넥 +넨 +넬 +넴 +넵 +넷 +넸 +넹 +녀 +녁 +년 +념 +녔 +녕 +녘 +녜 +노 +녹 +논 +놀 +놈 +놋 +농 +높 +놓 +놔 +놨 +뇌 +뇨 +뇩 +뇽 +누 +눅 +눈 +눌 +눔 +눕 +눗 +눠 +눴 +뉘 +뉜 +뉩 +뉴 +늄 +늅 +늉 +느 +늑 +는 +늘 +늙 +늠 +늡 +능 +늦 +늪 +늬 +니 +닉 +닌 +닐 +님 +닙 +닛 +닝 +닢 +다 +닥 +닦 +단 +닫 +달 +닭 +닮 +닯 +닳 +담 +답 +닷 +당 +닻 +닿 +대 +댁 +댄 +댈 +댐 +댑 +댓 +댔 +댕 +댜 +더 +덕 +덖 +던 +덜 +덟 +덤 +덥 +덧 +덩 +덫 +덮 +데 +덱 +덴 +델 +뎀 +뎃 +뎅 +뎌 +뎠 +뎨 +도 +독 +돈 +돋 +돌 +돔 +돕 +돗 +동 +돛 +돝 +돼 +됐 +되 +된 +될 +됨 +됩 +됴 +두 +둑 +둔 +둘 +둠 +둡 +둣 +둥 +둬 +뒀 +뒤 +뒬 +뒷 +뒹 +듀 +듈 +듐 +드 +득 +든 +듣 +들 +듦 +듬 +듭 +듯 +등 +듸 +디 +딕 +딘 +딛 +딜 +딤 +딥 +딧 +딨 +딩 +딪 +따 +딱 +딴 +딸 +땀 +땄 +땅 +때 +땐 +땔 +땜 +땝 +땠 +땡 +떠 +떡 +떤 +떨 +떫 +떰 +떱 +떳 +떴 +떵 +떻 +떼 +떽 +뗀 +뗄 +뗍 +뗏 +뗐 +뗑 +또 +똑 +똘 +똥 +뙤 +뚜 +뚝 +뚤 +뚫 +뚱 +뛰 +뛴 +뛸 +뜀 +뜁 +뜨 +뜩 +뜬 +뜯 +뜰 +뜸 +뜻 +띄 +띈 +띌 +띔 +띕 +띠 +띤 +띨 +띱 +띵 +라 +락 +란 +랄 +람 +랍 +랏 +랐 +랑 +랒 +랗 +래 +랙 +랜 +랠 +램 +랩 +랫 +랬 +랭 +랴 +략 +량 +러 +럭 +런 +럴 +럼 +럽 +럿 +렀 +렁 +렇 +레 +렉 +렌 +렐 +렘 +렙 +렛 +렝 +려 +력 +련 +렬 +렴 +렵 +렷 +렸 +령 +례 +로 +록 +론 +롤 +롬 +롭 +롯 +롱 +롸 +롹 +뢰 +뢴 +뢸 +룃 +료 +룐 +룡 +루 +룩 +룬 +룰 +룸 +룹 +룻 +룽 +뤄 +뤘 +뤼 +류 +륙 +륜 +률 +륨 +륭 +르 +륵 +른 +를 +름 +릅 +릇 +릉 +릎 +리 +릭 +린 +릴 +림 +립 +릿 +링 +마 +막 +만 +많 +맏 +말 +맑 +맘 +맙 +맛 +망 +맞 +맡 +맣 +매 +맥 +맨 +맬 +맴 +맵 +맷 +맸 +맹 +맺 +먀 +먁 +머 +먹 +먼 +멀 +멈 +멋 +멍 +멎 +메 +멕 +멘 +멜 +멤 +멥 +멧 +멩 +며 +멱 +면 +멸 +몄 +명 +몇 +모 +목 +몫 +몬 +몰 +몸 +몹 +못 +몽 +뫼 +묘 +무 +묵 +묶 +문 +묻 +물 +묽 +뭄 +뭅 +뭇 +뭉 +뭍 +뭏 +뭐 +뭔 +뭘 +뭡 +뭣 +뮈 +뮌 +뮐 +뮤 +뮬 +므 +믈 +믐 +미 +믹 +민 +믿 +밀 +밈 +밉 +밋 +밌 +밍 +및 +밑 +바 +박 +밖 +반 +받 +발 +밝 +밟 +밤 +밥 +밧 +방 +밭 +배 +백 +밴 +밸 +뱀 +뱁 +뱃 +뱄 +뱅 +뱉 +뱍 +뱐 +버 +벅 +번 +벌 +범 +법 +벗 +벙 +벚 +베 +벡 +벤 +벨 +벰 +벱 +벳 +벵 +벼 +벽 +변 +별 +볍 +볏 +볐 +병 +볕 +보 +복 +볶 +본 +볼 +봄 +봅 +봇 +봉 +봐 +봤 +뵈 +뵐 +뵙 +부 +북 +분 +붇 +불 +붉 +붐 +붓 +붕 +붙 +뷔 +뷰 +뷴 +뷸 +브 +븐 +블 +비 +빅 +빈 +빌 +빔 +빕 +빗 +빙 +빚 +빛 +빠 +빡 +빤 +빨 +빳 +빴 +빵 +빻 +빼 +빽 +뺀 +뺄 +뺌 +뺏 +뺐 +뺑 +뺨 +뻐 +뻑 +뻔 +뻗 +뻘 +뻣 +뻤 +뻥 +뻬 +뼈 +뼉 +뼘 +뽀 +뽈 +뽐 +뽑 +뽕 +뾰 +뿌 +뿍 +뿐 +뿔 +뿜 +쁘 +쁜 +쁠 +쁨 +삐 +삔 +삘 +사 +삭 +삯 +산 +살 +삵 +삶 +삼 +삽 +삿 +샀 +상 +샅 +새 +색 +샌 +샐 +샘 +샙 +샛 +샜 +생 +샤 +샨 +샬 +샴 +샵 +샷 +샹 +서 +석 +섞 +선 +섣 +설 +섬 +섭 +섯 +섰 +성 +섶 +세 +섹 +센 +셀 +셈 +셉 +셋 +셌 +셍 +셔 +션 +셜 +셨 +셰 +셴 +셸 +소 +속 +손 +솔 +솜 +솝 +솟 +송 +솥 +쇄 +쇠 +쇤 +쇳 +쇼 +숀 +숄 +숍 +수 +숙 +순 +숟 +술 +숨 +숩 +숫 +숭 +숯 +숱 +숲 +숴 +쉐 +쉘 +쉬 +쉭 +쉰 +쉴 +쉼 +쉽 +슈 +슐 +슘 +슛 +슝 +스 +슥 +슨 +슬 +슭 +슴 +습 +슷 +승 +시 +식 +신 +싣 +실 +싫 +심 +십 +싯 +싱 +싶 +싸 +싹 +싼 +쌀 +쌈 +쌉 +쌌 +쌍 +쌓 +쌔 +쌘 +쌩 +써 +썩 +썬 +썰 +썸 +썹 +썼 +썽 +쎄 +쎈 +쏘 +쏙 +쏜 +쏟 +쏠 +쏭 +쏴 +쐈 +쐐 +쐬 +쑤 +쑥 +쑨 +쒀 +쒔 +쓰 +쓱 +쓴 +쓸 +씀 +씁 +씌 +씨 +씩 +씬 +씰 +씸 +씹 +씻 +씽 +아 +악 +안 +앉 +않 +알 +앎 +앓 +암 +압 +앗 +았 +앙 +앞 +애 +액 +앤 +앨 +앰 +앱 +앳 +앴 +앵 +야 +약 +얀 +얄 +얇 +얌 +얍 +얏 +양 +얕 +얗 +얘 +얜 +어 +억 +언 +얹 +얻 +얼 +얽 +엄 +업 +없 +엇 +었 +엉 +엊 +엌 +엎 +에 +엑 +엔 +엘 +엠 +엡 +엣 +엥 +여 +역 +엮 +연 +열 +엷 +염 +엽 +엾 +엿 +였 +영 +옅 +옆 +옇 +예 +옌 +옐 +옙 +옛 +오 +옥 +온 +올 +옭 +옮 +옳 +옴 +옵 +옷 +옹 +옻 +와 +왁 +완 +왈 +왑 +왓 +왔 +왕 +왜 +왠 +왱 +외 +왼 +요 +욕 +욘 +욜 +욤 +용 +우 +욱 +운 +울 +움 +웁 +웃 +웅 +워 +웍 +원 +월 +웜 +웠 +웡 +웨 +웬 +웰 +웸 +웹 +위 +윅 +윈 +윌 +윔 +윗 +윙 +유 +육 +윤 +율 +윱 +윳 +융 +으 +윽 +은 +을 +읊 +음 +읍 +응 +의 +읜 +읠 +이 +익 +인 +일 +읽 +잃 +임 +입 +잇 +있 +잉 +잊 +잎 +자 +작 +잔 +잖 +잘 +잠 +잡 +잣 +잤 +장 +잦 +재 +잭 +잰 +잴 +잽 +잿 +쟀 
+쟁 +쟈 +쟉 +쟤 +저 +적 +전 +절 +젊 +점 +접 +젓 +정 +젖 +제 +젝 +젠 +젤 +젬 +젭 +젯 +져 +젼 +졀 +졌 +졍 +조 +족 +존 +졸 +좀 +좁 +종 +좇 +좋 +좌 +좍 +좽 +죄 +죠 +죤 +주 +죽 +준 +줄 +줌 +줍 +줏 +중 +줘 +줬 +쥐 +쥔 +쥘 +쥬 +쥴 +즈 +즉 +즌 +즐 +즘 +즙 +증 +지 +직 +진 +짇 +질 +짊 +짐 +집 +짓 +징 +짖 +짙 +짚 +짜 +짝 +짠 +짢 +짤 +짧 +짬 +짭 +짰 +짱 +째 +짹 +짼 +쨀 +쨉 +쨋 +쨌 +쨍 +쩄 +쩌 +쩍 +쩐 +쩔 +쩜 +쩝 +쩡 +쩨 +쪄 +쪘 +쪼 +쪽 +쪾 +쫀 +쫄 +쫑 +쫓 +쫙 +쬐 +쭈 +쭉 +쭐 +쭙 +쯔 +쯤 +쯧 +찌 +찍 +찐 +찔 +찜 +찝 +찡 +찢 +찧 +차 +착 +찬 +찮 +찰 +참 +찹 +찻 +찼 +창 +찾 +채 +책 +챈 +챌 +챔 +챕 +챗 +챘 +챙 +챠 +챤 +처 +척 +천 +철 +첨 +첩 +첫 +청 +체 +첵 +첸 +첼 +쳄 +쳇 +쳉 +쳐 +쳔 +쳤 +초 +촉 +촌 +촘 +촛 +총 +촨 +촬 +최 +쵸 +추 +축 +춘 +출 +춤 +춥 +춧 +충 +춰 +췄 +췌 +취 +췬 +츄 +츠 +측 +츨 +츰 +층 +치 +칙 +친 +칠 +칡 +침 +칩 +칫 +칭 +카 +칵 +칸 +칼 +캄 +캅 +캇 +캉 +캐 +캔 +캘 +캠 +캡 +캣 +캤 +캥 +캬 +커 +컥 +컨 +컫 +컬 +컴 +컵 +컷 +컸 +컹 +케 +켄 +켈 +켐 +켓 +켕 +켜 +켠 +켤 +켭 +켯 +켰 +코 +콕 +콘 +콜 +콤 +콥 +콧 +콩 +콰 +콱 +콴 +콸 +쾅 +쾌 +쾡 +쾨 +쾰 +쿄 +쿠 +쿡 +쿤 +쿨 +쿰 +쿵 +쿼 +퀀 +퀄 +퀘 +퀭 +퀴 +퀵 +퀸 +퀼 +큐 +큘 +크 +큰 +클 +큼 +큽 +키 +킥 +킨 +킬 +킴 +킵 +킷 +킹 +타 +탁 +탄 +탈 +탉 +탐 +탑 +탓 +탔 +탕 +태 +택 +탠 +탤 +탬 +탭 +탯 +탰 +탱 +터 +턱 +턴 +털 +텀 +텁 +텃 +텄 +텅 +테 +텍 +텐 +텔 +템 +텝 +텡 +텨 +톈 +토 +톡 +톤 +톨 +톰 +톱 +톳 +통 +퇴 +툇 +투 +툭 +툰 +툴 +툼 +퉁 +퉈 +퉜 +튀 +튄 +튈 +튕 +튜 +튠 +튤 +튬 +트 +특 +튼 +튿 +틀 +틈 +틉 +틋 +틔 +티 +틱 +틴 +틸 +팀 +팁 +팅 +파 +팍 +팎 +판 +팔 +팜 +팝 +팟 +팠 +팡 +팥 +패 +팩 +팬 +팰 +팸 +팻 +팼 +팽 +퍼 +퍽 +펀 +펄 +펌 +펍 +펐 +펑 +페 +펙 +펜 +펠 +펨 +펩 +펫 +펭 +펴 +편 +펼 +폄 +폈 +평 +폐 +포 +폭 +폰 +폴 +폼 +폿 +퐁 +표 +푭 +푸 +푹 +푼 +풀 +품 +풋 +풍 +퓨 +퓬 +퓰 +퓸 +프 +픈 +플 +픔 +픕 +피 +픽 +핀 +필 +핌 +핍 +핏 +핑 +하 +학 +한 +할 +핥 +함 +합 +핫 +항 +해 +핵 +핸 +핼 +햄 +햅 +햇 +했 +행 +햐 +향 +헀 +허 +헉 +헌 +헐 +험 +헙 +헛 +헝 +헤 +헥 +헨 +헬 +헴 +헵 +헷 +헹 +혀 +혁 +현 +혈 +혐 +협 +혓 +혔 +형 +혜 +호 +혹 +혼 +홀 +홈 +홉 +홋 +홍 +홑 +화 +확 +환 +활 +홧 +황 +홰 +홱 +횃 +회 +획 +횝 +횟 +횡 +효 +후 +훅 +훈 +훌 +훑 +훔 +훗 +훤 +훨 +훼 +휄 +휑 +휘 +휙 +휜 +휠 +휩 +휭 +휴 +휼 +흄 +흉 +흐 +흑 +흔 +흘 +흙 +흠 +흡 +흣 +흥 +흩 +희 +흰 +흽 +히 +힉 +힌 +힐 +힘 +힙 +힝 +車 +滑 +金 +奈 +羅 +洛 +卵 +欄 +蘭 +郎 +來 +盧 +老 +魯 +綠 +鹿 +論 +雷 +樓 +縷 +凌 +樂 +不 +參 +葉 +沈 +若 +兩 +凉 +梁 +呂 +女 +廬 +麗 +黎 +曆 +歷 +戀 +蓮 +連 +列 +烈 +裂 +念 +獵 +靈 +領 +例 +禮 +醴 +惡 +尿 +料 +遼 +龍 +暈 +柳 +流 +類 +六 +陸 +倫 +律 +栗 +利 +李 +梨 +理 +離 +燐 +林 +臨 +立 +茶 +切 +宅 + diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt new file mode 100644 index 00000000..e166bf33 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt @@ -0,0 +1,185 @@ + +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? 
+@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +] +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +} +¡ +£ +§ +ª +« +­ +° +² +³ +´ +µ +· +º +» +¿ +À +Á + +Ä +Å +Ç +È +É +Ê +Ë +Ì +Í +Î +Ï +Ò +Ó +Ô +Õ +Ö +Ú +Ü +Ý +ß +à +á +â +ã +ä +å +æ +ç +è +é +ê +ë +ì +í +î +ï +ñ +ò +ó +ô +õ +ö +ø +ù +ú +û +ü +ý +ą +Ć +ć +Č +č +Đ +đ +ę +ı +Ł +ł +ō +Œ +œ +Š +š +Ÿ +Ž +ž +ʒ +β +δ +ε +з +Ṡ +‘ +€ +™ diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt new file mode 100644 index 00000000..84b885d8 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt @@ -0,0 +1,6623 @@ +' +疗 +绚 +诚 +娇 +溜 +题 +贿 +者 +廖 +更 +纳 +加 +奉 +公 +一 +就 +汴 +计 +与 +路 +房 +原 +妇 +2 +0 +8 +- +7 +其 +> +: +] +, +, +骑 +刈 +全 +消 +昏 +傈 +安 +久 +钟 +嗅 +不 +影 +处 +驽 +蜿 +资 +关 +椤 +地 +瘸 +专 +问 +忖 +票 +嫉 +炎 +韵 +要 +月 +田 +节 +陂 +鄙 +捌 +备 +拳 +伺 +眼 +网 +盎 +大 +傍 +心 +东 +愉 +汇 +蹿 +科 +每 +业 +里 +航 +晏 +字 +平 +录 +先 +1 +3 +彤 +鲶 +产 +稍 +督 +腴 +有 +象 +岳 +注 +绍 +在 +泺 +文 +定 +核 +名 +水 +过 +理 +让 +偷 +率 +等 +这 +发 +” +为 +含 +肥 +酉 +相 +鄱 +七 +编 +猥 +锛 +日 +镀 +蒂 +掰 +倒 +辆 +栾 +栗 +综 +涩 +州 +雌 +滑 +馀 +了 +机 +块 +司 +宰 +甙 +兴 +矽 +抚 +保 +用 +沧 +秩 +如 +收 +息 +滥 +页 +疑 +埠 +! +! +姥 +异 +橹 +钇 +向 +下 +跄 +的 +椴 +沫 +国 +绥 +獠 +报 +开 +民 +蜇 +何 +分 +凇 +长 +讥 +藏 +掏 +施 +羽 +中 +讲 +派 +嘟 +人 +提 +浼 +间 +世 +而 +古 +多 +倪 +唇 +饯 +控 +庚 +首 +赛 +蜓 +味 +断 +制 +觉 +技 +替 +艰 +溢 +潮 +夕 +钺 +外 +摘 +枋 +动 +双 +单 +啮 +户 +枇 +确 +锦 +曜 +杜 +或 +能 +效 +霜 +盒 +然 +侗 +电 +晁 +放 +步 +鹃 +新 +杖 +蜂 +吒 +濂 +瞬 +评 +总 +隍 +对 +独 +合 +也 +是 +府 +青 +天 +诲 +墙 +组 +滴 +级 +邀 +帘 +示 +已 +时 +骸 +仄 +泅 +和 +遨 +店 +雇 +疫 +持 +巍 +踮 +境 +只 +亨 +目 +鉴 +崤 +闲 +体 +泄 +杂 +作 +般 +轰 +化 +解 +迂 +诿 +蛭 +璀 +腾 +告 +版 +服 +省 +师 +小 +规 +程 +线 +海 +办 +引 +二 +桧 +牌 +砺 +洄 +裴 +修 +图 +痫 +胡 +许 +犊 +事 +郛 +基 +柴 +呼 +食 +研 +奶 +律 +蛋 +因 +葆 +察 +戏 +褒 +戒 +再 +李 +骁 +工 +貂 +油 +鹅 +章 +啄 +休 +场 +给 +睡 +纷 +豆 +器 +捎 +说 +敏 +学 +会 +浒 +设 +诊 +格 +廓 +查 +来 +霓 +室 +溆 +¢ +诡 +寥 +焕 +舜 +柒 +狐 +回 +戟 +砾 +厄 +实 +翩 +尿 +五 +入 +径 +惭 +喹 +股 +宇 +篝 +| +; +美 +期 +云 +九 +祺 +扮 +靠 +锝 +槌 +系 +企 +酰 +阊 +暂 +蚕 +忻 +豁 +本 +羹 +执 +条 +钦 +H +獒 +限 +进 +季 +楦 +于 +芘 +玖 +铋 +茯 +未 +答 +粘 +括 +样 +精 +欠 +矢 +甥 +帷 +嵩 +扣 +令 +仔 +风 +皈 +行 +支 +部 +蓉 +刮 +站 +蜡 +救 +钊 +汗 +松 +嫌 +成 +可 +. 
+鹤 +院 +从 +交 +政 +怕 +活 +调 +球 +局 +验 +髌 +第 +韫 +谗 +串 +到 +圆 +年 +米 +/ +* +友 +忿 +检 +区 +看 +自 +敢 +刃 +个 +兹 +弄 +流 +留 +同 +没 +齿 +星 +聆 +轼 +湖 +什 +三 +建 +蛔 +儿 +椋 +汕 +震 +颧 +鲤 +跟 +力 +情 +璺 +铨 +陪 +务 +指 +族 +训 +滦 +鄣 +濮 +扒 +商 +箱 +十 +召 +慷 +辗 +所 +莞 +管 +护 +臭 +横 +硒 +嗓 +接 +侦 +六 +露 +党 +馋 +驾 +剖 +高 +侬 +妪 +幂 +猗 +绺 +骐 +央 +酐 +孝 +筝 +课 +徇 +缰 +门 +男 +西 +项 +句 +谙 +瞒 +秃 +篇 +教 +碲 +罚 +声 +呐 +景 +前 +富 +嘴 +鳌 +稀 +免 +朋 +啬 +睐 +去 +赈 +鱼 +住 +肩 +愕 +速 +旁 +波 +厅 +健 +茼 +厥 +鲟 +谅 +投 +攸 +炔 +数 +方 +击 +呋 +谈 +绩 +别 +愫 +僚 +躬 +鹧 +胪 +炳 +招 +喇 +膨 +泵 +蹦 +毛 +结 +5 +4 +谱 +识 +陕 +粽 +婚 +拟 +构 +且 +搜 +任 +潘 +比 +郢 +妨 +醪 +陀 +桔 +碘 +扎 +选 +哈 +骷 +楷 +亿 +明 +缆 +脯 +监 +睫 +逻 +婵 +共 +赴 +淝 +凡 +惦 +及 +达 +揖 +谩 +澹 +减 +焰 +蛹 +番 +祁 +柏 +员 +禄 +怡 +峤 +龙 +白 +叽 +生 +闯 +起 +细 +装 +谕 +竟 +聚 +钙 +上 +导 +渊 +按 +艾 +辘 +挡 +耒 +盹 +饪 +臀 +记 +邮 +蕙 +受 +各 +医 +搂 +普 +滇 +朗 +茸 +带 +翻 +酚 +( +光 +堤 +墟 +蔷 +万 +幻 +〓 +瑙 +辈 +昧 +盏 +亘 +蛀 +吉 +铰 +请 +子 +假 +闻 +税 +井 +诩 +哨 +嫂 +好 +面 +琐 +校 +馊 +鬣 +缂 +营 +访 +炖 +占 +农 +缀 +否 +经 +钚 +棵 +趟 +张 +亟 +吏 +茶 +谨 +捻 +论 +迸 +堂 +玉 +信 +吧 +瞠 +乡 +姬 +寺 +咬 +溏 +苄 +皿 +意 +赉 +宝 +尔 +钰 +艺 +特 +唳 +踉 +都 +荣 +倚 +登 +荐 +丧 +奇 +涵 +批 +炭 +近 +符 +傩 +感 +道 +着 +菊 +虹 +仲 +众 +懈 +濯 +颞 +眺 +南 +释 +北 +缝 +标 +既 +茗 +整 +撼 +迤 +贲 +挎 +耱 +拒 +某 +妍 +卫 +哇 +英 +矶 +藩 +治 +他 +元 +领 +膜 +遮 +穗 +蛾 +飞 +荒 +棺 +劫 +么 +市 +火 +温 +拈 +棚 +洼 +转 +果 +奕 +卸 +迪 +伸 +泳 +斗 +邡 +侄 +涨 +屯 +萋 +胭 +氡 +崮 +枞 +惧 +冒 +彩 +斜 +手 +豚 +随 +旭 +淑 +妞 +形 +菌 +吲 +沱 +争 +驯 +歹 +挟 +兆 +柱 +传 +至 +包 +内 +响 +临 +红 +功 +弩 +衡 +寂 +禁 +老 +棍 +耆 +渍 +织 +害 +氵 +渑 +布 +载 +靥 +嗬 +虽 +苹 +咨 +娄 +库 +雉 +榜 +帜 +嘲 +套 +瑚 +亲 +簸 +欧 +边 +6 +腿 +旮 +抛 +吹 +瞳 +得 +镓 +梗 +厨 +继 +漾 +愣 +憨 +士 +策 +窑 +抑 +躯 +襟 +脏 +参 +贸 +言 +干 +绸 +鳄 +穷 +藜 +音 +折 +详 +) +举 +悍 +甸 +癌 +黎 +谴 +死 +罩 +迁 +寒 +驷 +袖 +媒 +蒋 +掘 +模 +纠 +恣 +观 +祖 +蛆 +碍 +位 +稿 +主 +澧 +跌 +筏 +京 +锏 +帝 +贴 +证 +糠 +才 +黄 +鲸 +略 +炯 +饱 +四 +出 +园 +犀 +牧 +容 +汉 +杆 +浈 +汰 +瑷 +造 +虫 +瘩 +怪 +驴 +济 +应 +花 +沣 +谔 +夙 +旅 +价 +矿 +以 +考 +s +u +呦 +晒 +巡 +茅 +准 +肟 +瓴 +詹 +仟 +褂 +译 +桌 +混 +宁 +怦 +郑 +抿 +些 +余 +鄂 +饴 +攒 +珑 +群 +阖 +岔 +琨 +藓 +预 +环 +洮 +岌 +宀 +杲 +瀵 +最 +常 +囡 +周 +踊 +女 +鼓 +袭 +喉 +简 +范 +薯 +遐 +疏 +粱 +黜 +禧 +法 +箔 +斤 +遥 +汝 +奥 +直 +贞 +撑 +置 +绱 +集 +她 +馅 +逗 +钧 +橱 +魉 +[ +恙 +躁 +唤 +9 +旺 +膘 +待 +脾 +惫 +购 +吗 +依 +盲 +度 +瘿 +蠖 +俾 +之 +镗 +拇 +鲵 +厝 +簧 +续 +款 +展 +啃 +表 +剔 +品 +钻 +腭 +损 +清 +锶 +统 +涌 +寸 +滨 +贪 +链 +吠 +冈 +伎 +迥 +咏 +吁 +览 +防 +迅 +失 +汾 +阔 +逵 +绀 +蔑 +列 +川 +凭 +努 +熨 +揪 +利 +俱 +绉 +抢 +鸨 +我 +即 +责 +膦 +易 +毓 +鹊 +刹 +玷 +岿 +空 +嘞 +绊 +排 +术 +估 +锷 +违 +们 +苟 +铜 +播 +肘 +件 +烫 +审 +鲂 +广 +像 +铌 +惰 +铟 +巳 +胍 +鲍 +康 +憧 +色 +恢 +想 +拷 +尤 +疳 +知 +S +Y +F +D +A +峄 +裕 +帮 +握 +搔 +氐 +氘 +难 +墒 +沮 +雨 +叁 +缥 +悴 +藐 +湫 +娟 +苑 +稠 +颛 +簇 +后 +阕 +闭 +蕤 +缚 +怎 +佞 +码 +嘤 +蔡 +痊 +舱 +螯 +帕 +赫 +昵 +升 +烬 +岫 +、 +疵 +蜻 +髁 +蕨 +隶 +烛 +械 +丑 +盂 +梁 +强 +鲛 +由 +拘 +揉 +劭 +龟 +撤 +钩 +呕 +孛 +费 +妻 +漂 +求 +阑 +崖 +秤 +甘 +通 +深 +补 +赃 +坎 +床 +啪 +承 +吼 +量 +暇 +钼 +烨 +阂 +擎 +脱 +逮 +称 +P +神 +属 +矗 +华 +届 +狍 +葑 +汹 +育 +患 +窒 +蛰 +佼 +静 +槎 +运 +鳗 +庆 +逝 +曼 +疱 +克 +代 +官 +此 +麸 +耧 +蚌 +晟 +例 +础 +榛 +副 +测 +唰 +缢 +迹 +灬 +霁 +身 +岁 +赭 +扛 +又 +菡 +乜 +雾 +板 +读 +陷 +徉 +贯 +郁 +虑 +变 +钓 +菜 +圾 +现 +琢 +式 +乐 +维 +渔 +浜 +左 +吾 +脑 +钡 +警 +T +啵 +拴 +偌 +漱 +湿 +硕 +止 +骼 +魄 +积 +燥 +联 +踢 +玛 +则 +窿 +见 +振 +畿 +送 +班 +钽 +您 +赵 +刨 +印 +讨 +踝 +籍 +谡 +舌 +崧 +汽 +蔽 +沪 +酥 +绒 +怖 +财 +帖 +肱 +私 +莎 +勋 +羔 +霸 +励 +哼 +帐 +将 +帅 +渠 +纪 +婴 +娩 +岭 +厘 +滕 +吻 +伤 +坝 +冠 +戊 +隆 +瘁 +介 +涧 +物 +黍 +并 +姗 +奢 +蹑 +掣 +垸 +锴 +命 +箍 +捉 +病 +辖 +琰 +眭 +迩 +艘 +绌 +繁 +寅 +若 +毋 +思 +诉 +类 +诈 +燮 +轲 +酮 +狂 +重 +反 +职 +筱 +县 +委 +磕 +绣 +奖 +晋 +濉 +志 +徽 +肠 +呈 +獐 +坻 +口 +片 +碰 +几 +村 +柿 +劳 +料 +获 +亩 +惕 +晕 +厌 +号 +罢 +池 +正 +鏖 +煨 +家 +棕 +复 +尝 +懋 +蜥 +锅 +岛 +扰 +队 +坠 +瘾 +钬 +@ +卧 +疣 +镇 +譬 +冰 +彷 +频 +黯 +据 +垄 +采 +八 +缪 +瘫 +型 +熹 +砰 +楠 +襁 +箐 +但 +嘶 +绳 +啤 +拍 +盥 +穆 +傲 +洗 +盯 +塘 +怔 +筛 +丿 +台 +恒 +喂 +葛 +永 +¥ +烟 +酒 +桦 +书 +砂 +蚝 +缉 +态 +瀚 +袄 +圳 +轻 +蛛 +超 +榧 +遛 +姒 +奘 +铮 +右 +荽 +望 +偻 +卡 +丶 +氰 +附 +做 +革 +索 +戚 +坨 +桷 +唁 +垅 +榻 +岐 +偎 +坛 +莨 +山 +殊 +微 +骇 +陈 +爨 +推 +嗝 +驹 +澡 +藁 +呤 +卤 +嘻 +糅 +逛 +侵 +郓 +酌 +德 +摇 +※ +鬃 +被 +慨 +殡 +羸 +昌 +泡 +戛 +鞋 +河 +宪 +沿 +玲 +鲨 +翅 +哽 +源 +铅 +语 +照 
+邯 +址 +荃 +佬 +顺 +鸳 +町 +霭 +睾 +瓢 +夸 +椁 +晓 +酿 +痈 +咔 +侏 +券 +噎 +湍 +签 +嚷 +离 +午 +尚 +社 +锤 +背 +孟 +使 +浪 +缦 +潍 +鞅 +军 +姹 +驶 +笑 +鳟 +鲁 +》 +孽 +钜 +绿 +洱 +礴 +焯 +椰 +颖 +囔 +乌 +孔 +巴 +互 +性 +椽 +哞 +聘 +昨 +早 +暮 +胶 +炀 +隧 +低 +彗 +昝 +铁 +呓 +氽 +藉 +喔 +癖 +瑗 +姨 +权 +胱 +韦 +堑 +蜜 +酋 +楝 +砝 +毁 +靓 +歙 +锲 +究 +屋 +喳 +骨 +辨 +碑 +武 +鸠 +宫 +辜 +烊 +适 +坡 +殃 +培 +佩 +供 +走 +蜈 +迟 +翼 +况 +姣 +凛 +浔 +吃 +飘 +债 +犟 +金 +促 +苛 +崇 +坂 +莳 +畔 +绂 +兵 +蠕 +斋 +根 +砍 +亢 +欢 +恬 +崔 +剁 +餐 +榫 +快 +扶 +‖ +濒 +缠 +鳜 +当 +彭 +驭 +浦 +篮 +昀 +锆 +秸 +钳 +弋 +娣 +瞑 +夷 +龛 +苫 +拱 +致 +% +嵊 +障 +隐 +弑 +初 +娓 +抉 +汩 +累 +蓖 +" +唬 +助 +苓 +昙 +押 +毙 +破 +城 +郧 +逢 +嚏 +獭 +瞻 +溱 +婿 +赊 +跨 +恼 +璧 +萃 +姻 +貉 +灵 +炉 +密 +氛 +陶 +砸 +谬 +衔 +点 +琛 +沛 +枳 +层 +岱 +诺 +脍 +榈 +埂 +征 +冷 +裁 +打 +蹴 +素 +瘘 +逞 +蛐 +聊 +激 +腱 +萘 +踵 +飒 +蓟 +吆 +取 +咙 +簋 +涓 +矩 +曝 +挺 +揣 +座 +你 +史 +舵 +焱 +尘 +苏 +笈 +脚 +溉 +榨 +诵 +樊 +邓 +焊 +义 +庶 +儋 +蟋 +蒲 +赦 +呷 +杞 +诠 +豪 +还 +试 +颓 +茉 +太 +除 +紫 +逃 +痴 +草 +充 +鳕 +珉 +祗 +墨 +渭 +烩 +蘸 +慕 +璇 +镶 +穴 +嵘 +恶 +骂 +险 +绋 +幕 +碉 +肺 +戳 +刘 +潞 +秣 +纾 +潜 +銮 +洛 +须 +罘 +销 +瘪 +汞 +兮 +屉 +r +林 +厕 +质 +探 +划 +狸 +殚 +善 +煊 +烹 +〒 +锈 +逯 +宸 +辍 +泱 +柚 +袍 +远 +蹋 +嶙 +绝 +峥 +娥 +缍 +雀 +徵 +认 +镱 +谷 += +贩 +勉 +撩 +鄯 +斐 +洋 +非 +祚 +泾 +诒 +饿 +撬 +威 +晷 +搭 +芍 +锥 +笺 +蓦 +候 +琊 +档 +礁 +沼 +卵 +荠 +忑 +朝 +凹 +瑞 +头 +仪 +弧 +孵 +畏 +铆 +突 +衲 +车 +浩 +气 +茂 +悖 +厢 +枕 +酝 +戴 +湾 +邹 +飚 +攘 +锂 +写 +宵 +翁 +岷 +无 +喜 +丈 +挑 +嗟 +绛 +殉 +议 +槽 +具 +醇 +淞 +笃 +郴 +阅 +饼 +底 +壕 +砚 +弈 +询 +缕 +庹 +翟 +零 +筷 +暨 +舟 +闺 +甯 +撞 +麂 +茌 +蔼 +很 +珲 +捕 +棠 +角 +阉 +媛 +娲 +诽 +剿 +尉 +爵 +睬 +韩 +诰 +匣 +危 +糍 +镯 +立 +浏 +阳 +少 +盆 +舔 +擘 +匪 +申 +尬 +铣 +旯 +抖 +赘 +瓯 +居 +ˇ +哮 +游 +锭 +茏 +歌 +坏 +甚 +秒 +舞 +沙 +仗 +劲 +潺 +阿 +燧 +郭 +嗖 +霏 +忠 +材 +奂 +耐 +跺 +砀 +输 +岖 +媳 +氟 +极 +摆 +灿 +今 +扔 +腻 +枝 +奎 +药 +熄 +吨 +话 +q +额 +慑 +嘌 +协 +喀 +壳 +埭 +视 +著 +於 +愧 +陲 +翌 +峁 +颅 +佛 +腹 +聋 +侯 +咎 +叟 +秀 +颇 +存 +较 +罪 +哄 +岗 +扫 +栏 +钾 +羌 +己 +璨 +枭 +霉 +煌 +涸 +衿 +键 +镝 +益 +岢 +奏 +连 +夯 +睿 +冥 +均 +糖 +狞 +蹊 +稻 +爸 +刿 +胥 +煜 +丽 +肿 +璃 +掸 +跚 +灾 +垂 +樾 +濑 +乎 +莲 +窄 +犹 +撮 +战 +馄 +软 +络 +显 +鸢 +胸 +宾 +妲 +恕 +埔 +蝌 +份 +遇 +巧 +瞟 +粒 +恰 +剥 +桡 +博 +讯 +凯 +堇 +阶 +滤 +卖 +斌 +骚 +彬 +兑 +磺 +樱 +舷 +两 +娱 +福 +仃 +差 +找 +桁 +÷ +净 +把 +阴 +污 +戬 +雷 +碓 +蕲 +楚 +罡 +焖 +抽 +妫 +咒 +仑 +闱 +尽 +邑 +菁 +爱 +贷 +沥 +鞑 +牡 +嗉 +崴 +骤 +塌 +嗦 +订 +拮 +滓 +捡 +锻 +次 +坪 +杩 +臃 +箬 +融 +珂 +鹗 +宗 +枚 +降 +鸬 +妯 +阄 +堰 +盐 +毅 +必 +杨 +崃 +俺 +甬 +状 +莘 +货 +耸 +菱 +腼 +铸 +唏 +痤 +孚 +澳 +懒 +溅 +翘 +疙 +杷 +淼 +缙 +骰 +喊 +悉 +砻 +坷 +艇 +赁 +界 +谤 +纣 +宴 +晃 +茹 +归 +饭 +梢 +铡 +街 +抄 +肼 +鬟 +苯 +颂 +撷 +戈 +炒 +咆 +茭 +瘙 +负 +仰 +客 +琉 +铢 +封 +卑 +珥 +椿 +镧 +窨 +鬲 +寿 +御 +袤 +铃 +萎 +砖 +餮 +脒 +裳 +肪 +孕 +嫣 +馗 +嵇 +恳 +氯 +江 +石 +褶 +冢 +祸 +阻 +狈 +羞 +银 +靳 +透 +咳 +叼 +敷 +芷 +啥 +它 +瓤 +兰 +痘 +懊 +逑 +肌 +往 +捺 +坊 +甩 +呻 +〃 +沦 +忘 +膻 +祟 +菅 +剧 +崆 +智 +坯 +臧 +霍 +墅 +攻 +眯 +倘 +拢 +骠 +铐 +庭 +岙 +瓠 +′ +缺 +泥 +迢 +捶 +? +? 
+郏 +喙 +掷 +沌 +纯 +秘 +种 +听 +绘 +固 +螨 +团 +香 +盗 +妒 +埚 +蓝 +拖 +旱 +荞 +铀 +血 +遏 +汲 +辰 +叩 +拽 +幅 +硬 +惶 +桀 +漠 +措 +泼 +唑 +齐 +肾 +念 +酱 +虚 +屁 +耶 +旗 +砦 +闵 +婉 +馆 +拭 +绅 +韧 +忏 +窝 +醋 +葺 +顾 +辞 +倜 +堆 +辋 +逆 +玟 +贱 +疾 +董 +惘 +倌 +锕 +淘 +嘀 +莽 +俭 +笏 +绑 +鲷 +杈 +择 +蟀 +粥 +嗯 +驰 +逾 +案 +谪 +褓 +胫 +哩 +昕 +颚 +鲢 +绠 +躺 +鹄 +崂 +儒 +俨 +丝 +尕 +泌 +啊 +萸 +彰 +幺 +吟 +骄 +苣 +弦 +脊 +瑰 +〈 +诛 +镁 +析 +闪 +剪 +侧 +哟 +框 +螃 +守 +嬗 +燕 +狭 +铈 +缮 +概 +迳 +痧 +鲲 +俯 +售 +笼 +痣 +扉 +挖 +满 +咋 +援 +邱 +扇 +歪 +便 +玑 +绦 +峡 +蛇 +叨 +〖 +泽 +胃 +斓 +喋 +怂 +坟 +猪 +该 +蚬 +炕 +弥 +赞 +棣 +晔 +娠 +挲 +狡 +创 +疖 +铕 +镭 +稷 +挫 +弭 +啾 +翔 +粉 +履 +苘 +哦 +楼 +秕 +铂 +土 +锣 +瘟 +挣 +栉 +习 +享 +桢 +袅 +磨 +桂 +谦 +延 +坚 +蔚 +噗 +署 +谟 +猬 +钎 +恐 +嬉 +雒 +倦 +衅 +亏 +璩 +睹 +刻 +殿 +王 +算 +雕 +麻 +丘 +柯 +骆 +丸 +塍 +谚 +添 +鲈 +垓 +桎 +蚯 +芥 +予 +飕 +镦 +谌 +窗 +醚 +菀 +亮 +搪 +莺 +蒿 +羁 +足 +J +真 +轶 +悬 +衷 +靛 +翊 +掩 +哒 +炅 +掐 +冼 +妮 +l +谐 +稚 +荆 +擒 +犯 +陵 +虏 +浓 +崽 +刍 +陌 +傻 +孜 +千 +靖 +演 +矜 +钕 +煽 +杰 +酗 +渗 +伞 +栋 +俗 +泫 +戍 +罕 +沾 +疽 +灏 +煦 +芬 +磴 +叱 +阱 +榉 +湃 +蜀 +叉 +醒 +彪 +租 +郡 +篷 +屎 +良 +垢 +隗 +弱 +陨 +峪 +砷 +掴 +颁 +胎 +雯 +绵 +贬 +沐 +撵 +隘 +篙 +暖 +曹 +陡 +栓 +填 +臼 +彦 +瓶 +琪 +潼 +哪 +鸡 +摩 +啦 +俟 +锋 +域 +耻 +蔫 +疯 +纹 +撇 +毒 +绶 +痛 +酯 +忍 +爪 +赳 +歆 +嘹 +辕 +烈 +册 +朴 +钱 +吮 +毯 +癜 +娃 +谀 +邵 +厮 +炽 +璞 +邃 +丐 +追 +词 +瓒 +忆 +轧 +芫 +谯 +喷 +弟 +半 +冕 +裙 +掖 +墉 +绮 +寝 +苔 +势 +顷 +褥 +切 +衮 +君 +佳 +嫒 +蚩 +霞 +佚 +洙 +逊 +镖 +暹 +唛 +& +殒 +顶 +碗 +獗 +轭 +铺 +蛊 +废 +恹 +汨 +崩 +珍 +那 +杵 +曲 +纺 +夏 +薰 +傀 +闳 +淬 +姘 +舀 +拧 +卷 +楂 +恍 +讪 +厩 +寮 +篪 +赓 +乘 +灭 +盅 +鞣 +沟 +慎 +挂 +饺 +鼾 +杳 +树 +缨 +丛 +絮 +娌 +臻 +嗳 +篡 +侩 +述 +衰 +矛 +圈 +蚜 +匕 +筹 +匿 +濞 +晨 +叶 +骋 +郝 +挚 +蚴 +滞 +增 +侍 +描 +瓣 +吖 +嫦 +蟒 +匾 +圣 +赌 +毡 +癞 +恺 +百 +曳 +需 +篓 +肮 +庖 +帏 +卿 +驿 +遗 +蹬 +鬓 +骡 +歉 +芎 +胳 +屐 +禽 +烦 +晌 +寄 +媾 +狄 +翡 +苒 +船 +廉 +终 +痞 +殇 +々 +畦 +饶 +改 +拆 +悻 +萄 +£ +瓿 +乃 +訾 +桅 +匮 +溧 +拥 +纱 +铍 +骗 +蕃 +龋 +缬 +父 +佐 +疚 +栎 +醍 +掳 +蓄 +x +惆 +颜 +鲆 +榆 +〔 +猎 +敌 +暴 +谥 +鲫 +贾 +罗 +玻 +缄 +扦 +芪 +癣 +落 +徒 +臾 +恿 +猩 +托 +邴 +肄 +牵 +春 +陛 +耀 +刊 +拓 +蓓 +邳 +堕 +寇 +枉 +淌 +啡 +湄 +兽 +酷 +萼 +碚 +濠 +萤 +夹 +旬 +戮 +梭 +琥 +椭 +昔 +勺 +蜊 +绐 +晚 +孺 +僵 +宣 +摄 +冽 +旨 +萌 +忙 +蚤 +眉 +噼 +蟑 +付 +契 +瓜 +悼 +颡 +壁 +曾 +窕 +颢 +澎 +仿 +俑 +浑 +嵌 +浣 +乍 +碌 +褪 +乱 +蔟 +隙 +玩 +剐 +葫 +箫 +纲 +围 +伐 +决 +伙 +漩 +瑟 +刑 +肓 +镳 +缓 +蹭 +氨 +皓 +典 +畲 +坍 +铑 +檐 +塑 +洞 +倬 +储 +胴 +淳 +戾 +吐 +灼 +惺 +妙 +毕 +珐 +缈 +虱 +盖 +羰 +鸿 +磅 +谓 +髅 +娴 +苴 +唷 +蚣 +霹 +抨 +贤 +唠 +犬 +誓 +逍 +庠 +逼 +麓 +籼 +釉 +呜 +碧 +秧 +氩 +摔 +霄 +穸 +纨 +辟 +妈 +映 +完 +牛 +缴 +嗷 +炊 +恩 +荔 +茆 +掉 +紊 +慌 +莓 +羟 +阙 +萁 +磐 +另 +蕹 +辱 +鳐 +湮 +吡 +吩 +唐 +睦 +垠 +舒 +圜 +冗 +瞿 +溺 +芾 +囱 +匠 +僳 +汐 +菩 +饬 +漓 +黑 +霰 +浸 +濡 +窥 +毂 +蒡 +兢 +驻 +鹉 +芮 +诙 +迫 +雳 +厂 +忐 +臆 +猴 +鸣 +蚪 +栈 +箕 +羡 +渐 +莆 +捍 +眈 +哓 +趴 +蹼 +埕 +嚣 +骛 +宏 +淄 +斑 +噜 +严 +瑛 +垃 +椎 +诱 +压 +庾 +绞 +焘 +廿 +抡 +迄 +棘 +夫 +纬 +锹 +眨 +瞌 +侠 +脐 +竞 +瀑 +孳 +骧 +遁 +姜 +颦 +荪 +滚 +萦 +伪 +逸 +粳 +爬 +锁 +矣 +役 +趣 +洒 +颔 +诏 +逐 +奸 +甭 +惠 +攀 +蹄 +泛 +尼 +拼 +阮 +鹰 +亚 +颈 +惑 +勒 +〉 +际 +肛 +爷 +刚 +钨 +丰 +养 +冶 +鲽 +辉 +蔻 +画 +覆 +皴 +妊 +麦 +返 +醉 +皂 +擀 +〗 +酶 +凑 +粹 +悟 +诀 +硖 +港 +卜 +z +杀 +涕 +± +舍 +铠 +抵 +弛 +段 +敝 +镐 +奠 +拂 +轴 +跛 +袱 +e +t +沉 +菇 +俎 +薪 +峦 +秭 +蟹 +历 +盟 +菠 +寡 +液 +肢 +喻 +染 +裱 +悱 +抱 +氙 +赤 +捅 +猛 +跑 +氮 +谣 +仁 +尺 +辊 +窍 +烙 +衍 +架 +擦 +倏 +璐 +瑁 +币 +楞 +胖 +夔 +趸 +邛 +惴 +饕 +虔 +蝎 +§ +哉 +贝 +宽 +辫 +炮 +扩 +饲 +籽 +魏 +菟 +锰 +伍 +猝 +末 +琳 +哚 +蛎 +邂 +呀 +姿 +鄞 +却 +歧 +仙 +恸 +椐 +森 +牒 +寤 +袒 +婆 +虢 +雅 +钉 +朵 +贼 +欲 +苞 +寰 +故 +龚 +坭 +嘘 +咫 +礼 +硷 +兀 +睢 +汶 +’ +铲 +烧 +绕 +诃 +浃 +钿 +哺 +柜 +讼 +颊 +璁 +腔 +洽 +咐 +脲 +簌 +筠 +镣 +玮 +鞠 +谁 +兼 +姆 +挥 +梯 +蝴 +谘 +漕 +刷 +躏 +宦 +弼 +b +垌 +劈 +麟 +莉 +揭 +笙 +渎 +仕 +嗤 +仓 +配 +怏 +抬 +错 +泯 +镊 +孰 +猿 +邪 +仍 +秋 +鼬 +壹 +歇 +吵 +炼 +< +尧 +射 +柬 +廷 +胧 +霾 +凳 +隋 +肚 +浮 +梦 +祥 +株 +堵 +退 +L +鹫 +跎 +凶 +毽 +荟 +炫 +栩 +玳 +甜 +沂 +鹿 +顽 +伯 +爹 +赔 +蛴 +徐 +匡 +欣 +狰 +缸 +雹 +蟆 +疤 +默 +沤 +啜 +痂 +衣 +禅 +w +i +h +辽 +葳 +黝 +钗 +停 +沽 +棒 +馨 +颌 +肉 +吴 +硫 +悯 +劾 +娈 +马 +啧 +吊 +悌 +镑 +峭 +帆 +瀣 +涉 +咸 +疸 +滋 +泣 +翦 +拙 +癸 +钥 +蜒 ++ +尾 +庄 +凝 +泉 +婢 +渴 +谊 +乞 +陆 +锉 +糊 +鸦 +淮 +I +B +N +晦 +弗 +乔 +庥 +葡 +尻 +席 +橡 +傣 +渣 +拿 +惩 +麋 +斛 +缃 +矮 +蛏 +岘 +鸽 +姐 +膏 +催 +奔 +镒 +喱 +蠡 +摧 +钯 +胤 +柠 +拐 +璋 +鸥 +卢 +荡 +倾 +^ +_ +珀 +逄 +萧 +塾 +掇 +贮 +笆 +聂 +圃 +冲 +嵬 +M +滔 +笕 +值 
+炙 +偶 +蜱 +搐 +梆 +汪 +蔬 +腑 +鸯 +蹇 +敞 +绯 +仨 +祯 +谆 +梧 +糗 +鑫 +啸 +豺 +囹 +猾 +巢 +柄 +瀛 +筑 +踌 +沭 +暗 +苁 +鱿 +蹉 +脂 +蘖 +牢 +热 +木 +吸 +溃 +宠 +序 +泞 +偿 +拜 +檩 +厚 +朐 +毗 +螳 +吞 +媚 +朽 +担 +蝗 +橘 +畴 +祈 +糟 +盱 +隼 +郜 +惜 +珠 +裨 +铵 +焙 +琚 +唯 +咚 +噪 +骊 +丫 +滢 +勤 +棉 +呸 +咣 +淀 +隔 +蕾 +窈 +饨 +挨 +煅 +短 +匙 +粕 +镜 +赣 +撕 +墩 +酬 +馁 +豌 +颐 +抗 +酣 +氓 +佑 +搁 +哭 +递 +耷 +涡 +桃 +贻 +碣 +截 +瘦 +昭 +镌 +蔓 +氚 +甲 +猕 +蕴 +蓬 +散 +拾 +纛 +狼 +猷 +铎 +埋 +旖 +矾 +讳 +囊 +糜 +迈 +粟 +蚂 +紧 +鲳 +瘢 +栽 +稼 +羊 +锄 +斟 +睁 +桥 +瓮 +蹙 +祉 +醺 +鼻 +昱 +剃 +跳 +篱 +跷 +蒜 +翎 +宅 +晖 +嗑 +壑 +峻 +癫 +屏 +狠 +陋 +袜 +途 +憎 +祀 +莹 +滟 +佶 +溥 +臣 +约 +盛 +峰 +磁 +慵 +婪 +拦 +莅 +朕 +鹦 +粲 +裤 +哎 +疡 +嫖 +琵 +窟 +堪 +谛 +嘉 +儡 +鳝 +斩 +郾 +驸 +酊 +妄 +胜 +贺 +徙 +傅 +噌 +钢 +栅 +庇 +恋 +匝 +巯 +邈 +尸 +锚 +粗 +佟 +蛟 +薹 +纵 +蚊 +郅 +绢 +锐 +苗 +俞 +篆 +淆 +膀 +鲜 +煎 +诶 +秽 +寻 +涮 +刺 +怀 +噶 +巨 +褰 +魅 +灶 +灌 +桉 +藕 +谜 +舸 +薄 +搀 +恽 +借 +牯 +痉 +渥 +愿 +亓 +耘 +杠 +柩 +锔 +蚶 +钣 +珈 +喘 +蹒 +幽 +赐 +稗 +晤 +莱 +泔 +扯 +肯 +菪 +裆 +腩 +豉 +疆 +骜 +腐 +倭 +珏 +唔 +粮 +亡 +润 +慰 +伽 +橄 +玄 +誉 +醐 +胆 +龊 +粼 +塬 +陇 +彼 +削 +嗣 +绾 +芽 +妗 +垭 +瘴 +爽 +薏 +寨 +龈 +泠 +弹 +赢 +漪 +猫 +嘧 +涂 +恤 +圭 +茧 +烽 +屑 +痕 +巾 +赖 +荸 +凰 +腮 +畈 +亵 +蹲 +偃 +苇 +澜 +艮 +换 +骺 +烘 +苕 +梓 +颉 +肇 +哗 +悄 +氤 +涠 +葬 +屠 +鹭 +植 +竺 +佯 +诣 +鲇 +瘀 +鲅 +邦 +移 +滁 +冯 +耕 +癔 +戌 +茬 +沁 +巩 +悠 +湘 +洪 +痹 +锟 +循 +谋 +腕 +鳃 +钠 +捞 +焉 +迎 +碱 +伫 +急 +榷 +奈 +邝 +卯 +辄 +皲 +卟 +醛 +畹 +忧 +稳 +雄 +昼 +缩 +阈 +睑 +扌 +耗 +曦 +涅 +捏 +瞧 +邕 +淖 +漉 +铝 +耦 +禹 +湛 +喽 +莼 +琅 +诸 +苎 +纂 +硅 +始 +嗨 +傥 +燃 +臂 +赅 +嘈 +呆 +贵 +屹 +壮 +肋 +亍 +蚀 +卅 +豹 +腆 +邬 +迭 +浊 +} +童 +螂 +捐 +圩 +勐 +触 +寞 +汊 +壤 +荫 +膺 +渌 +芳 +懿 +遴 +螈 +泰 +蓼 +蛤 +茜 +舅 +枫 +朔 +膝 +眙 +避 +梅 +判 +鹜 +璜 +牍 +缅 +垫 +藻 +黔 +侥 +惚 +懂 +踩 +腰 +腈 +札 +丞 +唾 +慈 +顿 +摹 +荻 +琬 +~ +斧 +沈 +滂 +胁 +胀 +幄 +莜 +Z +匀 +鄄 +掌 +绰 +茎 +焚 +赋 +萱 +谑 +汁 +铒 +瞎 +夺 +蜗 +野 +娆 +冀 +弯 +篁 +懵 +灞 +隽 +芡 +脘 +俐 +辩 +芯 +掺 +喏 +膈 +蝈 +觐 +悚 +踹 +蔗 +熠 +鼠 +呵 +抓 +橼 +峨 +畜 +缔 +禾 +崭 +弃 +熊 +摒 +凸 +拗 +穹 +蒙 +抒 +祛 +劝 +闫 +扳 +阵 +醌 +踪 +喵 +侣 +搬 +仅 +荧 +赎 +蝾 +琦 +买 +婧 +瞄 +寓 +皎 +冻 +赝 +箩 +莫 +瞰 +郊 +笫 +姝 +筒 +枪 +遣 +煸 +袋 +舆 +痱 +涛 +母 +〇 +启 +践 +耙 +绲 +盘 +遂 +昊 +搞 +槿 +诬 +纰 +泓 +惨 +檬 +亻 +越 +C +o +憩 +熵 +祷 +钒 +暧 +塔 +阗 +胰 +咄 +娶 +魔 +琶 +钞 +邻 +扬 +杉 +殴 +咽 +弓 +〆 +髻 +】 +吭 +揽 +霆 +拄 +殖 +脆 +彻 +岩 +芝 +勃 +辣 +剌 +钝 +嘎 +甄 +佘 +皖 +伦 +授 +徕 +憔 +挪 +皇 +庞 +稔 +芜 +踏 +溴 +兖 +卒 +擢 +饥 +鳞 +煲 +‰ +账 +颗 +叻 +斯 +捧 +鳍 +琮 +讹 +蛙 +纽 +谭 +酸 +兔 +莒 +睇 +伟 +觑 +羲 +嗜 +宜 +褐 +旎 +辛 +卦 +诘 +筋 +鎏 +溪 +挛 +熔 +阜 +晰 +鳅 +丢 +奚 +灸 +呱 +献 +陉 +黛 +鸪 +甾 +萨 +疮 +拯 +洲 +疹 +辑 +叙 +恻 +谒 +允 +柔 +烂 +氏 +逅 +漆 +拎 +惋 +扈 +湟 +纭 +啕 +掬 +擞 +哥 +忽 +涤 +鸵 +靡 +郗 +瓷 +扁 +廊 +怨 +雏 +钮 +敦 +E +懦 +憋 +汀 +拚 +啉 +腌 +岸 +f +痼 +瞅 +尊 +咀 +眩 +飙 +忌 +仝 +迦 +熬 +毫 +胯 +篑 +茄 +腺 +凄 +舛 +碴 +锵 +诧 +羯 +後 +漏 +汤 +宓 +仞 +蚁 +壶 +谰 +皑 +铄 +棰 +罔 +辅 +晶 +苦 +牟 +闽 +\ +烃 +饮 +聿 +丙 +蛳 +朱 +煤 +涔 +鳖 +犁 +罐 +荼 +砒 +淦 +妤 +黏 +戎 +孑 +婕 +瑾 +戢 +钵 +枣 +捋 +砥 +衩 +狙 +桠 +稣 +阎 +肃 +梏 +诫 +孪 +昶 +婊 +衫 +嗔 +侃 +塞 +蜃 +樵 +峒 +貌 +屿 +欺 +缫 +阐 +栖 +诟 +珞 +荭 +吝 +萍 +嗽 +恂 +啻 +蜴 +磬 +峋 +俸 +豫 +谎 +徊 +镍 +韬 +魇 +晴 +U +囟 +猜 +蛮 +坐 +囿 +伴 +亭 +肝 +佗 +蝠 +妃 +胞 +滩 +榴 +氖 +垩 +苋 +砣 +扪 +馏 +姓 +轩 +厉 +夥 +侈 +禀 +垒 +岑 +赏 +钛 +辐 +痔 +披 +纸 +碳 +“ +坞 +蠓 +挤 +荥 +沅 +悔 +铧 +帼 +蒌 +蝇 +a +p +y +n +g +哀 +浆 +瑶 +凿 +桶 +馈 +皮 +奴 +苜 +佤 +伶 +晗 +铱 +炬 +优 +弊 +氢 +恃 +甫 +攥 +端 +锌 +灰 +稹 +炝 +曙 +邋 +亥 +眶 +碾 +拉 +萝 +绔 +捷 +浍 +腋 +姑 +菖 +凌 +涞 +麽 +锢 +桨 +潢 +绎 +镰 +殆 +锑 +渝 +铬 +困 +绽 +觎 +匈 +糙 +暑 +裹 +鸟 +盔 +肽 +迷 +綦 +『 +亳 +佝 +俘 +钴 +觇 +骥 +仆 +疝 +跪 +婶 +郯 +瀹 +唉 +脖 +踞 +针 +晾 +忒 +扼 +瞩 +叛 +椒 +疟 +嗡 +邗 +肆 +跆 +玫 +忡 +捣 +咧 +唆 +艄 +蘑 +潦 +笛 +阚 +沸 +泻 +掊 +菽 +贫 +斥 +髂 +孢 +镂 +赂 +麝 +鸾 +屡 +衬 +苷 +恪 +叠 +希 +粤 +爻 +喝 +茫 +惬 +郸 +绻 +庸 +撅 +碟 +宄 +妹 +膛 +叮 +饵 +崛 +嗲 +椅 +冤 +搅 +咕 +敛 +尹 +垦 +闷 +蝉 +霎 +勰 +败 +蓑 +泸 +肤 +鹌 +幌 +焦 +浠 +鞍 +刁 +舰 +乙 +竿 +裔 +。 +茵 +函 +伊 +兄 +丨 +娜 +匍 +謇 +莪 +宥 +似 +蝽 +翳 +酪 +翠 +粑 +薇 +祢 +骏 +赠 +叫 +Q +噤 +噻 +竖 +芗 +莠 +潭 +俊 +羿 +耜 +O +郫 +趁 +嗪 +囚 +蹶 +芒 +洁 +笋 +鹑 +敲 +硝 +啶 +堡 +渲 +揩 +』 +携 +宿 +遒 +颍 +扭 +棱 +割 +萜 +蔸 +葵 +琴 +捂 +饰 +衙 +耿 +掠 +募 +岂 +窖 +涟 +蔺 +瘤 +柞 +瞪 +怜 +匹 +距 +楔 +炜 +哆 +秦 +缎 +幼 +茁 +绪 +痨 +恨 +楸 +娅 +瓦 +桩 +雪 +嬴 +伏 +榔 +妥 +铿 +拌 +眠 +雍 +缇 +‘ +卓 +搓 +哌 +觞 +噩 +屈 +哧 +髓 +咦 +巅 +娑 +侑 +淫 +膳 +祝 +勾 +姊 +莴 
+胄 +疃 +薛 +蜷 +胛 +巷 +芙 +芋 +熙 +闰 +勿 +窃 +狱 +剩 +钏 +幢 +陟 +铛 +慧 +靴 +耍 +k +浙 +浇 +飨 +惟 +绗 +祜 +澈 +啼 +咪 +磷 +摞 +诅 +郦 +抹 +跃 +壬 +吕 +肖 +琏 +颤 +尴 +剡 +抠 +凋 +赚 +泊 +津 +宕 +殷 +倔 +氲 +漫 +邺 +涎 +怠 +$ +垮 +荬 +遵 +俏 +叹 +噢 +饽 +蜘 +孙 +筵 +疼 +鞭 +羧 +牦 +箭 +潴 +c +眸 +祭 +髯 +啖 +坳 +愁 +芩 +驮 +倡 +巽 +穰 +沃 +胚 +怒 +凤 +槛 +剂 +趵 +嫁 +v +邢 +灯 +鄢 +桐 +睽 +檗 +锯 +槟 +婷 +嵋 +圻 +诗 +蕈 +颠 +遭 +痢 +芸 +怯 +馥 +竭 +锗 +徜 +恭 +遍 +籁 +剑 +嘱 +苡 +龄 +僧 +桑 +潸 +弘 +澶 +楹 +悲 +讫 +愤 +腥 +悸 +谍 +椹 +呢 +桓 +葭 +攫 +阀 +翰 +躲 +敖 +柑 +郎 +笨 +橇 +呃 +魁 +燎 +脓 +葩 +磋 +垛 +玺 +狮 +沓 +砜 +蕊 +锺 +罹 +蕉 +翱 +虐 +闾 +巫 +旦 +茱 +嬷 +枯 +鹏 +贡 +芹 +汛 +矫 +绁 +拣 +禺 +佃 +讣 +舫 +惯 +乳 +趋 +疲 +挽 +岚 +虾 +衾 +蠹 +蹂 +飓 +氦 +铖 +孩 +稞 +瑜 +壅 +掀 +勘 +妓 +畅 +髋 +W +庐 +牲 +蓿 +榕 +练 +垣 +唱 +邸 +菲 +昆 +婺 +穿 +绡 +麒 +蚱 +掂 +愚 +泷 +涪 +漳 +妩 +娉 +榄 +讷 +觅 +旧 +藤 +煮 +呛 +柳 +腓 +叭 +庵 +烷 +阡 +罂 +蜕 +擂 +猖 +咿 +媲 +脉 +【 +沏 +貅 +黠 +熏 +哲 +烁 +坦 +酵 +兜 +× +潇 +撒 +剽 +珩 +圹 +乾 +摸 +樟 +帽 +嗒 +襄 +魂 +轿 +憬 +锡 +〕 +喃 +皆 +咖 +隅 +脸 +残 +泮 +袂 +鹂 +珊 +囤 +捆 +咤 +误 +徨 +闹 +淙 +芊 +淋 +怆 +囗 +拨 +梳 +渤 +R +G +绨 +蚓 +婀 +幡 +狩 +麾 +谢 +唢 +裸 +旌 +伉 +纶 +裂 +驳 +砼 +咛 +澄 +樨 +蹈 +宙 +澍 +倍 +貔 +操 +勇 +蟠 +摈 +砧 +虬 +够 +缁 +悦 +藿 +撸 +艹 +摁 +淹 +豇 +虎 +榭 +ˉ +吱 +d +° +喧 +荀 +踱 +侮 +奋 +偕 +饷 +犍 +惮 +坑 +璎 +徘 +宛 +妆 +袈 +倩 +窦 +昂 +荏 +乖 +K +怅 +撰 +鳙 +牙 +袁 +酞 +X +痿 +琼 +闸 +雁 +趾 +荚 +虻 +涝 +《 +杏 +韭 +偈 +烤 +绫 +鞘 +卉 +症 +遢 +蓥 +诋 +杭 +荨 +匆 +竣 +簪 +辙 +敕 +虞 +丹 +缭 +咩 +黟 +m +淤 +瑕 +咂 +铉 +硼 +茨 +嶂 +痒 +畸 +敬 +涿 +粪 +窘 +熟 +叔 +嫔 +盾 +忱 +裘 +憾 +梵 +赡 +珙 +咯 +娘 +庙 +溯 +胺 +葱 +痪 +摊 +荷 +卞 +乒 +髦 +寐 +铭 +坩 +胗 +枷 +爆 +溟 +嚼 +羚 +砬 +轨 +惊 +挠 +罄 +竽 +菏 +氧 +浅 +楣 +盼 +枢 +炸 +阆 +杯 +谏 +噬 +淇 +渺 +俪 +秆 +墓 +泪 +跻 +砌 +痰 +垡 +渡 +耽 +釜 +讶 +鳎 +煞 +呗 +韶 +舶 +绷 +鹳 +缜 +旷 +铊 +皱 +龌 +檀 +霖 +奄 +槐 +艳 +蝶 +旋 +哝 +赶 +骞 +蚧 +腊 +盈 +丁 +` +蜚 +矸 +蝙 +睨 +嚓 +僻 +鬼 +醴 +夜 +彝 +磊 +笔 +拔 +栀 +糕 +厦 +邰 +纫 +逭 +纤 +眦 +膊 +馍 +躇 +烯 +蘼 +冬 +诤 +暄 +骶 +哑 +瘠 +」 +臊 +丕 +愈 +咱 +螺 +擅 +跋 +搏 +硪 +谄 +笠 +淡 +嘿 +骅 +谧 +鼎 +皋 +姚 +歼 +蠢 +驼 +耳 +胬 +挝 +涯 +狗 +蒽 +孓 +犷 +凉 +芦 +箴 +铤 +孤 +嘛 +坤 +V +茴 +朦 +挞 +尖 +橙 +诞 +搴 +碇 +洵 +浚 +帚 +蜍 +漯 +柘 +嚎 +讽 +芭 +荤 +咻 +祠 +秉 +跖 +埃 +吓 +糯 +眷 +馒 +惹 +娼 +鲑 +嫩 +讴 +轮 +瞥 +靶 +褚 +乏 +缤 +宋 +帧 +删 +驱 +碎 +扑 +俩 +俄 +偏 +涣 +竹 +噱 +皙 +佰 +渚 +唧 +斡 +# +镉 +刀 +崎 +筐 +佣 +夭 +贰 +肴 +峙 +哔 +艿 +匐 +牺 +镛 +缘 +仡 +嫡 +劣 +枸 +堀 +梨 +簿 +鸭 +蒸 +亦 +稽 +浴 +{ +衢 +束 +槲 +j +阁 +揍 +疥 +棋 +潋 +聪 +窜 +乓 +睛 +插 +冉 +阪 +苍 +搽 +「 +蟾 +螟 +幸 +仇 +樽 +撂 +慢 +跤 +幔 +俚 +淅 +覃 +觊 +溶 +妖 +帛 +侨 +曰 +妾 +泗 +· +: +瀘 +風 +Ë +( +) +∶ +紅 +紗 +瑭 +雲 +頭 +鶏 +財 +許 +• +¥ +樂 +焗 +麗 +— +; +滙 +東 +榮 +繪 +興 +… +門 +業 +π +楊 +國 +顧 +é +盤 +寳 +Λ +龍 +鳳 +島 +誌 +緣 +結 +銭 +萬 +勝 +祎 +璟 +優 +歡 +臨 +時 +購 += +★ +藍 +昇 +鐵 +觀 +勅 +農 +聲 +畫 +兿 +術 +發 +劉 +記 +專 +耑 +園 +書 +壴 +種 +Ο +● +褀 +號 +銀 +匯 +敟 +锘 +葉 +橪 +廣 +進 +蒄 +鑽 +阝 +祙 +貢 +鍋 +豊 +夬 +喆 +團 +閣 +開 +燁 +賓 +館 +酡 +沔 +順 ++ +硚 +劵 +饸 +陽 +車 +湓 +復 +萊 +氣 +軒 +華 +堃 +迮 +纟 +戶 +馬 +學 +裡 +電 +嶽 +獨 +マ +シ +サ +ジ +燘 +袪 +環 +❤ +臺 +灣 +専 +賣 +孖 +聖 +攝 +線 +▪ +α +傢 +俬 +夢 +達 +莊 +喬 +貝 +薩 +劍 +羅 +壓 +棛 +饦 +尃 +璈 +囍 +醫 +G +I +A +# +N +鷄 +髙 +嬰 +啓 +約 +隹 +潔 +賴 +藝 +~ +寶 +籣 +麺 +  +嶺 +√ +義 +網 +峩 +長 +∧ +魚 +機 +構 +② +鳯 +偉 +L +B +㙟 +畵 +鴿 +' +詩 +溝 +嚞 +屌 +藔 +佧 +玥 +蘭 +織 +1 +3 +9 +0 +7 +點 +砭 +鴨 +鋪 +銘 +廳 +弍 +‧ +創 +湯 +坶 +℃ +卩 +骝 +& +烜 +荘 +當 +潤 +扞 +係 +懷 +碶 +钅 +蚨 +讠 +☆ +叢 +爲 +埗 +涫 +塗 +→ +楽 +現 +鯨 +愛 +瑪 +鈺 +忄 +悶 +藥 +飾 +樓 +視 +孬 +ㆍ +燚 +苪 +師 +① +丼 +锽 +│ +韓 +標 +è +兒 +閏 +匋 +張 +漢 +Ü +髪 +會 +閑 +檔 +習 +裝 +の +峯 +菘 +輝 +И +雞 +釣 +億 +浐 +K +O +R +8 +H +E +P +T +W +D +S +C +M +F +姌 +饹 +» +晞 +廰 +ä +嵯 +鷹 +負 +飲 +絲 +冚 +楗 +澤 +綫 +區 +❋ +← +質 +靑 +揚 +③ +滬 +統 +産 +協 +﹑ +乸 +畐 +經 +運 +際 +洺 +岽 +為 +粵 +諾 +崋 +豐 +碁 +ɔ +V +2 +6 +齋 +誠 +訂 +´ +勑 +雙 +陳 +無 +í +泩 +媄 +夌 +刂 +i +c +t +o +r +a +嘢 +耄 +燴 +暃 +壽 +媽 +靈 +抻 +體 +唻 +É +冮 +甹 +鎮 +錦 +ʌ +蜛 +蠄 +尓 +駕 +戀 +飬 +逹 +倫 +貴 +極 +Я +Й +寬 +磚 +嶪 +郎 +職 +| +間 +n +d +剎 +伈 +課 +飛 +橋 +瘊 +№ +譜 +骓 +圗 +滘 +縣 +粿 +咅 +養 +濤 +彳 +® +% +Ⅱ +啰 +㴪 +見 +矞 +薬 +糁 +邨 +鲮 +顔 +罱 +З +選 +話 +贏 +氪 +俵 +競 +瑩 +繡 +枱 +β +綉 +á +獅 +爾 +™ +麵 +戋 +淩 +徳 +個 +劇 +場 +務 +簡 +寵 +h +實 +膠 +轱 +圖 +築 +嘣 +樹 +㸃 +營 +耵 +孫 +饃 +鄺 +飯 +麯 +遠 +輸 +坫 +孃 +乚 
+閃 +鏢 +㎡ +題 +廠 +關 +↑ +爺 +將 +軍 +連 +篦 +覌 +參 +箸 +- +窠 +棽 +寕 +夀 +爰 +歐 +呙 +閥 +頡 +熱 +雎 +垟 +裟 +凬 +勁 +帑 +馕 +夆 +疌 +枼 +馮 +貨 +蒤 +樸 +彧 +旸 +靜 +龢 +暢 +㐱 +鳥 +珺 +鏡 +灡 +爭 +堷 +廚 +Ó +騰 +診 +┅ +蘇 +褔 +凱 +頂 +豕 +亞 +帥 +嘬 +⊥ +仺 +桖 +複 +饣 +絡 +穂 +顏 +棟 +納 +▏ +濟 +親 +設 +計 +攵 +埌 +烺 +ò +頤 +燦 +蓮 +撻 +節 +講 +濱 +濃 +娽 +洳 +朿 +燈 +鈴 +護 +膚 +铔 +過 +補 +Z +U +5 +4 +坋 +闿 +䖝 +餘 +缐 +铞 +貿 +铪 +桼 +趙 +鍊 +[ +㐂 +垚 +菓 +揸 +捲 +鐘 +滏 +𣇉 +爍 +輪 +燜 +鴻 +鮮 +動 +鹞 +鷗 +丄 +慶 +鉌 +翥 +飮 +腸 +⇋ +漁 +覺 +來 +熘 +昴 +翏 +鲱 +圧 +鄉 +萭 +頔 +爐 +嫚 +г +貭 +類 +聯 +幛 +輕 +訓 +鑒 +夋 +锨 +芃 +珣 +䝉 +扙 +嵐 +銷 +處 +ㄱ +語 +誘 +苝 +歸 +儀 +燒 +楿 +內 +粢 +葒 +奧 +麥 +礻 +滿 +蠔 +穵 +瞭 +態 +鱬 +榞 +硂 +鄭 +黃 +煙 +祐 +奓 +逺 +* +瑄 +獲 +聞 +薦 +讀 +這 +樣 +決 +問 +啟 +們 +執 +説 +轉 +單 +隨 +唘 +帶 +倉 +庫 +還 +贈 +尙 +皺 +■ +餅 +產 +○ +∈ +報 +狀 +楓 +賠 +琯 +嗮 +禮 +` +傳 +> +≤ +嗞 +Φ +≥ +換 +咭 +∣ +↓ +曬 +ε +応 +寫 +″ +終 +様 +純 +費 +療 +聨 +凍 +壐 +郵 +ü +黒 +∫ +製 +塊 +調 +軽 +確 +撃 +級 +馴 +Ⅲ +涇 +繹 +數 +碼 +證 +狒 +処 +劑 +< +晧 +賀 +衆 +] +櫥 +兩 +陰 +絶 +對 +鯉 +憶 +◎ +p +e +Y +蕒 +煖 +頓 +測 +試 +鼽 +僑 +碩 +妝 +帯 +≈ +鐡 +舖 +權 +喫 +倆 +ˋ +該 +悅 +ā +俫 +. +f +s +b +m +k +g +u +j +貼 +淨 +濕 +針 +適 +備 +l +/ +給 +謢 +強 +觸 +衛 +與 +⊙ +$ +緯 +變 +⑴ +⑵ +⑶ +㎏ +殺 +∩ +幚 +─ +價 +▲ +離 +ú +ó +飄 +烏 +関 +閟 +﹝ +﹞ +邏 +輯 +鍵 +驗 +訣 +導 +歷 +屆 +層 +▼ +儱 +錄 +熳 +ē +艦 +吋 +錶 +辧 +飼 +顯 +④ +禦 +販 +気 +対 +枰 +閩 +紀 +幹 +瞓 +貊 +淚 +△ +眞 +墊 +Ω +獻 +褲 +縫 +緑 +亜 +鉅 +餠 +{ +} +◆ +蘆 +薈 +█ +◇ +溫 +彈 +晳 +粧 +犸 +穩 +訊 +崬 +凖 +熥 +П +舊 +條 +紋 +圍 +Ⅳ +筆 +尷 +難 +雜 +錯 +綁 +識 +頰 +鎖 +艶 +□ +殁 +殼 +⑧ +├ +▕ +鵬 +ǐ +ō +ǒ +糝 +綱 +▎ +μ +盜 +饅 +醬 +籤 +蓋 +釀 +鹽 +據 +à +ɡ +辦 +◥ +彐 +┌ +婦 +獸 +鲩 +伱 +ī +蒟 +蒻 +齊 +袆 +腦 +寧 +凈 +妳 +煥 +詢 +偽 +謹 +啫 +鯽 +騷 +鱸 +損 +傷 +鎻 +髮 +買 +冏 +儥 +両 +﹢ +∞ +載 +喰 +z +羙 +悵 +燙 +曉 +員 +組 +徹 +艷 +痠 +鋼 +鼙 +縮 +細 +嚒 +爯 +≠ +維 +" +鱻 +壇 +厍 +帰 +浥 +犇 +薡 +軎 +² +應 +醜 +刪 +緻 +鶴 +賜 +噁 +軌 +尨 +镔 +鷺 +槗 +彌 +葚 +濛 +請 +溇 +緹 +賢 +訪 +獴 +瑅 +資 +縤 +陣 +蕟 +栢 +韻 +祼 +恁 +伢 +謝 +劃 +涑 +總 +衖 +踺 +砋 +凉 +籃 +駿 +苼 +瘋 +昽 +紡 +驊 +腎 +﹗ +響 +杋 +剛 +嚴 +禪 +歓 +槍 +傘 +檸 +檫 +炣 +勢 +鏜 +鎢 +銑 +尐 +減 +奪 +惡 +θ +僮 +婭 +臘 +ū +ì +殻 +鉄 +∑ +蛲 +焼 +緖 +續 +紹 +懮 \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt new file mode 100644 index 00000000..09e275ba --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt @@ -0,0 +1,15629 @@ +' +疗 +绚 +诚 +娇 +溜 +题 +贿 +者 +廖 +更 +纳 +加 +奉 +公 +一 +就 +汴 +计 +与 +路 +房 +原 +妇 +2 +0 +8 +- +7 +其 +> +: +] +, +, +骑 +刈 +全 +消 +昏 +傈 +安 +久 +钟 +嗅 +不 +影 +处 +驽 +蜿 +资 +关 +椤 +地 +瘸 +专 +问 +忖 +票 +嫉 +炎 +韵 +要 +月 +田 +节 +陂 +鄙 +捌 +备 +拳 +伺 +眼 +网 +盎 +大 +傍 +心 +东 +愉 +汇 +蹿 +科 +每 +业 +里 +航 +晏 +字 +平 +录 +先 +1 +3 +彤 +鲶 +产 +稍 +督 +腴 +有 +象 +岳 +注 +绍 +在 +泺 +文 +定 +核 +名 +水 +过 +理 +让 +偷 +率 +等 +这 +发 +” +为 +含 +肥 +酉 +相 +鄱 +七 +编 +猥 +锛 +日 +镀 +蒂 +掰 +倒 +辆 +栾 +栗 +综 +涩 +州 +雌 +滑 +馀 +了 +机 +块 +司 +宰 +甙 +兴 +矽 +抚 +保 +用 +沧 +秩 +如 +收 +息 +滥 +页 +疑 +埠 +! +! 
+姥 +异 +橹 +钇 +向 +下 +跄 +的 +椴 +沫 +国 +绥 +獠 +报 +开 +民 +蜇 +何 +分 +凇 +长 +讥 +藏 +掏 +施 +羽 +中 +讲 +派 +嘟 +人 +提 +浼 +间 +世 +而 +古 +多 +倪 +唇 +饯 +控 +庚 +首 +赛 +蜓 +味 +断 +制 +觉 +技 +替 +艰 +溢 +潮 +夕 +钺 +外 +摘 +枋 +动 +双 +单 +啮 +户 +枇 +确 +锦 +曜 +杜 +或 +能 +效 +霜 +盒 +然 +侗 +电 +晁 +放 +步 +鹃 +新 +杖 +蜂 +吒 +濂 +瞬 +评 +总 +隍 +对 +独 +合 +也 +是 +府 +青 +天 +诲 +墙 +组 +滴 +级 +邀 +帘 +示 +已 +时 +骸 +仄 +泅 +和 +遨 +店 +雇 +疫 +持 +巍 +踮 +境 +只 +亨 +目 +鉴 +崤 +闲 +体 +泄 +杂 +作 +般 +轰 +化 +解 +迂 +诿 +蛭 +璀 +腾 +告 +版 +服 +省 +师 +小 +规 +程 +线 +海 +办 +引 +二 +桧 +牌 +砺 +洄 +裴 +修 +图 +痫 +胡 +许 +犊 +事 +郛 +基 +柴 +呼 +食 +研 +奶 +律 +蛋 +因 +葆 +察 +戏 +褒 +戒 +再 +李 +骁 +工 +貂 +油 +鹅 +章 +啄 +休 +场 +给 +睡 +纷 +豆 +器 +捎 +说 +敏 +学 +会 +浒 +设 +诊 +格 +廓 +查 +来 +霓 +室 +溆 +¢ +诡 +寥 +焕 +舜 +柒 +狐 +回 +戟 +砾 +厄 +实 +翩 +尿 +五 +入 +径 +惭 +喹 +股 +宇 +篝 +| +; +美 +期 +云 +九 +祺 +扮 +靠 +锝 +槌 +系 +企 +酰 +阊 +暂 +蚕 +忻 +豁 +本 +羹 +执 +条 +钦 +H +獒 +限 +进 +季 +楦 +于 +芘 +玖 +铋 +茯 +未 +答 +粘 +括 +样 +精 +欠 +矢 +甥 +帷 +嵩 +扣 +令 +仔 +风 +皈 +行 +支 +部 +蓉 +刮 +站 +蜡 +救 +钊 +汗 +松 +嫌 +成 +可 +. +鹤 +院 +从 +交 +政 +怕 +活 +调 +球 +局 +验 +髌 +第 +韫 +谗 +串 +到 +圆 +年 +米 +/ +* +友 +忿 +检 +区 +看 +自 +敢 +刃 +个 +兹 +弄 +流 +留 +同 +没 +齿 +星 +聆 +轼 +湖 +什 +三 +建 +蛔 +儿 +椋 +汕 +震 +颧 +鲤 +跟 +力 +情 +璺 +铨 +陪 +务 +指 +族 +训 +滦 +鄣 +濮 +扒 +商 +箱 +十 +召 +慷 +辗 +所 +莞 +管 +护 +臭 +横 +硒 +嗓 +接 +侦 +六 +露 +党 +馋 +驾 +剖 +高 +侬 +妪 +幂 +猗 +绺 +骐 +央 +酐 +孝 +筝 +课 +徇 +缰 +门 +男 +西 +项 +句 +谙 +瞒 +秃 +篇 +教 +碲 +罚 +声 +呐 +景 +前 +富 +嘴 +鳌 +稀 +免 +朋 +啬 +睐 +去 +赈 +鱼 +住 +肩 +愕 +速 +旁 +波 +厅 +健 +茼 +厥 +鲟 +谅 +投 +攸 +炔 +数 +方 +击 +呋 +谈 +绩 +别 +愫 +僚 +躬 +鹧 +胪 +炳 +招 +喇 +膨 +泵 +蹦 +毛 +结 +5 +4 +谱 +识 +陕 +粽 +婚 +拟 +构 +且 +搜 +任 +潘 +比 +郢 +妨 +醪 +陀 +桔 +碘 +扎 +选 +哈 +骷 +楷 +亿 +明 +缆 +脯 +监 +睫 +逻 +婵 +共 +赴 +淝 +凡 +惦 +及 +达 +揖 +谩 +澹 +减 +焰 +蛹 +番 +祁 +柏 +员 +禄 +怡 +峤 +龙 +白 +叽 +生 +闯 +起 +细 +装 +谕 +竟 +聚 +钙 +上 +导 +渊 +按 +艾 +辘 +挡 +耒 +盹 +饪 +臀 +记 +邮 +蕙 +受 +各 +医 +搂 +普 +滇 +朗 +茸 +带 +翻 +酚 +( +光 +堤 +墟 +蔷 +万 +幻 +〓 +瑙 +辈 +昧 +盏 +亘 +蛀 +吉 +铰 +请 +子 +假 +闻 +税 +井 +诩 +哨 +嫂 +好 +面 +琐 +校 +馊 +鬣 +缂 +营 +访 +炖 +占 +农 +缀 +否 +经 +钚 +棵 +趟 +张 +亟 +吏 +茶 +谨 +捻 +论 +迸 +堂 +玉 +信 +吧 +瞠 +乡 +姬 +寺 +咬 +溏 +苄 +皿 +意 +赉 +宝 +尔 +钰 +艺 +特 +唳 +踉 +都 +荣 +倚 +登 +荐 +丧 +奇 +涵 +批 +炭 +近 +符 +傩 +感 +道 +着 +菊 +虹 +仲 +众 +懈 +濯 +颞 +眺 +南 +释 +北 +缝 +标 +既 +茗 +整 +撼 +迤 +贲 +挎 +耱 +拒 +某 +妍 +卫 +哇 +英 +矶 +藩 +治 +他 +元 +领 +膜 +遮 +穗 +蛾 +飞 +荒 +棺 +劫 +么 +市 +火 +温 +拈 +棚 +洼 +转 +果 +奕 +卸 +迪 +伸 +泳 +斗 +邡 +侄 +涨 +屯 +萋 +胭 +氡 +崮 +枞 +惧 +冒 +彩 +斜 +手 +豚 +随 +旭 +淑 +妞 +形 +菌 +吲 +沱 +争 +驯 +歹 +挟 +兆 +柱 +传 +至 +包 +内 +响 +临 +红 +功 +弩 +衡 +寂 +禁 +老 +棍 +耆 +渍 +织 +害 +氵 +渑 +布 +载 +靥 +嗬 +虽 +苹 +咨 +娄 +库 +雉 +榜 +帜 +嘲 +套 +瑚 +亲 +簸 +欧 +边 +6 +腿 +旮 +抛 +吹 +瞳 +得 +镓 +梗 +厨 +继 +漾 +愣 +憨 +士 +策 +窑 +抑 +躯 +襟 +脏 +参 +贸 +言 +干 +绸 +鳄 +穷 +藜 +音 +折 +详 +) +举 +悍 +甸 +癌 +黎 +谴 +死 +罩 +迁 +寒 +驷 +袖 +媒 +蒋 +掘 +模 +纠 +恣 +观 +祖 +蛆 +碍 +位 +稿 +主 +澧 +跌 +筏 +京 +锏 +帝 +贴 +证 +糠 +才 +黄 +鲸 +略 +炯 +饱 +四 +出 +园 +犀 +牧 +容 +汉 +杆 +浈 +汰 +瑷 +造 +虫 +瘩 +怪 +驴 +济 +应 +花 +沣 +谔 +夙 +旅 +价 +矿 +以 +考 +s +u +呦 +晒 +巡 +茅 +准 +肟 +瓴 +詹 +仟 +褂 +译 +桌 +混 +宁 +怦 +郑 +抿 +些 +余 +鄂 +饴 +攒 +珑 +群 +阖 +岔 +琨 +藓 +预 +环 +洮 +岌 +宀 +杲 +瀵 +最 +常 +囡 +周 +踊 +女 +鼓 +袭 +喉 +简 +范 +薯 +遐 +疏 +粱 +黜 +禧 +法 +箔 +斤 +遥 +汝 +奥 +直 +贞 +撑 +置 +绱 +集 +她 +馅 +逗 +钧 +橱 +魉 +[ +恙 +躁 +唤 +9 +旺 +膘 +待 +脾 +惫 +购 +吗 +依 +盲 +度 +瘿 +蠖 +俾 +之 +镗 +拇 +鲵 +厝 +簧 +续 +款 +展 +啃 +表 +剔 +品 +钻 +腭 +损 +清 +锶 +统 +涌 +寸 +滨 +贪 +链 +吠 +冈 +伎 +迥 +咏 +吁 +览 +防 +迅 +失 +汾 +阔 +逵 +绀 +蔑 +列 +川 +凭 +努 +熨 +揪 +利 +俱 +绉 +抢 +鸨 +我 +即 +责 +膦 +易 +毓 +鹊 +刹 +玷 +岿 +空 +嘞 +绊 +排 +术 +估 +锷 +违 +们 +苟 +铜 +播 +肘 +件 +烫 +审 +鲂 +广 +像 +铌 +惰 +铟 +巳 +胍 +鲍 +康 +憧 +色 +恢 +想 +拷 +尤 +疳 +知 +S +Y +F +D +A +峄 +裕 +帮 +握 +搔 +氐 +氘 +难 +墒 +沮 +雨 +叁 +缥 +悴 +藐 +湫 +娟 +苑 +稠 +颛 +簇 +后 +阕 +闭 +蕤 +缚 +怎 +佞 +码 +嘤 +蔡 +痊 +舱 +螯 +帕 +赫 +昵 +升 +烬 +岫 +、 +疵 +蜻 +髁 +蕨 +隶 +烛 +械 +丑 +盂 +梁 +强 +鲛 +由 +拘 +揉 +劭 +龟 +撤 +钩 +呕 +孛 +费 +妻 +漂 +求 +阑 +崖 +秤 +甘 +通 +深 +补 +赃 +坎 +床 +啪 +承 +吼 +量 +暇 +钼 +烨 +阂 +擎 +脱 +逮 +称 +P +神 +属 +矗 +华 +届 +狍 +葑 +汹 +育 +患 +窒 +蛰 +佼 +静 +槎 +运 +鳗 +庆 +逝 +曼 +疱 +克 +代 +官 +此 +麸 +耧 +蚌 +晟 +例 +础 +榛 +副 +测 +唰 +缢 +迹 +灬 +霁 +身 +岁 +赭 
+扛 +又 +菡 +乜 +雾 +板 +读 +陷 +徉 +贯 +郁 +虑 +变 +钓 +菜 +圾 +现 +琢 +式 +乐 +维 +渔 +浜 +左 +吾 +脑 +钡 +警 +T +啵 +拴 +偌 +漱 +湿 +硕 +止 +骼 +魄 +积 +燥 +联 +踢 +玛 +则 +窿 +见 +振 +畿 +送 +班 +钽 +您 +赵 +刨 +印 +讨 +踝 +籍 +谡 +舌 +崧 +汽 +蔽 +沪 +酥 +绒 +怖 +财 +帖 +肱 +私 +莎 +勋 +羔 +霸 +励 +哼 +帐 +将 +帅 +渠 +纪 +婴 +娩 +岭 +厘 +滕 +吻 +伤 +坝 +冠 +戊 +隆 +瘁 +介 +涧 +物 +黍 +并 +姗 +奢 +蹑 +掣 +垸 +锴 +命 +箍 +捉 +病 +辖 +琰 +眭 +迩 +艘 +绌 +繁 +寅 +若 +毋 +思 +诉 +类 +诈 +燮 +轲 +酮 +狂 +重 +反 +职 +筱 +县 +委 +磕 +绣 +奖 +晋 +濉 +志 +徽 +肠 +呈 +獐 +坻 +口 +片 +碰 +几 +村 +柿 +劳 +料 +获 +亩 +惕 +晕 +厌 +号 +罢 +池 +正 +鏖 +煨 +家 +棕 +复 +尝 +懋 +蜥 +锅 +岛 +扰 +队 +坠 +瘾 +钬 +@ +卧 +疣 +镇 +譬 +冰 +彷 +频 +黯 +据 +垄 +采 +八 +缪 +瘫 +型 +熹 +砰 +楠 +襁 +箐 +但 +嘶 +绳 +啤 +拍 +盥 +穆 +傲 +洗 +盯 +塘 +怔 +筛 +丿 +台 +恒 +喂 +葛 +永 +¥ +烟 +酒 +桦 +书 +砂 +蚝 +缉 +态 +瀚 +袄 +圳 +轻 +蛛 +超 +榧 +遛 +姒 +奘 +铮 +右 +荽 +望 +偻 +卡 +丶 +氰 +附 +做 +革 +索 +戚 +坨 +桷 +唁 +垅 +榻 +岐 +偎 +坛 +莨 +山 +殊 +微 +骇 +陈 +爨 +推 +嗝 +驹 +澡 +藁 +呤 +卤 +嘻 +糅 +逛 +侵 +郓 +酌 +德 +摇 +※ +鬃 +被 +慨 +殡 +羸 +昌 +泡 +戛 +鞋 +河 +宪 +沿 +玲 +鲨 +翅 +哽 +源 +铅 +语 +照 +邯 +址 +荃 +佬 +顺 +鸳 +町 +霭 +睾 +瓢 +夸 +椁 +晓 +酿 +痈 +咔 +侏 +券 +噎 +湍 +签 +嚷 +离 +午 +尚 +社 +锤 +背 +孟 +使 +浪 +缦 +潍 +鞅 +军 +姹 +驶 +笑 +鳟 +鲁 +》 +孽 +钜 +绿 +洱 +礴 +焯 +椰 +颖 +囔 +乌 +孔 +巴 +互 +性 +椽 +哞 +聘 +昨 +早 +暮 +胶 +炀 +隧 +低 +彗 +昝 +铁 +呓 +氽 +藉 +喔 +癖 +瑗 +姨 +权 +胱 +韦 +堑 +蜜 +酋 +楝 +砝 +毁 +靓 +歙 +锲 +究 +屋 +喳 +骨 +辨 +碑 +武 +鸠 +宫 +辜 +烊 +适 +坡 +殃 +培 +佩 +供 +走 +蜈 +迟 +翼 +况 +姣 +凛 +浔 +吃 +飘 +债 +犟 +金 +促 +苛 +崇 +坂 +莳 +畔 +绂 +兵 +蠕 +斋 +根 +砍 +亢 +欢 +恬 +崔 +剁 +餐 +榫 +快 +扶 +‖ +濒 +缠 +鳜 +当 +彭 +驭 +浦 +篮 +昀 +锆 +秸 +钳 +弋 +娣 +瞑 +夷 +龛 +苫 +拱 +致 +% +嵊 +障 +隐 +弑 +初 +娓 +抉 +汩 +累 +蓖 +" +唬 +助 +苓 +昙 +押 +毙 +破 +城 +郧 +逢 +嚏 +獭 +瞻 +溱 +婿 +赊 +跨 +恼 +璧 +萃 +姻 +貉 +灵 +炉 +密 +氛 +陶 +砸 +谬 +衔 +点 +琛 +沛 +枳 +层 +岱 +诺 +脍 +榈 +埂 +征 +冷 +裁 +打 +蹴 +素 +瘘 +逞 +蛐 +聊 +激 +腱 +萘 +踵 +飒 +蓟 +吆 +取 +咙 +簋 +涓 +矩 +曝 +挺 +揣 +座 +你 +史 +舵 +焱 +尘 +苏 +笈 +脚 +溉 +榨 +诵 +樊 +邓 +焊 +义 +庶 +儋 +蟋 +蒲 +赦 +呷 +杞 +诠 +豪 +还 +试 +颓 +茉 +太 +除 +紫 +逃 +痴 +草 +充 +鳕 +珉 +祗 +墨 +渭 +烩 +蘸 +慕 +璇 +镶 +穴 +嵘 +恶 +骂 +险 +绋 +幕 +碉 +肺 +戳 +刘 +潞 +秣 +纾 +潜 +銮 +洛 +须 +罘 +销 +瘪 +汞 +兮 +屉 +r +林 +厕 +质 +探 +划 +狸 +殚 +善 +煊 +烹 +〒 +锈 +逯 +宸 +辍 +泱 +柚 +袍 +远 +蹋 +嶙 +绝 +峥 +娥 +缍 +雀 +徵 +认 +镱 +谷 += +贩 +勉 +撩 +鄯 +斐 +洋 +非 +祚 +泾 +诒 +饿 +撬 +威 +晷 +搭 +芍 +锥 +笺 +蓦 +候 +琊 +档 +礁 +沼 +卵 +荠 +忑 +朝 +凹 +瑞 +头 +仪 +弧 +孵 +畏 +铆 +突 +衲 +车 +浩 +气 +茂 +悖 +厢 +枕 +酝 +戴 +湾 +邹 +飚 +攘 +锂 +写 +宵 +翁 +岷 +无 +喜 +丈 +挑 +嗟 +绛 +殉 +议 +槽 +具 +醇 +淞 +笃 +郴 +阅 +饼 +底 +壕 +砚 +弈 +询 +缕 +庹 +翟 +零 +筷 +暨 +舟 +闺 +甯 +撞 +麂 +茌 +蔼 +很 +珲 +捕 +棠 +角 +阉 +媛 +娲 +诽 +剿 +尉 +爵 +睬 +韩 +诰 +匣 +危 +糍 +镯 +立 +浏 +阳 +少 +盆 +舔 +擘 +匪 +申 +尬 +铣 +旯 +抖 +赘 +瓯 +居 +ˇ +哮 +游 +锭 +茏 +歌 +坏 +甚 +秒 +舞 +沙 +仗 +劲 +潺 +阿 +燧 +郭 +嗖 +霏 +忠 +材 +奂 +耐 +跺 +砀 +输 +岖 +媳 +氟 +极 +摆 +灿 +今 +扔 +腻 +枝 +奎 +药 +熄 +吨 +话 +q +额 +慑 +嘌 +协 +喀 +壳 +埭 +视 +著 +於 +愧 +陲 +翌 +峁 +颅 +佛 +腹 +聋 +侯 +咎 +叟 +秀 +颇 +存 +较 +罪 +哄 +岗 +扫 +栏 +钾 +羌 +己 +璨 +枭 +霉 +煌 +涸 +衿 +键 +镝 +益 +岢 +奏 +连 +夯 +睿 +冥 +均 +糖 +狞 +蹊 +稻 +爸 +刿 +胥 +煜 +丽 +肿 +璃 +掸 +跚 +灾 +垂 +樾 +濑 +乎 +莲 +窄 +犹 +撮 +战 +馄 +软 +络 +显 +鸢 +胸 +宾 +妲 +恕 +埔 +蝌 +份 +遇 +巧 +瞟 +粒 +恰 +剥 +桡 +博 +讯 +凯 +堇 +阶 +滤 +卖 +斌 +骚 +彬 +兑 +磺 +樱 +舷 +两 +娱 +福 +仃 +差 +找 +桁 +÷ +净 +把 +阴 +污 +戬 +雷 +碓 +蕲 +楚 +罡 +焖 +抽 +妫 +咒 +仑 +闱 +尽 +邑 +菁 +爱 +贷 +沥 +鞑 +牡 +嗉 +崴 +骤 +塌 +嗦 +订 +拮 +滓 +捡 +锻 +次 +坪 +杩 +臃 +箬 +融 +珂 +鹗 +宗 +枚 +降 +鸬 +妯 +阄 +堰 +盐 +毅 +必 +杨 +崃 +俺 +甬 +状 +莘 +货 +耸 +菱 +腼 +铸 +唏 +痤 +孚 +澳 +懒 +溅 +翘 +疙 +杷 +淼 +缙 +骰 +喊 +悉 +砻 +坷 +艇 +赁 +界 +谤 +纣 +宴 +晃 +茹 +归 +饭 +梢 +铡 +街 +抄 +肼 +鬟 +苯 +颂 +撷 +戈 +炒 +咆 +茭 +瘙 +负 +仰 +客 +琉 +铢 +封 +卑 +珥 +椿 +镧 +窨 +鬲 +寿 +御 +袤 +铃 +萎 +砖 +餮 +脒 +裳 +肪 +孕 +嫣 +馗 +嵇 +恳 +氯 +江 +石 +褶 +冢 +祸 +阻 +狈 +羞 +银 +靳 +透 +咳 +叼 +敷 +芷 +啥 +它 +瓤 +兰 +痘 +懊 +逑 +肌 +往 +捺 +坊 +甩 +呻 +〃 +沦 +忘 +膻 +祟 +菅 +剧 +崆 +智 +坯 +臧 +霍 +墅 +攻 +眯 +倘 +拢 +骠 +铐 +庭 +岙 +瓠 +′ +缺 +泥 +迢 +捶 +? +? 
+郏 +喙 +掷 +沌 +纯 +秘 +种 +听 +绘 +固 +螨 +团 +香 +盗 +妒 +埚 +蓝 +拖 +旱 +荞 +铀 +血 +遏 +汲 +辰 +叩 +拽 +幅 +硬 +惶 +桀 +漠 +措 +泼 +唑 +齐 +肾 +念 +酱 +虚 +屁 +耶 +旗 +砦 +闵 +婉 +馆 +拭 +绅 +韧 +忏 +窝 +醋 +葺 +顾 +辞 +倜 +堆 +辋 +逆 +玟 +贱 +疾 +董 +惘 +倌 +锕 +淘 +嘀 +莽 +俭 +笏 +绑 +鲷 +杈 +择 +蟀 +粥 +嗯 +驰 +逾 +案 +谪 +褓 +胫 +哩 +昕 +颚 +鲢 +绠 +躺 +鹄 +崂 +儒 +俨 +丝 +尕 +泌 +啊 +萸 +彰 +幺 +吟 +骄 +苣 +弦 +脊 +瑰 +〈 +诛 +镁 +析 +闪 +剪 +侧 +哟 +框 +螃 +守 +嬗 +燕 +狭 +铈 +缮 +概 +迳 +痧 +鲲 +俯 +售 +笼 +痣 +扉 +挖 +满 +咋 +援 +邱 +扇 +歪 +便 +玑 +绦 +峡 +蛇 +叨 +〖 +泽 +胃 +斓 +喋 +怂 +坟 +猪 +该 +蚬 +炕 +弥 +赞 +棣 +晔 +娠 +挲 +狡 +创 +疖 +铕 +镭 +稷 +挫 +弭 +啾 +翔 +粉 +履 +苘 +哦 +楼 +秕 +铂 +土 +锣 +瘟 +挣 +栉 +习 +享 +桢 +袅 +磨 +桂 +谦 +延 +坚 +蔚 +噗 +署 +谟 +猬 +钎 +恐 +嬉 +雒 +倦 +衅 +亏 +璩 +睹 +刻 +殿 +王 +算 +雕 +麻 +丘 +柯 +骆 +丸 +塍 +谚 +添 +鲈 +垓 +桎 +蚯 +芥 +予 +飕 +镦 +谌 +窗 +醚 +菀 +亮 +搪 +莺 +蒿 +羁 +足 +J +真 +轶 +悬 +衷 +靛 +翊 +掩 +哒 +炅 +掐 +冼 +妮 +l +谐 +稚 +荆 +擒 +犯 +陵 +虏 +浓 +崽 +刍 +陌 +傻 +孜 +千 +靖 +演 +矜 +钕 +煽 +杰 +酗 +渗 +伞 +栋 +俗 +泫 +戍 +罕 +沾 +疽 +灏 +煦 +芬 +磴 +叱 +阱 +榉 +湃 +蜀 +叉 +醒 +彪 +租 +郡 +篷 +屎 +良 +垢 +隗 +弱 +陨 +峪 +砷 +掴 +颁 +胎 +雯 +绵 +贬 +沐 +撵 +隘 +篙 +暖 +曹 +陡 +栓 +填 +臼 +彦 +瓶 +琪 +潼 +哪 +鸡 +摩 +啦 +俟 +锋 +域 +耻 +蔫 +疯 +纹 +撇 +毒 +绶 +痛 +酯 +忍 +爪 +赳 +歆 +嘹 +辕 +烈 +册 +朴 +钱 +吮 +毯 +癜 +娃 +谀 +邵 +厮 +炽 +璞 +邃 +丐 +追 +词 +瓒 +忆 +轧 +芫 +谯 +喷 +弟 +半 +冕 +裙 +掖 +墉 +绮 +寝 +苔 +势 +顷 +褥 +切 +衮 +君 +佳 +嫒 +蚩 +霞 +佚 +洙 +逊 +镖 +暹 +唛 +& +殒 +顶 +碗 +獗 +轭 +铺 +蛊 +废 +恹 +汨 +崩 +珍 +那 +杵 +曲 +纺 +夏 +薰 +傀 +闳 +淬 +姘 +舀 +拧 +卷 +楂 +恍 +讪 +厩 +寮 +篪 +赓 +乘 +灭 +盅 +鞣 +沟 +慎 +挂 +饺 +鼾 +杳 +树 +缨 +丛 +絮 +娌 +臻 +嗳 +篡 +侩 +述 +衰 +矛 +圈 +蚜 +匕 +筹 +匿 +濞 +晨 +叶 +骋 +郝 +挚 +蚴 +滞 +增 +侍 +描 +瓣 +吖 +嫦 +蟒 +匾 +圣 +赌 +毡 +癞 +恺 +百 +曳 +需 +篓 +肮 +庖 +帏 +卿 +驿 +遗 +蹬 +鬓 +骡 +歉 +芎 +胳 +屐 +禽 +烦 +晌 +寄 +媾 +狄 +翡 +苒 +船 +廉 +终 +痞 +殇 +々 +畦 +饶 +改 +拆 +悻 +萄 +£ +瓿 +乃 +訾 +桅 +匮 +溧 +拥 +纱 +铍 +骗 +蕃 +龋 +缬 +父 +佐 +疚 +栎 +醍 +掳 +蓄 +x +惆 +颜 +鲆 +榆 +〔 +猎 +敌 +暴 +谥 +鲫 +贾 +罗 +玻 +缄 +扦 +芪 +癣 +落 +徒 +臾 +恿 +猩 +托 +邴 +肄 +牵 +春 +陛 +耀 +刊 +拓 +蓓 +邳 +堕 +寇 +枉 +淌 +啡 +湄 +兽 +酷 +萼 +碚 +濠 +萤 +夹 +旬 +戮 +梭 +琥 +椭 +昔 +勺 +蜊 +绐 +晚 +孺 +僵 +宣 +摄 +冽 +旨 +萌 +忙 +蚤 +眉 +噼 +蟑 +付 +契 +瓜 +悼 +颡 +壁 +曾 +窕 +颢 +澎 +仿 +俑 +浑 +嵌 +浣 +乍 +碌 +褪 +乱 +蔟 +隙 +玩 +剐 +葫 +箫 +纲 +围 +伐 +决 +伙 +漩 +瑟 +刑 +肓 +镳 +缓 +蹭 +氨 +皓 +典 +畲 +坍 +铑 +檐 +塑 +洞 +倬 +储 +胴 +淳 +戾 +吐 +灼 +惺 +妙 +毕 +珐 +缈 +虱 +盖 +羰 +鸿 +磅 +谓 +髅 +娴 +苴 +唷 +蚣 +霹 +抨 +贤 +唠 +犬 +誓 +逍 +庠 +逼 +麓 +籼 +釉 +呜 +碧 +秧 +氩 +摔 +霄 +穸 +纨 +辟 +妈 +映 +完 +牛 +缴 +嗷 +炊 +恩 +荔 +茆 +掉 +紊 +慌 +莓 +羟 +阙 +萁 +磐 +另 +蕹 +辱 +鳐 +湮 +吡 +吩 +唐 +睦 +垠 +舒 +圜 +冗 +瞿 +溺 +芾 +囱 +匠 +僳 +汐 +菩 +饬 +漓 +黑 +霰 +浸 +濡 +窥 +毂 +蒡 +兢 +驻 +鹉 +芮 +诙 +迫 +雳 +厂 +忐 +臆 +猴 +鸣 +蚪 +栈 +箕 +羡 +渐 +莆 +捍 +眈 +哓 +趴 +蹼 +埕 +嚣 +骛 +宏 +淄 +斑 +噜 +严 +瑛 +垃 +椎 +诱 +压 +庾 +绞 +焘 +廿 +抡 +迄 +棘 +夫 +纬 +锹 +眨 +瞌 +侠 +脐 +竞 +瀑 +孳 +骧 +遁 +姜 +颦 +荪 +滚 +萦 +伪 +逸 +粳 +爬 +锁 +矣 +役 +趣 +洒 +颔 +诏 +逐 +奸 +甭 +惠 +攀 +蹄 +泛 +尼 +拼 +阮 +鹰 +亚 +颈 +惑 +勒 +〉 +际 +肛 +爷 +刚 +钨 +丰 +养 +冶 +鲽 +辉 +蔻 +画 +覆 +皴 +妊 +麦 +返 +醉 +皂 +擀 +〗 +酶 +凑 +粹 +悟 +诀 +硖 +港 +卜 +z +杀 +涕 +± +舍 +铠 +抵 +弛 +段 +敝 +镐 +奠 +拂 +轴 +跛 +袱 +e +t +沉 +菇 +俎 +薪 +峦 +秭 +蟹 +历 +盟 +菠 +寡 +液 +肢 +喻 +染 +裱 +悱 +抱 +氙 +赤 +捅 +猛 +跑 +氮 +谣 +仁 +尺 +辊 +窍 +烙 +衍 +架 +擦 +倏 +璐 +瑁 +币 +楞 +胖 +夔 +趸 +邛 +惴 +饕 +虔 +蝎 +§ +哉 +贝 +宽 +辫 +炮 +扩 +饲 +籽 +魏 +菟 +锰 +伍 +猝 +末 +琳 +哚 +蛎 +邂 +呀 +姿 +鄞 +却 +歧 +仙 +恸 +椐 +森 +牒 +寤 +袒 +婆 +虢 +雅 +钉 +朵 +贼 +欲 +苞 +寰 +故 +龚 +坭 +嘘 +咫 +礼 +硷 +兀 +睢 +汶 +’ +铲 +烧 +绕 +诃 +浃 +钿 +哺 +柜 +讼 +颊 +璁 +腔 +洽 +咐 +脲 +簌 +筠 +镣 +玮 +鞠 +谁 +兼 +姆 +挥 +梯 +蝴 +谘 +漕 +刷 +躏 +宦 +弼 +b +垌 +劈 +麟 +莉 +揭 +笙 +渎 +仕 +嗤 +仓 +配 +怏 +抬 +错 +泯 +镊 +孰 +猿 +邪 +仍 +秋 +鼬 +壹 +歇 +吵 +炼 +< +尧 +射 +柬 +廷 +胧 +霾 +凳 +隋 +肚 +浮 +梦 +祥 +株 +堵 +退 +L +鹫 +跎 +凶 +毽 +荟 +炫 +栩 +玳 +甜 +沂 +鹿 +顽 +伯 +爹 +赔 +蛴 +徐 +匡 +欣 +狰 +缸 +雹 +蟆 +疤 +默 +沤 +啜 +痂 +衣 +禅 +w +i +h +辽 +葳 +黝 +钗 +停 +沽 +棒 +馨 +颌 +肉 +吴 +硫 +悯 +劾 +娈 +马 +啧 +吊 +悌 +镑 +峭 +帆 +瀣 +涉 +咸 +疸 +滋 +泣 +翦 +拙 +癸 +钥 +蜒 ++ +尾 +庄 +凝 +泉 +婢 +渴 +谊 +乞 +陆 +锉 +糊 +鸦 +淮 +I +B +N +晦 +弗 +乔 +庥 +葡 +尻 +席 +橡 +傣 +渣 +拿 +惩 +麋 +斛 +缃 +矮 +蛏 +岘 +鸽 +姐 +膏 +催 +奔 +镒 +喱 +蠡 +摧 +钯 +胤 +柠 +拐 +璋 +鸥 +卢 +荡 +倾 +^ +_ +珀 +逄 +萧 +塾 +掇 +贮 +笆 +聂 +圃 +冲 +嵬 +M +滔 +笕 +值 
+炙 +偶 +蜱 +搐 +梆 +汪 +蔬 +腑 +鸯 +蹇 +敞 +绯 +仨 +祯 +谆 +梧 +糗 +鑫 +啸 +豺 +囹 +猾 +巢 +柄 +瀛 +筑 +踌 +沭 +暗 +苁 +鱿 +蹉 +脂 +蘖 +牢 +热 +木 +吸 +溃 +宠 +序 +泞 +偿 +拜 +檩 +厚 +朐 +毗 +螳 +吞 +媚 +朽 +担 +蝗 +橘 +畴 +祈 +糟 +盱 +隼 +郜 +惜 +珠 +裨 +铵 +焙 +琚 +唯 +咚 +噪 +骊 +丫 +滢 +勤 +棉 +呸 +咣 +淀 +隔 +蕾 +窈 +饨 +挨 +煅 +短 +匙 +粕 +镜 +赣 +撕 +墩 +酬 +馁 +豌 +颐 +抗 +酣 +氓 +佑 +搁 +哭 +递 +耷 +涡 +桃 +贻 +碣 +截 +瘦 +昭 +镌 +蔓 +氚 +甲 +猕 +蕴 +蓬 +散 +拾 +纛 +狼 +猷 +铎 +埋 +旖 +矾 +讳 +囊 +糜 +迈 +粟 +蚂 +紧 +鲳 +瘢 +栽 +稼 +羊 +锄 +斟 +睁 +桥 +瓮 +蹙 +祉 +醺 +鼻 +昱 +剃 +跳 +篱 +跷 +蒜 +翎 +宅 +晖 +嗑 +壑 +峻 +癫 +屏 +狠 +陋 +袜 +途 +憎 +祀 +莹 +滟 +佶 +溥 +臣 +约 +盛 +峰 +磁 +慵 +婪 +拦 +莅 +朕 +鹦 +粲 +裤 +哎 +疡 +嫖 +琵 +窟 +堪 +谛 +嘉 +儡 +鳝 +斩 +郾 +驸 +酊 +妄 +胜 +贺 +徙 +傅 +噌 +钢 +栅 +庇 +恋 +匝 +巯 +邈 +尸 +锚 +粗 +佟 +蛟 +薹 +纵 +蚊 +郅 +绢 +锐 +苗 +俞 +篆 +淆 +膀 +鲜 +煎 +诶 +秽 +寻 +涮 +刺 +怀 +噶 +巨 +褰 +魅 +灶 +灌 +桉 +藕 +谜 +舸 +薄 +搀 +恽 +借 +牯 +痉 +渥 +愿 +亓 +耘 +杠 +柩 +锔 +蚶 +钣 +珈 +喘 +蹒 +幽 +赐 +稗 +晤 +莱 +泔 +扯 +肯 +菪 +裆 +腩 +豉 +疆 +骜 +腐 +倭 +珏 +唔 +粮 +亡 +润 +慰 +伽 +橄 +玄 +誉 +醐 +胆 +龊 +粼 +塬 +陇 +彼 +削 +嗣 +绾 +芽 +妗 +垭 +瘴 +爽 +薏 +寨 +龈 +泠 +弹 +赢 +漪 +猫 +嘧 +涂 +恤 +圭 +茧 +烽 +屑 +痕 +巾 +赖 +荸 +凰 +腮 +畈 +亵 +蹲 +偃 +苇 +澜 +艮 +换 +骺 +烘 +苕 +梓 +颉 +肇 +哗 +悄 +氤 +涠 +葬 +屠 +鹭 +植 +竺 +佯 +诣 +鲇 +瘀 +鲅 +邦 +移 +滁 +冯 +耕 +癔 +戌 +茬 +沁 +巩 +悠 +湘 +洪 +痹 +锟 +循 +谋 +腕 +鳃 +钠 +捞 +焉 +迎 +碱 +伫 +急 +榷 +奈 +邝 +卯 +辄 +皲 +卟 +醛 +畹 +忧 +稳 +雄 +昼 +缩 +阈 +睑 +扌 +耗 +曦 +涅 +捏 +瞧 +邕 +淖 +漉 +铝 +耦 +禹 +湛 +喽 +莼 +琅 +诸 +苎 +纂 +硅 +始 +嗨 +傥 +燃 +臂 +赅 +嘈 +呆 +贵 +屹 +壮 +肋 +亍 +蚀 +卅 +豹 +腆 +邬 +迭 +浊 +} +童 +螂 +捐 +圩 +勐 +触 +寞 +汊 +壤 +荫 +膺 +渌 +芳 +懿 +遴 +螈 +泰 +蓼 +蛤 +茜 +舅 +枫 +朔 +膝 +眙 +避 +梅 +判 +鹜 +璜 +牍 +缅 +垫 +藻 +黔 +侥 +惚 +懂 +踩 +腰 +腈 +札 +丞 +唾 +慈 +顿 +摹 +荻 +琬 +~ +斧 +沈 +滂 +胁 +胀 +幄 +莜 +Z +匀 +鄄 +掌 +绰 +茎 +焚 +赋 +萱 +谑 +汁 +铒 +瞎 +夺 +蜗 +野 +娆 +冀 +弯 +篁 +懵 +灞 +隽 +芡 +脘 +俐 +辩 +芯 +掺 +喏 +膈 +蝈 +觐 +悚 +踹 +蔗 +熠 +鼠 +呵 +抓 +橼 +峨 +畜 +缔 +禾 +崭 +弃 +熊 +摒 +凸 +拗 +穹 +蒙 +抒 +祛 +劝 +闫 +扳 +阵 +醌 +踪 +喵 +侣 +搬 +仅 +荧 +赎 +蝾 +琦 +买 +婧 +瞄 +寓 +皎 +冻 +赝 +箩 +莫 +瞰 +郊 +笫 +姝 +筒 +枪 +遣 +煸 +袋 +舆 +痱 +涛 +母 +〇 +启 +践 +耙 +绲 +盘 +遂 +昊 +搞 +槿 +诬 +纰 +泓 +惨 +檬 +亻 +越 +C +o +憩 +熵 +祷 +钒 +暧 +塔 +阗 +胰 +咄 +娶 +魔 +琶 +钞 +邻 +扬 +杉 +殴 +咽 +弓 +〆 +髻 +】 +吭 +揽 +霆 +拄 +殖 +脆 +彻 +岩 +芝 +勃 +辣 +剌 +钝 +嘎 +甄 +佘 +皖 +伦 +授 +徕 +憔 +挪 +皇 +庞 +稔 +芜 +踏 +溴 +兖 +卒 +擢 +饥 +鳞 +煲 +‰ +账 +颗 +叻 +斯 +捧 +鳍 +琮 +讹 +蛙 +纽 +谭 +酸 +兔 +莒 +睇 +伟 +觑 +羲 +嗜 +宜 +褐 +旎 +辛 +卦 +诘 +筋 +鎏 +溪 +挛 +熔 +阜 +晰 +鳅 +丢 +奚 +灸 +呱 +献 +陉 +黛 +鸪 +甾 +萨 +疮 +拯 +洲 +疹 +辑 +叙 +恻 +谒 +允 +柔 +烂 +氏 +逅 +漆 +拎 +惋 +扈 +湟 +纭 +啕 +掬 +擞 +哥 +忽 +涤 +鸵 +靡 +郗 +瓷 +扁 +廊 +怨 +雏 +钮 +敦 +E +懦 +憋 +汀 +拚 +啉 +腌 +岸 +f +痼 +瞅 +尊 +咀 +眩 +飙 +忌 +仝 +迦 +熬 +毫 +胯 +篑 +茄 +腺 +凄 +舛 +碴 +锵 +诧 +羯 +後 +漏 +汤 +宓 +仞 +蚁 +壶 +谰 +皑 +铄 +棰 +罔 +辅 +晶 +苦 +牟 +闽 +\ +烃 +饮 +聿 +丙 +蛳 +朱 +煤 +涔 +鳖 +犁 +罐 +荼 +砒 +淦 +妤 +黏 +戎 +孑 +婕 +瑾 +戢 +钵 +枣 +捋 +砥 +衩 +狙 +桠 +稣 +阎 +肃 +梏 +诫 +孪 +昶 +婊 +衫 +嗔 +侃 +塞 +蜃 +樵 +峒 +貌 +屿 +欺 +缫 +阐 +栖 +诟 +珞 +荭 +吝 +萍 +嗽 +恂 +啻 +蜴 +磬 +峋 +俸 +豫 +谎 +徊 +镍 +韬 +魇 +晴 +U +囟 +猜 +蛮 +坐 +囿 +伴 +亭 +肝 +佗 +蝠 +妃 +胞 +滩 +榴 +氖 +垩 +苋 +砣 +扪 +馏 +姓 +轩 +厉 +夥 +侈 +禀 +垒 +岑 +赏 +钛 +辐 +痔 +披 +纸 +碳 +“ +坞 +蠓 +挤 +荥 +沅 +悔 +铧 +帼 +蒌 +蝇 +a +p +y +n +g +哀 +浆 +瑶 +凿 +桶 +馈 +皮 +奴 +苜 +佤 +伶 +晗 +铱 +炬 +优 +弊 +氢 +恃 +甫 +攥 +端 +锌 +灰 +稹 +炝 +曙 +邋 +亥 +眶 +碾 +拉 +萝 +绔 +捷 +浍 +腋 +姑 +菖 +凌 +涞 +麽 +锢 +桨 +潢 +绎 +镰 +殆 +锑 +渝 +铬 +困 +绽 +觎 +匈 +糙 +暑 +裹 +鸟 +盔 +肽 +迷 +綦 +『 +亳 +佝 +俘 +钴 +觇 +骥 +仆 +疝 +跪 +婶 +郯 +瀹 +唉 +脖 +踞 +针 +晾 +忒 +扼 +瞩 +叛 +椒 +疟 +嗡 +邗 +肆 +跆 +玫 +忡 +捣 +咧 +唆 +艄 +蘑 +潦 +笛 +阚 +沸 +泻 +掊 +菽 +贫 +斥 +髂 +孢 +镂 +赂 +麝 +鸾 +屡 +衬 +苷 +恪 +叠 +希 +粤 +爻 +喝 +茫 +惬 +郸 +绻 +庸 +撅 +碟 +宄 +妹 +膛 +叮 +饵 +崛 +嗲 +椅 +冤 +搅 +咕 +敛 +尹 +垦 +闷 +蝉 +霎 +勰 +败 +蓑 +泸 +肤 +鹌 +幌 +焦 +浠 +鞍 +刁 +舰 +乙 +竿 +裔 +。 +茵 +函 +伊 +兄 +丨 +娜 +匍 +謇 +莪 +宥 +似 +蝽 +翳 +酪 +翠 +粑 +薇 +祢 +骏 +赠 +叫 +Q +噤 +噻 +竖 +芗 +莠 +潭 +俊 +羿 +耜 +O +郫 +趁 +嗪 +囚 +蹶 +芒 +洁 +笋 +鹑 +敲 +硝 +啶 +堡 +渲 +揩 +』 +携 +宿 +遒 +颍 +扭 +棱 +割 +萜 +蔸 +葵 +琴 +捂 +饰 +衙 +耿 +掠 +募 +岂 +窖 +涟 +蔺 +瘤 +柞 +瞪 +怜 +匹 +距 +楔 +炜 +哆 +秦 +缎 +幼 +茁 +绪 +痨 +恨 +楸 +娅 +瓦 +桩 +雪 +嬴 +伏 +榔 +妥 +铿 +拌 +眠 +雍 +缇 +‘ +卓 +搓 +哌 +觞 +噩 +屈 +哧 +髓 +咦 +巅 +娑 +侑 +淫 +膳 +祝 +勾 +姊 +莴 
+胄 +疃 +薛 +蜷 +胛 +巷 +芙 +芋 +熙 +闰 +勿 +窃 +狱 +剩 +钏 +幢 +陟 +铛 +慧 +靴 +耍 +k +浙 +浇 +飨 +惟 +绗 +祜 +澈 +啼 +咪 +磷 +摞 +诅 +郦 +抹 +跃 +壬 +吕 +肖 +琏 +颤 +尴 +剡 +抠 +凋 +赚 +泊 +津 +宕 +殷 +倔 +氲 +漫 +邺 +涎 +怠 +$ +垮 +荬 +遵 +俏 +叹 +噢 +饽 +蜘 +孙 +筵 +疼 +鞭 +羧 +牦 +箭 +潴 +c +眸 +祭 +髯 +啖 +坳 +愁 +芩 +驮 +倡 +巽 +穰 +沃 +胚 +怒 +凤 +槛 +剂 +趵 +嫁 +v +邢 +灯 +鄢 +桐 +睽 +檗 +锯 +槟 +婷 +嵋 +圻 +诗 +蕈 +颠 +遭 +痢 +芸 +怯 +馥 +竭 +锗 +徜 +恭 +遍 +籁 +剑 +嘱 +苡 +龄 +僧 +桑 +潸 +弘 +澶 +楹 +悲 +讫 +愤 +腥 +悸 +谍 +椹 +呢 +桓 +葭 +攫 +阀 +翰 +躲 +敖 +柑 +郎 +笨 +橇 +呃 +魁 +燎 +脓 +葩 +磋 +垛 +玺 +狮 +沓 +砜 +蕊 +锺 +罹 +蕉 +翱 +虐 +闾 +巫 +旦 +茱 +嬷 +枯 +鹏 +贡 +芹 +汛 +矫 +绁 +拣 +禺 +佃 +讣 +舫 +惯 +乳 +趋 +疲 +挽 +岚 +虾 +衾 +蠹 +蹂 +飓 +氦 +铖 +孩 +稞 +瑜 +壅 +掀 +勘 +妓 +畅 +髋 +W +庐 +牲 +蓿 +榕 +练 +垣 +唱 +邸 +菲 +昆 +婺 +穿 +绡 +麒 +蚱 +掂 +愚 +泷 +涪 +漳 +妩 +娉 +榄 +讷 +觅 +旧 +藤 +煮 +呛 +柳 +腓 +叭 +庵 +烷 +阡 +罂 +蜕 +擂 +猖 +咿 +媲 +脉 +【 +沏 +貅 +黠 +熏 +哲 +烁 +坦 +酵 +兜 +× +潇 +撒 +剽 +珩 +圹 +乾 +摸 +樟 +帽 +嗒 +襄 +魂 +轿 +憬 +锡 +〕 +喃 +皆 +咖 +隅 +脸 +残 +泮 +袂 +鹂 +珊 +囤 +捆 +咤 +误 +徨 +闹 +淙 +芊 +淋 +怆 +囗 +拨 +梳 +渤 +R +G +绨 +蚓 +婀 +幡 +狩 +麾 +谢 +唢 +裸 +旌 +伉 +纶 +裂 +驳 +砼 +咛 +澄 +樨 +蹈 +宙 +澍 +倍 +貔 +操 +勇 +蟠 +摈 +砧 +虬 +够 +缁 +悦 +藿 +撸 +艹 +摁 +淹 +豇 +虎 +榭 +ˉ +吱 +d +° +喧 +荀 +踱 +侮 +奋 +偕 +饷 +犍 +惮 +坑 +璎 +徘 +宛 +妆 +袈 +倩 +窦 +昂 +荏 +乖 +K +怅 +撰 +鳙 +牙 +袁 +酞 +X +痿 +琼 +闸 +雁 +趾 +荚 +虻 +涝 +《 +杏 +韭 +偈 +烤 +绫 +鞘 +卉 +症 +遢 +蓥 +诋 +杭 +荨 +匆 +竣 +簪 +辙 +敕 +虞 +丹 +缭 +咩 +黟 +m +淤 +瑕 +咂 +铉 +硼 +茨 +嶂 +痒 +畸 +敬 +涿 +粪 +窘 +熟 +叔 +嫔 +盾 +忱 +裘 +憾 +梵 +赡 +珙 +咯 +娘 +庙 +溯 +胺 +葱 +痪 +摊 +荷 +卞 +乒 +髦 +寐 +铭 +坩 +胗 +枷 +爆 +溟 +嚼 +羚 +砬 +轨 +惊 +挠 +罄 +竽 +菏 +氧 +浅 +楣 +盼 +枢 +炸 +阆 +杯 +谏 +噬 +淇 +渺 +俪 +秆 +墓 +泪 +跻 +砌 +痰 +垡 +渡 +耽 +釜 +讶 +鳎 +煞 +呗 +韶 +舶 +绷 +鹳 +缜 +旷 +铊 +皱 +龌 +檀 +霖 +奄 +槐 +艳 +蝶 +旋 +哝 +赶 +骞 +蚧 +腊 +盈 +丁 +` +蜚 +矸 +蝙 +睨 +嚓 +僻 +鬼 +醴 +夜 +彝 +磊 +笔 +拔 +栀 +糕 +厦 +邰 +纫 +逭 +纤 +眦 +膊 +馍 +躇 +烯 +蘼 +冬 +诤 +暄 +骶 +哑 +瘠 +」 +臊 +丕 +愈 +咱 +螺 +擅 +跋 +搏 +硪 +谄 +笠 +淡 +嘿 +骅 +谧 +鼎 +皋 +姚 +歼 +蠢 +驼 +耳 +胬 +挝 +涯 +狗 +蒽 +孓 +犷 +凉 +芦 +箴 +铤 +孤 +嘛 +坤 +V +茴 +朦 +挞 +尖 +橙 +诞 +搴 +碇 +洵 +浚 +帚 +蜍 +漯 +柘 +嚎 +讽 +芭 +荤 +咻 +祠 +秉 +跖 +埃 +吓 +糯 +眷 +馒 +惹 +娼 +鲑 +嫩 +讴 +轮 +瞥 +靶 +褚 +乏 +缤 +宋 +帧 +删 +驱 +碎 +扑 +俩 +俄 +偏 +涣 +竹 +噱 +皙 +佰 +渚 +唧 +斡 +# +镉 +刀 +崎 +筐 +佣 +夭 +贰 +肴 +峙 +哔 +艿 +匐 +牺 +镛 +缘 +仡 +嫡 +劣 +枸 +堀 +梨 +簿 +鸭 +蒸 +亦 +稽 +浴 +{ +衢 +束 +槲 +j +阁 +揍 +疥 +棋 +潋 +聪 +窜 +乓 +睛 +插 +冉 +阪 +苍 +搽 +「 +蟾 +螟 +幸 +仇 +樽 +撂 +慢 +跤 +幔 +俚 +淅 +覃 +觊 +溶 +妖 +帛 +侨 +曰 +妾 +泗 +· +: +瀘 +風 +Ë +( +) +∶ +紅 +紗 +瑭 +雲 +頭 +鶏 +財 +許 +• +¥ +樂 +焗 +麗 +— +; +滙 +東 +榮 +繪 +興 +… +門 +業 +π +楊 +國 +顧 +é +盤 +寳 +Λ +龍 +鳳 +島 +誌 +緣 +結 +銭 +萬 +勝 +祎 +璟 +優 +歡 +臨 +時 +購 += +★ +藍 +昇 +鐵 +觀 +勅 +農 +聲 +畫 +兿 +術 +發 +劉 +記 +專 +耑 +園 +書 +壴 +種 +Ο +● +褀 +號 +銀 +匯 +敟 +锘 +葉 +橪 +廣 +進 +蒄 +鑽 +阝 +祙 +貢 +鍋 +豊 +夬 +喆 +團 +閣 +開 +燁 +賓 +館 +酡 +沔 +順 ++ +硚 +劵 +饸 +陽 +車 +湓 +復 +萊 +氣 +軒 +華 +堃 +迮 +纟 +戶 +馬 +學 +裡 +電 +嶽 +獨 +マ +シ +サ +ジ +燘 +袪 +環 +❤ +臺 +灣 +専 +賣 +孖 +聖 +攝 +線 +▪ +α +傢 +俬 +夢 +達 +莊 +喬 +貝 +薩 +劍 +羅 +壓 +棛 +饦 +尃 +璈 +囍 +醫 +G +I +A +# +N +鷄 +髙 +嬰 +啓 +約 +隹 +潔 +賴 +藝 +~ +寶 +籣 +麺 +  +嶺 +√ +義 +網 +峩 +長 +∧ +魚 +機 +構 +② +鳯 +偉 +L +B +㙟 +畵 +鴿 +' +詩 +溝 +嚞 +屌 +藔 +佧 +玥 +蘭 +織 +1 +3 +9 +0 +7 +點 +砭 +鴨 +鋪 +銘 +廳 +弍 +‧ +創 +湯 +坶 +℃ +卩 +骝 +& +烜 +荘 +當 +潤 +扞 +係 +懷 +碶 +钅 +蚨 +讠 +☆ +叢 +爲 +埗 +涫 +塗 +→ +楽 +現 +鯨 +愛 +瑪 +鈺 +忄 +悶 +藥 +飾 +樓 +視 +孬 +ㆍ +燚 +苪 +師 +① +丼 +锽 +│ +韓 +標 +è +兒 +閏 +匋 +張 +漢 +Ü +髪 +會 +閑 +檔 +習 +裝 +の +峯 +菘 +輝 +И +雞 +釣 +億 +浐 +K +O +R +8 +H +E +P +T +W +D +S +C +M +F +姌 +饹 +» +晞 +廰 +ä +嵯 +鷹 +負 +飲 +絲 +冚 +楗 +澤 +綫 +區 +❋ +← +質 +靑 +揚 +③ +滬 +統 +産 +協 +﹑ +乸 +畐 +經 +運 +際 +洺 +岽 +為 +粵 +諾 +崋 +豐 +碁 +ɔ +V +2 +6 +齋 +誠 +訂 +´ +勑 +雙 +陳 +無 +í +泩 +媄 +夌 +刂 +i +c +t +o +r +a +嘢 +耄 +燴 +暃 +壽 +媽 +靈 +抻 +體 +唻 +É +冮 +甹 +鎮 +錦 +ʌ +蜛 +蠄 +尓 +駕 +戀 +飬 +逹 +倫 +貴 +極 +Я +Й +寬 +磚 +嶪 +郎 +職 +| +間 +n +d +剎 +伈 +課 +飛 +橋 +瘊 +№ +譜 +骓 +圗 +滘 +縣 +粿 +咅 +養 +濤 +彳 +® +% +Ⅱ +啰 +㴪 +見 +矞 +薬 +糁 +邨 +鲮 +顔 +罱 +З +選 +話 +贏 +氪 +俵 +競 +瑩 +繡 +枱 +β +綉 +á +獅 +爾 +™ +麵 +戋 +淩 +徳 +個 +劇 +場 +務 +簡 +寵 +h +實 +膠 +轱 +圖 +築 +嘣 +樹 +㸃 +營 +耵 +孫 +饃 +鄺 +飯 +麯 +遠 +輸 +坫 +孃 +乚 
+閃 +鏢 +㎡ +題 +廠 +關 +↑ +爺 +將 +軍 +連 +篦 +覌 +參 +箸 +- +窠 +棽 +寕 +夀 +爰 +歐 +呙 +閥 +頡 +熱 +雎 +垟 +裟 +凬 +勁 +帑 +馕 +夆 +疌 +枼 +馮 +貨 +蒤 +樸 +彧 +旸 +靜 +龢 +暢 +㐱 +鳥 +珺 +鏡 +灡 +爭 +堷 +廚 +Ó +騰 +診 +┅ +蘇 +褔 +凱 +頂 +豕 +亞 +帥 +嘬 +⊥ +仺 +桖 +複 +饣 +絡 +穂 +顏 +棟 +納 +▏ +濟 +親 +設 +計 +攵 +埌 +烺 +ò +頤 +燦 +蓮 +撻 +節 +講 +濱 +濃 +娽 +洳 +朿 +燈 +鈴 +護 +膚 +铔 +過 +補 +Z +U +5 +4 +坋 +闿 +䖝 +餘 +缐 +铞 +貿 +铪 +桼 +趙 +鍊 +[ +㐂 +垚 +菓 +揸 +捲 +鐘 +滏 +𣇉 +爍 +輪 +燜 +鴻 +鮮 +動 +鹞 +鷗 +丄 +慶 +鉌 +翥 +飮 +腸 +⇋ +漁 +覺 +來 +熘 +昴 +翏 +鲱 +圧 +鄉 +萭 +頔 +爐 +嫚 +г +貭 +類 +聯 +幛 +輕 +訓 +鑒 +夋 +锨 +芃 +珣 +䝉 +扙 +嵐 +銷 +處 +ㄱ +語 +誘 +苝 +歸 +儀 +燒 +楿 +內 +粢 +葒 +奧 +麥 +礻 +滿 +蠔 +穵 +瞭 +態 +鱬 +榞 +硂 +鄭 +黃 +煙 +祐 +奓 +逺 +* +瑄 +獲 +聞 +薦 +讀 +這 +樣 +決 +問 +啟 +們 +執 +説 +轉 +單 +隨 +唘 +帶 +倉 +庫 +還 +贈 +尙 +皺 +■ +餅 +產 +○ +∈ +報 +狀 +楓 +賠 +琯 +嗮 +禮 +` +傳 +> +≤ +嗞 +Φ +≥ +換 +咭 +∣ +↓ +曬 +ε +応 +寫 +″ +終 +様 +純 +費 +療 +聨 +凍 +壐 +郵 +ü +黒 +∫ +製 +塊 +調 +軽 +確 +撃 +級 +馴 +Ⅲ +涇 +繹 +數 +碼 +證 +狒 +処 +劑 +< +晧 +賀 +衆 +] +櫥 +兩 +陰 +絶 +對 +鯉 +憶 +◎ +p +e +Y +蕒 +煖 +頓 +測 +試 +鼽 +僑 +碩 +妝 +帯 +≈ +鐡 +舖 +權 +喫 +倆 +ˋ +該 +悅 +ā +俫 +. +f +s +b +m +k +g +u +j +貼 +淨 +濕 +針 +適 +備 +l +/ +給 +謢 +強 +觸 +衛 +與 +⊙ +$ +緯 +變 +⑴ +⑵ +⑶ +㎏ +殺 +∩ +幚 +─ +價 +▲ +離 +ú +ó +飄 +烏 +関 +閟 +﹝ +﹞ +邏 +輯 +鍵 +驗 +訣 +導 +歷 +屆 +層 +▼ +儱 +錄 +熳 +ē +艦 +吋 +錶 +辧 +飼 +顯 +④ +禦 +販 +気 +対 +枰 +閩 +紀 +幹 +瞓 +貊 +淚 +△ +眞 +墊 +Ω +獻 +褲 +縫 +緑 +亜 +鉅 +餠 +{ +} +◆ +蘆 +薈 +█ +◇ +溫 +彈 +晳 +粧 +犸 +穩 +訊 +崬 +凖 +熥 +П +舊 +條 +紋 +圍 +Ⅳ +筆 +尷 +難 +雜 +錯 +綁 +識 +頰 +鎖 +艶 +□ +殁 +殼 +⑧ +├ +▕ +鵬 +ǐ +ō +ǒ +糝 +綱 +▎ +μ +盜 +饅 +醬 +籤 +蓋 +釀 +鹽 +據 +à +ɡ +辦 +◥ +彐 +┌ +婦 +獸 +鲩 +伱 +ī +蒟 +蒻 +齊 +袆 +腦 +寧 +凈 +妳 +煥 +詢 +偽 +謹 +啫 +鯽 +騷 +鱸 +損 +傷 +鎻 +髮 +買 +冏 +儥 +両 +﹢ +∞ +載 +喰 +z +羙 +悵 +燙 +曉 +員 +組 +徹 +艷 +痠 +鋼 +鼙 +縮 +細 +嚒 +爯 +≠ +維 +" +鱻 +壇 +厍 +帰 +浥 +犇 +薡 +軎 +² +應 +醜 +刪 +緻 +鶴 +賜 +噁 +軌 +尨 +镔 +鷺 +槗 +彌 +葚 +濛 +請 +溇 +緹 +賢 +訪 +獴 +瑅 +資 +縤 +陣 +蕟 +栢 +韻 +祼 +恁 +伢 +謝 +劃 +涑 +總 +衖 +踺 +砋 +凉 +籃 +駿 +苼 +瘋 +昽 +紡 +驊 +腎 +﹗ +響 +杋 +剛 +嚴 +禪 +歓 +槍 +傘 +檸 +檫 +炣 +勢 +鏜 +鎢 +銑 +尐 +減 +奪 +惡 +θ +僮 +婭 +臘 +ū +ì +殻 +鉄 +∑ +蛲 +焼 +緖 +續 +紹 +懮! 
+䰾 +䲁 +丌 +丏 +丟 +並 +乂 +乗 +乩 +乭 +乹 +亀 +亂 +亅 +亊 +亠 +亰 +亶 +亹 +仂 +仉 +仏 +仛 +仫 +仮 +仳 +仵 +仼 +伃 +伋 +伕 +伝 +伷 +伾 +佀 +佁 +佇 +佈 +佉 +佋 +佔 +併 +佹 +佺 +佾 +侁 +侅 +侊 +侖 +侘 +侚 +侞 +価 +侶 +侷 +侹 +俁 +俅 +俋 +俌 +俍 +俛 +俠 +俳 +俴 +俶 +俽 +倈 +倓 +倖 +倗 +倞 +倢 +倣 +値 +倧 +倮 +倻 +偁 +偊 +偍 +偓 +偪 +偲 +側 +偵 +偸 +傃 +傉 +傑 +傒 +傕 +傖 +傜 +傭 +債 +傾 +僅 +僉 +僊 +働 +僔 +僕 +僖 +僙 +僜 +僡 +僩 +僭 +僰 +僱 +僴 +儁 +儂 +儆 +儇 +儈 +儉 +儐 +儔 +儕 +儘 +儚 +儞 +償 +儦 +儫 +儲 +儷 +儺 +儻 +儼 +兌 +児 +兕 +兗 +兪 +冂 +円 +冇 +冊 +冑 +冖 +冧 +冨 +冪 +冫 +冴 +凃 +凜 +凞 +凪 +凵 +刄 +刎 +別 +刦 +刧 +刼 +則 +剋 +剏 +剝 +剣 +剮 +劄 +劊 +劌 +劔 +劬 +効 +劼 +勔 +勖 +勗 +勛 +勞 +勣 +勦 +勱 +勲 +勳 +勵 +勷 +勸 +勻 +匂 +匄 +匏 +匚 +匱 +匸 +卋 +卍 +卐 +卣 +卬 +卮 +卲 +卹 +卺 +卻 +卽 +厓 +厔 +厙 +厭 +厰 +厲 +厴 +厶 +叄 +収 +叕 +叡 +叵 +吔 +吥 +吳 +吶 +呂 +呉 +呎 +呾 +咁 +咑 +咗 +咘 +咟 +咥 +咲 +咼 +咾 +哂 +哏 +哐 +哖 +哱 +唃 +唄 +唫 +唭 +唵 +唸 +啁 +啍 +啚 +啞 +啣 +啯 +啱 +啲 +啷 +喈 +喚 +喢 +喦 +喪 +喲 +喼 +嗄 +嗆 +嗇 +嗊 +嗎 +嗚 +嗢 +嗩 +嗶 +嗹 +嘅 +嘆 +嘍 +嘏 +嘔 +嘗 +嘚 +嘜 +嘥 +嘩 +嘮 +嘯 +嘰 +嘸 +噍 +噏 +噓 +噝 +噠 +噥 +噦 +噯 +噰 +噲 +噴 +噸 +噹 +嚇 +嚈 +嚐 +嚕 +嚗 +嚙 +嚟 +嚤 +嚦 +嚧 +嚨 +嚩 +嚮 +嚳 +嚶 +嚿 +囀 +囂 +囃 +囉 +囑 +囒 +囓 +囝 +団 +囧 +囪 +囮 +囯 +囲 +図 +囶 +囷 +圂 +圄 +圉 +圏 +圓 +圪 +圯 +坌 +坖 +坣 +坬 +坮 +坵 +垈 +垍 +垕 +垞 +垯 +垰 +垵 +垻 +垿 +埅 +埇 +埈 +埏 +埒 +埜 +埡 +埤 +埧 +埨 +埪 +埮 +埴 +埵 +埻 +埼 +堅 +堈 +堉 +堊 +堍 +堖 +堝 +堦 +堮 +堯 +堺 +塀 +塅 +塆 +塋 +塏 +塙 +塜 +塡 +塢 +塤 +塨 +塩 +塭 +塰 +塱 +塲 +塵 +塹 +塽 +墀 +墎 +増 +墘 +墜 +墡 +墣 +墫 +墬 +墮 +墱 +墳 +墺 +墼 +墾 +壄 +壆 +壋 +壌 +壎 +壔 +壘 +壙 +壞 +壟 +壠 +壢 +壩 +壯 +壱 +壺 +変 +夊 +夠 +夤 +夾 +奀 +奐 +奣 +奩 +奫 +奭 +奮 +妀 +妁 +妏 +妑 +妠 +妧 +妭 +妸 +妺 +姀 +姁 +姃 +姈 +姉 +姍 +姦 +姪 +姫 +姮 +姵 +姶 +姸 +娋 +娍 +娎 +娖 +娛 +娫 +娳 +娸 +婁 +婑 +婯 +婻 +婼 +媃 +媊 +媐 +媓 +媖 +媗 +媜 +媞 +媧 +媭 +媯 +媺 +媼 +媿 +嫄 +嫈 +嫘 +嫪 +嫲 +嫳 +嫵 +嫺 +嫻 +嬅 +嬈 +嬋 +嬌 +嬛 +嬝 +嬡 +嬤 +嬨 +嬪 +嬬 +嬭 +嬸 +嬾 +嬿 +孀 +孆 +孋 +孌 +孮 +孻 +孿 +宍 +実 +宧 +宮 +寀 +寁 +寈 +寊 +寔 +寖 +寗 +寘 +寛 +寜 +寢 +審 +寯 +尋 +尗 +尢 +尪 +屄 +屇 +屍 +屓 +屚 +屜 +屢 +屬 +屭 +屺 +屻 +岀 +岈 +岡 +岣 +岧 +岪 +岬 +岰 +岵 +岻 +峅 +峇 +峍 +峘 +峚 +峠 +峴 +峼 +峽 +崁 +崈 +崍 +崐 +崑 +崒 +崗 +崘 +崙 +崚 +崞 +崟 +崠 +崢 +崱 +崵 +崶 +嵎 +嵒 +嵕 +嵖 +嵗 +嵙 +嵛 +嵜 +嵨 +嵮 +嵰 +嵴 +嵻 +嵿 +嶁 +嶃 +嶄 +嶇 +嶋 +嶌 +嶍 +嶒 +嶔 +嶗 +嶝 +嶠 +嶢 +嶦 +嶧 +嶬 +嶰 +嶲 +嶴 +嶷 +嶸 +嶼 +巂 +巄 +巆 +巋 +巌 +巎 +巑 +巒 +巔 +巖 +巘 +巛 +巰 +巶 +巻 +巿 +帔 +帙 +帡 +帢 +帳 +幀 +幃 +幗 +幟 +幣 +幪 +幫 +幵 +幷 +幾 +庀 +庁 +広 +庢 +庲 +庼 +廁 +廂 +廄 +廆 +廈 +廋 +廌 +廍 +廑 +廔 +廕 +廙 +廝 +廞 +廟 +廡 +廢 +廧 +廨 +廩 +廬 +廱 +廸 +廻 +廼 +弁 +弅 +弇 +弉 +弐 +弒 +弔 +弖 +弢 +弨 +弸 +弾 +彀 +彄 +彅 +彆 +彊 +彎 +彔 +彖 +彘 +彙 +彜 +彞 +彠 +彡 +彣 +彥 +彫 +彿 +徂 +徑 +從 +徠 +徧 +徫 +徬 +徭 +徴 +徸 +忉 +忝 +忞 +忬 +忯 +忳 +怍 +怙 +怛 +怵 +恆 +恊 +恥 +恵 +悆 +悛 +悝 +悞 +悧 +悪 +悰 +悳 +惇 +惔 +惣 +惱 +惲 +愃 +愆 +愍 +愐 +愒 +愔 +愜 +愨 +愭 +愴 +愷 +愼 +愾 +慄 +慘 +慚 +慜 +慟 +慣 +慥 +慮 +慳 +慾 +憂 +憊 +憍 +憐 +憑 +憓 +憕 +憙 +憚 +憤 +憫 +憲 +憺 +憻 +懃 +懇 +懌 +懍 +懐 +懣 +懮 +懲 +懶 +懸 +懺 +懼 +懽 +懾 +戇 +戔 +戕 +戙 +戡 +戥 +戦 +戩 +戰 +戱 +戲 +戸 +戻 +戽 +扆 +扥 +抃 +抇 +抦 +拋 +拏 +拝 +拡 +拺 +挙 +挵 +挹 +挻 +挾 +捒 +捜 +捦 +捨 +捩 +捫 +捭 +捱 +掃 +掄 +掙 +掛 +掞 +掟 +採 +掾 +揀 +揄 +揆 +揔 +揮 +揺 +搖 +搗 +搠 +搢 +搳 +搵 +搶 +搾 +摂 +摜 +摟 +摠 +摭 +摯 +摳 +摴 +摵 +摶 +摺 +摻 +摽 +撈 +撐 +撓 +撖 +撙 +撚 +撣 +撥 +撫 +撲 +撳 +撾 +撿 +擁 +擇 +擊 +擋 +擔 +擠 +擥 +擬 +擯 +擰 +擱 +擲 +擴 +擷 +擺 +擼 +擾 +攏 +攔 +攖 +攜 +攞 +攢 +攣 +攤 +攪 +攬 +攴 +攷 +攽 +敍 +敎 +敔 +敗 +敘 +敫 +敭 +敵 +敻 +敾 +斂 +斃 +斎 +斕 +斖 +斝 +斬 +斷 +斿 +旂 +旃 +旄 +旉 +旙 +旛 +旡 +旲 +旳 +旻 +旼 +旽 +旾 +旿 +昃 +昉 +昍 +昐 +昚 +昛 +昜 +昞 +昡 +昣 +昤 +昪 +昫 +昰 +昺 +晈 +晉 +晊 +晙 +晛 +晝 +晩 +晪 +晫 +晭 +晸 +暅 +暈 +暉 +暊 +暌 +暎 +暏 +暐 +暕 +暘 +暝 +暟 +暠 +暦 +暫 +暱 +暲 +暸 +暻 +暾 +曄 +曅 +曆 +曇 +曌 +曔 +曖 +曠 +曧 +曨 +曩 +曮 +曶 +曷 +曺 +曽 +朊 +朏 +朓 +朖 +朧 +朶 +杁 +杌 +杓 +杙 +杣 +杤 +杧 +杬 +杴 +杻 +杼 +枏 +枖 +枛 +枠 +枡 +枲 +枹 +柁 +柃 +柉 +柊 +柎 +柝 +柟 +柰 +柵 +柶 +柷 +査 +柾 +栃 +栄 +栐 +栒 +栜 +栝 +栞 +栨 +栲 +栴 +栻 +桄 +桕 +桙 +桜 +桝 +桫 +桱 +桲 +桴 +桿 +梀 +梂 +梃 +梉 +梔 +梘 +梟 +梠 +梣 +梫 +梱 +梶 +梽 +棄 +棆 +棐 +棓 +棖 +棗 +棡 +棧 +棨 +棩 +棪 +棫 +棲 +棶 +棹 +棻 +棼 +椆 +椇 +椏 +椙 +椥 +椪 +椲 +椵 +楙 +楡 +楢 +楤 +楧 +楨 +楫 +楮 +楯 +楳 +榊 +榍 +榎 +榑 +榖 +榗 +榘 +榢 +榣 +榤 +榦 +榲 +榿 +槀 +槁 +槃 +槊 +槓 +槔 +槙 +槤 +槩 +槭 +槰 +槱 +槳 +槺 +槻 +槼 +樀 +樁 +樅 +樆 +樋 +樑 +樗 +樘 +樞 +権 +樫 +樺 +樻 +橈 +橐 +橒 +橓 +橚 +橢 +橫 +橿 +檄 +檇 +檉 +檊 +檎 +檜 +檞 +檠 +檡 +檢 +檣 +檦 +檨 +檯 
+檳 +檵 +檻 +檽 +櫂 +櫃 +櫆 +櫈 +櫓 +櫚 +櫛 +櫞 +櫟 +櫨 +櫪 +櫱 +櫸 +櫻 +櫾 +櫿 +欄 +欉 +欏 +欒 +欖 +欞 +欥 +欸 +欹 +欽 +歊 +歎 +歛 +歩 +歲 +歳 +歴 +歿 +殂 +殄 +殑 +殘 +殛 +殞 +殟 +殤 +殭 +殮 +殯 +殲 +殳 +毀 +毆 +毉 +毌 +毎 +毐 +毖 +毘 +毬 +毴 +毸 +毿 +氂 +氈 +氍 +氫 +氬 +氷 +氹 +氻 +氾 +汎 +汜 +汧 +汭 +沄 +沆 +沇 +沍 +沒 +沖 +沘 +沚 +沜 +沢 +沨 +沯 +沺 +況 +泂 +泆 +泇 +泐 +泖 +泚 +洌 +洎 +洢 +洣 +洤 +洨 +洩 +洸 +洹 +浄 +浛 +浞 +浟 +浡 +浤 +浯 +浵 +浹 +涙 +涼 +淍 +淎 +淏 +淓 +淛 +淠 +淥 +淪 +淯 +淰 +淵 +淶 +淸 +淺 +淽 +渃 +済 +渉 +渋 +渕 +渙 +渟 +渦 +渫 +渼 +渽 +渾 +湉 +湊 +湔 +湜 +湞 +湣 +湥 +湧 +湳 +湴 +湼 +満 +溁 +溈 +溋 +溎 +準 +溙 +溦 +溲 +溵 +溼 +滀 +滄 +滅 +滈 +滉 +滌 +滎 +滝 +滯 +滲 +滷 +滸 +滹 +滻 +滽 +滾 +漇 +漈 +漎 +漚 +漣 +漬 +漲 +漴 +漵 +漷 +漸 +漼 +漿 +潁 +潑 +潛 +潟 +潯 +潰 +潲 +潽 +潾 +潿 +澀 +澁 +澂 +澆 +澇 +澉 +澋 +澌 +澔 +澗 +澠 +澣 +澥 +澪 +澮 +澯 +澱 +澻 +濁 +濊 +濋 +濘 +濙 +濫 +濬 +濰 +濲 +濶 +濺 +濼 +濾 +瀁 +瀅 +瀆 +瀉 +瀍 +瀏 +瀔 +瀕 +瀝 +瀞 +瀟 +瀠 +瀦 +瀧 +瀨 +瀬 +瀰 +瀲 +瀴 +瀶 +瀾 +灃 +灊 +灑 +灘 +灝 +灤 +灧 +灴 +災 +炁 +炆 +炘 +炟 +炤 +炱 +炲 +炷 +炻 +烉 +烋 +烒 +烔 +烝 +烱 +烴 +焃 +焄 +焌 +焓 +焜 +焞 +焴 +焻 +焿 +煇 +煉 +煐 +煒 +煔 +煕 +煚 +煠 +煩 +煬 +煳 +煵 +煶 +熅 +熇 +熈 +熒 +熖 +熗 +熜 +熤 +熯 +熲 +熺 +熼 +熾 +熿 +燄 +燉 +燊 +燏 +燐 +燔 +燝 +燫 +燬 +燭 +燹 +燻 +燼 +燾 +燿 +爀 +爌 +爔 +爚 +爛 +爝 +爿 +牁 +牂 +牆 +牕 +牖 +牘 +牝 +牠 +牻 +牼 +牽 +犂 +犎 +犖 +犛 +犢 +犧 +犨 +犰 +犴 +犽 +狎 +狓 +狛 +狟 +狦 +狨 +狳 +狶 +狷 +狹 +狻 +猁 +猄 +猇 +猊 +猙 +猞 +猢 +猨 +猳 +猶 +猺 +猻 +獁 +獃 +獄 +獇 +獎 +獏 +獢 +獣 +獬 +獮 +獯 +獰 +獵 +獷 +獺 +獼 +獾 +玀 +玆 +玎 +玏 +玓 +玕 +玗 +玘 +玙 +玠 +玡 +玢 +玧 +玨 +玭 +玶 +玹 +玾 +珅 +珌 +珎 +珖 +珝 +珡 +珤 +珦 +珧 +珪 +珮 +珵 +珹 +珽 +琁 +琄 +琇 +琍 +琎 +琡 +琤 +琱 +琹 +琺 +琿 +瑀 +瑂 +瑆 +瑈 +瑊 +瑋 +瑑 +瑒 +瑝 +瑠 +瑢 +瑣 +瑤 +瑥 +瑧 +瑨 +瑯 +瑱 +瑳 +瑴 +瑺 +璄 +璆 +璉 +璌 +璕 +璘 +璙 +璚 +璠 +璡 +璣 +璥 +璦 +璪 +璫 +璬 +璮 +璱 +璵 +璸 +璹 +璽 +璿 +瓈 +瓊 +瓌 +瓏 +瓑 +瓔 +瓖 +瓘 +瓚 +瓛 +瓞 +甂 +甌 +甍 +甑 +甕 +甡 +甦 +甪 +畀 +畇 +畊 +畋 +畎 +畑 +畝 +畠 +畢 +畧 +畬 +畯 +異 +畳 +畷 +疇 +疊 +疋 +疍 +疒 +疕 +痍 +痙 +痟 +痩 +痲 +痺 +瘍 +瘓 +瘜 +瘞 +瘡 +瘧 +瘰 +瘺 +癀 +癆 +癇 +癒 +癘 +癟 +癡 +癢 +癤 +癥 +癩 +癬 +癭 +癮 +癯 +癰 +癱 +癲 +発 +皐 +皚 +皛 +皝 +皞 +皰 +皷 +皸 +盃 +盋 +盌 +盞 +盡 +監 +盦 +盧 +盨 +盩 +盪 +盫 +盷 +盺 +眀 +県 +眛 +眜 +眥 +眵 +眾 +睜 +睞 +睥 +睪 +睭 +睺 +瞋 +瞞 +瞢 +瞫 +瞼 +瞽 +矇 +矍 +矚 +矧 +矯 +砢 +砩 +砫 +砮 +砯 +砲 +砳 +砵 +硃 +硇 +硏 +硐 +硓 +硜 +硤 +硨 +硭 +硯 +碕 +碡 +碪 +碭 +碸 +碻 +碽 +磔 +磘 +磙 +磜 +磡 +磪 +磯 +磱 +磲 +磵 +磻 +磾 +礄 +礎 +礐 +礑 +礒 +礙 +礠 +礦 +礪 +礫 +礬 +礮 +礱 +礽 +祂 +祆 +祇 +祋 +祏 +祓 +祕 +祧 +祹 +祿 +禃 +禇 +禍 +禎 +禑 +禓 +禔 +禕 +禘 +禛 +禟 +禠 +禤 +禨 +禩 +禰 +禱 +禵 +禼 +禿 +秈 +秠 +秳 +稅 +稈 +稉 +稑 +稘 +稙 +稜 +稟 +稱 +稲 +稺 +稾 +穀 +穈 +穉 +穌 +積 +穎 +穟 +穠 +穡 +穢 +穣 +穫 +窅 +窋 +窣 +窩 +窪 +窮 +窯 +窰 +窶 +窺 +竄 +竅 +竇 +竈 +竊 +竑 +竜 +竦 +竩 +竻 +笄 +笘 +笞 +笥 +笩 +笪 +笭 +笮 +笯 +笱 +笳 +笹 +筅 +筊 +筌 +筍 +筘 +筥 +筦 +筧 +筬 +筭 +筲 +筳 +筶 +筻 +箆 +箇 +箋 +箏 +箑 +箒 +箜 +範 +篊 +篋 +篌 +篔 +篠 +篤 +篥 +篩 +篭 +篯 +篳 +簀 +簃 +簉 +簍 +簑 +簕 +簗 +簞 +簠 +簫 +簷 +簹 +簺 +簽 +簾 +籀 +籌 +籐 +籙 +籛 +籜 +籝 +籟 +籠 +籥 +籪 +籬 +籮 +籲 +籾 +粄 +粍 +粦 +粩 +糀 +糌 +糎 +糞 +糢 +糧 +糬 +糰 +糴 +糶 +糸 +糹 +糺 +糾 +紂 +紆 +紇 +紈 +紉 +紐 +紑 +紓 +紕 +紘 +紙 +紛 +紜 +紝 +紞 +紮 +紱 +紲 +紳 +紵 +紺 +紿 +絃 +絆 +経 +絎 +絕 +絛 +絜 +絞 +絢 +絨 +絪 +絳 +絵 +絹 +絺 +綃 +綈 +綎 +綏 +綖 +継 +続 +綜 +綝 +綞 +綠 +綢 +綣 +綧 +綬 +綮 +綰 +綳 +綴 +綸 +綺 +綻 +綽 +綾 +綿 +緁 +緃 +緄 +緈 +緊 +緋 +総 +緒 +緘 +緜 +緝 +緞 +締 +緡 +緤 +編 +緩 +緬 +緱 +緲 +練 +縂 +縄 +縈 +縉 +縊 +縕 +縛 +縝 +縞 +縠 +縡 +縯 +縱 +縴 +縵 +縷 +縹 +縻 +績 +繃 +繆 +繇 +繒 +繕 +繖 +繙 +繚 +繞 +繩 +繫 +繭 +繰 +繳 +繻 +繼 +繽 +繾 +纁 +纈 +纍 +纏 +纓 +纔 +纕 +纖 +纘 +纜 +缶 +缽 +罃 +罅 +罈 +罉 +罌 +罍 +罟 +罨 +罰 +罳 +罵 +罶 +罷 +罽 +羂 +羆 +羈 +羋 +羕 +羗 +羣 +羥 +羨 +羱 +翀 +翂 +翃 +翕 +翙 +翜 +翬 +翮 +翹 +耎 +耔 +耨 +耬 +聃 +聒 +聟 +聰 +聱 +聳 +聴 +聶 +聽 +聾 +肅 +肏 +肜 +肫 +肸 +肹 +胂 +胅 +胇 +胊 +胙 +胝 +胼 +脅 +脇 +脈 +脛 +脣 +脩 +脫 +脬 +脭 +脳 +脷 +脹 +腧 +腫 +腳 +膂 +膣 +膥 +膩 +膮 +膽 +膾 +膿 +臉 +臍 +臏 +臚 +臞 +臟 +臠 +臯 +舂 +舉 +舎 +舘 +舢 +舥 +舨 +舩 +舲 +舺 +艅 +艉 +艋 +艎 +艏 +艔 +艙 +艚 +艱 +艸 +艽 +芑 +芛 +芨 +芴 +芻 +苅 +苤 +苧 +苳 +苺 +苻 +苾 +茀 +茇 +茈 +茘 +茚 +茛 +茝 +茮 +茲 +茷 +茺 +荅 +荇 +荊 +荎 +荖 +荳 +莕 +莖 +莙 +莛 +莢 +莧 +莩 +莿 +菈 +菉 +菍 +菑 +菔 +菝 +菥 +菫 +菰 +菴 +菶 +菸 +菹 +菺 +菼 +菾 +萇 +萐 +萠 +萡 +萣 +萩 +萵 +萹 +葃 +葊 +葎 +葙 +葜 +葝 +葦 +葯 +葰 +葶 +葷 +蒍 +蒎 +蒐 +蒓 +蒔 +蒗 +蒞 +蒢 +蒧 +蒨 +蒭 +蒯 +蒴 +蒹 +蒺 +蒼 +蒾 +蓀 +蓁 +蓂 +蓆 +蓍 +蓘 +蓚 +蓧 +蓨 +蓪 +蓭 +蓯 +蓳 +蓽 +蔆 +蔎 +蔔 +蔕 +蔘 +蔝 +蔞 +蔣 +蔥 +蔦 +蔭 
+蔴 +蔵 +蕁 +蕅 +蕎 +蕑 +蕖 +蕘 +蕚 +蕡 +蕢 +蕩 +蕪 +蕭 +蕷 +蕺 +蕻 +薀 +薆 +薊 +薌 +薐 +薑 +薔 +薗 +薘 +薙 +薜 +薞 +薟 +薨 +薫 +薲 +薷 +薸 +薺 +薾 +薿 +藎 +藟 +藦 +藨 +藪 +藶 +藸 +藹 +藺 +蘂 +蘄 +蘅 +蘊 +蘋 +蘐 +蘓 +蘗 +蘘 +蘚 +蘞 +蘢 +蘧 +蘩 +蘵 +蘶 +蘿 +虉 +虓 +虖 +虛 +虜 +虧 +虨 +虯 +虵 +虺 +蚆 +蚋 +蚍 +蚖 +蚡 +蚢 +蚵 +蚺 +蚼 +蛄 +蛉 +蛍 +蛑 +蛞 +蛯 +蛸 +蛺 +蛻 +蜆 +蜉 +蜑 +蜞 +蜢 +蜣 +蜨 +蜮 +蜯 +蜾 +蝀 +蝍 +蝓 +蝕 +蝘 +蝚 +蝟 +蝣 +蝤 +蝦 +蝨 +蝮 +蝯 +蝰 +蝲 +蝸 +螄 +螅 +螋 +螐 +螔 +螞 +螠 +螢 +螣 +螥 +螫 +螭 +螶 +螻 +螽 +螾 +蟄 +蟅 +蟊 +蟌 +蟎 +蟜 +蟥 +蟪 +蟫 +蟬 +蟯 +蟲 +蟳 +蟴 +蟶 +蟻 +蠂 +蠃 +蠅 +蠆 +蠊 +蠋 +蠍 +蠐 +蠑 +蠘 +蠙 +蠟 +蠣 +蠱 +蠲 +蠵 +蠶 +蠷 +蠻 +衂 +衎 +衕 +衚 +衜 +衝 +衞 +衽 +袓 +袛 +袞 +袴 +袾 +裊 +裎 +裒 +裖 +裬 +裵 +裾 +裿 +褌 +褍 +褎 +褘 +褙 +褞 +褧 +褫 +褭 +褸 +褻 +襌 +襖 +襞 +襠 +襤 +襦 +襪 +襯 +襲 +襴 +襶 +襻 +襾 +覇 +覈 +規 +覓 +覚 +覡 +覦 +覧 +覬 +覲 +観 +覽 +覿 +觔 +觙 +觚 +觜 +觭 +觱 +觴 +觶 +觿 +訁 +訃 +訇 +訌 +討 +訏 +訐 +訒 +訔 +訕 +訖 +託 +訛 +訝 +訟 +訥 +訴 +訶 +註 +証 +詁 +詆 +詈 +詐 +詒 +詔 +評 +詛 +詞 +詠 +詡 +詣 +詥 +詧 +詫 +詭 +詮 +詰 +詳 +詵 +詼 +誄 +誅 +誇 +認 +誒 +誕 +誡 +誣 +誤 +誥 +誦 +誨 +說 +読 +誰 +誴 +誹 +誼 +誾 +談 +諍 +諏 +諒 +論 +諗 +諜 +諟 +諠 +諡 +諤 +諦 +諧 +諪 +諫 +諭 +諮 +諱 +諲 +諳 +諴 +諶 +諷 +諸 +諺 +諼 +謀 +謁 +謂 +謄 +謊 +謌 +謎 +謏 +謐 +謔 +謖 +謗 +謙 +謚 +謜 +謠 +謤 +謨 +謩 +謫 +謬 +謳 +謾 +譏 +譓 +譔 +譙 +譚 +譞 +譫 +譭 +譯 +議 +譲 +譳 +譴 +譽 +譿 +讃 +讌 +讎 +讓 +讖 +讙 +讚 +讜 +讞 +谿 +豈 +豎 +豔 +豢 +豨 +豬 +豳 +豸 +豿 +貐 +貒 +貓 +貘 +貞 +貤 +貧 +貪 +貫 +責 +貮 +貯 +貲 +貳 +貶 +貸 +貺 +貽 +賁 +賂 +賃 +賄 +賈 +賊 +賑 +賒 +賔 +賕 +賚 +賞 +賡 +賤 +賦 +賨 +賬 +賭 +賹 +賺 +賻 +賽 +賾 +贄 +贅 +贇 +贊 +贌 +贍 +贓 +贔 +贖 +贛 +赧 +赬 +趐 +趕 +趖 +趨 +趺 +趼 +跅 +跏 +跗 +跡 +跣 +跩 +踎 +踐 +踰 +踴 +蹕 +蹟 +蹠 +蹤 +蹯 +蹺 +蹻 +躂 +躄 +躉 +躋 +躍 +躑 +躒 +躔 +躝 +躪 +躰 +軀 +軋 +軔 +軛 +軟 +転 +軫 +軲 +軸 +軹 +軺 +軻 +軼 +軾 +較 +輄 +輅 +輋 +輒 +輓 +輔 +輛 +輞 +輟 +輥 +輦 +輩 +輬 +輭 +輶 +輻 +輾 +輿 +轀 +轂 +轄 +轅 +轆 +轍 +轎 +轘 +轝 +轟 +轤 +辭 +辮 +辯 +辵 +辺 +辻 +込 +迴 +迵 +迺 +逈 +逋 +逌 +逎 +逕 +逖 +逤 +逨 +逴 +遄 +遊 +違 +遘 +遙 +遜 +遞 +遯 +遲 +遶 +遷 +遹 +遺 +遼 +邁 +邇 +邉 +邊 +邙 +邠 +邲 +邽 +邾 +郃 +郄 +郇 +郋 +郞 +郟 +郤 +郪 +郳 +郷 +郿 +鄃 +鄆 +鄋 +鄑 +鄒 +鄔 +鄖 +鄗 +鄘 +鄚 +鄜 +鄠 +鄤 +鄧 +鄩 +鄫 +鄰 +鄲 +鄳 +鄴 +酃 +酆 +酈 +酎 +酏 +酔 +酢 +酩 +酴 +酺 +酼 +醁 +醂 +醃 +醅 +醞 +醢 +醣 +醮 +醯 +醾 +醿 +釁 +釆 +釋 +釐 +釒 +釓 +釔 +釕 +釗 +釘 +釙 +釚 +釤 +釦 +釧 +釩 +釪 +釭 +釴 +釵 +釷 +釹 +釺 +鈀 +鈁 +鈄 +鈇 +鈈 +鈉 +鈊 +鈍 +鈏 +鈐 +鈑 +鈔 +鈕 +鈖 +鈞 +鈢 +鈣 +鈥 +鈦 +鈫 +鈮 +鈰 +鈳 +鈷 +鈸 +鈹 +鈾 +鈿 +鉀 +鉆 +鉈 +鉉 +鉋 +鉍 +鉏 +鉑 +鉓 +鉗 +鉚 +鉛 +鉞 +鉟 +鉤 +鉦 +鉬 +鉭 +鉲 +鉶 +鉷 +鉸 +鉻 +鉾 +鉿 +銂 +銃 +銅 +銋 +銍 +銓 +銕 +銖 +銚 +銜 +銠 +銣 +銥 +銦 +銨 +銩 +銪 +銫 +銬 +銱 +銲 +銳 +銶 +銹 +銻 +銼 +銾 +鋁 +鋅 +鋆 +鋇 +鋌 +鋏 +鋐 +鋒 +鋕 +鋗 +鋙 +鋡 +鋤 +鋥 +鋦 +鋨 +鋮 +鋯 +鋰 +鋱 +鋳 +鋶 +鋸 +鋹 +錀 +錏 +錐 +錒 +錕 +錘 +錚 +錞 +錟 +錠 +錡 +錢 +錨 +錫 +錬 +錮 +錳 +錸 +錻 +鍀 +鍇 +鍈 +鍉 +鍍 +鍏 +鍔 +鍘 +鍛 +鍝 +鍟 +鍠 +鍥 +鍩 +鍬 +鍱 +鍳 +鍶 +鍷 +鍺 +鍼 +鍾 +鎂 +鎅 +鎊 +鎌 +鎓 +鎔 +鎗 +鎘 +鎚 +鎛 +鎣 +鎦 +鎧 +鎪 +鎬 +鎭 +鎰 +鎳 +鎵 +鏃 +鏇 +鏈 +鏊 +鏌 +鏐 +鏑 +鏓 +鏗 +鏘 +鏝 +鏞 +鏟 +鏤 +鏦 +鏳 +鏴 +鏵 +鏷 +鏻 +鏽 +鐃 +鐇 +鐈 +鐓 +鐔 +鐙 +鐠 +鐤 +鐦 +鐧 +鐫 +鐬 +鐭 +鐮 +鐲 +鐳 +鐸 +鐺 +鐽 +鐿 +鑀 +鑁 +鑂 +鑄 +鑅 +鑊 +鑌 +鑑 +鑛 +鑠 +鑣 +鑨 +鑪 +鑭 +鑰 +鑲 +鑴 +鑷 +鑼 +鑾 +鑿 +閂 +閆 +閉 +閎 +閒 +閔 +閘 +閜 +閞 +閦 +閨 +閬 +閭 +閰 +閱 +閶 +閹 +閻 +閼 +閾 +閿 +闆 +闇 +闈 +闊 +闋 +闌 +闍 +闐 +闓 +闔 +闕 +闖 +闘 +闞 +闡 +闢 +闥 +阭 +阯 +陁 +陔 +陘 +陜 +陝 +陞 +陬 +陸 +険 +隄 +隈 +隊 +階 +隕 +隣 +險 +隰 +隱 +隲 +隳 +隴 +隷 +隸 +隻 +雋 +雑 +雖 +雛 +雝 +雩 +雫 +雱 +霅 +霈 +霊 +霑 +霙 +霤 +霧 +霨 +霶 +霽 +靁 +靂 +靄 +靉 +靚 +靫 +靬 +靭 +靺 +靼 +鞆 +鞏 +鞞 +鞥 +鞦 +鞨 +鞮 +鞴 +韁 +韃 +韆 +韋 +韌 +韑 +韙 +韜 +韞 +韠 +韡 +韮 +韺 +韾 +頁 +頃 +項 +須 +頊 +頌 +頍 +頎 +頏 +預 +頑 +頒 +頗 +領 +頜 +頠 +頦 +頫 +頴 +頵 +頷 +頸 +頹 +頻 +頼 +顆 +額 +顎 +顒 +顓 +顕 +顗 +願 +顙 +顛 +顥 +顫 +顰 +顱 +顳 +顴 +颮 +颯 +颱 +颶 +颺 +颼 +飆 +飈 +飠 +飡 +飢 +飥 +飩 +飪 +飫 +飭 +飴 +飽 +餃 +餄 +餉 +餌 +餎 +餒 +餓 +餗 +餚 +餛 +餞 +餡 +餵 +餺 +餾 +餿 +饋 +饌 +饑 +饒 +饗 +饞 +饟 +饢 +馘 +馛 +馦 +馭 +馯 +馱 +馳 +馼 +駁 +駄 +駅 +駆 +駐 +駑 +駒 +駔 +駘 +駙 +駛 +駝 +駟 +駢 +駭 +駰 +駱 +騁 +騂 +騄 +騅 +騋 +騎 +騏 +験 +騖 +騙 +騤 +騨 +騫 +騭 +騮 +騶 +騾 +驁 +驃 +驄 +驅 +驌 +驍 +驎 +驒 +驕 +驚 +驛 +驟 +驢 +驤 +驥 +驩 +驪 +骯 +髀 +髎 +髏 +髑 +髒 +髡 +髭 +髲 +髷 +髹 +鬄 +鬅 +鬆 +鬍 +鬚 +鬢 +鬥 +鬧 +鬨 +鬩 +鬪 +鬬 +鬮 +鬯 +鬱 +鬹 +鬻 +魃 +魈 +魋 +魍 +魎 +魕 +魘 +魛 +魞 +魟 +魣 +魨 +魩 +魮 +魯 +魴 +魷 +鮀 +鮁 +鮃 +鮄 +鮊 +鮋 +鮍 +鮐 +鮑 +鮒 +鮓 +鮗 +鮜 +鮟 +鮠 +鮡 +鮣 +鮨 +鮪 +鮫 +鮭 +鮰 +鮸 +鮹 +鮻 +鯀 +鯁 +鯃 +鯇 +鯊 
+鯏 +鯒 +鯓 +鯔 +鯕 +鯖 +鯗 +鯙 +鯛 +鯡 +鯢 +鯤 +鯧 +鯪 +鯭 +鯮 +鯰 +鯶 +鯷 +鯻 +鯿 +鰂 +鰃 +鰆 +鰈 +鰉 +鰍 +鰏 +鰒 +鰓 +鰕 +鰗 +鰛 +鰜 +鰟 +鰣 +鰤 +鰧 +鰨 +鰩 +鰭 +鰮 +鰱 +鰲 +鰳 +鰶 +鰷 +鰹 +鰺 +鰻 +鰼 +鰾 +鱀 +鱂 +鱅 +鱇 +鱈 +鱉 +鱊 +鱒 +鱓 +鱔 +鱖 +鱗 +鱘 +鱚 +鱝 +鱟 +鱠 +鱣 +鱥 +鱧 +鱨 +鱮 +鱰 +鱲 +鱵 +鱷 +鱺 +鳧 +鳩 +鳰 +鳴 +鳶 +鳽 +鴆 +鴇 +鴉 +鴒 +鴓 +鴕 +鴗 +鴛 +鴝 +鴞 +鴟 +鴡 +鴣 +鴦 +鴫 +鴯 +鴰 +鴴 +鵂 +鵄 +鵎 +鵐 +鵑 +鵒 +鵓 +鵙 +鵜 +鵝 +鵞 +鵟 +鵠 +鵡 +鵪 +鵯 +鵰 +鵲 +鵵 +鵼 +鵾 +鶆 +鶇 +鶉 +鶒 +鶓 +鶘 +鶚 +鶡 +鶥 +鶩 +鶬 +鶯 +鶲 +鶹 +鶺 +鶻 +鶼 +鶿 +鷂 +鷉 +鷎 +鷓 +鷙 +鷚 +鷟 +鷥 +鷦 +鷫 +鷯 +鷲 +鷳 +鷸 +鸊 +鸌 +鸐 +鸑 +鸕 +鸘 +鸚 +鸛 +鸜 +鸝 +鸞 +鹮 +鹵 +鹹 +鹼 +麅 +麇 +麈 +麊 +麐 +麞 +麩 +麪 +麴 +麹 +麼 +麿 +黁 +黇 +黌 +黐 +黙 +黥 +黧 +黨 +黴 +黶 +黻 +黼 +黽 +黿 +鼂 +鼇 +鼈 +鼉 +鼐 +鼒 +鼕 +鼢 +鼩 +鼯 +鼱 +鼴 +鼷 +齒 +齕 +齡 +齣 +齦 +齧 +齲 +齶 +龎 +龐 +龑 +龔 +龕 +龜 +龝 +龠 +ず +梌 +叀 +晢 +媸 +錾 +鐖 +䰡 +櫬 +锱 +υ +鼗 +媪 +澴 +苈 +眴 +𝜏 +缱 +𝜶 +조 +晡 +≡ +ࠀ +н +廇 +嗛 +篚 +ώ +莰 +윤 +纚 +𢢞 +闼 +熌 +饎 +蓊 +倅 +년 +聭 +耩 +≅ + +≺ +诌 + + + +耰 +菗 +僦 +⇣ +甊 +冓 +缷 +枊 +沕 +𝐴 +❹ +형 +秾 + +щ +厹 + +˗ +疔 +䩦 +髴 +⨂ +莏 +≧ +垆 +銌 +桤 +隤 +ギ +벽 +⑸ +✘ +̣ +辶 +铼 +게 +へ +獶 +藳 +祍 +黉 +跱 +⽬ +埙 +だ +蓣 +亯 +구 + +鹎 + +⾃ +楩 +⌘ +汏 +虒 +谖 + +﹜ +劖 +じ +瑇 +㮑 +揕 +⇔ +𤔲 +薉 +𝑾 +硗 +〈 +は +盍 +狽 +ж +я +挆 +槨 +γ +阏 +襕 +𝜉 +❖ +└ +총 +시 + +ν +刲 +ด +嬲 +绤 +𝐰 +飦 +扱 +帻 +辀 +廴 +к +蔖 +– +같 +熭 +巣 + +裛 +𝑶 +蓺 +蔊 +그 +匳 +玚 +Ц +璲 +련 +𨒅 +변 +㤵 +饫 +𨚵 +X +筇 +镡 +ⅳ +𝛿 +轸 +𝑭 +鋈 +鵩 +縁 +˙ +ɿ +𝒴 +㝮 +𝜂 +栠 +橦 +緇 +肰 + +跼 +䭜 +蜅 +訸 +㻶 +𝑉 +เ + +嚢 +鼔 +𝒆 +閫 +阃 +𥞹 +杪 +誊 + +鲋 +骍 +τ +莾 +凊 +﹡ +箚 +蛱 +樯 +喾 +幞 +欕 +搡 +戉 +瘖 +᙭ +砟 +ས +∤ +ี +メ +𝝁 +穑 +渶 +𦬁 +서 +⊗ +穇 +⌊ +を +鐻 +蘤 +≫ +◐ +汙 +蒒 +⑷ +蹨 +x +裥 +嶤 +ァ +従 +침 +稂 +𪧶 +で +𝑹 +⑫ +闩 +槫 +舮 +𝑿 +戁 +간 +戯 + +ོ +æ +わ +チ +砉 +Ψ +劂 +・ +В +鬭 +钔 +盭 +黓 +⎯ +𝐏 +함 +钪 +𝑸 +澰 +래 +藒 +龃 +瞀 +伧 +♂ +¹ +ƞ +澼 +餍 +倶 +ð + +嚱 +跬 +貙 +磿 +娬 +氿 +鹘 +𝐁 +摅 +ヱ +傰 +พ +湝 +ˆ +Л +翾 +≃ +에 +滫 +С +嫕 +あ +㈣ +ⅇ +垧 +⺮ +∠ +躐 +硌 +眢 +乧 +𝑐 +泃 +轫 +↔ +㎝ +≜ +⽇ +撟 +⟹ +脿 + +㸁 +靯 +う +⁠ +懬 +搷 +瀓 +ˁ +ⅲ +훈 + +お +𝛄 +瓅 +葻 +猋 +ら +⾳ +喣 +⽿ +č +鈎 +⑤ +å +阸 +름 +て +圮 +⚫ +⻄ +胨 +琠 +戄 +箄 +𝒳 +鼍 +й +⼲ +廪 +睃 +囫 +͞ +죄 + +호 +み +饩 + +⊆ +х +欚 +瘚 +≯ +瞗 +ž +嗵 +근 +ま + +⾔ +罥 +ʹ +鼃 +д +✳ +ゃ +悊 +𝐅 +영 +@ +ɣ +𝛷 +𝜁 +ǜ +犄 +⽂ +ཆ +胒 +﹦ +谫 +є +・ +𝐻 +狺 +백 +舳 +𝑁 +ษ + +𝜓 +𝒦 +盕 +유 +𪯐 +茑 +礤 +거 +コ +肂 +鸻 +ã +⑬ +铚 +걸 +磳 +綷 +𝒚 +舭 +腚 +㈩ +榱 +𝐌 +畾 +馐 +罾 +∕ +𝔛 +𝑬 +ç +楬 +櫽 +顼 +阋 + +꺼 +諛 +̌ +้ +㮀 +乵 +沬 +⼀ +ư +鲠 +䜩 +樉 +鹈 +搧 +轾 +䟒 +등 +𝝉 +잠 +짤 +า +蘨 +愪 +ྟ +慪 +鮝 +𝛑 +び +𥞪 +𝐾 +レ +교 +ྲ +달 +𝐩 + +殹 +踇 +狥 +ベ +미 +매 +⑭ +钁 +Θ +못 +𝜇 +侂 +ę +ฟ +邶 +諣 +颃 +𡢕 +昑 +𝒖 +讱 +﹤ +緵 +骢 +朢 +骘 +ℜ + +ゞ +愬 +鹬 + +ッ +ར +급 +‚ +鸶 +蒫 +餽 +蓃 +ข +辠 +ğ +氺 +暆 +笿 +迚 +甝 +ή +徼 +旣 +ϖ +ヲ +倕 +匽 +蓱 +리 +剷 +ู +逪 + +나 +堋 +焠 +Δ +炑 +爫 +蒖 +𝒓 +悫 +𝛱 + +𝐮 +騧 +ⅴ +饾 +贠 +𝚲 +崀 +磀 +柤 +肈 +⻮ +鶄 +狲 +跫 +지 +鳇 +痖 +跂 +秫 +ʒ +합 +ไ +迨 +𝜐 + +屦 +𝐶 +; +辎 +∵ +鴁 +撏 +ς +⟶ +薮 +㟪 +犮 +ب +ビ +藡 +甏 + +眡 +訿 +鉥 +媵 + +柫 +𝒞 +ь +萏 +ค +트 +訮 +汚 +眚 +〞 +き +ほ +刖 +髄 +蘀 +や +ة +诹 +т +ན +𝒃 +掼 +䓁 +僥 +팰 +枵 +✔ +³ +ེ +鼖 +屖 +鍮 +砇 +カ +舐 +牴 +𝜎 +㡿 +攉 +⽤ +晅 +労 +蛕 +𝐽 +Ʃ +く +穽 +孥 +𝒏 + + +ɬ +玦 +檮 +ョ +∥ +중 +萯 +呲 +䰈 +새 + +釶 + +ɢ +⊂ +臮 + +梼 +デ +骖 +ス +蹩 +羼 +▽ +Π +≪ +匛 +𝐼 +稊 +่ +茠 +䢉 +秝 +茐 +齎 +そ + +芕 +噚 +癉 +蹱 +蓜 +𝐬 +ϑ +е +瀋 +ϕ +χ +镟 +霂 +隒 +▱ +ヶ +撄 +둔 +¢ +こ +跲 +莻 +𝑠 +輮 +็ +堠 +푟 +赕 +◦ +ا +런 +帒 +汘 +̱ +尥 +蘠 +𦟜 +옥 +腠 +夨 +⩾ +𝑝 +歯 +刱 +여 +け +溘 +釰 +肍 +擗 +矱 +鍌 +芧 +술 +발 +鼫 +舾 +⼯ +𝝓 +ƒ +怸 +པ +𣐼 +疎 +铷 +Η +⑺ +蒏 +림 +⃛ +゜ +褴 +𨒪 +れ +揢 +さ +櫫 +櫑 +䋎 +灋 +櫜 +诓 +❶ +𝐃 +Q +袳 +ℒ +菂 + +荙 +ℛ +⁄ +堙 +贋 +̅ +鳏 +̂ +、 +茍 +泜 +𝑈 +즉 +噔 + + +迓 +Ⅸ +❷ +이 +_ +⾊ +Ö +铥 +耹 +䶮 + +무 + +セ +饳 +อ +篾 +통 +‒ +ย +덕 +말 +艨 +Ω +𝐨 +螓 +澐 +巠 +⋅ +钶 +도 +鸱 +齍 +恑 +褛 +剟 +준 +勶 +𠟠 +ß +箅 +𝑆 +悃 +蘥 + +Ξ +𝑘 +妣 +𝑖 +𝐑 +纡 +釿 +⺌ +ヴ +𝕀 +涻 +箙 +塚 + +⼠ +墈 +∷ +疴 +ク +ㄕ +𝒂 +蒪 +蓡 + +鷇 +瘏 +𣹳 +橰 +嵚 +帀 +주 +ド +盓 +爇 +φ +觋 +𝜑 +钍 +화 +표 +Ɛ +篰 +명 +週 +с +蓛 +裢 +穜 +㱃 +玊 +鲕 +蒕 +箪 +⑯ +苽 +矦 +偰 +盝 +佊 +僨 +駉 +𝑳 +머 +ª +絅 + +锒 + +苆 +ั +𝛻 +碹 +咺 +竝 +и +づ +강 +辁 +́ +铽 +纩 +齑 +𝝎 +어 +ユ +躡 +𝒄 +ซ +畛 +鸰 +ླ + +骉 +❸ +揲 +廃 +湋 +𝑲 + + +旤 + +蹷 +钌 +국 +豙 +鬳 + 
+ɛ +轳 +俜 +眄 +萮 +𝐡 +颵 +箓 +魑 +𝑅 +漍 +ℤ + +逡 +학 +浖 +ょ +¬ +怴 +𝛤 +怿 +祌 +纥 +𝒑 +⃑ +棅 +笵 +낭 +栦 +⑰ +บ +𝔽 +𝑇 +埝 +⽓ +孱 +埶 +匜 +鸼 + + +벌 +ル +锸 +斫 +妟 +뽀 +昬 +댁 +ʂ +暯 +夳 +ノ +堞 +懘 +榼 +鞫 +오 +𝑡 +偑 +戗 +∴ +伥 +끝 +𬌗 +稯 +岜 +Ε +犲 +𩓞 +연 +鹚 + +ག +诜 +嗍 +倥 +鳣 +庑 +屾 +雚 + +椄 +颏 +酤 +𝒋 +欛 + +း +려 +缋 +¾ +ゴ +籑 +笤 +鞛 +鏺 +蓒 +설 +緍 +⑩ +迀 +鼋 +ɮ +위 +锪 +∨ +滆 +€ +躅 +鋓 +柀 +䐶 +啎 +𝛵 +骃 +ć +갈 +卨 +い +𝑺 +鸲 +壻 +偯 +𝑞 +譖 +곤 +溍 + +噫 +순 + +𝑽 +ы +赑 +蓸 +鸮 +稃 +っ +詗 +으 +⨀ +屮 +俦 +伛 +畱 +늬 +𝑂 +朼 +沰 +겨 +з +骀 +鸩 +𝜈 +º +苊 +诎 +皤 + +하 +̀ +砑 +凷 +翄 +𝑛 +赪 +≮ +浗 +𝐍 +û +オ +ƹ +𝜅 +묘 +曛 +鳊 +𝛩 +癹 +磒 +ば +⑨ +礆 + +乼 + +∽ +褱 +藴 +縶 +觥 +に +식 +凫 + +佥 +槷 +阍 +䰍 +졸 +전 +葢 +㝸 +も +⻔ +遽 + +蹰 +𝛺 +裏 +། +를 + +ろ +짭 + +ぐ +싶 +渰 +⊤ +浳 +൯ +∃ +옛 +蟞 +과 +芠 +飖 +⼆ +敶 +粝 +𥃩 +坿 +䩉 +𝑯 +「 +矰 + +사 +𝛶 +𝑎 +挐 +푎 +동 +ℝ +Γ +︃ +珒 +鹍 +κ +鑓 +傁 +惓 +臿 +丣 +悒 +侔 +ñ +訳 +櫭 +賛 +觏 +辂 +覅 +濓 +堿 +擪 +฀ +𝑵 +扨 +嫫 +珰 + +寃 +𝒔 +曱 +髣 +인 +≌ +莵 +踳 +ⅱ +Ø +⌋ +¯ +挢 +̇ +﹪ +哕 +𦫳 + +襛 +昳 +铙 +铫 +軱 +汔 +ネ + +躩 +옷 +ถ +엄 +皊 +臑 +𧄝 +𝑃 + +䢅 +𝐝 +𝒍 +ℱ +𝐓 +蓾 +𝑻 +䋁 +裼 +개 +ത +𝒊 +僪 +瞂 +𦞠 +요 + ̄ +荍 +𝜔 +ф +峣 +庋 +檏 +袢 +绬 +Σ +향 +钫 +え +枅 +≝ +荦 +들 +勍 +ö +𝒕 +툰 +遬 +𝐵 +擧 +咢 +钘 + +𝒢 +Ⅷ +➢ +讧 +ω +簟 +廐 +刳 +阘 +б +⊘ +髟 +臓 +루 +⎧ +诳 +у +诮 +蠪 +梹 +耤 +パ +ن +∆ + +𝑫 +น +べ +坼 + +𝑤 +褽 +憼 +심 +∇ +迖 +휆 +叚 +없 +⼿ +钖 +斠 +䪵 +胠 +𝜋 +殽 +剜 +⾝ +− +慸 +𝛽 +椔 +⟩ +皦 +筚 +奰 +Å +물 +𝒐 +嫱 +钆 +ï +∪ +⇢ +ş + +㖞 +璗 +葸 +殢 +𝜺 +夲 +骒 +ち +회 +선 +睒 +轡 +ξ + +鲧 +镞 +碜 +놈 +Å +紴 + +⇤ +ྷ +⑪ +喟 +𦼰 + +蔩 +埦 +𝜆 +耋 +˜ +한 +舣 +馓 +⑻ + +ɐ +椘 +し +莐 +辔 +憰 +碛 +⁃ +飏 +颀 +跽 +⇥ +赀 +撺 +襜 +ɒ +袧 +л +정 +꾸 +콩 + +박 +缑 +柈 + +樲 +𝑮 +詘 +µ +𝑷 +鹪 +𝛼 +차 +讬 +掯 +硎 +𝑨 +舄 +‹ +누 +バ +ก +萀 +兇 +숙 +貍 + +踈 +친 +𝜽 +摰 +甿 +坜 +遑 +삼 +배 +Μ +을 +譊 +沩 +빈 +飑 +钹 +镨 +鐉 +宭 +桗 +ə +歺 +А +⇒ +锞 +𝒪 +棊 +愓 +莶 +琲 +འ +プ +་ +𝐿 +艟 +欬 +ิ +в +ų +纻 +㎎ +婄 +Ρ +歟 +椢 +粜 +종 +خ +ね +剞 +베 +斄 +幠 +ト +疛 +よ +╳ +醳 +군 +諂 +芰 +穋 +禆 +길 +秊 +噙 +y +锓 +⁵ + +拠 +Ĥ +𝑒 +窬 +抈 +︰ +퐶 +铳 +각 +ش +錉 +ù +臝 +闶 +𝒟 +芐 +韎 +권 +萚 + +ど +羮 +镕 +欔 +瘐 +받 +𝑚 +𢦟 +髤 +腙 +⽽ +상 +铘 +장 +𤇈 +ニ +凂 +ȷ +육 +а +살 +雠 +荑 +태 +穤 +ɯ + +圬 +楑 +단 +ง +⾯ +λ +⁰ +성 +萿 +缌 + +毣 +矅 + +푚 +˘ +貣 +∂ +은 +ė +䌛 +경 +せ + +拫 +⅞ +餕 +鐨 +翭 +ื +ɵ +⺍ +Փ +▬ +ว +희 +짐 +屙 +洫 +ေ +∏ +臜 + +剸 +芓 +운 +∓ +계 +祔 +鶵 +𝝅 +柂 +訢 +禊 +扽 +恫 +𝜙 +狢 +勠 +ི +𝜒 +จ +铯 +ྱ +𝑙 +蟇 +울 +莤 +牱 +𝒗 +詇 +靃 +殓 +栍 +踟 +ي + +鲄 +㓷 +贳 +ナ +鲓 +𝒙 +薁 +Χ +侪 +恌 +㰤 +목 +̄ +丱 +― +𝛔 +𝑔 + +鸷 +﹣ +籢 +脢 +δ +窭 +‐ +阒 +석 +아 +ォ +두 +𝐦 +浬 +搰 +褃 + +ལ +乇 +腘 +眊 +偬 +Ⅻ +ℳ +畤 +芟 +曈 +飧 +堌 +═ +谶 +櫝 +嬑 +冋 +嗌 +抜 + +腜 +공 +𝜕 +ん +鲭 +郐 +酓 +𝑍 +⾏ +⼹ +㐬 +고 +𝟑 +缯 +碤 +濩 +ʰ +佻 +Υ +∗ +賅 +집 +跹 + +ɾ +蔧 +다 +栫 +庰 +欤 +洿 +捾 +𝜍 +𝑄 + +攆 +夂 +檿 +荜 +ц +柖 +唅 +ท +ɦ +讦 +습 +锿 + +纆 +檑 +殰 +歠 +鼑 +Ä +و +☑ +緦 +悁 +偞 +ส +絭 +저 +踯 +騀 +쉰 +蒷 +揗 +儵 +ρ +薅 +ャ +‗ +犒 +旟 + +승 +ང +소 +𝛴 +瀜 +锜 +𣱼 +谳 + +軑 +ポ +楁 +𝑜 +袚 +ྐ +Á +𝑑 +鲀 +牾 +鬌 +푥 +¤ +呴 +‑ +✓ +민 +⼦ +ⅰ +⽉ +擿 +ч +➝ +가 +≳ +漥 +踖 +枧 +莝 +⻘ +傧 +𝑢 +ю +赍 +q +絫 +о +ア +ℐ +髫 +齢 +湎 +甓 +揿 + +ℋ +怹 +자 +⑦ +져 +椟 +鶟 +浕 +ー +𝛂 +偾 +⃗ +喑 +鹡 +≦ +磽 +ⅆ + +葂 +鶱 +ン +貇 +褡 +▴ +것 +喿 +つ +闚 + +盳 +𝟒 +雔 +洭 +殫 +楎 +£ +^ +葲 +𝟖 +眗 +棸 +潏 +熕 +𝟐 +품 +သ +樳 +⁴ +イ +㈢ +렴 +脰 +돈 +⑮ +钲 +𝒘 +訽 +爟 +幨 +枻 +亷 +猃 +σ +黩 +嘑 +榹 +⁡ +鍧 +𝑋 +枘 +𝑥 +원 +睚 +饔 +酲 + +顸 +람 +𝐫 +脁 +½ +긴 +ʔ +Ⅰ +旆 +죽 + +궐 + +奡 +㭃 +杝 +忾 +ม +掮 +饍 +摛 +쓰 +慊 +踣 +푅 +悽 +礅 +毄 +俓 +데 +冣 +만 +驖 +𤉣 +̃ +廾 +匵 +阇 +𤸫 +戣 +刌 +剕 +杅 +ο +蒥 +ː +癃 +蒬 +â +À +嗥 +우 +ケ +」 +聩 +ë +吽 +檌 +苰 +⑹ +Ÿ + +⑥ +노 + +˄ +鸫 +廛 +㱿 +鹛 +赟 +℅ +菿 +辳 +阼 +𝒇 +哋 +♀ +氕 +砤 +† +舡 +偝 +飜 +넓 +鈜 +ầ +닝 +禚 +匲 +〉 +Ф +锊 +ϵ +∙ +惛 +箧 +𝑦 +宬 +𝑀 +臙 +𩡶 + +¡ +潀 +수 +敃 +か +أ + +苌 +饘 +咝 +𝑼 +∘ +涷 +樍 +厣 +蝼 +墻 +Ñ +秅 +︒ +∅ +↵ +葹 +ỽ +𝑗 +た +일 +蒊 +치 +竢 +¨ +佢 +潵 +櫼 +軵 +𧕿 +倨 +歱 +瘅 +𝐭 +黾 +脼 +ê +땋 +鶷 +ё +鹯 +掲 +\ +𨳡 + +Г +ι +탁 +溞 +殪 +菭 +𝛥 +擛 +録 +㥥 +∀ +锇 +锃 +편 +餬 +瘻 +ཟ +豤 +로 +ɸ +ℎ +랑 +ʃ +鼹 +臬 +ŋ +巵 +譁 +w +窳 +蓔 +䉜 +浧 +酂 +⒀ +མ +椠 +槖 + +衄 +𨐨 +̿ +ご +⺗ +顇 +𝒫 +搕 +ミ +𪪋 +« +䣛 +鹩 +鴈 +п +는 +䋰 +𝛹 +犕 +呌 +𝒮 +𝑪 +鋎 +嚻 +杚 +䕊 +蠜 +ザ +𝐂 +☐ +𥘔 + +赜 +Ν +廦 
+瓾 +↦ +龉 +⽅ +棂 +𝜌 +큰 +踔 +ラ +。 +剤 +황 +⅜ +僈 +骈 +ɕ +О +м +椑 +𝑟 +纇 +𝑓 +𝐖 +Ш +⎦ +旹 +삶 +ึ +囵 + +す +ⅈ +ت +踽 +陴 +餱 +ป +막 +紟 +방 +剀 +簖 +闬 +キ +鋉 +납 +タ +谵 +詑 +족 +垔 +荋 +旰 +𥘸 +窾 + +신 +𝐎 +𝛃 + + +﹒ +縰 +猲 +郘 +파 +⊕ +镘 +𠊃 + +呔 +𝜗 +ʊ +𝛬 +辏 +陭 +𝑕 +庴 +ʐ +瀌 +倄 +蕞 +ل +絷 +러 +든 +བ +柅 +› +傚 +睠 +Ⅺ +饐 +蔮 +ɟ +莈 +𤨨 +⋯ +犭 +𝜃 +𥹳 +초 +⎞ +遌 +眇 +蓗 +綅 +토 +裰 + +⼼ +虘 +𝑌 +觯 +漶 +钤 +讒 +げ +螬 +鲹 +咷 +蓞 +僂 +𝒉 +が +桮 +포 +쟈 +柽 +ウ +綟 +缟 +䁾 +钭 +烕 +厠 + +孭 +礉 +­ +谲 + +⼤ +𝒒 +旒 +㫄 + + +鳓 +挈 +재 +ད +𧊒 +蝝 +𝐺 +懱 +芢 + +ⅼ +Ú +𝑱 +翯 +芶 +厽 +遉 +鲒 +η +𝛾 +趮 +虆 +汸 +嬖 +糈 +窸 + +추 +棬 +懔 +硁 +ぶ +抟 +胕 +𝑧 +⌦ +碫 +Ⅵ +속 +𝐚 + +Ç +행 +Ɵ +⑱ +贽 +箤 +р +敒 +衤 +풍 +⊛ +慉 +ψ +© +광 +ℕ +屣 +臌 +旵 +臁 +‡ +癎 +閡 +𡵂 +襐 +畟 + +萪 +娒 +瘼 +庳 +천 +觌 +Α +と +奁 +煃 +؟ +◯ +의 +攎 + +𝐞 +J +𢦒 +❑ +벤 +𝐒 +リ +蒉 +𝐱 +朹 +㈤ +„ +䗬 +Ι +ཀ +𡜵 +俣 +疬 + +墥 +吣 +У +榀 +絟 + +旓 +𝐛 +𝜷 +瑮 +≔ +笾 +ζ +김 +暵 +𝜹 +逶 +萙 +欇 +俧 +籴 +絰 +揶 +ǔ +宂 +伩 +Ө +菞 +梕 +エ +蹚 +제 +Š +沝 + +𦳢 +𝒱 +揠 +ℏ +𝐹 +箝 +규 +氒 +⼊ +鰌 +筮 +⼩ +대 +𝔾 +䄃 +𝐸 +﹕ +부 + +刓 +ȵ +缛 +기 +缊 +𝟎 +𨟻 +め +捃 +⽚ +鍪 +灆 +迻 +⾦ +荗 +v +𡒊 +汍 +斲 +姕 + +儴 +偒 +辤 +芀 +蝥 +ń +臥 +椀 +㪚 +š +담 +ø +䈰 +睏 +テ +﹐ + +참 +楘 +𝒌 +劓 +ɪ +醑 +绹 +諓 +𝛉 +ズ +怼 +埘 +臽 +잡 +镢 +𝜖 +진 +踬 +谠 +﹥ +髺 +腞 +현 +嘭 +ʑ +蓌 +〜 +锠 +蓶 +る + +∼ +枎 +緗 +薠 +芈 +耪 +𝒎 +謼 + +瘳 +诨 +忤 +œ +⇡ + +鲣 +ⅵ +Τ +㯮 + +㶲 +ⅹ +䙴 +坴 +馑 +缹 +椦 +ô +⼈ +フ +誏 +э +哙 +愎 + +埽 +祲 +마 +殍 +菋 +懑 + +辇 +鍤 +𝜀 +ɜ +り +𝐷 +㕞 +瑵 + +蔨 +Ⅶ +镴 +ภ +𝝀 +𢶡 +⃝ +ơ +柢 +𧴗 +ʁ +攙 + +な +𝑏 +挴 +餧 +絇 +怄 +曏 +洟 +軷 +∉ +咍 +⎪ +樛 +𝑣 +웃 +椊 +黢 +𝑩 +誩 +伓 +戠 +橥 +⟨ +豰 +懥 +涖 +綘 +詬 +ွ +˚ +刽 +ɑ +격 +稖 +𝟏 +禝 +墦 +𝑊 + +択 +檙 +∝ +颟 +诂 +𝐧 +踲 +𝜛 +𝑰 + +鲬 +⁸ +ǎ +문 + +紬 +楲 +䊭 +枨 +膑 +õ +던 +Ⅴ +秏 +馔 +拊 +缗 +隠 +牀 +淲 +鬰 +綵 +鶑 +詎 +慙 +劒 +閲 +鎡 +淒 +屨 +鉢 +扃 +鳬 +閤 +馿 +翛 +駸 +蛩 +驂 +嵲 +覩 +牋 +湲 +蓴 +賸 +遡 +翫 +嫋 +惻 +妬 +罇 +龏 +鵷 +閙 +鎸 +朅 +巉 +僞 +洶 +磧 +筯 +慇 +鷁 +斾 +斸 +酹 +幘 +羶 +閽 +薤 +泝 +覯 +怱 +缾 +氳 +躊 +檝 +擣 +虀 +誚 +甃 +艤 +櫳 +醖 +壚 +涴 +崦 +秪 +潄 +濆 +駡 +坰 +闉 +縑 +躕 +颻 +燠 +輙 +鏁 +嶮 +薖 +輈 +綆 +覷 +蹔 +攄 +鐩 +鑱 +羃 +轓 +麤 +驀 +欵 +亙 +朮 +邐 +箠 +怳 +鋩 +鷃 +篘 +蔌 +諄 +旐 +慍 +欷 +頽 +蜺 +覊 +禋 +秔 +蜩 +嚬 +櫺 +軿 +痾 +笻 +猱 +毳 +泬 +竚 +齪 +搘 +欻 +釂 +嚥 +誑 +籩 +韉 +幙 +舠 +飣 +颭 +颸 +歔 +屧 +巇 +艫 +壖 +牓 +薝 +銛 +皪 +枿 +剗 +歘 +鸂 +邅 +衒 +荄 +鴂 +嫗 +顦 +瀼 +繄 +搆 +狖 +貰 +醆 +秖 +蹀 +頳 +纒 +憇 +溽 +澦 +讐 +灩 +箎 +螿 +鰥 +飀 +澒 +矻 +枌 +擡 +鷖 +齬 +纊 +挼 +齟 +錙 +屩 +蠧 +氅 +漭 +躚 +翺 +瘵 +螘 +鵶 +㶁 +斵 +饜 +岏 +䍦 +哢 +彴 +豗 +靨 +鋋 +禳 +覘 +鞚 +擻 +涘 +溷 +沴 +嶓 +褊 +罏 +齏 +醲 +繅 +舴 +釃 +厖 +闤 +閴 +藂 +譍 +糲 +籞 +躞 +餳 +遰 +倐 +嘖 +鷀 +暍 +韝 +蘺 +齁 +醽 +醨 +憀 +䕷 +跕 +拶 +垤 +鸎 +漙 +躭 +傴 +蕕 +嘒 +晻 +糵 +閈 +嫠 +斁 +鶗 +詶 +囘 +羇 +橛 +鞬 +磈 +粃 +阨 +塿 +敧 +氊 +芼 +襆 +迍 +鬛 +憒 +釅 +蓐 +奬 +頲 +髠 +抔 +葅 +槧 +跧 +揜 +渇 +餔 +罝 +裯 +蹁 +椶 +幰 +逰 +饁 +棃 +噀 +轔 +囁 +惸 +騑 +呪 +鬐 +綌 +醤 +䆉 +艣 +鐶 +夐 +摐 +鸇 +睎 +羝 +紼 +鞿 +噉 +磑 +闗 +筩 +駮 +蹌 +縢 +闠 +鬙 +谹 +榾 +觳 +皁 +晼 +啗 +簣 +騕 +蹣 +麰 +觧 +怊 +朞 +鱍 +蟣 +畚 +蠁 +舁 +瞇 +劚 +鰐 +籯 +鬖 +柮 +飱 +帟 +酇 +崿 +霪 +緌 +踆 +欃 +縟 +搦 +琖 +搥 +倀 +觫 +遝 +嚅 +聵 +藋 +筴 +喁 +窻 +穏 +牥 +鎩 +礲 +膴 +鞵 +醵 +斚 +縲 +裀 +齷 +騃 +袠 +谺 +靦 +帬 +鯈 +曀 +灔 +崷 +趂 +徯 +闃 +洧 +獪 +稏 +煢 +歈 +呶 +壈 +襃 +旴 +檟 +簦 +谽 +箵 +謡 +慝 +餖 +稌 +朣 +禖 +嚀 +嵂 +黷 +甖 +洑 +釡 +蕝 +甆 +翣 +篸 +隮 +滃 +裌 +蔀 +籖 +秬 +鷴 +啅 +慼 +捄 +咮 +睟 +譎 +嘷 +駃 +檥 +蹐 +窊 +駞 +雘 +趯 +篲 +讋 +睍 +毰 +憗 +鳷 +嚲 +圞 +歃 +緼 +賫 +籋 +繐 +麏 +灕 +礧 +歜 +飇 +鵁 +疢 +賖 +窆 +螮 +毹 +硉 +耡 +甔 +篛 +娭 +髩 +燋 +輜 +籧 +撝 +嬙 +徃 +驦 +𡏖 +麕 +馹 +覔 +鶠 +褷 +綍 +螗 +嗈 +彯 +篨 +炰 +鄮 +噞 +尅 +鷰 +鋭 +饉 +霢 +䔩 +坱 +裓 +帨 +忺 +豅 +栱 +謦 +傯 +誵 +骭 +潩 +鬒 +嵫 +悮 +扊 +扅 +轢 +惝 +臈 +舃 +鞾 +譟 +袵 +眎 +簏 +埸 +堧 +憸 +雰 +腷 +嵓 +隩 +趄 +墐 +褦 +艑 +狴 +玿 +竪 +恧 +姱 +抆 +恓 +霣 +躓 +鞲 +晬 +簴 +唼 +曵 +褕 +罣 +縐 +衘 +巃 +攲 +輀 +貎 +哳 +醭 +鋣 +僛 +迕 +蠭 +膓 +欝 +洊 +敺 +纎 +栟 +鞓 +蛬 +灺 +痏 +恡 +踸 +霔 +濵 +昻 +鉘 +楖 +竛 +竮 +窱 +幬 +慤 +儗 +黮 +嘐 +睆 +頇 +麑 +壼 +㦸 +顋 +瘥 +苖 +韈 +盻 +袷 +矼 +塼 +鐍 +傞 +苶 +吷 +噇 +鶖 +僣 +髧 +䅉 +鯫 +襏 +縳 +蠨 +痡 +髽 +剉 +蝱 +鄽 +匼 +嚚 +襫 +缿 +鵊 +燖 +忸 +摝 +攅 +牷 +氎 +騣 +颿 +虡 +腯 +漘 +矓 +祫 +顢 +綀 +弮 +柙 +蔾 +胾 +筤 +馽 +砆 +冩 +賙 +唶 +麛 +簜 +蹏 +屼 +鞶 +煑 +踠 +愀 +蠒 +頬 +韲 +戞 +畆 +笊 +搨 +捽 +絙 +覉 +澨 +趫 +矹 +穮 +愠 +劘 +轣 +卭 +鼪 +杕 +轗 +擐 +蚿 +恚 +檛 +𩕳 +靆 +轕 +餼 +頮 
+槹 +蔉 +皜 +扄 +鮆 +轑 +蹡 +嵽 +甎 +蟈 +橅 +笴 +膰 +蕣 +澘 +髿 +樕 +褵 +蜋 +窼 +櫧 +雊 +胷 +嘵 +麄 +裋 +繢 +啐 +臛 +簁 +巓 +羜 +攧 +簮 +壊 +齩 +晹 +臲 +鬵 +齠 +媮 +幮 +壍 +蠛 +槜 +羖 +窓 +隃 +嚘 +輳 +籹 +凴 +崕 +獍 +嗸 +趦 +囅 +戺 +涬 +諉 +箯 +輊 +桹 +嵷 +㲲 +愊 +蒱 +洚 +赩 +輴 +幈 +齔 +嗁 +阽 +圠 +荈 +碔 +揎 +巀 +洏 +卼 +𨁝 +痁 +黳 +嗾 +䆗 +戃 +蕆 +頋 +悤 +掎 +㯝 +吚 +猘 +鮎 +鬴 +墁 +飋 +呿 +窀 +沲 +枒 +窌 +爼 +頞 +譡 +鶋 +湩 +㦬 +僾 +斒 +醼 +鶂 +磥 +揫 +犗 +齗 +鄶 +囏 +崪 +爞 +籓 +斮 +觝 +嵸 +驔 +䨴 +頺 +萑 +珓 +牸 +闒 +凘 +悢 +蟭 +濈 +嬄 +翽 +旍 +鶢 +罋 +輠 +怩 +頖 +趍 +壝 +嫮 +蕋 +踦 +轇 +眘 +巗 +嶭 +糓 +甽 +籺 +矟 +佖 +絏 +憮 +懡 +駈 +擕 +淟 +皡 +膋 +潨 +鳲 +趠 +麨 +頩 +漻 +輗 +墄 +賮 +㴩 +莟 +縦 +岝 +醻 +曚 +䙰 +噭 +醥 +筰 +躧 +踘 +鑕 +咈 +僶 +鶊 +鬂 +聼 +騐 +壒 +磎 +歗 +淈 +隟 +狃 +縋 +媻 +趲 +惙 +呫 +聮 +羾 +尫 +佽 +髼 +繋 +鬘 +旜 +疐 +阬 +䰐 +塈 +徤 +祊 +灂 +祅 +樷 +颾 +凟 +頀 +蠏 +塒 +衹 +婬 +裩 +粞 +憯 +匭 +筈 +盬 +霮 +黕 +靮 +伻 +緺 +瘝 +羑 +醸 +樝 +僎 +絓 +噆 +愞 +痗 +愽 +岊 +黤 +湑 +搉 +綯 +焮 +疉 +楛 +玼 +喤 +粔 +飂 +贐 +緉 +覰 +胔 +鞳 +摑 +墢 +斅 +誶 +僝 +鹺 +諌 +齅 +嵼 +讟 +冦 +脝 +婣 +緐 +茰 +飶 +欎 +慁 +抝 +瘉 +𡎺 +鈯 +瘃 +麫 +匊 +窞 +羓 +氄 +嚌 +姤 +橑 +駬 +冺 +騠 +㕙 +楶 +靸 +圎 +尀 +䙀 +鄏 +軃 +竁 +㹠 +刜 +剨 +罛 +鏹 +鬉 +簨 +藭 +藷 +僇 +瀫 +袨 +忮 +冡 +齯 +楪 +囋 +蟉 +醱 +尠 +牣 +攟 +袿 +齾 +甞 +啑 +潚 +樐 +絖 +酖 +觖 +骹 +嶅 +玃 +嫜 +廹 +儤 +矂 +艓 +挱 +骳 +嵳 +洴 +礓 +厪 +﨑 +禜 +籊 +瓻 +彛 +狁 +腪 +骾 +娯 +罻 +璅 +簳 +姢 +猰 +眹 +䴥 +堘 +搯 +怤 +緫 +聫 +涊 +熛 +輤 +䡾 +譌 +髇 +攛 +稭 +媕 +鬷 +跰 +縚 +鉧 +踧 +嚭 +襮 +藞 +滮 +颷 +荂 +蓰 +怫 +閧 +臕 +稛 +怗 +闑 +抶 +薶 +嶕 +瀺 +𥫗 +墝 +埆 +皥 +惷 +鞔 +鞺 +蟛 +瀡 +鎁 +酧 +恝 +齓 +嚄 +簔 +蟺 +㔶 +胹 +憖 +惄 +鸒 +貛 +軏 +縗 +蓻 +娵 +抺 +鼛 +虩 +歕 +矑 +繂 +襚 +倂 +廥 +諝 +虗 +弜 +兟 +繿 +偘 +翶 +肻 +棙 +斆 +碨 +醎 +蟢 +銙 +躠 +櫩 +椮 +絀 +鷾 +溳 +詖 +葓 +谼 +𦩘 +韔 +翿 +呑 +馡 +騊 +堁 +蓏 +䟃 +頟 +渢 +趑 +堄 +滛 +擫 +豭 +騩 +騘 +䍧 +彍 +忭 +餙 +馺 +忩 +芣 +矴 +噂 +滍 +慫 +𨍏 +怲 +扵 +搊 +昩 +嶻 +禬 +憃 +忼 +榰 +箾 +撁 +鈆 +袗 +脤 +騞 +哤 +螀 +靧 +梲 +囦 +魖 +褠 +䭔 +煆 +挃 +宷 +熉 +朘 +憭 +䒠 +謭 +鷤 +拕 +骫 +穾 +襭 +喓 +冞 +勩 +媢 +麚 +椓 +俙 +幐 +磝 +蜎 +灙 +漦 +㛹 +䭀 +㜷 +粻 +懟 +箳 +滣 +糉 +冐 +韤 +湱 +糭 +栳 +勌 +慱 +㸌 +罫 +筞 +霿 +躶 +玞 +磉 +罦 +祴 +媟 +猒 +擭 +恠 +嵁 +屴 +慆 +庬 +蟁 +㹀 +薧 +鷕 +渻 +朂 +愯 +齚 +蝻 +胏 +饙 +鳦 +鸃 +叅 +肧 +篂 +脗 +雺 +飰 +筀 +頥 +毶 +弌 +逓 +瞍 +絁 +鏚 +嚵 +攂 +醄 +奼 +獫 +絣 +靷 +畮 +褉 +棁 +揑 +楥 +橤 +襥 +蹮 +窔 +忪 +潠 +杇 +黲 +擄 +蚻 +蘙 +虙 +袐 +陿 +帊 +醟 +髖 +㞦 +鱭 +譸 +鮞 +栧 +扺 +脽 +擉 +岨 +黈 +餻 +佪 +遻 +鋟 +瞶 +廽 +懨 +墖 +玁 +籉 +宼 +鰋 +瑽 +垖 +酕 +漰 +戹 +蝛 +瑲 +阤 +褆 +儛 +䍽 +觕 +箘 +碯 +灨 +燀 +膇 +韀 +䳏 +詿 +禂 +韣 +踡 +碏 +尵 +莭 +庻 +篿 +狤 +㘞 +艭 +殱 +鵔 +槮 +猧 +劙 +獝 +㭊 +㾕 +蠚 +帤 +蹢 +蛚 +輼 +麀 +檃 +䰒 +䪫 +悾 +濳 +杗 +揾 +駏 +撦 +耈 +蟟 +狌 +鸖 +䨥 +餫 +鍰 +耉 +毚 +袽 +䱥 +慓 +䔿 +艖 +舋 +弰 +蠺 +嫓 +蚳 +髾 +喨 +鴐 +䍥 +韍 +柹 +掁 +薋 +攕 +飺 +凢 +麌 +嫰 +鑚 +黦 +葠 +吿 +栰 +踶 +芿 +穭 +啝 +筓 +褁 +稇 +顚 +䎘 +恇 +珷 +緪 +墠 +蛣 +蛜 +讕 +疻 +惎 +袝 +霡 +罸 +鬽 +苢 +喭 +飊 +唎 +澾 +襋 +皭 +廏 +蔿 +穊 +䝟 +駊 +獹 +夣 +褾 +慴 +軥 +讁 +軰 +瞷 +𡋯 +晜 +潗 +衋 +揵 +覼 +鱐 +醡 +䏰 +侐 +亁 +桞 +驘 +鬋 +鷽 +懞 +㵳 +儳 +豝 +傺 +搒 +縧 +硾 +䏶 +覻 +薍 +憝 +榠 +湆 +皵 +鎞 +菆 +糇 +矉 +搤 +紃 +峿 +磹 +甒 +琭 +𩥇 +菢 +禡 +渹 +刅 +迒 +敂 +蹜 +磓 +傪 +縿 +㕮 +涏 +䰀 +㡛 +韛 +犠 +餦 +圝 +焫 +㝢 +潬 +馵 +澟 +鱏 +譾 +㪍 +煼 +鍜 +窽 +紾 +堨 +䕸 +穅 +戅 +穄 +駴 +偫 +煗 +媠 +酘 +矬 +貆 +茞 +骩 +扠 +岞 +潓 +炧 +陊 +栭 +釱 +㡚 +篴 +耞 +鞉 +䋏 +𤫩 +椸 +儜 +痀 +謷 +潙 +寠 +牐 +嫭 +慅 +獧 +鈒 +欿 +薳 +蟂 +郲 +軨 +斨 +訦 +𠴲 +剺 +駪 +贙 +禫 +噣 +茢 +茙 +鄼 +揷 +魌 +䫻 +嗋 +噐 +侲 +諵 +𠺕 +挍 +䑳 +㨷 +槸 +靘 +㩧 +虣 +瑿 +衱 +襹 +餭 +㗶 +枑 +悋 +纑 +嶫 +儓 +髵 +甗 +榝 +㗭 +贗 +熸 +嬃 +礌 +偭 +樠 +栮 +鷼 +鵀 +澬 +眂 +牿 +骴 +呞 +爕 +牎 +巹 +帉 +砠 +梴 +䛏 +攃 +餁 +哿 +蹝 +崺 +閌 +醝 +臡 +麖 +駼 +賵 +夘 +骻 +愡 +俔 +諐 +觩 +莂 +饈 +殣 +溠 +冱 +埓 +厫 +虥 +芄 +慽 +竃 +埿 +仭 +褼 +倛 +韸 +牗 +幖 +禈 +穧 +蜧 +諞 +脞 +蝃 +飃 +煁 +涒 +谾 +覢 +赮 +鼘 +艗 +䶉 +鴥 +轒 +睅 +傔 +惵 +唈 +懆 +磣 +膢 +堶 +囈 +瘕 +誷 +瑘 +絝 +鬈 +嘽 +鷅 +梜 +喎 +鼟 +㟧 +劻 +眑 +剴 +痎 +餟 +庌 +菷 +梐 +吺 +躘 +慞 +罼 +穨 +摏 +釄 +莋 +呺 +砅 +鴽 +㘭 +㟅 +艴 +犉 +籕 +跐 +惏 +陗 +刋 +襘 +醹 +紽 +痌 +㗀 +撋 +陼 +駷 +艼 +踼 +癏 +慠 +趒 +邍 +姞 +䂬 +堲 +苙 +椌 +嗃 +挶 +岯 +禗 +嵔 +觡 +豜 +睩 +㒿 +塠 +燂 +扤 +恟 +鬝 +鬇 +鬡 +揳 +霠 +㗫 +苐 +蒀 +圌 +戭 +䖃 +𥈭 +勮 +耝 +轞 +胮 +墯 +枮 +罿 +浺 +綪 +爓 +蘃 +襍 +轜 +閠 +畽 +鄊 +嶆 +籭 +蠯 +陑 +瘽 +迆 +賷 +䍡 +韂 +躃 +禴 +簄 +瓟 +碐 +躨 +侜 +岍 +䃸 +趚 +髐 +榅 +粣 +屝 +鴃 +圁 +蝜 +黫 +僽 +丗 +靣 +湏 +抏 +㟽 +跙 +餤 +朙 +㹞 +瞖 +繣 +㨫 +罙 +糒 +惉 +葽 +鼮 +蕳 +豏 +𥱼 +鵮 +獦 +悕 +𠴨 +闟 +惽 +慿 +隉 +椷 +𩅰 +艛 +眽 +凓 +儃 +奨 +埀 
+瑫 +駚 +濇 +緶 +峉 +礨 +髢 +瞯 +壥 +姡 +㟯 +髬 +啀 +㶿 +歅 +殀 +縩 +疈 +鳸 +霳 +稬 +圊 +彚 +裠 +埳 +褋 +㔩 +矲 +剶 +硋 +聦 +峞 +浰 +窵 +嘂 +睘 +簵 +腒 +韘 +躣 +甈 +忲 +舽 +襂 +硠 +脃 +鐏 +奯 +脧 +矕 +䠞 +駹 +豶 +訑 +柸 +鰅 +瘨 +趿 +糦 +蟏 +饛 +尰 +諑 +汃 +毺 +鋃 +絚 +馧 +艬 +枍 +爊 +峗 +泙 +碖 +鵕 +尩 +閗 +𤧚 +幩 +塉 +箊 +覂 +玒 +橧 +謟 +庨 +籔 +欑 +厎 +尭 +氉 +蠈 +䓞 +矙 +梡 +瀩 +溔 +煴 +蔲 +僬 +嵢 +梩 +弝 +𣙙 +鞟 +敉 +鮚 +湠 +鐐 +爣 +裻 +䶎 +𦨴 +謿 +垾 +蝂 +睂 +癙 +韽 +㟳 +桒 +鳿 +樏 +峛 +瑉 +僄 +顣 +衺 +殗 +肦 +圑 +朒 +喌 +犦 +㰅 +疁 +氃 +吰 +陻 +盰 +娀 +魶 +㖃 +曒 +娿 +獱 +孏 +酅 +蝡 +齰 +莬 +鄀 +逥 +挿 +觵 +縆 +㟝 +繍 +碙 +㑂 +䎳 +兾 +壸 +賝 +桯 +跁 +跒 +蔍 +舼 +忀 +懭 +媌 +罭 +菵 +狔 +靿 +拪 +㲉 +䔲 +嬀 +鵽 +涳 +朾 +𡸣 +𢫫 +虈 +㜮 +顑 +櫋 +蔪 +旝 +湡 +蹛 +稆 +唽 +㟏 +熂 +龡 +煟 +韅 +韐 +慂 +剳 +掫 +兠 +摋 +羫 +璊 +鵻 +駓 +佌 +蜹 +晲 +矒 +玅 +剰 +斶 +紖 +懴 +駜 +羢 +麳 +㳷 +馞 +爥 +鍚 +鑢 +螵 +嗺 +鏨 +𠙶 +疪 +鷔 +鮧 +轊 +栘 +鼜 +睗 +蟘 +枓 +䖟 +剠 +瞤 +圛 +椳 +籸 +䪌 +鯹 +湌 +丳 +賧 +縭 +檾 +𦨻 +撆 +䩫 +磢 +惥 +譀 +罤 +鞸 +鉎 +㶏 +膁 +甋 +瓀 +懹 +槢 +硊 +弆 +琫 +嵠 +駻 +湢 +杮 +䌨 +訹 +藇 +穯 +蠉 +曭 +蹎 +詄 +毷 +𩃎 +熁 +灜 +蜫 +蜳 +昈 +帩 +鈋 +䐹 +顖 +鄹 +匶 +毾 +礜 +堭 +婞 +鷿 +㙞 +詀 +瘮 +䫜 +㾪 +捘 +屫 +誧 +䲔 +閍 +蒳 +㬋 +遟 +嶀 +葐 +蜼 +㻱 +曡 +䃜 +濴 +䦱 +霫 +譆 +霋 +蕰 +襓 +氋 +鴷 +魦 +㩻 +㡠 +灉 +贑 +燑 +峝 +輷 +烻 +耼 +螉 +跜 +豩 +㑃 +藙 +鋂 +胐 +𣔻 +紒 +瓓 +塯 +辴 +趷 +堛 +㒟 +㗲 +㬊 +䄡 +卄 +姧 +猓 +躗 +覤 +醊 +兎 +罯 +痯 +覸 +詉 +癿 +岋 +歝 +茟 +㘆 +㮰 +淜 +𥉌 +㫰 +鈌 +毵 +狉 +贜 +峬 +汻 +誖 +烓 +睋 +潎 +䲺 +㠓 +歖 +𠜱 +槵 +熚 +萷 +磤 +絸 +鷞 +聻 +屷 +㝵 +諕 +瘂 +㺷 +蚰 +柦 +䍐 +泿 +礰 +摎 +㜕 +㻞 +洓 +喍 +囌 +囐 +䙱 +腨 +妉 +鄛 +鄥 +㵝 +輧 +鱄 +騟 +鈚 +廜 +𨗨 +㶼 +膞 +崯 +硞 +萆 +眒 +譩 +揬 +藑 +匌 +㠾 +㥏 +㢮 +䕢 +帣 +酭 +枦 +孅 +鞙 +丷 +鍭 +䤴 +餂 +愗 +冘 +埛 +㒇 +郕 +蔯 +簰 +刔 +蠩 +耏 +鞹 +𧑅 +觹 +䐑 +磶 +蹵 +鵃 +耛 +蓤 +臄 +轙 +庤 +㒩 +翐 +榥 +晀 +輣 +蟚 +拲 +皠 +穱 +䃔 +䃧 +窡 +絍 +礿 +鑞 +栯 +㾓 +掿 +厞 +淂 +撶 +伹 +鹻 +軓 +岹 +蚷 +榸 +刾 +艂 +㤝 +塕 +蚔 +藾 +攓 +鏬 +珫 +黪 +蟧 +猭 +漑 +粺 +驆 +撘 +亾 +㼌 +蝑 +澓 +揞 +欱 +愶 +泲 +醷 +螴 +芚 +絻 +轃 +漮 +唪 +岉 +鬀 +䱹 +齖 +䂓 +趢 +荓 +覶 +鯾 +諿 +槥 +嚆 +爢 +瓬 +笐 +篢 +舝 +襵 +鎒 +𤝞 +肭 +瘇 +笓 +餑 +豋 +湗 +緎 +肐 +胲 +掤 +潫 +䖴 +𠎝 +𨺗 +諢 +毈 +寱 +唲 +䃭 +峮 +狘 +韊 +䬝 +呰 +㹱 +碞 +畞 +㠌 +黭 +蚘 +豵 +穥 +尯 +㳇 +隵 +灇 +壜 +楰 +彲 +甤 +綹 +旞 +𡏟 +曁 +喩 +𥲤 +郈 +塺 +訧 +絿 +掔 +蠮 +𡱰 +䃺 +宻 +灎 +羵 +𨠵 +糚 +摉 +壷 +勴 +瑃 +鎝 +𥜥 +婥 +鬺 +扢 +肣 +溰 +磩 +耇 +宎 +㔇 +霱 +敚 +汳 +鏄 +儹 +隥 +㿉 +膆 +崏 +𦭵 +郔 +扂 +垗 +㳂 +礛 +缻 +垜 +晱 +訩 +蘪 +珇 +怮 +垝 +㔢 +憛 +痝 +蟨 +鞁 +鶤 +肎 +傝 +䢆 +䰄 +𥊚 +㖀 +㠭 +壵 +墋 +㠔 +橜 +怓 +蚹 +塛 +憪 +鋝 +腶 +嶾 +翍 +溓 +齼 +蔂 +䃂 +鉺 +攑 +瓐 +泎 +眤 +邘 +崝 +稡 +愸 +髥 +輹 +詨 +髆 +麃 +虤 +洐 +婐 +挏 +峑 +嶣 +篬 +葄 +瑎 +瓉 +㳅 +葼 +姙 +䪜 +𩇕 +焭 +剚 +濪 +霵 +僒 + +羭 diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt new file mode 100644 index 00000000..567898b4 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt @@ -0,0 +1,18383 @@ +  +一 +乙 +二 +十 +丁 +厂 +七 +卜 +八 +人 +入 +儿 +匕 +几 +九 +刁 +了 +刀 +力 +乃 +又 +三 +干 +于 +亏 +工 +土 +士 +才 +下 +寸 +大 +丈 +与 +万 +上 +小 +口 +山 +巾 +千 +乞 +川 +亿 +个 +夕 +久 +么 +勺 +凡 +丸 +及 +广 +亡 +门 +丫 +义 +之 +尸 +己 +已 +巳 +弓 +子 +卫 +也 +女 +刃 +飞 +习 +叉 +马 +乡 +丰 +王 +开 +井 +天 +夫 +元 +无 +云 +专 +丐 +扎 +艺 +木 +五 +支 +厅 +不 +犬 +太 +区 +历 +歹 +友 +尤 +匹 +车 +巨 +牙 +屯 +戈 +比 +互 +切 +瓦 +止 +少 +曰 +日 +中 +贝 +冈 +内 +水 +见 +午 +牛 +手 +气 +毛 +壬 +升 +夭 +长 +仁 +什 +片 +仆 +化 +仇 +币 +仍 +仅 +斤 +爪 +反 +介 +父 +从 +仑 +今 +凶 +分 +乏 +公 +仓 +月 +氏 +勿 +欠 +风 +丹 +匀 +乌 +勾 +凤 +六 +文 +亢 +方 +火 +为 +斗 +忆 +计 +订 +户 +认 +冗 +讥 +心 +尺 +引 +丑 +巴 +孔 +队 +办 +以 +允 +予 +邓 +劝 +双 +书 +幻 +玉 +刊 +未 +末 +示 +击 +打 +巧 +正 +扑 +卉 +扒 +功 +扔 +去 +甘 +世 +艾 +古 +节 +本 +术 +可 +丙 +左 +厉 +石 +右 +布 +夯 +戊 +龙 +平 +灭 +轧 +东 +卡 +北 +占 +凸 +卢 +业 +旧 +帅 +归 +旦 +目 +且 +叶 +甲 +申 +叮 +电 +号 +田 +由 +只 +叭 +史 +央 +兄 +叽 +叼 +叫 +叩 +叨 +另 +叹 +冉 +皿 +凹 +囚 +四 +生 +矢 +失 +乍 +禾 +丘 +付 +仗 +代 +仙 +们 +仪 +白 +仔 +他 +斥 +瓜 +乎 +丛 +令 +用 +甩 +印 +尔 +乐 +句 +匆 +册 +卯 +犯 +外 +处 +冬 +鸟 +务 +包 +饥 +主 +市 +立 +冯 +玄 +闪 +兰 +半 +汁 +汇 +头 +汉 +宁 +穴 +它 +讨 +写 +让 +礼 +训 +议 +必 +讯 +记 +永 +司 +尼 +民 +弗 +弘 +出 +辽 +奶 +奴 +召 +加 +皮 +边 +孕 +发 +圣 +对 +台 +矛 +纠 +母 +幼 +丝 +邦 +式 +迂 +刑 +戎 +动 +扛 
+寺 +吉 +扣 +考 +托 +老 +巩 +圾 +执 +扩 +扫 +地 +场 +扬 +耳 +芋 +共 +芒 +亚 +芝 +朽 +朴 +机 +权 +过 +臣 +吏 +再 +协 +西 +压 +厌 +戌 +在 +百 +有 +存 +而 +页 +匠 +夸 +夺 +灰 +达 +列 +死 +成 +夹 +夷 +轨 +邪 +尧 +划 +迈 +毕 +至 +此 +贞 +师 +尘 +尖 +劣 +光 +当 +早 +吁 +吐 +吓 +虫 +曲 +团 +吕 +同 +吊 +吃 +因 +吸 +吗 +吆 +屿 +屹 +岁 +帆 +回 +岂 +则 +刚 +网 +肉 +年 +朱 +先 +丢 +廷 +舌 +竹 +迁 +乔 +迄 +伟 +传 +乒 +乓 +休 +伍 +伏 +优 +臼 +伐 +延 +仲 +件 +任 +伤 +价 +伦 +份 +华 +仰 +仿 +伙 +伪 +自 +伊 +血 +向 +似 +后 +行 +舟 +全 +会 +杀 +合 +兆 +企 +众 +爷 +伞 +创 +肌 +肋 +朵 +杂 +危 +旬 +旨 +旭 +负 +匈 +名 +各 +多 +争 +色 +壮 +冲 +妆 +冰 +庄 +庆 +亦 +刘 +齐 +交 +衣 +次 +产 +决 +亥 +充 +妄 +闭 +问 +闯 +羊 +并 +关 +米 +灯 +州 +汗 +污 +江 +汛 +池 +汝 +汤 +忙 +兴 +宇 +守 +宅 +字 +安 +讲 +讳 +军 +讶 +许 +讹 +论 +讼 +农 +讽 +设 +访 +诀 +寻 +那 +迅 +尽 +导 +异 +弛 +孙 +阵 +阳 +收 +阶 +阴 +防 +奸 +如 +妇 +妃 +好 +她 +妈 +戏 +羽 +观 +欢 +买 +红 +驮 +纤 +驯 +约 +级 +纪 +驰 +纫 +巡 +寿 +弄 +麦 +玖 +玛 +形 +进 +戒 +吞 +远 +违 +韧 +运 +扶 +抚 +坛 +技 +坏 +抠 +扰 +扼 +拒 +找 +批 +址 +扯 +走 +抄 +贡 +汞 +坝 +攻 +赤 +折 +抓 +扳 +抡 +扮 +抢 +孝 +坎 +均 +抑 +抛 +投 +坟 +坑 +抗 +坊 +抖 +护 +壳 +志 +块 +扭 +声 +把 +报 +拟 +却 +抒 +劫 +芙 +芜 +苇 +芽 +花 +芹 +芥 +芬 +苍 +芳 +严 +芦 +芯 +劳 +克 +芭 +苏 +杆 +杠 +杜 +材 +村 +杖 +杏 +杉 +巫 +极 +李 +杨 +求 +甫 +匣 +更 +束 +吾 +豆 +两 +酉 +丽 +医 +辰 +励 +否 +还 +尬 +歼 +来 +连 +轩 +步 +卤 +坚 +肖 +旱 +盯 +呈 +时 +吴 +助 +县 +里 +呆 +吱 +吠 +呕 +园 +旷 +围 +呀 +吨 +足 +邮 +男 +困 +吵 +串 +员 +呐 +听 +吟 +吩 +呛 +吻 +吹 +呜 +吭 +吧 +邑 +吼 +囤 +别 +吮 +岖 +岗 +帐 +财 +针 +钉 +牡 +告 +我 +乱 +利 +秃 +秀 +私 +每 +兵 +估 +体 +何 +佐 +佑 +但 +伸 +佃 +作 +伯 +伶 +佣 +低 +你 +住 +位 +伴 +身 +皂 +伺 +佛 +囱 +近 +彻 +役 +返 +余 +希 +坐 +谷 +妥 +含 +邻 +岔 +肝 +肛 +肚 +肘 +肠 +龟 +甸 +免 +狂 +犹 +狈 +角 +删 +条 +彤 +卵 +灸 +岛 +刨 +迎 +饭 +饮 +系 +言 +冻 +状 +亩 +况 +床 +库 +庇 +疗 +吝 +应 +这 +冷 +庐 +序 +辛 +弃 +冶 +忘 +闰 +闲 +间 +闷 +判 +兑 +灶 +灿 +灼 +弟 +汪 +沐 +沛 +汰 +沥 +沙 +汽 +沃 +沦 +汹 +泛 +沧 +没 +沟 +沪 +沈 +沉 +沁 +怀 +忧 +忱 +快 +完 +宋 +宏 +牢 +究 +穷 +灾 +良 +证 +启 +评 +补 +初 +社 +祀 +识 +诈 +诉 +罕 +诊 +词 +译 +君 +灵 +即 +层 +屁 +尿 +尾 +迟 +局 +改 +张 +忌 +际 +陆 +阿 +陈 +阻 +附 +坠 +妓 +妙 +妖 +姊 +妨 +妒 +努 +忍 +劲 +矣 +鸡 +纬 +驱 +纯 +纱 +纲 +纳 +驳 +纵 +纷 +纸 +纹 +纺 +驴 +纽 +奉 +玩 +环 +武 +青 +责 +现 +玫 +表 +规 +抹 +卦 +坷 +坯 +拓 +拢 +拔 +坪 +拣 +坦 +担 +坤 +押 +抽 +拐 +拖 +者 +拍 +顶 +拆 +拎 +拥 +抵 +拘 +势 +抱 +拄 +垃 +拉 +拦 +幸 +拌 +拧 +拂 +拙 +招 +坡 +披 +拨 +择 +抬 +拇 +拗 +其 +取 +茉 +苦 +昔 +苛 +若 +茂 +苹 +苗 +英 +苟 +苑 +苞 +范 +直 +茁 +茄 +茎 +苔 +茅 +枉 +林 +枝 +杯 +枢 +柜 +枚 +析 +板 +松 +枪 +枫 +构 +杭 +杰 +述 +枕 +丧 +或 +画 +卧 +事 +刺 +枣 +雨 +卖 +郁 +矾 +矿 +码 +厕 +奈 +奔 +奇 +奋 +态 +欧 +殴 +垄 +妻 +轰 +顷 +转 +斩 +轮 +软 +到 +非 +叔 +歧 +肯 +齿 +些 +卓 +虎 +虏 +肾 +贤 +尚 +旺 +具 +味 +果 +昆 +国 +哎 +咕 +昌 +呵 +畅 +明 +易 +咙 +昂 +迪 +典 +固 +忠 +呻 +咒 +咋 +咐 +呼 +鸣 +咏 +呢 +咄 +咖 +岸 +岩 +帖 +罗 +帜 +帕 +岭 +凯 +败 +账 +贩 +贬 +购 +贮 +图 +钓 +制 +知 +迭 +氛 +垂 +牧 +物 +乖 +刮 +秆 +和 +季 +委 +秉 +佳 +侍 +岳 +供 +使 +例 +侠 +侥 +版 +侄 +侦 +侣 +侧 +凭 +侨 +佩 +货 +侈 +依 +卑 +的 +迫 +质 +欣 +征 +往 +爬 +彼 +径 +所 +舍 +金 +刹 +命 +肴 +斧 +爸 +采 +觅 +受 +乳 +贪 +念 +贫 +忿 +肤 +肺 +肢 +肿 +胀 +朋 +股 +肮 +肪 +肥 +服 +胁 +周 +昏 +鱼 +兔 +狐 +忽 +狗 +狞 +备 +饰 +饱 +饲 +变 +京 +享 +庞 +店 +夜 +庙 +府 +底 +疟 +疙 +疚 +剂 +卒 +郊 +庚 +废 +净 +盲 +放 +刻 +育 +氓 +闸 +闹 +郑 +券 +卷 +单 +炬 +炒 +炊 +炕 +炎 +炉 +沫 +浅 +法 +泄 +沽 +河 +沾 +泪 +沮 +油 +泊 +沿 +泡 +注 +泣 +泞 +泻 +泌 +泳 +泥 +沸 +沼 +波 +泼 +泽 +治 +怔 +怯 +怖 +性 +怕 +怜 +怪 +怡 +学 +宝 +宗 +定 +宠 +宜 +审 +宙 +官 +空 +帘 +宛 +实 +试 +郎 +诗 +肩 +房 +诚 +衬 +衫 +视 +祈 +话 +诞 +诡 +询 +该 +详 +建 +肃 +录 +隶 +帚 +屉 +居 +届 +刷 +屈 +弧 +弥 +弦 +承 +孟 +陋 +陌 +孤 +陕 +降 +函 +限 +妹 +姑 +姐 +姓 +妮 +始 +姆 +迢 +驾 +叁 +参 +艰 +线 +练 +组 +绅 +细 +驶 +织 +驹 +终 +驻 +绊 +驼 +绍 +绎 +经 +贯 +契 +贰 +奏 +春 +帮 +玷 +珍 +玲 +玻 +毒 +型 +拭 +挂 +封 +持 +拷 +拱 +项 +垮 +挎 +城 +挟 +挠 +政 +赴 +赵 +挡 +拽 +哉 +挺 +括 +垢 +拴 +拾 +挑 +垛 +指 +垫 +挣 +挤 +拼 +挖 +按 +挥 +挪 +拯 +某 +甚 +荆 +茸 +革 +茬 +荐 +巷 +带 +草 +茧 +茵 +茶 +荒 +茫 +荡 +荣 +荤 +荧 +故 +胡 +荫 +荔 +南 +药 +标 +栈 +柑 +枯 +柄 +栋 +相 +查 +柏 +栅 +柳 +柱 +柿 +栏 +柠 +树 +勃 +要 +柬 +咸 +威 +歪 +研 +砖 +厘 +厚 +砌 +砂 +泵 +砚 +砍 +面 +耐 +耍 +牵 +鸥 +残 +殃 +轴 +轻 +鸦 +皆 +韭 +背 +战 +点 +虐 +临 +览 +竖 +省 +削 +尝 +昧 +盹 +是 +盼 +眨 +哇 +哄 +哑 +显 +冒 +映 +星 +昨 +咧 +昭 +畏 +趴 +胃 +贵 +界 +虹 +虾 +蚁 +思 +蚂 +虽 +品 +咽 +骂 +勋 +哗 +咱 +响 +哈 +哆 +咬 +咳 +咪 +哪 +哟 +炭 +峡 +罚 +贱 +贴 +贻 +骨 +幽 +钙 +钝 +钞 +钟 +钢 +钠 +钥 +钦 +钧 +钩 +钮 +卸 +缸 +拜 +看 +矩 
+毡 +氢 +怎 +牲 +选 +适 +秒 +香 +种 +秋 +科 +重 +复 +竿 +段 +便 +俩 +贷 +顺 +修 +俏 +保 +促 +俄 +俐 +侮 +俭 +俗 +俘 +信 +皇 +泉 +鬼 +侵 +禹 +侯 +追 +俊 +盾 +待 +徊 +衍 +律 +很 +须 +叙 +剑 +逃 +食 +盆 +胚 +胧 +胆 +胜 +胞 +胖 +脉 +胎 +勉 +狭 +狮 +独 +狰 +狡 +狱 +狠 +贸 +怨 +急 +饵 +饶 +蚀 +饺 +饼 +峦 +弯 +将 +奖 +哀 +亭 +亮 +度 +迹 +庭 +疮 +疯 +疫 +疤 +咨 +姿 +亲 +音 +帝 +施 +闺 +闻 +闽 +阀 +阁 +差 +养 +美 +姜 +叛 +送 +类 +迷 +籽 +娄 +前 +首 +逆 +兹 +总 +炼 +炸 +烁 +炮 +炫 +烂 +剃 +洼 +洁 +洪 +洒 +柒 +浇 +浊 +洞 +测 +洗 +活 +派 +洽 +染 +洛 +浏 +济 +洋 +洲 +浑 +浓 +津 +恃 +恒 +恢 +恍 +恬 +恤 +恰 +恼 +恨 +举 +觉 +宣 +宦 +室 +宫 +宪 +突 +穿 +窃 +客 +诫 +冠 +诬 +语 +扁 +袄 +祖 +神 +祝 +祠 +误 +诱 +诲 +说 +诵 +垦 +退 +既 +屋 +昼 +屏 +屎 +费 +陡 +逊 +眉 +孩 +陨 +除 +险 +院 +娃 +姥 +姨 +姻 +娇 +姚 +娜 +怒 +架 +贺 +盈 +勇 +怠 +癸 +蚤 +柔 +垒 +绑 +绒 +结 +绕 +骄 +绘 +给 +绚 +骆 +络 +绝 +绞 +骇 +统 +耕 +耘 +耗 +耙 +艳 +泰 +秦 +珠 +班 +素 +匿 +蚕 +顽 +盏 +匪 +捞 +栽 +捕 +埂 +捂 +振 +载 +赶 +起 +盐 +捎 +捍 +捏 +埋 +捉 +捆 +捐 +损 +袁 +捌 +都 +哲 +逝 +捡 +挫 +换 +挽 +挚 +热 +恐 +捣 +壶 +捅 +埃 +挨 +耻 +耿 +耽 +聂 +恭 +莽 +莱 +莲 +莫 +莉 +荷 +获 +晋 +恶 +莹 +莺 +真 +框 +梆 +桂 +桔 +栖 +档 +桐 +株 +桥 +桦 +栓 +桃 +格 +桩 +校 +核 +样 +根 +索 +哥 +速 +逗 +栗 +贾 +酌 +配 +翅 +辱 +唇 +夏 +砸 +砰 +砾 +础 +破 +原 +套 +逐 +烈 +殊 +殉 +顾 +轿 +较 +顿 +毙 +致 +柴 +桌 +虑 +监 +紧 +党 +逞 +晒 +眠 +晓 +哮 +唠 +鸭 +晃 +哺 +晌 +剔 +晕 +蚌 +畔 +蚣 +蚊 +蚪 +蚓 +哨 +哩 +圃 +哭 +哦 +恩 +鸯 +唤 +唁 +哼 +唧 +啊 +唉 +唆 +罢 +峭 +峨 +峰 +圆 +峻 +贼 +贿 +赂 +赃 +钱 +钳 +钻 +钾 +铁 +铃 +铅 +缺 +氧 +氨 +特 +牺 +造 +乘 +敌 +秤 +租 +积 +秧 +秩 +称 +秘 +透 +笔 +笑 +笋 +债 +借 +值 +倚 +俺 +倾 +倒 +倘 +俱 +倡 +候 +赁 +俯 +倍 +倦 +健 +臭 +射 +躬 +息 +倔 +徒 +徐 +殷 +舰 +舱 +般 +航 +途 +拿 +耸 +爹 +舀 +爱 +豺 +豹 +颁 +颂 +翁 +胰 +脆 +脂 +胸 +胳 +脏 +脐 +胶 +脑 +脓 +逛 +狸 +狼 +卿 +逢 +鸵 +留 +鸳 +皱 +饿 +馁 +凌 +凄 +恋 +桨 +浆 +衰 +衷 +高 +郭 +席 +准 +座 +症 +病 +疾 +斋 +疹 +疼 +疲 +脊 +效 +离 +紊 +唐 +瓷 +资 +凉 +站 +剖 +竞 +部 +旁 +旅 +畜 +阅 +羞 +羔 +瓶 +拳 +粉 +料 +益 +兼 +烤 +烘 +烦 +烧 +烛 +烟 +烙 +递 +涛 +浙 +涝 +浦 +酒 +涉 +消 +涡 +浩 +海 +涂 +浴 +浮 +涣 +涤 +流 +润 +涧 +涕 +浪 +浸 +涨 +烫 +涩 +涌 +悖 +悟 +悄 +悍 +悔 +悯 +悦 +害 +宽 +家 +宵 +宴 +宾 +窍 +窄 +容 +宰 +案 +请 +朗 +诸 +诺 +读 +扇 +诽 +袜 +袖 +袍 +被 +祥 +课 +冥 +谁 +调 +冤 +谅 +谆 +谈 +谊 +剥 +恳 +展 +剧 +屑 +弱 +陵 +祟 +陶 +陷 +陪 +娱 +娟 +恕 +娥 +娘 +通 +能 +难 +预 +桑 +绢 +绣 +验 +继 +骏 +球 +琐 +理 +琉 +琅 +捧 +堵 +措 +描 +域 +捺 +掩 +捷 +排 +焉 +掉 +捶 +赦 +堆 +推 +埠 +掀 +授 +捻 +教 +掏 +掐 +掠 +掂 +培 +接 +掷 +控 +探 +据 +掘 +掺 +职 +基 +聆 +勘 +聊 +娶 +著 +菱 +勒 +黄 +菲 +萌 +萝 +菌 +萎 +菜 +萄 +菊 +菩 +萍 +菠 +萤 +营 +乾 +萧 +萨 +菇 +械 +彬 +梦 +婪 +梗 +梧 +梢 +梅 +检 +梳 +梯 +桶 +梭 +救 +曹 +副 +票 +酝 +酗 +厢 +戚 +硅 +硕 +奢 +盔 +爽 +聋 +袭 +盛 +匾 +雪 +辅 +辆 +颅 +虚 +彪 +雀 +堂 +常 +眶 +匙 +晨 +睁 +眯 +眼 +悬 +野 +啪 +啦 +曼 +晦 +晚 +啄 +啡 +距 +趾 +啃 +跃 +略 +蚯 +蛀 +蛇 +唬 +累 +鄂 +唱 +患 +啰 +唾 +唯 +啤 +啥 +啸 +崖 +崎 +崭 +逻 +崔 +帷 +崩 +崇 +崛 +婴 +圈 +铐 +铛 +铝 +铜 +铭 +铲 +银 +矫 +甜 +秸 +梨 +犁 +秽 +移 +笨 +笼 +笛 +笙 +符 +第 +敏 +做 +袋 +悠 +偿 +偶 +偎 +偷 +您 +售 +停 +偏 +躯 +兜 +假 +衅 +徘 +徙 +得 +衔 +盘 +舶 +船 +舵 +斜 +盒 +鸽 +敛 +悉 +欲 +彩 +领 +脚 +脖 +脯 +豚 +脸 +脱 +象 +够 +逸 +猜 +猪 +猎 +猫 +凰 +猖 +猛 +祭 +馅 +馆 +凑 +减 +毫 +烹 +庶 +麻 +庵 +痊 +痒 +痕 +廊 +康 +庸 +鹿 +盗 +章 +竟 +商 +族 +旋 +望 +率 +阎 +阐 +着 +羚 +盖 +眷 +粘 +粗 +粒 +断 +剪 +兽 +焊 +焕 +清 +添 +鸿 +淋 +涯 +淹 +渠 +渐 +淑 +淌 +混 +淮 +淆 +渊 +淫 +渔 +淘 +淳 +液 +淤 +淡 +淀 +深 +涮 +涵 +婆 +梁 +渗 +情 +惜 +惭 +悼 +惧 +惕 +惟 +惊 +惦 +悴 +惋 +惨 +惯 +寇 +寅 +寄 +寂 +宿 +窒 +窑 +密 +谋 +谍 +谎 +谐 +袱 +祷 +祸 +谓 +谚 +谜 +逮 +敢 +尉 +屠 +弹 +隋 +堕 +随 +蛋 +隅 +隆 +隐 +婚 +婶 +婉 +颇 +颈 +绩 +绪 +续 +骑 +绰 +绳 +维 +绵 +绷 +绸 +综 +绽 +绿 +缀 +巢 +琴 +琳 +琢 +琼 +斑 +替 +揍 +款 +堪 +塔 +搭 +堰 +揩 +越 +趁 +趋 +超 +揽 +堤 +提 +博 +揭 +喜 +彭 +揣 +插 +揪 +搜 +煮 +援 +搀 +裁 +搁 +搓 +搂 +搅 +壹 +握 +搔 +揉 +斯 +期 +欺 +联 +葫 +散 +惹 +葬 +募 +葛 +董 +葡 +敬 +葱 +蒋 +蒂 +落 +韩 +朝 +辜 +葵 +棒 +棱 +棋 +椰 +植 +森 +焚 +椅 +椒 +棵 +棍 +椎 +棉 +棚 +棕 +棺 +榔 +椭 +惠 +惑 +逼 +粟 +棘 +酣 +酥 +厨 +厦 +硬 +硝 +确 +硫 +雁 +殖 +裂 +雄 +颊 +雳 +暂 +雅 +翘 +辈 +悲 +紫 +凿 +辉 +敞 +棠 +赏 +掌 +晴 +睐 +暑 +最 +晰 +量 +鼎 +喷 +喳 +晶 +喇 +遇 +喊 +遏 +晾 +景 +畴 +践 +跋 +跌 +跑 +跛 +遗 +蛙 +蛛 +蜓 +蜒 +蛤 +喝 +鹃 +喂 +喘 +喉 +喻 +啼 +喧 +嵌 +幅 +帽 +赋 +赌 +赎 +赐 +赔 +黑 +铸 +铺 +链 +销 +锁 +锄 +锅 +锈 +锋 +锌 +锐 +甥 +掰 +短 +智 +氮 +毯 +氯 +鹅 +剩 +稍 +程 +稀 +税 +筐 +等 +筑 +策 +筛 +筒 +筏 +答 +筋 +筝 +傲 +傅 +牌 +堡 +集 +焦 +傍 +储 +皓 +皖 +粤 +奥 +街 +惩 +御 +循 +艇 +舒 +逾 +番 +释 +禽 +腊 +脾 +腋 +腔 +腕 +鲁 +猩 +猬 +猾 +猴 +惫 +然 +馈 +馋 
+装 +蛮 +就 +敦 +斌 +痘 +痢 +痪 +痛 +童 +竣 +阔 +善 +翔 +羡 +普 +粪 +尊 +奠 +道 +遂 +曾 +焰 +港 +滞 +湖 +湘 +渣 +渤 +渺 +湿 +温 +渴 +溃 +溅 +滑 +湃 +渝 +湾 +渡 +游 +滋 +渲 +溉 +愤 +慌 +惰 +愕 +愣 +惶 +愧 +愉 +慨 +割 +寒 +富 +寓 +窜 +窝 +窖 +窗 +窘 +遍 +雇 +裕 +裤 +裙 +禅 +禄 +谢 +谣 +谤 +谦 +犀 +属 +屡 +强 +粥 +疏 +隔 +隙 +隘 +媒 +絮 +嫂 +媚 +婿 +登 +缅 +缆 +缉 +缎 +缓 +缔 +缕 +骗 +编 +骚 +缘 +瑟 +鹉 +瑞 +瑰 +瑙 +魂 +肆 +摄 +摸 +填 +搏 +塌 +鼓 +摆 +携 +搬 +摇 +搞 +塘 +摊 +聘 +斟 +蒜 +勤 +靴 +靶 +鹊 +蓝 +墓 +幕 +蓬 +蓄 +蒲 +蓉 +蒙 +蒸 +献 +椿 +禁 +楚 +楷 +榄 +想 +槐 +榆 +楼 +概 +赖 +酪 +酬 +感 +碍 +碘 +碑 +碎 +碰 +碗 +碌 +尴 +雷 +零 +雾 +雹 +辐 +辑 +输 +督 +频 +龄 +鉴 +睛 +睹 +睦 +瞄 +睫 +睡 +睬 +嗜 +鄙 +嗦 +愚 +暖 +盟 +歇 +暗 +暇 +照 +畸 +跨 +跷 +跳 +跺 +跪 +路 +跤 +跟 +遣 +蜈 +蜗 +蛾 +蜂 +蜕 +嗅 +嗡 +嗓 +署 +置 +罪 +罩 +蜀 +幌 +错 +锚 +锡 +锣 +锤 +锥 +锦 +键 +锯 +锰 +矮 +辞 +稚 +稠 +颓 +愁 +筹 +签 +简 +筷 +毁 +舅 +鼠 +催 +傻 +像 +躲 +魁 +衙 +微 +愈 +遥 +腻 +腰 +腥 +腮 +腹 +腺 +鹏 +腾 +腿 +鲍 +猿 +颖 +触 +解 +煞 +雏 +馍 +馏 +酱 +禀 +痹 +廓 +痴 +痰 +廉 +靖 +新 +韵 +意 +誊 +粮 +数 +煎 +塑 +慈 +煤 +煌 +满 +漠 +滇 +源 +滤 +滥 +滔 +溪 +溜 +漓 +滚 +溢 +溯 +滨 +溶 +溺 +粱 +滩 +慎 +誉 +塞 +寞 +窥 +窟 +寝 +谨 +褂 +裸 +福 +谬 +群 +殿 +辟 +障 +媳 +嫉 +嫌 +嫁 +叠 +缚 +缝 +缠 +缤 +剿 +静 +碧 +璃 +赘 +熬 +墙 +墟 +嘉 +摧 +赫 +截 +誓 +境 +摘 +摔 +撇 +聚 +慕 +暮 +摹 +蔓 +蔑 +蔡 +蔗 +蔽 +蔼 +熙 +蔚 +兢 +模 +槛 +榴 +榜 +榨 +榕 +歌 +遭 +酵 +酷 +酿 +酸 +碟 +碱 +碳 +磁 +愿 +需 +辖 +辗 +雌 +裳 +颗 +瞅 +墅 +嗽 +踊 +蜻 +蜡 +蝇 +蜘 +蝉 +嘛 +嘀 +赚 +锹 +锻 +镀 +舞 +舔 +稳 +熏 +箕 +算 +箩 +管 +箫 +舆 +僚 +僧 +鼻 +魄 +魅 +貌 +膜 +膊 +膀 +鲜 +疑 +孵 +馒 +裹 +敲 +豪 +膏 +遮 +腐 +瘩 +瘟 +瘦 +辣 +彰 +竭 +端 +旗 +精 +粹 +歉 +弊 +熄 +熔 +煽 +潇 +漆 +漱 +漂 +漫 +滴 +漾 +演 +漏 +慢 +慷 +寨 +赛 +寡 +察 +蜜 +寥 +谭 +肇 +褐 +褪 +谱 +隧 +嫩 +翠 +熊 +凳 +骡 +缩 +慧 +撵 +撕 +撒 +撩 +趣 +趟 +撑 +撮 +撬 +播 +擒 +墩 +撞 +撤 +增 +撰 +聪 +鞋 +鞍 +蕉 +蕊 +蔬 +蕴 +横 +槽 +樱 +橡 +樟 +橄 +敷 +豌 +飘 +醋 +醇 +醉 +磕 +磊 +磅 +碾 +震 +霄 +霉 +瞒 +题 +暴 +瞎 +嘻 +嘶 +嘲 +嘹 +影 +踢 +踏 +踩 +踪 +蝶 +蝴 +蝠 +蝎 +蝌 +蝗 +蝙 +嘿 +嘱 +幢 +墨 +镇 +镐 +镑 +靠 +稽 +稻 +黎 +稿 +稼 +箱 +篓 +箭 +篇 +僵 +躺 +僻 +德 +艘 +膝 +膛 +鲤 +鲫 +熟 +摩 +褒 +瘪 +瘤 +瘫 +凛 +颜 +毅 +糊 +遵 +憋 +潜 +澎 +潮 +潭 +鲨 +澳 +潘 +澈 +澜 +澄 +懂 +憔 +懊 +憎 +额 +翩 +褥 +谴 +鹤 +憨 +慰 +劈 +履 +豫 +缭 +撼 +擂 +操 +擅 +燕 +蕾 +薯 +薛 +薇 +擎 +薪 +薄 +颠 +翰 +噩 +橱 +橙 +橘 +整 +融 +瓢 +醒 +霍 +霎 +辙 +冀 +餐 +嘴 +踱 +蹄 +蹂 +蟆 +螃 +器 +噪 +鹦 +赠 +默 +黔 +镜 +赞 +穆 +篮 +篡 +篷 +篱 +儒 +邀 +衡 +膨 +雕 +鲸 +磨 +瘾 +瘸 +凝 +辨 +辩 +糙 +糖 +糕 +燃 +濒 +澡 +激 +懒 +憾 +懈 +窿 +壁 +避 +缰 +缴 +戴 +擦 +藉 +鞠 +藏 +藐 +檬 +檐 +檀 +礁 +磷 +霜 +霞 +瞭 +瞧 +瞬 +瞳 +瞩 +瞪 +曙 +蹋 +蹈 +螺 +蟋 +蟀 +嚎 +赡 +穗 +魏 +簧 +簇 +繁 +徽 +爵 +朦 +臊 +鳄 +癌 +辫 +赢 +糟 +糠 +燥 +懦 +豁 +臀 +臂 +翼 +骤 +藕 +鞭 +藤 +覆 +瞻 +蹦 +嚣 +镰 +翻 +鳍 +鹰 +瀑 +襟 +璧 +戳 +孽 +警 +蘑 +藻 +攀 +曝 +蹲 +蹭 +蹬 +巅 +簸 +簿 +蟹 +颤 +靡 +癣 +瓣 +羹 +鳖 +爆 +疆 +鬓 +壤 +馨 +耀 +躁 +蠕 +嚼 +嚷 +巍 +籍 +鳞 +魔 +糯 +灌 +譬 +蠢 +霸 +露 +霹 +躏 +黯 +髓 +赣 +囊 +镶 +瓤 +罐 +矗 +乂 +乜 +兀 +弋 +孑 +孓 +幺 +亓 +韦 +廿 +丏 +卅 +仄 +厄 +仃 +仉 +仂 +兮 +刈 +爻 +卞 +闩 +讣 +尹 +夬 +爿 +毋 +邗 +邛 +艽 +艿 +札 +叵 +匝 +丕 +匜 +劢 +卟 +叱 +叻 +仨 +仕 +仟 +仡 +仫 +仞 +卮 +氐 +犰 +刍 +邝 +邙 +汀 +讦 +讧 +讪 +讫 +尻 +阡 +尕 +弁 +驭 +匡 +耒 +玎 +玑 +邢 +圩 +圬 +圭 +扦 +圪 +圳 +圹 +扪 +圮 +圯 +芊 +芍 +芄 +芨 +芑 +芎 +芗 +亘 +厍 +夼 +戍 +尥 +乩 +旯 +曳 +岌 +屺 +凼 +囡 +钇 +缶 +氘 +氖 +牝 +伎 +伛 +伢 +佤 +仵 +伥 +伧 +伉 +伫 +囟 +汆 +刖 +夙 +旮 +刎 +犷 +犸 +舛 +凫 +邬 +饧 +汕 +汔 +汐 +汲 +汜 +汊 +忖 +忏 +讴 +讵 +祁 +讷 +聿 +艮 +厾 +阱 +阮 +阪 +丞 +妁 +牟 +纡 +纣 +纥 +纨 +玕 +玙 +抟 +抔 +圻 +坂 +坍 +坞 +抃 +抉 +㧐 +芫 +邯 +芸 +芾 +苈 +苣 +芷 +芮 +苋 +芼 +苌 +苁 +芩 +芪 +芡 +芟 +苄 +苎 +苡 +杌 +杓 +杞 +杈 +忑 +孛 +邴 +邳 +矶 +奁 +豕 +忒 +欤 +轫 +迓 +邶 +忐 +卣 +邺 +旰 +呋 +呒 +呓 +呔 +呖 +呃 +旸 +吡 +町 +虬 +呗 +吽 +吣 +吲 +帏 +岐 +岈 +岘 +岑 +岚 +兕 +囵 +囫 +钊 +钋 +钌 +迕 +氙 +氚 +牤 +佞 +邱 +攸 +佚 +佝 +佟 +佗 +伽 +彷 +佘 +佥 +孚 +豸 +坌 +肟 +邸 +奂 +劬 +狄 +狁 +鸠 +邹 +饨 +饩 +饪 +饫 +饬 +亨 +庑 +庋 +疔 +疖 +肓 +闱 +闳 +闵 +羌 +炀 +沣 +沅 +沔 +沤 +沌 +沏 +沚 +汩 +汨 +沂 +汾 +沨 +汴 +汶 +沆 +沩 +泐 +怃 +怄 +忡 +忤 +忾 +怅 +忻 +忪 +怆 +忭 +忸 +诂 +诃 +诅 +诋 +诌 +诏 +诒 +孜 +陇 +陀 +陂 +陉 +妍 +妩 +妪 +妣 +妊 +妗 +妫 +妞 +姒 +妤 +邵 +劭 +刭 +甬 +邰 +纭 +纰 +纴 +纶 +纾 +玮 +玡 +玭 +玠 +玢 +玥 +玦 +盂 +忝 +匦 +坩 +抨 +拤 +坫 +拈 +垆 +抻 +劼 +拃 +拊 +坼 +坻 +㧟 +坨 +坭 +抿 +坳 +耶 +苷 +苯 +苤 +茏 +苫 +苜 +苴 +苒 +苘 +茌 +苻 +苓 +茚 +茆 +茑 +茓 +茔 +茕 +茀 +苕 +枥 +枇 +杪 +杳 +枧 +杵 +枨 +枞 +枋 +杻 +杷 +杼 +矸 +砀 +刳 +奄 +瓯 +殁 +郏 +轭 +郅 +鸢 +盱 +昊 +昙 +杲 +昃 +咂 +呸 +昕 +昀 +旻 +昉 +炅 +咔 +畀 +虮 
+咀 +呷 +黾 +呱 +呤 +咚 +咆 +咛 +呶 +呣 +呦 +咝 +岢 +岿 +岬 +岫 +帙 +岣 +峁 +刿 +迥 +岷 +剀 +帔 +峄 +沓 +囹 +罔 +钍 +钎 +钏 +钒 +钕 +钗 +邾 +迮 +牦 +竺 +迤 +佶 +佬 +佰 +侑 +侉 +臾 +岱 +侗 +侃 +侏 +侩 +佻 +佾 +侪 +佼 +佯 +侬 +帛 +阜 +侔 +徂 +刽 +郄 +怂 +籴 +瓮 +戗 +肼 +䏝 +肽 +肱 +肫 +剁 +迩 +郇 +狙 +狎 +狍 +狒 +咎 +炙 +枭 +饯 +饴 +冽 +冼 +庖 +疠 +疝 +疡 +兖 +妾 +劾 +炜 +𬉼 +炖 +炘 +炝 +炔 +泔 +沭 +泷 +泸 +泱 +泅 +泗 +泠 +泺 +泖 +泫 +泮 +沱 +泯 +泓 +泾 +怙 +怵 +怦 +怛 +怏 +怍 +㤘 +怩 +怫 +怿 +宕 +穹 +宓 +诓 +诔 +诖 +诘 +戾 +诙 +戽 +郓 +衩 +祆 +祎 +祉 +祇 +诛 +诜 +诟 +诠 +诣 +诤 +诧 +诨 +诩 +戕 +孢 +亟 +陔 +妲 +妯 +姗 +帑 +弩 +孥 +驽 +虱 +迦 +迨 +绀 +绁 +绂 +驷 +驸 +绉 +绌 +驿 +骀 +甾 +珏 +珐 +珂 +珑 +玳 +珀 +顸 +珉 +珈 +拮 +垭 +挝 +垣 +挞 +垤 +赳 +贲 +垱 +垌 +郝 +垧 +垓 +挦 +垠 +茜 +荚 +荑 +贳 +荜 +莒 +茼 +茴 +茱 +莛 +荞 +茯 +荏 +荇 +荃 +荟 +荀 +茗 +荠 +茭 +茨 +垩 +荥 +荦 +荨 +荩 +剋 +荪 +茹 +荬 +荮 +柰 +栉 +柯 +柘 +栊 +柩 +枰 +栌 +柙 +枵 +柚 +枳 +柞 +柝 +栀 +柢 +栎 +枸 +柈 +柁 +枷 +柽 +剌 +酊 +郦 +甭 +砗 +砘 +砒 +斫 +砭 +砜 +奎 +耷 +虺 +殂 +殇 +殄 +殆 +轱 +轲 +轳 +轶 +轸 +虿 +毖 +觇 +尜 +哐 +眄 +眍 +𠳐 +郢 +眇 +眊 +眈 +禺 +哂 +咴 +曷 +昴 +昱 +昵 +咦 +哓 +哔 +畎 +毗 +呲 +胄 +畋 +畈 +虼 +虻 +盅 +咣 +哕 +剐 +郧 +咻 +囿 +咿 +哌 +哙 +哚 +咯 +咩 +咤 +哝 +哏 +哞 +峙 +峣 +罘 +帧 +峒 +峤 +峋 +峥 +贶 +钚 +钛 +钡 +钣 +钤 +钨 +钫 +钯 +氡 +氟 +牯 +郜 +秕 +秭 +竽 +笈 +笃 +俦 +俨 +俅 +俪 +叟 +垡 +牮 +俣 +俚 +皈 +俑 +俟 +逅 +徇 +徉 +舢 +俞 +郗 +俎 +郤 +爰 +郛 +瓴 +胨 +胪 +胛 +胂 +胙 +胍 +胗 +胝 +朐 +胫 +鸨 +匍 +狨 +狯 +飑 +狩 +狲 +訇 +逄 +昝 +饷 +饸 +饹 +胤 +孪 +娈 +弈 +奕 +庥 +疬 +疣 +疥 +疭 +庠 +竑 +彦 +飒 +闼 +闾 +闿 +阂 +羑 +迸 +籼 +酋 +炳 +炻 +炽 +炯 +烀 +炷 +烃 +洱 +洹 +洧 +洌 +浃 +洇 +洄 +洙 +涎 +洎 +洫 +浍 +洮 +洵 +浒 +浔 +浕 +洳 +恸 +恓 +恹 +恫 +恺 +恻 +恂 +恪 +恽 +宥 +扃 +衲 +衽 +衿 +袂 +祛 +祜 +祓 +祚 +诮 +祗 +祢 +诰 +诳 +鸩 +昶 +郡 +咫 +弭 +牁 +胥 +陛 +陟 +娅 +姮 +娆 +姝 +姣 +姘 +姹 +怼 +羿 +炱 +矜 +绔 +骁 +骅 +绗 +绛 +骈 +耖 +挈 +珥 +珙 +顼 +珰 +珩 +珧 +珣 +珞 +琤 +珲 +敖 +恚 +埔 +埕 +埘 +埙 +埚 +挹 +耆 +耄 +埒 +捋 +贽 +垸 +捃 +盍 +荸 +莆 +莳 +莴 +莪 +莠 +莓 +莜 +莅 +荼 +莩 +荽 +莸 +荻 +莘 +莎 +莞 +莨 +渇 +鸪 +莼 +栲 +栳 +郴 +桓 +桡 +桎 +桢 +桤 +梃 +栝 +桕 +桁 +桧 +桅 +栟 +桉 +栩 +逑 +逋 +彧 +鬲 +豇 +酐 +逦 +厝 +孬 +砝 +砹 +砺 +砧 +砷 +砟 +砼 +砥 +砣 +剞 +砻 +轼 +轾 +辂 +鸫 +趸 +龀 +鸬 +虔 +逍 +眬 +唛 +晟 +眩 +眙 +哧 +哽 +唔 +晁 +晏 +鸮 +趵 +趿 +畛 +蚨 +蚜 +蚍 +蚋 +蚬 +蚝 +蚧 +唢 +圄 +唣 +唏 +盎 +唑 +崂 +崃 +罡 +罟 +峪 +觊 +赅 +钰 +钲 +钴 +钵 +钹 +钺 +钽 +钼 +钿 +铀 +铂 +铄 +铆 +铈 +铉 +铊 +铋 +铌 +铍 +䥽 +铎 +氩 +氤 +氦 +毪 +舐 +秣 +秫 +盉 +笄 +笕 +笊 +笏 +笆 +俸 +倩 +俵 +偌 +俳 +俶 +倬 +倏 +恁 +倭 +倪 +俾 +倜 +隼 +隽 +倌 +倥 +臬 +皋 +郫 +倨 +衄 +颀 +徕 +舫 +釜 +奚 +衾 +胯 +胱 +胴 +胭 +脍 +胼 +朕 +脒 +胺 +鸱 +玺 +鸲 +狷 +猁 +狳 +猃 +狺 +逖 +桀 +袅 +饽 +凇 +栾 +挛 +亳 +疳 +疴 +疸 +疽 +痈 +疱 +痂 +痉 +衮 +凋 +颃 +恣 +旆 +旄 +旃 +阃 +阄 +訚 +阆 +恙 +粑 +朔 +郸 +烜 +烨 +烩 +烊 +剡 +郯 +烬 +涑 +浯 +涞 +涟 +娑 +涅 +涠 +浞 +涓 +浥 +涔 +浜 +浠 +浣 +浚 +悚 +悭 +悝 +悒 +悌 +悛 +宸 +窈 +剜 +诹 +冢 +诼 +袒 +袢 +祯 +诿 +谀 +谂 +谄 +谇 +屐 +屙 +陬 +勐 +奘 +牂 +蚩 +陲 +姬 +娠 +娌 +娉 +娲 +娩 +娴 +娣 +娓 +婀 +畚 +逡 +绠 +骊 +绡 +骋 +绥 +绦 +绨 +骎 +邕 +鸶 +彗 +耜 +焘 +舂 +琏 +琇 +麸 +揶 +埴 +埯 +捯 +掳 +掴 +埸 +埵 +赧 +埤 +捭 +逵 +埝 +堋 +堍 +掬 +鸷 +掖 +捽 +掊 +堉 +掸 +捩 +掮 +悫 +埭 +埽 +掇 +掼 +聃 +菁 +萁 +菘 +堇 +萘 +萋 +菽 +菖 +萜 +萸 +萑 +棻 +菔 +菟 +萏 +萃 +菏 +菹 +菪 +菅 +菀 +萦 +菰 +菡 +梵 +梿 +梏 +觋 +桴 +桷 +梓 +棁 +桫 +棂 +啬 +郾 +匮 +敕 +豉 +鄄 +酞 +酚 +戛 +硎 +硭 +硒 +硖 +硗 +硐 +硇 +硌 +鸸 +瓠 +匏 +厩 +龚 +殒 +殓 +殍 +赉 +雩 +辄 +堑 +眭 +眦 +啧 +晡 +晤 +眺 +眵 +眸 +圊 +喏 +喵 +啉 +勖 +晞 +唵 +晗 +冕 +啭 +畦 +趺 +啮 +跄 +蚶 +蛄 +蛎 +蛆 +蚰 +蛊 +圉 +蚱 +蛉 +蛏 +蚴 +啁 +啕 +唿 +啐 +唼 +唷 +啖 +啵 +啶 +啷 +唳 +唰 +啜 +帻 +崚 +崦 +帼 +崮 +崤 +崆 +赇 +赈 +赊 +铑 +铒 +铗 +铙 +铟 +铠 +铡 +铢 +铣 +铤 +铧 +铨 +铩 +铪 +铫 +铬 +铮 +铯 +铰 +铱 +铳 +铵 +铷 +氪 +牾 +鸹 +秾 +逶 +笺 +筇 +笸 +笪 +笮 +笠 +笥 +笤 +笳 +笾 +笞 +偾 +偃 +偕 +偈 +傀 +偬 +偻 +皑 +皎 +鸻 +徜 +舸 +舻 +舴 +舷 +龛 +翎 +脬 +脘 +脲 +匐 +猗 +猡 +猞 +猝 +斛 +猕 +馗 +馃 +馄 +鸾 +孰 +庹 +庾 +痔 +痍 +疵 +翊 +旌 +旎 +袤 +阇 +阈 +阉 +阊 +阋 +阍 +阏 +羟 +粝 +粕 +敝 +焐 +烯 +焓 +烽 +焖 +烷 +焗 +渍 +渚 +淇 +淅 +淞 +渎 +涿 +淖 +挲 +淠 +涸 +渑 +淦 +淝 +淬 +涪 +淙 +涫 +渌 +淄 +惬 +悻 +悱 +惝 +惘 +悸 +惆 +惚 +惇 +惮 +窕 +谌 +谏 +扈 +皲 +谑 +裆 +袷 +裉 +谒 +谔 +谕 +谖 +谗 +谙 +谛 +谝 +逯 +郿 +隈 +粜 +隍 +隗 +婧 +婊 +婕 +娼 +婢 +婵 +胬 +袈 +翌 +恿 +欸 +绫 +骐 +绮 +绯 +绱 +骒 +绲 +骓 +绶 +绺 +绻 +绾 +骖 +缁 +耠 +琫 +琵 +琶 +琪 +瑛 +琦 +琥 +琨 +靓 +琰 +琮 +琯 +琬 +琛 +琚 +辇 +鼋 +揳 +堞 +搽 +揸 +揠 +堙 +趄 +揖 +颉 +塄 +揿 +耋 +揄 +蛩 +蛰 +塆 +摒 +揆 +掾 +聒 +葑 +葚 +靰 +靸 +葳 +葺 +葸 +萼 +葆 +葩 +葶 +蒌 +萱 +戟 +葭 +楮 
+棼 +椟 +棹 +椤 +棰 +赍 +椋 +椁 +椪 +棣 +椐 +鹁 +覃 +酤 +酢 +酡 +鹂 +厥 +殚 +殛 +雯 +雱 +辊 +辋 +椠 +辍 +辎 +斐 +睄 +睑 +睇 +睃 +戢 +喋 +嗒 +喃 +喱 +喹 +晷 +喈 +跖 +跗 +跞 +跚 +跎 +跏 +跆 +蛱 +蛲 +蛭 +蛳 +蛐 +蛔 +蛞 +蛴 +蛟 +蛘 +喁 +喟 +啾 +嗖 +喑 +嗟 +喽 +嗞 +喀 +喔 +喙 +嵘 +嵖 +崴 +遄 +詈 +嵎 +崽 +嵬 +嵛 +嵯 +嵝 +嵫 +幄 +嵋 +赕 +铻 +铼 +铿 +锃 +锂 +锆 +锇 +锉 +锏 +锑 +锒 +锔 +锕 +掣 +矬 +氰 +毳 +毽 +犊 +犄 +犋 +鹄 +犍 +嵇 +黍 +稃 +稂 +筚 +筵 +筌 +傣 +傈 +舄 +牍 +傥 +傧 +遑 +傩 +遁 +徨 +媭 +畲 +弑 +颌 +翕 +釉 +鹆 +舜 +貂 +腈 +腌 +腓 +腆 +腴 +腑 +腚 +腱 +鱿 +鲀 +鲂 +颍 +猢 +猹 +猥 +飓 +觞 +觚 +猱 +颎 +飧 +馇 +馊 +亵 +脔 +裒 +痣 +痨 +痦 +痞 +痤 +痫 +痧 +赓 +竦 +瓿 +啻 +颏 +鹇 +阑 +阒 +阕 +粞 +遒 +孳 +焯 +焜 +焙 +焱 +鹈 +湛 +渫 +湮 +湎 +湜 +渭 +湍 +湫 +溲 +湟 +溆 +湲 +湔 +湉 +渥 +湄 +滁 +愠 +惺 +愦 +惴 +愀 +愎 +愔 +喾 +寐 +谟 +扉 +裢 +裎 +裥 +祾 +祺 +谠 +幂 +谡 +谥 +谧 +遐 +孱 +弼 +巽 +骘 +媪 +媛 +婷 +巯 +翚 +皴 +婺 +骛 +缂 +缃 +缄 +彘 +缇 +缈 +缌 +缑 +缒 +缗 +飨 +耢 +瑚 +瑁 +瑜 +瑗 +瑄 +瑕 +遨 +骜 +韫 +髡 +塬 +鄢 +趔 +趑 +摅 +摁 +蜇 +搋 +搪 +搐 +搛 +搠 +摈 +彀 +毂 +搦 +搡 +蓁 +戡 +蓍 +鄞 +靳 +蓐 +蓦 +鹋 +蒽 +蓓 +蓖 +蓊 +蒯 +蓟 +蓑 +蒿 +蒺 +蓠 +蒟 +蒡 +蒹 +蒴 +蒗 +蓥 +颐 +楔 +楠 +楂 +楝 +楫 +楸 +椴 +槌 +楯 +皙 +榈 +槎 +榉 +楦 +楣 +楹 +椽 +裘 +剽 +甄 +酮 +酰 +酯 +酩 +蜃 +碛 +碓 +硼 +碉 +碚 +碇 +碜 +鹌 +辏 +龃 +龅 +訾 +粲 +虞 +睚 +嗪 +韪 +嗷 +嗉 +睨 +睢 +雎 +睥 +嘟 +嗑 +嗫 +嗬 +嗔 +嗝 +戥 +嗄 +煦 +暄 +遢 +暌 +跬 +跶 +跸 +跐 +跣 +跹 +跻 +蛸 +蜊 +蜍 +蜉 +蜣 +畹 +蛹 +嗣 +嗯 +嗥 +嗲 +嗳 +嗌 +嗍 +嗨 +嗐 +嗤 +嗵 +罨 +嵊 +嵩 +嵴 +骰 +锗 +锛 +锜 +锝 +锞 +锟 +锢 +锨 +锩 +锭 +锱 +雉 +氲 +犏 +歃 +稞 +稗 +稔 +筠 +筢 +筮 +筲 +筱 +牒 +煲 +敫 +徭 +愆 +艄 +觎 +毹 +貊 +貅 +貉 +颔 +腠 +腩 +腼 +腭 +腧 +塍 +媵 +詹 +鲅 +鲆 +鲇 +鲈 +稣 +鲋 +鲐 +肄 +鹐 +飕 +觥 +遛 +馐 +鹑 +亶 +瘃 +痱 +痼 +痿 +瘐 +瘁 +瘆 +麂 +裔 +歆 +旒 +雍 +阖 +阗 +阙 +羧 +豢 +粳 +猷 +煳 +煜 +煨 +煅 +煊 +煸 +煺 +滟 +溱 +溘 +漭 +滢 +溥 +溧 +溽 +裟 +溻 +溷 +滗 +滫 +溴 +滏 +滃 +滦 +溏 +滂 +滓 +溟 +滪 +愫 +慑 +慊 +鲎 +骞 +窦 +窠 +窣 +裱 +褚 +裨 +裾 +裰 +禊 +谩 +谪 +媾 +嫫 +媲 +嫒 +嫔 +媸 +缙 +缜 +缛 +辔 +骝 +缟 +缡 +缢 +缣 +骟 +耥 +璈 +瑶 +瑭 +獒 +觏 +慝 +嫠 +韬 +叆 +髦 +摽 +墁 +撂 +摞 +撄 +翥 +踅 +摭 +墉 +墒 +榖 +綦 +蔫 +蔷 +靺 +靼 +鞅 +靿 +甍 +蔸 +蔟 +蔺 +戬 +蕖 +蔻 +蓿 +斡 +鹕 +蓼 +榛 +榧 +榻 +榫 +榭 +槔 +榱 +槁 +槟 +槠 +榷 +僰 +酽 +酶 +酹 +厮 +碡 +碴 +碣 +碲 +磋 +臧 +豨 +殡 +霆 +霁 +辕 +蜚 +裴 +翡 +龇 +龈 +睿 +䁖 +睽 +嘞 +嘈 +嘌 +嘁 +嘎 +暧 +暝 +踌 +踉 +蜞 +蜥 +蜮 +蝈 +蜴 +蜱 +蜩 +蜷 +蜿 +螂 +蜢 +嘘 +嘡 +鹗 +嘣 +嘤 +嘚 +嗾 +嘧 +罴 +罱 +幔 +嶂 +幛 +赙 +罂 +骷 +骶 +鹘 +锲 +锴 +锶 +锷 +锸 +锵 +镁 +镂 +犒 +箐 +箦 +箧 +箍 +箸 +箬 +箅 +箪 +箔 +箜 +箢 +箓 +毓 +僖 +儆 +僳 +僭 +劁 +僮 +魃 +魆 +睾 +艋 +鄱 +膈 +膑 +鲑 +鲔 +鲚 +鲛 +鲟 +獐 +觫 +雒 +夤 +馑 +銮 +塾 +麽 +瘌 +瘊 +瘘 +瘙 +廖 +韶 +旖 +膂 +阚 +鄯 +鲞 +粿 +粼 +粽 +糁 +槊 +鹚 +熘 +熥 +潢 +漕 +滹 +漯 +漶 +潋 +潴 +漪 +漉 +漳 +漩 +澉 +潍 +慵 +搴 +窨 +寤 +綮 +谮 +褡 +褙 +褓 +褛 +褊 +谯 +谰 +谲 +暨 +屣 +鹛 +嫣 +嫱 +嫖 +嫦 +嫚 +嫘 +嫡 +鼐 +翟 +瞀 +鹜 +骠 +缥 +缦 +缧 +缨 +骢 +缪 +缫 +耦 +耧 +瑾 +璜 +璀 +璎 +璁 +璋 +璇 +奭 +髯 +髫 +撷 +撅 +赭 +撸 +鋆 +撙 +撺 +墀 +聩 +觐 +鞑 +蕙 +鞒 +蕈 +蕨 +蕤 +蕞 +蕺 +瞢 +蕃 +蕲 +赜 +槿 +樯 +槭 +樗 +樘 +樊 +槲 +醌 +醅 +靥 +魇 +餍 +磔 +磙 +霈 +辘 +龉 +龊 +觑 +瞌 +瞋 +瞑 +嘭 +噎 +噶 +颙 +暹 +噘 +踔 +踝 +踟 +踒 +踬 +踮 +踯 +踺 +踞 +蝽 +蝾 +蝻 +蝰 +蝮 +螋 +蝓 +蝣 +蝼 +噗 +嘬 +颚 +噍 +噢 +噙 +噜 +噌 +噔 +颛 +幞 +幡 +嶙 +嶝 +骺 +骼 +骸 +镊 +镉 +镌 +镍 +镏 +镒 +镓 +镔 +稷 +箴 +篑 +篁 +篌 +篆 +牖 +儋 +徵 +磐 +虢 +鹞 +膘 +滕 +鲠 +鲡 +鲢 +鲣 +鲥 +鲧 +鲩 +獗 +獠 +觯 +馓 +馔 +麾 +廛 +瘛 +瘼 +瘢 +瘠 +齑 +羯 +羰 +𥻗 +遴 +糌 +糍 +糅 +熜 +熵 +熠 +澍 +澌 +潸 +潦 +潲 +鋈 +潟 +潼 +潺 +憬 +憧 +寮 +窳 +谳 +褴 +褟 +褫 +谵 +熨 +屦 +嬉 +勰 +戮 +蝥 +缬 +缮 +缯 +骣 +畿 +耩 +耨 +耪 +璞 +璟 +靛 +璠 +璘 +聱 +螯 +髻 +髭 +髹 +擀 +熹 +甏 +擞 +縠 +磬 +颞 +蕻 +鞘 +颟 +薤 +薨 +檠 +薏 +薮 +薜 +薅 +樾 +橛 +橇 +樵 +檎 +橹 +樽 +樨 +橼 +墼 +橐 +翮 +醛 +醐 +醍 +醚 +磲 +赝 +飙 +殪 +霖 +霏 +霓 +錾 +辚 +臻 +遽 +氅 +瞟 +瞠 +瞰 +嚄 +嚆 +噤 +暾 +蹀 +踹 +踵 +踽 +蹉 +蹁 +螨 +蟒 +螈 +螅 +螭 +螠 +螟 +噱 +噬 +噫 +噻 +噼 +罹 +圜 +䦃 +镖 +镗 +镘 +镚 +镛 +镝 +镞 +镠 +氇 +氆 +憩 +穑 +篝 +篥 +篦 +篪 +篙 +盥 +劓 +翱 +魉 +魈 +徼 +歙 +膳 +膦 +膙 +鲮 +鲱 +鲲 +鲳 +鲴 +鲵 +鲷 +鲻 +獴 +獭 +獬 +邂 +鹧 +廨 +赟 +瘰 +廪 +瘿 +瘵 +瘴 +癃 +瘳 +斓 +麇 +麈 +嬴 +壅 +羲 +糗 +瞥 +甑 +燎 +燠 +燔 +燧 +濑 +濉 +潞 +澧 +澹 +澥 +澶 +濂 +褰 +寰 +窸 +褶 +禧 +嬖 +犟 +隰 +嬗 +颡 +缱 +缲 +缳 +璨 +璩 +璐 +璪 +螫 +擤 +壕 +觳 +罄 +擢 +薹 +鞡 +鞬 +薷 +薰 +藓 +藁 +檄 +檩 +懋 +醢 +翳 +礅 +磴 +鹩 +龋 +龌 +豳 +壑 +黻 +嚏 +嚅 +蹑 +蹒 +蹊 +蟥 +螬 +螵 +疃 +螳 +蟑 +嚓 +羁 +罽 +罾 +嶷 +黜 +黝 +髁 +髀 +镡 +镢 +镣 +镦 +镧 +镩 +镪 +镫 +罅 +黏 +簌 +篾 +篼 +簖 +簋 +鼢 +黛 +儡 +鹪 +鼾 +皤 +魍 +龠 +繇 +貘 +邈 +貔 +臌 +膻 +臆 +臃 +鲼 +鲽 +鳀 +鳃 
+鳅 +鳇 +鳊 +螽 +燮 +鹫 +襄 +糜 +縻 +膺 +癍 +麋 +懑 +濡 +濮 +濞 +濠 +濯 +蹇 +謇 +邃 +襁 +檗 +擘 +孺 +隳 +嬷 +蟊 +鹬 +鍪 +鏊 +鳌 +鬈 +鬃 +瞽 +鞯 +鞨 +鞫 +鞧 +鞣 +藜 +藠 +藩 +醪 +蹙 +礓 +燹 +餮 +瞿 +曛 +颢 +曜 +躇 +蹚 +鹭 +蟛 +蟪 +蟠 +蟮 +鹮 +黠 +黟 +髅 +髂 +镬 +镭 +镯 +馥 +簟 +簪 +鼬 +雠 +艟 +鳎 +鳏 +鳐 +癞 +癔 +癜 +癖 +糨 +蹩 +鎏 +懵 +彝 +邋 +鬏 +攉 +攒 +鞲 +鞴 +藿 +蘧 +蘅 +麓 +醮 +醯 +酃 +霪 +霭 +霨 +黼 +嚯 +蹰 +蹶 +蹽 +蹼 +蹴 +蹾 +蹿 +蠖 +蠓 +蟾 +蠊 +黢 +髋 +髌 +镲 +籀 +籁 +齁 +魑 +艨 +鳓 +鳔 +鳕 +鳗 +鳙 +麒 +鏖 +羸 +㸆 +瀚 +瀣 +瀛 +襦 +谶 +襞 +骥 +缵 +瓒 +攘 +蘩 +蘖 +醴 +霰 +酆 +矍 +曦 +躅 +鼍 +巉 +黩 +黥 +黪 +镳 +镴 +黧 +纂 +璺 +鼯 +臜 +鳜 +鳝 +鳟 +獾 +孀 +骧 +瓘 +鼙 +醺 +礴 +颦 +曩 +鳢 +癫 +麝 +夔 +爝 +灏 +禳 +鐾 +羼 +蠡 +耱 +懿 +蘸 +鹳 +霾 +氍 +饕 +躐 +髑 +镵 +穰 +饔 +鬻 +鬟 +趱 +攫 +攥 +颧 +躜 +鼹 +癯 +麟 +蠲 +蠹 +躞 +衢 +鑫 +灞 +襻 +纛 +鬣 +攮 +囔 +馕 +戆 +爨 +齉 +亍 +尢 +彳 +卬 +殳 +𠙶 +毌 +邘 +戋 +圢 +氕 +伋 +仝 +冮 +氿 +汈 +氾 +忉 +宄 +讱 +扞 +圲 +圫 +芏 +芃 +朳 +朸 +𨙸 +邨 +吒 +吖 +屼 +屾 +辿 +钆 +仳 +伣 +伈 +癿 +甪 +邠 +犴 +冱 +邡 +闫 +汋 +䜣 +讻 +孖 +纩 +玒 +玓 +玘 +玚 +刬 +坜 +坉 +扽 +坋 +扺 +㧑 +毐 +芰 +芣 +苊 +苉 +芘 +芴 +芠 +芤 +杕 +杙 +杄 +杧 +杩 +尪 +尨 +轪 +坒 +芈 +旴 +旵 +呙 +㕮 +岍 +岠 +岜 +呇 +冏 +觃 +岙 +伾 +㑇 +伭 +佖 +伲 +佁 +飏 +狃 +闶 +汧 +汫 +𣲘 +𣲗 +沄 +沘 +汭 +㳇 +沇 +忮 +忳 +忺 +祃 +诇 +邲 +诎 +诐 +屃 +岊 +阽 +䢺 +阼 +妧 +妘 +𨚕 +纮 +驲 +纻 +纼 +玤 +玞 +玱 +玟 +邽 +邿 +坥 +坰 +坬 +坽 +弆 +耵 +䢼 +𦭜 +茋 +苧 +苾 +苠 +枅 +㭎 +枘 +枍 +矼 +矻 +匼 +旿 +昇 +昄 +昒 +昈 +咉 +咇 +咍 +岵 +岽 +岨 +岞 +峂 +㟃 +囷 +钐 +钔 +钖 +牥 +佴 +垈 +侁 +侹 +佸 +佺 +隹 +㑊 +侂 +佽 +侘 +郈 +舠 +郐 +郃 +攽 +肭 +肸 +肷 +狉 +狝 +饳 +忞 +於 +炌 +炆 +泙 +沺 +泂 +泜 +泃 +泇 +怊 +峃 +穸 +祋 +祊 +鸤 +弢 +弨 +陑 +陎 +卺 +乸 +妭 +姈 +迳 +叕 +驵 +䌹 +驺 +绋 +绐 +砉 +耔 +㛃 +玶 +珇 +珅 +珋 +玹 +珌 +玿 +韨 +垚 +垯 +垙 +垲 +埏 +垍 +耇 +垎 +垴 +垟 +垞 +挓 +垵 +垏 +拶 +荖 +荁 +荙 +荛 +茈 +茽 +荄 +茺 +荓 +茳 +𦰡 +茛 +荭 +㭕 +柷 +柃 +柊 +枹 +栐 +柖 +郚 +剅 +䴓 +迺 +厖 +砆 +砑 +砄 +耏 +奓 +䶮 +轵 +轷 +轹 +轺 +昺 +昽 +盷 +咡 +咺 +昳 +昣 +哒 +昤 +昫 +昡 +咥 +昪 +虷 +虸 +哃 +峘 +耑 +峛 +峗 +峧 +帡 +钘 +钜 +钪 +钬 +钭 +矧 +秬 +俫 +舁 +俜 +俙 +俍 +垕 +衎 +舣 +弇 +侴 +鸧 +䏡 +胠 +𦙶 +胈 +胩 +胣 +朏 +飐 +訄 +饻 +庤 +疢 +炣 +炟 +㶲 +洭 +洘 +洓 +洿 +㳚 +泚 +浈 +浉 +洸 +洑 +洢 +洈 +洚 +洺 +洨 +浐 +㳘 +洴 +洣 +恔 +宬 +窀 +扂 +袆 +祏 +祐 +祕 +叚 +陧 +陞 +娀 +姞 +姱 +姤 +姶 +姽 +枲 +绖 +骃 +彖 +骉 +恝 +珪 +珛 +珹 +琊 +玼 +珖 +珽 +珦 +珫 +珒 +珢 +珕 +珝 +埗 +垾 +垺 +埆 +垿 +埌 +埇 +莰 +茝 +鄀 +莶 +莝 +䓖 +莙 +栻 +桠 +桄 +梠 +栴 +梴 +栒 +酎 +酏 +砵 +砠 +砫 +砬 +硁 +恧 +翃 +郪 +𨐈 +辀 +辁 +剕 +赀 +哢 +晅 +晊 +唝 +哳 +哱 +冔 +晔 +晐 +晖 +畖 +蚄 +蚆 +帱 +崁 +峿 +崄 +帨 +崀 +赆 +钷 +眚 +甡 +笫 +倻 +倴 +脩 +倮 +倕 +倞 +倓 +倧 +衃 +虒 +舭 +舯 +舥 +瓞 +鬯 +鸰 +脎 +朓 +胲 +虓 +鱽 +狴 +峱 +狻 +眢 +勍 +痄 +疰 +痃 +竘 +羖 +羓 +桊 +敉 +烠 +烔 +烶 +烻 +涍 +浡 +浭 +浬 +涄 +涢 +涐 +浰 +浟 +浛 +浼 +浲 +涘 +悈 +悃 +悢 +宧 +窅 +窊 +窎 +扅 +扆 +袪 +袗 +袯 +祧 +隺 +堲 +疍 +𨺙 +陴 +烝 +砮 +㛚 +哿 +翀 +翂 +剟 +绤 +骍 +䂮 +琎 +珸 +珵 +琄 +琈 +琀 +珺 +掭 +堎 +堐 +埼 +掎 +埫 +堌 +晢 +掞 +埪 +壸 +㙍 +聍 +菝 +萚 +菥 +莿 +䓫 +勚 +䓬 +萆 +菂 +菍 +菼 +萣 +䓨 +菉 +䓛 +梼 +梽 +桲 +梾 +桯 +梣 +梌 +桹 +敔 +厣 +硔 +硙 +硚 +硊 +硍 +勔 +䴕 +龁 +逴 +唪 +啫 +翈 +㫰 +晙 +畤 +趼 +跂 +蛃 +蚲 +蚺 +啴 +䎃 +崧 +崟 +崞 +崒 +崌 +崡 +铏 +铕 +铖 +铘 +铚 +铞 +铥 +铴 +牻 +牿 +稆 +笱 +笯 +偰 +偡 +鸺 +偭 +偲 +偁 +㿠 +鄅 +偓 +徛 +衒 +舳 +舲 +鸼 +悆 +鄃 +瓻 +䝙 +脶 +脞 +脟 +䏲 +鱾 +猇 +猊 +猄 +觖 +𠅤 +庱 +庼 +庳 +痓 +䴔 +竫 +堃 +阌 +羝 +羕 +焆 +烺 +焌 +淏 +淟 +淜 +淴 +淯 +湴 +涴 +㥄 +惛 +惔 +悰 +惙 +寁 +逭 +袼 +裈 +祲 +谞 +艴 +弸 +弶 +隃 +婞 +娵 +婼 +媖 +婳 +婍 +婌 +婫 +婤 +婘 +婠 +绹 +骕 +絜 +珷 +琲 +琡 +琟 +琔 +琭 +堾 +堼 +揕 +㙘 +堧 +喆 +堨 +塅 +堠 +絷 +𡎚 +葜 +惎 +萳 +葙 +靬 +葴 +蒇 +蒈 +鄚 +蒉 +蓇 +萩 +蒐 +葰 +葎 +鄑 +蒎 +葖 +蒄 +萹 +棤 +棽 +棫 +椓 +椑 +鹀 +椆 +棓 +棬 +棪 +椀 +楗 +甦 +酦 +觌 +奡 +皕 +硪 +欹 +詟 +辌 +棐 +龂 +黹 +牚 +睎 +晫 +晪 +晱 +𧿹 +蛑 +畯 +斝 +喤 +崶 +嵁 +崾 +嵅 +崿 +嵚 +翙 +圌 +圐 +赑 +淼 +赒 +铹 +铽 +𨱇 +锊 +锍 +锎 +锓 +犇 +颋 +稌 +筀 +筘 +筜 +筥 +筅 +傃 +傉 +翛 +傒 +傕 +舾 +畬 +脿 +腘 +䐃 +腙 +腒 +鲃 +猰 +猯 +㺄 +馉 +鄗 +廋 +廆 +鄌 +粢 +遆 +旐 +焞 +欻 +𣸣 +溚 +溁 +湝 +渰 +湓 +㴔 +渟 +溠 +渼 +溇 +湣 +湑 +溞 +愐 +愃 +敩 +甯 +棨 +扊 +裣 +祼 +婻 +媆 +媞 +㛹 +媓 +媂 +媄 +毵 +矞 +缊 +缐 +骙 +瑃 +瑓 +瑅 +瑆 +䴖 +瑖 +瑝 +瑔 +瑀 +𤧛 +瑳 +瑂 +嶅 +瑑 +遘 +髢 +塥 +堽 +赪 +摛 +塝 +搒 +搌 +蒱 +蒨 +蓏 +蔀 +蓢 +蓂 +蒻 +蓣 +椹 +楪 +榃 +榅 +楒 +楞 +楩 +榇 +椸 +楙 +歅 +碃 +碏 +碈 +䃅 +硿 +鄠 +辒 +龆 +觜 +䣘 +暕 +鹍 +㬊 +暅 +跱 +蜐 +蜎 +嵲 +赗 +骱 +锖 +锘 +锳 +锧 +锪 +锫 +锬 +稑 +稙 +䅟 +筻 +筼 +筶 +筦 +筤 +傺 +鹎 +僇 +艅 +艉 +谼 +貆 +腽 +腨 +腯 +鲉 +鲊 +鲌 +䲟 +鲏 +雊 +猺 +飔 +觟 +𦝼 +馌 +裛 +廒 +瘀 +瘅 +鄘 +鹒 +鄜 +麀 +鄣 
+阘 +煁 +煃 +煴 +煋 +煟 +煓 +滠 +溍 +溹 +滆 +滉 +溦 +溵 +漷 +滧 +滘 +滍 +愭 +慥 +慆 +塱 +裼 +禋 +禔 +禘 +禒 +谫 +鹔 +愍 +嫄 +媱 +戤 +戣 +缞 +耤 +瑧 +瑨 +瑱 +瑷 +瑢 +斠 +摏 +墕 +墈 +墐 +墘 +摴 +銎 +𡐓 +墚 +撖 +靽 +鞁 +蔌 +蔈 +蓰 +蔹 +蔊 +嘏 +榰 +榑 +槚 +𣗋 +槜 +榍 +疐 +酺 +酾 +酲 +酴 +碶 +䃎 +碨 +𥔲 +碹 +碥 +劂 +䴗 +夥 +瞍 +鹖 +㬎 +跽 +蜾 +幖 +嶍 +圙 +𨱏 +锺 +锼 +锽 +锾 +锿 +镃 +镄 +镅 +馝 +鹙 +箨 +箖 +劄 +僬 +僦 +僔 +僎 +槃 +㙦 +鲒 +鲕 +鲖 +鲗 +鲘 +鲙 +𩽾 +夐 +獍 +飗 +凘 +廑 +廙 +瘗 +瘥 +瘕 +鲝 +鄫 +熇 +漹 +漖 +潆 +漤 +潩 +漼 +漴 +㽏 +漈 +漋 +漻 +慬 +窬 +窭 +㮾 +褕 +禛 +禚 +隩 +嫕 +嫭 +嫜 +嫪 +㻬 +麹 +璆 +漦 +叇 +墣 +墦 +墡 +劐 +薁 +蕰 +蔃 +鼒 +槱 +鹝 +磏 +磉 +殣 +慭 +霅 +暵 +暲 +暶 +踦 +踣 +䗖 +蝘 +蝲 +蝤 +噇 +噂 +噀 +罶 +嶲 +嶓 +㠇 +嶟 +嶒 +镆 +镈 +镋 +镎 +镕 +稹 +儇 +皞 +皛 +䴘 +艎 +艏 +鹟 +𩾃 +鲦 +鲪 +鲬 +橥 +觭 +鹠 +鹡 +糇 +糈 +翦 +鹢 +鹣 +熛 +潖 +潵 +㵐 +澂 +澛 +瑬 +潽 +潾 +潏 +憭 +憕 +戭 +褯 +禤 +嫽 +遹 +璥 +璲 +璒 +憙 +擐 +鄹 +薳 +鞔 +黇 +蕗 +薢 +蕹 +橞 +橑 +橦 +醑 +觱 +磡 +𥕢 +磜 +豮 +鹾 +虤 +暿 +曌 +曈 +㬚 +蹅 +踶 +䗛 +螗 +疁 +㠓 +幪 +嶦 +𨱑 +馞 +穄 +篚 +篯 +簉 +鼽 +衠 +盦 +螣 +縢 +鲭 +鲯 +鲰 +鲺 +鲹 +亸 +癀 +瘭 +羱 +糒 +燋 +熻 +燊 +燚 +燏 +濩 +濋 +澪 +澽 +澴 +澭 +澼 +憷 +憺 +懔 +黉 +嬛 +鹨 +翯 +璱 +𤩽 +璬 +璮 +髽 +擿 +薿 +薸 +檑 +櫆 +檞 +醨 +繄 +磹 +磻 +瞫 +瞵 +蹐 +蟏 +㘎 +镤 +镥 +镨 +𨱔 +矰 +穙 +穜 +穟 +簕 +簃 +簏 +儦 +魋 +斶 +艚 +谿 +䲠 +鲾 +鲿 +鳁 +鳂 +鳈 +鳉 +獯 +䗪 +馘 +襕 +襚 +螱 +甓 +嬬 +嬥 +𦈡 +瓀 +釐 +鬶 +爇 +鞳 +鞮 +藟 +藦 +藨 +鹲 +檫 +黡 +礞 +礌 +𥖨 +蹢 +蹜 +蟫 +䗴 +嚚 +髃 +镮 +镱 +酂 +馧 +簠 +簝 +簰 +鼫 +鼩 +皦 +臑 +䲢 +鳑 +鳒 +鹱 +鹯 +癗 +𦒍 +旞 +翷 +冁 +䎖 +瀔 +瀍 +瀌 +襜 +䴙 +嚭 +㰀 +鬷 +醭 +蹯 +蠋 +翾 +鳘 +儳 +儴 +鼗 +𩾌 +鳚 +鳛 +麑 +麖 +蠃 +彟 +嬿 +鬒 +蘘 +欂 +醵 +颥 +甗 +𨟠 +巇 +酅 +髎 +犨 +𨭉 +㸌 +爔 +瀱 +瀹 +瀼 +瀵 +襫 +孅 +骦 +耰 +𤫉 +瓖 +鬘 +趯 +罍 +鼱 +鳠 +鳡 +鳣 +爟 +爚 +灈 +韂 +糵 +蘼 +礵 +鹴 +躔 +皭 +龢 +鳤 +亹 +籥 +鼷 +玃 +醾 +齇 +觿 +蠼 +𬣙 +𬇕 +𬣞 +𬘓 +𫭟 +𫭢 +𫇭 +𫐄 +𫵷 +𬇙 +𬣡 +𫸩 +𫘜 +𬘘 +𫘝 +𬨂 +𬀩 +𬀪 +𬬩 +𫍣 +𬣳 +𬩽 +𬮿 +𬯀 +𫰛 +𬳵 +𬳶 +𫠊 +𬍛 +鿍 +𬜬 +𪾢 +𪨰 +𫓧 +𬬮 +𬬱 +𬬭 +𬘡 +𬳽 +𬘩 +𫄧 +𪟝 +𬍤 +𫭼 +𬜯 +𬂩 +𫠆 +𬌗 +𫑡 +𪨶 +𬬸 +𬬻 +𬬹 +𬬿 +𬭁 +𫢸 +𫗧 +𬊈 +𬒈 +𬳿 +𫄨 +𬘫 +𫮃 +鿎 +𬱖 +𬟽 +𫓯 +𫟹 +𫟼 +𬇹 +𬍡 +𬤇 +𫍯 +𬤊 +𫍲 +𬯎 +𬘬 +𬘭 +𬴂 +𫘦 +𫟅 +𬘯 +𫘧 +𪣻 +𬃊 +𬷕 +𫐐 +𬹼 +𫶇 +𫖮 +鿏 +𬭊 +𫓶 +𬭎 +𫖯 +𬱟 +𫛭 +𫷷 +𬮱 +𬊤 +𬴃 +𫘨 +𬪩 +𬒔 +𬨎 +𫐓 +𫫇 +𫓹 +𬭚 +𬭛 +𬕂 +𬶋 +𬶍 +𫔶 +𫌀 +𫖳 +𫘪 +𫘬 +𫞩 +𪤗 +𬸘 +𬒗 +𫚖 +𬭤 +𫚕 +𬶐 +𬶏 +𬸚 +𬤝 +𬙂 +𬭩 +𬸣 +𫍽 +𬴊 +𬞟 +𫟦 +𬺈 +𫠜 +𪩘 +𬭬 +𬭯 +𫗴 +𬸦 +𫄷 +𬭳 +𬭶 +𫔍 +𬭸 +𬭼 +𫔎 +𬸪 +𬶟 +𬶠 +𬶨 +𫄸 +𬟁 +𬙊 +𬶭 +𬶮 +𬙋 +𬺓 +𫚭 +廠 +蔔 +兒 +幾 +幹 +虧 +纔 +與 +萬 +韆 +億 +個 +廣 +門 +義 +衛 +飛 +習 +馬 +鄉 +豐 +開 +無 +雲 +專 +藝 +廳 +區 +歷 +曆 +車 +貝 +岡 +見 +氣 +長 +僕 +幣 +僅 +從 +侖 +倉 +風 +烏 +鳳 +爲 +鬥 +憶 +計 +訂 +認 +譏 +醜 +隊 +辦 +鄧 +勸 +雙 +書 +擊 +撲 +節 +術 +厲 +龍 +滅 +軋 +東 +盧 +業 +舊 +帥 +歸 +葉 +電 +號 +衹 +隻 +嘰 +嘆 +們 +儀 +叢 +爾 +樂 +處 +鼕 +鳥 +務 +飢 +饑 +馮 +閃 +蘭 +匯 +彙 +頭 +漢 +寧 +討 +寫 +讓 +禮 +訓 +議 +訊 +記 +齣 +遼 +邊 +發 +髮 +聖 +對 +臺 +颱 +檯 +糾 +絲 +動 +鞏 +執 +擴 +掃 +場 +揚 +亞 +樸 +機 +權 +過 +協 +壓 +厭 +頁 +誇 +奪 +達 +夾 +軌 +堯 +劃 +邁 +畢 +貞 +師 +塵 +當 +噹 +籲 +嚇 +蟲 +麯 +團 +糰 +嗎 +嶼 +歲 +迴 +豈 +則 +剛 +網 +硃 +遷 +喬 +偉 +傳 +優 +傷 +價 +倫 +華 +僞 +嚮 +後 +會 +殺 +閤 +衆 +爺 +傘 +創 +雜 +負 +壯 +衝 +妝 +莊 +慶 +劉 +齊 +産 +閉 +問 +闖 +關 +燈 +湯 +興 +講 +諱 +軍 +訝 +許 +訛 +論 +訟 +農 +諷 +設 +訪 +訣 +尋 +盡 +儘 +導 +孫 +陣 +陽 +階 +陰 +婦 +媽 +戲 +觀 +歡 +買 +紅 +馱 +纖 +縴 +馴 +約 +級 +紀 +馳 +紉 +壽 +麥 +瑪 +進 +遠 +違 +韌 +運 +撫 +壇 +罎 +壞 +摳 +擾 +貢 +垻 +壩 +摺 +掄 +搶 +墳 +護 +殻 +塊 +聲 +報 +擬 +蕪 +葦 +蒼 +嚴 +蘆 +勞 +蘇 +囌 +極 +楊 +兩 +麗 +醫 +勵 +還 +殲 +來 +連 +軒 +鹵 +滷 +堅 +時 +縣 +裏 +嘔 +園 +曠 +圍 +噸 +郵 +睏 +員 +聽 +嗆 +嗚 +彆 +嶇 +崗 +帳 +財 +針 +釘 +亂 +體 +傭 +徹 +餘 +穀 +鄰 +腸 +龜 +猶 +狽 +條 +島 +飯 +飲 +係 +繫 +凍 +狀 +畝 +庫 +療 +應 +這 +廬 +閏 +閑 +間 +悶 +竈 +燦 +瀝 +淪 +滄 +溝 +滬 +瀋 +懷 +憂 +窮 +證 +啓 +評 +補 +識 +詐 +訴 +診 +詞 +譯 +靈 +層 +遲 +張 +際 +陸 +陳 +墜 +勁 +鷄 +緯 +驅 +純 +紗 +綱 +納 +駁 +縱 +紛 +紙 +紋 +紡 +驢 +紐 +環 +責 +現 +錶 +規 +攏 +揀 +擔 +頂 +擁 +勢 +攔 +擰 +撥 +擇 +蘋 +範 +莖 +樞 +櫃 +闆 +鬆 +槍 +楓 +構 +喪 +畫 +棗 +賣 +鬱 +礬 +礦 +碼 +厠 +奮 +態 +歐 +毆 +壟 +轟 +頃 +轉 +斬 +輪 +軟 +齒 +虜 +腎 +賢 +國 +暢 +嚨 +鳴 +羅 +幟 +嶺 +凱 +敗 +賬 +販 +貶 +購 +貯 +圖 +釣 +製 +颳 +俠 +僥 +偵 +側 +憑 +僑 +貨 +質 +徑 +捨 +覓 +貪 +貧 +膚 +腫 +脹 +骯 +脅 +魚 +獰 +備 +飾 +飽 +飼 +變 +龐 +廟 +瘧 +劑 +廢 +閘 +鬧 +鄭 +捲 +單 +爐 +淺 +濘 +瀉 +潑 +澤 +憐 +學 +寶 +寵 +審 +簾 +實 +試 +詩 +誠 +襯 +視 +話 +誕 +詭 +詢 +該 +詳 +肅 +録 +隸 +彌 +瀰 +陝 +駕 +參 +艱 +綫 +練 +組 +紳 +細 +駛 +織 +駒 +終 +駐 +絆 +駝 +紹 +繹 +經 +貫 +貳 +幫 +項 +挾 +撓 +趙 +擋 +墊 +擠 +揮 +薦 +帶 +繭 
+蕩 +榮 +葷 +熒 +鬍 +蔭 +藥 +標 +棧 +棟 +欄 +檸 +樹 +鹹 +磚 +硯 +麵 +牽 +鷗 +殘 +軸 +輕 +鴉 +戰 +點 +臨 +覽 +竪 +嘗 +啞 +顯 +貴 +蝦 +蟻 +螞 +雖 +駡 +勛 +嘩 +響 +喲 +峽 +罰 +賤 +貼 +貽 +鈣 +鈍 +鈔 +鍾 +鐘 +鋼 +鈉 +鑰 +欽 +鈞 +鈎 +鈕 +氈 +氫 +選 +適 +種 +鞦 +復 +複 +倆 +貸 +順 +儉 +須 +鬚 +劍 +朧 +膽 +勝 +狹 +獅 +獨 +獄 +貿 +餌 +饒 +蝕 +餃 +餅 +巒 +彎 +將 +奬 +瘡 +瘋 +親 +閨 +聞 +閩 +閥 +閣 +養 +薑 +類 +婁 +總 +煉 +爍 +爛 +窪 +潔 +灑 +澆 +濁 +測 +瀏 +濟 +渾 +濃 +惱 +舉 +覺 +憲 +竊 +誡 +誣 +語 +襖 +誤 +誘 +誨 +説 +誦 +墾 +晝 +費 +遜 +隕 +險 +嬌 +賀 +壘 +綁 +絨 +結 +繞 +驕 +繪 +給 +絢 +駱 +絡 +絶 +絞 +駭 +統 +艷 +蠶 +頑 +盞 +撈 +載 +趕 +鹽 +損 +撿 +摯 +剝 +熱 +搗 +壺 +聶 +萊 +蓮 +獲 +穫 +惡 +噁 +瑩 +鶯 +檔 +橋 +樺 +樁 +樣 +賈 +礫 +礎 +顧 +轎 +較 +頓 +斃 +緻 +慮 +監 +緊 +黨 +曬 +曉 +嘮 +鴨 +暈 +鴦 +罷 +圓 +賊 +賄 +賂 +贜 +錢 +鉗 +鑽 +鉀 +鐵 +鈴 +鉛 +犧 +敵 +積 +稱 +筆 +債 +傾 +賃 +艦 +艙 +聳 +愛 +頒 +頌 +臟 +髒 +臍 +膠 +腦 +膿 +鴕 +鴛 +皺 +餓 +餒 +戀 +槳 +漿 +準 +癥 +齋 +離 +資 +競 +閲 +煩 +燒 +燭 +遞 +濤 +澇 +渦 +塗 +滌 +潤 +澗 +漲 +燙 +澀 +憫 +寬 +傢 +賓 +竅 +請 +諸 +諾 +讀 +誹 +襪 +課 +誰 +調 +諒 +諄 +談 +誼 +懇 +劇 +難 +預 +絹 +綉 +驗 +繼 +駿 +瑣 +擲 +據 +摻 +職 +蘿 +螢 +營 +蕭 +薩 +夢 +檢 +醖 +碩 +聾 +襲 +輔 +輛 +顱 +懸 +躍 +纍 +囉 +嘯 +嶄 +邏 +嬰 +銬 +鐺 +鋁 +銅 +銘 +鏟 +銀 +矯 +穢 +籠 +償 +軀 +釁 +銜 +盤 +鴿 +斂 +領 +臉 +獵 +餡 +館 +癢 +鏇 +閻 +闡 +蓋 +斷 +獸 +鴻 +漸 +淵 +漁 +澱 +滲 +慚 +懼 +驚 +慘 +慣 +謀 +諜 +謊 +諧 +禱 +禍 +謂 +諺 +謎 +彈 +墮 +隨 +隱 +嬸 +頗 +頸 +績 +緒 +續 +騎 +綽 +繩 +維 +綿 +綳 +綢 +綜 +綻 +緑 +綴 +瓊 +趨 +攬 +攙 +擱 +摟 +攪 +聯 +蔣 +韓 +橢 +確 +頰 +靂 +暫 +翹 +輩 +鑿 +輝 +賞 +睞 +噴 +疇 +踐 +遺 +鵑 +賦 +賭 +贖 +賜 +賠 +鑄 +鋪 +鏈 +銷 +鎖 +鋤 +鍋 +銹 +鋒 +鋅 +鋭 +鵝 +築 +篩 +儲 +懲 +禦 +釋 +臘 +魯 +憊 +饋 +饞 +裝 +蠻 +闊 +糞 +滯 +濕 +潰 +濺 +灣 +憤 +竄 +窩 +褲 +禪 +謝 +謡 +謗 +謙 +屬 +屢 +緬 +纜 +緝 +緞 +緩 +締 +縷 +騙 +編 +騷 +緣 +鵡 +攝 +擺 +襬 +攤 +鵲 +藍 +濛 +懞 +矇 +獻 +欖 +樓 +賴 +礙 +尷 +霧 +輻 +輯 +輸 +頻 +齡 +鑒 +蹺 +蝸 +錯 +錨 +錫 +鑼 +錘 +錐 +錦 +鍵 +鋸 +錳 +辭 +頽 +籌 +簽 +籤 +簡 +膩 +鵬 +騰 +鮑 +穎 +觸 +雛 +饃 +餾 +醬 +謄 +糧 +數 +滿 +濾 +濫 +灕 +濱 +灘 +譽 +窺 +寢 +謹 +謬 +闢 +縛 +縫 +纏 +繽 +贅 +墻 +衊 +藹 +檻 +釀 +願 +轄 +輾 +顆 +踴 +蠟 +蠅 +蟬 +賺 +鍬 +鍛 +鍍 +穩 +籮 +簫 +輿 +鮮 +饅 +瀟 +賽 +譚 +譜 +騾 +縮 +攆 +聰 +藴 +櫻 +飄 +黴 +瞞 +題 +囑 +鎮 +鎬 +鎊 +簍 +鯉 +鯽 +癟 +癱 +顔 +鯊 +瀾 +額 +譴 +鶴 +繚 +顛 +轍 +鸚 +贈 +鏡 +贊 +籃 +籬 +鯨 +癮 +辯 +瀕 +懶 +繮 +繳 +矚 +贍 +鰐 +辮 +贏 +驟 +囂 +鐮 +鰭 +鷹 +巔 +顫 +癬 +鱉 +鬢 +鱗 +躪 +贛 +鑲 +韋 +閂 +訃 +勱 +芻 +鄺 +訐 +訌 +訕 +訖 +馭 +璣 +壙 +捫 +薌 +厙 +釔 +傴 +倀 +傖 +獷 +獁 +鳬 +鄔 +餳 +懺 +謳 +詎 +訥 +紆 +紂 +紇 +紈 +璵 +摶 +塢 +㩳 +蕓 +藶 +莧 +萇 +蓯 +磯 +奩 +歟 +軔 +鄴 +嘸 +囈 +嚦 +暘 +唄 +幃 +峴 +嵐 +圇 +釗 +釙 +釕 +僉 +鳩 +鄒 +飩 +餼 +飪 +飫 +飭 +廡 +癤 +闈 +閎 +閔 +煬 +灃 +漚 +渢 +潙 +憮 +慪 +愾 +悵 +愴 +詁 +訶 +詛 +詆 +謅 +詔 +詒 +隴 +陘 +嫵 +嫗 +嬀 +剄 +紜 +紕 +紝 +綸 +紓 +瑋 +匭 +壚 +擓 +蘢 +蔦 +塋 +煢 +櫪 +梘 +棖 +樅 +碭 +甌 +郟 +軛 +鳶 +曇 +蟣 +黽 +嚀 +噝 +巋 +劌 +剴 +嶧 +釷 +釺 +釧 +釩 +釹 +釵 +儈 +儕 +儂 +劊 +慫 +糴 +戧 +膞 +邇 +梟 +餞 +飴 +癘 +瘍 +煒 +熰 +熗 +瀧 +瀘 +濼 +涇 +㥮 +懌 +誆 +誄 +詿 +詰 +詼 +鄆 +禕 +誅 +詵 +詬 +詮 +詣 +諍 +詫 +諢 +詡 +駑 +紺 +紲 +紱 +駟 +駙 +縐 +絀 +驛 +駘 +瓏 +頇 +埡 +撾 +撻 +賁 +壋 +撏 +莢 +貰 +蓽 +蕎 +薈 +薺 +堊 +滎 +犖 +蕁 +藎 +蓀 +蕒 +葤 +櫛 +櫳 +櫨 +櫟 +檉 +酈 +硨 +碸 +殤 +軲 +軻 +轤 +軼 +軫 +蠆 +覘 +瞘 +嘵 +嗶 +噦 +剮 +鄖 +噲 +噥 +嶢 +幀 +嶠 +貺 +鈈 +鈦 +鋇 +鈑 +鈐 +鎢 +鈁 +鈀 +篤 +儔 +儼 +儷 +腖 +臚 +脛 +鴇 +獪 +颮 +猻 +餉 +餄 +餎 +孿 +孌 +癧 +瘲 +颯 +闥 +閭 +闓 +閡 +熾 +烴 +浹 +澮 +滸 +潯 +濜 +慟 +懨 +愷 +惻 +惲 +誚 +禰 +誥 +誑 +鴆 +婭 +嬈 +懟 +絝 +驍 +驊 +絎 +絳 +駢 +頊 +璫 +琿 +塒 +塤 +堝 +贄 +蒔 +萵 +蕕 +鴣 +蒓 +橈 +楨 +榿 +檜 +邐 +礪 +礱 +軾 +輊 +輅 +鶇 +躉 +齔 +鸕 +矓 +嘜 +鴞 +蜆 +嗩 +嶗 +崍 +覬 +賅 +鈺 +鉦 +鈷 +鉢 +鈸 +鉞 +鉭 +鉬 +鈿 +鈾 +鉑 +鑠 +鉚 +鈰 +鉉 +鉈 +鉍 +鈮 +鈹 +鏺 +鐸 +氬 +筧 +頎 +徠 +膾 +鴟 +璽 +鴝 +獫 +裊 +餑 +欒 +攣 +癰 +痙 +頏 +閫 +鬮 +誾 +閬 +鄲 +燁 +燴 +燼 +淶 +漣 +潿 +慳 +諏 +諑 +禎 +諉 +諛 +諗 +諂 +誶 +媧 +嫻 +綆 +驪 +綃 +騁 +綏 +縧 +綈 +駸 +鷥 +燾 +璉 +麩 +擄 +摑 +鷙 +撣 +慤 +摜 +縈 +槤 +覡 +欞 +嗇 +匱 +硤 +磽 +鴯 +龔 +殞 +殮 +賚 +輒 +塹 +嘖 +囀 +嚙 +蹌 +蠣 +蠱 +蟶 +幘 +幗 +賕 +賑 +賒 +銠 +鉺 +鋏 +鐃 +銦 +鎧 +鍘 +銖 +銑 +鋌 +鏵 +銓 +鎩 +鉿 +銚 +鉻 +錚 +銫 +鉸 +銥 +銃 +銨 +銣 +鴰 +穠 +箋 +籩 +僨 +僂 +皚 +鴴 +艫 +龕 +玀 +獼 +餜 +餛 +鸞 +闍 +閾 +閹 +閶 +鬩 +閽 +閼 +羥 +糲 +燜 +漬 +瀆 +澠 +愜 +憚 +諶 +諫 +皸 +謔 +襠 +謁 +諤 +諭 +諼 +讒 +諳 +諦 +諞 +糶 +嬋 +綾 +騏 +綺 +緋 +緔 +騍 +緄 +騅 +綬 +綹 +綣 +綰 +驂 +緇 +靚 +輦 +黿 +頡 +撳 +蟄 +壪 +蔞 +櫝 +欏 +賫 +鵓 +鸝 +殫 +輥 +輞 +槧 +輟 +輜 +瞼 +躒 +蛺 +蟯 +螄 +蠐 +嘍 +嶸 +嶁 +賧 +鋙 +錸 +鏗 +鋥 +鋰 +鋯 +鋨 +銼 +鐧 +銻 +鋃 +鋦 +錒 +犢 +鵠 +篳 +牘 
+儻 +儐 +儺 +嬃 +頜 +鵒 +魷 +魨 +魴 +潁 +颶 +觴 +熲 +餷 +餿 +褻 +臠 +癆 +癇 +賡 +頦 +鷳 +闌 +闃 +闋 +鵜 +憒 +嚳 +謨 +褳 +襇 +讜 +謖 +謚 +謐 +騭 +巰 +翬 +騖 +緙 +緗 +緘 +緹 +緲 +緦 +緱 +縋 +緡 +饗 +耮 +驁 +韞 +攄 +擯 +轂 +驀 +鶓 +薊 +蘺 +鎣 +頤 +櫚 +櫸 +磧 +磣 +鵪 +輳 +齟 +齙 +韙 +囁 +躂 +蹕 +躚 +躋 +噯 +鍺 +錛 +錡 +鍀 +錁 +錕 +錮 +鍁 +錈 +錠 +錙 +覦 +頷 +鮁 +鮃 +鮎 +鱸 +穌 +鮒 +鮐 +鵮 +颼 +饈 +鶉 +瘮 +闔 +闐 +闕 +灧 +瀅 +潷 +灤 +澦 +懾 +鱟 +騫 +竇 +謾 +謫 +嬡 +嬪 +縉 +縝 +縟 +轡 +騮 +縞 +縭 +縊 +縑 +騸 +覯 +韜 +靉 +攖 +薔 +藺 +鶘 +檳 +櫧 +釅 +殯 +霽 +轅 +齜 +齦 +瞜 +曖 +躊 +蟈 +鶚 +嚶 +羆 +賻 +罌 +鶻 +鍥 +鍇 +鍶 +鍔 +鍤 +鏘 +鎂 +鏤 +簀 +篋 +簞 +籙 +臏 +鮭 +鮪 +鱭 +鮫 +鱘 +饉 +鑾 +瘻 +闞 +鮝 +糝 +鷀 +瀲 +濰 +譖 +褸 +譙 +讕 +譎 +鶥 +嬙 +鶩 +驃 +縹 +縵 +縲 +纓 +驄 +繆 +繅 +耬 +瓔 +擷 +擼 +攛 +聵 +覲 +韃 +鞽 +蘄 +賾 +檣 +靨 +魘 +饜 +轆 +齬 +齪 +覷 +顒 +躓 +躑 +蠑 +螻 +顎 +嚕 +顓 +鑷 +鎘 +鎸 +鎳 +鎦 +鎰 +鎵 +鑌 +簣 +鷂 +鯁 +鱺 +鰱 +鰹 +鰣 +鯀 +鯇 +觶 +饊 +饌 +齏 +讞 +襤 +譫 +屨 +纈 +繕 +繒 +驏 +擻 +顳 +顢 +藪 +櫓 +櫞 +贋 +飆 +鏨 +轔 +蟎 +鐯 +鏢 +鏜 +鏝 +鏰 +鏞 +鏑 +鏃 +鏐 +氌 +穡 +魎 +鯪 +鯡 +鯤 +鯧 +鯝 +鯢 +鯛 +鯔 +獺 +鷓 +贇 +癭 +斕 +瀨 +顙 +繾 +繰 +繯 +蘚 +鷯 +齲 +齷 +躡 +蹣 +羈 +鐔 +鐝 +鐐 +鐓 +鑭 +鑹 +鏹 +鐙 +籪 +鷦 +鱝 +鰈 +鯷 +鰓 +鰍 +鰉 +鯿 +鷲 +懣 +鷸 +鰲 +韉 +顥 +鷺 +䴉 +髏 +鑊 +鐳 +鐲 +讎 +鰨 +鰥 +鰩 +癩 +攢 +靄 +躥 +髖 +髕 +鑔 +籟 +鰳 +鰾 +鱈 +鰻 +鱅 +讖 +驥 +纘 +瓚 +鼉 +黷 +黲 +鑣 +鑞 +臢 +鱖 +鱔 +鱒 +驤 +顰 +鱧 +癲 +灝 +鸛 +鑱 +趲 +顴 +躦 +饢 +戇 +戔 +訏 +訒 +釓 +俔 +閆 +澫 +訢 +訩 +詝 +紃 +纊 +瑒 +剗 +塸 +壢 +埨 +撝 +蔿 +榪 +軑 +軏 +咼 +㠣 +覎 +㑳 +颺 +閌 +潕 +湋 +澐 +浿 +諓 +禡 +詗 +詘 +詖 +屓 +彄 +紘 +馹 +馼 +紵 +紞 +駃 +紖 +瑲 +薴 +棡 +軝 +暐 +晛 +崬 +釴 +釤 +鍆 +鍚 +鄶 +獮 +飿 +嶨 +詷 +詪 +鄩 +鳲 +隑 +隮 +娙 +逕 +駓 +駔 +駉 +絅 +騶 +䮄 +紼 +紿 +瓅 +韍 +墶 +塏 +薘 +蕘 +蔄 +葒 +鳾 +龑 +軹 +軤 +轢 +軺 +睍 +曨 +噠 +鈃 +鈇 +鉅 +鋹 +釿 +錀 +鈧 +鈥 +鈄 +倈 +艤 +鶬 +颭 +餏 +湞 +溮 +滻 +褘 +絰 +駰 +絪 +駪 +綎 +綖 +驫 +勣 +璕 +𡑍 +䓣 +薟 +藭 +椏 +梜 +頍 +硜 +輄 +輈 +輇 +貲 +嗊 +曄 +暉 +鄳 +幬 +輋 +嶮 +贐 +鉥 +鉕 +鑪 +鉮 +鉊 +鉧 +僤 +鴒 +魛 +餗 +燖 +溳 +礐 +窵 +襏 +駼 +絺 +綌 +騂 +綄 +璡 +墠 +壼 +聹 +蘀 +勩 +罃 +檮 +棶 +厴 +䃮 +磑 +礄 +鴷 +齕 +頔 +廼 +凢 +亾 +枒 +屍 +匃 +匄 +紥 +紮 +疋 +殀 +讐 +觔 +兇 +宂 +㕥 +㠯 +栞 +佈 +佔 +呌 +敂 +冄 +坵 +僊 +怱 +悤 +冊 +夘 +戼 +牠 +妳 +嬭 +摃 +釦 +攷 +託 +衺 +衕 +弔 +喫 +囙 +㠶 +颿 +秊 +倣 +髣 +佀 +朶 +氷 +決 +併 +並 +竝 +汙 +汚 +異 +姦 +廵 +挵 +衖 +搤 +阯 +撦 +埳 +阬 +誌 +㕁 +卻 +刦 +刧 +刼 +芲 +蘤 +桿 +槓 +荳 +獃 +唫 +脗 +皁 +彿 +髴 +疘 +刪 +鉋 +鑤 +況 +牀 +恡 +棄 +洶 +汎 +災 +烖 +菑 +禩 +侷 +跼 +坿 +玅 +姉 +妬 +翫 +搨 +柺 +拕 +牴 +觝 +倖 +抝 +盃 +桮 +傑 +逩 +肎 +菓 +崐 +崑 +呪 +虖 +嘑 +謼 +詠 +㟁 +嵒 +巗 +巖 +雰 +稈 +咊 +嶽 +妷 +姪 +廹 +徃 +餚 +採 +寀 +唸 +週 +昬 +兎 +兔 +亯 +亱 +䘚 +淨 +劵 +匟 +㳒 +灋 +洩 +霑 +淚 +註 +恠 +箒 +屆 +絃 +圅 +旾 +珎 +掛 +垜 +艸 +茘 +査 +栢 +柵 +栁 +桺 +柹 +韮 +揹 +昰 +閧 +鬨 +冐 +暎 +嚥 +倃 +𠴰 +偺 +喒 +齩 +欬 +榘 +㑺 +儁 +敍 +敘 +肧 +脈 +䘑 +衇 +跡 +蹟 +砲 +礮 +薙 +鬀 +恆 +怳 +卹 +䘏 +賉 +婣 +畊 +揑 +綑 +輓 +恥 +躭 +晉 +棲 +覈 +慄 +翄 +脣 +槕 +㨪 +螡 +蟁 +㤙 +陗 +峩 +峯 +乗 +椉 +咲 +筍 +俛 +頫 +勌 +䠶 +躳 +慇 +拏 +㧱 +挐 +脃 +胷 +肐 +貍 +㽞 +畱 +淒 +悽 +蓆 +効 +傚 +涼 +缾 +菸 +煙 +淛 +湧 +誖 +猂 +醼 +讌 +㝠 +寃 +孃 +桒 +毬 +瑠 +璢 +瑯 +㨗 +搥 +搯 +蔆 +惏 +楳 +槑 +捄 +廂 +慽 +慼 +瞇 +埜 +畧 +虵 +稭 +棃 +犂 +迻 +媮 +兠 +舩 +慾 +綵 +腳 +𩓐 +夠 +豬 +貓 +湊 +減 +庻 +蔴 +菴 +朢 +睠 +觕 +麤 +釬 +銲 +痳 +殽 +婬 +滛 +湻 +㴱 +樑 +顇 +㝛 +窰 +窯 +琹 +欵 +墖 +趂 +隄 +愽 +揷 +揫 +煑 +朞 +㪚 +塟 +蔥 +蔕 +稜 +棊 +碁 +椶 +偪 +㕑 +廚 +廈 +鴈 +冣 +㝡 +晳 +鼃 +餧 +餵 +嗁 +諠 +㡌 +賸 +筴 +筞 +筩 +栰 +暠 +皜 +踰 +蝟 +㪟 +燄 +遊 +媿 +嘅 +庽 +窓 +牎 +牕 +窻 +徧 +僱 +帬 +裠 +強 +彊 +疎 +壻 +瓌 +䰟 +皷 +擕 +㩗 +㩦 +攜 +懃 +鞾 +幙 +㮣 +酧 +詶 +醻 +掽 +踫 +㼝 +盌 +磟 +覩 +倸 +㬉 +煗 +煖 +晻 +闇 +炤 +跥 +䗬 +蠭 +寘 +辠 +稺 +穉 +燬 +譭 +瘉 +癒 +顋 +骽 +猨 +蝯 +稟 +痺 +癡 +亷 +㢘 +韻 +泝 +遡 +昚 +躶 +臝 +羣 +㬪 +曡 +疊 +勦 +琍 +瓈 +𤋮 +熈 +牓 +搾 +謌 +堿 +鹻 +鹼 +矁 +燻 +髈 +𤺥 +辢 +旂 +𡚁 +潄 +砦 +詧 +嫰 +櫈 +撐 +墪 +譔 +鞵 +鞌 +蕋 +橤 +蘂 +醕 +譆 +跴 +蹤 +蜨 +蠍 +稾 +殭 +惪 +厀 +襃 +癅 +䊀 +餬 +潛 +癄 +顦 +鷰 +藷 +櫥 +螎 +蹏 +蟇 +譟 +簒 +彫 +琱 +鵰 +餹 +餻 +簷 +粦 +燐 +緐 +幑 +蹧 +粇 +穅 +臋 +籐 +繙 +飜 +孼 +蠏 +燿 +蝡 +稬 +穤 +惷 +覇 +鑵 +戹 +阨 +剳 +帀 +巵 +亙 +佇 +竚 +穽 +岅 +虯 +𦍑 +羗 +啎 +姙 +㘭 +袟 +袠 +逈 +㒺 +犛 +氂 +偘 +甕 +罋 +冺 +姍 +蝨 +琺 +瑇 +尅 +梔 +斮 +斲 +斵 +暱 +毘 +蝱 +吚 +哶 +峝 +粃 +竢 +狥 +秈 +烱 +㳄 +袵 +盇 +涖 +蒞 +碪 +蠔 +唕 +倐 +儵 +雋 +皐 +臯 +衂 +䶊 +臙 +獧 +痾 +皰 +湼 +澣 +濬 +塚 +襢 +娿 +勅 +勑 +戞 +廐 +廄 +眥 +覜 +勗 +啗 +噉 +傯 +挱 +㥫 +惥 +慂 +陻 +蕚 +萲 +蕿 +蘐 +藼 +櫂 +箠 +槨 +啑 +蹠 +蚘 +痐 +蛕 +蜖 +瘖 +遯 +醃 +飱 +冪 +簑 +枏 +柟 +檝 +楥 +矴 +椗 +嘷 +獋 +粺 +䈰 +諐 +齶 
+堘 +疿 +雝 +秔 +稉 +槀 +搉 +廝 +叡 +嘠 +蜋 +筯 +篛 +麞 +糉 +緥 +璿 +髥 +臕 +餈 +剹 +橜 +罇 +蜺 +矙 +憇 +翺 +饍 +瞖 +羴 +羶 +爕 +繦 +騌 +鬉 +騣 +蔾 +䠀 +簮 +躕 +蹵 +䝔 +貛 +鼴 +麐 +塡 +あ +い +う +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +や +ゆ +よ +ら +り +る +れ +ろ +わ +を +ん +が +ぎ +ぐ +げ +ご +ざ +じ +ず +ぜ +ぞ +だ +ぢ +づ +で +ど +ば +び +ぶ +べ +ぼ +ぱ +ぴ +ぷ +ぺ +ぽ +ぁ +ぃ +ぅ +ぇ +ぉ +っ +ゃ +ゅ +ょ +ゎ +ゕ +ゖ +ア +イ +ウ +エ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +ソ +タ +チ +ツ +テ +ト +ナ +ニ +ヌ +ネ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ヤ +ユ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヲ +ン +ガ +ギ +グ +ゲ +ゴ +ザ +ジ +ズ +ゼ +ゾ +ダ +ヂ +ヅ +デ +ド +バ +ビ +ブ +ベ +ボ +パ +ピ +プ +ペ +ポ +ァ +ィ +ゥ +ェ +ォ +ッ +ャ +ュ +ョ +ヮ +ヵ +ヶ +ヷ +ヸ +ヹ +ヺ +・ +ー +ヽ +ヾ +ヿ +ア +イ +ウ +エ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +ソ +タ +チ +ツ +テ +ト +ナ +ニ +ヌ +ネ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ヤ +ユ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヲ +ン +゙ +゚ +ァ +ィ +ゥ +ェ +ォ +ッ +ャ +ュ +ョ +円 +気 +糸 +絵 +楽 +帰 +戸 +広 +黒 +図 +線 +読 +売 +歩 +毎 +亜 +悪 +圧 +扱 +囲 +為 +壱 +隠 +栄 +営 +駅 +塩 +縁 +艶 +応 +桜 +穏 +仮 +価 +箇 +ゑ +ゝ +ゞ +ヰ +ヴ +㈱ +両 +丼 +丿 +亀 +仏 +伝 +侶 +俤 +値 +倶 +倹 +偐 +偽 +働 +儛 +兌 +児 +冑 +冨 +凞 +処 +凪 +別 +剣 +剤 +剰 +劔 +労 +勧 +勲 +匁 +匂 +匲 +卍 +単 +厳 +収 +呂 +呉 +呑 +呰 +唖 +喚 +喩 +喰 +噛 +噺 +嚢 +囃 +団 +圀 +圏 +堀 +堺 +塀 +塁 +塙 +増 +墺 +壊 +壌 +壷 +変 +奨 +姫 +娯 +嫐 +嬢 +嬾 +孁 +宍 +実 +宮 +寔 +寛 +対 +専 +尭 +峠 +崋 +嶋 +巀 +巌 +巣 +巻 +帯 +幇 +庁 +廃 +廻 +弉 +弌 +弐 +弖 +弾 +従 +徳 +徴 +忯 +恵 +悩 +惣 +懐 +懽 +戦 +戯 +戻 +払 +抜 +択 +拝 +拠 +拡 +拵 +挙 +挿 +捗 +捜 +掟 +掲 +掻 +揃 +換 +揺 +摂 +撃 +撹 +斉 +斎 +旛 +旡 +晧 +晩 +暁 +暦 +曽 +杁 +杢 +杣 +杮 +枓 +枠 +枡 +柾 +栂 +栃 +桝 +桟 +桾 +梛 +梱 +梲 +梶 +椙 +検 +椥 +楕 +楡 +楢 +榊 +榎 +槇 +様 +槙 +槻 +樋 +権 +樫 +橿 +檥 +欅 +歎 +歓 +歯 +歳 +歴 +毀 +沖 +沢 +浄 +涙 +済 +渉 +渋 +渓 +渕 +満 +滝 +漑 +潅 +澁 +瀞 +瀬 +焔 +焼 +煇 +煕 +煥 +燗 +爼 +犠 +狛 +猟 +獏 +獣 +珊 +瑤 +甞 +畑 +畠 +畳 +畷 +畺 +痩 +癪 +発 +県 +眞 +砕 +碕 +礒 +禖 +禿 +稲 +穂 +穣 +竃 +竜 +竴 +笹 +筈 +筬 +筰 +箆 +箏 +箙 +篠 +篭 +簺 +籾 +粂 +粋 +粛 +粧 +糺 +紬 +絁 +経 +絖 +絣 +絽 +継 +続 +綟 +総 +縄 +縅 +縒 +縦 +繊 +繋 +繍 +繝 +繧 +纐 +纒 +罠 +罧 +罵 +羂 +羇 +羨 +聟 +聡 +聨 +聴 +脇 +脳 +膣 +膵 +臈 +臓 +臥 +舎 +舖 +舗 +舘 +芿 +苅 +茲 +荊 +荘 +莬 +莵 +菫 +萠 +蔵 +薗 +薫 +薬 +薭 +蘊 +蛍 +蝋 +蝿 +蟷 +衞 +衵 +袙 +袞 +袰 +袴 +袿 +裃 +裡 +裲 +褄 +褌 +襴 +襷 +覗 +覚 +覧 +観 +訳 +証 +諌 +諚 +諟 +諡 +諮 +譛 +譲 +讃 +豅 +豊 +豎 +賎 +賛 +贔 +躙 +躰 +転 +軽 +輌 +辥 +辺 +辻 +込 +逓 +遅 +遙 +邉 +郷 +酔 +醗 +醤 +醸 +釈 +鉄 +鉇 +鉤 +鉱 +鉾 +銈 +銕 +銭 +鋲 +鋳 +鋺 +錆 +錍 +錣 +錬 +錵 +鍑 +鍮 +鍼 +鎌 +鎗 +鎚 +鎹 +鐇 +鐚 +鐡 +鑁 +鑑 +鑚 +鑢 +閇 +関 +閦 +闘 +陥 +険 +隣 +隷 +雑 +雫 +霊 +靜 +靫 +靭 +靱 +鞄 +鞆 +頚 +頬 +頴 +頼 +顕 +顗 +餝 +饂 +駄 +駆 +駈 +騒 +験 +騨 +髄 +髙 +髪 +髷 +鯖 +鯰 +鯱 +鰒 +鰯 +鰰 +鳰 +鴎 +鴫 +鵄 +鵞 +鵺 +鶏 +鹸 +麁 +麺 +麿 +黌 +黙 +鼈 +齢 +龗 +縯 +蟅 +坖 +祂 +鼂 +鱚 +蛻 +屌 +呾 +煔 +吶 +扥 +蚖 +銂 +尃 +夋 +鵼 +徬 +寳 +彡 +舨 +湳 +麼 +鍈 +崈 +鱣 +盺 +拺 +瑥 +茷 +焻 +奀 +驎 +鱰 +砢 +痟 +廱 +僜 +瘺 +鱊 +擥 +嶰 +淓 +跅 +浵 +媗 +璦 +煠 +檊 +媃 +峅 +躄 +鉟 +塽 +蟴 +鯮 +弍 +烒 +鵵 +妑 +孋 +蚡 +恊 +輭 +廞 +產 +曅 +盜 +騤 +囪 +鱀 +茇 +葊 +逹 +狓 +崢 +趖 +凃 +羙 +鮸 +昞 +楿 +渽 +圗 +麪 +屇 +鍉 +葝 +沯 +爭 +幵 +筭 +寊 +銋 +貮 +鎭 +熺 +昜 +鍱 +墬 +愒 +磺 +嚈 +稘 +珮 +釆 +殑 +鍩 +䲁 +蕷 +鐿 +僡 +佹 +輶 +冴 +襶 +賔 +猙 +辧 +絛 +磾 +韁 +螔 +譳 +礑 +鋱 +魩 +嚗 +棆 +牆 +敟 +柶 +瓛 +魣 +巎 +轘 +襌 +枼 +鸌 +逺 +錏 +縡 +帢 +騄 +媼 +埅 +鄤 +萐 +祙 +旼 +詥 +鶲 +燉 +卲 +銱 +庲 +伱 +氽 +嵿 +挻 +煵 +窋 +鐤 +鮊 +鱬 +鰧 +嬤 +譞 +諲 +脭 +悳 +崘 +阭 +內 +袾 +冚 +壐 +咗 +礠 +孮 +痲 +埈 +肹 +鰮 +鮓 +濊 +塜 +凜 +蒢 +噰 +桼 +峍 +焴 +鶒 +鋮 +綠 +鶹 +熿 +毴 +咟 +嘥 +睺 +繡 +郎 +瘞 +鉶 +蔎 +秠 +緤 +蝀 +躝 +蟜 +繃 +囮 +墫 +乭 +胊 +濙 +瘓 +榣 +鑛 +鐫 +嶴 +甹 +坮 +銾 +蒭 +睜 +俋 +餠 +榢 +蓳 +盋 +堷 +鍏 +苝 +巛 +蚵 +暏 +熤 +嬨 +墎 +鏽 +戶 +菺 +膮 +熖 +睪 +栜 +捱 +榗 +鍷 +曧 +犽 +韑 +袓 +䖝 +焄 +喦 +髲 +疌 +㴪 +侊 +貐 +蕅 +禠 +蕑 +囯 +暊 +儞 +佋 +柎 +㐱 +鰤 +苳 +鱥 +謤 +遶 +眀 +鑀 +羋 +顏 +陜 +銩 +黶 +苼 +蒤 +棛 +儫 +咁 +抦 +衚 +棩 +焿 +脫 +麅 +玏 +埧 +淸 +黁 +淽 +彠 +鮨 +沜 +糀 +厓 +楧 +嶌 +簹 +檵 +鱇 +嶬 +廸 +卽 +樀 +贌 +酼 +籛 +沒 +晸 +諪 +蕡 +妏 +鄋 +蒍 +奧 +抇 +蓨 +薆 +鱷 +巘 +䝉 +亰 +寈 +槩 +誒 +麴 +蕟 +溎 +蘗 +榦 +斿 +暟 +炲 +拚 +娖 +繖 +橚 +寜 +爀 +饟 +悅 +鯏 +彜 +眾 +葯 +嬝 +埮 +獇 +馛 +溙 +瀦 +熼 +硓 +鈢 +樆 +輬 +鰜 +蔘 +渙 +澔 +嗮 +旉 +籜 +媊 +燘 +儚 +頹 +缽 +俽 +逨 +鱓 +郞 +歊 +杴 +珡 +杋 +醁 +鰏 +鵾 +鐽 +鮋 +巶 +荅 +薾 +囓 +蹻 +獎 +禑 +鎓 +榲 +僴 +綞 +尓 +敭 +曔 +褔 +鬅 +亊 +鏦 +蓘 +裬 +鱲 +薡 +鰗 +箑 +鬪 +縂 +璸 +甙 
+茮 +辵 +岻 +覿 +滈 +鯶 +鑂 +囶 +舺 +溋 +拋 +菾 +敾 +虨 +綝 +蝍 +醂 +禨 +賹 +廧 +絕 +槗 +徫 +鎔 +曮 +蠂 +捒 +堈 +莕 +蓪 +敎 +禃 +櫱 +綧 +瀶 +逌 +浤 +碻 +刄 +逤 +剏 +氹 +菈 +娫 +蜛 +嵗 +糎 +螶 +譓 +鏳 +嵙 +瑊 +隲 +檨 +緈 +畵 +砯 +簗 +彅 +鰺 +騋 +窶 +嚒 +嵻 +尙 +頵 +槰 +虉 +醞 +巂 +彔 +偊 +畇 +鱨 +妸 +塲 +畐 +鈫 +錟 +磪 +摠 +彥 +璙 +囝 +寗 +耎 +鮡 +蘓 +弅 +焃 +飥 +戙 +塰 +儱 +槺 +噏 +魟 +禵 +佧 +咘 +盪 +瑈 +鉲 +睭 +鏌 +鼇 +郋 +魮 +朖 +滽 +渃 +滙 +熯 +醿 +鎅 +褀 +鬬 +巄 +螥 +眜 +釚 +柉 +壎 +峇 +姸 +唭 +鮜 +鈖 +嫈 +壄 +洤 +黃 +伕 +堦 +嶔 +鮰 +鞞 +漎 +鉓 +鮗 +壴 +阝 +妀 +矽 +獢 +倗 +銪 +鴓 +橒 +凈 +哖 +屚 +偍 +瑺 +媯 +淍 +驌 +椇 +赬 +薐 +糹 +碽 +濲 +釭 +晭 +纕 +寖 +閞 +歿 +呎 +鶆 +屄 +櫿 +犎 +旲 +㙟 +龎 +翜 +螾 +說 +衜 +泆 +軎 +鵂 +荎 +嚧 +硂 +桖 +褭 +筊 +鰷 +秳 +戩 +轀 +鬹 +飬 +卋 +暸 +狦 +搢 +娋 +鏴 +溫 +毉 +淰 +謩 +餺 +鵙 +鳽 +鮀 +狶 +氻 +轝 +妺 +袛 +蓭 +梂 +娛 +牼 +稅 +兿 +玾 +煚 +僩 +鶿 +鬄 +崠 +鉆 +鯓 +蚢 +庀 +鵟 +坣 +殼 +悞 +熅 +敻 +鍠 +曶 +愼 +搳 +姃 +砳 +槼 +臞 +韾 +靑 +鸊 +薲 +虛 +蠄 +啟 +鶺 +苺 +滾 +褞 +仺 +胇 +憻 +郳 +烉 +驩 +冇 +枖 +夌 +搵 +匸 +盨 +櫾 +霤 +麊 +貒 +噓 +嗢 +笩 +晈 +冂 +銳 +毿 +慜 +囧 +閜 +娸 +庢 +壆 +馯 +桱 +兗 +葃 +侅 +煐 +鐦 +藸 +鷎 +嵰 +逎 +弒 +匋 +鐭 +廔 +砩 +孆 +灴 +伷 +兪 +鴗 +澯 +幚 +旙 +勻 +礽 +婑 +鱮 +娍 +銶 +吳 +鍟 +仼 +鳧 +彞 +娽 +昛 +鰼 +剎 +佉 +鉏 +偸 +鰆 +讙 +橪 +啱 +岀 +孻 +釪 +乹 +鈳 +漇 +檦 +埻 +祿 +爌 +禇 +鱵 +㸃 +梉 +燝 +霙 +炁 +飮 +蠙 +勷 +鵎 +儥 +鐠 +唻 +廰 +嚿 +嵕 +墱 +紑 +搖 +瘜 +皝 +鸑 +瀁 +粵 +撚 +巑 +梀 +啯 +眛 +諴 +夊 +僙 +鍝 +裖 +鮣 +凬 +飡 +灊 +橓 +嫳 +筳 +咑 +粍 +瓑 +璌 +伃 +閰 +傜 +黐 +謢 +驒 +橫 +蛯 +寕 +蠵 +瞓 +旳 +翏 +硏 +寯 +韡 +楤 +鰃 +朿 +侞 +鵯 +愨 +祹 +厔 +丌 +盩 +謏 +魕 +啣 +閱 +曺 +枛 +罉 +卐 +樻 +鷉 +鯒 +鋡 +磱 +枱 +攴 +蠷 +穈 +嚟 +檽 +趐 +奐 +鋐 +檇 +薀 +峼 +咭 +訔 +韠 +鑴 +鸐 +唃 +捦 +鸜 +誴 +罳 +璄 +暃 +夀 +賨 +鞥 +鈊 +灡 +鮍 +懮 +籣 +昐 +陁 +襾 +鮠 +鈏 +囍 +婯 +艔 +貭 +䰾 +姁 +禼 +堖 +鋶 +仛 +鏷 +謜 +鑅 +忬 +蘶 +謠 +觙 +奫 +狟 +泩 +桙 +飈 +垰 +啍 +嚞 +鯕 +蒧 +榞 +徸 +璹 +揔 +欉 +魞 +菶 +玧 +鳯 +廍 +侚 +岰 +岧 +鋕 +凵 +彣 +崱 +媜 +倢 +鵐 +砋 +鷚 +鱠 +鮻 +繻 +摵 +贓 +磵 +錻 +痠 +粩 +胅 +奣 +塨 +瀠 +鸘 +啚 +娳 +霶 +壔 +峚 +甂 +廁 +覌 +鰂 +猳 +鱻 +盫 +裿 +杬 +歛 +澋 +蘞 +嵜 +尐 +旽 +鉌 +鎛 +豿 +凖 +榤 +禓 +龝 +悧 +鷟 +鮟 +吋 +喢 +岪 +吥 +漵 +頠 +豔 +巿 +鑨 +醣 +熳 +懍 +湥 +檡 +韺 +戱 +緖 +鐈 +凉 +緃 +鮹 +媐 +爯 +巆 +褍 +鐬 +昍 +扙 +鍳 +芛 +蟳 +嬅 +糬 +吔 +塭 +譿 +冧 +鏓 +嶪 +嗹 +椵 +姀 +閿 +褧 +錞 +玆 +笘 +篔 +萡 +鶡 +螐 +鮄 +鰟 +脷 +啲 +杤 +蓚 +尗 +娎 +殟 +淥 +蝚 +蓧 +彐 +嚤 +銍 +囒 +坶 +淩 +鶼 +鱂 +喼 +燫 +肏 +姵 +廌 +禟 +籝 +迵 +嵨 +堮 +蟌 +憍 +廕 +蜑 +緁 +唘 +竩 +崙 +璚 +粄 +栨 +罈 +梫 +貤 +藔 +蜯 +訁 +斖 +煶 +馦 +妠 +閟 +疕 +夆 +鎪 +膥 +澻 +嘢 +嚐 +靁 +鎻 +鰛 +穵 +烋 +縕 +褎 +疒 +壠 +溼 +圂 +咅 +鯭 +鯙 +磘 +玨 +珤 +朊 +蚼 +濶 +薞 +嚩 +丟 +嫺 +鯻 +椲 +鰕 +刂 +蠘 +踎 +瀴 +琁 +鰶 +瑴 +肜 +㐂 +欥 +媺 +竻 +讚 +𣇉 +裵 +緜 +廩 +齧 +叄 +俌 +厰 +滀 +錄 +鷫 +鯗 +攞 +姌 +蔝 +幷 +縤 +屻 +鯃 +雞 +纁 +嫲 +嵮 +屭 +嶃 +跩 +鋗 +蕢 +篊 +俬 +淎 +暻 +鏻 +憓 +玗 +溈 +笭 +糢 +勳 +閒 +沍 +咾 +鉷 +蘵 +俁 +崵 +毸 +苪 +掙 +鴡 +萭 +俴 +屜 +蒾 +艹 +剷 +慍 +朮 +枴 +氳 +猓 +甽 +箝 +譁 +贗 +迆 +鈽 +鍊 +鍰 +鏍 +靦 +餽 +丮 +丱 +仜 +仩 +伬 +伔 +仱 +伀 +伻 +佢 +佒 +侀 +侇 +佷 +佌 +佪 +侐 +侜 +俓 +侲 +俉 +侻 +侳 +俇 +倅 +倇 +倰 +倛 +倳 +倷 +俷 +倠 +偯 +偞 +偠 +偋 +偝 +偛 +偢 +偅 +偟 +偩 +偫 +傛 +傔 +傞 +傋 +傌 +傎 +傝 +偨 +傂 +傽 +傿 +僆 +傮 +僄 +僈 +傰 +僁 +傱 +僋 +僗 +僛 +僪 +僝 +僓 +僿 +儃 +儰 +僸 +僶 +僾 +儌 +僽 +儜 +儓 +儗 +儑 +儢 +儤 +儠 +儸 +儹 +儽 +冓 +冘 +冞 +凊 +凅 +凔 +刌 +刉 +刓 +刜 +刞 +刵 +刲 +剆 +刱 +剉 +剚 +剒 +剫 +剭 +剬 +剺 +剸 +剻 +剼 +劀 +劋 +劖 +劘 +劗 +劙 +劦 +勴 +匊 +匢 +匰 +匴 +匷 +匽 +卌 +卼 +厎 +厒 +厗 +厞 +厜 +厤 +厬 +厹 +吰 +吷 +吪 +呿 +咈 +呫 +呺 +呥 +呬 +呴 +茍 +咷 +咮 +咶 +哅 +咠 +咢 +唦 +唗 +唒 +哤 +唚 +唈 +哫 +唅 +唴 +啢 +唶 +啒 +啅 +唌 +唲 +喨 +喥 +喭 +噅 +喓 +喣 +啽 +喌 +嗃 +嗛 +嗋 +嗀 +喿 +喍 +嗏 +嗕 +嗈 +嘕 +嘒 +嗼 +嘐 +嘓 +嘂 +嗺 +嘝 +嘄 +嗿 +噈 +噊 +噆 +噚 +嘳 +嘽 +嘾 +噮 +噳 +噣 +噭 +噞 +嚌 +嚍 +嚃 +嚘 +嚜 +嚫 +嚪 +嚬 +嚲 +嚵 +嚽 +嚾 +囆 +囅 +囋 +囗 +圁 +圞 +圠 +坁 +坅 +坲 +坱 +垀 +坴 +垗 +垝 +垔 +垘 +垽 +垼 +埢 +埶 +堩 +堣 +塈 +堥 +塓 +塉 +塯 +塕 +塼 +墆 +塿 +塴 +墋 +塺 +墝 +墯 +壈 +墽 +壖 +壝 +壛 +壾 +壿 +夃 +夎 +夒 +夗 +奅 +奊 +奰 +奲 +奼 +妦 +妎 +妢 +妐 +妵 +姏 +姎 +㚷 +姡 +姺 +姼 +娭 +婐 +婟 +婥 +婓 +婗 +媔 +媟 +媢 +婸 +媦 +媥 +媬 +媕 +娷 +嫇 +嫋 +媰 +媻 +嫮 +嫥 +嫢 +嫛 +嫿 +嫴 +嫷 +嫶 +嬎 +嬓 +嬐 +嬲 +嬽 +孈 +屘 +孲 +孷 +宎 +宨 +寪 +寍 +寋 +寑 +寙 +寠 +寱 +尌 +尒 +尟 +尰 +尳 +屖 +屔 +屝 +屧 +屩 +屮 +屴 +岏 +岋 +岉 +岒 +岮 +岤 +岯 +岟 +岝 +峐 +峌 +峞 +峉 +峊 +峬 +峮 +峷 +崝 +崨 +崥 +崏 +崰 +崣 +崷 +嵃 +嵑 +崳 +崺 +嵂 +嵱 +嵣 +嵥 +嵞 +嶀 +嵽 +嶆 +嵺 +嵷 +嶊 +嶉 +嶈 +嵾 +嶕 +嶜 +嶡 +嶚 +嶞 +嶱 +嶩 +嶵 +嶭 +巃 +巏 +巕 +巟 +巹 +帊 +帗 +帟 +帣 +帠 +帤 +帩 +帾 +帴 +幏 +幎 +幓 +幩 
+幝 +幠 +幧 +幨 +幦 +幭 +幰 +庂 +庉 +庌 +庈 +庰 +庛 +庣 +庨 +庮 +庪 +庬 +庴 +廅 +廇 +廘 +廗 +廎 +廜 +緳 +廦 +廥 +廮 +廯 +蠯 +廾 +弚 +弝 +弣 +弤 +弮 +弳 +彃 +彉 +彋 +彏 +彯 +彴 +彸 +彾 +徦 +徥 +徯 +徲 +徾 +徿 +忀 +忁 +忔 +忕 +忨 +忣 +忷 +忥 +怭 +怲 +怋 +怴 +怗 +怚 +怞 +怬 +怢 +怐 +怮 +怓 +怷 +怹 +恲 +恞 +恅 +恇 +恉 +恛 +恌 +恀 +恟 +悀 +悁 +悕 +悗 +悇 +悊 +悐 +悾 +悺 +惓 +惤 +惈 +悷 +惉 +悹 +惌 +惢 +惄 +愊 +愖 +愅 +惵 +愓 +惸 +惼 +惾 +慉 +慅 +愶 +愲 +愮 +愯 +愬 +慁 +慞 +慱 +慒 +慓 +慲 +憀 +慴 +慔 +慺 +慛 +憃 +慹 +憱 +憰 +憢 +憉 +憛 +憯 +憟 +憪 +憡 +憝 +憖 +懅 +憴 +懆 +懁 +憿 +憸 +憵 +憼 +懧 +懠 +懥 +懤 +懘 +懭 +懱 +懪 +懰 +懫 +懻 +戁 +戃 +戄 +戉 +戠 +酨 +戺 +扐 +扜 +扤 +扡 +扢 +抆 +抌 +抎 +抏 +扻 +抭 +抴 +拑 +抾 +抪 +抶 +抮 +挍 +挋 +挃 +拫 +拹 +挏 +挌 +拸 +挀 +拲 +捖 +挬 +挶 +揤 +捊 +挼 +挩 +捁 +挴 +捘 +捔 +捥 +掝 +掗 +掫 +掯 +捵 +掜 +捼 +掤 +掔 +掱 +揎 +揥 +揨 +揯 +揊 +揲 +揵 +摡 +揟 +揝 +揜 +揘 +揅 +揱 +搆 +搟 +搕 +搘 +搹 +搷 +搣 +搰 +搊 +搚 +摀 +搧 +搫 +摍 +摝 +摲 +摦 +摎 +摋 +摓 +摐 +摿 +摮 +摰 +撢 +撠 +撗 +撜 +撋 +撊 +撌 +撟 +擗 +擖 +擏 +擉 +撽 +擩 +擣 +擫 +擭 +擨 +擽 +擸 +攇 +攐 +攍 +攌 +攗 +攕 +攓 +攡 +攠 +攦 +攩 +攭 +攲 +攳 +敁 +敊 +敆 +敓 +敧 +敪 +敤 +敜 +敯 +敳 +敶 +敺 +敹 +敿 +斁 +斀 +斄 +斒 +斔 +斞 +斨 +斪 +斻 +旍 +旓 +旚 +旝 +旟 +昲 +昦 +昢 +晇 +晥 +晜 +晼 +晬 +暀 +暆 +暍 +暋 +暡 +暰 +暩 +曀 +曊 +曋 +曏 +曒 +曚 +曣 +曭 +朁 +朅 +朄 +朒 +朘 +朣 +朾 +朹 +朻 +朼 +杅 +杇 +杝 +杗 +枎 +杶 +枆 +枌 +柲 +枺 +枻 +柸 +柀 +柅 +柫 +柤 +柍 +柮 +柣 +柂 +柧 +栚 +桋 +桏 +栱 +栵 +栫 +栭 +栯 +栘 +栔 +梡 +梇 +梐 +桭 +梮 +楖 +梬 +梩 +桵 +梒 +椌 +椄 +棜 +棷 +棳 +棌 +椈 +楰 +棯 +椔 +棸 +楟 +楎 +楱 +楅 +楺 +楈 +楛 +楉 +楬 +椳 +楀 +楄 +楶 +楘 +榶 +槉 +榠 +榬 +榼 +榙 +榩 +榾 +榯 +槄 +榽 +榹 +槥 +槸 +樕 +樠 +槬 +槢 +樛 +樝 +槾 +樧 +槮 +樔 +槷 +橀 +樴 +橉 +橧 +樲 +橨 +橝 +橭 +橶 +樿 +橁 +檍 +檖 +檁 +檟 +橾 +檛 +檓 +檕 +檃 +櫅 +檹 +櫡 +櫠 +櫌 +櫑 +櫙 +櫋 +櫜 +櫐 +櫫 +櫬 +櫰 +櫹 +櫺 +櫼 +欃 +欋 +欈 +欐 +欑 +欘 +欨 +欴 +欯 +欭 +欱 +欶 +欳 +欷 +欿 +歂 +歈 +歍 +歋 +歕 +歔 +歜 +歠 +歭 +歾 +肂 +殈 +殏 +殔 +殗 +殙 +殠 +殥 +殢 +殦 +殧 +殰 +殶 +毃 +毄 +毈 +毇 +毊 +毚 +毞 +毦 +毤 +毨 +毣 +毰 +毲 +毻 +毼 +毾 +氁 +氀 +氄 +氠 +氶 +汃 +汒 +汏 +汍 +汸 +沋 +汱 +汯 +沕 +汦 +汳 +泬 +沶 +沬 +泧 +沷 +泭 +泲 +泒 +沴 +洟 +洊 +洀 +浺 +浶 +洍 +涒 +浘 +浢 +涊 +涆 +浧 +涗 +涳 +涬 +淢 +涷 +淔 +渀 +淈 +涾 +淊 +涽 +淭 +湆 +湇 +湅 +湢 +渿 +湁 +渜 +渳 +湀 +渻 +渮 +湨 +湡 +渱 +渨 +湠 +湱 +湩 +渹 +溛 +滖 +溓 +溔 +滒 +溰 +溾 +滜 +滵 +滱 +漃 +漥 +漮 +潎 +漙 +漧 +漘 +漒 +滭 +漊 +潳 +滮 +潀 +漰 +潃 +漅 +濆 +澒 +澅 +潚 +潠 +澖 +潶 +潬 +潒 +潐 +潗 +澓 +潝 +濇 +濎 +濈 +濄 +澞 +澨 +瀄 +濌 +澩 +濴 +濔 +濣 +濭 +濧 +濦 +瀇 +瀎 +濿 +瀀 +濻 +瀙 +瀖 +瀫 +瀡 +瀢 +瀩 +瀯 +瀷 +灂 +瀸 +瀿 +瀺 +灄 +灉 +灖 +灗 +灛 +灟 +灨 +灩 +灪 +炾 +炰 +烓 +烑 +缹 +焍 +烰 +焠 +焮 +焣 +煆 +煣 +煝 +熐 +熉 +熀 +熂 +熚 +燅 +燂 +熸 +燀 +燡 +爁 +爊 +爂 +爓 +爞 +爢 +爣 +牄 +牉 +牋 +牏 +牣 +牬 +牰 +牸 +牷 +犈 +犉 +犆 +犅 +犌 +犑 +犐 +犗 +犕 +犓 +犘 +犚 +犝 +犞 +犥 +犦 +犤 +犣 +犩 +犪 +犮 +犵 +犿 +狆 +狖 +狋 +狘 +狜 +狔 +狚 +狌 +狑 +狊 +狤 +狫 +狪 +狣 +猀 +狾 +猑 +猘 +猈 +狿 +猏 +猋 +猒 +猧 +猲 +猭 +猦 +猣 +猵 +猼 +獂 +獀 +獊 +獑 +獌 +獘 +獞 +獟 +獝 +獛 +獡 +獩 +獦 +獥 +獳 +獶 +獽 +獿 +玂 +玁 +玈 +玊 +玔 +珓 +珶 +琖 +瑵 +璊 +瑽 +璅 +瑿 +璗 +瓁 +瓋 +瓝 +瓟 +瓡 +瓥 +瓨 +瓬 +瓵 +瓾 +瓽 +甀 +甃 +甈 +甋 +甐 +甒 +甔 +甖 +甝 +甮 +甿 +畟 +畣 +畽 +疀 +疧 +痁 +疻 +痀 +痎 +痏 +痋 +痌 +痑 +痚 +痡 +痝 +痗 +痯 +瘏 +痷 +痸 +痻 +瘈 +瘑 +瘝 +瘣 +瘯 +瘱 +瘽 +癈 +癉 +癙 +癐 +癓 +癠 +癵 +癹 +皊 +皏 +皫 +皯 +皵 +皻 +皽 +皾 +盄 +盓 +盝 +盬 +盭 +盳 +眃 +眅 +盻 +眝 +眐 +眓 +眒 +眣 +眑 +眕 +眹 +眱 +眲 +眴 +眳 +眽 +睆 +睅 +睊 +睋 +睌 +睕 +睟 +睒 +睖 +睩 +睧 +睔 +瞁 +睼 +瞂 +睮 +睯 +瞏 +瞉 +瞚 +瞝 +瞡 +瞛 +瞲 +瞷 +瞶 +瞴 +矂 +矉 +矊 +矌 +矎 +矏 +矐 +矔 +矕 +矘 +矠 +矱 +矲 +矹 +矺 +砅 +砐 +砏 +砎 +砨 +硈 +硉 +硠 +硥 +硱 +硰 +硩 +碔 +碄 +碅 +碆 +硾 +碫 +碞 +磍 +磌 +磎 +磈 +磃 +磝 +磩 +磥 +磞 +磛 +磳 +磼 +磿 +礔 +礉 +礝 +礛 +礜 +礥 +礣 +礧 +礨 +礭 +礿 +祌 +祅 +祔 +祒 +祑 +祤 +祩 +祪 +祣 +祫 +祡 +祴 +祳 +禂 +禗 +禜 +禫 +禭 +禬 +禴 +禷 +禸 +歶 +秅 +秏 +秖 +秎 +秮 +秪 +秺 +秶 +稊 +稒 +稫 +穊 +稰 +稯 +穋 +穛 +穖 +穧 +穨 +穮 +穬 +穭 +穱 +穾 +窆 +窉 +窌 +窏 +窔 +窐 +窙 +窢 +窞 +窫 +窲 +窴 +窱 +窾 +竀 +竁 +竷 +笐 +笓 +笅 +笵 +笻 +笴 +笰 +笢 +笝 +笲 +筄 +筡 +箈 +箊 +箌 +箛 +箎 +箘 +箄 +箷 +箾 +篎 +箯 +箹 +篞 +篣 +篧 +篕 +篨 +篹 +簅 +篲 +篿 +篻 +簎 +篴 +簂 +簁 +篸 +篽 +簜 +簩 +簙 +簭 +簦 +簨 +簢 +簥 +簳 +簼 +簬 +簻 +籉 +籈 +籊 +籔 +籗 +籧 +籦 +籯 +籺 +籸 +籹 +粊 +粔 +粻 +糔 +糪 +糱 +糷 +紎 +紟 +紒 +紽 +紸 +紶 +紩 +絇 +紾 +絘 +絯 +絓 +絧 +絏 +絭 +絫 +綀 +綍 +絿 +綅 +絻 +絼 +綔 +綷 +緂 +綪 +緀 +緅 +緎 +緆 +緌 +綯 +綼 +緷 +緛 +緪 +緧 +縃 +緺 +緶 +緰 +縗 +縌 +縓 +縎 +縜 +縚 +縏 +縼 +繂 +縳 +顈 +繈 +縸 +縪 +繉 +繀 +縩 +緵 +縰 +縿 +縶 +繜 +繐 +繣 +繘 +繢 +繟 +繑 +繠 +繶 +繵 +繸 +繷 +繺 +繲 
+繴 +纀 +纇 +纋 +纆 +纑 +纗 +纚 +缿 +罊 +罏 +罜 +罞 +罝 +罛 +罣 +罥 +罦 +罭 +罫 +罬 +罻 +罼 +罺 +罿 +羃 +羉 +羍 +羒 +羜 +羛 +羢 +羠 +羦 +羬 +羭 +羵 +羳 +羷 +羺 +羾 +翋 +翍 +翐 +翑 +翇 +翢 +翣 +翭 +翪 +翨 +翴 +翲 +翽 +翿 +耟 +耞 +耡 +耴 +耾 +耹 +聇 +聈 +聑 +聏 +聝 +肕 +肙 +肒 +肣 +肵 +胘 +胑 +胐 +胕 +胉 +胏 +胹 +胵 +脁 +胻 +脀 +胾 +胔 +脰 +脥 +脤 +脙 +脡 +脕 +脧 +腃 +腏 +腄 +腇 +脽 +腍 +腤 +腷 +腜 +腛 +腢 +腲 +朡 +腞 +腶 +膉 +膆 +膃 +膇 +膍 +膌 +膋 +膟 +膕 +膢 +膱 +膹 +膫 +膰 +膬 +膴 +膲 +臇 +膷 +臄 +臅 +臒 +臐 +臗 +臛 +臡 +臦 +臩 +臮 +臲 +臷 +臸 +臿 +舋 +舑 +舕 +舝 +舡 +舼 +舽 +艀 +艂 +艓 +艒 +艐 +艑 +艕 +艛 +艵 +艼 +芀 +芐 +芅 +芓 +芔 +苀 +芚 +芵 +芧 +芞 +芺 +苙 +苨 +苖 +苬 +苲 +苵 +苶 +茙 +茥 +茿 +茦 +茢 +荂 +茪 +荍 +茖 +茤 +茠 +茩 +茻 +莐 +莣 +莍 +荺 +莤 +荴 +莏 +莁 +荵 +莔 +莃 +莌 +莋 +荾 +莥 +菨 +萒 +菧 +菤 +菆 +菣 +菿 +菋 +菎 +菵 +萉 +菞 +菳 +菕 +蓱 +萿 +葹 +葥 +葀 +葧 +萰 +葍 +葽 +蔇 +葞 +萷 +萺 +萴 +葅 +菙 +葋 +萯 +葂 +葟 +葌 +蓎 +蒬 +蒮 +蒫 +蒪 +蒚 +蒝 +蓌 +蒛 +蒩 +蒘 +蒶 +蒠 +蔤 +蔏 +蔩 +蔉 +蔍 +蔧 +蔜 +蓻 +蓺 +蓴 +蔪 +蓲 +蓷 +蓫 +蔒 +蓩 +蔖 +蓾 +蔨 +蔮 +蔂 +蓶 +蔱 +蓹 +蔠 +蔰 +蕫 +蕍 +蕀 +蕆 +蕄 +蕇 +蕣 +蕛 +蕱 +蕵 +蕮 +蕧 +蕠 +蕦 +蕝 +薃 +薧 +薕 +薠 +薋 +薣 +薚 +蕼 +薉 +蕸 +薎 +薖 +薍 +薝 +薂 +藆 +藀 +藃 +藂 +薵 +薽 +藇 +藄 +藋 +藈 +藅 +薱 +薶 +藒 +藫 +藱 +藙 +藡 +藚 +藗 +藲 +藬 +藘 +藣 +藑 +藰 +蘁 +藾 +蘛 +蘉 +蘌 +蘪 +蘦 +蘟 +蘣 +蘜 +蘙 +蘮 +蘡 +蘠 +蘥 +蘴 +蘳 +蘬 +虀 +蘹 +蘱 +蘻 +蘾 +虃 +虆 +虇 +虈 +虌 +虋 +虙 +虡 +虣 +虩 +虪 +虰 +虭 +虴 +蚑 +蚞 +蚇 +蚗 +蚚 +蚅 +蚥 +蚙 +蚿 +蚷 +蛂 +蛁 +蛅 +蛈 +蚹 +蚳 +蚸 +蛌 +蚻 +蛢 +蛦 +蛓 +蛣 +蛚 +蛪 +蛝 +蛫 +蛜 +蛬 +蛗 +蜄 +蛷 +蜌 +蛖 +蛵 +蜁 +蛶 +蜳 +蝫 +蜙 +蝃 +蜬 +蝁 +蝆 +蜠 +蜲 +蜪 +蜭 +蜼 +蜵 +蝂 +蜦 +蜧 +蜸 +蜤 +蜰 +蝖 +蝷 +蟡 +蝳 +蝔 +蝛 +蝒 +蝑 +蝞 +蝭 +蝪 +蝐 +蝝 +蝬 +蝺 +蝜 +螛 +螏 +螓 +螒 +螁 +螖 +螘 +蝹 +螇 +螑 +螝 +螜 +螚 +螪 +螰 +螹 +螼 +螮 +蟉 +蟃 +蟂 +螷 +螴 +螿 +螸 +蟞 +蟧 +蟦 +蟢 +蟟 +蟤 +蟔 +蟓 +蟭 +蟘 +螤 +蟗 +蟙 +蠁 +蟨 +蠀 +蟺 +蠉 +蠌 +蟼 +蠈 +蟿 +蠗 +蠩 +蠝 +蠛 +蠠 +蠤 +蠜 +蠫 +蠬 +蠨 +蠦 +蠪 +蠥 +蠰 +蠮 +蠳 +蠸 +蠾 +蠽 +蠿 +衁 +衈 +衋 +衧 +衪 +衭 +衶 +袀 +衱 +衯 +袃 +袉 +袕 +袨 +袚 +袑 +袡 +袘 +袧 +袬 +袌 +袺 +裗 +袹 +袸 +裀 +袶 +袽 +袲 +裋 +裍 +裞 +裚 +裷 +裧 +裺 +裮 +裶 +裯 +裻 +褁 +褅 +褋 +褗 +褆 +褖 +褑 +褦 +褮 +褱 +褢 +褩 +褵 +褼 +褾 +襒 +褷 +襂 +褽 +襓 +襋 +襆 +襐 +襛 +襗 +襡 +襘 +襝 +襣 +襭 +襩 +襮 +襳 +襹 +襺 +覂 +覅 +覕 +覛 +覝 +覢 +覤 +覣 +覭 +覮 +覶 +觓 +觤 +觡 +觠 +觢 +觩 +觰 +觬 +觲 +觷 +觺 +觻 +觼 +觾 +訑 +訰 +訧 +訬 +訞 +詍 +訹 +詙 +詀 +詄 +詅 +訿 +誂 +詻 +誃 +誫 +誙 +誋 +諆 +誸 +諔 +諕 +誻 +諀 +諅 +諵 +諝 +諰 +諈 +謞 +謘 +謑 +謋 +謒 +謕 +謍 +謈 +謪 +謧 +謣 +謰 +謵 +譇 +謯 +謱 +謥 +謷 +謦 +譐 +譈 +譊 +譀 +譋 +譕 +譑 +譠 +譪 +譝 +譨 +譣 +譥 +譹 +譸 +譅 +譺 +譻 +譾 +讄 +讂 +讆 +讋 +讔 +讘 +讟 +谹 +谻 +谽 +谾 +豃 +豋 +豍 +豏 +豗 +豜 +豝 +豟 +豥 +豤 +豦 +豭 +豰 +豲 +豱 +豯 +豵 +豷 +豶 +豻 +豽 +貁 +貀 +貄 +貏 +貑 +貕 +貙 +貗 +貜 +貣 +貾 +賌 +賥 +賟 +賙 +賵 +賮 +贆 +贕 +贙 +赨 +赩 +赮 +赸 +趀 +趌 +趎 +趏 +趍 +趓 +趠 +趜 +趡 +趥 +趧 +趬 +趪 +趭 +趫 +趮 +趷 +趹 +跘 +跓 +跍 +跇 +跜 +跕 +跙 +跈 +跰 +跠 +跮 +跦 +跢 +跧 +跲 +跫 +踂 +跿 +踍 +踃 +踇 +踆 +跾 +踠 +踥 +踤 +踡 +踕 +踛 +踖 +踑 +踙 +踧 +踘 +踓 +踳 +踾 +踸 +踼 +蹎 +蹍 +蹓 +蹗 +蹖 +蹞 +蹥 +蹛 +蹡 +蹝 +蹔 +蹸 +蹳 +蹪 +躆 +躈 +躖 +躗 +躟 +躠 +躤 +躣 +躩 +躨 +躽 +軓 +軘 +軞 +軯 +軷 +軦 +軮 +軥 +軵 +軧 +軨 +軶 +軱 +軬 +輆 +軿 +輁 +輀 +輂 +輐 +輑 +輤 +輘 +輚 +輠 +輣 +輖 +輗 +輮 +輵 +輲 +輹 +輷 +輴 +轃 +轇 +轈 +轒 +轑 +轏 +轐 +轓 +轙 +轖 +轗 +轕 +轚 +轞 +轛 +轠 +辴 +迉 +迒 +迋 +迍 +迖 +迣 +迡 +迾 +迿 +逜 +逿 +遝 +遳 +遰 +遻 +邆 +邅 +遾 +邍 +邔 +邟 +邥 +邞 +邧 +郱 +郕 +郖 +郠 +郙 +郣 +郥 +郘 +郰 +郲 +郔 +鄬 +郼 +鄈 +郹 +郻 +鄁 +鄇 +郺 +鄐 +鄍 +鄏 +鄎 +鄟 +鄝 +鄡 +鄛 +鄨 +鄪 +鄦 +鄮 +鄵 +鄸 +鄻 +鄾 +酀 +酁 +酄 +酇 +酖 +酘 +酓 +酟 +酳 +醆 +醊 +醓 +醙 +醟 +醥 +醧 +醰 +醱 +醷 +醲 +醳 +醹 +醽 +釂 +釃 +釢 +釱 +釳 +釸 +鈚 +鈌 +鈒 +釽 +鈆 +鉒 +鉠 +鉯 +鈶 +鉼 +銤 +銛 +銔 +鉹 +銗 +鋄 +鋀 +鋟 +鋘 +鋩 +鋝 +鋂 +鋊 +錧 +錼 +錭 +錎 +鋋 +鎡 +鎃 +鎯 +鍖 +鍜 +鍐 +鍭 +鍌 +鎒 +鎷 +鎝 +鎉 +鎎 +鎞 +鏏 +鏂 +鏚 +鏬 +鏙 +鐋 +鐏 +鏾 +鐕 +鐨 +鐍 +鐀 +鐎 +鐖 +鐻 +鐶 +鑐 +鑋 +鑕 +鑮 +鑯 +钂 +钀 +钁 +钃 +镺 +镻 +镼 +镽 +閈 +閍 +閺 +閵 +闀 +闉 +闅 +閷 +闒 +闑 +闚 +闛 +闠 +闟 +闤 +阞 +阢 +阤 +阠 +阰 +阹 +阸 +阺 +陏 +陓 +陊 +陼 +陭 +陫 +隇 +陾 +隉 +隒 +隓 +隞 +隤 +隿 +雂 +雈 +雓 +雔 +雗 +雚 +雟 +雘 +雺 +雽 +雿 +霂 +霋 +霒 +霐 +霠 +霣 +霢 +霩 +霫 +霬 +霮 +霵 +霿 +靆 +靃 +靪 +靮 +靷 +靲 +靾 +鞃 +鞀 +鞂 +靻 +鞊 +鞎 +鞈 +鞙 +鞗 +鞚 +鞜 +鞤 +鞪 +鞷 +鞶 +鞹 +鞻 +鞿 +韄 +韅 +韇 +韎 +韐 +韏 +韕 +韔 +韗 +韝 +韟 +韣 +韥 +韰 +韱 +韹 +韽 +頄 +頖 +頞 +頝 +頩 +頨 +頯 +頲 +顁 +顄 +顊 +顉 +顅 +顐 +顑 +顜 +顝 +顠 +顣 +顟 +顤 +顪 +顩 +顲 +颬 +颲 +颸 +颽 +颻 +颾 +飁 +飂 +飉 +飋 +飌 +飣 +飶 +餂 +餀 +飺 +餔 +餖 +餕 +餤 +餟 +餥 +餫 +餪 +餲 +餯 +餭 +餱 +餰 +饁 +饇 +饐 +饎 +饙 +饘 
+饛 +饡 +馣 +馲 +馰 +馵 +馻 +馺 +駂 +馽 +駜 +駍 +駏 +駎 +駖 +駮 +駬 +駥 +駤 +駣 +駩 +駺 +駴 +駷 +駹 +駶 +駻 +駽 +駾 +騃 +騉 +騑 +騊 +騇 +騚 +騕 +騥 +騝 +騛 +騢 +騠 +騧 +騞 +騜 +騵 +騲 +騴 +騱 +騬 +騪 +騩 +騹 +騽 +驆 +騺 +驓 +驔 +驈 +驉 +驖 +驞 +驠 +驦 +驨 +骭 +骫 +骹 +骿 +骴 +骾 +髇 +髊 +髆 +髍 +髐 +髟 +髧 +髬 +髳 +髶 +髺 +髾 +鬁 +髼 +鬋 +鬊 +鬎 +鬌 +鬐 +鬕 +鬗 +鬖 +鬙 +鬞 +鬠 +鬤 +鬫 +鬳 +鬵 +鬺 +鬾 +鬿 +魊 +魌 +魖 +魠 +魡 +魧 +魱 +魦 +魶 +魵 +鮅 +鮇 +魼 +魾 +魻 +鮂 +鮚 +鮞 +鮛 +鮦 +鮥 +鮤 +鮆 +鯆 +鮿 +鮵 +鯈 +鯫 +鯠 +鯞 +鯦 +鯬 +鰌 +鰋 +鰅 +鯸 +鰫 +鰝 +鰬 +鱆 +鰿 +鱄 +鱁 +鰴 +鱐 +鱍 +鱋 +鱕 +鱦 +鱢 +鱞 +鱴 +鱳 +鱹 +鳦 +鳪 +鳭 +鳱 +鳵 +鳼 +鳺 +鳿 +鳷 +鴀 +鳹 +鳻 +鴅 +鴃 +鴥 +鴠 +鴔 +鴩 +鴘 +鴢 +鴐 +鴳 +鵁 +鵧 +鴶 +鴮 +鴱 +鴸 +鵅 +鵃 +鴾 +鵀 +鴽 +鵏 +鵊 +鵛 +鵋 +鵖 +鵌 +鵗 +鵔 +鵷 +鶁 +鶊 +鶄 +鶈 +鵱 +鶀 +鵸 +鶋 +鶌 +鵽 +鵫 +鵴 +鵩 +鶅 +鵳 +鵻 +鶂 +鵹 +鶟 +鶙 +鶤 +鶝 +鶐 +鶛 +鶠 +鶔 +鶜 +鶪 +鶗 +鶢 +鶨 +鶞 +鶣 +鶖 +鶷 +鶶 +鷁 +鷇 +鷊 +鷏 +鶾 +鷅 +鷃 +鶵 +鷈 +鶱 +鶭 +鷛 +鷒 +鷞 +鷋 +鷐 +鷜 +鷑 +鷩 +鷘 +鷖 +鷵 +鷕 +鷻 +鷷 +鷣 +鷤 +鷶 +鷡 +鷮 +鷢 +鸂 +鷾 +鸇 +鸃 +鸆 +鸅 +鸀 +鸁 +鸉 +鷿 +鷽 +鸄 +鸋 +鸍 +鸏 +鸒 +鸔 +鸓 +鸗 +鸙 +鹺 +麃 +麆 +麉 +麎 +麌 +麔 +麙 +麛 +麚 +麜 +麠 +麡 +麧 +麮 +麰 +麶 +麷 +黀 +黂 +黈 +黓 +黕 +黖 +黚 +黤 +黫 +黮 +黭 +黰 +黳 +黵 +黺 +鼁 +鼀 +鼆 +鼊 +鼏 +鼖 +鼛 +鼘 +鼜 +鼤 +鼣 +鼥 +鼪 +鼨 +鼭 +鼰 +鼮 +鼵 +鼳 +鼲 +鼸 +鼶 +齀 +齂 +齃 +齌 +齍 +齎 +齖 +齗 +齘 +齛 +齠 +齞 +齝 +齥 +齤 +齫 +齱 +齰 +齮 +齯 +齴 +齵 +齸 +齻 +齺 +齹 +齾 +龒 +龤 +堔 +礂 +蒏 +蒆 +兙 +兛 +兞 +兝 +兡 +兣 +嗧 +瓩 +忼 +擡 +氊 +穇 +擧 +譌 +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +] +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +© +° +² +´ +½ +Á +Ä +Å +Ç +È +É +Í +Ó +Ö +× +Ü +ß +à +á +â +ã +ä +å +æ +ç +è +é +ê +ë +í +ð +ñ +ò +ó +ô +õ +ö +ø +ú +û +ü +ý +ā +ă +ą +ć +Č +č +đ +ē +ė +ę +ğ +ī +ı +Ł +ł +ń +ň +ō +ř +Ş +ş +Š +š +ţ +ū +ż +Ž +ž +Ș +ș +ț +Δ +α +λ +μ +φ +Г +О +а +в +л +о +р +с +т +я +ồ +— +― +’ +“ +” +… +℃ +→ +∇ +− +■ +☆ +、 +。 +々 +〆 +〈 +〉 +「 +」 +『 +』 +〔 +〕 +〜 +! +# +% +& +( +) ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; += +? 
+@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +R +S +T +U +V +W +X +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +~ +・ +ǎ +ǒ +ě +ǐ +ì +ǔ +ù +ǖ +ǘ +ǚ +ǜ +【 +】 +《 +》 +‥ +{ +} +\ +| +@ +^ +~ +÷ +∕ +∙ +⋅ +· +⊕ +⊖ +⊗ +⊘ +⊙ +± +∓ +∩ +∪ +□ +⊎ +⊓ +⊔ +≠ +≈ +≡ +≤ +≥ +≪ +≫ +≲ +≳ +≶ +≷ +≺ +≻ +≼ +≽ +∈ +∉ +⊂ +⊃ +⊆ +⊇ +⊄ +⊅ +∅ +∖ +∁ +∆ +∧ +∨ +¬ +⊻ +⊼ +⊽ +← +↔ +⇒ +⇐ +⇔ +∀ +∃ +∄ +∴ +∵ +∝ +∞ +⊥ +∟ +∠ +∡ +∢ +′ +″ +∥ +⊾ +⊿ +∂ +∫ +∬ +∭ +∮ +∯ +∰ +∑ +∏ +√ +∛ +∜ +∱ +∲ +∳ +∶ +∷ +∼ +® +≄ +≅ +≃ +≦ +≧ +⊈ +⊉ +⊢ +⊤ +⊨ +⊧ +℉ +Ω +℧ +Å +⌀ +ℏ +⅀ +⍺ +⍵ +¢ +€ +£ +¥ +¥ +₿ +↑ +↓ +↕ +↖ +↗ +↘ +↙ +↺ +↻ +↼ +↽ +↾ +↿ +⇀ +⇁ +⇂ +⇃ +⇋ +⇌ +ª +º +⁰ +¹ +³ +⁴ +⁵ +⁶ +⁷ +⁸ +⁹ +⁺ +⁻ +⁼ +⁽ +⁾ +ⁿ +₀ +₁ +₂ +₃ +₄ +₅ +₆ +₇ +₈ +₉ +₊ +₋ +₌ +₍ +₎ +Ⅰ +Ⅱ +Ⅲ +Ⅳ +Ⅴ +Ⅵ +Ⅶ +Ⅷ +Ⅸ +Ⅹ +Ⅺ +Ⅻ +ⅰ +ⅱ +ⅲ +ⅳ +ⅴ +ⅵ +ⅶ +ⅷ +ⅸ +ⅹ +ⅺ +ⅻ +☰ +☱ +☲ +☳ +☴ +☵ +☶ +☷ +♀ +♂ +♳ +♴ +♵ +♶ +♷ +♸ +♹ +♺ +♩ +♪ +♫ +♬ +⚪ +⚫ +⚬ +✶ +✷ +✸ +➀ +➁ +➂ +➃ +➄ +➅ +➆ +➇ +➈ +➉ +➊ +➋ +➌ +➍ +➎ +➏ +➐ +➑ +➒ +➓ +⏀ +⏁ +⏂ +⏃ +⏄ +⏅ +⏆ +⏇ +⏈ +⏉ +⏊ +⏋ +⏌ +⏚ +⏴ +⏵ +⏶ +⏷ +⏸ +⏹ +⏺ +⏻ +⏼ +Α +Β +Γ +Ε +Ζ +Η +Θ +Ι +Κ +Λ +Μ +Ν +Ξ +Ο +Π +Ρ +Σ +Τ +Υ +Φ +Χ +Ψ +β +γ +δ +ε +ζ +η +θ +ι +κ +ν +ξ +ο +π +ρ +σ +τ +υ +χ +ψ +ω +ϐ +ϑ +ϒ +ϕ +█ +ϖ +ϰ +ϱ +ϴ +ϵ +ϝ +Ϟ +ϟ +Ϡ +ϡ +Ϣ +ϣ +Ϥ +ϥ +Ϧ +ϧ +Ϩ +ϩ +Ϫ +ϫ +Ϭ +ϭ +Ϯ +ϯ +∸ +∹ +∺ +∻ +∽ +∾ +∿ +≀ +≁ +≂ +≆ +≇ +≉ +≊ +≋ +≌ +≍ +≎ +≏ +≐ +≑ +≒ +≓ +≔ +≕ +≖ +≗ +≘ +≙ +≚ +≛ +≜ +≝ +≞ +≟ +≢ +≣ +≨ +≩ +≬ +≭ +≮ +≯ +≰ +≱ +≴ +≵ +≸ +≹ +≾ +≿ +⊀ +⊁ +⊊ +⊋ +⊌ +⊍ +⊏ +⊐ +⊑ +⊒ +⊚ +⊛ +⊜ +⊝ +⊞ +⊟ +⊠ +⊡ +⊣ +⊦ +⊩ +⊪ +⊫ +⊬ +⊭ +⊮ +⊯ +⊰ +⊱ +⊲ +⊳ +⊴ +⊵ +⊶ +⊷ +⊸ +⊹ +⊺ +ℎ +℘ +ℜ +ℑ +ℵ +ℶ +ℷ +ℸ +⌬ +⌭ +⌮ +⌯ +⎔ +¤ +₠ +₡ +₢ +₣ +₤ +₥ +₦ +₧ +₨ +₩ +₪ +₫ +₭ +₮ +₯ +₰ +₱ +₲ +₳ +₴ +₵ +₶ +₷ +₸ +₹ +₺ +₻ +₼ +₽ +₾ +↚ +↛ +↜ +↝ +↞ +↟ +↠ +↡ +↢ +↣ +↤ +↥ +↦ +↧ +↨ +↩ +↪ +↫ +↬ +↭ +↮ +↯ +↰ +↱ +↲ +↳ +↴ +↵ +↶ +↷ +↸ +↹ +⇄ +⇅ +⇆ +⇇ +⇈ +⇉ +⇊ +⇍ +⇎ +⇏ +⇑ +⇓ +⇕ +⇖ +⇗ +⇘ +⇙ +⇚ +⇛ +⇜ +⇝ +⇞ +⇟ +⇠ +⇡ +⇢ +⇣ +⇤ +⇥ +⇦ +⇧ +⇨ +⇩ +⇪ +⇫ +⇬ +⇭ +⇮ +⇯ +⇰ +⇱ +⇲ +⇳ +⇴ +⇵ +⇶ +⇷ +⇸ +⇹ +⇺ +⇻ +⇼ +⇽ +⇾ +⇿ +ↀ +ↁ +ↂ +☀ +☁ +☂ +☃ +☄ +★ +☇ +☈ +☉ +☊ +☋ +☌ +☍ +☎ +☏ +☐ +☑ +☒ +☓ +☔ +☕ +☖ +☗ +☘ +☙ +☚ +☛ +☜ +☝ +☞ +☟ +☠ +☡ +☢ +☣ +☤ +☥ +☦ +☧ +☨ +☩ +☪ +☫ +☬ +☭ +☮ +☯ +☸ +☹ +☺ +☻ +☼ +☽ +☾ +☿ +♁ +♃ +♄ +♅ +♆ +♇ +♔ +♕ +♖ +♗ +♘ +♙ +♚ +♛ +♜ +♝ +♞ +♟ +♠ +♡ +♢ +♣ +♤ +♥ +♦ +♧ +♨ +♭ +♮ +♯ +♰ +♱ +♲ +♻ +♼ +♽ +♾ +⚀ +⚁ +⚂ +⚃ +⚄ +⚅ +⚆ +⚇ +⚈ +⚉ +⚊ +⚋ +⚌ +⚍ +⚎ +⚏ +⚐ +⚑ +⚒ +⚓ +⚔ +⚕ +⚖ +⚗ +⚘ +⚙ +⚚ +⚛ +⚜ +⚝ +⚞ +⚟ +⚠ +⚡ +⚢ +⚣ +⚤ +⚥ +⚦ +⚧ +⚨ +⚩ +⚭ +⚮ +⚯ +⚰ +⚱ +⚲ +⚳ +⚴ +⚵ +⚶ +⚷ +⚸ +⚹ +⚺ +⚻ +⚼ +⚿ +⛀ +⛁ +⛂ +⛃ +⛆ +⛇ +⛈ +⛉ +⛊ +⛋ +⛌ +⛍ +⛏ +⛐ +⛑ +⛒ +⛓ +⛕ +⛖ +⛗ +⛘ +⛙ +⛚ +⛛ +⛜ +⛝ +⛞ +⛠ +⛡ +⛢ +⛣ +⛤ +⛥ +⛦ +⛧ +⛨ +⛩ +⛪ +⛫ +⛬ +⛭ +⛮ +⛯ +⛶ +⛾ +⛿ +✆ +✇ +✈ +✉ +✌ +✍ +✎ +✏ +✐ +✑ +✒ +✓ +✔ +✕ +✙ +✚ +✛ +✜ +✝ +✞ +✟ +✠ +✡ +✢ +✣ +✤ +✥ +✦ +✧ +✩ +✪ +✫ +✬ +✭ +✮ +✯ +✰ +✱ +✲ +✳ +✴ +✵ +✹ +✺ +✻ +✼ +✽ +✾ +✿ +❀ +❁ +❂ +❃ +❄ +❅ +❆ +❇ +❈ +❉ +❊ +❋ +❍ +❏ +❐ +❑ +❒ +❖ +❘ +❙ +❚ +❛ +❜ +❝ +❞ +❡ +❢ +❣ +❤ +❥ +❦ +❧ +❨ +❩ +❪ +❫ +❬ +❭ +❮ +❯ +❰ +❱ +❲ +❳ +❴ +❵ +❶ +❷ +❸ +❹ +❺ +❻ +❼ +❽ +❾ +❿ +① +② +③ +④ +⑤ +⑥ +⑦ +⑧ +⑨ +⑩ +➔ +➕ +➖ +➗ +➘ +➙ +➚ +➛ +➜ +➝ +➞ +➟ +➠ +➡ +➢ +➣ +➤ +➥ +➦ +➧ +➨ +➩ +➪ +➫ +➬ +➭ +➮ +➯ +➰ +➱ +➲ +➳ +➴ +➵ +➶ +➷ +➸ +➹ +➺ +➻ +➼ +➽ +➾ +➿ +⌘ +⌥ +⌃ +⎋ +⌫ +⌦ +⏏ +⌤ +⌧ +⌨ +⎆ +⎇ +⎈ +⎉ +⎊ +⎌ +⎍ +⎎ +⎏ +⎐ +⎑ +⎒ +⎓ +⎕ +⎖ +⎗ +⎘ +⎙ +⎚ +⎛ +⎜ +⎝ +⎞ +⎟ +⎠ +⎡ +⎢ +⎣ +⎤ +⎥ +⎦ +⎧ +⎨ +⎩ +⎪ +⎫ +⎬ +⎭ +⎮ +⎯ +⎰ +⎱ +⎲ +⎳ +⎴ +⎵ +⎶ +⎷ +⎸ +⎹ +⎺ +⎻ +⎼ +⎽ +⎾ +⎿ +⏍ +⏎ +⏐ +⏑ +⏒ +⏓ +⏔ +⏕ +⏖ +⏗ +⏘ +⏙ +⏛ +⏜ +⏝ +⏞ +⏟ +⏠ +⏡ +⏢ +⏣ +⏤ +⏥ +⏦ +⏧ +⏨ +⏭ +⏮ +⏯ +⏱ +⏲ +▲ +▽ +◐ +⏽ +⏾ +⏿ +ɐ +ɑ +ɒ +ɓ +ɔ +ɕ +ɖ +ɗ +ɘ +ə +ɚ +ɛ +ɜ +ɝ +ɞ +ɟ +ɠ +ɡ +ɢ +ɣ +ɤ +ɥ +ɦ +ɧ +ɨ +ɩ +ɪ +ɫ +ɬ +ɭ +ɮ +ɯ +ɰ +ɱ +ɲ +ɳ +ɴ +ɵ +ɶ +ɷ +ɸ +ɹ +ɺ +ɻ +ɼ +ɽ +ɾ +ɿ +ʀ +ʁ +ʂ +ʃ +ʄ +ʅ +ʆ +ʇ +ʈ +ʉ +ʊ +ʋ +ʌ +ʍ +ʎ +ʏ +ʐ +ʑ +ʒ +ʓ +ʔ +ʕ +ʖ +ʗ +ʘ +ʙ +ʚ +ʛ +ʜ +ʝ +ʞ +ʟ +ʠ +ʡ +ʢ +ʣ +ʤ +ʥ +ʦ +ʧ +ʨ +ʩ +ʪ +ʫ 
+ʬ +ʭ +ʮ +ʯ +━ +Ǝ +à +● +▶ +| +𝑢 +〖 +〗 +︽ +– +﹥ +𝜓 +• +∋ +ƒ +० +✘ +Е +◉ +〒 +𝒱 +𝜆 +⟹ +﹪ +◊ +╆ +오 +˂ +〉 +𝝎 +▪ +△ +▁ +◼ +〇 +▷ +▬ +𝒮 +† +ₒ +⼁ +〵 +⭐ +╳ +⟶ +으 +⬆ +Ạ +◀ + +▫ +丄 +︾ +◥ +‖ +𝜌 +ⅼ +▼ +⁎ +﹏ +😁 +😂 +😃 +😄 +😅 +😆 +😉 +😊 +😋 +😌 +😍 +😏 +😒 +😓 +😔 +😖 +😘 +😚 +😜 +😝 +😞 +😠 +😡 +😢 +😣 +😤 +😥 +😨 +😩 +😪 +😫 +😭 +😰 +😱 +😲 +😳 +😵 +😷 +😸 +😹 +😺 +😻 +😼 +😽 +😾 +😿 +🙀 +🙅 +🙆 +🙇 +🙈 +🙉 +🙊 +🙋 +🙌 +🙍 +🙎 +🙏 +✂ +✅ +✊ +✋ +✖ +✨ +❌ +❎ +❓ +❔ +❕ +❗ +🚀 +🚃 +🚄 +🚅 +🚇 +🚉 +🚌 +🚏 +🚑 +🚒 +🚓 +🚕 +🚗 +🚙 +🚚 +🚢 +🚤 +🚥 +🚧 +🚨 +🚩 +🚪 +🚫 +🚬 +🚭 +🚲 +🚶 +🚹 +🚺 +🚻 +🚼 +🚽 +🚾 +🛀 +Ⓜ +🅰 +🅱 +🅾 +🅿 +🆎 +🆑 +🆒 +🆓 +🆔 +🆕 +🆖 +🆗 +🆘 +🆙 +🆚 +🇩🇪 +🇬🇧 +🇨🇳 +🇯🇵 +🇫🇷 +🇰🇷 +🇪🇸 +🇮🇹 +🇷🇺 +🇺🇸 +🈁 +ℹ +⌚ +⌛ +⏩ +⏪ +⏫ +⏬ +⏰ +⏳ +◻ +◽ +◾ +♈ +♉ +♊ +♋ +♌ +♍ +♎ +♏ +♐ +♑ +♒ +♓ +♿ +⚽ +⚾ +⛄ +⛅ +⛎ +⛔ +⛲ +⛳ +⛵ +⛺ +⛽ +⤴ +⤵ +⬅ +⬇ +⬛ +⬜ +⭕ +〰 +〽 +㊗ +㊙ +🀄 +🃏 +🌀 +🌁 +🌂 +🌃 +🌄 +🌅 +🌆 +🌇 +🌈 +🌉 +🌊 +🌋 +🌌 +🌏 +🌑 +🌓 +🌔 +🌕 +🌙 +🌛 +🌟 +🌠 +🌰 +🌱 +🌴 +🌵 +🌷 +🌸 +🌹 +🌺 +🌻 +🌼 +🌽 +🌾 +🌿 +🍀 +🍁 +🍂 +🍃 +🍄 +🍅 +🍆 +🍇 +🍈 +🍉 +🍊 +🍌 +🍍 +🍎 +🍏 +🍑 +🍒 +🍓 +🍔 +🍕 +🍖 +🍗 +🍘 +🍙 +🍚 +🍛 +🍜 +🍝 +🍞 +🍟 +🍠 +🍡 +🍢 +🍣 +🍤 +🍥 +🍦 +🍧 +🍨 +🍩 +🍪 +🍫 +🍬 +🍭 +🍮 +🍯 +🍰 +🍱 +🍲 +🍳 +🍴 +🍵 +🍶 +🍷 +🍸 +🍹 +🍺 +🍻 +🎀 +🎁 +🎂 +🎃 +🎄 +🎅 +🎆 +🎇 +🎈 +🎉 +🎊 +🎋 +🎌 +🎍 +🎎 +🎏 +🎐 +🎑 +🎒 +🎓 +🎠 +🎡 +🎢 +🎣 +🎤 +🎥 +🎦 +🎧 +🎨 +🎩 +🎪 +🎫 +🎬 +🎭 +🎮 +🎯 +🎰 +🎱 +🎲 +🎳 +🎴 +🎵 +🎶 +🎷 +🎸 +🎹 +🎺 +🎻 +🎼 +🎽 +🎾 +🎿 +🏀 +🏁 +🏂 +🏃 +🏄 +🏆 +🏈 +🏊 +🏠 +🏡 +🏢 +🏣 +🏥 +🏦 +🏧 +🏨 +🏩 +🏪 +🏫 +🏬 +🏭 +🏮 +🏯 +🏰 +🐌 +🐍 +🐎 +🐑 +🐒 +🐔 +🐗 +🐘 +🐙 +🐚 +🐛 +🐜 +🐝 +🐞 +🐟 +🐠 +🐡 +🐢 +🐣 +🐤 +🐥 +🐦 +🐧 +🐨 +🐩 +🐫 +🐬 +🐭 +🐮 +🐯 +🐰 +🐱 +🐲 +🐳 +🐴 +🐵 +🐶 +🐷 +🐸 +🐹 +🐺 +🐻 +🐼 +🐽 +🐾 +👀 +👂 +👃 +👄 +👅 +👆 +👇 +👈 +👉 +👊 +👋 +👌 +👍 +👎 +👏 +👐 +👑 +👒 +👓 +👔 +👕 +👖 +👗 +👘 +👙 +👚 +👛 +👜 +👝 +👞 +👟 +👠 +👡 +👢 +👣 +👤 +👦 +👧 +👨 +👩 +👪 +👫 +👮 +👯 +👰 +👱 +👲 +👳 +👴 +👵 +👶 +👷 +👸 +👹 +👺 +👻 +👼 +👽 +👾 +👿 +💀 +💁 +💂 +💃 +💄 +💅 +💆 +💇 +💈 +💉 +💊 +💋 +💌 +💍 +💎 +💏 +💐 +💑 +💒 +💓 +💔 +💕 +💖 +💗 +💘 +💙 +💚 +💛 +💜 +💝 +💞 +💟 +💠 +💡 +💢 +💣 +💤 +💥 +💦 +💧 +💨 +💩 +💪 +💫 +💬 +💮 +💯 +💰 +💲 +💳 +💴 +💵 +💸 +💹 +💺 +💻 +💼 +💽 +💾 +💿 +📀 +📁 +📂 +📃 +📄 +📅 +📆 +📇 +📈 +📉 +📊 +📋 +📌 +📍 +📎 +📏 +📐 +📑 +📒 +📓 +📔 +📕 +📖 +📗 +📘 +📙 +📚 +📛 +📜 +📝 +📞 +📟 +📠 +📡 +📢 +📣 +📤 +📥 +📦 +📧 +📨 +📩 +📪 +📫 +📮 +📰 +📱 +📲 +📳 +📴 +📶 +📷 +📹 +📺 +📻 +📼 +🔃 +🔊 +🔋 +🔌 +🔍 +🔎 +🔏 +🔐 +🔑 +🔒 +🔓 +🔔 +🔖 +🔗 +🔘 +🔙 +🔚 +🔛 +🔜 +🔝 +🔞 +🔟 +🔠 +🔡 +🔢 +🔣 +🔤 +🔥 +🔦 +🔧 +🔨 +🔩 +🔪 +🔫 +🔮 +🔯 +🔰 +🔱 +🔲 +🔳 +🔴 +🔵 +🔶 +🔷 +🔸 +🔹 +🔺 +🔻 +🔼 +🔽 +🕐 +🕑 +🕒 +🕓 +🕔 +🕕 +🕖 +🕗 +🕘 +🕙 +🕚 +🕛 +🗻 +🗼 +🗽 +🗾 +🗿 +😀 +😇 +😈 +😎 +😐 +😑 +😕 +😗 +😙 +😛 +😟 +😦 +😧 +😬 +😮 +😯 +😴 +😶 +🚁 +🚂 +🚆 +🚈 +🚊 +🚍 +🚎 +🚐 +🚔 +🚖 +🚘 +🚛 +🚜 +🚝 +🚞 +🚟 +🚠 +🚡 +🚣 +🚦 +🚮 +🚯 +🚰 +🚱 +🚳 +🚴 +🚵 +🚷 +🚸 +🚿 +🛁 +🛂 +🛃 +🛄 +🛅 +🌍 +🌎 +🌐 +🌒 +🌖 +🌗 +🌘 +🌚 +🌜 +🌝 +🌞 +🌲 +🌳 +🍋 +🍐 +🍼 +🏇 +🏉 +🏤 +🐀 +🐁 +🐂 +🐃 +🐄 +🐅 +🐆 +🐇 +🐈 +🐉 +🐊 +🐋 +🐏 +🐐 +🐓 +🐕 +🐖 +🐪 +👥 +👬 +👭 +💭 +💶 +💷 +📬 +📭 +📯 +📵 +🔀 +🔁 +🔂 +🔄 +🔅 +🔆 +🔇 +🔉 +🔕 +🔬 +🔭 +🕜 +🕝 +🕞 +🕟 +🕠 +🕡 +🕢 +🕣 +🕤 +🕥 +🕦 +🕧 diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt new file mode 100644 index 00000000..19d81892 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt @@ -0,0 +1,128 @@ +t +a +_ +i +m +g +/ +3 +I +L +S +V +R +C +2 +0 +1 +v +l +9 +7 +8 +. +j +p +ப +ூ +த +ம +ி +வ +ர +் +ந +ோ +ன +6 +ஆ +ற +ல +5 +ள +ா +ொ +ழ +ு +4 +ெ +ண +க +ட +ை +ே +ச +ய +ஒ +இ +அ +ங +உ +ீ +ஞ +எ +ஓ +ஃ +ஜ +ஷ +ஸ +ஏ +ஊ +ஹ +ஈ +ஐ +ௌ +ஔ +s +c +e +n +w +F +T +O +P +K +A +N +G +Y +E +M +H +U +B +o +b +D +d +r +W +u +y +f +X +k +q +h +J +z +Z +Q +x +- +' +$ +, +% +@ +é +! +# ++ +É +& +: +( +? 
+ diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt new file mode 100644 index 00000000..83d74cc7 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt @@ -0,0 +1,151 @@ +t +e +_ +i +m +g +/ +5 +I +L +S +V +R +C +2 +0 +1 +v +a +l +3 +4 +8 +9 +. +j +p +త +ె +ర +క +్ +ి +ం +చ +ే +ద +ు +7 +6 +ఉ +ా +మ +ట +ో +వ +ప +ల +శ +ఆ +య +ై +భ +' +ీ +గ +ూ +డ +ధ +హ +న +జ +స +[ +‌ +ష +అ +ణ +ఫ +బ +ఎ +; +ళ +థ +ొ +ఠ +ృ +ఒ +ఇ +ః +ఊ +ఖ +- +ఐ +ఘ +ౌ +ఏ +ఈ +ఛ +, +ఓ +ఞ +| +? +: +ఢ +" +( +” +! ++ +) +* += +& +“ +€ +] +£ +$ +s +c +n +w +k +J +G +u +d +r +E +o +h +y +b +f +B +M +O +T +N +D +P +A +F +x +W +Y +U +H +K +X +z +Z +Q +q +É +% +# +@ +é diff --git a/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml new file mode 100644 index 00000000..876f3ee9 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml @@ -0,0 +1,65 @@ +lang: + ch_lite: + det: ch_PP-OCRv3_det_infer.pth + rec: ch_PP-OCRv5_rec_infer.pth + dict: ppocrv5_dict.txt + ch_lite_v4: + det: ch_PP-OCRv3_det_infer.pth + rec: ch_PP-OCRv4_rec_infer.pth + dict: ppocr_keys_v1.txt + ch_server: + det: ch_PP-OCRv3_det_infer.pth + rec: ch_PP-OCRv5_rec_server_infer.pth + dict: ppocrv5_dict.txt + ch_server_v4: + det: ch_PP-OCRv3_det_infer.pth + rec: ch_PP-OCRv4_rec_server_infer.pth + dict: ppocr_keys_v1.txt + ch: + det: ch_PP-OCRv3_det_infer.pth + rec: ch_PP-OCRv4_rec_server_doc_infer.pth + dict: ppocrv4_doc_dict.txt + en: + det: en_PP-OCRv3_det_infer.pth + rec: en_PP-OCRv4_rec_infer.pth + dict: en_dict.txt + korean: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: korean_PP-OCRv3_rec_infer.pth + dict: korean_dict.txt + japan: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: japan_PP-OCRv3_rec_infer.pth + dict: japan_dict.txt + chinese_cht: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: chinese_cht_PP-OCRv3_rec_infer.pth + dict: chinese_cht_dict.txt + ta: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: ta_PP-OCRv3_rec_infer.pth + dict: ta_dict.txt + te: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: te_PP-OCRv3_rec_infer.pth + dict: te_dict.txt + ka: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: ka_PP-OCRv3_rec_infer.pth + dict: ka_dict.txt + latin: + det: en_PP-OCRv3_det_infer.pth + rec: latin_PP-OCRv3_rec_infer.pth + dict: latin_dict.txt + arabic: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: arabic_PP-OCRv3_rec_infer.pth + dict: arabic_dict.txt + cyrillic: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: cyrillic_PP-OCRv3_rec_infer.pth + dict: cyrillic_dict.txt + devanagari: + det: Multilingual_PP-OCRv3_det_infer.pth + rec: devanagari_PP-OCRv3_rec_infer.pth + dict: devanagari_dict.txt \ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/__init__.py b/mineru/model/ocr/paddleocr2pytorch/tools/__init__.py new file mode 100644 index 00000000..f64ba567 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. 
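The models_config.yml added above maps each supported OCR language to a detection model, a recognition model, and a character dictionary. A rough sketch of how a caller could resolve one of these entries is shown here; the helper name and the relative config path are illustrative only and are not part of this patch:

import yaml  # PyYAML, already required by read_network_config_from_yaml in this patch

# Path mirrors where models_config.yml is added in this patch (adjust to your checkout).
CFG = 'mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml'

def resolve_lang(lang: str):
    """Return (det_model, rec_model, dict_file) for a language key from models_config.yml."""
    with open(CFG, encoding='utf-8') as f:
        cfg = yaml.safe_load(f)
    entry = cfg['lang'][lang]  # raises KeyError for unsupported languages
    return entry['det'], entry['rec'], entry['dict']

print(resolve_lang('korean'))
# ('Multilingual_PP-OCRv3_det_infer.pth', 'korean_PP-OCRv3_rec_infer.pth', 'korean_dict.txt')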
\ No newline at end of file diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/infer/__init__.py b/mineru/model/ocr/paddleocr2pytorch/tools/infer/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/infer/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_cls.py b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_cls.py new file mode 100755 index 00000000..5dea3390 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_cls.py @@ -0,0 +1,106 @@ +import cv2 +import copy +import numpy as np +import math +import time +import torch +from ...pytorchocr.base_ocr_v20 import BaseOCRV20 +from . import pytorchocr_utility as utility +from ...pytorchocr.postprocess import build_post_process + + +class TextClassifier(BaseOCRV20): + def __init__(self, args, **kwargs): + self.device = args.device + self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] + self.cls_batch_num = args.cls_batch_num + self.cls_thresh = args.cls_thresh + postprocess_params = { + 'name': 'ClsPostProcess', + "label_list": args.label_list, + } + self.postprocess_op = build_post_process(postprocess_params) + + self.weights_path = args.cls_model_path + self.yaml_path = args.cls_yaml_path + network_config = utility.get_arch_config(self.weights_path) + super(TextClassifier, self).__init__(network_config, **kwargs) + + self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] + + self.limited_max_width = args.limited_max_width + self.limited_min_width = args.limited_min_width + + self.load_pytorch_weights(self.weights_path) + self.net.eval() + self.net.to(self.device) + + def resize_norm_img(self, img): + imgC, imgH, imgW = self.cls_image_shape + h = img.shape[0] + w = img.shape[1] + ratio = w / float(h) + imgW = max(min(imgW, self.limited_max_width), self.limited_min_width) + ratio_imgH = math.ceil(imgH * ratio) + ratio_imgH = max(ratio_imgH, self.limited_min_width) + if ratio_imgH > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + if self.cls_image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def __call__(self, img_list): + img_list = copy.deepcopy(img_list) + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the cls process + indices = np.argsort(np.array(width_list)) + + cls_res = [['', 0.0]] * img_num + batch_num = self.cls_batch_num + elapse = 0 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img(img_list[indices[ino]]) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = 
np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + starttime = time.time() + + with torch.no_grad(): + inp = torch.from_numpy(norm_img_batch) + inp = inp.to(self.device) + prob_out = self.net(inp) + prob_out = prob_out.cpu().numpy() + + cls_result = self.postprocess_op(prob_out) + elapse += time.time() - starttime + for rno in range(len(cls_result)): + label, score = cls_result[rno] + cls_res[indices[beg_img_no + rno]] = [label, score] + if '180' in label and score > self.cls_thresh: + img_list[indices[beg_img_no + rno]] = cv2.rotate( + img_list[indices[beg_img_no + rno]], 1) + return img_list, cls_res, elapse diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_det.py b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_det.py new file mode 100755 index 00000000..c6f1f9c7 --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_det.py @@ -0,0 +1,217 @@ +import sys + +import numpy as np +import time +import torch +from ...pytorchocr.base_ocr_v20 import BaseOCRV20 +from . import pytorchocr_utility as utility +from ...pytorchocr.data import create_operators, transform +from ...pytorchocr.postprocess import build_post_process + + +class TextDetector(BaseOCRV20): + def __init__(self, args, **kwargs): + self.args = args + self.det_algorithm = args.det_algorithm + self.device = args.device + pre_process_list = [{ + 'DetResizeForTest': { + 'limit_side_len': args.det_limit_side_len, + 'limit_type': args.det_limit_type, + } + }, { + 'NormalizeImage': { + 'std': [0.229, 0.224, 0.225], + 'mean': [0.485, 0.456, 0.406], + 'scale': '1./255.', + 'order': 'hwc' + } + }, { + 'ToCHWImage': None + }, { + 'KeepKeys': { + 'keep_keys': ['image', 'shape'] + } + }] + postprocess_params = {} + if self.det_algorithm == "DB": + postprocess_params['name'] = 'DBPostProcess' + postprocess_params["thresh"] = args.det_db_thresh + postprocess_params["box_thresh"] = args.det_db_box_thresh + postprocess_params["max_candidates"] = 1000 + postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio + postprocess_params["use_dilation"] = args.use_dilation + postprocess_params["score_mode"] = args.det_db_score_mode + elif self.det_algorithm == "DB++": + postprocess_params['name'] = 'DBPostProcess' + postprocess_params["thresh"] = args.det_db_thresh + postprocess_params["box_thresh"] = args.det_db_box_thresh + postprocess_params["max_candidates"] = 1000 + postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio + postprocess_params["use_dilation"] = args.use_dilation + postprocess_params["score_mode"] = args.det_db_score_mode + pre_process_list[1] = { + 'NormalizeImage': { + 'std': [1.0, 1.0, 1.0], + 'mean': + [0.48109378172549, 0.45752457890196, 0.40787054090196], + 'scale': '1./255.', + 'order': 'hwc' + } + } + elif self.det_algorithm == "EAST": + postprocess_params['name'] = 'EASTPostProcess' + postprocess_params["score_thresh"] = args.det_east_score_thresh + postprocess_params["cover_thresh"] = args.det_east_cover_thresh + postprocess_params["nms_thresh"] = args.det_east_nms_thresh + elif self.det_algorithm == "SAST": + pre_process_list[0] = { + 'DetResizeForTest': { + 'resize_long': args.det_limit_side_len + } + } + postprocess_params['name'] = 'SASTPostProcess' + postprocess_params["score_thresh"] = args.det_sast_score_thresh + postprocess_params["nms_thresh"] = args.det_sast_nms_thresh + self.det_sast_polygon = args.det_sast_polygon + if self.det_sast_polygon: + postprocess_params["sample_pts_num"] = 6 + postprocess_params["expand_scale"] = 1.2 + 
postprocess_params["shrink_ratio_of_width"] = 0.2 + else: + postprocess_params["sample_pts_num"] = 2 + postprocess_params["expand_scale"] = 1.0 + postprocess_params["shrink_ratio_of_width"] = 0.3 + elif self.det_algorithm == "PSE": + postprocess_params['name'] = 'PSEPostProcess' + postprocess_params["thresh"] = args.det_pse_thresh + postprocess_params["box_thresh"] = args.det_pse_box_thresh + postprocess_params["min_area"] = args.det_pse_min_area + postprocess_params["box_type"] = args.det_pse_box_type + postprocess_params["scale"] = args.det_pse_scale + self.det_pse_box_type = args.det_pse_box_type + elif self.det_algorithm == "FCE": + pre_process_list[0] = { + 'DetResizeForTest': { + 'rescale_img': [1080, 736] + } + } + postprocess_params['name'] = 'FCEPostProcess' + postprocess_params["scales"] = args.scales + postprocess_params["alpha"] = args.alpha + postprocess_params["beta"] = args.beta + postprocess_params["fourier_degree"] = args.fourier_degree + postprocess_params["box_type"] = args.det_fce_box_type + else: + print("unknown det_algorithm:{}".format(self.det_algorithm)) + sys.exit(0) + + self.preprocess_op = create_operators(pre_process_list) + self.postprocess_op = build_post_process(postprocess_params) + + self.weights_path = args.det_model_path + self.yaml_path = args.det_yaml_path + network_config = utility.get_arch_config(self.weights_path) + super(TextDetector, self).__init__(network_config, **kwargs) + self.load_pytorch_weights(self.weights_path) + self.net.eval() + self.net.to(self.device) + + def order_points_clockwise(self, pts): + """ + reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py + # sort the points based on their x-coordinates + """ + xSorted = pts[np.argsort(pts[:, 0]), :] + + # grab the left-most and right-most points from the sorted + # x-roodinate points + leftMost = xSorted[:2, :] + rightMost = xSorted[2:, :] + + # now, sort the left-most coordinates according to their + # y-coordinates so we can grab the top-left and bottom-left + # points, respectively + leftMost = leftMost[np.argsort(leftMost[:, 1]), :] + (tl, bl) = leftMost + + rightMost = rightMost[np.argsort(rightMost[:, 1]), :] + (tr, br) = rightMost + + rect = np.array([tl, tr, br, bl], dtype="float32") + return rect + + def clip_det_res(self, points, img_height, img_width): + for pno in range(points.shape[0]): + points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) + points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) + return points + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.order_points_clockwise(box) + box = self.clip_det_res(box, img_height, img_width) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 3 or rect_height <= 3: + continue + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.clip_det_res(box, img_height, img_width) + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def __call__(self, img): + ori_im = img.copy() + data = {'image': img} + data = transform(data, self.preprocess_op) + img, shape_list = data + if img is None: + return None, 0 + img = np.expand_dims(img, axis=0) + shape_list = np.expand_dims(shape_list, 
axis=0) + img = img.copy() + starttime = time.time() + + with torch.no_grad(): + inp = torch.from_numpy(img) + inp = inp.to(self.device) + outputs = self.net(inp) + + preds = {} + if self.det_algorithm == "EAST": + preds['f_geo'] = outputs['f_geo'].cpu().numpy() + preds['f_score'] = outputs['f_score'].cpu().numpy() + elif self.det_algorithm == 'SAST': + preds['f_border'] = outputs['f_border'].cpu().numpy() + preds['f_score'] = outputs['f_score'].cpu().numpy() + preds['f_tco'] = outputs['f_tco'].cpu().numpy() + preds['f_tvo'] = outputs['f_tvo'].cpu().numpy() + elif self.det_algorithm in ['DB', 'PSE', 'DB++']: + preds['maps'] = outputs['maps'].cpu().numpy() + elif self.det_algorithm == 'FCE': + for i, (k, output) in enumerate(outputs.items()): + preds['level_{}'.format(i)] = output + else: + raise NotImplementedError + + post_result = self.postprocess_op(preds, shape_list) + dt_boxes = post_result[0]['points'] + if (self.det_algorithm == "SAST" and + self.det_sast_polygon) or (self.det_algorithm in ["PSE", "FCE"] and + self.postprocess_op.box_type == 'poly'): + dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) + else: + dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) + + elapse = time.time() - starttime + return dt_boxes, elapse diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_rec.py b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_rec.py new file mode 100755 index 00000000..c06ca5fe --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_rec.py @@ -0,0 +1,446 @@ +from PIL import Image +import cv2 +import numpy as np +import math +import time +import torch +from tqdm import tqdm + +from ...pytorchocr.base_ocr_v20 import BaseOCRV20 +from . import pytorchocr_utility as utility +from ...pytorchocr.postprocess import build_post_process + + +class TextRecognizer(BaseOCRV20): + def __init__(self, args, **kwargs): + self.device = args.device + self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] + self.character_type = args.rec_char_type + self.rec_batch_num = args.rec_batch_num + self.rec_algorithm = args.rec_algorithm + self.max_text_length = args.max_text_length + postprocess_params = { + 'name': 'CTCLabelDecode', + "character_type": args.rec_char_type, + "character_dict_path": args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + if self.rec_algorithm == "SRN": + postprocess_params = { + 'name': 'SRNLabelDecode', + "character_type": args.rec_char_type, + "character_dict_path": args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + elif self.rec_algorithm == "RARE": + postprocess_params = { + 'name': 'AttnLabelDecode', + "character_type": args.rec_char_type, + "character_dict_path": args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + elif self.rec_algorithm == 'NRTR': + postprocess_params = { + 'name': 'NRTRLabelDecode', + "character_dict_path": args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + elif self.rec_algorithm == "SAR": + postprocess_params = { + 'name': 'SARLabelDecode', + "character_dict_path": args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + elif self.rec_algorithm == 'ViTSTR': + postprocess_params = { + 'name': 'ViTSTRLabelDecode', + "character_dict_path": args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + elif self.rec_algorithm == "CAN": + self.inverse = args.rec_image_inverse + postprocess_params = { + 'name': 'CANLabelDecode', + "character_dict_path": 
args.rec_char_dict_path, + "use_space_char": args.use_space_char + } + elif self.rec_algorithm == 'RFL': + postprocess_params = { + 'name': 'RFLLabelDecode', + "character_dict_path": None, + "use_space_char": args.use_space_char + } + self.postprocess_op = build_post_process(postprocess_params) + + self.limited_max_width = args.limited_max_width + self.limited_min_width = args.limited_min_width + + self.weights_path = args.rec_model_path + self.yaml_path = args.rec_yaml_path + + network_config = utility.get_arch_config(self.weights_path) + weights = self.read_pytorch_weights(self.weights_path) + + self.out_channels = self.get_out_channels(weights) + if self.rec_algorithm == 'NRTR': + self.out_channels = list(weights.values())[-1].numpy().shape[0] + elif self.rec_algorithm == 'SAR': + self.out_channels = list(weights.values())[-3].numpy().shape[0] + + kwargs['out_channels'] = self.out_channels + super(TextRecognizer, self).__init__(network_config, **kwargs) + + self.load_state_dict(weights) + self.net.eval() + self.net.to(self.device) + + def resize_norm_img(self, img, max_wh_ratio): + imgC, imgH, imgW = self.rec_image_shape + if self.rec_algorithm == 'NRTR' or self.rec_algorithm == 'ViTSTR': + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + # return padding_im + image_pil = Image.fromarray(np.uint8(img)) + if self.rec_algorithm == 'ViTSTR': + img = image_pil.resize([imgW, imgH], Image.BICUBIC) + else: + img = image_pil.resize([imgW, imgH], Image.ANTIALIAS) + img = np.array(img) + norm_img = np.expand_dims(img, -1) + norm_img = norm_img.transpose((2, 0, 1)) + if self.rec_algorithm == 'ViTSTR': + norm_img = norm_img.astype(np.float32) / 255. + else: + norm_img = norm_img.astype(np.float32) / 128. - 1. + return norm_img + elif self.rec_algorithm == 'RFL': + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_CUBIC) + resized_image = resized_image.astype('float32') + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + resized_image -= 0.5 + resized_image /= 0.5 + return resized_image + + assert imgC == img.shape[2] + max_wh_ratio = max(max_wh_ratio, imgW / imgH) + imgW = int((imgH * max_wh_ratio)) + imgW = max(min(imgW, self.limited_max_width), self.limited_min_width) + h, w = img.shape[:2] + ratio = w / float(h) + ratio_imgH = math.ceil(imgH * ratio) + ratio_imgH = max(ratio_imgH, self.limited_min_width) + if ratio_imgH > imgW: + resized_w = imgW + else: + resized_w = int(ratio_imgH) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def resize_norm_img_svtr(self, img, image_shape): + + imgC, imgH, imgW = image_shape + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + return resized_image + + + def resize_norm_img_srn(self, img, image_shape): + imgC, imgH, imgW = image_shape + + img_black = np.zeros((imgH, imgW)) + im_hei = img.shape[0] + im_wid = img.shape[1] + + if im_wid <= im_hei * 1: + img_new = cv2.resize(img, (imgH * 1, imgH)) + elif im_wid <= im_hei * 2: + img_new = cv2.resize(img, (imgH * 2, imgH)) + elif im_wid <= 
im_hei * 3: + img_new = cv2.resize(img, (imgH * 3, imgH)) + else: + img_new = cv2.resize(img, (imgW, imgH)) + + img_np = np.asarray(img_new) + img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) + img_black[:, 0:img_np.shape[1]] = img_np + img_black = img_black[:, :, np.newaxis] + + row, col, c = img_black.shape + c = 1 + + return np.reshape(img_black, (c, row, col)).astype(np.float32) + + def srn_other_inputs(self, image_shape, num_heads, max_text_length): + + imgC, imgH, imgW = image_shape + feature_dim = int((imgH / 8) * (imgW / 8)) + + encoder_word_pos = np.array(range(0, feature_dim)).reshape( + (feature_dim, 1)).astype('int64') + gsrm_word_pos = np.array(range(0, max_text_length)).reshape( + (max_text_length, 1)).astype('int64') + + gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) + gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( + [-1, 1, max_text_length, max_text_length]) + gsrm_slf_attn_bias1 = np.tile( + gsrm_slf_attn_bias1, + [1, num_heads, 1, 1]).astype('float32') * [-1e9] + + gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( + [-1, 1, max_text_length, max_text_length]) + gsrm_slf_attn_bias2 = np.tile( + gsrm_slf_attn_bias2, + [1, num_heads, 1, 1]).astype('float32') * [-1e9] + + encoder_word_pos = encoder_word_pos[np.newaxis, :] + gsrm_word_pos = gsrm_word_pos[np.newaxis, :] + + return [ + encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, + gsrm_slf_attn_bias2 + ] + + def process_image_srn(self, img, image_shape, num_heads, max_text_length): + norm_img = self.resize_norm_img_srn(img, image_shape) + norm_img = norm_img[np.newaxis, :] + + [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ + self.srn_other_inputs(image_shape, num_heads, max_text_length) + + gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32) + gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32) + encoder_word_pos = encoder_word_pos.astype(np.int64) + gsrm_word_pos = gsrm_word_pos.astype(np.int64) + + return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, + gsrm_slf_attn_bias2) + + def resize_norm_img_sar(self, img, image_shape, + width_downsample_ratio=0.25): + imgC, imgH, imgW_min, imgW_max = image_shape + h = img.shape[0] + w = img.shape[1] + valid_ratio = 1.0 + # make sure new_width is an integral multiple of width_divisor. 
+ width_divisor = int(1 / width_downsample_ratio) + # resize + ratio = w / float(h) + resize_w = math.ceil(imgH * ratio) + if resize_w % width_divisor != 0: + resize_w = round(resize_w / width_divisor) * width_divisor + if imgW_min is not None: + resize_w = max(imgW_min, resize_w) + if imgW_max is not None: + valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) + resize_w = min(imgW_max, resize_w) + resized_image = cv2.resize(img, (resize_w, imgH)) + resized_image = resized_image.astype('float32') + # norm + if image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + resize_shape = resized_image.shape + padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) + padding_im[:, :, 0:resize_w] = resized_image + pad_shape = padding_im.shape + + return padding_im, resize_shape, pad_shape, valid_ratio + + + def norm_img_can(self, img, image_shape): + + img = cv2.cvtColor( + img, cv2.COLOR_BGR2GRAY) # CAN only predict gray scale image + + if self.inverse: + img = 255 - img + + if self.rec_image_shape[0] == 1: + h, w = img.shape + _, imgH, imgW = self.rec_image_shape + if h < imgH or w < imgW: + padding_h = max(imgH - h, 0) + padding_w = max(imgW - w, 0) + img_padded = np.pad(img, ((0, padding_h), (0, padding_w)), + 'constant', + constant_values=(255)) + img = img_padded + + img = np.expand_dims(img, 0) / 255.0 # h,w,c -> c,h,w + img = img.astype('float32') + + return img + + def __call__(self, img_list, tqdm_enable=False): + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the recognition process + indices = np.argsort(np.array(width_list)) + + # rec_res = [] + rec_res = [['', 0.0]] * img_num + batch_num = self.rec_batch_num + elapse = 0 + # for beg_img_no in range(0, img_num, batch_num): + with tqdm(total=img_num, desc='OCR-rec Predict', disable=not tqdm_enable) as pbar: + index = 0 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + # h, w = img_list[ino].shape[0:2] + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + if self.rec_algorithm == "SAR": + norm_img, _, _, valid_ratio = self.resize_norm_img_sar( + img_list[indices[ino]], self.rec_image_shape) + norm_img = norm_img[np.newaxis, :] + valid_ratio = np.expand_dims(valid_ratio, axis=0) + valid_ratios = [] + valid_ratios.append(valid_ratio) + norm_img_batch.append(norm_img) + + elif self.rec_algorithm == "SVTR": + norm_img = self.resize_norm_img_svtr(img_list[indices[ino]], + self.rec_image_shape) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + elif self.rec_algorithm == "SRN": + norm_img = self.process_image_srn(img_list[indices[ino]], + self.rec_image_shape, 8, + self.max_text_length) + encoder_word_pos_list = [] + gsrm_word_pos_list = [] + gsrm_slf_attn_bias1_list = [] + gsrm_slf_attn_bias2_list = [] + encoder_word_pos_list.append(norm_img[1]) + gsrm_word_pos_list.append(norm_img[2]) + gsrm_slf_attn_bias1_list.append(norm_img[3]) + gsrm_slf_attn_bias2_list.append(norm_img[4]) + norm_img_batch.append(norm_img[0]) + elif self.rec_algorithm == "CAN": + norm_img = 
self.norm_img_can(img_list[indices[ino]], + max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_image_mask = np.ones(norm_img.shape, dtype='float32') + word_label = np.ones([1, 36], dtype='int64') + norm_img_mask_batch = [] + word_label_list = [] + norm_img_mask_batch.append(norm_image_mask) + word_label_list.append(word_label) + else: + norm_img = self.resize_norm_img(img_list[indices[ino]], + max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + + if self.rec_algorithm == "SRN": + starttime = time.time() + encoder_word_pos_list = np.concatenate(encoder_word_pos_list) + gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list) + gsrm_slf_attn_bias1_list = np.concatenate( + gsrm_slf_attn_bias1_list) + gsrm_slf_attn_bias2_list = np.concatenate( + gsrm_slf_attn_bias2_list) + + with torch.no_grad(): + inp = torch.from_numpy(norm_img_batch) + encoder_word_pos_inp = torch.from_numpy(encoder_word_pos_list) + gsrm_word_pos_inp = torch.from_numpy(gsrm_word_pos_list) + gsrm_slf_attn_bias1_inp = torch.from_numpy(gsrm_slf_attn_bias1_list) + gsrm_slf_attn_bias2_inp = torch.from_numpy(gsrm_slf_attn_bias2_list) + + inp = inp.to(self.device) + encoder_word_pos_inp = encoder_word_pos_inp.to(self.device) + gsrm_word_pos_inp = gsrm_word_pos_inp.to(self.device) + gsrm_slf_attn_bias1_inp = gsrm_slf_attn_bias1_inp.to(self.device) + gsrm_slf_attn_bias2_inp = gsrm_slf_attn_bias2_inp.to(self.device) + + backbone_out = self.net.backbone(inp) # backbone_feat + prob_out = self.net.head(backbone_out, [encoder_word_pos_inp, gsrm_word_pos_inp, gsrm_slf_attn_bias1_inp, gsrm_slf_attn_bias2_inp]) + # preds = {"predict": prob_out[2]} + preds = {"predict": prob_out["predict"]} + + elif self.rec_algorithm == "SAR": + starttime = time.time() + # valid_ratios = np.concatenate(valid_ratios) + # inputs = [ + # norm_img_batch, + # valid_ratios, + # ] + + with torch.no_grad(): + inp = torch.from_numpy(norm_img_batch) + inp = inp.to(self.device) + preds = self.net(inp) + + elif self.rec_algorithm == "CAN": + starttime = time.time() + norm_img_mask_batch = np.concatenate(norm_img_mask_batch) + word_label_list = np.concatenate(word_label_list) + inputs = [norm_img_batch, norm_img_mask_batch, word_label_list] + + inp = [torch.from_numpy(e_i) for e_i in inputs] + inp = [e_i.to(self.device) for e_i in inp] + with torch.no_grad(): + outputs = self.net(inp) + outputs = [v.cpu().numpy() for k, v in enumerate(outputs)] + + preds = outputs + + else: + starttime = time.time() + + with torch.no_grad(): + inp = torch.from_numpy(norm_img_batch) + inp = inp.to(self.device) + prob_out = self.net(inp) + + if isinstance(prob_out, list): + preds = [v.cpu().numpy() for v in prob_out] + else: + preds = prob_out.cpu().numpy() + + rec_result = self.postprocess_op(preds) + for rno in range(len(rec_result)): + rec_res[indices[beg_img_no + rno]] = rec_result[rno] + elapse += time.time() - starttime + + # 更新进度条,每次增加batch_size,但要注意最后一个batch可能不足batch_size + current_batch_size = min(batch_num, img_num - index * batch_num) + index += 1 + pbar.update(current_batch_size) + + # Fix NaN values in recognition results + for i in range(len(rec_res)): + text, score = rec_res[i] + if isinstance(score, float) and math.isnan(score): + rec_res[i] = (text, 0.0) + + return rec_res, elapse diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_system.py 
b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_system.py new file mode 100755 index 00000000..e35b9a4b --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/infer/predict_system.py @@ -0,0 +1,104 @@ +import cv2 +import copy +import numpy as np + +from . import predict_rec +from . import predict_det +from . import predict_cls + + +class TextSystem(object): + def __init__(self, args, **kwargs): + self.text_detector = predict_det.TextDetector(args, **kwargs) + self.text_recognizer = predict_rec.TextRecognizer(args, **kwargs) + self.use_angle_cls = args.use_angle_cls + self.drop_score = args.drop_score + if self.use_angle_cls: + self.text_classifier = predict_cls.TextClassifier(args, **kwargs) + + def get_rotate_crop_image(self, img, points): + ''' + img_height, img_width = img.shape[0:2] + left = int(np.min(points[:, 0])) + right = int(np.max(points[:, 0])) + top = int(np.min(points[:, 1])) + bottom = int(np.max(points[:, 1])) + img_crop = img[top:bottom, left:right, :].copy() + points[:, 0] = points[:, 0] - left + points[:, 1] = points[:, 1] - top + ''' + img_crop_width = int( + max( + np.linalg.norm(points[0] - points[1]), + np.linalg.norm(points[2] - points[3]))) + img_crop_height = int( + max( + np.linalg.norm(points[0] - points[3]), + np.linalg.norm(points[1] - points[2]))) + pts_std = np.float32([[0, 0], [img_crop_width, 0], + [img_crop_width, img_crop_height], + [0, img_crop_height]]) + M = cv2.getPerspectiveTransform(points, pts_std) + dst_img = cv2.warpPerspective( + img, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC) + dst_img_height, dst_img_width = dst_img.shape[0:2] + if dst_img_height * 1.0 / dst_img_width >= 1.5: + dst_img = np.rot90(dst_img) + return dst_img + + def __call__(self, img): + ori_im = img.copy() + dt_boxes, elapse = self.text_detector(img) + print("dt_boxes num : {}, elapse : {}".format( + len(dt_boxes), elapse)) + if dt_boxes is None: + return None, None + img_crop_list = [] + + dt_boxes = sorted_boxes(dt_boxes) + + for bno in range(len(dt_boxes)): + tmp_box = copy.deepcopy(dt_boxes[bno]) + img_crop = self.get_rotate_crop_image(ori_im, tmp_box) + img_crop_list.append(img_crop) + if self.use_angle_cls: + img_crop_list, angle_list, elapse = self.text_classifier( + img_crop_list) + print("cls num : {}, elapse : {}".format( + len(img_crop_list), elapse)) + + rec_res, elapse = self.text_recognizer(img_crop_list) + print("rec_res num : {}, elapse : {}".format( + len(rec_res), elapse)) + # self.print_draw_crop_rec_res(img_crop_list, rec_res) + filter_boxes, filter_rec_res = [], [] + for box, rec_reuslt in zip(dt_boxes, rec_res): + text, score = rec_reuslt + if score >= self.drop_score: + filter_boxes.append(box) + filter_rec_res.append(rec_reuslt) + return filter_boxes, filter_rec_res + + +def sorted_boxes(dt_boxes): + """ + Sort text boxes in order from top to bottom, left to right + args: + dt_boxes(array):detected text boxes with shape [4, 2] + return: + sorted boxes(array) with shape [4, 2] + """ + num_boxes = dt_boxes.shape[0] + sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) + _boxes = list(sorted_boxes) + + for i in range(num_boxes - 1): + if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ + (_boxes[i + 1][0][0] < _boxes[i][0][0]): + tmp = _boxes[i] + _boxes[i] = _boxes[i + 1] + _boxes[i + 1] = tmp + return _boxes diff --git a/mineru/model/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py b/mineru/model/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py new 
file mode 100755 index 00000000..912d124e --- /dev/null +++ b/mineru/model/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py @@ -0,0 +1,227 @@ +import os +import math +from pathlib import Path +import numpy as np +import cv2 +import argparse + + +root_dir = Path(__file__).resolve().parent.parent.parent +DEFAULT_CFG_PATH = root_dir / "pytorchocr" / "utils" / "resources" / "arch_config.yaml" + + +def init_args(): + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + # params for prediction engine + parser.add_argument("--use_gpu", type=str2bool, default=False) + parser.add_argument("--det", type=str2bool, default=True) + parser.add_argument("--rec", type=str2bool, default=True) + parser.add_argument("--device", type=str, default='cpu') + # parser.add_argument("--ir_optim", type=str2bool, default=True) + # parser.add_argument("--use_tensorrt", type=str2bool, default=False) + # parser.add_argument("--use_fp16", type=str2bool, default=False) + parser.add_argument("--gpu_mem", type=int, default=500) + parser.add_argument("--warmup", type=str2bool, default=False) + + # params for text detector + parser.add_argument("--image_dir", type=str) + parser.add_argument("--det_algorithm", type=str, default='DB') + parser.add_argument("--det_model_path", type=str) + parser.add_argument("--det_limit_side_len", type=float, default=960) + parser.add_argument("--det_limit_type", type=str, default='max') + + # DB parmas + parser.add_argument("--det_db_thresh", type=float, default=0.3) + parser.add_argument("--det_db_box_thresh", type=float, default=0.6) + parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5) + parser.add_argument("--max_batch_size", type=int, default=10) + parser.add_argument("--use_dilation", type=str2bool, default=False) + parser.add_argument("--det_db_score_mode", type=str, default="fast") + + # EAST parmas + parser.add_argument("--det_east_score_thresh", type=float, default=0.8) + parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) + parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) + + # SAST parmas + parser.add_argument("--det_sast_score_thresh", type=float, default=0.5) + parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2) + parser.add_argument("--det_sast_polygon", type=str2bool, default=False) + + # PSE parmas + parser.add_argument("--det_pse_thresh", type=float, default=0) + parser.add_argument("--det_pse_box_thresh", type=float, default=0.85) + parser.add_argument("--det_pse_min_area", type=float, default=16) + parser.add_argument("--det_pse_box_type", type=str, default='box') + parser.add_argument("--det_pse_scale", type=int, default=1) + + # FCE parmas + parser.add_argument("--scales", type=list, default=[8, 16, 32]) + parser.add_argument("--alpha", type=float, default=1.0) + parser.add_argument("--beta", type=float, default=1.0) + parser.add_argument("--fourier_degree", type=int, default=5) + parser.add_argument("--det_fce_box_type", type=str, default='poly') + + # params for text recognizer + parser.add_argument("--rec_algorithm", type=str, default='CRNN') + parser.add_argument("--rec_model_path", type=str) + parser.add_argument("--rec_image_inverse", type=str2bool, default=True) + parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320") + parser.add_argument("--rec_char_type", type=str, default='ch') + parser.add_argument("--rec_batch_num", type=int, default=6) + parser.add_argument("--max_text_length", type=int, default=25) + + 
parser.add_argument("--use_space_char", type=str2bool, default=True) + parser.add_argument("--drop_score", type=float, default=0.5) + parser.add_argument("--limited_max_width", type=int, default=1280) + parser.add_argument("--limited_min_width", type=int, default=16) + + parser.add_argument( + "--vis_font_path", type=str, + default=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'doc/fonts/simfang.ttf')) + parser.add_argument( + "--rec_char_dict_path", + type=str, + default=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + 'pytorchocr/utils/ppocr_keys_v1.txt')) + + # params for text classifier + parser.add_argument("--use_angle_cls", type=str2bool, default=False) + parser.add_argument("--cls_model_path", type=str) + parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") + parser.add_argument("--label_list", type=list, default=['0', '180']) + parser.add_argument("--cls_batch_num", type=int, default=6) + parser.add_argument("--cls_thresh", type=float, default=0.9) + + parser.add_argument("--enable_mkldnn", type=str2bool, default=False) + parser.add_argument("--use_pdserving", type=str2bool, default=False) + + # params for e2e + parser.add_argument("--e2e_algorithm", type=str, default='PGNet') + parser.add_argument("--e2e_model_path", type=str) + parser.add_argument("--e2e_limit_side_len", type=float, default=768) + parser.add_argument("--e2e_limit_type", type=str, default='max') + + # PGNet parmas + parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5) + parser.add_argument( + "--e2e_char_dict_path", type=str, + default=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + 'pytorchocr/utils/ic15_dict.txt')) + parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext') + parser.add_argument("--e2e_pgnet_polygon", type=bool, default=True) + parser.add_argument("--e2e_pgnet_mode", type=str, default='fast') + + # SR parmas + parser.add_argument("--sr_model_path", type=str) + parser.add_argument("--sr_image_shape", type=str, default="3, 32, 128") + parser.add_argument("--sr_batch_num", type=int, default=1) + + # params .yaml + parser.add_argument("--det_yaml_path", type=str, default=None) + parser.add_argument("--rec_yaml_path", type=str, default=None) + parser.add_argument("--cls_yaml_path", type=str, default=None) + parser.add_argument("--e2e_yaml_path", type=str, default=None) + parser.add_argument("--sr_yaml_path", type=str, default=None) + + # multi-process + parser.add_argument("--use_mp", type=str2bool, default=False) + parser.add_argument("--total_process_num", type=int, default=1) + parser.add_argument("--process_id", type=int, default=0) + + parser.add_argument("--benchmark", type=str2bool, default=False) + parser.add_argument("--save_log_path", type=str, default="./log_output/") + + parser.add_argument("--show_log", type=str2bool, default=True) + + return parser + +def parse_args(): + parser = init_args() + return parser.parse_args() + +def get_default_config(args): + return vars(args) + + +def read_network_config_from_yaml(yaml_path, char_num=None): + if not os.path.exists(yaml_path): + raise FileNotFoundError('{} is not existed.'.format(yaml_path)) + import yaml + with open(yaml_path, encoding='utf-8') as f: + res = yaml.safe_load(f) + if res.get('Architecture') is None: + raise ValueError('{} has no Architecture'.format(yaml_path)) + if res['Architecture']['Head']['name'] == 'MultiHead' and char_num is not 
None: + res['Architecture']['Head']['out_channels_list'] = { + 'CTCLabelDecode': char_num, + 'SARLabelDecode': char_num + 2, + 'NRTRLabelDecode': char_num + 3 + } + return res['Architecture'] + +def AnalysisConfig(weights_path, yaml_path=None, char_num=None): + if not os.path.exists(os.path.abspath(weights_path)): + raise FileNotFoundError('{} is not found.'.format(weights_path)) + + if yaml_path is not None: + return read_network_config_from_yaml(yaml_path, char_num=char_num) + + +def resize_img(img, input_size=600): + """ + resize img and limit the longest side of the image to input_size + """ + img = np.array(img) + im_shape = img.shape + im_size_max = np.max(im_shape[0:2]) + im_scale = float(input_size) / float(im_size_max) + img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) + return img + + +def str_count(s): + """ + Count the number of Chinese characters, + a single English character and a single number + equal to half the length of Chinese characters. + args: + s(string): the input of string + return(int): + the number of Chinese characters + """ + import string + count_zh = count_pu = 0 + s_len = len(s) + en_dg_count = 0 + for c in s: + if c in string.ascii_letters or c.isdigit() or c.isspace(): + en_dg_count += 1 + elif c.isalpha(): + count_zh += 1 + else: + count_pu += 1 + return s_len - math.ceil(en_dg_count / 2) + + +def base64_to_cv2(b64str): + import base64 + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_arch_config(model_path): + from omegaconf import OmegaConf + all_arch_config = OmegaConf.load(DEFAULT_CFG_PATH) + path = Path(model_path) + file_name = path.stem + if file_name not in all_arch_config: + raise ValueError(f"architecture {file_name} is not in arch_config.yaml") + + arch_config = all_arch_config[file_name] + return arch_config \ No newline at end of file diff --git a/mineru/model/reading_order/__init__.py b/mineru/model/reading_order/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/reading_order/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. 
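For reference, the tools/infer entry point added above can be exercised with a minimal sketch like the following. It is only a sketch: the weights/ directory, the .pth file locations, and demo.jpg are placeholders (the file names follow models_config.yml), all other options fall back to the init_args() defaults, and get_arch_config() is expected to find the weight file stems in arch_config.yaml:

import cv2
from mineru.model.ocr.paddleocr2pytorch.tools.infer import predict_system
from mineru.model.ocr.paddleocr2pytorch.tools.infer import pytorchocr_utility as utility

# Build an args namespace from the parser defaults, then point it at local weights.
args = utility.init_args().parse_args([])
args.device = 'cpu'
args.det_model_path = 'weights/ch_PP-OCRv3_det_infer.pth'   # placeholder path
args.rec_model_path = 'weights/ch_PP-OCRv5_rec_infer.pth'   # placeholder path
args.rec_char_dict_path = (
    'mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt'
)

# TextSystem wires TextDetector -> (optional TextClassifier) -> TextRecognizer.
text_system = predict_system.TextSystem(args)
img = cv2.imread('demo.jpg')                 # placeholder test image (BGR)
boxes, rec_res = text_system(img)            # detection boxes + (text, score) pairs
for box, (text, score) in zip(boxes, rec_res):
    print(text, score)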
diff --git a/mineru/model/reading_order/layout_reader.py b/mineru/model/reading_order/layout_reader.py new file mode 100644 index 00000000..dfe71a89 --- /dev/null +++ b/mineru/model/reading_order/layout_reader.py @@ -0,0 +1,125 @@ +from collections import defaultdict +from typing import List, Dict + +import torch +from transformers import LayoutLMv3ForTokenClassification + +MAX_LEN = 510 +CLS_TOKEN_ID = 0 +UNK_TOKEN_ID = 3 +EOS_TOKEN_ID = 2 + + +class DataCollator: + def __call__(self, features: List[dict]) -> Dict[str, torch.Tensor]: + bbox = [] + labels = [] + input_ids = [] + attention_mask = [] + + # clip bbox and labels to max length, build input_ids and attention_mask + for feature in features: + _bbox = feature["source_boxes"] + if len(_bbox) > MAX_LEN: + _bbox = _bbox[:MAX_LEN] + _labels = feature["target_index"] + if len(_labels) > MAX_LEN: + _labels = _labels[:MAX_LEN] + _input_ids = [UNK_TOKEN_ID] * len(_bbox) + _attention_mask = [1] * len(_bbox) + assert len(_bbox) == len(_labels) == len(_input_ids) == len(_attention_mask) + bbox.append(_bbox) + labels.append(_labels) + input_ids.append(_input_ids) + attention_mask.append(_attention_mask) + + # add CLS and EOS tokens + for i in range(len(bbox)): + bbox[i] = [[0, 0, 0, 0]] + bbox[i] + [[0, 0, 0, 0]] + labels[i] = [-100] + labels[i] + [-100] + input_ids[i] = [CLS_TOKEN_ID] + input_ids[i] + [EOS_TOKEN_ID] + attention_mask[i] = [1] + attention_mask[i] + [1] + + # padding to max length + max_len = max(len(x) for x in bbox) + for i in range(len(bbox)): + bbox[i] = bbox[i] + [[0, 0, 0, 0]] * (max_len - len(bbox[i])) + labels[i] = labels[i] + [-100] * (max_len - len(labels[i])) + input_ids[i] = input_ids[i] + [EOS_TOKEN_ID] * (max_len - len(input_ids[i])) + attention_mask[i] = attention_mask[i] + [0] * ( + max_len - len(attention_mask[i]) + ) + + ret = { + "bbox": torch.tensor(bbox), + "attention_mask": torch.tensor(attention_mask), + "labels": torch.tensor(labels), + "input_ids": torch.tensor(input_ids), + } + # set label > MAX_LEN to -100, because original labels may be > MAX_LEN + ret["labels"][ret["labels"] > MAX_LEN] = -100 + # set label > 0 to label-1, because original labels are 1-indexed + ret["labels"][ret["labels"] > 0] -= 1 + return ret + + +def boxes2inputs(boxes: List[List[int]]) -> Dict[str, torch.Tensor]: + bbox = [[0, 0, 0, 0]] + boxes + [[0, 0, 0, 0]] + input_ids = [CLS_TOKEN_ID] + [UNK_TOKEN_ID] * len(boxes) + [EOS_TOKEN_ID] + attention_mask = [1] + [1] * len(boxes) + [1] + return { + "bbox": torch.tensor([bbox]), + "attention_mask": torch.tensor([attention_mask]), + "input_ids": torch.tensor([input_ids]), + } + + +def prepare_inputs( + inputs: Dict[str, torch.Tensor], model: LayoutLMv3ForTokenClassification +) -> Dict[str, torch.Tensor]: + ret = {} + for k, v in inputs.items(): + v = v.to(model.device) + if torch.is_floating_point(v): + v = v.to(model.dtype) + ret[k] = v + return ret + + +def parse_logits(logits: torch.Tensor, length: int) -> List[int]: + """ + parse logits to orders + + :param logits: logits from model + :param length: input length + :return: orders + """ + logits = logits[1 : length + 1, :length] + orders = logits.argsort(descending=False).tolist() + ret = [o.pop() for o in orders] + while True: + order_to_idxes = defaultdict(list) + for idx, order in enumerate(ret): + order_to_idxes[order].append(idx) + # filter idxes len > 1 + order_to_idxes = {k: v for k, v in order_to_idxes.items() if len(v) > 1} + if not order_to_idxes: + break + # filter + for order, idxes in order_to_idxes.items(): + # find 
original logits of idxes + idxes_to_logit = {} + for idx in idxes: + idxes_to_logit[idx] = logits[idx, order] + idxes_to_logit = sorted( + idxes_to_logit.items(), key=lambda x: x[1], reverse=True + ) + # keep the highest logit as order, set others to next candidate + for idx, _ in idxes_to_logit[1:]: + ret[idx] = orders[idx].pop() + + return ret + + +def check_duplicate(a: List[int]) -> bool: + return len(a) != len(set(a)) diff --git a/mineru/model/reading_order/xycut.py b/mineru/model/reading_order/xycut.py new file mode 100644 index 00000000..7a36f527 --- /dev/null +++ b/mineru/model/reading_order/xycut.py @@ -0,0 +1,242 @@ +from typing import List +import cv2 +import numpy as np + + +def projection_by_bboxes(boxes: np.array, axis: int) -> np.ndarray: + """ + 通过一组 bbox 获得投影直方图,最后以 per-pixel 形式输出 + + Args: + boxes: [N, 4] + axis: 0-x坐标向水平方向投影, 1-y坐标向垂直方向投影 + + Returns: + 1D 投影直方图,长度为投影方向坐标的最大值(我们不需要图片的实际边长,因为只是要找文本框的间隔) + + """ + assert axis in [0, 1] + length = np.max(boxes[:, axis::2]) + res = np.zeros(length, dtype=int) + # TODO: how to remove for loop? + for start, end in boxes[:, axis::2]: + res[start:end] += 1 + return res + + +# from: https://dothinking.github.io/2021-06-19-%E9%80%92%E5%BD%92%E6%8A%95%E5%BD%B1%E5%88%86%E5%89%B2%E7%AE%97%E6%B3%95/#:~:text=%E9%80%92%E5%BD%92%E6%8A%95%E5%BD%B1%E5%88%86%E5%89%B2%EF%BC%88Recursive%20XY,%EF%BC%8C%E5%8F%AF%E4%BB%A5%E5%88%92%E5%88%86%E6%AE%B5%E8%90%BD%E3%80%81%E8%A1%8C%E3%80%82 +def split_projection_profile(arr_values: np.array, min_value: float, min_gap: float): + """Split projection profile: + + ``` + ┌──┐ + arr_values │ │ ┌─┐─── + ┌──┐ │ │ │ │ | + │ │ │ │ ┌───┐ │ │min_value + │ │<- min_gap ->│ │ │ │ │ │ | + ────┴──┴─────────────┴──┴─┴───┴─┴─┴─┴─── + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + ``` + + Args: + arr_values (np.array): 1-d array representing the projection profile. + min_value (float): Ignore the profile if `arr_value` is less than `min_value`. + min_gap (float): Ignore the gap if less than this value. + + Returns: + tuple: Start indexes and end indexes of split groups. 
+ """ + # all indexes with projection height exceeding the threshold + arr_index = np.where(arr_values > min_value)[0] + if not len(arr_index): + return + + # find zero intervals between adjacent projections + # | | || + # ||||<- zero-interval -> ||||| + arr_diff = arr_index[1:] - arr_index[0:-1] + arr_diff_index = np.where(arr_diff > min_gap)[0] + arr_zero_intvl_start = arr_index[arr_diff_index] + arr_zero_intvl_end = arr_index[arr_diff_index + 1] + + # convert to index of projection range: + # the start index of zero interval is the end index of projection + arr_start = np.insert(arr_zero_intvl_end, 0, arr_index[0]) + arr_end = np.append(arr_zero_intvl_start, arr_index[-1]) + arr_end += 1 # end index will be excluded as index slice + + return arr_start, arr_end + + +def recursive_xy_cut(boxes: np.ndarray, indices: List[int], res: List[int]): + """ + + Args: + boxes: (N, 4) + indices: 递归过程中始终表示 box 在原始数据中的索引 + res: 保存输出结果 + + """ + # 向 y 轴投影 + assert len(boxes) == len(indices) + + _indices = boxes[:, 1].argsort() + y_sorted_boxes = boxes[_indices] + y_sorted_indices = indices[_indices] + + # debug_vis(y_sorted_boxes, y_sorted_indices) + + y_projection = projection_by_bboxes(boxes=y_sorted_boxes, axis=1) + pos_y = split_projection_profile(y_projection, 0, 1) + if not pos_y: + return + + arr_y0, arr_y1 = pos_y + for r0, r1 in zip(arr_y0, arr_y1): + # [r0, r1] 表示按照水平切分,有 bbox 的区域,对这些区域会再进行垂直切分 + _indices = (r0 <= y_sorted_boxes[:, 1]) & (y_sorted_boxes[:, 1] < r1) + + y_sorted_boxes_chunk = y_sorted_boxes[_indices] + y_sorted_indices_chunk = y_sorted_indices[_indices] + + _indices = y_sorted_boxes_chunk[:, 0].argsort() + x_sorted_boxes_chunk = y_sorted_boxes_chunk[_indices] + x_sorted_indices_chunk = y_sorted_indices_chunk[_indices] + + # 往 x 方向投影 + x_projection = projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0) + pos_x = split_projection_profile(x_projection, 0, 1) + if not pos_x: + continue + + arr_x0, arr_x1 = pos_x + if len(arr_x0) == 1: + # x 方向无法切分 + res.extend(x_sorted_indices_chunk) + continue + + # x 方向上能分开,继续递归调用 + for c0, c1 in zip(arr_x0, arr_x1): + _indices = (c0 <= x_sorted_boxes_chunk[:, 0]) & ( + x_sorted_boxes_chunk[:, 0] < c1 + ) + recursive_xy_cut( + x_sorted_boxes_chunk[_indices], x_sorted_indices_chunk[_indices], res + ) + + +def points_to_bbox(points): + assert len(points) == 8 + + # [x1,y1,x2,y2,x3,y3,x4,y4] + left = min(points[::2]) + right = max(points[::2]) + top = min(points[1::2]) + bottom = max(points[1::2]) + + left = max(left, 0) + top = max(top, 0) + right = max(right, 0) + bottom = max(bottom, 0) + return [left, top, right, bottom] + + +def bbox2points(bbox): + left, top, right, bottom = bbox + return [left, top, right, top, right, bottom, left, bottom] + + +def vis_polygon(img, points, thickness=2, color=None): + br2bl_color = color + tl2tr_color = color + tr2br_color = color + bl2tl_color = color + cv2.line( + img, + (points[0][0], points[0][1]), + (points[1][0], points[1][1]), + color=tl2tr_color, + thickness=thickness, + ) + + cv2.line( + img, + (points[1][0], points[1][1]), + (points[2][0], points[2][1]), + color=tr2br_color, + thickness=thickness, + ) + + cv2.line( + img, + (points[2][0], points[2][1]), + (points[3][0], points[3][1]), + color=br2bl_color, + thickness=thickness, + ) + + cv2.line( + img, + (points[3][0], points[3][1]), + (points[0][0], points[0][1]), + color=bl2tl_color, + thickness=thickness, + ) + return img + + +def vis_points( + img: np.ndarray, points, texts: List[str] = None, color=(0, 200, 0) +) -> np.ndarray: + """ + + 
Args: + img: + points: [N, 8] 8: x1,y1,x2,y2,x3,y3,x3,y4 + texts: + color: + + Returns: + + """ + points = np.array(points) + if texts is not None: + assert len(texts) == points.shape[0] + + for i, _points in enumerate(points): + vis_polygon(img, _points.reshape(-1, 2), thickness=2, color=color) + bbox = points_to_bbox(_points) + left, top, right, bottom = bbox + cx = (left + right) // 2 + cy = (top + bottom) // 2 + + txt = texts[i] + font = cv2.FONT_HERSHEY_SIMPLEX + cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0] + + img = cv2.rectangle( + img, + (cx - 5 * len(txt), cy - cat_size[1] - 5), + (cx - 5 * len(txt) + cat_size[0], cy - 5), + color, + -1, + ) + + img = cv2.putText( + img, + txt, + (cx - 5 * len(txt), cy - 5), + font, + 0.5, + (255, 255, 255), + thickness=1, + lineType=cv2.LINE_AA, + ) + + return img + + +def vis_polygons_with_index(image, points): + texts = [str(i) for i in range(len(points))] + res_img = vis_points(image.copy(), points, texts) + return res_img \ No newline at end of file diff --git a/mineru/model/table/__init__.py b/mineru/model/table/__init__.py new file mode 100644 index 00000000..1e17167c --- /dev/null +++ b/mineru/model/table/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/model/table/rapid_table.py b/mineru/model/table/rapid_table.py new file mode 100644 index 00000000..dd085ff9 --- /dev/null +++ b/mineru/model/table/rapid_table.py @@ -0,0 +1,79 @@ +import os +from pathlib import Path +import cv2 +import numpy as np +from loguru import logger +from rapid_table import RapidTable, RapidTableInput + + +class RapidTableModel(object): + def __init__(self, ocr_engine): + root_dir = Path(__file__).absolute().parent.parent.parent.parent.parent + slanet_plus_model_path = os.path.join(root_dir, 'resources', 'slanet_plus', 'slanet-plus.onnx') + input_args = RapidTableInput(model_type='slanet_plus', model_path=slanet_plus_model_path) + self.table_model = RapidTable(input_args) + self.ocr_engine = ocr_engine + + + def predict(self, image): + bgr_image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR) + + # First check the overall image aspect ratio (height/width) + img_height, img_width = bgr_image.shape[:2] + img_aspect_ratio = img_height / img_width if img_width > 0 else 1.0 + img_is_portrait = img_aspect_ratio > 1.2 + + if img_is_portrait: + + det_res = self.ocr_engine.ocr(bgr_image, rec=False)[0] + # Check if table is rotated by analyzing text box aspect ratios + is_rotated = False + if det_res: + vertical_count = 0 + + for box_ocr_res in det_res: + p1, p2, p3, p4 = box_ocr_res + + # Calculate width and height + width = p3[0] - p1[0] + height = p3[1] - p1[1] + + aspect_ratio = width / height if height > 0 else 1.0 + + # Count vertical vs horizontal text boxes + if aspect_ratio < 0.8: # Taller than wide - vertical text + vertical_count += 1 + # elif aspect_ratio > 1.2: # Wider than tall - horizontal text + # horizontal_count += 1 + + # If we have more vertical text boxes than horizontal ones, + # and vertical ones are significant, table might be rotated + if vertical_count >= len(det_res) * 0.3: + is_rotated = True + + # logger.debug(f"Text orientation analysis: vertical={vertical_count}, det_res={len(det_res)}, rotated={is_rotated}") + + # Rotate image if necessary + if is_rotated: + # logger.debug("Table appears to be in portrait orientation, rotating 90 degrees clockwise") + image = cv2.rotate(np.asarray(image), cv2.ROTATE_90_CLOCKWISE) + bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + + # Continue with 
OCR on potentially rotated image
+        ocr_result = self.ocr_engine.ocr(bgr_image)[0]
+        if ocr_result:
+            ocr_result = [[item[0], item[1][0], item[1][1]] for item in ocr_result if
+                          len(item) == 2 and isinstance(item[1], tuple)]
+        else:
+            ocr_result = None
+
+
+        if ocr_result:
+            table_results = self.table_model(np.asarray(image), ocr_result)
+            html_code = table_results.pred_html
+            table_cell_bboxes = table_results.cell_bboxes
+            logic_points = table_results.logic_points
+            elapse = table_results.elapse
+            return html_code, table_cell_bboxes, logic_points, elapse
+        else:
+            return None, None, None, None
diff --git a/mineru/libs/boxbase.py b/mineru/utils/boxbase.py
similarity index 100%
rename from mineru/libs/boxbase.py
rename to mineru/utils/boxbase.py
diff --git a/mineru/libs/cut_image.py b/mineru/utils/cut_image.py
similarity index 100%
rename from mineru/libs/cut_image.py
rename to mineru/utils/cut_image.py
diff --git a/mineru/libs/draw_bbox.py b/mineru/utils/draw_bbox.py
similarity index 100%
rename from mineru/libs/draw_bbox.py
rename to mineru/utils/draw_bbox.py
diff --git a/mineru/libs/enum_class.py b/mineru/utils/enum_class.py
similarity index 100%
rename from mineru/libs/enum_class.py
rename to mineru/utils/enum_class.py
diff --git a/mineru/libs/hash_utils.py b/mineru/utils/hash_utils.py
similarity index 100%
rename from mineru/libs/hash_utils.py
rename to mineru/utils/hash_utils.py
diff --git a/mineru/libs/magic_model.py b/mineru/utils/magic_model.py
similarity index 100%
rename from mineru/libs/magic_model.py
rename to mineru/utils/magic_model.py
diff --git a/mineru/utils/model_utils.py b/mineru/utils/model_utils.py
new file mode 100644
index 00000000..60570404
--- /dev/null
+++ b/mineru/utils/model_utils.py
@@ -0,0 +1,323 @@
+import time
+import torch
+import gc
+from loguru import logger
+import numpy as np
+
+from mineru.utils.boxbase import get_minbox_if_overlap_by_ratio
+
+
+def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0):
+
+    crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1])
+    crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5])
+
+    # Calculate new dimensions
+    crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2
+    crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2
+
+    # Create a white background array
+    return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255
+
+    # Crop the original image using numpy slicing
+    cropped_img = input_np_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax]
+
+    # Paste the cropped image onto the white background
+    return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin),
+                 crop_paste_x:crop_paste_x + (crop_xmax - crop_xmin)] = cropped_img
+
+    return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width,
+                   crop_new_height]
+    return return_image, return_list
+
+
+def get_coords_and_area(block_with_poly):
+    """Extract coordinates and area from a table."""
+    xmin, ymin = int(block_with_poly['poly'][0]), int(block_with_poly['poly'][1])
+    xmax, ymax = int(block_with_poly['poly'][4]), int(block_with_poly['poly'][5])
+    area = (xmax - xmin) * (ymax - ymin)
+    return xmin, ymin, xmax, ymax, area
+
+
+def calculate_intersection(box1, box2):
+    """Calculate intersection coordinates between two boxes."""
+    intersection_xmin = max(box1[0], box2[0])
+    intersection_ymin = max(box1[1], box2[1])
+    intersection_xmax = min(box1[2], box2[2])
+    intersection_ymax = min(box1[3], box2[3])
+
+    # Check if intersection is valid
+
if intersection_xmax <= intersection_xmin or intersection_ymax <= intersection_ymin: + return None + + return intersection_xmin, intersection_ymin, intersection_xmax, intersection_ymax + + +def calculate_iou(box1, box2): + """Calculate IoU between two boxes.""" + intersection = calculate_intersection(box1[:4], box2[:4]) + + if not intersection: + return 0 + + intersection_xmin, intersection_ymin, intersection_xmax, intersection_ymax = intersection + intersection_area = (intersection_xmax - intersection_xmin) * (intersection_ymax - intersection_ymin) + + area1, area2 = box1[4], box2[4] + union_area = area1 + area2 - intersection_area + + return intersection_area / union_area if union_area > 0 else 0 + + +def is_inside(small_box, big_box, overlap_threshold=0.8): + """Check if small_box is inside big_box by at least overlap_threshold.""" + intersection = calculate_intersection(small_box[:4], big_box[:4]) + + if not intersection: + return False + + intersection_xmin, intersection_ymin, intersection_xmax, intersection_ymax = intersection + intersection_area = (intersection_xmax - intersection_xmin) * (intersection_ymax - intersection_ymin) + + # Check if overlap exceeds threshold + return intersection_area >= overlap_threshold * small_box[4] + + +def do_overlap(box1, box2): + """Check if two boxes overlap.""" + return calculate_intersection(box1[:4], box2[:4]) is not None + + +def merge_high_iou_tables(table_res_list, layout_res, table_indices, iou_threshold=0.7): + """Merge tables with IoU > threshold.""" + if len(table_res_list) < 2: + return table_res_list, table_indices + + table_info = [get_coords_and_area(table) for table in table_res_list] + merged = True + + while merged: + merged = False + i = 0 + while i < len(table_res_list) - 1: + j = i + 1 + while j < len(table_res_list): + iou = calculate_iou(table_info[i], table_info[j]) + + if iou > iou_threshold: + # Merge tables by taking their union + x1_min, y1_min, x1_max, y1_max, _ = table_info[i] + x2_min, y2_min, x2_max, y2_max, _ = table_info[j] + + union_xmin = min(x1_min, x2_min) + union_ymin = min(y1_min, y2_min) + union_xmax = max(x1_max, x2_max) + union_ymax = max(y1_max, y2_max) + + # Create merged table + merged_table = table_res_list[i].copy() + merged_table['poly'][0] = union_xmin + merged_table['poly'][1] = union_ymin + merged_table['poly'][2] = union_xmax + merged_table['poly'][3] = union_ymin + merged_table['poly'][4] = union_xmax + merged_table['poly'][5] = union_ymax + merged_table['poly'][6] = union_xmin + merged_table['poly'][7] = union_ymax + + # Update layout_res + to_remove = [table_indices[j], table_indices[i]] + for idx in sorted(to_remove, reverse=True): + del layout_res[idx] + layout_res.append(merged_table) + + # Update tracking lists + table_indices = [k if k < min(to_remove) else + k - 1 if k < max(to_remove) else + k - 2 if k > max(to_remove) else + len(layout_res) - 1 + for k in table_indices + if k not in to_remove] + table_indices.append(len(layout_res) - 1) + + # Update table lists + table_res_list.pop(j) + table_res_list.pop(i) + table_res_list.append(merged_table) + + # Update table_info + table_info = [get_coords_and_area(table) for table in table_res_list] + + merged = True + break + j += 1 + + if merged: + break + i += 1 + + return table_res_list, table_indices + + +def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0.8): + """Remove big tables containing multiple smaller tables within them.""" + if len(table_res_list) < 3: + return table_res_list + + table_info = 
[get_coords_and_area(table) for table in table_res_list] + big_tables_idx = [] + + for i in range(len(table_res_list)): + # Find tables inside this one + tables_inside = [j for j in range(len(table_res_list)) + if i != j and is_inside(table_info[j], table_info[i], overlap_threshold)] + + # Continue if there are at least 3 tables inside + if len(tables_inside) >= 3: + # Check if inside tables overlap with each other + tables_overlap = any(do_overlap(table_info[tables_inside[idx1]], table_info[tables_inside[idx2]]) + for idx1 in range(len(tables_inside)) + for idx2 in range(idx1 + 1, len(tables_inside))) + + # If no overlaps, check area condition + if not tables_overlap: + total_inside_area = sum(table_info[j][4] for j in tables_inside) + big_table_area = table_info[i][4] + + if total_inside_area > area_threshold * big_table_area: + big_tables_idx.append(i) + + return [table for i, table in enumerate(table_res_list) if i not in big_tables_idx] + + +def remove_overlaps_min_blocks(res_list): + # 重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。 + # 删除重叠blocks中较小的那些 + need_remove = [] + for res1 in res_list: + for res2 in res_list: + if res1 != res2: + overlap_box = get_minbox_if_overlap_by_ratio( + res1['bbox'], res2['bbox'], 0.8 + ) + if overlap_box is not None: + res_to_remove = next( + (res for res in res_list if res['bbox'] == overlap_box), + None, + ) + if ( + res_to_remove is not None + and res_to_remove not in need_remove + ): + large_res = res1 if res1 != res_to_remove else res2 + x1, y1, x2, y2 = large_res['bbox'] + sx1, sy1, sx2, sy2 = res_to_remove['bbox'] + x1 = min(x1, sx1) + y1 = min(y1, sy1) + x2 = max(x2, sx2) + y2 = max(y2, sy2) + large_res['bbox'] = [x1, y1, x2, y2] + need_remove.append(res_to_remove) + + if len(need_remove) > 0: + for res in need_remove: + res_list.remove(res) + + return res_list, need_remove + + +def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshold=0.8, area_threshold=0.8): + """Extract OCR, table and other regions from layout results.""" + ocr_res_list = [] + text_res_list = [] + table_res_list = [] + table_indices = [] + single_page_mfdetrec_res = [] + + # Categorize regions + for i, res in enumerate(layout_res): + category_id = int(res['category_id']) + + if category_id in [13, 14]: # Formula regions + single_page_mfdetrec_res.append({ + "bbox": [int(res['poly'][0]), int(res['poly'][1]), + int(res['poly'][4]), int(res['poly'][5])], + }) + elif category_id in [0, 2, 4, 6, 7, 3]: # OCR regions + ocr_res_list.append(res) + elif category_id == 5: # Table regions + table_res_list.append(res) + table_indices.append(i) + elif category_id in [1]: # Text regions + res['bbox'] = [int(res['poly'][0]), int(res['poly'][1]), int(res['poly'][4]), int(res['poly'][5])] + text_res_list.append(res) + + # Process tables: merge high IoU tables first, then filter nested tables + table_res_list, table_indices = merge_high_iou_tables( + table_res_list, layout_res, table_indices, iou_threshold) + + filtered_table_res_list = filter_nested_tables( + table_res_list, overlap_threshold, area_threshold) + + # Remove filtered out tables from layout_res + if len(filtered_table_res_list) < len(table_res_list): + kept_tables = set(id(table) for table in filtered_table_res_list) + to_remove = [table_indices[i] for i, table in enumerate(table_res_list) + if id(table) not in kept_tables] + + for idx in sorted(to_remove, reverse=True): + del layout_res[idx] + + # Remove overlaps in OCR and text regions + text_res_list, need_remove = remove_overlaps_min_blocks(text_res_list) + for 
res in text_res_list:
+        # rebuild the poly of res from its bbox
+        res['poly'] = [res['bbox'][0], res['bbox'][1], res['bbox'][2], res['bbox'][1],
+                       res['bbox'][2], res['bbox'][3], res['bbox'][0], res['bbox'][3]]
+        # drop the temporary bbox key from res
+        del res['bbox']
+
+    ocr_res_list.extend(text_res_list)
+
+    if len(need_remove) > 0:
+        for res in need_remove:
+            del res['bbox']
+            layout_res.remove(res)
+
+    return ocr_res_list, filtered_table_res_list, single_page_mfdetrec_res
+
+
+def clean_memory(device='cuda'):
+    if device == 'cuda':
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+    elif str(device).startswith("npu"):
+        import torch_npu
+        if torch_npu.npu.is_available():
+            torch_npu.npu.empty_cache()
+    elif str(device).startswith("mps"):
+        torch.mps.empty_cache()
+    gc.collect()
+
+
+def clean_vram(device, vram_threshold=8):
+    total_memory = get_vram(device)
+    if total_memory and total_memory <= vram_threshold:
+        gc_start = time.time()
+        clean_memory(device)
+        gc_time = round(time.time() - gc_start, 2)
+        logger.info(f"gc time: {gc_time}")
+
+
+def get_vram(device):
+    if torch.cuda.is_available() and str(device).startswith("cuda"):
+        total_memory = torch.cuda.get_device_properties(device).total_memory / (1024 ** 3)  # convert bytes to GB
+        return total_memory
+    elif str(device).startswith("npu"):
+        import torch_npu
+        if torch_npu.npu.is_available():
+            total_memory = torch_npu.npu.get_device_properties(device).total_memory / (1024 ** 3)  # convert to GB
+            return total_memory
+    else:
+        return None
\ No newline at end of file
diff --git a/mineru/utils/ocr_utils.py b/mineru/utils/ocr_utils.py
new file mode 100644
index 00000000..850e1611
--- /dev/null
+++ b/mineru/utils/ocr_utils.py
@@ -0,0 +1,401 @@
+# Copyright (c) Opendatalab. All rights reserved.
+import copy
+import cv2
+import numpy as np
+
+
+def merge_spans_to_line(spans, threshold=0.6):
+    if len(spans) == 0:
+        return []
+    else:
+        # sort spans by their y0 coordinate
+        spans.sort(key=lambda span: span['bbox'][1])
+
+        lines = []
+        current_line = [spans[0]]
+        for span in spans[1:]:
+            # if the span overlaps the last span of the current line on the y-axis, add it to the current line
+            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], threshold):
+                current_line.append(span)
+            else:
+                # otherwise, start a new line
+                lines.append(current_line)
+                current_line = [span]
+
+        # append the last line
+        if current_line:
+            lines.append(current_line)
+
+        return lines
+
+def __is_overlaps_y_exceeds_threshold(bbox1,
+                                      bbox2,
+                                      overlap_ratio_threshold=0.8):
+    """Check whether two bboxes overlap on the y-axis, and whether the overlap height exceeds 80% of the shorter bbox's height."""
+    _, y0_1, _, y1_1 = bbox1
+    _, y0_2, _, y1_2 = bbox2
+
+    overlap = max(0, min(y1_1, y1_2) - max(y0_1, y0_2))
+    height1, height2 = y1_1 - y0_1, y1_2 - y0_2
+    # max_height = max(height1, height2)
+    min_height = min(height1, height2)
+
+    return (overlap / min_height) > overlap_ratio_threshold
+
+
+def img_decode(content: bytes):
+    np_arr = np.frombuffer(content, dtype=np.uint8)
+    return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
+
+def check_img(img):
+    if isinstance(img, bytes):
+        img = img_decode(img)
+    if isinstance(img, np.ndarray) and len(img.shape) == 2:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    return img
+
+
+def alpha_to_color(img, alpha_color=(255, 255, 255)):
+    if len(img.shape) == 3 and img.shape[2] == 4:
+        B, G, R, A = cv2.split(img)
+        alpha = A / 255
+
+        R = (alpha_color[0] * (1 - alpha) + R * alpha).astype(np.uint8)
+        G = (alpha_color[1] * (1 - alpha) + G * alpha).astype(np.uint8)
+        B = (alpha_color[2] * (1 - alpha) + B * alpha).astype(np.uint8)
+
+        img = cv2.merge((B, G, R))
+    return img
+
+
+def preprocess_image(_image):
+    alpha_color = (255,
255, 255) + _image = alpha_to_color(_image, alpha_color) + return _image + + +def sorted_boxes(dt_boxes): + """ + Sort text boxes in order from top to bottom, left to right + args: + dt_boxes(array):detected text boxes with shape [4, 2] + return: + sorted boxes(array) with shape [4, 2] + """ + num_boxes = dt_boxes.shape[0] + sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) + _boxes = list(sorted_boxes) + + for i in range(num_boxes - 1): + for j in range(i, -1, -1): + if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \ + (_boxes[j + 1][0][0] < _boxes[j][0][0]): + tmp = _boxes[j] + _boxes[j] = _boxes[j + 1] + _boxes[j + 1] = tmp + else: + break + return _boxes + + +def bbox_to_points(bbox): + """ 将bbox格式转换为四个顶点的数组 """ + x0, y0, x1, y1 = bbox + return np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]]).astype('float32') + + +def points_to_bbox(points): + """ 将四个顶点的数组转换为bbox格式 """ + x0, y0 = points[0] + x1, _ = points[1] + _, y1 = points[2] + return [x0, y0, x1, y1] + + +def merge_intervals(intervals): + # Sort the intervals based on the start value + intervals.sort(key=lambda x: x[0]) + + merged = [] + for interval in intervals: + # If the list of merged intervals is empty or if the current + # interval does not overlap with the previous, simply append it. + if not merged or merged[-1][1] < interval[0]: + merged.append(interval) + else: + # Otherwise, there is overlap, so we merge the current and previous intervals. + merged[-1][1] = max(merged[-1][1], interval[1]) + + return merged + + +def remove_intervals(original, masks): + # Merge all mask intervals + merged_masks = merge_intervals(masks) + + result = [] + original_start, original_end = original + + for mask in merged_masks: + mask_start, mask_end = mask + + # If the mask starts after the original range, ignore it + if mask_start > original_end: + continue + + # If the mask ends before the original range starts, ignore it + if mask_end < original_start: + continue + + # Remove the masked part from the original range + if original_start < mask_start: + result.append([original_start, mask_start - 1]) + + original_start = max(mask_end + 1, original_start) + + # Add the remaining part of the original range, if any + if original_start <= original_end: + result.append([original_start, original_end]) + + return result + + +def update_det_boxes(dt_boxes, mfd_res): + new_dt_boxes = [] + angle_boxes_list = [] + for text_box in dt_boxes: + + if calculate_is_angle(text_box): + angle_boxes_list.append(text_box) + continue + + text_bbox = points_to_bbox(text_box) + masks_list = [] + for mf_box in mfd_res: + mf_bbox = mf_box['bbox'] + if __is_overlaps_y_exceeds_threshold(text_bbox, mf_bbox): + masks_list.append([mf_bbox[0], mf_bbox[2]]) + text_x_range = [text_bbox[0], text_bbox[2]] + text_remove_mask_range = remove_intervals(text_x_range, masks_list) + temp_dt_box = [] + for text_remove_mask in text_remove_mask_range: + temp_dt_box.append(bbox_to_points([text_remove_mask[0], text_bbox[1], text_remove_mask[1], text_bbox[3]])) + if len(temp_dt_box) > 0: + new_dt_boxes.extend(temp_dt_box) + + new_dt_boxes.extend(angle_boxes_list) + + return new_dt_boxes + + +def merge_overlapping_spans(spans): + """ + Merges overlapping spans on the same line. + + :param spans: A list of span coordinates [(x1, y1, x2, y2), ...] 
+ :return: A list of merged spans + """ + # Return an empty list if the input spans list is empty + if not spans: + return [] + + # Sort spans by their starting x-coordinate + spans.sort(key=lambda x: x[0]) + + # Initialize the list of merged spans + merged = [] + for span in spans: + # Unpack span coordinates + x1, y1, x2, y2 = span + # If the merged list is empty or there's no horizontal overlap, add the span directly + if not merged or merged[-1][2] < x1: + merged.append(span) + else: + # If there is horizontal overlap, merge the current span with the previous one + last_span = merged.pop() + # Update the merged span's top-left corner to the smaller (x1, y1) and bottom-right to the larger (x2, y2) + x1 = min(last_span[0], x1) + y1 = min(last_span[1], y1) + x2 = max(last_span[2], x2) + y2 = max(last_span[3], y2) + # Add the merged span back to the list + merged.append((x1, y1, x2, y2)) + + # Return the list of merged spans + return merged + + +def merge_det_boxes(dt_boxes): + """ + Merge detection boxes. + + This function takes a list of detected bounding boxes, each represented by four corner points. + The goal is to merge these bounding boxes into larger text regions. + + Parameters: + dt_boxes (list): A list containing multiple text detection boxes, where each box is defined by four corner points. + + Returns: + list: A list containing the merged text regions, where each region is represented by four corner points. + """ + # Convert the detection boxes into a dictionary format with bounding boxes and type + dt_boxes_dict_list = [] + angle_boxes_list = [] + for text_box in dt_boxes: + text_bbox = points_to_bbox(text_box) + + if calculate_is_angle(text_box): + angle_boxes_list.append(text_box) + continue + + text_box_dict = {'bbox': text_bbox} + dt_boxes_dict_list.append(text_box_dict) + + # Merge adjacent text regions into lines + lines = merge_spans_to_line(dt_boxes_dict_list) + + # Initialize a new list for storing the merged text regions + new_dt_boxes = [] + for line in lines: + line_bbox_list = [] + for span in line: + line_bbox_list.append(span['bbox']) + + # Merge overlapping text regions within the same line + merged_spans = merge_overlapping_spans(line_bbox_list) + + # Convert the merged text regions back to point format and add them to the new detection box list + for span in merged_spans: + new_dt_boxes.append(bbox_to_points(span)) + + new_dt_boxes.extend(angle_boxes_list) + + return new_dt_boxes + + +def get_adjusted_mfdetrec_res(single_page_mfdetrec_res, useful_list): + paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list + # Adjust the coordinates of the formula area + adjusted_mfdetrec_res = [] + for mf_res in single_page_mfdetrec_res: + mf_xmin, mf_ymin, mf_xmax, mf_ymax = mf_res["bbox"] + # Adjust the coordinates of the formula area to the coordinates relative to the cropping area + x0 = mf_xmin - xmin + paste_x + y0 = mf_ymin - ymin + paste_y + x1 = mf_xmax - xmin + paste_x + y1 = mf_ymax - ymin + paste_y + # Filter formula blocks outside the graph + if any([x1 < 0, y1 < 0]) or any([x0 > new_width, y0 > new_height]): + continue + else: + adjusted_mfdetrec_res.append({ + "bbox": [x0, y0, x1, y1], + }) + return adjusted_mfdetrec_res + + +def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang): + paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list + ocr_result_list = [] + ori_im = new_image.copy() + for box_ocr_res in ocr_res: + + if len(box_ocr_res) == 2: + p1, p2, p3, p4 = box_ocr_res[0] + text, 
score = box_ocr_res[1]
+            # logger.info(f"text: {text}, score: {score}")
+            if score < 0.6:  # filter out low-confidence results
+                continue
+        else:
+            p1, p2, p3, p4 = box_ocr_res
+            text, score = "", 1
+
+        if ocr_enable:
+            tmp_box = copy.deepcopy(np.array([p1, p2, p3, p4]).astype('float32'))
+            img_crop = get_rotate_crop_image(ori_im, tmp_box)
+
+        # average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
+        # if average_angle_degrees > 0.5:
+        poly = [p1, p2, p3, p4]
+        if calculate_is_angle(poly):
+            # logger.info(f"average_angle_degrees: {average_angle_degrees}, text: {text}")
+            # the box is tilted by more than ~0.5 degrees from the x-axis, so rectify its boundary
+            # compute the geometric center
+            x_center = sum(point[0] for point in poly) / 4
+            y_center = sum(point[1] for point in poly) / 4
+            new_height = ((p4[1] - p1[1]) + (p3[1] - p2[1])) / 2
+            new_width = p3[0] - p1[0]
+            p1 = [x_center - new_width / 2, y_center - new_height / 2]
+            p2 = [x_center + new_width / 2, y_center - new_height / 2]
+            p3 = [x_center + new_width / 2, y_center + new_height / 2]
+            p4 = [x_center - new_width / 2, y_center + new_height / 2]
+
+        # Convert the coordinates back to the original coordinate system
+        p1 = [p1[0] - paste_x + xmin, p1[1] - paste_y + ymin]
+        p2 = [p2[0] - paste_x + xmin, p2[1] - paste_y + ymin]
+        p3 = [p3[0] - paste_x + xmin, p3[1] - paste_y + ymin]
+        p4 = [p4[0] - paste_x + xmin, p4[1] - paste_y + ymin]
+
+        if ocr_enable:
+            ocr_result_list.append({
+                'category_id': 15,
+                'poly': p1 + p2 + p3 + p4,
+                'score': 1,
+                'text': text,
+                'np_img': img_crop,
+                'lang': lang,
+            })
+        else:
+            ocr_result_list.append({
+                'category_id': 15,
+                'poly': p1 + p2 + p3 + p4,
+                'score': float(round(score, 2)),
+                'text': text,
+            })
+
+    return ocr_result_list
+
+
+def calculate_is_angle(poly):
+    p1, p2, p3, p4 = poly
+    height = ((p4[1] - p1[1]) + (p3[1] - p2[1])) / 2
+    if 0.8 * height <= (p3[1] - p1[1]) <= 1.2 * height:
+        return False
+    else:
+        # logger.info((p3[1] - p1[1])/height)
+        return True
+
+
+def get_rotate_crop_image(img, points):
+    '''
+    img_height, img_width = img.shape[0:2]
+    left = int(np.min(points[:, 0]))
+    right = int(np.max(points[:, 0]))
+    top = int(np.min(points[:, 1]))
+    bottom = int(np.max(points[:, 1]))
+    img_crop = img[top:bottom, left:right, :].copy()
+    points[:, 0] = points[:, 0] - left
+    points[:, 1] = points[:, 1] - top
+    '''
+    assert len(points) == 4, "shape of points must be 4*2"
+    img_crop_width = int(
+        max(
+            np.linalg.norm(points[0] - points[1]),
+            np.linalg.norm(points[2] - points[3])))
+    img_crop_height = int(
+        max(
+            np.linalg.norm(points[0] - points[3]),
+            np.linalg.norm(points[1] - points[2])))
+    pts_std = np.float32([[0, 0], [img_crop_width, 0],
+                          [img_crop_width, img_crop_height],
+                          [0, img_crop_height]])
+    M = cv2.getPerspectiveTransform(points, pts_std)
+    dst_img = cv2.warpPerspective(
+        img,
+        M, (img_crop_width, img_crop_height),
+        borderMode=cv2.BORDER_REPLICATE,
+        flags=cv2.INTER_CUBIC)
+    dst_img_height, dst_img_width = dst_img.shape[0:2]
+    if dst_img_height * 1.0 / dst_img_width >= 1.5:
+        dst_img = np.rot90(dst_img)
+    return dst_img
\ No newline at end of file
diff --git a/mineru/libs/pdf_image_tools.py b/mineru/utils/pdf_image_tools.py
similarity index 95%
rename from mineru/libs/pdf_image_tools.py
rename to mineru/utils/pdf_image_tools.py
index 4b041f5c..53a7262d 100644
--- a/mineru/libs/pdf_image_tools.py
+++ b/mineru/utils/pdf_image_tools.py
@@ -5,8 +5,8 @@ import pypdfium2 as pdfium
 from loguru import logger
 from PIL import Image
 
-from ..data.data_reader_writer import FileBasedDataWriter
-from ..utils.pdf_reader import image_to_b64str, image_to_bytes, page_to_image
+from
mineru.data.data_reader_writer import FileBasedDataWriter +from mineru.utils.pdf_reader import image_to_b64str, image_to_bytes, page_to_image from .hash_utils import str_sha256 diff --git a/mineru/libs/version.py b/mineru/version.py similarity index 100% rename from mineru/libs/version.py rename to mineru/version.py -- GitLab
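
For reference, a minimal usage sketch (not part of the patch) showing how the recursive XY-cut helper added above could be driven once the patch is applied. The module path mineru.model.reading_order.xycut is an assumption based on the new layout, and indices is passed as a NumPy array because recursive_xy_cut applies fancy indexing to it.

import numpy as np

# assumed module path for the helper added in this patch; adjust if it lives elsewhere
from mineru.model.reading_order.xycut import recursive_xy_cut

# boxes are [x0, y0, x1, y1] in pixel coordinates: a title bar above two columns
boxes = np.array([
    [50, 40, 300, 80],     # title
    [50, 100, 180, 400],   # left column
    [200, 100, 330, 400],  # right column
])

order = []
recursive_xy_cut(boxes, np.arange(len(boxes)), order)
print(order)  # expected reading order: title, left column, right column -> 0, 1, 2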