v

f9b1a89a · HHL · 60e27226 · f9b1a89a · f9b1a89a · f9b1a89a
Commit f9b1a89a authored Dec 27, 2023 by HHL
20 changed files
--- a/layoutlmft/data/datasets/__pycache__/__init__.cpython-38.pyc
+++ b/layoutlmft/data/datasets/__pycache__/__init__.cpython-38.pyc
--- a/layoutlmft/data/datasets/__pycache__/funsd.cpython-37.pyc
+++ b/layoutlmft/data/datasets/__pycache__/funsd.cpython-37.pyc
--- a/layoutlmft/data/datasets/__pycache__/funsd.cpython-38.pyc
+++ b/layoutlmft/data/datasets/__pycache__/funsd.cpython-38.pyc
--- a/layoutlmft/data/datasets/funsd.py
+++ b/layoutlmft/data/datasets/funsd.py
+# coding=utf-8
+import json
+import os
+import datasets
+from layoutlmft.data.utils import load_image, normalize_bbox
+# Module-level logger obtained through the `datasets` logging helper.
+logger = datasets.logging.get_logger(__name__)
+# BibTeX entry for the FUNSD paper; handed to DatasetInfo as `citation`.
+_CITATION = """\
+@article{Jaume2019FUNSDAD,
+  title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents},
+  author={Guillaume Jaume and H. K. Ekenel and J. Thiran},
+  journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)},
+  year={2019},
+  volume={2},
+  pages={1-6}
+}
+"""
+# Dataset description (just the homepage URL); handed to DatasetInfo as `description`.
+_DESCRIPTION = """\
+https://guillaumejaume.github.io/FUNSD/
+"""
+class FunsdConfig(datasets.BuilderConfig):
+    """Builder configuration for the FUNSD dataset."""
+    def __init__(self, **kwargs):
+        """Create a FUNSD builder configuration.
+        Args:
+          **kwargs: keyword arguments passed through unchanged to the parent config.
+        """
+        super().__init__(**kwargs)
+class Funsd(datasets.GeneratorBasedBuilder):
+    """FUNSD dataset builder: one example per annotated form, with word tokens, boxes, BIO tags and image."""
+    BUILDER_CONFIGS = [
+        FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"),
+    ]
+    def _info(self):
+        """Return the DatasetInfo describing the features of a single example."""
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "id": datasets.Value("string"),
+                    "tokens": datasets.Sequence(datasets.Value("string")),
+                    "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))),
+                    "ner_tags": datasets.Sequence(
+                        datasets.features.ClassLabel(
+                            names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"]
+                        )
+                    ),
+                    "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"),
+                }
+            ),
+            supervised_keys=None,
+            homepage="https://guillaumejaume.github.io/FUNSD/",
+            citation=_CITATION,
+        )
+    def _split_generators(self, dl_manager):
+        """Returns SplitGenerators for the train and test splits.
+        Args:
+            dl_manager: download manager supplied by `datasets`; unused because the data is read locally.
+        """
+        # downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip")
+        # NOTE(review): machine-specific absolute path; the download above is disabled in favour of
+        # a local copy, so this builder only works where this directory exists.
+        downloaded_file = '/yrfs1/intern/zrzhang6/DocumentPretrain/dataset/funsd'
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"}
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"}
+            ),
+        ]
+    def _generate_examples(self, filepath):
+        """Yield ``(key, example)`` pairs, one per file in ``<filepath>/annotations``.
+        Args:
+            filepath: split directory containing the ``annotations`` and ``images`` sub-directories.
+        Yields:
+            tuple: the running index of the annotation file (sorted by name) and a dict with
+            the keys ``id``, ``tokens``, ``bboxes``, ``ner_tags`` and ``image``.
+        """
+        logger.info("⏳ Generating examples from = %s", filepath)
+        ann_dir = os.path.join(filepath, "annotations")
+        img_dir = os.path.join(filepath, "images")
+        for guid, file in enumerate(sorted(os.listdir(ann_dir))):
+            tokens = []
+            bboxes = []
+            ner_tags = []
+            file_path = os.path.join(ann_dir, file)
+            with open(file_path, "r", encoding="utf8") as f:
+                data = json.load(f)
+            # Swap only the file extension. A plain str.replace("json", "png") on the whole
+            # path would also rewrite any directory or file-name part that contains "json".
+            image_path = os.path.join(img_dir, os.path.splitext(file)[0] + ".png")
+            image, size = load_image(image_path)
+            for item in data["form"]:
+                label = item["label"]
+                # Drop words whose text is empty or whitespace only.
+                words = [w for w in item["words"] if w["text"].strip() != ""]
+                if not words:
+                    continue
+                for position, w in enumerate(words):
+                    if label == "other":
+                        tag = "O"
+                    else:
+                        # First word of an entity opens it (B-), the remaining words continue it (I-).
+                        tag = ("B-" if position == 0 else "I-") + label.upper()
+                    tokens.append(w["text"])
+                    ner_tags.append(tag)
+                    bboxes.append(normalize_bbox(w["box"], size))
+            yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image}
--- a/layoutlmft/data/datasets/funsd.py.lock
+++ b/layoutlmft/data/datasets/funsd.py.lock
--- a/layoutlmft/data/datasets/xfun.py
+++ b/layoutlmft/data/datasets/xfun.py
+# Lint as: python3
+import json
+import logging
+import os
+import datasets
+from layoutlmft.data.utils import load_image, merge_bbox, normalize_bbox, simplify_bbox
+from transformers import AutoTokenizer
+# Base URL of the XFUN v1.0 release; "<lang>.<split>.json" and "<lang>.<split>.zip" are appended to it.
+_URL = "https://github.com/doc-analysis/XFUN/releases/download/v1.0/"
+# Language codes; one builder config named "xfun.<lang>" is created for each.
+_LANG = ["zh", "de", "es", "fr", "en", "it", "ja", "pt"]
+logger = logging.getLogger(__name__)
+class XFUNConfig(datasets.BuilderConfig):
+    """Builder configuration for a single XFUN language."""
+    def __init__(self, lang, additional_langs=None, **kwargs):
+        """
+        Args:
+            lang: string, language code of the input text.
+            additional_langs: optional string of extra language codes joined by "+"
+                (or containing "all") whose training data is added to the train split.
+            **kwargs: keyword arguments handed on to the parent config.
+        """
+        super().__init__(**kwargs)
+        self.additional_langs = additional_langs
+        self.lang = lang
+class XFUN(datasets.GeneratorBasedBuilder):
+    """XFUN dataset builder: tokenized form documents with entity spans and question/answer relations."""
+    BUILDER_CONFIGS = [XFUNConfig(name=f"xfun.{lang}", lang=lang) for lang in _LANG]
+    # NOTE: loaded when the class body executes, i.e. as a side effect of importing this module.
+    tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
+    def _info(self):
+        """Return the DatasetInfo describing the features of one (chunked) example."""
+        return datasets.DatasetInfo(
+            features=datasets.Features(
+                {
+                    "id": datasets.Value("string"),
+                    "input_ids": datasets.Sequence(datasets.Value("int64")),
+                    "bbox": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))),
+                    "labels": datasets.Sequence(
+                        datasets.ClassLabel(
+                            names=["O", "B-QUESTION", "B-ANSWER", "B-HEADER", "I-ANSWER", "I-QUESTION", "I-HEADER"]
+                        )
+                    ),
+                    "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"),
+                    "entities": datasets.Sequence(
+                        {
+                            "start": datasets.Value("int64"),
+                            "end": datasets.Value("int64"),
+                            "label": datasets.ClassLabel(names=["HEADER", "QUESTION", "ANSWER"]),
+                        }
+                    ),
+                    "relations": datasets.Sequence(
+                        {
+                            "head": datasets.Value("int64"),
+                            "tail": datasets.Value("int64"),
+                            "start_index": datasets.Value("int64"),
+                            "end_index": datasets.Value("int64"),
+                        }
+                    ),
+                }
+            ),
+            supervised_keys=None,
+        )
+    def _split_generators(self, dl_manager):
+        """Returns SplitGenerators for the train and validation splits.
+        The train split holds the configured language plus any ``additional_langs``;
+        validation only holds the configured language. The test split is disabled.
+        Args:
+            dl_manager: download manager used to fetch and extract the JSON/zip pair of each split.
+        """
+        urls_to_download = {
+            "train": [f"{_URL}{self.config.lang}.train.json", f"{_URL}{self.config.lang}.train.zip"],
+            "val": [f"{_URL}{self.config.lang}.val.json", f"{_URL}{self.config.lang}.val.zip"],
+            # "test": [f"{_URL}{self.config.lang}.test.json", f"{_URL}{self.config.lang}.test.zip"],
+        }
+        downloaded_files = dl_manager.download_and_extract(urls_to_download)
+        train_files_for_many_langs = [downloaded_files["train"]]
+        val_files_for_many_langs = [downloaded_files["val"]]
+        # test_files_for_many_langs = [downloaded_files["test"]]
+        if self.config.additional_langs:
+            additional_langs = self.config.additional_langs.split("+")
+            # "all" expands to every known language except the configured one.
+            if "all" in additional_langs:
+                additional_langs = [lang for lang in _LANG if lang != self.config.lang]
+            for lang in additional_langs:
+                urls_to_download = {"train": [f"{_URL}{lang}.train.json", f"{_URL}{lang}.train.zip"]}
+                additional_downloaded_files = dl_manager.download_and_extract(urls_to_download)
+                train_files_for_many_langs.append(additional_downloaded_files["train"])
+        logger.info(f"Training on {self.config.lang} with additional langs({self.config.additional_langs})")
+        logger.info(f"Evaluating on {self.config.lang}")
+        logger.info(f"Testing on {self.config.lang}")
+        return [
+            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": train_files_for_many_langs}),
+            datasets.SplitGenerator(
+                name=datasets.Split.VALIDATION, gen_kwargs={"filepaths": val_files_for_many_langs}
+            ),
+            # datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepaths": test_files_for_many_langs}),
+        ]
+    def _generate_examples(self, filepaths):
+        """Yield ``(key, example)`` pairs, one per chunk of at most 512 tokens of each document.
+        Args:
+            filepaths: list of pairs; element 0 of a pair is the annotation JSON path and
+                element 1 is the directory the page images are read from.
+        Yields:
+            tuple: the key ``"<doc id>_<chunk id>"`` and a dict with the keys ``id``, ``input_ids``,
+            ``bbox``, ``labels``, ``image``, ``entities`` and ``relations``. Entity offsets and
+            relation indices are relative to the chunk.
+        """
+        for filepath in filepaths:
+            logger.info("Generating examples from = %s", filepath)
+            # Explicit UTF-8: the annotations are multilingual and must not depend on the locale default.
+            with open(filepath[0], "r", encoding="utf-8") as f:
+                data = json.load(f)
+            for doc in data["documents"]:
+                doc["img"]["fpath"] = os.path.join(filepath[1], doc["img"]["fname"])
+                image, size = load_image(doc["img"]["fpath"])
+                document = doc["document"]
+                tokenized_doc = {"input_ids": [], "bbox": [], "labels": []}
+                entities = []
+                relations = []
+                id2label = {}
+                entity_id_to_index_map = {}
+                empty_entity = set()
+                for line in document:
+                    # Lines without text are skipped; their ids are remembered so that links to them are dropped.
+                    if len(line["text"]) == 0:
+                        empty_entity.add(line["id"])
+                        continue
+                    id2label[line["id"]] = line["label"]
+                    # Links are stored as sorted tuples so that duplicates collapse in the set() below.
+                    relations.extend([tuple(sorted(link)) for link in line["linking"]])
+                    tokenized_inputs = self.tokenizer(
+                        line["text"],
+                        add_special_tokens=False,
+                        return_offsets_mapping=True,
+                        return_attention_mask=False,
+                    )
+                    text_length = 0
+                    ocr_length = 0
+                    bbox = []
+                    for token_id, offset in zip(tokenized_inputs["input_ids"], tokenized_inputs["offset_mapping"]):
+                        # Token id 6 gets a placeholder that is filled from the following token below.
+                        # NOTE(review): presumably the bare word-boundary marker of xlm-roberta-base - confirm.
+                        if token_id == 6:
+                            bbox.append(None)
+                            continue
+                        text_length += offset[1] - offset[0]
+                        tmp_box = []
+                        # Consume OCR words until they cover the characters tokenized so far.
+                        # This pops from (and therefore mutates) line["words"].
+                        while ocr_length < text_length:
+                            ocr_word = line["words"].pop(0)
+                            ocr_length += len(
+                                self.tokenizer._tokenizer.normalizer.normalize_str(ocr_word["text"].strip())
+                            )
+                            tmp_box.append(simplify_bbox(ocr_word["box"]))
+                        # A token that needed no new OCR word reuses the boxes of the previous token.
+                        # NOTE(review): last_box is unbound if this happens before any token got a box.
+                        if len(tmp_box) == 0:
+                            tmp_box = last_box
+                        bbox.append(normalize_bbox(merge_bbox(tmp_box), size))
+                        last_box = tmp_box  # noqa
+                    # Placeholders become a zero-size box at the top-left corner of the next token's box.
+                    # NOTE(review): raises IndexError if a placeholder is the last token of the line.
+                    bbox = [
+                        [bbox[i + 1][0], bbox[i + 1][1], bbox[i + 1][0], bbox[i + 1][1]] if b is None else b
+                        for i, b in enumerate(bbox)
+                    ]
+                    if line["label"] == "other":
+                        label = ["O"] * len(bbox)
+                    else:
+                        label = [f"I-{line['label'].upper()}"] * len(bbox)
+                        label[0] = f"B-{line['label'].upper()}"
+                    tokenized_inputs.update({"bbox": bbox, "labels": label})
+                    if label[0] != "O":
+                        entity_id_to_index_map[line["id"]] = len(entities)
+                        # Entity span in document-level token offsets; "end" is start plus the token count.
+                        entities.append(
+                            {
+                                "start": len(tokenized_doc["input_ids"]),
+                                "end": len(tokenized_doc["input_ids"]) + len(tokenized_inputs["input_ids"]),
+                                "label": line["label"].upper(),
+                            }
+                        )
+                    for i in tokenized_doc:
+                        tokenized_doc[i] = tokenized_doc[i] + tokenized_inputs[i]
+                relations = list(set(relations))
+                relations = [rel for rel in relations if rel[0] not in empty_entity and rel[1] not in empty_entity]
+                # Keep only question/answer links, always oriented question (head) -> answer (tail).
+                kvrelations = []
+                for rel in relations:
+                    pair = [id2label[rel[0]], id2label[rel[1]]]
+                    if pair == ["question", "answer"]:
+                        kvrelations.append(
+                            {"head": entity_id_to_index_map[rel[0]], "tail": entity_id_to_index_map[rel[1]]}
+                        )
+                    elif pair == ["answer", "question"]:
+                        kvrelations.append(
+                            {"head": entity_id_to_index_map[rel[1]], "tail": entity_id_to_index_map[rel[0]]}
+                        )
+                    else:
+                        continue
+                def get_relation_span(rel):
+                    """Return (min, max) over the start/end offsets of the relation's head and tail entities."""
+                    bound = []
+                    for entity_index in [rel["head"], rel["tail"]]:
+                        bound.append(entities[entity_index]["start"])
+                        bound.append(entities[entity_index]["end"])
+                    return min(bound), max(bound)
+                spanned_relations = []
+                for rel in kvrelations:
+                    # Compute the span once per relation.
+                    start_index, end_index = get_relation_span(rel)
+                    spanned_relations.append(
+                        {
+                            "head": rel["head"],
+                            "tail": rel["tail"],
+                            "start_index": start_index,
+                            "end_index": end_index,
+                        }
+                    )
+                relations = sorted(spanned_relations, key=lambda x: x["head"])
+                chunk_size = 512
+                for chunk_id, index in enumerate(range(0, len(tokenized_doc["input_ids"]), chunk_size)):
+                    item = {}
+                    for k in tokenized_doc:
+                        item[k] = tokenized_doc[k][index : index + chunk_size]
+                    entities_in_this_span = []
+                    global_to_local_map = {}
+                    for entity_id, entity in enumerate(entities):
+                        # Only entities whose start and end both fall inside this chunk are kept.
+                        if (
+                            index <= entity["start"] < index + chunk_size
+                            and index <= entity["end"] < index + chunk_size
+                        ):
+                            # Offsets are rewritten in place to be relative to the chunk.
+                            entity["start"] = entity["start"] - index
+                            entity["end"] = entity["end"] - index
+                            global_to_local_map[entity_id] = len(entities_in_this_span)
+                            entities_in_this_span.append(entity)
+                    relations_in_this_span = []
+                    for relation in relations:
+                        # Relations are kept only when their whole span lies inside this chunk.
+                        if (
+                            index <= relation["start_index"] < index + chunk_size
+                            and index <= relation["end_index"] < index + chunk_size
+                        ):
+                            relations_in_this_span.append(
+                                {
+                                    "head": global_to_local_map[relation["head"]],
+                                    "tail": global_to_local_map[relation["tail"]],
+                                    "start_index": relation["start_index"] - index,
+                                    "end_index": relation["end_index"] - index,
+                                }
+                            )
+                    item.update(
+                        {
+                            "id": f"{doc['id']}_{chunk_id}",
+                            "image": image,
+                            "entities": entities_in_this_span,
+                            "relations": relations_in_this_span,
+                        }
+                    )
+                    yield f"{doc['id']}_{chunk_id}", item
--- a/layoutlmft/data/utils.py
+++ b/layoutlmft/data/utils.py
+import torch
+from detectron2.data.detection_utils import read_image
+from detectron2.data.transforms import ResizeTransform, TransformList
+def normalize_bbox(bbox, size):
+    """Rescale the first four box coordinates to a 0-1000 grid, truncating to int.
+    Even positions (x) are divided by ``size[0]`` and odd positions (y) by ``size[1]``.
+    """
+    # position % 2 selects size[0] for x coordinates and size[1] for y coordinates.
+    return [int(1000 * bbox[position] / size[position % 2]) for position in range(4)]
+def simplify_bbox(bbox):
+    """Collapse a flat coordinate list into ``[left, top, right, bottom]``.
+    Minima are taken over every second value from positions 0 and 1,
+    maxima over every second value from positions 2 and 3.
+    """
+    left = min(bbox[0::2])
+    top = min(bbox[1::2])
+    right = max(bbox[2::2])
+    bottom = max(bbox[3::2])
+    return [left, top, right, bottom]
+def merge_bbox(bbox_list):
+    """Return the smallest ``[x0, y0, x1, y1]`` box enclosing every 4-value box in ``bbox_list``."""
+    # Transpose the boxes into one tuple per coordinate.
+    lefts, tops, rights, bottoms = zip(*bbox_list)
+    return [min(lefts), min(tops), max(rights), max(bottoms)]
+def load_image(image_path):
+    """Read an image in BGR format, resize it to 224x224 and return it with its original size.
+    Returns:
+        tuple: the resized image as a tensor with the last axis moved to the front, and the
+        original ``(width, height)``.
+    """
+    original = read_image(image_path, format="BGR")
+    h, w = original.shape[0], original.shape[1]
+    resize = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)])
+    # copy to make it writeable
+    resized = resize.apply_image(original).copy()
+    return torch.tensor(resized).permute(2, 0, 1), (w, h)
--- a/layoutlmft/evaluation.py
+++ b/layoutlmft/evaluation.py
+import os
+import re
+import numpy as np
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+# Name prefix of checkpoint directories.
+PREFIX_CHECKPOINT_DIR = "checkpoint"
+# Matches a complete name of the form "checkpoint-<digits>" and captures the digits.
+_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d+)$")
+def get_last_checkpoint(folder):
+    """Return the path of the highest-numbered ``checkpoint-<n>`` directory in ``folder``.
+    Returns None when ``folder`` holds no such directory.
+    """
+    def step_of(name):
+        # Numeric suffix captured by the checkpoint pattern.
+        return int(_re_checkpoint.search(name).groups()[0])
+    candidates = []
+    for name in os.listdir(folder):
+        if _re_checkpoint.search(name) is None:
+            continue
+        if os.path.isdir(os.path.join(folder, name)):
+            candidates.append(name)
+    if not candidates:
+        return None
+    return os.path.join(folder, max(candidates, key=step_of))
+def re_score(pred_relations, gt_relations, mode="strict"):
+    """Score predicted relations against the ground truth and log a summary.
+    Args:
+        pred_relations (list) :  list of list of predicted relations (several relations in each sentence)
+        gt_relations (list) :    list of list of ground truth relations, paired with the predictions by position
+            rel = { "head": (start_idx (inclusive), end_idx (exclusive)),
+                    "tail": (start_idx (inclusive), end_idx (exclusive)),
+                    "head_type": ent_type,
+                    "tail_type": ent_type,
+                    "type": rel_type}
+        mode (str) :            'strict' compares head/tail together with their types,
+                                'boundaries' compares head/tail only
+    Returns:
+        dict: tp/fp/fn/p/r/f1 for relation type 1 and for "ALL"; "ALL" additionally holds the
+        macro averages under "Macro_f1", "Macro_p" and "Macro_r".
+    """
+    assert mode in ["strict", "boundaries"]
+    # Only relation type 1 is scored; type 0 is excluded.
+    relation_types = [1]
+    scores = {key: {"tp": 0, "fp": 0, "fn": 0} for key in relation_types + ["ALL"]}
+    # Totals used only in the log output.
+    n_sents = len(gt_relations)
+    n_rels = sum(sum(1 for _ in sent) for sent in gt_relations)
+    n_found = sum(sum(1 for _ in sent) for sent in pred_relations)
+    # A relation is reduced to a hashable signature whose content depends on the mode.
+    if mode == "strict":
+        def signature(rel):
+            return (rel["head"], rel["head_type"], rel["tail"], rel["tail_type"])
+    elif mode == "boundaries":
+        def signature(rel):
+            return (rel["head"], rel["tail"])
+    # Accumulate TP / FP / FN per relation type over all sentence pairs.
+    for pred_sent, gt_sent in zip(pred_relations, gt_relations):
+        for rel_type in relation_types:
+            pred_rels = {signature(rel) for rel in pred_sent if rel["type"] == rel_type}
+            gt_rels = {signature(rel) for rel in gt_sent if rel["type"] == rel_type}
+            counts = scores[rel_type]
+            counts["tp"] += len(pred_rels & gt_rels)
+            counts["fp"] += len(pred_rels - gt_rels)
+            counts["fn"] += len(gt_rels - pred_rels)
+    # Precision / recall / F1 for every entry (the "ALL" values are replaced further down).
+    for entry in scores.values():
+        hits = entry["tp"]
+        if hits:
+            entry["p"] = hits / (entry["fp"] + hits)
+            entry["r"] = hits / (entry["fn"] + hits)
+        else:
+            entry["p"], entry["r"] = 0, 0
+        if entry["p"] + entry["r"] == 0:
+            entry["f1"] = 0
+        else:
+            entry["f1"] = 2 * entry["p"] * entry["r"] / (entry["p"] + entry["r"])
+    # Micro averages over the scored relation types.
+    tp = sum(scores[rel_type]["tp"] for rel_type in relation_types)
+    fp = sum(scores[rel_type]["fp"] for rel_type in relation_types)
+    fn = sum(scores[rel_type]["fn"] for rel_type in relation_types)
+    if tp:
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        f1 = 2 * precision * recall / (precision + recall)
+    else:
+        precision, recall, f1 = 0, 0, 0
+    overall = scores["ALL"]
+    overall.update({"p": precision, "r": recall, "f1": f1, "tp": tp, "fp": fp, "fn": fn})
+    # Macro averages over the scored relation types.
+    overall["Macro_f1"] = np.mean([scores[rel_type]["f1"] for rel_type in relation_types])
+    overall["Macro_p"] = np.mean([scores[rel_type]["p"] for rel_type in relation_types])
+    overall["Macro_r"] = np.mean([scores[rel_type]["r"] for rel_type in relation_types])
+    logger.info(f"RE Evaluation in *** {mode.upper()} *** mode")
+    logger.info(
+        "processed {} sentences with {} relations; found: {} relations; correct: {}.".format(
+            n_sents, n_rels, n_found, tp
+        )
+    )
+    logger.info("\tALL\t TP: {};\tFP: {};\tFN: {}".format(overall["tp"], overall["fp"], overall["fn"]))
+    logger.info("\t\t(m avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (micro)".format(precision, recall, f1))
+    logger.info(
+        "\t\t(M avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (Macro)\n".format(
+            overall["Macro_p"], overall["Macro_r"], overall["Macro_f1"]
+        )
+    )
+    for rel_type in relation_types:
+        entry = scores[rel_type]
+        logger.info(
+            "\t{}: \tTP: {};\tFP: {};\tFN: {};\tprecision: {:.2f};\trecall: {:.2f};\tf1: {:.2f};\t{}".format(
+                rel_type,
+                entry["tp"],
+                entry["fp"],
+                entry["fn"],
+                entry["p"],
+                entry["r"],
+                entry["f1"],
+                entry["tp"] + entry["fp"],
+            )
+        )
+    return scores
--- a/layoutlmft/models/__init__.py
+++ b/layoutlmft/models/__init__.py
--- a/layoutlmft/models/__pycache__/__init__.cpython-37.pyc
+++ b/layoutlmft/models/__pycache__/__init__.cpython-37.pyc
--- a/layoutlmft/models/__pycache__/__init__.cpython-38.pyc
+++ b/layoutlmft/models/__pycache__/__init__.cpython-38.pyc
--- a/layoutlmft/models/__pycache__/model_args.cpython-37.pyc
+++ b/layoutlmft/models/__pycache__/model_args.cpython-37.pyc
--- a/layoutlmft/models/__pycache__/model_args.cpython-38.pyc
+++ b/layoutlmft/models/__pycache__/model_args.cpython-38.pyc
--- a/layoutlmft/models/graphdoc/__init__.py
+++ b/layoutlmft/models/graphdoc/__init__.py
+from .configuration_graphdoc import GraphDocConfig
+from .modeling_graphdoc import GraphDocForTokenClassification, GraphDocModel, GraphDocForPretrain
\ No newline at end of file
--- a/layoutlmft/models/graphdoc/__pycache__/__init__.cpython-37.pyc
+++ b/layoutlmft/models/graphdoc/__pycache__/__init__.cpython-37.pyc
--- a/layoutlmft/models/graphdoc/__pycache__/__init__.cpython-38.pyc
+++ b/layoutlmft/models/graphdoc/__pycache__/__init__.cpython-38.pyc
--- a/layoutlmft/models/graphdoc/__pycache__/configuration_graphdoc.cpython-37.pyc
+++ b/layoutlmft/models/graphdoc/__pycache__/configuration_graphdoc.cpython-37.pyc
--- a/layoutlmft/models/graphdoc/__pycache__/configuration_graphdoc.cpython-38.pyc
+++ b/layoutlmft/models/graphdoc/__pycache__/configuration_graphdoc.cpython-38.pyc
--- a/layoutlmft/models/graphdoc/__pycache__/configuration_layoutclm.cpython-37.pyc
+++ b/layoutlmft/models/graphdoc/__pycache__/configuration_layoutclm.cpython-37.pyc
--- a/layoutlmft/models/graphdoc/__pycache__/configuration_layoutclmV12.cpython-37.pyc
+++ b/layoutlmft/models/graphdoc/__pycache__/configuration_layoutclmV12.cpython-37.pyc