"tools/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "e42137a85cf8bafe90392175a57133e994d462cb"
Unverified commit 448c4672, authored by Julien Chaumond, committed by GitHub

Fix: unpin flake8 and fix cs errors (#4367)

* Fix: unpin flake8 and fix cs errors

* Ok we still need to quote those
parent c547f15a
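
Most hunks below remove an `f` prefix from string literals that contain no `{...}` placeholder. Once flake8 is unpinned, its bundled pyflakes reports these as F541 (f-string without any placeholders): the prefix is harmless at runtime but misleading, since nothing is ever interpolated. A minimal before/after sketch (variable names are illustrative, not from the diff):

```python
n = 3
msg_bad = f"Start encoding"            # flagged by F541: f-string with no placeholder
msg_ok = "Start encoding"              # fixed: plain string literal
msg_fmt = f"{n} examples to process."  # kept: real interpolation happens here
```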
```diff
@@ -478,7 +478,7 @@ def _compute_pytorch(
             dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

         if not no_speed:
-            print_fn("Going through model with sequence of shape".format(sequence.shape))
+            print_fn("Going through model with sequence of shape {}".format(sequence.shape))
             runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
             average_time = sum(runtimes) / float(len(runtimes)) / 3.0
             dictionary[model_name]["time"][batch_size][slice_size] = average_time
```
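This hunk also fixes a real bug that the stricter linter surfaced: `str.format` was called on a string with no `{}` placeholder, so the shape argument was silently discarded. A small sketch of the behavior:

```python
shape = (8, 512)
# Without a placeholder, .format() returns the string unchanged; the argument is ignored.
"Going through model with sequence of shape".format(shape)
# -> 'Going through model with sequence of shape'

# With the placeholder restored, the argument is interpolated as intended.
"Going through model with sequence of shape {}".format(shape)
# -> 'Going through model with sequence of shape (8, 512)'
```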
```diff
@@ -80,7 +80,7 @@ class Distiller:
         self.mlm = params.mlm
         if self.mlm:
-            logger.info(f"Using MLM loss for LM step.")
+            logger.info("Using MLM loss for LM step.")
             self.mlm_mask_prop = params.mlm_mask_prop
             assert 0.0 <= self.mlm_mask_prop <= 1.0
             assert params.word_mask + params.word_keep + params.word_rand == 1.0
@@ -91,7 +91,7 @@ class Distiller:
             self.pred_probs = self.pred_probs.half()
             self.token_probs = self.token_probs.half()
         else:
-            logger.info(f"Using CLM loss for LM step.")
+            logger.info("Using CLM loss for LM step.")

         self.epoch = 0
         self.n_iter = 0
@@ -365,8 +365,8 @@ class Distiller:
             self.end_epoch()

         if self.is_master:
-            logger.info(f"Save very last checkpoint as `pytorch_model.bin`.")
-            self.save_checkpoint(checkpoint_name=f"pytorch_model.bin")
+            logger.info("Save very last checkpoint as `pytorch_model.bin`.")
+            self.save_checkpoint(checkpoint_name="pytorch_model.bin")
             logger.info("Training is finished")

     def step(self, input_ids: torch.tensor, attention_mask: torch.tensor, lm_labels: torch.tensor):
```
```diff
@@ -60,7 +60,7 @@ def main():
     with open(args.file_path, "r", encoding="utf8") as fp:
         data = fp.readlines()

-    logger.info(f"Start encoding")
+    logger.info("Start encoding")
     logger.info(f"{len(data)} examples to process.")

     rslt = []
```
```diff
@@ -93,7 +93,7 @@ if __name__ == "__main__":
     elif args.model_type == "gpt2":
         for w in ["weight", "bias"]:
             compressed_sd[f"{prefix}.ln_f.{w}"] = state_dict[f"{prefix}.ln_f.{w}"]
-        compressed_sd[f"lm_head.weight"] = state_dict[f"lm_head.weight"]
+        compressed_sd["lm_head.weight"] = state_dict["lm_head.weight"]

     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")
```
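Notice the rule the cleanup follows: the `f` prefix is dropped only where the literal has no placeholder, while keys that interpolate `prefix` or `w` keep it (the same pattern appears in the DistilBERT extraction hunks below). Roughly, with illustrative values:

```python
prefix, w = "transformer", "weight"  # illustrative values, not from the diff
key_dynamic = f"{prefix}.ln_f.{w}"   # real placeholders, so the f prefix stays
# -> "transformer.ln_f.weight"
key_static = "lm_head.weight"        # no placeholder, so a plain literal suffices
```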
```diff
@@ -37,7 +37,7 @@ if __name__ == "__main__":
         model = BertForMaskedLM.from_pretrained(args.model_name)
         prefix = "bert"
     else:
-        raise ValueError(f'args.model_type should be "bert".')
+        raise ValueError('args.model_type should be "bert".')

     state_dict = model.state_dict()
     compressed_sd = {}
@@ -78,12 +78,12 @@ if __name__ == "__main__":
         ]
         std_idx += 1

-    compressed_sd[f"vocab_projector.weight"] = state_dict[f"cls.predictions.decoder.weight"]
-    compressed_sd[f"vocab_projector.bias"] = state_dict[f"cls.predictions.bias"]
+    compressed_sd["vocab_projector.weight"] = state_dict["cls.predictions.decoder.weight"]
+    compressed_sd["vocab_projector.bias"] = state_dict["cls.predictions.bias"]
     if args.vocab_transform:
         for w in ["weight", "bias"]:
             compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"]
             compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"]

     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")
```
```diff
@@ -273,7 +273,7 @@ def main():
         token_probs = None

     train_lm_seq_dataset = LmSeqsDataset(params=args, data=data)
-    logger.info(f"Data loader created.")
+    logger.info("Data loader created.")

     # STUDENT #
     logger.info(f"Loading student config from {args.student_config}")
@@ -288,7 +288,7 @@ def main():

     if args.n_gpu > 0:
         student.to(f"cuda:{args.local_rank}")
-    logger.info(f"Student loaded.")
+    logger.info("Student loaded.")

     # TEACHER #
     teacher = teacher_model_class.from_pretrained(args.teacher_name, output_hidden_states=True)
```
```diff
@@ -36,5 +36,5 @@ multi_line_output = 3
 use_parentheses = True

 [flake8]
-ignore = E203, E501, W503
+ignore = E203, E501, E741, W503
 max-line-length = 119
```
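The other setup.cfg change adds E741 to the ignore list. E741 is pycodestyle's "ambiguous variable name" check for single-letter names such as `l`, `O`, and `I`, which the newer flake8 reports across the existing code; ignoring it keeps the unpinned linter passing without a sweeping rename. A hypothetical snippet of the kind E741 flags:

```python
# E741: "l" is easily mistaken for "1" or "I" in many fonts.
for l in range(12):
    print(l)
```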
```diff
@@ -79,7 +79,7 @@ extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme"]
 extras["quality"] = [
     "black",
     "isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort",
-    "flake8==3.7.9",
+    "flake8",
 ]

 extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"]
```
```diff
@@ -226,7 +226,7 @@ def lmap(f, x) -> List:
 def fetch_test_set(test_set_url):
     import wget

-    fname = wget.download(test_set_url, f"opus_test.txt")
+    fname = wget.download(test_set_url, "opus_test.txt")
     lns = Path(fname).open().readlines()
     src = lmap(str.strip, lns[::4])
     gold = lmap(str.strip, lns[1::4])
```
```diff
@@ -114,7 +114,7 @@ class GlueDataset(Dataset):
                 torch.save(self.features, cached_features_file)
                 # ^ This seems to take a lot of time so I want to investigate why and how we can improve.
                 logger.info(
-                    f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                    "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
                 )

     def __len__(self):
```
```diff
@@ -65,7 +65,7 @@ class TextDataset(Dataset):
             with open(cached_features_file, "wb") as handle:
                 pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)
             logger.info(
-                f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
             )

     def __len__(self):
```
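Both dataset hunks fix the same pattern: an `f` prefix on a string whose placeholders are logging-style `%s`/`%.3f`, not `{...}`. The f-string never interpolated anything, while the `%` substitution is performed lazily by the logging module from the extra arguments, so dropping the prefix is the correct fix and preserves lazy formatting. A sketch with an illustrative path:

```python
import logging
import time

logger = logging.getLogger(__name__)
start = time.time()
cached_features_file = "cached_features.bin"  # hypothetical path

# logging interpolates %s / %.3f lazily, and only if this record is actually emitted
logger.info("Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start)
```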
```diff
@@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from itertools import chain
 from os.path import abspath, exists
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

 import numpy as np
@@ -58,6 +58,10 @@ if is_torch_available():
         AutoModelWithLMHead,
     )

+if TYPE_CHECKING:
+    from .modeling_utils import PreTrainedModel
+    from .modeling_tf_utils import TFPreTrainedModel
+
 logger = logging.getLogger(__name__)
```
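`typing.TYPE_CHECKING` is `False` at runtime and treated as `True` by static analyzers, so imports guarded by it provide names for annotations without runtime cost or circular imports; here it gives flake8/pyflakes a binding for `PreTrainedModel` and `TFPreTrainedModel` so quoted annotations elsewhere in the file no longer trip the undefined-name check (F821). The pattern in general form, with a hypothetical module:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # evaluated only by static analyzers, never executed at runtime
    from heavy_module import HeavyClass  # hypothetical import

def load(path: str) -> "HeavyClass":
    # quoted, because HeavyClass is not bound when this module actually runs
    ...
```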
```diff
@@ -19,11 +19,21 @@ import pickle
 import shutil
 import tempfile
 from collections import OrderedDict
-from typing import Dict, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Tuple, Union

 from tests.utils import require_tf, require_torch

+if TYPE_CHECKING:
+    from transformers import (
+        PretrainedConfig,
+        PreTrainedTokenizer,
+        PreTrainedTokenizerFast,
+        PreTrainedModel,
+        TFPreTrainedModel,
+    )
+

 def merge_model_tokenizer_mappings(
     model_mapping: Dict["PretrainedConfig", Union["PreTrainedModel", "TFPreTrainedModel"]],
     tokenizer_mapping: Dict["PretrainedConfig", Tuple["PreTrainedTokenizer", "PreTrainedTokenizerFast"]],
```
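This last hunk is what the second commit-message line ("Ok we still need to quote those") refers to: because the `transformers` classes are imported only under `TYPE_CHECKING`, the annotations must stay as quoted forward references; written unquoted, they would raise a `NameError` the moment the module is imported normally. A condensed sketch:

```python
from typing import TYPE_CHECKING, Dict, Union

if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedModel, TFPreTrainedModel

# quoted names are resolved by the type checker, not evaluated at import time
def pick(mapping: Dict["PretrainedConfig", Union["PreTrainedModel", "TFPreTrainedModel"]]) -> None:
    ...
```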