Commit 7143f128 authored by sunxx1

Merge branch 'hepj-test' into 'main'

Update transformer code

See merge request dcutoolkit/deeplearing/dlexamples_new!47
parents a30b77fe c0f05c10
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
#Special cases are included for prefixes that ONLY appear before 0-9 numbers.
#any single upper-case letter followed by a period is not a sentence ender (except occasionally "I", but we leave it in)
#usually such upper-case letters are initials in a name
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
Adj
Adm
Adv
Asst
Bart
Bldg
Brig
Bros
Capt
Cmdr
Col
Comdr
Con
Corp
Cpl
DR
Dr
Drs
Ens
Gen
Gov
Hon
Hr
Hosp
Insp
Lt
MM
MR
MRS
MS
Maj
Messrs
Mlle
Mme
Mr
Mrs
Ms
Msgr
Op
Ord
Pfc
Ph
Prof
Pvt
Rep
Reps
Res
Rev
Rt
Sen
Sens
Sfc
Sgt
Sr
St
Supt
Surg
#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence)
v
vs
i.e
rev
e.g
#Numbers only. These should only induce breaks when followed by a numeric sequence
# add NUMERIC_ONLY after the word for this function
#This case is mostly for the English "No.", which can either be a sentence of its own or,
#if followed by a number, a non-breaking prefix
No #NUMERIC_ONLY#
Nos
Art #NUMERIC_ONLY#
Nr
pp #NUMERIC_ONLY#
#month abbreviations
Jan
Feb
Mar
Apr
#May is a full word
Jun
Jul
Aug
Sep
Oct
Nov
Dec
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#-------------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import torch
import torch.nn.functional as F
from torch.cuda import amp
from fairseq import utils
from fairseq.models import FairseqIncrementalDecoder
class SequenceGenerator(object):
def __init__(
self, models, vocab_meta, maxlen, beam_size=1, minlen=1, stop_early=True,
normalize_scores=True, len_penalty=1, unk_penalty=0, retain_dropout=False,
sampling=False, sampling_topk=-1, sampling_temperature=1, use_amp=False
):
"""Generates translations of a given source sentence.
Args:
min/maxlen: The length of the generated output will be bounded by
minlen and maxlen (not including the end-of-sentence marker).
stop_early: Stop generation immediately after we finalize beam_size
hypotheses, even though longer hypotheses might have better
normalized scores.
normalize_scores: Normalize scores by the length of the output.
"""
self.models = models
self.pad = vocab_meta['pad']
self.unk = vocab_meta['unk']
self.eos = vocab_meta['eos']
self.vocab_size = vocab_meta['len']
self.beam_size = beam_size
self.minlen = minlen
#max_decoder_len = min(m.max_decoder_positions() for m in self.models)
#max_decoder_len -= 1 # we define maxlen not including the EOS marker
#self.maxlen = max_decoder_len if maxlen is None else min(maxlen, max_decoder_len)
self.maxlen = maxlen
self.stop_early = stop_early
self.normalize_scores = normalize_scores
self.len_penalty = len_penalty
self.unk_penalty = unk_penalty
self.retain_dropout = retain_dropout
self.sampling = sampling
self.sampling_topk = sampling_topk
self.sampling_temperature = sampling_temperature
self.use_amp = use_amp
def cuda(self):
for model in self.models:
model.cuda()
return self
def generate_batched_itr(
self, data_itr, beam_size=None, maxlen_a=0.0, maxlen_b=None,
cuda=False, timer=None, prefix_size=0,
):
"""Iterate over a batched dataset and yield individual translations.
Args:
maxlen_a/b: generate sequences of maximum length ax + b,
where x is the source sentence length.
cuda: use GPU for generation
timer: StopwatchMeter for timing generations.
"""
if maxlen_b is None:
maxlen_b = self.maxlen
for sample in data_itr:
s = utils.move_to_cuda(sample) if cuda else sample
if 'net_input' not in s:
continue
input = s['net_input']
srclen = input['src_tokens'].size(1)
if timer is not None:
timer.start()
with torch.no_grad():
hypos = self.generate(
input['src_tokens'],
input['src_lengths'],
beam_size=beam_size,
maxlen=int(maxlen_a * srclen + maxlen_b),
prefix_tokens=s['target'][:, :prefix_size] if prefix_size > 0 else None,
)
if timer is not None:
timer.stop(sum(len(h[0]['tokens']) for h in hypos))
for i, id in enumerate(s['id'].data):
# remove padding
src = utils.strip_pad(input['src_tokens'].data[i, :], self.pad)
ref = utils.strip_pad(s['target'].data[i, :], self.pad) if s['target'] is not None else None
yield id, src, ref, hypos[i]
def generate(self, src_tokens, src_lengths, beam_size=None, maxlen=None, prefix_tokens=None):
"""Generate a batch of translations."""
with torch.no_grad():
with amp.autocast(enabled=self.use_amp):
return self._generate(src_tokens, src_lengths, beam_size, maxlen, prefix_tokens)
def _generate(self, src_tokens, src_lengths, beam_size=None, maxlen=None, prefix_tokens=None):
bsz, srclen = src_tokens.size()
maxlen = min(maxlen, self.maxlen) if maxlen is not None else self.maxlen
# the max beam size is the dictionary size - 1, since we never select pad
beam_size = beam_size if beam_size is not None else self.beam_size
beam_size = min(beam_size, self.vocab_size - 1)
encoder_outs = []
incremental_states = {}
for model in self.models:
if not self.retain_dropout:
model.eval()
if isinstance(model.decoder, FairseqIncrementalDecoder):
incremental_states[model] = {}
else:
incremental_states[model] = None
# compute the encoder output for each beam
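# repeat(1, beam_size).view(-1, srclen) tiles every source sentence beam_size times,
# so encoder rows are laid out per sentence: [sent0 x beam, sent1 x beam, ...]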
encoder_out = model.encoder(
src_tokens.repeat(1, beam_size).view(-1, srclen),
src_lengths.expand(beam_size, src_lengths.numel()).t().contiguous().view(-1),
)
encoder_outs.append(encoder_out)
# initialize buffers
scores = src_tokens.data.new(bsz * beam_size, maxlen + 1).float().fill_(0)
scores_buf = scores.clone()
tokens = src_tokens.data.new(bsz * beam_size, maxlen + 2).fill_(self.pad)
tokens_buf = tokens.clone()
tokens[:, 0] = self.eos
attn, attn_buf = None, None
nonpad_idxs = None
# list of completed sentences
finalized = [[] for i in range(bsz)]
finished = [False for i in range(bsz)]
worst_finalized = [{'idx': None, 'score': -math.inf} for i in range(bsz)]
num_remaining_sent = bsz
# number of candidate hypos per step
cand_size = 2 * beam_size # 2 x beam size in case half are EOS
# offset arrays for converting between different indexing schemes
bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens)
cand_offsets = torch.arange(0, cand_size).type_as(tokens)
# helper function for allocating buffers on the fly
buffers = {}
def buffer(name, type_of=tokens): # noqa
if name not in buffers:
buffers[name] = type_of.new()
return buffers[name]
def is_finished(sent, step, unfinalized_scores=None):
"""
Check whether we've finished generation for a given sentence, by
comparing the worst score among finalized hypotheses to the best
possible score among unfinalized hypotheses.
"""
assert len(finalized[sent]) <= beam_size
if len(finalized[sent]) == beam_size:
if self.stop_early or step == maxlen or unfinalized_scores is None:
return True
# stop if the best unfinalized score is worse than the worst
# finalized one
best_unfinalized_score = unfinalized_scores[sent].max()
if self.normalize_scores:
best_unfinalized_score /= maxlen ** self.len_penalty
if worst_finalized[sent]['score'] >= best_unfinalized_score:
return True
return False
def finalize_hypos(step, bbsz_idx, eos_scores, unfinalized_scores=None):
"""
Finalize the given hypotheses at this step, while keeping the total
number of finalized hypotheses per sentence <= beam_size.
Note: the input must be in the desired finalization order, so that
hypotheses that appear earlier in the input are preferred to those
that appear later.
Args:
step: current time step
bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
indicating which hypotheses to finalize
eos_scores: A vector of the same size as bbsz_idx containing
scores for each hypothesis
unfinalized_scores: A vector containing scores for all
unfinalized hypotheses
"""
assert bbsz_idx.numel() == eos_scores.numel()
# clone relevant token and attention tensors
tokens_clone = tokens.index_select(0, bbsz_idx)
tokens_clone = tokens_clone[:, 1:step + 2] # skip the first index, which is EOS
tokens_clone[:, step] = self.eos
attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step + 2] if attn is not None else None
# compute scores per token position
pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
pos_scores[:, step] = eos_scores
# convert from cumulative to per-position scores
pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]
# normalize sentence-level scores
if self.normalize_scores:
eos_scores /= (step + 1) ** self.len_penalty
cum_unfin = []
prev = 0
for f in finished:
if f:
prev += 1
else:
cum_unfin.append(prev)
sents_seen = set()
for i, (idx, score) in enumerate(zip(bbsz_idx.tolist(), eos_scores.tolist())):
unfin_idx = idx // beam_size
sent = unfin_idx + cum_unfin[unfin_idx]
sents_seen.add((sent, unfin_idx))
def get_hypo():
if attn_clone is not None:
# remove padding tokens from attn scores
hypo_attn = attn_clone[i][nonpad_idxs[sent]]
_, alignment = hypo_attn.max(dim=0)
else:
hypo_attn = None
alignment = None
return {
'tokens': tokens_clone[i],
'score': score,
'attention': hypo_attn, # src_len x tgt_len
'alignment': alignment,
'positional_scores': pos_scores[i],
}
if len(finalized[sent]) < beam_size:
finalized[sent].append(get_hypo())
elif not self.stop_early and score > worst_finalized[sent]['score']:
# replace worst hypo for this sentence with new/better one
worst_idx = worst_finalized[sent]['idx']
if worst_idx is not None:
finalized[sent][worst_idx] = get_hypo()
# find new worst finalized hypo for this sentence
idx, s = min(enumerate(finalized[sent]), key=lambda r: r[1]['score'])
worst_finalized[sent] = {
'score': s['score'],
'idx': idx,
}
newly_finished = []
for sent, unfin_idx in sents_seen:
# check termination conditions for this sentence
if not finished[sent] and is_finished(sent, step, unfinalized_scores):
finished[sent] = True
newly_finished.append(unfin_idx)
return newly_finished
reorder_state = None
batch_idxs = None
for step in range(maxlen + 1): # one extra step for EOS marker
# reorder decoder internal states based on the prev choice of beams
if reorder_state is not None:
if batch_idxs is not None:
# update beam indices to take into account removed sentences
corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(batch_idxs)
reorder_state.view(-1, beam_size).add_(corr.unsqueeze(-1) * beam_size)
for i, model in enumerate(self.models):
if isinstance(model.decoder, FairseqIncrementalDecoder):
model.decoder.reorder_incremental_state(incremental_states[model], reorder_state)
encoder_outs[i] = model.encoder.reorder_encoder_out(*encoder_outs[i], reorder_state)
probs, avg_attn_scores = self._decode(tokens[:, :step + 1], encoder_outs, incremental_states)
if step == 0:
# at the first step all hypotheses are equally likely, so use
# only the first beam
probs = probs.unfold(0, 1, beam_size).squeeze(2).contiguous()
scores = scores.type_as(probs)
scores_buf = scores_buf.type_as(probs)
elif not self.sampling:
# make probs contain cumulative scores for each hypothesis
probs.add_(scores[:, step - 1].view(-1, 1))
probs[:, self.pad] = -math.inf # never select pad
probs[:, self.unk] -= self.unk_penalty # apply unk penalty
# Record attention scores
if avg_attn_scores is not None:
if attn is None:
attn = scores.new(bsz * beam_size, src_tokens.size(1), maxlen + 2)
attn_buf = attn.clone()
nonpad_idxs = src_tokens.ne(self.pad)
attn[:, :, step + 1].copy_(avg_attn_scores)
cand_scores = buffer('cand_scores', type_of=scores)
cand_indices = buffer('cand_indices')
cand_beams = buffer('cand_beams')
eos_bbsz_idx = buffer('eos_bbsz_idx')
eos_scores = buffer('eos_scores', type_of=scores)
if step < maxlen:
if prefix_tokens is not None and step < prefix_tokens.size(1):
probs_slice = probs.view(bsz, -1, probs.size(-1))[:, 0, :]
cand_scores = torch.gather(
probs_slice, dim=1,
index=prefix_tokens[:, step].view(-1, 1).data
).expand(-1, cand_size)
cand_indices = prefix_tokens[:, step].view(-1, 1).expand(bsz, cand_size).data
cand_beams.resize_as_(cand_indices).fill_(0)
elif self.sampling:
assert self.pad == 1, 'sampling assumes the first two symbols can be ignored'
if self.sampling_topk > 0:
values, indices = probs[:, 2:].topk(self.sampling_topk)
exp_probs = values.div_(self.sampling_temperature).exp()
if step == 0:
torch.multinomial(exp_probs, beam_size, replacement=True, out=cand_indices)
else:
torch.multinomial(exp_probs, 1, replacement=True, out=cand_indices)
torch.gather(exp_probs, dim=1, index=cand_indices, out=cand_scores)
torch.gather(indices, dim=1, index=cand_indices, out=cand_indices)
cand_indices.add_(2)
else:
exp_probs = probs.div_(self.sampling_temperature).exp_().view(-1, self.vocab_size)
if step == 0:
# we exclude the first two vocab items, one of which is pad
torch.multinomial(exp_probs[:, 2:], beam_size, replacement=True, out=cand_indices)
else:
torch.multinomial(exp_probs[:, 2:], 1, replacement=True, out=cand_indices)
cand_indices.add_(2)
torch.gather(exp_probs, dim=1, index=cand_indices, out=cand_scores)
cand_scores.log_()
cand_indices = cand_indices.view(bsz, -1).repeat(1, 2)
cand_scores = cand_scores.view(bsz, -1).repeat(1, 2)
if step == 0:
cand_beams = torch.zeros(bsz, cand_size).type_as(cand_indices)
else:
cand_beams = torch.arange(0, beam_size).repeat(bsz, 2).type_as(cand_indices)
# make scores cumulative
cand_scores.add_(
torch.gather(
scores[:, step - 1].view(bsz, beam_size), dim=1,
index=cand_beams,
)
)
else:
# take the best 2 x beam_size predictions. We'll choose the first
# beam_size of these which don't predict eos to continue with.
torch.topk(
probs.view(bsz, -1),
k=min(cand_size, probs.view(bsz, -1).size(1) - 1), # -1 so we never select pad
out=(cand_scores, cand_indices),
)
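# topk ran over the flattened (beam x vocab) axis, so each candidate index encodes
# beam = idx // vocab_size and token = idx % vocab_size; the two lines below decompose it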
torch.div(cand_indices, self.vocab_size, out=cand_beams, rounding_mode='trunc')
cand_indices.fmod_(self.vocab_size)
else:
# finalize all active hypotheses once we hit maxlen
# pick the hypothesis with the highest prob of EOS right now
torch.sort(
probs[:, self.eos],
descending=True,
out=(eos_scores, eos_bbsz_idx),
)
num_remaining_sent -= len(finalize_hypos(
step, eos_bbsz_idx, eos_scores))
assert num_remaining_sent == 0
break
# cand_bbsz_idx contains beam indices for the top candidate
# hypotheses, with a range of values: [0, bsz*beam_size),
# and dimensions: [bsz, cand_size]
cand_bbsz_idx = cand_beams.add(bbsz_offsets)
# finalize hypotheses that end in eos
eos_mask = cand_indices.eq(self.eos)
finalized_sents = set()
if step >= self.minlen:
# only consider eos when it's among the top beam_size indices
torch.masked_select(
cand_bbsz_idx[:, :beam_size],
mask=eos_mask[:, :beam_size],
out=eos_bbsz_idx,
)
if eos_bbsz_idx.numel() > 0:
torch.masked_select(
cand_scores[:, :beam_size],
mask=eos_mask[:, :beam_size],
out=eos_scores,
)
finalized_sents = finalize_hypos(
step, eos_bbsz_idx, eos_scores, cand_scores)
num_remaining_sent -= len(finalized_sents)
assert num_remaining_sent >= 0
if num_remaining_sent == 0:
break
assert step < maxlen
if len(finalized_sents) > 0:
new_bsz = bsz - len(finalized_sents)
# construct batch_idxs which holds indices of batches to keep for the next pass
batch_mask = torch.ones(bsz).type_as(cand_indices)
batch_mask[cand_indices.new(finalized_sents)] = 0
batch_idxs = batch_mask.nonzero().squeeze(-1)
eos_mask = eos_mask[batch_idxs]
cand_beams = cand_beams[batch_idxs]
bbsz_offsets.resize_(new_bsz, 1)
cand_bbsz_idx = cand_beams.add(bbsz_offsets)
cand_scores = cand_scores[batch_idxs]
cand_indices = cand_indices[batch_idxs]
if prefix_tokens is not None:
prefix_tokens = prefix_tokens[batch_idxs]
scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
scores_buf.resize_as_(scores)
tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
tokens_buf.resize_as_(tokens)
if attn is not None:
attn = attn.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, attn.size(1), -1)
attn_buf.resize_as_(attn)
bsz = new_bsz
else:
batch_idxs = None
# set active_mask so that values > cand_size indicate eos hypos
# and values < cand_size indicate candidate active hypos.
# After, the min values per row are the top candidate active hypos
active_mask = buffer('active_mask')
torch.add(
eos_mask.type_as(cand_offsets) * cand_size,
cand_offsets[:eos_mask.size(1)],
out=active_mask,
)
# get the top beam_size active hypotheses, which are just the hypos
# with the smallest values in active_mask
active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore')
torch.topk(
active_mask, k=beam_size, dim=1, largest=False,
out=(_ignore, active_hypos)
)
active_bbsz_idx = buffer('active_bbsz_idx')
torch.gather(
cand_bbsz_idx, dim=1, index=active_hypos,
out=active_bbsz_idx,
)
active_scores = torch.gather(
cand_scores, dim=1, index=active_hypos,
out=scores[:, step].view(bsz, beam_size),
)
active_bbsz_idx = active_bbsz_idx.view(-1)
active_scores = active_scores.view(-1)
# copy tokens and scores for active hypotheses
torch.index_select(
tokens[:, :step + 1], dim=0, index=active_bbsz_idx,
out=tokens_buf[:, :step + 1],
)
torch.gather(
cand_indices, dim=1, index=active_hypos,
out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
)
if step > 0:
torch.index_select(
scores[:, :step], dim=0, index=active_bbsz_idx,
out=scores_buf[:, :step],
)
torch.gather(
cand_scores, dim=1, index=active_hypos,
out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
)
# copy attention for active hypotheses
if attn is not None:
torch.index_select(
attn[:, :, :step + 2], dim=0, index=active_bbsz_idx,
out=attn_buf[:, :, :step + 2],
)
# swap buffers
tokens, tokens_buf = tokens_buf, tokens
scores, scores_buf = scores_buf, scores
if attn is not None:
attn, attn_buf = attn_buf, attn
# reorder incremental state in decoder
reorder_state = active_bbsz_idx
# sort by score descending
for sent in range(len(finalized)):
finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True)
return finalized
def _decode(self, tokens, encoder_outs, incremental_states):
if len(self.models) == 1:
return self._decode_one(tokens, self.models[0], encoder_outs[0], incremental_states, log_probs=True)
avg_probs = None
avg_attn = None
for model, encoder_out in zip(self.models, encoder_outs):
probs, attn = self._decode_one(tokens, model, encoder_out, incremental_states, log_probs=False)
if avg_probs is None:
avg_probs = probs
else:
avg_probs.add_(probs)
if attn is not None:
if avg_attn is None:
avg_attn = attn
else:
avg_attn.add_(attn)
avg_probs.div_(len(self.models))
avg_probs.log_()
if avg_attn is not None:
avg_attn.div_(len(self.models))
return avg_probs, avg_attn
def _decode_one(self, tokens, model, encoder_out, incremental_states, log_probs):
with torch.no_grad():
if incremental_states[model] is not None:
decoder_out = list(model.decoder(tokens, encoder_out[0], encoder_out[1], incremental_state=incremental_states[model]))
else:
decoder_out = list(model.decoder(tokens, encoder_out[0], encoder_out[1]))
decoder_out[0] = decoder_out[0][:, -1, :]
attn = decoder_out[1]
if isinstance(attn, torch.Tensor) and attn.numel() == 0:
attn = None
if attn is not None:
attn = attn[:, -1, :]
logits = decoder_out[0]
if log_probs:
probs = F.log_softmax(logits, dim=-1, dtype=torch.float32)
else:
probs = F.softmax(logits, dim=-1, dtype=torch.float32)
return probs, attn
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#-------------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from collections import Counter
import re
import torch
SPACE_NORMALIZER = re.compile(r"\s+")
path = os.path.join(os.path.dirname(os.path.abspath(__file__)),'prefixes/nonbreaking_prefix.en')
prefixes ={}
with open(path, 'r') as f:
for line in f:
line = line.strip()
if line and not line[0] == '#':
match = re.search(r'(.*)[\s]+(\#NUMERIC_ONLY\#)', line)
if match:
prefixes[match.group(1)] = 2
else:
prefixes[line] = 1
def get_unicode_categories():
import sys
from collections import defaultdict
import unicodedata
cats = defaultdict(list)
for c in map(chr, range(sys.maxunicode + 1)):
cats[unicodedata.category(c)].append(c)
return cats
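# Unicode category 'No' ("Number, other": superscripts, vulgar fractions, etc.);
# these characters are split off as separate tokens in tokenize_en below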
NUMERICS = ''.join(get_unicode_categories()['No'])
def tokenize_line(line):
line = SPACE_NORMALIZER.sub(" ", line)
line = line.strip()
return line
def tokenize_en(line):
line = line.strip()
line = ' ' + line + ' '
# remove ASCII junk
line = re.sub(r'\s+', ' ', line)
line = re.sub(r'[\x00-\x1F]', '', line)
#fix whitespaces
line = re.sub(r'\ +', ' ', line)
line = re.sub('^ ', '', line)
line = re.sub(' $', '', line)
#separate other special characters
line = re.sub(r'([^\s\.\'\`\,\-\w]|[_'+NUMERICS+'])', r' \g<1> ', line)
line = re.sub(r'(\w)\-(?=\w)', r'\g<1> @-@ ', line)
#multidots stay together
line = re.sub(r'\.([\.]+)', r' DOTMULTI\g<1>', line)
while re.search(r'DOTMULTI\.', line):
line = re.sub(r'DOTMULTI\.([^\.])', r'DOTDOTMULTI \g<1>', line)
line = re.sub(r'DOTMULTI\.', r'DOTDOTMULTI', line)
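# runs of dots are now protected as DOT(DOT)*MULTI placeholders; they are restored
# to literal dots near the end of this function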
# separate out "," except if within numbers (5,300)
line = re.sub(r'([\D])[,]', r'\g<1> , ', line)
line = re.sub(r'[,]([\D])', r' , \g<1>', line)
# separate "," after a number if it's the end of sentence
line = re.sub(r'(\d)[,]$', r'\g<1> ,', line)
# split contractions right
line = re.sub(r'([\W\d])[\']([\W\d])', r"\g<1> ' \g<2>", line)
line = re.sub(r'(\W)[\']([\w\D])', r"\g<1> ' \g<2>", line)
line = re.sub(r'([\w\D])[\']([\W\d])', r"\g<1> ' \g<2>", line)
line = re.sub(r'([\w\D])[\']([\w\D])', r"\g<1> '\g<2>", line)
# special case for "1990's"
line = re.sub(r'([\W\d])[\']([s])', r"\g<1> '\g<2>", line)
# apply nonbreaking prefixes
words = line.split()
line = ''
for i in range(len(words)):
word = words[i]
match = re.search(r'^(\S+)\.$', word)
if match:
pre = match.group(1)
if i==len(words)-1:
# split last words independently as they are unlikely to be non-breaking prefixes
word = pre+' .'
elif ((re.search(r'\.', pre) and re.search(r'[^\.\W\d]', pre))
or (pre in prefixes and prefixes[pre]==1)
or re.search(r'^[a-z]', words[i+1])
or (pre in prefixes and prefixes[pre]==2 and re.search(r'^[0-9]+', words[i+1]))):
pass
else:
word = pre+' .'
word +=' '
line += word
# clean up extraneous spaces
line = re.sub(' +', ' ', line)
line = re.sub('^ ', '', line)
line = re.sub(' $', '', line)
# an .' at the end of a sentence is missed by the loop above; handle it here
line = re.sub(r'\.\' ?$', ' . \' ', line)
#restore multi-dots
while re.search('DOTDOTMULTI', line):
line = re.sub('DOTDOTMULTI', 'DOTMULTI.', line)
line = re.sub('DOTMULTI', '.', line)
# escape special characters
line = re.sub(r'\&', r'&amp;', line)
line = re.sub(r'\|', r'&#124;', line)
line = re.sub(r'\<', r'&lt;', line)
line = re.sub(r'\>', r'&gt;', line)
line = re.sub(r'\'', r'&apos;', line)
line = re.sub(r'\"', r'&quot;', line)
line = re.sub(r'\[', r'&#91;', line)
line = re.sub(r'\]', r'&#93;', line)
#ensure final line breaks
if line[-1] != '\n':
line += '\n'
return line
def deescape(line):
line = re.sub(r'&#124;', r'|', line)
line = re.sub(r'&lt;', r'<', line)
line = re.sub(r'&gt;', r'>', line)
line = re.sub(r'&quot;', '\"', line)
line = re.sub(r'&apos;', '\'', line)
line = re.sub(r'&#91;', r'[', line)
line = re.sub(r'&#93;', r']', line)
line = re.sub(r'&amp;', r'&', line)
return line
class Tokenizer:
@staticmethod
def add_file_to_dictionary(filename, dict, tokenize):
with open(filename, 'r') as f:
for line in f:
for word in tokenize(line).split():
dict.add_symbol(word)
dict.add_symbol(dict.eos_word)
@staticmethod
def binarize(filename, dict, consumer, tokenize=tokenize_line,
append_eos=True, reverse_order=False):
nseq, ntok = 0, 0
replaced = Counter()
def replaced_consumer(word, idx):
if idx == dict.unk_index and word != dict.unk_word:
replaced.update([word])
with open(filename, 'r') as f:
for line in f:
ids = Tokenizer.tokenize(
line=line,
dictionary=dict,
tokenize=tokenize,
add_if_not_exist=False,
consumer=replaced_consumer,
append_eos=append_eos,
reverse_order=reverse_order,
)
nseq += 1
consumer(ids)
ntok += len(ids)
return {'nseq': nseq, 'nunk': sum(replaced.values()), 'ntok': ntok, 'replaced': len(replaced)}
@staticmethod
def tokenize(line, dictionary, tokenize=tokenize_line, add_if_not_exist=True,
consumer=None, append_eos=True, reverse_order=False, bpe=None):
line = tokenize(line)
if bpe:
line = bpe.process_line(line)
words = line.split()
if reverse_order:
words = list(reversed(words))
nwords = len(words)
ids = torch.IntTensor(nwords + 1 if append_eos else nwords)
for i, word in enumerate(words):
if add_if_not_exist:
idx = dictionary.add_symbol(word)
else:
idx = dictionary.index(word)
if consumer is not None:
consumer(word, idx)
ids[i] = idx
if append_eos:
ids[nwords] = dictionary.eos_index
return ids
@staticmethod
def detokenize(line, lang):
#don't try to detokenize XML/HTML tag lines
if re.search(r'^<.+>$', line) or re.search(r'^\s*$', line):
return line
line = line.strip()
line = ' '+line+' '
line = re.sub(r' @-@ ', '-', line)
line = deescape(line)
words = line.split()
line = ''
quote_count = {'\'':0, '\"':0}
prepend_space = ' '
for i in range(len(words)):
#perform right shift of currency and some punctuation
if re.search(r'^[\u20ac\x24\(\[\{]+$', words[i]):
line += prepend_space + words[i]
prepend_space = ''
elif re.search(r'^[\,\.\?\!\:\;\\\%\}\]\)]+$', words[i]):
if lang=='fr' and re.search(r'^[\?\!\:\;\\\%]$', words[i]):
line += ' '
line += words[i]
prepend_space = ' '
elif lang=='en' and i>0 and re.search(r'^[\'][\w\D]', words[i]) and re.search(r'\w$', words[i-1]):
line += words[i]
prepend_space = ' '
elif lang=='cs' and i>1 and re.search(r'^\d+$', words[i-2]) and re.search(r'^[.,]$', words[i-1]) and re.search(r'^\w+$', words[i]):
line += words[i]
prepend_space = ' '
elif (lang=='fr' or lang=='it') and i<len(words)-1 and re.search(r'[\w\D][\']$', words[i]) and re.search(r'^[\w\D]', words[i+1]):
line += prepend_space + words[i]
prepend_space = ''
elif lang=='cs' and i<len(words)-3 and \
re.search(r'[\w\D]$', words[i]) and \
re.search(r'^-$', words[i+1]) and \
re.search(r'^li$|^mail.*', words[i+2], re.I):
#line += ' '+words[i]+words[i+1]
pass #TODO: skip one word
elif re.search(r'^[\'\"\x60\u201c\u201d]+$', words[i]):
normalized_quo = '\"' if re.search(r'^[\u201c\u201d]+$', words[i]) else words[i]
quote_count[normalized_quo] = 0 if normalized_quo not in quote_count.keys() else quote_count[normalized_quo]
if lang=='cs' and words[i] == '\u201c':
quote_count[normalized_quo] = 0
if lang=='cs' and words[i] == '\u201d':
quote_count[normalized_quo] = 1
if quote_count[normalized_quo] % 2 == 0:
if lang=='en' and words[i]=='\'' and i > 0 and re.search(r'[s]$', words[i-1]):
#single quote for possessives ending in s... "The Jones' house"
#left shift
line += words[i]
prepend_space = ' '
else:
#right shift
line += prepend_space + words[i]
prepend_space = ''
quote_count[normalized_quo] += 1
else:
#left shift
line += words[i]
prepend_space = ' '
quote_count[normalized_quo] += 1
elif lang=='fi' and re.search(r':$', words[i-1]) and re.search(r'^(N|n|A|a|Ä|ä|ssa|Ssa|ssä|Ssä|sta|stä|Sta|Stä|hun|Hun|hyn|Hyn|han|Han|hän|Hän|hön|Hön|un|Un|yn|Yn|an|An|än|Än|ön|Ön|seen|Seen|lla|Lla|llä|Llä|lta|Lta|ltä|Ltä|lle|Lle|ksi|Ksi|kse|Kse|tta|Tta|ine|Ine)(ni|si|mme|nne|nsa)?(ko|kö|han|hän|pa|pä|kaan|kään|kin)?$', words[i]):
line += words[i].lower()
prepend_space = ' '
else:
line += prepend_space + words[i]
prepend_space = ' '
#clean up spaces at head and tail of each line as well as any double-spacing
line = re.sub(r' +', ' ', line)
line = re.sub(r'\n ', '\n', line)
line = re.sub(r' \n', '\n', line)
line = re.sub(r'^ ', '', line)
line = re.sub(r' $', '', line)
#add trailing break
line += '\n' if line[-1] != '\n' else ''
return line
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#--------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict, OrderedDict
import logging
import os
import re
import torch
import traceback
from torch.serialization import default_restore_location
def torch_persistent_save(*args, **kwargs):
for i in range(3):
try:
return torch.save(*args, **kwargs)
except Exception:
if i == 2:
logging.error(traceback.format_exc())
def convert_state_dict_type(state_dict, ttype=torch.FloatTensor):
if isinstance(state_dict, dict):
cpu_dict = OrderedDict()
for k, v in state_dict.items():
cpu_dict[k] = convert_state_dict_type(v, ttype)
return cpu_dict
elif isinstance(state_dict, list):
return [convert_state_dict_type(v, ttype) for v in state_dict]
elif torch.is_tensor(state_dict):
return state_dict.type(ttype)
else:
return state_dict
def save_state(filename, args, model, criterion, optimizer, lr_scheduler,
num_updates, optim_history=None, extra_state=None):
if optim_history is None:
optim_history = []
if extra_state is None:
extra_state = {}
state_dict = {
'args': args,
'model': convert_state_dict_type(model.state_dict()),
'optimizer_history': optim_history + [
{
'criterion_name': criterion.__class__.__name__,
'optimizer_name': optimizer.__class__.__name__,
'lr_scheduler_state': lr_scheduler.state_dict(),
'num_updates': num_updates,
}
],
'last_optimizer_state': convert_state_dict_type(optimizer.state_dict()),
'extra_state': extra_state,
}
torch_persistent_save(state_dict, filename)
def load_model_state(filename, model):
if not os.path.exists(filename):
return None, [], None
state = torch.load(filename, map_location=lambda s, l: default_restore_location(s, 'cpu'))
# load model parameters
try:
model.load_state_dict(state['model'], strict=True)
except Exception:
raise Exception('Cannot load model parameters from checkpoint, '
'please ensure that the architectures match')
return state['extra_state'], state['optimizer_history'], state['last_optimizer_state']
def move_to_cuda(sample):
if len(sample) == 0:
return {}
def _move_to_cuda(maybe_tensor):
if torch.is_tensor(maybe_tensor):
return maybe_tensor.cuda()
elif isinstance(maybe_tensor, dict):
return {
key: _move_to_cuda(value)
for key, value in maybe_tensor.items()
}
elif isinstance(maybe_tensor, list):
return [_move_to_cuda(x) for x in maybe_tensor]
else:
return maybe_tensor
return _move_to_cuda(sample)
INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0)
def _get_full_incremental_state_key(module_instance, key):
module_name = module_instance.__class__.__name__
# assign a unique ID to each module instance, so that incremental state is
# not shared across module instances
if not hasattr(module_instance, '_fairseq_instance_id'):
INCREMENTAL_STATE_INSTANCE_ID[module_name] += 1
module_instance._fairseq_instance_id = INCREMENTAL_STATE_INSTANCE_ID[module_name]
return '{}.{}.{}'.format(module_name, module_instance._fairseq_instance_id, key)
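# e.g. a full key looks like 'MultiheadAttention.3.attn_state' (illustrative module and key names)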
def get_incremental_state(module, incremental_state, key):
"""Helper for getting incremental state for an nn.Module."""
full_key = _get_full_incremental_state_key(module, key)
if incremental_state is None or full_key not in incremental_state:
return None
return incremental_state[full_key]
def set_incremental_state(module, incremental_state, key, value):
"""Helper for setting incremental state for an nn.Module."""
if incremental_state is not None:
full_key = _get_full_incremental_state_key(module, key)
incremental_state[full_key] = value
def load_align_dict(replace_unk):
if replace_unk is None:
align_dict = None
elif isinstance(replace_unk, str):
# Load alignment dictionary for unknown word replacement if it was passed as an argument.
align_dict = {}
with open(replace_unk, 'r') as f:
for line in f:
cols = line.split()
align_dict[cols[0]] = cols[1]
else:
# No alignment dictionary provided but we still want to perform unknown word replacement by copying
# the original source word.
align_dict = {}
return align_dict
def print_embed_overlap(embed_dict, vocab_dict):
embed_keys = set(embed_dict.keys())
vocab_keys = set(vocab_dict.symbols)
overlap = len(embed_keys & vocab_keys)
print("| Found {}/{} types in embedding file.".format(overlap, len(vocab_dict)))
def parse_embedding(embed_path):
"""Parse embedding text file into a dictionary of word and embedding tensors.
The first line can have vocabulary size and dimension. The following lines
should contain word and embedding separated by spaces.
Example:
2 5
the -0.0230 -0.0264 0.0287 0.0171 0.1403
at -0.0395 -0.1286 0.0275 0.0254 -0.0932
"""
embed_dict = {}
with open(embed_path) as f_embed:
next(f_embed) # skip header
for line in f_embed:
pieces = line.rstrip().split(" ")
embed_dict[pieces[0]] = torch.Tensor([float(weight) for weight in pieces[1:]])
return embed_dict
def load_embedding(embed_dict, vocab, embedding):
for idx in range(len(vocab)):
token = vocab[idx]
if token in embed_dict:
embedding.weight.data[idx] = embed_dict[token]
return embedding
def replace_unk(hypo_str, src_str, alignment, align_dict, unk):
from fairseq import tokenizer
# Tokens are strings here
hypo_tokens = tokenizer.tokenize_line(hypo_str)
# TODO: Very rare cases where the replacement is '<eos>' should be handled gracefully
src_tokens = tokenizer.tokenize_line(src_str) + ['<eos>']
for i, ht in enumerate(hypo_tokens):
if ht == unk:
src_token = src_tokens[alignment[i]]
# Either take the corresponding value in the aligned dictionary or just copy the original value.
hypo_tokens[i] = align_dict.get(src_token, src_token)
return ' '.join(hypo_tokens)
def post_process_prediction(hypo_tokens, src_str, alignment, align_dict, tgt_dict, remove_bpe):
from fairseq import tokenizer
hypo_str = tgt_dict.string(hypo_tokens, remove_bpe)
if align_dict is not None:
hypo_str = replace_unk(hypo_str, src_str, alignment, align_dict, tgt_dict.unk_string())
if align_dict is not None or remove_bpe is not None:
# Convert back to tokens for evaluating with unk replacement or without BPE
# Note that the dictionary can be modified inside the method.
hypo_tokens = tokenizer.Tokenizer.tokenize(hypo_str, tgt_dict, add_if_not_exist=True)
return hypo_tokens, hypo_str, alignment
def make_positions(tensor, padding_idx, left_pad):
"""Replace non-padding symbols with their position numbers.
Position numbers begin at padding_idx+1.
Padding symbols are ignored, but it is necessary to specify whether padding
is added on the left side (left_pad=True) or right side (left_pad=False).
"""
max_pos = padding_idx + 1 + tensor.size(1)
if not hasattr(make_positions, 'range_buf'):
make_positions.range_buf = torch.arange(padding_idx + 1, 768,
dtype=tensor.dtype, device=tensor.device)
make_positions.range_buf = make_positions.range_buf.type_as(tensor)
if make_positions.range_buf.numel() < max_pos:
torch.arange(padding_idx + 1, max_pos, out=make_positions.range_buf)
mask = tensor.ne(padding_idx)
positions = make_positions.range_buf[:tensor.size(1)].expand_as(tensor)
if left_pad:
positions = positions - mask.size(1) + mask.long().sum(dim=1).unsqueeze(1)
return tensor.clone().masked_scatter_(mask, positions[mask])
def strip_pad(tensor, pad):
return tensor[tensor.ne(pad)]
def buffered_arange(max):
if not hasattr(buffered_arange, 'buf'):
buffered_arange.buf = torch.LongTensor()
if max > buffered_arange.buf.numel():
torch.arange(max, out=buffered_arange.buf)
return buffered_arange.buf[:max]
def convert_padding_direction(src_tokens, padding_idx, right_to_left=False, left_to_right=False):
assert right_to_left ^ left_to_right
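# e.g. with padding_idx=0 and right_to_left=True, [[5, 6, 0]] becomes [[0, 5, 6]] (pads move to the front)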
pad_mask = src_tokens.eq(padding_idx)
if not pad_mask.any():
# no padding, return early
return src_tokens
if left_to_right and not pad_mask[:, 0].any():
# already right padded
return src_tokens
if right_to_left and not pad_mask[:, -1].any():
# already left padded
return src_tokens
max_len = src_tokens.size(1)
range = buffered_arange(max_len).type_as(src_tokens).expand_as(src_tokens)
num_pads = pad_mask.long().sum(dim=1, keepdim=True)
if right_to_left:
index = torch.remainder(range - num_pads, max_len)
else:
index = torch.remainder(range + num_pads, max_len)
return src_tokens.gather(1, index)
def item(tensor):
if hasattr(tensor, 'item'):
return tensor.item()
if hasattr(tensor, '__getitem__'):
return tensor[0]
return tensor
def clip_grad_norm_(tensor, max_norm):
grad_norm = item(torch.norm(tensor))
if grad_norm > max_norm > 0:
clip_coef = max_norm / (grad_norm + 1e-6)
tensor.mul_(clip_coef)
return grad_norm
def fill_with_neg_inf(t):
"""FP16-compatible function that fills a tensor with -inf."""
return t.float().fill_(float('-inf')).type_as(t)
def checkpoint_paths(path, pattern=r'checkpoint(\d+)\.pt'):
"""Retrieves all checkpoints found in `path` directory.
Checkpoints are identified by matching filename to the specified pattern. If
the pattern contains groups, the result will be sorted by the first group in
descending order.
"""
pt_regexp = re.compile(pattern)
files = os.listdir(path)
entries = []
for i, f in enumerate(files):
m = pt_regexp.fullmatch(f)
if m is not None:
idx = int(m.group(1)) if len(m.groups()) > 0 else i
entries.append((idx, m.group(0)))
return [os.path.join(path, x[1]) for x in sorted(entries, reverse=True)]
#!/usr/bin/env python3 -u
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#-------------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import time
from collections import namedtuple
import numpy as np
import torch
from torch.serialization import default_restore_location
from fairseq import data, options, tokenizer, utils, log_helper
from fairseq.sequence_generator import SequenceGenerator
from fairseq.meters import StopwatchMeter
from fairseq.models.transformer import TransformerModel
import dllogger
from apply_bpe import BPE
Batch = namedtuple('Batch', 'srcs tokens lengths')
Translation = namedtuple('Translation', 'src_str hypos pos_scores alignments')
def load_ensemble_for_inference(filenames):
"""Load an ensemble of models for inference.
model_arg_overrides allows you to pass a dictionary model_arg_overrides --
{'arg_name': arg} -- to override model args that were used during model
training
"""
# load model architectures and weights
states = []
for filename in filenames:
if not os.path.exists(filename):
raise IOError('Model file not found: {}'.format(filename))
state = torch.load(filename, map_location=lambda s, l: default_restore_location(s, 'cpu'))
states.append(state)
ensemble = []
for state in states:
args = state['args']
# build model for ensemble
model = TransformerModel.build_model(args)
model.load_state_dict(state['model'], strict=True)
ensemble.append(model)
src_dict = states[0]['extra_state']['src_dict']
tgt_dict = states[0]['extra_state']['tgt_dict']
return ensemble, args, src_dict, tgt_dict
def buffered_read(buffer_size, data_descriptor):
buffer = []
for src_str in data_descriptor:
buffer.append(src_str.strip())
if len(buffer) >= buffer_size:
yield buffer
buffer = []
if buffer:
yield buffer
def make_batches(lines, args, src_dict, max_positions, bpe=None):
tokens = [
tokenizer.Tokenizer.tokenize(
src_str,
src_dict,
tokenize=tokenizer.tokenize_en,
add_if_not_exist=False,
bpe=bpe
).long()
for src_str in lines
]
lengths = np.array([t.numel() for t in tokens])
itr = data.EpochBatchIterator(
dataset=data.LanguagePairDataset(tokens, lengths, src_dict),
max_tokens=args.max_tokens,
max_sentences=args.max_sentences,
max_positions=max_positions,
).next_epoch_itr(shuffle=False)
for batch in itr:
yield Batch(
srcs=[lines[i] for i in batch['id']],
tokens=batch['net_input']['src_tokens'],
lengths=batch['net_input']['src_lengths'],
), batch['id']
def setup_logger(args):
if not args.no_dllogger:
dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=1, filename=args.stat_file)])
for k, v in vars(args).items():
dllogger.log(step='PARAMETER', data={k:v}, verbosity=0)
container_setup_info = log_helper.get_framework_env_vars()
dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)
dllogger.metadata('throughput',
{'unit':'tokens/s', 'format':':.3f', 'GOAL':'MAXIMIZE', 'STAGE':'INFER'})
else:
dllogger.init(backends=[])
def main(args):
setup_logger(args)
args.interactive = sys.stdin.isatty() and not args.file # just makes the code more understandable
if args.file:
data_descriptor = open(args.file, 'r')
else:
data_descriptor = sys.stdin
if args.interactive:
args.buffer_size = 1
if args.max_tokens is None and args.max_sentences is None:
args.max_sentences = 1
if args.buffer_size > 50000:
print("WARNING: To prevent memory exhaustion buffer size is set to 50000", file=sys.stderr)
args.buffer_size = 50000
assert not args.sampling or args.nbest == args.beam, \
'--sampling requires --nbest to be equal to --beam'
assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
'--max-sentences/--batch-size cannot be larger than --buffer-size'
print(args, file=sys.stderr)
use_cuda = torch.cuda.is_available() and not args.cpu
processing_start = time.time()
# Load ensemble
print('| loading model(s) from {}'.format(args.path), file=sys.stderr)
model_paths = args.path.split(':')
models, model_args, src_dict, tgt_dict = load_ensemble_for_inference(model_paths)
if args.fp16:
for model in models:
model.half()
# Optimize ensemble for generation
for model in models:
model.make_generation_fast_(need_attn=args.print_alignment)
# Initialize generator
translator = SequenceGenerator(
models,
tgt_dict.get_metadata(),
maxlen=args.max_target_positions,
beam_size=args.beam,
stop_early=(not args.no_early_stop),
normalize_scores=(not args.unnormalized),
len_penalty=args.lenpen,
unk_penalty=args.unkpen,
sampling=args.sampling,
sampling_topk=args.sampling_topk,
minlen=args.min_len,
sampling_temperature=args.sampling_temperature
)
if use_cuda:
translator.cuda()
# Load BPE codes file
if args.bpe_codes:
codes = open(args.bpe_codes, 'r')
bpe = BPE(codes)
else:
bpe = None
# Load alignment dictionary for unknown word replacement
# (None if no unknown word replacement, empty if no path to align dictionary)
align_dict = utils.load_align_dict(args.replace_unk)
def make_result(src_str, hypos):
result = Translation(
src_str=src_str,
hypos=[],
pos_scores=[],
alignments=[],
)
# Process top predictions
for hypo in hypos[:min(len(hypos), args.nbest)]:
hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
hypo_tokens=hypo['tokens'].int().cpu(),
src_str=src_str,
alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
align_dict=align_dict,
tgt_dict=tgt_dict,
remove_bpe=args.remove_bpe,
)
hypo_str = tokenizer.Tokenizer.detokenize(hypo_str, 'de').strip()
result.hypos.append((hypo['score'], hypo_str))
result.pos_scores.append('P\t' + ' '.join(f'{x:.4f}' for x in hypo['positional_scores'].tolist()))
result.alignments.append('A\t' + ' '.join(str(utils.item(x)) for x in alignment)
if args.print_alignment else None
)
return result
gen_timer = StopwatchMeter()
def process_batch(batch):
tokens = batch.tokens
lengths = batch.lengths
if use_cuda:
tokens = tokens.cuda()
lengths = lengths.cuda()
translation_start = time.time()
gen_timer.start()
translations = translator.generate(
tokens,
lengths,
maxlen=int(args.max_len_a * tokens.size(1) + args.max_len_b),
)
gen_timer.stop(sum(len(h[0]['tokens']) for h in translations))
dllogger.log(step='infer', data={'latency': time.time() - translation_start})
return [make_result(batch.srcs[i], t) for i, t in enumerate(translations)]
if args.interactive:
print('| Type the input sentence and press return:')
for inputs in buffered_read(args.buffer_size, data_descriptor):
indices = []
results = []
for batch, batch_indices in make_batches(inputs, args, src_dict, args.max_positions, bpe):
indices.extend(batch_indices)
results += process_batch(batch)
for i in np.argsort(indices):
result = results[i]
print(result.src_str, file=sys.stderr)
for hypo, pos_scores, align in zip(result.hypos, result.pos_scores, result.alignments):
print(f'Score {hypo[0]}', file=sys.stderr)
print(hypo[1])
print(pos_scores, file=sys.stderr)
if align is not None:
print(align, file=sys.stderr)
if args.file:
data_descriptor.close()
log_dict = {
'throughput': 1./gen_timer.avg,
'latency_avg': sum(gen_timer.intervals)/len(gen_timer.intervals),
'latency_p90': gen_timer.p(90),
'latency_p95': gen_timer.p(95),
'latency_p99': gen_timer.p(99),
'total_inference_time': gen_timer.sum,
'total_run_time': time.time() - processing_start,
}
print('Translation time: {} s'.format(log_dict['total_inference_time']),
file=sys.stderr)
print('Model throughput (beam {}): {} tokens/s'.format(args.beam, log_dict['throughput']),
file=sys.stderr)
print('Latency:\n\tAverage {:.3f}s\n\tp90 {:.3f}s\n\tp95 {:.3f}s\n\tp99 {:.3f}s'.format(
log_dict['latency_avg'], log_dict['latency_p90'], log_dict['latency_p95'], log_dict['latency_p99']),
file=sys.stderr)
print('End to end time: {} s'.format(log_dict['total_run_time']), file=sys.stderr)
dllogger.log(step=(), data=log_dict)
if __name__ == '__main__':
parser = options.get_inference_parser()
parser.add_argument('--no-dllogger', action='store_true')
ARGS = options.parse_args_and_arch(parser)
main(ARGS)
#!/usr/bin/env python3
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
import argparse
from itertools import zip_longest
import os
import shutil
from fairseq.data import indexed_dataset, dictionary
from fairseq.tokenizer import Tokenizer, tokenize_line
def get_parser():
parser = argparse.ArgumentParser(
description='Data pre-processing: Create dictionary and store data in binary format')
parser.add_argument('-s', '--source-lang', default=None, metavar='SRC',
help='source language')
parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET',
help='target language')
parser.add_argument('--trainpref', metavar='FP', default=None,
help='train file prefix')
parser.add_argument('--validpref', metavar='FP', default=None,
help='comma separated, valid file prefixes')
parser.add_argument('--testpref', metavar='FP', default=None,
help='comma separated, test file prefixes')
parser.add_argument('--destdir', metavar='DIR', default='data-bin',
help='destination dir')
parser.add_argument('--thresholdtgt', metavar='N', default=0, type=int,
help='map words appearing less than threshold times to unknown')
parser.add_argument('--thresholdsrc', metavar='N', default=0, type=int,
help='map words appearing less than threshold times to unknown')
parser.add_argument('--tgtdict', metavar='FP', help='reuse given target dictionary')
parser.add_argument('--srcdict', metavar='FP', help='reuse given source dictionary')
parser.add_argument('--nwordstgt', metavar='N', default=-1, type=int,
help='number of target words to retain')
parser.add_argument('--nwordssrc', metavar='N', default=-1, type=int,
help='number of source words to retain')
parser.add_argument('--alignfile', metavar='ALIGN', default=None,
help='an alignment file (optional)')
parser.add_argument('--output-format', metavar='FORMAT', default='binary', choices=['binary', 'raw'],
help='output format (optional)')
parser.add_argument('--joined-dictionary', action='store_true', help='Generate joined dictionary')
parser.add_argument('--only-source', action='store_true', help='Only process the source language')
parser.add_argument('--padding-factor', metavar='N', default=8, type=int,
help='Pad dictionary size to be multiple of N')
return parser
def main(args):
print(args)
os.makedirs(args.destdir, exist_ok=True)
target = not args.only_source
def build_dictionary(filenames):
d = dictionary.Dictionary()
for filename in filenames:
Tokenizer.add_file_to_dictionary(filename, d, tokenize_line)
return d
def train_path(lang):
return '{}{}'.format(args.trainpref, ('.' + lang) if lang else '')
def file_name(prefix, lang):
fname = prefix
if lang is not None:
fname += f'.{lang}'
return fname
def dest_path(prefix, lang):
return os.path.join(args.destdir, file_name(prefix, lang))
def dict_path(lang):
return dest_path('dict', lang) + '.txt'
def dataset_dest_path(output_prefix, lang, extension):
base = f'{args.destdir}/{output_prefix}'
lang_part = f'.{args.source_lang}-{args.target_lang}.{lang}' if lang is not None else ''
return f'{base}{lang_part}.{extension}'
if args.joined_dictionary:
assert not args.srcdict, 'cannot combine --srcdict and --joined-dictionary'
assert not args.tgtdict, 'cannot combine --tgtdict and --joined-dictionary'
src_dict = build_dictionary({
train_path(lang)
for lang in [args.source_lang, args.target_lang]
})
tgt_dict = src_dict
else:
if args.srcdict:
src_dict = dictionary.Dictionary.load(args.srcdict)
else:
assert args.trainpref, "--trainpref must be set if --srcdict is not specified"
src_dict = build_dictionary([train_path(args.source_lang)])
if target:
if args.tgtdict:
tgt_dict = dictionary.Dictionary.load(args.tgtdict)
else:
assert args.trainpref, "--trainpref must be set if --tgtdict is not specified"
tgt_dict = build_dictionary([train_path(args.target_lang)])
src_dict.finalize(
threshold=args.thresholdsrc,
nwords=args.nwordssrc,
padding_factor=args.padding_factor,
)
src_dict.save(dict_path(args.source_lang))
if target:
if not args.joined_dictionary:
tgt_dict.finalize(
threshold=args.thresholdtgt,
nwords=args.nwordstgt,
padding_factor=args.padding_factor,
)
tgt_dict.save(dict_path(args.target_lang))
def make_binary_dataset(input_prefix, output_prefix, lang):
_dict = dictionary.Dictionary.load(dict_path(lang))
print('| [{}] Dictionary: {} types'.format(lang, len(_dict) - 1))
ds = indexed_dataset.IndexedDatasetBuilder(dataset_dest_path(output_prefix, lang, 'bin'))
def consumer(tensor):
ds.add_item(tensor)
input_file = '{}{}'.format(input_prefix, ('.' + lang) if lang is not None else '')
res = Tokenizer.binarize(input_file, _dict, consumer)
print('| [{}] {}: {} sents, {} tokens, {:.3}% replaced by {}'.format(
lang, input_file, res['nseq'], res['ntok'],
100 * res['nunk'] / res['ntok'], _dict.unk_word))
ds.finalize(dataset_dest_path(output_prefix, lang, 'idx'))
def make_dataset(input_prefix, output_prefix, lang):
if args.output_format == 'binary':
make_binary_dataset(input_prefix, output_prefix, lang)
elif args.output_format == 'raw':
# Copy original text file to destination folder
output_text_file = dest_path(
output_prefix + '.{}-{}'.format(args.source_lang, args.target_lang),
lang,
)
shutil.copyfile(file_name(input_prefix, lang), output_text_file)
def make_all(lang):
if args.trainpref:
make_dataset(args.trainpref, 'train', lang)
if args.validpref:
for k, validpref in enumerate(args.validpref.split(',')):
outprefix = 'valid{}'.format(k) if k > 0 else 'valid'
make_dataset(validpref, outprefix, lang)
if args.testpref:
for k, testpref in enumerate(args.testpref.split(',')):
outprefix = 'test{}'.format(k) if k > 0 else 'test'
make_dataset(testpref, outprefix, lang)
make_all(args.source_lang)
if target:
make_all(args.target_lang)
print('| Wrote preprocessed data to {}'.format(args.destdir))
if args.alignfile:
assert args.trainpref, "--trainpref must be set if --alignfile is specified"
src_file_name = train_path(args.source_lang)
tgt_file_name = train_path(args.target_lang)
src_dict = dictionary.Dictionary.load(dict_path(args.source_lang))
tgt_dict = dictionary.Dictionary.load(dict_path(args.target_lang))
freq_map = {}
with open(args.alignfile, 'r') as align_file:
with open(src_file_name, 'r') as src_file:
with open(tgt_file_name, 'r') as tgt_file:
for a, s, t in zip_longest(align_file, src_file, tgt_file):
si = Tokenizer.tokenize(s, src_dict, add_if_not_exist=False)
ti = Tokenizer.tokenize(t, tgt_dict, add_if_not_exist=False)
ai = list(map(lambda x: tuple(x.split('-')), a.split()))
for sai, tai in ai:
srcidx = si[int(sai)]
tgtidx = ti[int(tai)]
if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk():
assert srcidx != src_dict.pad()
assert srcidx != src_dict.eos()
assert tgtidx != tgt_dict.pad()
assert tgtidx != tgt_dict.eos()
if srcidx not in freq_map:
freq_map[srcidx] = {}
if tgtidx not in freq_map[srcidx]:
freq_map[srcidx][tgtidx] = 1
else:
freq_map[srcidx][tgtidx] += 1
align_dict = {}
for srcidx in freq_map:
align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get)
with open(os.path.join(args.destdir, 'alignment.{}-{}.txt'.format(
args.source_lang, args.target_lang)), 'w') as f:
for k, v in align_dict.items():
print('{} {}'.format(src_dict[k], tgt_dict[v]), file=f)
if __name__ == '__main__':
parser = get_parser()
ARGS = parser.parse_args()
main(ARGS)
cffi
numpy
torch
tqdm
tensorboardX
#!/usr/bin/python
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import torch
import argparse
import deployer_lib
#
import torch
from fairseq import data
from fairseq.data import load_dataset_splits, data_utils
from fairseq.models.transformer import TransformerModel
from copy import deepcopy
def get_model_and_args(model_args):
''' parse model-specific arguments, load the checkpoint and return (model, args) '''
parser = argparse.ArgumentParser()
## Required parameters for the model.
parser.add_argument("--checkpoint",
default=None,
type=str,
required=True,
help="The checkpoint of the model. ")
parser.add_argument('--batch-size',
default=10240,
type=int,
help='Batch size for inference')
parser.add_argument('--num-batches',
default=2,
type=int,
help='Number of batches to check accuracy on')
parser.add_argument("--data",
default=None,
type=str,
required=True,
help="Path to the dataset")
parser.add_argument('--part',
choices=['encoder', 'decoder', 'model'],
default='model',
type=str,
help='Choose the part of the model to export')
args = parser.parse_args(model_args)
state_dict = torch.load(args.checkpoint, map_location='cpu')
model_args = state_dict['args']
model_args.data = args.data
model_args.num_batches = args.num_batches
model_args.max_tokens = args.batch_size
model_args.fuse_layer_norm = False
model_args.part = args.part
model = TransformerModel.build_model(model_args)
model.load_state_dict(state_dict['model'], strict=True)
model.make_generation_fast_(need_attn=False)
return model, model_args
def get_dataloader(args, encoder=None):
''' return dataloader for inference '''
assert not(args.part == 'decoder' and encoder is None), "Cannot export decoder without providing encoder"
src_dict, tgt_dict = data_utils.load_dictionaries(args)
datasets = load_dataset_splits(args, ['valid'], src_dict, tgt_dict)
itr = data.EpochBatchIterator(
dataset=datasets['valid'],
max_tokens=args.max_tokens,
max_positions=args.max_positions,
).next_epoch_itr(shuffle=False)
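# The generator below yields one tuple per batch, matching the inputs of the exported part:
# 'encoder' -> (src_tokens, src_lengths), 'decoder' -> (prev_output_tokens, encoder_out[0], encoder_out[1]),
# 'model'   -> (src_tokens, src_lengths, prev_output_tokens).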
def input_itr():
for batch in itr:
if itr.count > args.num_batches:
break
ni = batch['net_input']
if args.part == 'decoder': #this part works only on GPU
with torch.no_grad():
encoder_out = encoder(ni['src_tokens'].cuda(), ni['src_lengths'].cuda())
yield ni['prev_output_tokens'], encoder_out[0], encoder_out[1]
elif args.part == 'encoder':
yield ni['src_tokens'], ni['src_lengths']
else:
yield ni['src_tokens'], ni['src_lengths'], ni['prev_output_tokens']
return input_itr()
if __name__=='__main__':
# don't touch this!
deployer, model_argv = deployer_lib.create_deployer(sys.argv[1:]) # returns the deployer object and the remaining model-specific arguments
model, model_args = get_model_and_args(model_argv)
if model_args.part == 'decoder':
encoder = model.encoder
encoder.embed_tokens = deepcopy(encoder.embed_tokens)
encoder.cuda()
else:
encoder = None
dataloader = get_dataloader(model_args, encoder=encoder)
if model_args.part == 'encoder':
model = model.encoder
elif model_args.part == 'decoder':
model = model.decoder
deployer.deploy(dataloader, model)
#!/usr/bin/python
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
import json
import torch
import argparse
import statistics
from collections import Counter
torch_type_to_triton_type = {
torch.bool: 'TYPE_BOOL',
torch.int8: 'TYPE_INT8',
torch.int16: 'TYPE_INT16',
torch.int32: 'TYPE_INT32',
torch.int64: 'TYPE_INT64',
torch.uint8: 'TYPE_UINT8',
torch.float16: 'TYPE_FP16',
torch.float32: 'TYPE_FP32',
torch.float64: 'TYPE_FP64'
}
CONFIG_TEMPLATE = r"""
name: "{model_name}"
platform: "{platform}"
max_batch_size: {max_batch_size}
input [
{spec_inputs}
]
output [
{spec_outputs}
]
{dynamic_batching}
{model_optimizations}
instance_group [
{{
count: {engine_count}
kind: KIND_GPU
gpus: [ {gpu_list} ]
}}
]"""
INPUT_TEMPLATE = r"""
{{
name: "input__{num}"
data_type: {type}
dims: {dims}
{reshape}
}},"""
OUTPUT_TEMPLATE = r"""
{{
name: "output__{num}"
data_type: {type}
dims: {dims}
{reshape}
}},"""
MODEL_OPTIMIZATION_TEMPLATE = r"""
optimization {{
{execution_accelerator}
cuda {{
graphs: {capture_cuda_graph}
}}
}}"""
EXECUTION_ACCELERATOR_TEMPLATE = r"""
execution_accelerators {{
gpu_execution_accelerator: [
{{
name: "tensorrt"
}}
]
}},"""
def remove_empty_lines(text):
''' removes empty lines from text, returns the result '''
ret = "".join([s for s in text.strip().splitlines(True) if s.strip()])
return ret
def create_deployer(argv):
''' takes a list of arguments, returns a deployer object and the list of unused arguments '''
parser = argparse.ArgumentParser()
# required args
method = parser.add_mutually_exclusive_group(required=True)
method.add_argument('--ts-script',
action='store_true',
help='convert to torchscript using torch.jit.script')
method.add_argument('--ts-trace',
action='store_true',
help='convert to torchscript using torch.jit.trace')
method.add_argument('--onnx',
action='store_true',
help='convert to onnx using torch.onnx.export')
method.add_argument('--trt',
action='store_true',
help='convert to trt using tensorrt')
# triton related args
arguments = parser.add_argument_group('triton related flags')
arguments.add_argument('--triton-no-cuda',
action='store_true',
help='Use the CPU for tracing.')
arguments.add_argument('--triton-model-name',
type=str,
default="model",
help="exports to appropriate directory structure for TRTIS")
arguments.add_argument("--triton-model-version",
type=int,
default=1,
help="exports to appropriate directory structure for TRTIS")
arguments.add_argument("--triton-server-url",
type=str,
default="localhost:8001",
help="exports to appropriate directory structure for TRTIS")
arguments.add_argument("--triton-max-batch-size",
type=int,
default=8,
help="Specifies the 'max_batch_size' in the TRTIS model config.\
See the TRTIS documentation for more info.")
arguments.add_argument("--triton-dyn-batching-delay",
type=float,
default=0,
help="Determines the dynamic_batching queue delay in milliseconds(ms) for\
the TRTIS model config. Use '0' or '-1' to specify static batching.\
See the TRTIS documentation for more info.")
arguments.add_argument("--triton-engine-count",
type=int,
default=1,
help="Specifies the 'instance_group' count value in the TRTIS model config.\
See the TRTIS documentation for more info.")
arguments.add_argument('--save-dir', type=str, default='./triton_models', help='Saved model directory')
# optimization args
arguments = parser.add_argument_group('optimization flags')
arguments.add_argument("--max_workspace_size",
type=int,
default=512*1024*1024,
help="set the size of the workspace for trt export")
arguments.add_argument("--trt-fp16",
action='store_true',
help="trt flag ---- export model in mixed precision mode")
arguments.add_argument("--capture-cuda-graph",
type=int,
default=1,
help="capture cuda graph for obtaining speedup. possible values: 0, 1. default: 1. ")
arguments.add_argument('--quantize',
action='store_true',
help='apply quantization for supported nodes')
arguments.add_argument('--calibrate',
action='store_true',
help='apply calibration for supported nodes')
# remainder args
arguments.add_argument('model_arguments', nargs=argparse.REMAINDER, help='arguments that will be ignored by deployer lib and will be forwarded to your deployer script')
#
args = parser.parse_args(argv)
deployer = Deployer(args)
#
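# args.model_arguments is captured by argparse.REMAINDER and typically starts with the '--' separator
# (see the ' -- ' in the export scripts), so it is skipped before forwarding to the model script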
return deployer, args.model_arguments[1:]
class DeployerLibrary:
def __init__(self, args):
self.args = args
self.platform = None
def set_platform(self, platform):
''' sets the platform
:: platform :: "pytorch_libtorch" or "onnxruntime_onnx" or "tensorrt_plan"
'''
self.platform = platform
def build_trt_engine(self, model_file, shapes):
''' takes a path to an onnx file, and shape information, returns a trt engine
:: model_file :: path to an onnx model
:: shapes :: dictionary containing min shape, max shape, opt shape for the trt engine
'''
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
builder.fp16_mode = self.args.trt_fp16
builder.max_batch_size = self.args.triton_max_batch_size
#
config = builder.create_builder_config()
config.max_workspace_size = self.args.max_workspace_size
if self.args.trt_fp16:
config.flags |= 1 << int(trt.BuilderFlag.FP16)
profile = builder.create_optimization_profile()
for s in shapes:
profile.set_shape(s['name'], min=s['min'], opt=s['opt'], max=s['max'])
config.add_optimization_profile(profile)
explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(explicit_batch)
#
with trt.OnnxParser(network, TRT_LOGGER) as parser:
with open(model_file, 'rb') as model:
parser.parse(model.read())
for i in range(parser.num_errors):
e = parser.get_error(i)
print("||||e", e)
engine = builder.build_engine(network, config=config)
return engine
def load_engine(self, engine_filepath):
''' loads a trt engine from engine_filepath, returns it '''
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open(engine_filepath, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
engine = runtime.deserialize_cuda_engine(f.read())
return engine
def prepare_inputs(self, dataloader, device):
''' load sample inputs to device '''
def _move_to_device(maybe_tensor):
if torch.is_tensor(maybe_tensor):
return maybe_tensor.to(device)
elif isinstance(maybe_tensor, dict):
return {
key: _move_to_device(value)
for key, value in maybe_tensor.items()
}
elif isinstance(maybe_tensor, list) or isinstance(maybe_tensor, tuple):
return [_move_to_device(x) for x in maybe_tensor]
else:
return maybe_tensor
inputs = []
for batch in dataloader:
batch_d = _move_to_device(batch)
if not hasattr(batch_d, '__iter__'):
batch_d = (batch_d,)
inputs.append(batch_d)
return inputs
def get_list_of_shapes(self, l, fun):
''' returns the list of min/max shapes, depending on fun
:: l :: list of tuples of tensors
:: fun :: min or max
'''
tensor_tuple = l[0]
shapes = [list(x.shape) for x in tensor_tuple]
for tensor_tuple in l:
assert len(tensor_tuple) == len(shapes), "tensors with varying shape lengths are not supported"
for i,x in enumerate(tensor_tuple):
for j in range(len(x.shape)):
shapes[i][j] = fun(shapes[i][j], x.shape[j])
return shapes # a list of shapes
def get_tuple_of_min_shapes(self, l):
''' returns the tuple of min shapes
:: l :: list of tuples of tensors '''
shapes = self.get_list_of_shapes(l, min)
min_batch = 1
shapes = [[min_batch,*shape[1:]] for shape in shapes]
shapes = tuple(shapes)
return shapes # tuple of min shapes
def get_tuple_of_max_shapes(self, l):
''' returns the tuple of max shapes
:: l :: list of tuples of tensors '''
shapes = self.get_list_of_shapes(l, max)
max_batch = max(2,shapes[0][0])
shapes = [[max_batch,*shape[1:]] for shape in shapes]
shapes = tuple(shapes)
return shapes # tuple of max shapes
def get_tuple_of_opt_shapes(self, l):
''' returns the tuple of opt shapes
:: l :: list of tuples of tensors '''
counter = Counter()
for tensor_tuple in l:
shapes = [tuple(x.shape) for x in tensor_tuple]
shapes = tuple(shapes)
counter[shapes] += 1
shapes = counter.most_common(1)[0][0]
return shapes # the most commonly occurring tuple of shapes
def get_tuple_of_dynamic_shapes(self, l):
''' returns a tuple of dynamic shapes: variable tensor dimensions
(for ex. batch size) occur as -1 in the tuple
:: l :: list of tuples of tensors '''
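# e.g. per-batch shapes (8, 31) and (16, 27) for the same tensor collapse to (-1, -1);
# the batch dimension (j == 0) is always marked as dynamic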
tensor_tuple = l[0]
shapes = [list(x.shape) for x in tensor_tuple]
for tensor_tuple in l:
err_msg = "tensors with varying shape lengths are not supported"
assert len(tensor_tuple) == len(shapes), err_msg
for i,x in enumerate(tensor_tuple):
for j in range(len(x.shape)):
if shapes[i][j] != x.shape[j] or j == 0:
shapes[i][j] = -1
shapes = tuple(shapes)
return shapes # tuple of dynamic shapes
def run_models(self, models, inputs):
''' run the models on inputs, return the outputs and execution times '''
ret = []
for model in models:
torch.cuda.synchronize()
time_start = time.time()
outputs = []
for input in inputs:
with torch.no_grad():
output = model(*input)
if type(output) is torch.Tensor:
output = [output]
elif type(output) is dict:
output = list(output.items())
output.sort(key=lambda x: x[0])
output = [x[1] for x in output] # keep the tensor values, ordered by output name
outputs.append(output)
torch.cuda.synchronize()
time_end = time.time()
t = time_end - time_start
ret.append(outputs)
ret.append(t)
return ret
def compute_tensor_stats(self, tensor):
#if tensor is not empty
if tensor.numel():
return {'std': tensor.std().item(),
'mean': tensor.mean().item(),
'max': tensor.max().item(),
'min': tensor.min().item(),
}
else:
return {'std': 0,
'mean':0,
'max': 0,
'min': 0,
}
def compute_errors(self, outputs_A, outputs_B):
''' returns dictionary with errors statistics '''
device = outputs_A[0][0][0].device
dtype = outputs_A[0][0][0].dtype
num_outputs = len(outputs_A[0])
x_values = [torch.zeros(0, device = device, dtype = dtype) for _ in range(num_outputs)]
y_values = [torch.zeros(0, device = device, dtype = dtype) for _ in range(num_outputs)]
d_values = [torch.zeros(0, device = device, dtype = dtype) for _ in range(num_outputs)]
for output_A,output_B in zip(outputs_A,outputs_B):
for i,(x,y) in enumerate(zip(output_A, output_B)):
x = x.view(-1).float()
y = y.view(-1).float()
d = abs(x - y)
x_values[i] = torch.cat((x_values[i], x), 0)
y_values[i] = torch.cat((y_values[i], y), 0)
d_values[i] = torch.cat((d_values[i], d), 0)
Error_stats = [{'Original': self.compute_tensor_stats(x),
'Converted': self.compute_tensor_stats(y),
'Absolute difference': self.compute_tensor_stats(d),
} for x,y,d in zip(x_values, y_values, d_values)]
return Error_stats
def print_errors(self, Error_stats):
''' print various statistics of the conversion errors '''
print()
print("conversion correctness test results")
print("-----------------------------------")
import pandas as pd
for i,e in enumerate(Error_stats):
print(f'Output {i}:')
print(pd.DataFrame(e))
def write_config(self, config_filename,
input_shapes, input_types,
output_shapes, output_types):
''' writes TRTIS config file
:: config_filename :: the file to write the config file into
:: input_shapes :: tuple of dynamic shapes of the input tensors
:: input_types :: tuple of torch types of the input tensors
:: output_shapes :: tuple of dynamic shapes of the output tensors
:: output_types :: tuple of torch types of the output tensors
'''
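# The generated config.pbtxt follows CONFIG_TEMPLATE above: one input__N / output__N entry per tensor,
# with the batch dimension stripped from dims (Triton re-adds a variable batch dimension via max_batch_size)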
assert self.platform is not None, "error - platform is not set"
config_template = CONFIG_TEMPLATE
input_template = INPUT_TEMPLATE
optimization_template = MODEL_OPTIMIZATION_TEMPLATE
accelerator_template = EXECUTION_ACCELERATOR_TEMPLATE
spec_inputs = r""""""
for i,(shape,typ) in enumerate(zip(input_shapes,input_types)):
d = {
'num' : str(i),
'type': torch_type_to_triton_type[typ],
'dims': str([1]) if len(shape) == 1 else str(list(shape)[1:]) # first dimension is the batch size
}
d['reshape'] = 'reshape: { shape: [ ] }' if len(shape) == 1 else ''
spec_inputs += input_template.format_map(d)
spec_inputs = spec_inputs[:-1]
output_template = OUTPUT_TEMPLATE
spec_outputs = r""""""
for i,(shape,typ) in enumerate(zip(output_shapes,output_types)):
d = {
'num' : str(i),
'type': torch_type_to_triton_type[typ],
'dims': str([1]) if len(shape) == 1 else str(list(shape)[1:]) # first dimension is the batch size
}
d['reshape'] = 'reshape: { shape: [ ] }' if len(shape) == 1 else ''
spec_outputs += output_template.format_map(d)
spec_outputs = spec_outputs[:-1]
batching_str = ""
max_batch_size = self.args.triton_max_batch_size
if (self.args.triton_dyn_batching_delay > 0):
# Use only full and half full batches
pref_batch_size = [int(max_batch_size / 2.0), max_batch_size]
batching_str = r"""
dynamic_batching {{
preferred_batch_size: [{0}]
max_queue_delay_microseconds: {1}
}}""".format(", ".join([str(x) for x in pref_batch_size]),
int(self.args.triton_dyn_batching_delay * 1000.0))
accelerator_str = ""
if self.platform == 'onnxruntime_onnx':
accelerator_str = accelerator_template.format_map({})
d = {
"execution_accelerator": accelerator_str,
"capture_cuda_graph": str(self.args.capture_cuda_graph)
}
optimization_str = optimization_template.format_map(d)
config_values = {
"model_name": self.args.triton_model_name,
"platform": self.platform,
"max_batch_size": max_batch_size,
"spec_inputs": spec_inputs,
"spec_outputs": spec_outputs,
"dynamic_batching": batching_str,
"model_optimizations" : optimization_str,
"gpu_list": ", ".join([str(x) for x in range(torch.cuda.device_count())]),
"engine_count": self.args.triton_engine_count
}
# write config
with open(config_filename, "w") as file:
final_config_str = config_template.format_map(config_values)
final_config_str = remove_empty_lines(final_config_str)
file.write(final_config_str)
class Deployer:
def __init__(self, args):
self.args = args
self.lib = DeployerLibrary(args)
def deploy(self, dataloader, model):
''' deploy the model and test for correctness with dataloader '''
if self.args.ts_script or self.args.ts_trace:
self.lib.set_platform("pytorch_libtorch")
print("deploying model " + self.args.triton_model_name + " in format " + self.lib.platform)
self.to_triton_torchscript(dataloader, model)
elif self.args.onnx:
self.lib.set_platform("onnxruntime_onnx")
print("deploying model " + self.args.triton_model_name + " in format " + self.lib.platform)
self.to_triton_onnx(dataloader, model)
elif self.args.trt:
self.lib.set_platform("tensorrt_plan")
print("deploying model " + self.args.triton_model_name + " in format " + self.lib.platform)
self.to_triton_trt(dataloader, model)
else:
assert False, "error"
print("done")
def to_triton_trt(self, dataloader, model):
''' export the model to trt and test correctness on dataloader '''
import tensorrt as trt
# setup device
if self.args.triton_no_cuda:
device = torch.device('cpu')
else:
device = torch.device('cuda')
assert not self.args.quantize, 'quantize flag not supported by trt'
assert not self.args.calibrate, 'calibrate flag not supported by trt'
# prepare model
model.to(device)
model.eval()
assert not model.training, "internal error - model should be in eval() mode! "
# prepare inputs
inputs = self.lib.prepare_inputs(dataloader, device)
# generate outputs
outputs = []
for input in inputs:
with torch.no_grad():
output = model(*input)
if type(output) is torch.Tensor:
output = [output]
outputs.append(output)
# generate input shapes - dynamic tensor shape support
input_shapes = self.lib.get_tuple_of_dynamic_shapes(inputs)
# generate output shapes - dynamic tensor shape support
output_shapes = self.lib.get_tuple_of_dynamic_shapes(outputs)
# generate input types
input_types = [x.dtype for x in inputs[0]]
# generate output types
output_types = [x.dtype for x in outputs[0]]
# get input names
rng = range(len(input_types))
input_names = ["input__" + str(num) for num in rng]
# get output names
rng = range(len(output_types))
output_names = ["output__" + str(num) for num in rng]
# prepare save path
model_folder = os.path.join(self.args.save_dir, self.args.triton_model_name)
version_folder = os.path.join(model_folder, str(self.args.triton_model_version))
if not os.path.exists(version_folder):
os.makedirs(version_folder)
final_model_path = os.path.join(version_folder, 'model.plan')
# get indices of dynamic input and output shapes
dynamic_axes = {}
for input_name,shape in zip(input_names,input_shapes):
dynamic_axes[input_name] = [i for i,x in enumerate(shape) if x == -1]
for output_name,shape in zip(output_names,output_shapes):
dynamic_axes[output_name] = [i for i,x in enumerate(shape) if x == -1]
# export the model to onnx first
with torch.no_grad():
torch.onnx.export(model, inputs[0], final_model_path, verbose=False,
input_names=input_names, output_names=output_names,
dynamic_axes=dynamic_axes, opset_version=11)
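# note: the intermediate ONNX graph is written to final_model_path ('model.plan') and is
# overwritten below by the serialized TensorRT engine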
# get shapes
min_shapes = self.lib.get_tuple_of_min_shapes(inputs)
opt_shapes = self.lib.get_tuple_of_opt_shapes(inputs)
max_shapes = self.lib.get_tuple_of_max_shapes(inputs)
zipped = zip(input_names, min_shapes, opt_shapes, max_shapes)
shapes = []
for name,min_shape,opt_shape,max_shape in zipped:
d = {
"name":name,
"min": min_shape,
"opt": opt_shape,
"max": max_shape
}
shapes.append(d)
# build trt engine
engine = self.lib.build_trt_engine(final_model_path, shapes)
assert engine is not None, " trt export failure "
# write trt engine
with open(final_model_path, 'wb') as f:
f.write(engine.serialize())
# load the model
engine = self.lib.load_engine(final_model_path)
class TRT_model:
def __init__(self, engine, input_names, output_names, output_types, device):
self.engine = engine
self.context = self.engine.create_execution_context()
self.input_names = input_names
self.output_names = output_names
self.output_types = output_types
self.device = device
def is_dimension_dynamic(self, dim):
return dim is None or dim <= 0
def is_shape_dynamic(self, shape):
return any([self.is_dimension_dynamic(dim) for dim in shape])
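# __call__ binds input and output tensors to the engine by raw data_ptr(), so all tensors
# are expected to live on the same CUDA device as the engine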
def __call__(self, *inputs):
# get input shapes
input_shapes = [x.shape for x in inputs]
# bindings
bindings = [None] * self.engine.num_bindings
# set input shapes, bind input tensors
zipped = zip(self.input_names, inputs)
for key,input in zipped:
idx = self.engine.get_binding_index(key)
bindings[idx] = input.data_ptr()
if self.engine.is_shape_binding(idx) and self.is_shape_dynamic(self.context.get_shape(idx)):
self.context.set_shape_input(idx, input)
elif self.is_shape_dynamic(self.engine.get_binding_shape(idx)):
self.context.set_binding_shape(idx, input.shape)
assert self.context.all_binding_shapes_specified, "trt error"
assert self.context.all_shape_inputs_specified, "trt error"
# calculate output shapes, allocate output tensors and bind them
outputs = []
zipped = zip(self.output_names, self.output_types)
for key,dtype in zipped:
idx = self.engine.get_binding_index(key)
shape = self.context.get_binding_shape(idx)
shape = tuple(shape)
assert -1 not in shape, "trt error"
tensor = torch.zeros(shape, dtype=dtype, device=self.device)
outputs.append(tensor)
bindings[idx] = outputs[-1].data_ptr()
# run inference
self.context.execute_v2(bindings=bindings)
# return the result
if len(outputs) == 1:
outputs = outputs[0]
return outputs
model_trt = TRT_model(engine, input_names, output_names, output_types, device)
# run both models on inputs
assert not model.training, "internal error - model should be in eval() mode! "
models = (model, model_trt)
outputs, time_model, outputs_trt, time_model_trt = self.lib.run_models(models, inputs)
# check for errors
Error_stats = self.lib.compute_errors(outputs, outputs_trt)
self.lib.print_errors(Error_stats)
print('time of error check of native model: ', time_model, 'seconds')
print('time of error check of trt model: ', time_model_trt, 'seconds')
print()
# write TRTIS config
config_filename = os.path.join(model_folder, "config.pbtxt")
self.lib.write_config(config_filename,
input_shapes, input_types,
output_shapes, output_types)
def name_onnx_nodes(self, model_path):
'''
Name all unnamed nodes in ONNX model
parameter model_path: path to the ONNX model
return: none
'''
model = onnx.load(model_path)
node_id = 0
for node in model.graph.node:
if len(node.name) == 0:
node.name = "unnamed_node_%d" % node_id
node_id += 1
# This check partially validates model
onnx.checker.check_model(model)
onnx.save(model, model_path)
# Only inference really checks ONNX model for some issues
# like duplicated node names
onnxruntime.InferenceSession(model_path, None)
def to_triton_onnx(self, dataloader, model):
''' export the model to onnx and test correctness on dataloader '''
import onnx as local_onnx
global onnx
onnx = local_onnx
import onnxruntime as local_onnxruntime
global onnxruntime
onnxruntime = local_onnxruntime
# setup device
if self.args.triton_no_cuda:
device = torch.device('cpu')
else:
device = torch.device('cuda')
if self.args.calibrate:
assert self.args.quantize, ("calibrate flag not supported "
"without quantize")
if self.args.quantize:
try:
from quantize import quantize, QuantizationMode
except ImportError as error:
print('quantize scripts are not present')
raise error
if self.args.calibrate:
try:
import calibrate
except ImportError as error:
print('calibrate scripts are not present')
raise error
# prepare model
model.to(device)
model.eval()
assert not model.training, "internal error - model should be in eval() mode! "
# prepare inputs
inputs = self.lib.prepare_inputs(dataloader, device)
# generate outputs
outputs = []
for input in inputs:
with torch.no_grad():
output = model(*input)
if type(output) is torch.Tensor:
output = [output]
outputs.append(output)
# generate input shapes - dynamic tensor shape support
input_shapes = self.lib.get_tuple_of_dynamic_shapes(inputs)
# generate output shapes - dynamic tensor shape support
output_shapes = self.lib.get_tuple_of_dynamic_shapes(outputs)
# generate input types
input_types = [x.dtype for x in inputs[0]]
# generate output types
output_types = [x.dtype for x in outputs[0]]
# get input names
rng = range(len(input_types))
input_names = ["input__" + str(num) for num in rng]
# get output names
rng = range(len(output_types))
output_names = ["output__" + str(num) for num in rng]
# prepare save path
model_folder = os.path.join(self.args.save_dir, self.args.triton_model_name)
version_folder = os.path.join(model_folder, str(self.args.triton_model_version))
if not os.path.exists(version_folder):
os.makedirs(version_folder)
final_model_path = os.path.join(version_folder, 'model.onnx')
# get indices of dynamic input and output shapes
dynamic_axes = {}
for input_name,input_shape in zip(input_names,input_shapes):
dynamic_axes[input_name] = [i for i,x in enumerate(input_shape) if x == -1]
for output_name,output_shape in zip(output_names,output_shapes):
dynamic_axes[output_name] = [i for i,x in enumerate(output_shape) if x == -1]
# export the model
assert not model.training, "internal error - model should be in eval() mode! "
with torch.no_grad():
torch.onnx.export(model, inputs[0], final_model_path, verbose=False,
input_names=input_names, output_names=output_names,
dynamic_axes=dynamic_axes, opset_version=11)
# syntactic error check
converted_model = onnx.load(final_model_path)
# check that the IR is well formed
onnx.checker.check_model(converted_model)
# Name unnamed nodes - it helps for some other processing tools
self.name_onnx_nodes(final_model_path)
converted_model = onnx.load(final_model_path)
# quantize model
if self.args.quantize:
if not self.args.calibrate:
quantized_model = quantize(
converted_model,
quantization_mode = QuantizationMode.IntegerOps,
)
# check that the IR is well formed
try:
onnx.checker.check_model(quantized_model)
except onnx.onnx_cpp2py_export.checker.ValidationError as error:
# FIXME: it is unclear why the checker fails for the quantized model,
# so this error is ignored for now. Inference works for
# some quantized models, so just emit a warning here
print("model check failed with warning: [", error, "]")
print("Warning during onnx.checker.check_model in quantized model ignored")
onnx.save(quantized_model, final_model_path)
else:
#assert not self.args.calibrate, 'calibrate flag not supported by ONNX'
# Parsing command-line arguments
#parser = argparse.ArgumentParser(description='parsing model and test data set paths')
#parser.add_argument('--model_path', required=True)
#parser.add_argument('--dataset_path', required=True)
#parser.add_argument('--output_model_path', type=str, default='calibrated_quantized_model.onnx')
#parser.add_argument('--dataset_size', type=int, default=0, help="Number of images or tensors to load. Default is 0 which means all samples")
#parser.add_argument('--data_preprocess', type=str, required=True, choices=['preprocess_method1', 'preprocess_method2', 'None'], help="Refer to Readme.md for guidance on choosing this option.")
#args = parser.parse_args()
#model_path = args.model_path
#output_model_path = args.output_model_path
#images_folder = args.dataset_path
calib_mode = "naive"
size_limit = 0 # int(args.dataset_size)
# Generating augmented ONNX model
# FIXME: use proper temporary file path
augmented_model_path = 'augmented_model.onnx'
#model = onnx.load(model_path)
augmented_model = calibrate.augment_graph(converted_model)
onnx.checker.check_model(augmented_model)
#onnx.save(augmented_model, final_model_path)
onnx.save(augmented_model, augmented_model_path)
# Conducting inference
#session = onnxruntime.InferenceSession(final_model_path, None)
print(augmented_model_path)
session = onnxruntime.InferenceSession(augmented_model_path, None)
#session = onnxruntime.InferenceSession('augmented_modelv3.onnx', None)
(samples, channels, height, width) = session.get_inputs()[0].shape
print(session.get_inputs()[0].shape)
#return
# Generating inputs for quantization
#if args.data_preprocess == "None":
# inputs = load_pb_file(images_folder, args.dataset_size, samples, channels, height, width)
#else:
# inputs = load_batch(images_folder, height, width, args.data_preprocess, size_limit)
import numpy as np
inputs_calibrate_tmp = inputs[0][0].cpu().numpy()
dict_for_quantization = calibrate.get_intermediate_outputs(
final_model_path,
session,
inputs_calibrate_tmp,
calib_mode,
)
quantization_params_dict = calibrate.calculate_quantization_params(
augmented_model,
quantization_thresholds = dict_for_quantization,
)
calibrated_quantized_model = quantize(
converted_model,
quantization_mode = QuantizationMode.QLinearOps,
quantization_params = quantization_params_dict,
)
onnx.save(calibrated_quantized_model, final_model_path)
print("Calibrated, quantized model saved.")
# load the model
session = onnxruntime.InferenceSession(final_model_path, None)
class ONNX_model:
def __init__(self, session, input_names, device):
self.session = session
self.input_names = input_names
self.device = device
def to_numpy(self, tensor):
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
def __call__(self, *inputs):
inp = [(input_name, inputs[i]) for i,input_name in enumerate(self.input_names)]
inp = {input_name : self.to_numpy(x) for input_name,x in inp}
outputs = self.session.run(None, inp)
outputs = [torch.from_numpy(output) for output in outputs]
outputs = [output.to(self.device) for output in outputs]
if len(outputs) == 1:
outputs = outputs[0]
return outputs
# wrap the onnxruntime session so it can be called like the original model
model_onnx = ONNX_model(session, input_names, device)
# run both models on inputs
assert not model.training, "internal error - model should be in eval() mode! "
models = (model, model_onnx)
outputs, time_model, outputs_onnx, time_model_onnx = self.lib.run_models(models, inputs)
# check for errors
Error_stats = self.lib.compute_errors(outputs, outputs_onnx)
self.lib.print_errors(Error_stats)
print('time of error check of native model: ', time_model, 'seconds')
print('time of error check of onnx model: ', time_model_onnx, 'seconds')
print()
# write TRTIS config
config_filename = os.path.join(model_folder, "config.pbtxt")
self.lib.write_config(config_filename,
input_shapes, input_types,
output_shapes, output_types)
def to_triton_torchscript(self, dataloader, model):
''' export the model to torchscript and test correctness on dataloader '''
# setup device
if self.args.triton_no_cuda:
device = torch.device('cpu')
else:
device = torch.device('cuda')
# prepare model
model.to(device)
model.eval()
assert not model.training, "internal error - model should be in eval() mode! "
#TODO: support quantize
assert not self.args.quantize, 'quantize flag not supported by torchscript yet'
# prepare inputs
inputs = self.lib.prepare_inputs(dataloader, device)
# generate input shapes - dynamic tensor shape support
input_shapes = self.lib.get_tuple_of_dynamic_shapes(inputs)
# generate input types
input_types = [x.dtype for x in inputs[0]]
# prepare save path
model_folder = os.path.join(self.args.save_dir, self.args.triton_model_name)
version_folder = os.path.join(model_folder, str(self.args.triton_model_version))
if not os.path.exists(version_folder):
os.makedirs(version_folder)
final_model_path = os.path.join(version_folder, 'model.pt')
# convert the model
with torch.no_grad():
if self.args.ts_trace: # trace it
model_ts = torch.jit.trace(model, inputs[0])
if self.args.ts_script: # script it
model_ts = torch.jit.script(model)
# save the model
torch.jit.save(model_ts, final_model_path)
# load the model
model_ts = torch.jit.load(final_model_path)
model_ts.eval() # WAR for bug : by default, model_ts gets loaded in training mode
# run both models on inputs
assert not model.training, "internal error - model should be in eval() mode! "
assert not model_ts.training, "internal error - converted model should be in eval() mode! "
models = (model, model_ts)
outputs, time_model, outputs_ts, time_model_ts = self.lib.run_models(models, inputs)
# check for errors
Error_stats = self.lib.compute_errors(outputs, outputs_ts)
self.lib.print_errors(Error_stats)
print('time of error check of native model: ', time_model, 'seconds')
print('time of error check of ts model: ', time_model_ts, 'seconds')
print()
# generate output shapes - dynamic tensor shape support
output_shapes = self.lib.get_tuple_of_dynamic_shapes(outputs)
# generate output types
output_types = [x.dtype for x in outputs[0]]
# now we build the config for TRTIS
config_filename = os.path.join(model_folder, "config.pbtxt")
self.lib.write_config(config_filename,
input_shapes, input_types,
output_shapes, output_types)
docker build . --network=host -t transformer_pyt
#!/bin/bash
CMD=${1:-/bin/bash}
NV_VISIBLE_DEVICES=${2:-"0,1,2,3,4,5,6,7,8"}
DOCKER_BRIDGE=${3:-"host"}
nvidia-docker run -it --rm \
--net=$DOCKER_BRIDGE \
--shm-size=1g \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
-e NVIDIA_VISIBLE_DEVICES=${NV_VISIBLE_DEVICES} \
-v $PWD/results:/results \
-v $PWD/data:/data \
transformer_pyt $CMD
import json
import argparse
from collections import defaultdict, OrderedDict
import matplotlib.pyplot as plt
import numpy as np
def smooth_moving_average(x, n):
fil = np.ones(n)/n
smoothed = np.convolve(x, fil, mode='valid')
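# 'valid' convolution drops the first n-1 positions; keep the raw values there so the output has the same length as x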
smoothed = np.concatenate((x[:n-1], smoothed), axis=0)
return smoothed
def moving_stdev(x, n):
fil = np.ones(n)/n
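# moving variance via the identity Var[X] = E[X^2] - (E[X])^2 over a window of n samples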
avg_square = np.convolve(np.power(x, 2), fil, mode='valid')
squared_avg = np.power(np.convolve(x, fil, mode='valid'), 2)
var = avg_square - squared_avg
stdev = np.sqrt(var)
#pad first few values
stdev = np.concatenate(([0]*(n-1), stdev), axis=0)
return stdev
def get_plot(log):
steps = [x[0] for x in log if isinstance(x[0], int)]
values = [x[2] for x in log if isinstance(x[0], int)]
return steps, values
def highlight_max_point(plot, color):
point = max(zip(*plot), key=lambda x: x[1])
plt.plot(point[0], point[1], 'bo-', color=color)
plt.annotate("{:.2f}".format(point[1]), point)
return point
def main(args):
jlog = defaultdict(list)
jlog['parameters'] = {}
with open(args.log_file, 'r') as f:
for line in f.readlines():
line_dict = json.loads(line[5:])
if line_dict['type'] == 'LOG':
if line_dict['step'] == 'PARAMETER':
jlog['parameters'].update(line_dict['data'])
elif line_dict['step'] == [] and 'training_summary' not in jlog:
jlog['training_summary']=line_dict['data']
else:
for k, v in line_dict['data'].items():
jlog[k].append((line_dict['step'], line_dict['elapsedtime'], v))
fig, ax1 = plt.subplots(figsize=(20,5))
fig.suptitle(args.title, fontsize=16)
ax1.set_xlabel('steps')
ax1.set_ylabel('loss')
# Define colors for specific curves
VAL_LOSS_COLOR = 'blue'
VAL_BLEU_COLOR = 'red'
TEST_BLEU_COLOR = 'pink'
# Plot smoothed loss curve
steps, loss = get_plot(jlog['loss'])
smoothed_loss = smooth_moving_average(loss, 150)
stdev = moving_stdev(loss, 150)
ax1.plot(steps, smoothed_loss, label='Training loss')
ax1.plot(steps, smoothed_loss + stdev, '--', color='orange', linewidth=0.3, label='Stdev')
ax1.plot(steps, smoothed_loss - stdev, '--', color='orange', linewidth=0.3)
# Plot validation loss curve
val_steps, val_loss = get_plot(jlog['val_loss'])
ax1.plot(val_steps, val_loss, color='blue', label='Validation loss')
min_val_loss_step = val_steps[np.argmin(val_loss)]
ax1.axvline(min_val_loss_step, linestyle='dashed', color=VAL_LOSS_COLOR, linewidth=0.5, label='Validation loss minimum')
# Plot BLEU curves
ax2 = ax1.twinx()
ax2.set_ylabel('BLEU')
val_steps, val_bleu = get_plot(jlog['val_bleu'])
ax2.plot(val_steps, val_bleu, color=VAL_BLEU_COLOR, label='Validation BLEU')
mvb_step, _ = highlight_max_point((val_steps, val_bleu), color=VAL_BLEU_COLOR)
# values to be labeled on plot
max_val_bleu_step = val_steps[np.argmax(val_bleu)]
max_val_bleu = val_bleu[val_steps.index(max_val_bleu_step)]
min_loss_bleu = val_bleu[val_steps.index(min_val_loss_step)]
if 'test_bleu' in jlog:
test_steps, test_bleu = get_plot(jlog['test_bleu'])
ax2.plot(test_steps, test_bleu, color=TEST_BLEU_COLOR, label='Test BLEU')
highlight_max_point((test_steps, test_bleu), color=TEST_BLEU_COLOR)
ax2.tick_params(axis='y')
# Annotate points with highest BLEU score as well as those for minimal validation loss
ax2.plot(min_val_loss_step, min_loss_bleu, 'bo-', color=VAL_BLEU_COLOR)
ax2.annotate("{:.2f}".format(min_loss_bleu), (min_val_loss_step, min_loss_bleu))
if 'test_bleu' in jlog:
min_loss_test_bleu = test_bleu[val_steps.index(min_val_loss_step)] #BLEU score on test set when validation loss is minimal
ax2.plot(min_val_loss_step, min_loss_test_bleu, 'bo-', color=TEST_BLEU_COLOR)
ax2.annotate("{:.2f}".format(min_loss_test_bleu), (min_val_loss_step, min_loss_test_bleu))
max_val_bleu_test = test_bleu[val_steps.index(max_val_bleu_step)] #BLEU score on test set when BLEU score on dev set is maximal
ax2.plot(mvb_step, max_val_bleu_test, 'bo-', color=TEST_BLEU_COLOR)
ax2.annotate("{:.2f}".format(max_val_bleu_test), (max_val_bleu_step, max_val_bleu_test))
ax1.legend(loc='lower left', bbox_to_anchor=(1,0))
ax2.legend(loc='upper left', bbox_to_anchor=(1,1))
plt.grid()
plt.savefig(args.output)
# Produce json with training summary
if args.dump_json:
summary = OrderedDict()
summary['args'] = OrderedDict(jlog['parameters'])
summary['min_val_loss'] = min(val_loss)
summary['max_val_bleu'] = max(val_bleu)
summary['max_test_bleu'] = max(test_bleu) if 'test_bleu' in jlog else None
summary['final_values'] = jlog['training_summary']
summary['avg_epoch_loss'] = [x.mean() for x in np.array_split(np.array(loss), jlog['parameters']['max_epoch'])]
summary['min_val_loss_step'] = min_val_loss_step
json.dump(summary, open(args.dump_json, 'w'))
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--title', type=str)
parser.add_argument('--log-file', type=str)
parser.add_argument('--output' ,'-o', type=str)
parser.add_argument('--dump-json', '-j', type=str)
args = parser.parse_args()
main(args)
#!/bin/bash
# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
NV_VISIBLE_DEVICES=${1:-"0"}
DOCKER_BRIDGE=${2:-"host"}
checkpoint=${3:-"/checkpoints/checkpoint_jit.pt"}
batch_size=${4:-"5120"}
WORKSPACE=${5:-"/workspace/translation"}
triton_model_version=${6:-1}
triton_model_name=${7:-"transformer"}
triton_dyn_batching_delay=${8:-0}
triton_engine_count=${9:-1}
triton_model_overwrite=${10:-"False"}
DEPLOYER="deployer.py"
#TODO: add fp16 option
CMD="python triton/${DEPLOYER} \
--ts-script \
--save-dir ${WORKSPACE}/triton/triton_models \
--triton-model-name ${triton_model_name} \
--triton-model-version ${triton_model_version} \
--triton-max-batch-size ${batch_size} \
--triton-dyn-batching-delay ${triton_dyn_batching_delay} \
--triton-engine-count ${triton_engine_count} "
ENCODER_EXPORT_CMD="$CMD --triton-model-name ${triton_model_name}-encoder"
DECODER_EXPORT_CMD="$CMD --triton-model-name ${triton_model_name}-decoder"
MODEL_ARGS=" -- --checkpoint ${checkpoint} \
--batch-size=${batch_size} \
--num-batches=2 \
--data /data "
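# the leading ' -- ' in MODEL_ARGS separates deployer flags from model flags
# (consumed as argparse.REMAINDER in deployer_lib.create_deployer)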
ENCODER_EXPORT_CMD+="${MODEL_ARGS} --part encoder"
DECODER_EXPORT_CMD+="${MODEL_ARGS} --part decoder"
echo Exporting encoder...
bash scripts/docker/launch.sh "${ENCODER_EXPORT_CMD}" ${NV_VISIBLE_DEVICES} ${DOCKER_BRIDGE}
echo Exporting decoder...
bash scripts/docker/launch.sh "${DECODER_EXPORT_CMD}" ${NV_VISIBLE_DEVICES} ${DOCKER_BRIDGE}
#! /bin/bash
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nvidia-smi
RESULTS_DIR='/results'
CHECKPOINTS_DIR='/results/checkpoints'
STAT_FILE=${RESULTS_DIR}/DGX1_amp_8GPU.json
mkdir -p $CHECKPOINTS_DIR
SEED=${1:-1}
LR=${2:-0.000846}
WARMUP=${3:-4000}
NUM_EPOCHS=${4:-40}
BATCH_SIZE=${5:-10240}
NUM_GPU=${6:-8}
DISTRIBUTED="-m torch.distributed.launch --nproc_per_node=${NUM_GPU}"
python ${DISTRIBUTED} /workspace/translation/train.py \
/data/wmt14_en_de_joined_dict \
--arch transformer_wmt_en_de_big_t2t \
--share-all-embeddings \
--optimizer adam \
--adam-betas 0.9 0.997 \
--adam-eps 1e-9 \
--clip-norm 0.0 \
--lr-scheduler inverse_sqrt \
--warmup-init-lr 0.0 \
--warmup-updates ${WARMUP} \
--lr $LR \
--min-lr 0.0 \
--dropout 0.1 \
--weight-decay 0.0 \
--criterion label_smoothed_cross_entropy \
--label-smoothing 0.1 \
--max-tokens ${BATCH_SIZE} \
--seed ${SEED} \
--max-epoch ${NUM_EPOCHS} \
--no-epoch-checkpoints \
--fuse-layer-norm \
--online-eval \
--log-interval 500 \
--save-dir ${RESULTS_DIR} \
--stat-file ${STAT_FILE} \
--amp
#! /bin/bash
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nvidia-smi
RESULTS_DIR='/results'
CHECKPOINTS_DIR='/results/checkpoints'
STAT_FILE=${RESULTS_DIR}/DGX1_fp32_8GPU.json
mkdir -p $CHECKPOINTS_DIR
SEED=${1:-1}
LR=${2:-0.0006}
WARMUP=${3:-4000}
NUM_EPOCHS=${4:-40}
BATCH_SIZE=${5:-5120}
NUM_GPU=${6:-8}
DISTRIBUTED="-m torch.distributed.launch --nproc_per_node=${NUM_GPU}"
python ${DISTRIBUTED} /workspace/translation/train.py \
/data/wmt14_en_de_joined_dict \
--arch transformer_wmt_en_de_big_t2t \
--share-all-embeddings \
--optimizer adam \
--adam-betas 0.9 0.997 \
--adam-eps 1e-9 \
--clip-norm 0.0 \
--lr-scheduler inverse_sqrt \
--warmup-init-lr 0.0 \
--warmup-updates ${WARMUP} \
--lr $LR \
--min-lr 0.0 \
--dropout 0.1 \
--weight-decay 0.0 \
--criterion label_smoothed_cross_entropy \
--label-smoothing 0.1 \
--max-tokens ${BATCH_SIZE} \
--seed ${SEED} \
--max-epoch ${NUM_EPOCHS} \
--no-epoch-checkpoints \
--fuse-layer-norm \
--online-eval \
--log-interval 500 \
--save-dir ${RESULTS_DIR} \
--stat-file ${STAT_FILE}
#! /bin/bash
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nvidia-smi
RESULTS_DIR='/results'
CHECKPOINTS_DIR='/results/checkpoints'
STAT_FILE=${RESULTS_DIR}/DGXA100_amp_8GPU_log.json
mkdir -p $CHECKPOINTS_DIR
SEED=${1:-1}
LR=${2:-0.000846}
WARMUP=${3:-4000}
NUM_EPOCHS=${4:-40}
BATCH_SIZE=${5:-10240}
NUM_GPU=${6:-8}
DISTRIBUTED="-m torch.distributed.launch --nproc_per_node=${NUM_GPU}"
python ${DISTRIBUTED} /workspace/translation/train.py \
/data/wmt14_en_de_joined_dict \
--arch transformer_wmt_en_de_big_t2t \
--share-all-embeddings \
--optimizer adam \
--adam-betas 0.9 0.997 \
--adam-eps 1e-9 \
--clip-norm 0.0 \
--lr-scheduler inverse_sqrt \
--warmup-init-lr 0.0 \
--warmup-updates ${WARMUP} \
--lr $LR \
--min-lr 0.0 \
--dropout 0.1 \
--weight-decay 0.0 \
--criterion label_smoothed_cross_entropy \
--label-smoothing 0.1 \
--max-tokens ${BATCH_SIZE} \
--seed ${SEED} \
--max-epoch ${NUM_EPOCHS} \
--no-epoch-checkpoints \
--fuse-layer-norm \
--online-eval \
--log-interval 500 \
--save-dir ${RESULTS_DIR} \
--stat-file ${STAT_FILE} \
--amp
#! /bin/bash
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nvidia-smi
RESULTS_DIR='/results'
CHECKPOINTS_DIR='/results/checkpoints'
STAT_FILE=${RESULTS_DIR}/DGXA100_tf32_8GPU_log.json
mkdir -p $CHECKPOINTS_DIR
PREC=${1:-'tf32'}
SEED=${2:-1}
LR=${3:-0.000846}
WARMUP=${4:-4000}
NUM_EPOCHS=${5:-40}
BATCH_SIZE=${6:-10240}
NUM_GPU=${7:-8}
DISTRIBUTED="-m torch.distributed.launch --nproc_per_node=${NUM_GPU}"
if [ "$PREC" = "fp32" ];
then
PREC=''
export NVIDIA_TF32_OVERRIDE=0
else
PREC=''
fi
python ${DISTRIBUTED} /workspace/translation/train.py \
/data/wmt14_en_de_joined_dict \
--arch transformer_wmt_en_de_big_t2t \
--share-all-embeddings \
--optimizer adam \
--adam-betas 0.9 0.997 \
--adam-eps 1e-9 \
--clip-norm 0.0 \
--lr-scheduler inverse_sqrt \
--warmup-init-lr 0.0 \
--warmup-updates ${WARMUP} \
--lr $LR \
--min-lr 0.0 \
--dropout 0.1 \
--weight-decay 0.0 \
--criterion label_smoothed_cross_entropy \
--label-smoothing 0.1 \
--max-tokens ${BATCH_SIZE} \
--seed ${SEED} \
--max-epoch ${NUM_EPOCHS} \
--no-epoch-checkpoints \
--fuse-layer-norm \
--online-eval \
--log-interval 500 \
--save-dir ${RESULTS_DIR} \
--stat-file ${STAT_FILE}
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
DATASET_DIR=/data/wmt14_en_de_joined_dict
TEXT=examples/translation/wmt14_en_de
(
cd examples/translation
bash prepare-wmt14en2de.sh --scaling18
)
python preprocess.py \
--source-lang en \
--target-lang de \
--trainpref $TEXT/train \
--validpref $TEXT/valid \
--testpref $TEXT/test \
--destdir ${DATASET_DIR} \
--nwordssrc 33712 \
--nwordstgt 33712 \
--joined-dictionary
cp $TEXT/code $DATASET_DIR/code
cp $TEXT/tmp/valid.raw.de $DATASET_DIR/valid.raw.de
sacrebleu -t wmt14/full -l en-de --echo ref > $DATASET_DIR/test.raw.de
#! /bin/bash
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nvidia-smi
RESULTS_DIR='/results'
CHECKPOINTS_DIR='/results/checkpoints'
STAT_FILE=${RESULTS_DIR}/run_log.json
mkdir -p $CHECKPOINTS_DIR
: ${PREC:='amp'}
: ${SEED:=1}
: ${LR:=0.000846}
: ${WARMUP:=4000}
: ${NUM_EPOCHS:=40}
: ${BS:=5120}
: ${NUM_GPU:=8}
: ${USE_SLURM:=0}
: ${USE_DISTRIBUTED:=1}
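# With USE_SLURM=1, WORLD_SIZE, SLURM_NODEID, MASTER_ADDR and MASTER_PORT are expected to be provided by the scheduler environment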
DISTRIBUTED=""
[ ${USE_DISTRIBUTED} = 1 ] && DISTRIBUTED+="-m torch.distributed.launch --nproc_per_node=${NUM_GPU}"
[ ${USE_DISTRIBUTED} = 1 ] && [ ${USE_SLURM} = 1 ] && DISTRIBUTED+=" --nnodes ${WORLD_SIZE} --node_rank ${SLURM_NODEID} \
--master_addr ${MASTER_ADDR} --master_port ${MASTER_PORT} "
if [ "$PREC" = "amp" ];
then
PREC='--amp '
else
PREC=''
fi
python ${DISTRIBUTED} /workspace/translation/train.py \
/data/ \
--arch transformer_wmt_en_de_big_t2t \
--share-all-embeddings \
--optimizer adam \
--adam-betas 0.9 0.997 \
--adam-eps 1e-9 \
--clip-norm 0.0 \
--lr-scheduler inverse_sqrt \
--warmup-init-lr 0.0 \
--warmup-updates ${WARMUP} \
--lr $LR \
--min-lr 0.0 \
--dropout 0.1 \
--weight-decay 0.0 \
--criterion label_smoothed_cross_entropy \
--label-smoothing 0.1 \
--max-tokens ${BS} \
--seed ${SEED} \
--max-epoch ${NUM_EPOCHS} \
--no-save \
--fuse-layer-norm \
--online-eval \
--log-interval 500 \
--save-dir ${RESULTS_DIR} \
--stat-file ${STAT_FILE} \
${PREC}
#!/usr/bin/env python3
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#-------------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup, find_packages, Extension
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension
import sys
if sys.version_info < (3,):
sys.exit('Sorry, Python3 is required for fairseq.')
with open('README.md') as f:
readme = f.read()
with open('LICENSE') as f:
license = f.read()
with open('requirements.txt') as f:
reqs = f.read()
extra_compile_args = {'cxx' : ['-O2']}
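# nvcc flags: build the CUDA kernels for Volta (compute_70/sm_70) and Ampere (compute_80/sm_80) GPUs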
extra_compile_args['nvcc'] = ['-O3',
'-I./cutlass/',
'-U__CUDA_NO_HALF_OPERATORS__',
'-U__CUDA_NO_HALF_CONVERSIONS__',
'-gencode', 'arch=compute_70,code=sm_70',
'-gencode', 'arch=compute_70,code=compute_70',
'-gencode', 'arch=compute_80,code=sm_80',
'-gencode', 'arch=compute_80,code=compute_80',
]
strided_batched_gemm = CUDAExtension(
name='strided_batched_gemm',
sources=['fairseq/modules/strided_batched_gemm/strided_batched_gemm.cpp', 'fairseq/modules/strided_batched_gemm/strided_batched_gemm_cuda.cu'],
extra_compile_args=extra_compile_args
)
batch_utils = CppExtension(
name='fairseq.data.batch_C',
sources=['fairseq/data/csrc/make_batches.cpp'],
extra_compile_args={
'cxx': ['-O2',],
}
)
setup(
name='fairseq',
version='0.5.0',
description='Facebook AI Research Sequence-to-Sequence Toolkit',
long_description=readme,
license=license,
install_requires=reqs.strip().split('\n'),
packages=find_packages(),
ext_modules=[strided_batched_gemm, batch_utils],
cmdclass={
'build_ext': BuildExtension.with_options(use_ninja=False)
},
test_suite='tests',
)