"vscode:/vscode.git/clone" did not exist on "17485202632393af4967ce727acc320e017ece44"
Commit 9e8a8c05 authored by jerrrrry's avatar jerrrrry
Browse files

Initial commit

parents
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
import ctypes
import math
import torch
try:
    from fairseq import libbleu
except ImportError as e:
    import sys
    sys.stderr.write('ERROR: missing libbleu.so. run `python setup.py install`\n')
    raise e

C = ctypes.cdll.LoadLibrary(libbleu.__file__)


class BleuStat(ctypes.Structure):
    # mirrors the bleu_stat struct defined in the C++ library
    _fields_ = [
        ('reflen', ctypes.c_size_t),
        ('predlen', ctypes.c_size_t),
        ('match1', ctypes.c_size_t),
        ('count1', ctypes.c_size_t),
        ('match2', ctypes.c_size_t),
        ('count2', ctypes.c_size_t),
        ('match3', ctypes.c_size_t),
        ('count3', ctypes.c_size_t),
        ('match4', ctypes.c_size_t),
        ('count4', ctypes.c_size_t),
    ]


class Scorer(object):
    def __init__(self, pad, eos):
        self.stat = BleuStat()
        self.pad = pad
        self.eos = eos
        self.reset()

    def reset(self, one_init=False):
        if one_init:
            C.bleu_one_init(ctypes.byref(self.stat))
        else:
            C.bleu_zero_init(ctypes.byref(self.stat))

    def add(self, ref, pred):
        if not isinstance(ref, torch.IntTensor):
            raise TypeError('ref must be a torch.IntTensor (got {})'
                            .format(type(ref)))
        if not isinstance(pred, torch.IntTensor):
            raise TypeError('pred must be a torch.IntTensor (got {})'
                            .format(type(pred)))

        # don't match unknown words
        rref = ref.clone()
        assert not rref.lt(0).any()
        #rref[rref.eq(self.unk)] = -999

        rref = rref.contiguous().view(-1)
        pred = pred.contiguous().view(-1)

        C.bleu_add(
            ctypes.byref(self.stat),
            ctypes.c_size_t(rref.size(0)),
            ctypes.c_void_p(rref.data_ptr()),
            ctypes.c_size_t(pred.size(0)),
            ctypes.c_void_p(pred.data_ptr()),
            ctypes.c_int(self.pad),
            ctypes.c_int(self.eos))

    def score(self, order=4):
        psum = sum(
            math.log(p) if p > 0 else float('-Inf')
            for p in self.precision()[:order]
        )
        return self.brevity() * math.exp(psum / order) * 100

    def precision(self):
        def ratio(a, b):
            return a / b if b > 0 else 0

        return [
            ratio(self.stat.match1, self.stat.count1),
            ratio(self.stat.match2, self.stat.count2),
            ratio(self.stat.match3, self.stat.count3),
            ratio(self.stat.match4, self.stat.count4),
        ]

    def brevity(self):
        # brevity penalty: min(1, exp(1 - reflen / predlen))
        r = self.stat.reflen / self.stat.predlen
        return min(1, math.exp(1 - r))

    def result_string(self, order=4):
        assert order <= 4, "BLEU scores for order > 4 aren't supported"
        fmt = 'BLEU{} = {:2.2f}, {:2.1f}'
        for _ in range(1, order):
            fmt += '/{:2.1f}'
        fmt += ' (BP={:.3f}, ratio={:.3f}, syslen={}, reflen={})'
        bleup = [p * 100 for p in self.precision()[:order]]
        return fmt.format(order, self.score(order=order), *bleup,
                          self.brevity(), self.stat.predlen / self.stat.reflen,
                          self.stat.predlen, self.stat.reflen)
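

# A minimal usage sketch of the Scorer above, assuming libbleu has been built
# via `python setup.py install`. The pad/eos indices and token ids below are
# made-up illustrative values; real ids come from the fairseq Dictionary.
if __name__ == '__main__':
    scorer = Scorer(pad=1, eos=2)
    ref = torch.IntTensor([7, 8, 9, 10, 11, 2])   # reference ending in eos
    hyp = torch.IntTensor([7, 8, 9, 10, 12, 2])   # system hypothesis
    scorer.add(ref, hyp)
    print(scorer.result_string(order=4))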
/**
* Copyright 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <map>
#include <array>
#include <cstring>
#include <cstdio>

typedef struct
{
  size_t reflen;
  size_t predlen;
  size_t match1;
  size_t count1;
  size_t match2;
  size_t count2;
  size_t match3;
  size_t count3;
  size_t match4;
  size_t count4;
} bleu_stat;

// left trim (remove pad)
void bleu_ltrim(size_t* len, int** sent, int pad) {
  size_t start = 0;
  while (start < *len) {
    if (*(*sent + start) != pad) { break; }
    start++;
  }
  *sent += start;
  *len -= start;
}

// right trim (remove eos and pad)
void bleu_rtrim(size_t* len, int** sent, int pad, int eos) {
  if (*len == 0) { return; }  // guard against an all-pad sentence
  size_t end = *len - 1;
  while (end > 0) {
    if (*(*sent + end) != eos && *(*sent + end) != pad) { break; }
    end--;
  }
  *len = end + 1;
}

// left and right trim
void bleu_trim(size_t* len, int** sent, int pad, int eos) {
  bleu_ltrim(len, sent, pad);
  bleu_rtrim(len, sent, pad, eos);
}

// FNV-1a hash over the raw bytes of an n-gram of `len` ints
size_t bleu_hash(int len, int* data) {
  size_t h = 14695981039346656037ul;
  size_t prime = 0x100000001b3;
  char* b = (char*) data;
  size_t blen = sizeof(int) * len;

  while (blen-- > 0) {
    h ^= *b++;
    h *= prime;
  }

  return h;
}

// accumulate the n-gram total (ntotal) and clipped matches (nmatch) of order n
void bleu_addngram(
    size_t* ntotal, size_t* nmatch, size_t n,
    size_t reflen, int* ref, size_t predlen, int* pred) {

  if (predlen < n) { return; }

  predlen = predlen - n + 1;
  (*ntotal) += predlen;

  if (reflen < n) { return; }

  reflen = reflen - n + 1;

  std::map<size_t, size_t> count;
  while (predlen > 0) {
    size_t w = bleu_hash(n, pred++);
    count[w]++;
    predlen--;
  }

  while (reflen > 0) {
    size_t w = bleu_hash(n, ref++);
    if (count[w] > 0) {
      (*nmatch)++;
      count[w] -= 1;
    }
    reflen--;
  }
}
extern "C" {
void bleu_zero_init(bleu_stat* stat) {
std::memset(stat, 0, sizeof(bleu_stat));
}
void bleu_one_init(bleu_stat* stat) {
bleu_zero_init(stat);
stat->count1 = 0;
stat->count2 = 1;
stat->count3 = 1;
stat->count4 = 1;
stat->match1 = 0;
stat->match2 = 1;
stat->match3 = 1;
stat->match4 = 1;
}
void bleu_add(
bleu_stat* stat,
size_t reflen, int* ref, size_t predlen, int* pred, int pad, int eos) {
bleu_trim(&reflen, &ref, pad, eos);
bleu_trim(&predlen, &pred, pad, eos);
stat->reflen += reflen;
stat->predlen += predlen;
bleu_addngram(&stat->count1, &stat->match1, 1, reflen, ref, predlen, pred);
bleu_addngram(&stat->count2, &stat->match2, 2, reflen, ref, predlen, pred);
bleu_addngram(&stat->count3, &stat->match3, 3, reflen, ref, predlen, pred);
bleu_addngram(&stat->count4, &stat->match4, 4, reflen, ref, predlen, pred);
}
}
/**
* Copyright 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <Python.h>

static PyMethodDef method_def[] = {
  {NULL, NULL, 0, NULL}
};

static struct PyModuleDef module_def = {
  PyModuleDef_HEAD_INIT,
  "libbleu",  /* name of module */
  NULL,       /* module documentation, may be NULL */
  -1,         /* size of per-interpreter state of the module,
                 or -1 if the module keeps state in global variables. */
  method_def
};

#if PY_MAJOR_VERSION == 2
PyMODINIT_FUNC init_libbleu()
#else
PyMODINIT_FUNC PyInit_libbleu()
#endif
{
  PyObject *m = PyModule_Create(&module_def);
  if (!m) {
    return NULL;
  }
  return m;
}
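

# A build sketch for the extension above: one way the C/C++ sources could be
# compiled into the `fairseq.libbleu` module that bleu.py loads through ctypes.
# The source paths and compile flags are assumptions made for illustration,
# not taken from this commit's setup.py.
from setuptools import setup, Extension

libbleu = Extension(
    'fairseq.libbleu',
    sources=[
        'fairseq/clib/libbleu/libbleu.cpp',   # assumed location of the BLEU kernels
        'fairseq/clib/libbleu/module.cpp',    # assumed location of the module init
    ],
    extra_compile_args=['-std=c++11', '-O3'],
)

setup(name='fairseq', ext_modules=[libbleu])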
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
import importlib
import os
from .fairseq_criterion import FairseqCriterion
CRITERION_REGISTRY = {}
CRITERION_CLASS_NAMES = set()


def build_criterion(args, task):
    return CRITERION_REGISTRY[args.criterion](args, task)


def register_criterion(name):
    """Decorator to register a new criterion."""

    def register_criterion_cls(cls):
        if name in CRITERION_REGISTRY:
            raise ValueError('Cannot register duplicate criterion ({})'.format(name))
        if not issubclass(cls, FairseqCriterion):
            raise ValueError('Criterion ({}: {}) must extend FairseqCriterion'.format(name, cls.__name__))
        if cls.__name__ in CRITERION_CLASS_NAMES:
            # We use the criterion class name as a unique identifier in
            # checkpoints, so all criterions must have unique class names.
            raise ValueError('Cannot register criterion with duplicate class name ({})'.format(cls.__name__))
        CRITERION_REGISTRY[name] = cls
        CRITERION_CLASS_NAMES.add(cls.__name__)
        return cls

    return register_criterion_cls


# automatically import any Python files in the criterions/ directory
for file in os.listdir(os.path.dirname(__file__)):
    if file.endswith('.py') and not file.startswith('_'):
        module = file[:file.find('.py')]
        importlib.import_module('fairseq.criterions.' + module)
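

# A usage sketch of the registry above: a minimal, hypothetical criterion
# registered under an illustrative name. The real criterions live in the
# modules picked up by the import loop; this only shows the decorator pattern.
import torch.nn.functional as F


@register_criterion('example_token_nll')
class ExampleTokenNllCriterion(FairseqCriterion):
    """Token-level negative log-likelihood, summed over the batch."""

    def forward(self, model, sample, reduce=True):
        net_output = model(**sample['net_input'])
        lprobs = model.get_normalized_probs(net_output, log_probs=True)
        lprobs = lprobs.view(-1, lprobs.size(-1))
        target = model.get_targets(sample, net_output).view(-1)
        loss = F.nll_loss(lprobs, target, size_average=False,
                          ignore_index=self.padding_idx, reduce=reduce)
        sample_size = sample['ntokens']
        logging_output = {
            'loss': loss.item() if reduce else loss,
            'ntokens': sample['ntokens'],
            'sample_size': sample_size,
        }
        return loss, sample_size, logging_output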
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
import math
import torch.nn.functional as F
from fairseq import utils
from . import FairseqCriterion, register_criterion
@register_criterion('adaptive_loss')
class AdaptiveLoss(FairseqCriterion):
"""This is an implementation of the loss function accompanying the adaptive softmax approximation for
graphical processing units (GPU), described in the paper "Efficient softmax approximation for GPUs"
(http://arxiv.org/abs/1609.04309)."""
def __init__(self, args, task):
super().__init__(args, task)
def forward(self, model, sample, reduce=True):
"""Compute the loss for the given sample.
Returns a tuple with three elements:
1) the loss
2) the sample size, which is used as the denominator for the gradient
3) logging outputs to display while training
"""
assert hasattr(model.decoder, 'adaptive_softmax') and model.decoder.adaptive_softmax is not None
adaptive_softmax = model.decoder.adaptive_softmax
net_output = model(**sample['net_input'])
target = model.get_targets(sample, net_output).view(-1)
bsz = target.size(0)
logits, target = adaptive_softmax(net_output[0], target)
assert len(target) == len(logits)
loss = net_output[0].new(1 if reduce else bsz).zero_()
for i in range(len(target)):
if target[i] is not None:
assert (target[i].min() >= 0 and target[i].max() <= logits[i].size(1))
loss += F.cross_entropy(logits[i], target[i], size_average=False, ignore_index=self.padding_idx,
reduce=reduce)
sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
logging_output = {
'loss': utils.item(loss.data) if reduce else loss.data,
'ntokens': sample['ntokens'],
'sample_size': sample_size,
}
return loss, sample_size, logging_output
@staticmethod
def aggregate_logging_outputs(logging_outputs):
"""Aggregate logging outputs from data parallel training."""
loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
agg_output = {
'loss': loss_sum / sample_size / math.log(2),
'sample_size': sample_size,
}
if sample_size != ntokens:
agg_output['nll_loss'] = loss_sum / ntokens / math.log(2)
return agg_output
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
import math
import torch.nn.functional as F
from fairseq import utils
from . import FairseqCriterion, register_criterion
@register_criterion('cross_entropy')
class CrossEntropyCriterion(FairseqCriterion):
    def __init__(self, args, task):
        super().__init__(args, task)

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        net_output = model(**sample['net_input'])
        lprobs = model.get_normalized_probs(net_output, log_probs=True)
        lprobs = lprobs.view(-1, lprobs.size(-1))
        target = model.get_targets(sample, net_output).view(-1)
        loss = F.nll_loss(lprobs, target, size_average=False,
                          ignore_index=self.padding_idx, reduce=reduce)
        sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
        logging_output = {
            'loss': utils.item(loss.data) if reduce else loss.data,
            'ntokens': sample['ntokens'],
            'sample_size': sample_size,
        }
        return loss, sample_size, logging_output

    @staticmethod
    def aggregate_logging_outputs(logging_outputs):
        """Aggregate logging outputs from data parallel training."""
        loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
        ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
        sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
        agg_output = {
            'loss': loss_sum / sample_size / math.log(2),
            'sample_size': sample_size,
        }
        if sample_size != ntokens:
            agg_output['nll_loss'] = loss_sum / ntokens / math.log(2)
        return agg_output
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
from torch.nn.modules.loss import _Loss
class FairseqCriterion(_Loss):
    def __init__(self, args, task):
        super().__init__()
        self.args = args
        self.padding_idx = task.target_dictionary.pad()

    @staticmethod
    def add_args(parser):
        """Add criterion-specific arguments to the parser."""
        pass

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        raise NotImplementedError

    @staticmethod
    def aggregate_logging_outputs(logging_outputs):
        """Aggregate logging outputs from data parallel training."""
        raise NotImplementedError

    @staticmethod
    def grad_denom(sample_sizes):
        """Compute the gradient denominator for a set of sample sizes."""
        return sum(sample_sizes)
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
import torch
import math
from fairseq import utils
from . import FairseqCriterion, register_criterion
@register_criterion('label_smoothed_cross_entropy')
class LabelSmoothedCrossEntropyCriterion(FairseqCriterion):
    def __init__(self, args, task):
        super().__init__(args, task)
        self.eps = args.label_smoothing
        # pinned CPU buffer so the loss can be copied off the GPU asynchronously
        self.cpu_loss = torch.empty(1, dtype=torch.float32, device=torch.device('cpu'))
        self.cpu_loss = self.cpu_loss.pin_memory()
        if args.fast_xentropy:
            from apex.contrib.xentropy import SoftmaxCrossEntropyLoss
            self.xentropy_func = SoftmaxCrossEntropyLoss.apply
        else:
            self.xentropy_func = None

    @staticmethod
    def add_args(parser):
        """Add criterion-specific arguments to the parser."""
        parser.add_argument('--label-smoothing', default=0., type=float, metavar='D',
                            help='epsilon for label smoothing, 0 means no label smoothing')
        parser.add_argument('--fast-xentropy', action='store_true',
                            help='use the fused log-softmax and cross-entropy kernel from apex')

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        net_output = model(**sample['net_input'])
        target = model.get_targets(sample, net_output).view(-1, 1)

        if self.xentropy_func is not None:
            assert net_output[0].dtype in (torch.float16, torch.float32), "Unsupported data types"
            output = net_output[0].view(net_output[0].size(0) * net_output[0].size(1), net_output[0].size(2))
            labels = target.view(target.size(0) * target.size(1))
            losses = self.xentropy_func(output, labels, self.eps, self.padding_idx,
                                        net_output[0].dtype == torch.float16)
            loss = losses.sum()
        else:
            lprobs = model.get_normalized_probs(net_output, log_probs=True)
            lprobs = lprobs.view(-1, lprobs.size(-1))
            non_pad_mask = target.ne(self.padding_idx)
            nll_loss = -lprobs.gather(dim=-1, index=target)[non_pad_mask]
            smooth_loss = -lprobs.sum(dim=-1, keepdim=True)[non_pad_mask]
            if reduce:
                nll_loss = nll_loss.sum()
                smooth_loss = smooth_loss.sum()
            eps_i = self.eps / lprobs.size(-1)
            loss = (1. - self.eps) * nll_loss + eps_i * smooth_loss

        sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
        # copy the loss to the CPU without forcing a synchronization
        self.cpu_loss.copy_(loss.detach(), non_blocking=True)
        logging_output = {
            'loss': utils.item(self.cpu_loss) if reduce else self.cpu_loss.data,
            #'nll_loss': utils.item(nll_loss.data) if reduce else nll_loss.data,
            'ntokens': sample['ntokens'],
            'sample_size': sample_size,
        }
        return loss, sample_size, logging_output

    @staticmethod
    def aggregate_logging_outputs(logging_outputs):
        """Aggregate logging outputs from data parallel training."""
        ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
        sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
        return {
            'loss': sum(log.get('loss', 0) for log in logging_outputs) / sample_size / math.log(2),
            #'nll_loss': sum(log.get('nll_loss', 0) for log in logging_outputs) / ntokens / math.log(2),
            'sample_size': sample_size,
        }
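

# A standalone reference sketch of the label-smoothing arithmetic used above,
# written out for a single target position without padding or masking. The
# vocabulary size, epsilon and random scores are illustrative values only.
def _label_smoothing_reference():
    eps, vocab = 0.1, 5
    scores = torch.randn(1, vocab)
    target = torch.tensor([2])

    lprobs = torch.log_softmax(scores, dim=-1)
    nll_loss = -lprobs[0, target]              # standard NLL term
    smooth_loss = -lprobs.sum(dim=-1)          # sum over the whole vocabulary
    eps_i = eps / vocab
    loss = (1. - eps) * nll_loss + eps_i * smooth_loss

    # equivalently: cross-entropy against the smoothed target distribution
    smoothed = torch.full((1, vocab), eps_i)
    smoothed[0, target] += 1. - eps
    assert torch.allclose(loss, -(smoothed * lprobs).sum(dim=-1))
    return loss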
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
from .dictionary import Dictionary
from .fairseq_dataset import FairseqDataset
from .indexed_dataset import IndexedDataset, IndexedInMemoryDataset, IndexedRawTextDataset # noqa: F401
from .language_pair_dataset import LanguagePairDataset
from .monolingual_dataset import MonolingualDataset
from .token_block_dataset import TokenBlockDataset
from .data_utils import EpochBatchIterator