# Commit 581b8d15 authored by liangjing
# Browse files
#
# version 1
#
# parents
# Pipeline #169 failed with stages
# in 0 seconds
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import io
import json
import numpy as np
import os
import warnings
class Vocab(object):
    """
    The class used to convert between tokens and ids. It also includes some
    store/load functions.

    Args:
        counter (collections.Counter, optional): A Counter instance that
            describes the tokens and their frequencies. Its keys will be
            indexed according to the order of frequency sorting to construct
            the mapping relationship. If None, `token_to_idx` must be provided
            as the mapping relationship. Default: None.
        max_size (int, optional): Max size of vocab, not including special
            tokens. Default: None.
        min_freq (int, optional): Ignore tokens whose frequencies are less
            than `min_freq`. Default: 1.
        token_to_idx (dict, optional): A dict that specifies the mapping
            relationship between tokens and indices to be used. If provided,
            adjust the tokens and indices mapping according to it. If None,
            counter must be provided. Default: None.
        unk_token (str, optional): Special token for unknown tokens. If no
            need, it also could be None. Default: None.
        pad_token (str, optional): Special token for padding. If no need, it
            also could be None. Default: None.
        bos_token (str, optional): Special token for beginning of sequence.
            If no need, it also could be None. Default: None.
        eos_token (str, optional): Special token for end of sequence. If no
            need, it also could be None. Default: None.
        kwargs (dict): Keyword arguments ending with `_token`. It can be used
            to specify further special tokens that will be exposed as an
            attribute of the vocabulary and associated with an index.
    """

    def __init__(self,
                 counter=None,
                 max_size=None,
                 min_freq=1,
                 token_to_idx=None,
                 unk_token=None,
                 pad_token=None,
                 bos_token=None,
                 eos_token=None,
                 **kwargs):
        # Handle special tokens: fold the four named special tokens into
        # `kwargs` so they are processed uniformly together with any
        # user-supplied `*_token` keyword arguments.
        combs = (('unk_token', unk_token), ('pad_token', pad_token),
                 ('bos_token', bos_token), ('eos_token', eos_token))
        for name, value in combs:
            kwargs[name] = value
        special_tokens = []
        special_iter = kwargs.keys()
        # sort alphabetically so the indices assigned to special tokens are
        # deterministic regardless of keyword-argument order
        special_iter = sorted(special_iter)
        for special_token_name in special_iter:
            # Test if kwarg specifies a special token
            if not special_token_name.endswith('_token'):
                raise ValueError('{} is invalid. Only keyword arguments '
                                 'that end in \'_token\' are supported '
                                 'to declare special tokens.'.format(
                                     special_token_name))
            special_token = kwargs[special_token_name]
            if special_token is not None and special_token not in special_tokens:
                special_tokens.append(special_token)
        if counter is None:
            # use token_to_idx as dict to import pretrained vocabulary
            assert token_to_idx, (
                'token_to_idx should not be None when counter is None')
            for special_token in special_tokens:
                assert special_token in token_to_idx, '{} is not in token_to_idx'.format(
                    special_token)
            self._token_to_idx = token_to_idx
            self._idx_to_token = {
                idx: token
                for token, idx in token_to_idx.items()
            }
            if unk_token:
                # Lookups of out-of-vocabulary tokens silently fall back to
                # the index of `unk_token` via the defaultdict factory.
                unk_index = self._token_to_idx[unk_token]
                self._token_to_idx = collections.defaultdict(lambda: unk_index)
                self._token_to_idx.update(token_to_idx)
        else:
            # Special tokens occupy the lowest indices (0, 1, ...), in the
            # alphabetical order of their keyword names.
            self._idx_to_token = {
                idx: special_token
                for idx, special_token in enumerate(special_tokens)
            }
            # defaultdict with no factory behaves like a plain dict until a
            # default_factory is installed below (when unk_token is set).
            self._token_to_idx = collections.defaultdict()
            self._token_to_idx.update(
                (token, idx) for idx, token in self._idx_to_token.items())
            self._index_counter_keys(counter, special_tokens, max_size,
                                     min_freq)
            if token_to_idx:
                self._sort_index_according_to_user_specification(token_to_idx)
            if unk_token:
                self._token_to_idx.default_factory = lambda: self._token_to_idx[unk_token]
        # _expose_tokens_as_attributes: make e.g. `vocab.unk_token` readable.
        self._identifiers_to_tokens = kwargs
        for identifier, token in kwargs.items():
            if identifier.startswith('_'):
                raise ValueError(
                    'It is not allowed to use identifiers starting with '
                    'underscore. In Python identifier names beginning with '
                    'underscore are internal.')
            if hasattr(self, identifier):
                raise ValueError(
                    'vocab.{} already exists. '
                    'Please choose a different identifier for token {}'.format(
                        identifier, token))
            setattr(self, identifier, token)

    def _index_counter_keys(self, counter, special_tokens, max_size, min_freq):
        """Append `counter`'s tokens (special tokens excluded) to the index,
        most frequent first, respecting `max_size` and `min_freq`."""
        # sort by frequency, then alphabetically (the stable second sort
        # preserves the alphabetical order within equal frequencies)
        token_freqs = sorted(counter.items(), key=lambda x: x[0])
        token_freqs.sort(key=lambda x: x[1], reverse=True)
        # frequencies of special tokens are not counted when building vocabulary
        # in frequency order
        special_tokens = set(special_tokens)
        max_size = None if max_size is None else max_size + len(special_tokens)
        for token, freq in token_freqs:
            if freq < min_freq or len(self._idx_to_token) == max_size:
                break
            if token not in special_tokens:
                # Next free index is one past the current maximum (-1 keeps
                # this well-defined when the vocabulary is still empty).
                self._idx_to_token[max(list(self._idx_to_token.keys()) + [-1]) +
                                   1] = token
                self._token_to_idx[token] = max(self._idx_to_token.keys())

    def _sort_index_according_to_user_specification(self, token_to_idx):
        """Reassign indices so each token in `token_to_idx` gets exactly the
        user-specified index, swapping with the token that held that index."""
        # Sanity checks
        if not set(token_to_idx.keys()).issubset(self.token_to_idx.keys()):
            raise ValueError(
                'User-specified token_to_idx mapping can only contain '
                'tokens that will be part of the vocabulary.')
        if len(set(token_to_idx.values())) != len(token_to_idx):
            raise ValueError(
                'User-specified indices must not contain duplicates.')
        if min(token_to_idx.values()) < 0 or max(token_to_idx.values()) >= len(
                self.token_to_idx):
            raise ValueError(
                'User-specified indices must not be < 0 or >= the number of tokens '
                'that will be in the vocabulary. The current vocab contains {}'
                'tokens.'.format(len(self.token_to_idx)))
        # Update index ordering: swap each token with the one currently
        # occupying its requested index, keeping both mappings consistent.
        for token, new_idx in token_to_idx.items():
            old_idx = self.token_to_idx[token]
            ousted_token = self.idx_to_token[new_idx]
            self.token_to_idx[token] = new_idx
            self.token_to_idx[ousted_token] = old_idx
            self.idx_to_token[old_idx] = ousted_token
            self.idx_to_token[new_idx] = token

    def to_tokens(self, indices):
        """
        Maps the input indices to a token list.

        Args:
            indices (int|list[int]|tuple[int]|numpy.ndarray): The input
                index or indices for mapping. Must be an `int` or 1D
                `list[int]`|`tuple[int]`|`numpy.ndarray`.

        Returns:
            str|list[str]: Obtained token(s). If `indices` is an integer, it
            will return a str. If `indices` is a list/tuple of integers, it
            will return a list of str.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                tokens = vocab.to_tokens([0, 1, 2, 3])
                print(tokens)
                # ['[PAD]', '[UNK]', '一斤三', '意面屋']
        """
        to_reduce = False
        if not isinstance(indices, (list, tuple, np.ndarray)):
            indices = [indices]
            # Remember to unwrap the single result on return.
            to_reduce = True
        if isinstance(indices, (list, tuple)):
            indices = np.asarray(indices)
        if isinstance(indices, (np.ndarray)) and len(indices.shape) > 1:
            raise ValueError(
                'Token indices is invalid. Expected 1D array, but received {}D array. '.
                format(len(indices.shape)))
        tokens = []
        for idx in indices:
            if not isinstance(idx, (int, np.integer)):
                warnings.warn(
                    "The type of `to_tokens()`'s input `indices` is not `int` which will be forcibly transfered to `int`. "
                )
                idx = int(idx)
            try:
                tokens.append(self._idx_to_token[idx])
            except KeyError:
                raise ValueError(
                    'Token index {} in the provided `indices` is invalid.'.
                    format(idx))
        return tokens[0] if to_reduce else tokens

    def to_indices(self, tokens):
        """
        Maps the input tokens into indices.

        Args:
            tokens (str|list[str]|tuple[str], optional): The input token(s)
                for mapping.

        Returns:
            int|list[int]: Obtained index or indices. If `tokens` is a str,
            it will return an integer. If `tokens` is a list/tuple of str,
            it will return a list of integers.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                tokens = vocab.to_indices(['[PAD]', '[UNK]', '一斤三', '意面屋'])
                print(tokens)
                # [0, 1, 2, 3]
        """
        return self[tokens]

    def __getitem__(self, tokens):
        # A single token maps to one index; a list/tuple maps element-wise.
        # Missing tokens raise KeyError unless `unk_token` was configured,
        # in which case the defaultdict falls back to the unk index.
        if not isinstance(tokens, (list, tuple)):
            return self._token_to_idx[tokens]
        else:
            return [self._token_to_idx[token] for token in tokens]

    def __len__(self):
        # Vocabulary size, special tokens included.
        return len(self._idx_to_token)

    def __contains__(self, token):
        return token in self._token_to_idx

    def __call__(self, tokens):
        """
        Maps the input tokens into indices. Its function is the same as the
        :meth:`to_indices` method.

        See detail at `to_indices`.
        """
        return self[tokens]

    @property
    def idx_to_token(self):
        # Returns index-token dict
        return self._idx_to_token

    @property
    def token_to_idx(self):
        # Return token-index dict
        return self._token_to_idx

    def to_json(self, path=None):
        """
        Summarizes some information of vocab as a JSON string. If path is
        given, the JSON string will be saved into files. The JSON string and
        the saved file all can be used to reconstruct the :class:`Vocab` by
        calling :meth:`from_json` method.

        Args:
            path (str, optional): The path to save JSON string. If None, the
                JSON will not be saved. Default: None.

        Returns:
            str: The JSON string including information of vocab.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                json_str = vocab.to_json(path='./vocab.json')
        """
        vocab_dict = {}
        vocab_dict['idx_to_token'] = dict(self.idx_to_token)
        vocab_dict['token_to_idx'] = dict(self.token_to_idx)
        vocab_dict['unk_token'] = self.unk_token
        vocab_dict['identifiers_to_tokens'] = self._identifiers_to_tokens
        json_str = json.dumps(vocab_dict)
        if path:
            with io.open(path, 'w', encoding='utf-8') as f:
                f.write(json_str)
        return json_str

    @classmethod
    def from_json(cls, json_str):
        """
        Loads :class:`Vocab` from JSON string or JSON file, which is gotten by
        calling :meth:`to_json` method.

        Args:
            json_str (str): JSON string or file path of JSON string.

        Returns:
            Vocab: An instance of :class:`Vocab` generated from information
            contained in JSON string.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                json_str = vocab.to_json(path='./vocab.json')
                vocab1 = Vocab.from_json(json_str)
                vocab2 = Vocab.from_json('./vocab.json')
                print(len(vocab), len(vocab1), len(vocab2))
                # 1256608 1256608 1256608
        """
        # Accept either a path to a JSON file or a JSON string directly.
        if os.path.isfile(json_str):
            with io.open(json_str, 'r', encoding='utf-8') as f:
                vocab_dict = json.load(f)
        else:
            vocab_dict = json.loads(json_str)
        token_to_idx = vocab_dict.get('token_to_idx')
        unk_token = vocab_dict.get('unk_token')
        identifiers_to_tokens = vocab_dict.get('identifiers_to_tokens', dict())
        # `unk_token` is passed explicitly below; drop it from kwargs to
        # avoid a duplicate keyword argument.
        if 'unk_token' in identifiers_to_tokens:
            del identifiers_to_tokens['unk_token']
        vocab = cls(counter=None,
                    token_to_idx=token_to_idx,
                    unk_token=unk_token,
                    **identifiers_to_tokens)
        return vocab

    @classmethod
    def from_dict(cls,
                  token_to_idx,
                  unk_token=None,
                  pad_token=None,
                  bos_token=None,
                  eos_token=None,
                  **kwargs):
        """
        Builds the :class:`Vocab` from a dict.

        Args:
            token_to_idx (dict): A dict describing the mapping relationship
                between tokens and indices.
            unk_token (str, optional): The special token for unknown tokens.
                If no need, it also could be None. Default: None.
            pad_token (str, optional): The special token for padding. If
                no need, it also could be None. Default: None.
            bos_token (str, optional): The special token for bos. If no
                need, it also could be None. Default: None.
            eos_token (str, optional): The special token for eos. If no
                need, it also could be None. Default: None.
            kwargs (dict): Keyword arguments ending with `_token`. It can be
                used to specify further special tokens that will be exposed as
                attribute of the vocabulary and associated with an index.

        Returns:
            Vocab: An instance of :class:`Vocab` generated from the given dict
            and special tokens.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                vocab1 = Vocab.from_dict(vocab.token_to_idx)
                print(len(vocab), len(vocab.token_to_idx), len(vocab1))
                # 1256608 1256608 1256608
        """
        vocab = cls(counter=None,
                    token_to_idx=token_to_idx,
                    unk_token=unk_token,
                    pad_token=pad_token,
                    bos_token=bos_token,
                    eos_token=eos_token,
                    **kwargs)
        return vocab

    @staticmethod
    def build_vocab(iterator,
                    max_size=None,
                    min_freq=1,
                    token_to_idx=None,
                    unk_token=None,
                    pad_token=None,
                    bos_token=None,
                    eos_token=None,
                    **kwargs):
        """
        Builds the :class:`Vocab` according to the given iterator and other
        information. Firstly, iterate over the `iterator` to construct a
        :class:`collections.Counter` and use it to init the :class:`Vocab`.

        Args:
            iterator (collections.Iterable): Iterator of tokens. Each element
                should be a list of tokens if word-level vocab is needed.
            max_size (int, optional): The max size of vocab, not including
                special tokens. Default: None.
            min_freq (int, optional): Ignore tokens whose frequencies are less
                than `min_freq`. Default: 1.
            token_to_idx (dict, optional): A dict that specifies the mapping
                relationship between tokens and indices to be used. If provided,
                adjust the tokens and indices mapping according to it. If None,
                counter must be provided. Default: None.
            unk_token (str, optional): The special token for unknown tokens
                '<unk>'. If no need, it also could be None. Default: None.
            pad_token (str, optional): The special token for padding
                '<pad>'. If no need, it also could be None. Default: None.
            bos_token (str, optional): The special token for bos '<bos>'.
                If no need, it also could be None. Default: None.
            eos_token (str, optional): The special token for eos '<eos>'.
                If no need, it also could be None. Default: None.
            kwargs (dict): Keyword arguments ending with `_token`. It can be
                used to specify further special tokens that will be exposed as
                attribute of the vocabulary and associated with an index.

        Returns:
            Vocab: An instance of :class:`Vocab` generated from the given
            iterator and other information.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                vocab1 = Vocab.build_vocab([list(vocab.token_to_idx.keys())])
                print(len(vocab), len(vocab1))
                # 1256608 1256608
        """
        counter = collections.Counter()
        for tokens in iterator:
            counter.update(tokens)
        vocab = Vocab(
            counter,
            max_size=max_size,
            min_freq=min_freq,
            token_to_idx=token_to_idx,
            unk_token=unk_token,
            pad_token=pad_token,
            bos_token=bos_token,
            eos_token=eos_token,
            **kwargs)
        return vocab

    @staticmethod
    def load_vocabulary(filepath,
                        unk_token=None,
                        pad_token=None,
                        bos_token=None,
                        eos_token=None,
                        **kwargs):
        """
        Builds the :class:`Vocab` from a file reserving all tokens by calling
        :meth:`Vocab.from_dict` method. The file contains a token per line, and
        the line index would be the index of corresponding token.

        Args:
            filepath (str): the path of file to construct vocabulary.
            unk_token (str, optional): special token for unknown tokens. If no
                need, it also could be None. Default: None.
            pad_token (str, optional): special token for padding. If no
                need, it also could be None. Default: None.
            bos_token (str, optional): special token for bos. If no need,
                it also could be None. Default: None.
            eos_token (str, optional): special token for eos. If no need,
                it also could be None. Default: None.
            kwargs (dict): Keyword arguments ending with `_token`. It can be
                used to specify further special tokens that will be exposed as
                attribute of the vocabulary and associated with an index.

        Returns:
            Vocab: An instance of :class:`Vocab` generated from the given file.

        Example:
            .. code-block:: python

                from paddlenlp.data import Vocab
                # The vocab file. The sample file can be downloaded firstly.
                # wget https://paddlenlp.bj.bcebos.com/data/senta_word_dict.txt
                vocab_file_path = './senta_word_dict.txt'
                # Initialize the Vocab
                vocab = Vocab.load_vocabulary(
                    vocab_file_path,
                    unk_token='[UNK]',
                    pad_token='[PAD]')
                print(len(vocab))
                # 1256608
        """
        token_to_idx = {}
        with io.open(filepath, 'r', encoding='utf-8') as f:
            for index, line in enumerate(f):
                # Only the trailing newline is stripped: tokens may contain
                # other whitespace.
                token = line.rstrip('\n')
                token_to_idx[token] = int(index)
        vocab = Vocab.from_dict(
            token_to_idx,
            unk_token=unk_token,
            pad_token=pad_token,
            bos_token=bos_token,
            eos_token=eos_token,
            **kwargs)
        return vocab
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from abc import ABC
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid.layers.utils import map_structure
# Public API of this module: only the generation mixin is exported.
__all__ = ["GenerationMixin"]
class BeamHypotheses:
    """
    Container that keeps the n-best generated hypotheses for one sample.

    Scores are length-normalized: ``sum_logprobs / (((len + 5) / 6) ** lp)``,
    so longer sequences are not unfairly penalized for accumulating more
    log-probability mass.
    """

    def __init__(self, num_beams, length_penalty, early_stopping):
        """Initialize an empty n-best list of hypotheses."""
        self.length_penalty = length_penalty
        self.early_stopping = early_stopping
        self.num_beams = num_beams
        self.beams = []
        # Sentinel larger than any realistic normalized log-prob score.
        self.worst_score = 1e9

    def __len__(self):
        """Return how many hypotheses are currently stored."""
        return len(self.beams)

    def add(self, hyp, sum_logprobs, origin_len=0):
        """Insert `hyp` when the list is not full or it beats the worst entry."""
        generated_len = hyp.shape[-1] - origin_len
        score = sum_logprobs / (((generated_len + 5) / 6)**self.length_penalty)
        # Reject outright if the list is full and the candidate is no better
        # than the current worst hypothesis.
        if len(self) >= self.num_beams and score <= self.worst_score:
            return
        self.beams.append((score, hyp))
        if len(self) > self.num_beams:
            # Evict the worst hypothesis and remember the new minimum.
            ranked = sorted((s, pos) for pos, (s, _) in enumerate(self.beams))
            del self.beams[ranked[0][1]]
            self.worst_score = ranked[1][0]
        else:
            self.worst_score = min(score, self.worst_score)

    def is_done(self, best_sum_logprobs, cur_len, origin_len=0):
        """
        Return True when there are enough hypotheses and none of the ones
        still being generated can beat the worst hypothesis in the list.
        """
        if len(self) < self.num_beams:
            return False
        if self.early_stopping:
            return True
        # Best achievable normalized score at the current length.
        best_possible = best_sum_logprobs / (
            (cur_len - origin_len + 5) / 6)**self.length_penalty
        return self.worst_score >= best_possible
class BeamSearchScorer(object):
    """
    implementing standard beam search decoding.

    Keeps one :class:`BeamHypotheses` per batch sample, routes finished
    (eos-terminated) beams into it in :meth:`process`, and assembles the final
    padded output tensors in :meth:`finalize`.
    """

    def __init__(self,
                 batch_size,
                 max_length,
                 num_beams,
                 length_penalty=1.0,
                 do_early_stopping=False,
                 num_beam_hyps_to_keep=1,
                 num_beam_groups=1):
        self.max_length = max_length
        self.num_beams = num_beams
        self.length_penalty = length_penalty
        self.do_early_stopping = do_early_stopping
        self.num_beam_hyps_to_keep = num_beam_hyps_to_keep
        self.num_beam_groups = num_beam_groups
        # Beams per diversity group (== num_beams when num_beam_groups == 1).
        self.group_size = self.num_beams // self.num_beam_groups
        self._is_init = False
        # One n-best container per batch sample.
        self._beam_hyps = [
            BeamHypotheses(
                num_beams=self.num_beams,
                length_penalty=self.length_penalty,
                early_stopping=self.do_early_stopping)
            for _ in range(batch_size)
        ]
        # 1 marks a finished sample, 0 an unfinished one.
        self._done = paddle.to_tensor(
            [0 for _ in range(batch_size)], dtype='int64')
        # NOTE(review): argument validation runs after the state above has
        # been built; moving these checks first would avoid wasted work.
        if not isinstance(num_beams, int) or num_beams <= 1:
            raise ValueError(
                "`num_beams` has to be an integer strictly greater than 1, but "
                "received {}. For `num_beams` == 1, one should make use of "
                "`greedy_search` instead.".format(num_beams))
        if not isinstance(num_beam_groups, int) or (
                num_beam_groups > num_beams) or (
                    num_beams % num_beam_groups != 0):
            raise ValueError(
                "`num_beam_groups` has to be an integer smaller or equal than "
                "`num_beams` and `num_beams` has to be divisible by "
                "`num_beam_groups`, but received num_beam_groups={}, num_beams="
                "{}.".format(num_beam_groups, num_beams))

    @property
    def is_done(self):
        # True only when every sample in the batch is finished.
        return paddle.min(self._done) == 1

    def process(self,
                input_ids,
                next_scores,
                next_tokens,
                next_indices,
                origin_len=0,
                pad_token_id=None,
                eos_token_id=None):
        """Process one decoding step: route eos-terminated candidates into the
        per-sample hypothesis containers and select the `group_size` surviving
        beams (scores, tokens, source-beam indices) for the next step."""
        cur_len = input_ids.shape[-1]
        batch_size = len(self._beam_hyps)
        assert batch_size == (input_ids.shape[0] // self.group_size)
        next_beam_scores = paddle.zeros(
            [batch_size, self.group_size], dtype=next_scores.dtype)
        next_beam_tokens = paddle.zeros(
            [batch_size, self.group_size], dtype=next_tokens.dtype)
        next_beam_indices = paddle.zeros(
            [batch_size, self.group_size], dtype=next_indices.dtype)
        for batch_idx, beam_hyp in enumerate(self._beam_hyps):
            if self._done[batch_idx] == 1:
                assert (
                    len(beam_hyp) >= self.num_beams
                ), "Batch can only be done if at least {} beams have been generated".format(
                    self.num_beams)
                assert (
                    eos_token_id is not None and pad_token_id is not None
                ), "generated beams >= num_beams -> eos_token_id and pad_token have to be defined"
                # pad the batch
                next_beam_scores[batch_idx, :] = 0
                next_beam_tokens[batch_idx, :] = pad_token_id
                next_beam_indices[batch_idx, :] = 0
                continue
            # next tokens for this sentence
            beam_idx = 0
            for beam_token_rank, (next_token, next_score,
                                  next_index) in enumerate(
                                      zip(next_tokens[batch_idx], next_scores[
                                          batch_idx], next_indices[batch_idx])):
                batch_beam_idx = batch_idx * self.group_size + next_index
                # add to generated hypotheses if end of sentence
                if (eos_token_id is not None) and (
                        next_token.numpy().item() == eos_token_id):
                    # If beam_token does not belong to top num_beams tokens,
                    # it should not be added
                    is_beam_token_worse_than_top_num_beams = (
                        beam_token_rank >= self.group_size)
                    if is_beam_token_worse_than_top_num_beams:
                        continue
                    beam_hyp.add(
                        input_ids[batch_beam_idx.numpy().item()].clone(),
                        next_score.numpy().item(), origin_len)
                else:
                    # add next predicted token since it is not eos_token
                    next_beam_scores[batch_idx, beam_idx] = next_score
                    next_beam_tokens[batch_idx, beam_idx] = next_token.numpy(
                    ).item()
                    next_beam_indices[batch_idx,
                                      beam_idx] = batch_beam_idx.numpy().item()
                    beam_idx += 1
                # once the beam for next step is full, don't add more tokens to it.
                if beam_idx == self.group_size:
                    break
            if beam_idx < self.group_size:
                raise ValueError(
                    "At most {} tokens in `next_tokens[batch_idx]` can be equal "
                    "to `eos_token_id: {}`. Make sure `next_tokens[batch_idx]` "
                    "are corrected.".format(self.group_size, eos_token_id))
            # Check if we are done so that we can save a pad step if all(done)
            if beam_hyp.is_done(next_scores[batch_idx].max().numpy().item(),
                                cur_len, origin_len):
                self._done[batch_idx] = 1
        return {
            "next_beam_scores": next_beam_scores.reshape([-1]),
            "next_beam_tokens": next_beam_tokens.reshape([-1]),
            "next_beam_indices": next_beam_indices.reshape([-1])
        }

    def finalize(self,
                 input_ids,
                 final_beam_scores,
                 final_beam_tokens,
                 final_beam_indices,
                 pad_token_id=None,
                 eos_token_id=None):
        """Flush all still-open beams into the hypothesis containers and
        return `(decoded, decoded_score)`: the `num_beam_hyps_to_keep` best
        sequences per sample, padded, with eos appended where it fits."""
        batch_size = len(self._beam_hyps)
        # finalize all open beam hypotheses and add to generated hypotheses
        for batch_idx, beam_hyp in enumerate(self._beam_hyps):
            if self._done[batch_idx] == 1:
                continue
            # all open beam hypotheses are added to the beam hypothesis
            # beam hypothesis class automatically keeps the best beams
            for beam_id in range(self.num_beams):
                batch_beam_idx = batch_idx * self.num_beams + beam_id
                final_score = final_beam_scores[batch_beam_idx].numpy().item()
                final_tokens = input_ids[batch_beam_idx]
                beam_hyp.add(final_tokens, final_score)
        # select the best hypotheses
        sent_lengths = paddle.zeros(
            [batch_size * self.num_beam_hyps_to_keep], dtype=input_ids.dtype)
        best = []
        # retrieve best hypotheses (beams sorted ascending, so pop() takes
        # the highest-scoring one first)
        for i, beam_hyp in enumerate(self._beam_hyps):
            sorted_hyps = sorted(beam_hyp.beams, key=lambda x: x[0])
            for j in range(self.num_beam_hyps_to_keep):
                best_score, best_hyp = sorted_hyps.pop()
                sent_lengths[self.num_beam_hyps_to_keep * i + j] = len(best_hyp)
                best.append([best_hyp, best_score])
        # prepare for adding eos
        sent_max_len = min(sent_lengths.max().numpy().item() + 1,
                           self.max_length)
        decoded = paddle.zeros(
            [batch_size * self.num_beam_hyps_to_keep, sent_max_len],
            dtype=input_ids.dtype)
        # shorter batches are padded if needed
        if sent_lengths.min().numpy().item() != sent_lengths.max().numpy().item(
        ):
            assert pad_token_id is not None, "`pad_token_id` has to be defined"
            decoded[:, :] = pad_token_id
        decoded_score = paddle.zeros(
            [batch_size * self.num_beam_hyps_to_keep, 1])
        # fill with hypotheses and eos_token_id if the latter fits in
        for i, (hypo, score) in enumerate(best):
            decoded[i, :sent_lengths[i].numpy().item()] = hypo.numpy()
            decoded_score[i] = score
            if sent_lengths[i] < self.max_length:
                # NOTE(review): writes eos_token_id unconditionally here;
                # presumably callers always pass eos_token_id — verify.
                decoded[i, sent_lengths[i].numpy().item()] = eos_token_id
        return decoded, decoded_score
class GenerationMixin(object):
r"""
This class implements the interface for generation task.
It's used as the base class of `paddlenlp.transformers.PretrainedModel
<https://paddlenlp.readthedocs.io/zh/latest/source/paddlenlp.transformers.model_utils.html>`__.
"""
@staticmethod
def prepare_input_ids_for_generation(bos_token_id, encoder_output=None):
batch_size = 1
if bos_token_id is None:
raise ValueError("`bos_token_id` should be defined when no "
"`input_ids` are provided.")
if encoder_output is not None:
batch_size = encoder_output.shape[0]
return paddle.ones([batch_size, 1], dtype="int64") * bos_token_id
@staticmethod
def prepare_attention_mask_for_generation(input_ids, pad_token_id,
eos_token_id):
is_pad_token_in_inputs_ids = (pad_token_id is not None) and paddle.any(
input_ids == pad_token_id).numpy().item()
is_pad_token_not_equal_to_eos_token_id = (eos_token_id is None) or (
(eos_token_id is not None) and (pad_token_id != eos_token_id))
if is_pad_token_in_inputs_ids and is_pad_token_not_equal_to_eos_token_id:
attention_mask = (input_ids == pad_token_id
).astype(paddle.get_default_dtype()) * -1e9
else:
attention_mask = paddle.zeros_like(
input_ids, dtype=paddle.get_default_dtype())
return paddle.unsqueeze(attention_mask, axis=[1, 2])
@staticmethod
def get_logits_processor(min_length=None,
eos_token_id=None,
repetition_penalty=None):
processors = LogitsProcessorList()
if min_length is not None and eos_token_id is not None and min_length > -1:
processors.append(
MinLengthLogitsProcessor(min_length, eos_token_id))
if repetition_penalty is not None and repetition_penalty != 1.0:
processors.append(
RepetitionPenaltyLogitsProcessor(penalty=repetition_penalty))
# TODO
# Add more pre_processing for distribution
return processors
@staticmethod
def expand_inputs_for_generation(input_ids,
expand_size,
attention_mask=None,
**model_kwargs):
index = paddle.tile(
paddle.arange(input_ids.shape[0]).unsqueeze(-1),
[1, expand_size]).reshape([-1])
input_ids = paddle.index_select(input_ids, index)
if attention_mask is not None:
model_kwargs["attention_mask"] = paddle.index_select(attention_mask,
index)
if "token_type_ids" in model_kwargs:
token_type_ids = model_kwargs["token_type_ids"]
model_kwargs["token_type_ids"] = paddle.index_select(token_type_ids,
index)
if "position_ids" in model_kwargs:
position_ids = model_kwargs["position_ids"]
model_kwargs["position_ids"] = paddle.index_select(position_ids,
index)
if "seq_len" in model_kwargs:
seq_len = model_kwargs["seq_len"]
model_kwargs["seq_len"] = paddle.index_select(seq_len, index)
if "encoder_output" in model_kwargs:
encoder_output = model_kwargs["encoder_output"]
model_kwargs["encoder_output"] = paddle.index_select(encoder_output,
index)
return input_ids, model_kwargs
    @staticmethod
    def update_model_kwargs_for_generation(outputs,
                                           model_kwargs,
                                           is_encoder_decoder=False):
        """Advance `model_kwargs` by one generated position: refresh the
        cache, extend `token_type_ids`/`position_ids` by one column and grow
        the attention mask so the new token is attendable."""
        # Update the model inputs during generation.
        # Note that If `token_type_ids` and `attention_mask` in `model_kwargs`
        # and they contain pad value, the result vectors updated by this method
        # may be different from expected. In this case, you need to rewrite the
        # method.

        # update cache (a tuple output is assumed to be (logits, cache))
        if isinstance(outputs, tuple):
            model_kwargs["cache"] = outputs[1]

        # update token_type_ids with last value
        if "token_type_ids" in model_kwargs:
            token_type_ids = model_kwargs["token_type_ids"]
            model_kwargs["token_type_ids"] = paddle.concat(
                [token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], axis=-1)

        # update position_ids (new position = last position + 1)
        if "position_ids" in model_kwargs:
            position_ids = model_kwargs["position_ids"]
            model_kwargs["position_ids"] = paddle.concat(
                [position_ids, position_ids[:, -1].reshape((-1, 1)) + 1],
                axis=-1)

        # update attention_mask
        if not is_encoder_decoder and "attention_mask" in model_kwargs:
            attention_mask = model_kwargs["attention_mask"]
            # nn.Pad2D don't support the data type `bool`
            if convert_dtype(attention_mask.dtype) == 'bool':
                attention_mask = paddle.cast(attention_mask, 'int64')
            # Grow the mask by one row (replicating the last row) and one
            # column (filled with -1e9, i.e. masked) for the new position.
            attention_mask = nn.Pad2D(
                [0, 0, 0, 1], mode='replicate')(attention_mask)
            attention_mask = nn.Pad2D([0, 1, 0, 0], value=-1e9)(attention_mask)
            dtype = convert_dtype(attention_mask.dtype)
            # Let the new token attend to itself: 1 for int-style masks,
            # 0.0 for additive float masks.
            if 'int' in dtype:
                attention_mask[:, :, -1, -1] = 1
            elif 'float' in dtype:
                attention_mask[:, :, -1, -1] = 0.0
            else:
                raise ValueError('The data type of input `attention_mask` must '
                                 'be bool, int or float')
            model_kwargs["attention_mask"] = attention_mask
        return model_kwargs
@staticmethod
def update_scores_for_generation(scores, next_scores, length,
unfinished_flag):
# update scores
unfinished_scores = (scores * length + next_scores) / (length + 1)
scores = paddle.where(unfinished_flag, unfinished_scores, scores)
return scores
def prepare_encoder_decoder_kwargs_for_generation(self, input_ids,
model_kwargs):
if "encoder_output" not in model_kwargs:
# retrieve encoder hidden states
encoder = self.get_encoder()
encoder_kwargs = {
argument: value
for argument, value in model_kwargs.items()
if not (argument.startswith("decoder_") or argument.startswith(
"cross_attn"))
}
model_kwargs["encoder_output"] = encoder(input_ids,
**encoder_kwargs)
return model_kwargs
def prepare_inputs_for_generation(self, input_ids, **kwargs):
# Implement in subclasses for custom behavior to prepare inputs in the
# generate method.
return {"input_ids": input_ids}
    def adjust_logits_during_generation(self, logits):
        # Implement in subclasses for custom behavior to adjust the logits in
        # the generate method. The base implementation is the identity.
        return logits
    @paddle.no_grad()
    def generate(self,
                 input_ids=None,
                 max_length=20,
                 min_length=0,
                 decode_strategy='greedy_search',
                 temperature=1.0,
                 top_k=0,
                 top_p=1.0,
                 repetition_penalty=1.0,
                 num_beams=1,
                 length_penalty=0.0,
                 early_stopping=False,
                 bos_token_id=None,
                 eos_token_id=None,
                 pad_token_id=None,
                 num_return_sequences=1,
                 diversity_rate=0.0,
                 use_cache=True,
                 **model_kwargs):
        r"""
        The interface for generation task. This method can generate sequences
        by using decoding strategy. Currently, there are three decoding
        strategies supported: "greedy_search", "sampling" and "beam_search".
        Args:
            input_ids (Tensor, optional): The input sequence ids for the
                generation. It is a Tensor with shape [batch_size, sequence_length].
                The data type should be int32 or int64. Default to None, which
                we will initialize it as a Tensor with shape [1, 1], filled
                with the value `bos_token_id`.
            max_length (int, optional): The maximum length of the sequence to
                be generated. Default to 20.
            min_length (int, optional): The minimum length of the sequence to
                be generated. Default to 0.
            decode_strategy (str, optional): The decoding strategy in generation.
                Currently, there are three decoding strategies supported:
                "greedy_search", "sampling" and "beam_search". Default to
                "greedy_search".
            temperature (float, optional): The value used to module the next
                token probabilities in the "sampling" strategy. Default to 1.0,
                which means no effect.
            top_k (int, optional): The number of highest probability tokens to
                keep for top-k-filtering in the "sampling" strategy. Default to
                0, which means no effect.
            top_p (float, optional): The cumulative probability for
                top-p-filtering in the "sampling" strategy. The value should
                satisfy :math:`0 <= top\_p < 1`. Default to 1.0, which means no
                effect.
            repetition_penalty (float, optional):
                The parameter for repetition penalty. 1.0 means no penalty. See `this paper
                <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details. Defaults to 1.0.
            num_beams (int, optional): The number of beams in the "beam_search"
                strategy. Default to 1.
            length_penalty (float, optional): The exponential penalty to the
                sequence length in the "beam_search" strategy. The larger this
                param is, the more that the model would generate shorter
                sequences. Default to 0.0, which means no penalty.
            early_stopping (bool, optional): Whether to stop searching in the
                "beam_search" strategy when at least `num_beams` sentences are
                finished per batch or not. Default to False.
            bos_token_id (int, optional): The id of the `bos_token`. Default to
                None.
            eos_token_id (int, optional): The id of the `eos_token`. Default to
                None.
            pad_token_id (int, optional): The id of the `pad_token`. Default to
                None.
            num_return_sequences (int, optional): The number of returned
                sequences for each sequence in the batch. Default to 1.
            diversity_rate (float, optional): The diversity_rate for diverse
                siblings search. See this paper for more details.
                `https://arxiv.org/abs/1611.08562`.
            use_cache: (bool, optional): Whether or not use the model cache to
                speed up decoding. Default to True.
            model_kwargs (dict): It can be used to specify additional kwargs
                passed to the model.
        Returns:
            tuple[Tensor]: It is a tuple contains two elements: ids and scores.
            Each element is a Tensor.
            With the fields:
            - ids (Tensor):
                The ids of the generated sequences. It is a Tensor with shape
                [batch_size * num_return_sequences, sequence_length]. The data
                type is same as the input `input_ids`.
            - scores (Tensor):
                The scores of the generated sequences. It is a Tensor with shape
                [batch_size * num_return_sequences, 1]. The data type is float32
                or float64, which is the same as the parameters in the model.
        Example:
            .. code-block::
                import paddle
                from paddlenlp.transformers import (
                    UnifiedTransformerLMHeadModel,
                    UnifiedTransformerTokenizer
                )
                paddle.seed(2)
                # Initialize the model and tokenizer
                model_name_or_path = 'unified_transformer-12L-cn-luge'
                model = UnifiedTransformerLMHeadModel.from_pretrained(model_name_or_path)
                tokenizer = UnifiedTransformerTokenizer.from_pretrained(model_name_or_path)
                # Prepare the model inputs.
                history = "早上好,今天空气质量不错。"
                inputs = tokenizer.dialogue_encode(history, task_type='chitchat',
                    add_start_token_as_response=True, return_tensors=True)
            .. code-block::
                # Generate the sequence by using "greedy_search" strategy
                ids, scores = model.generate(
                    input_ids=inputs['input_ids'],
                    token_type_ids=inputs['token_type_ids'],
                    position_ids=inputs['position_ids'],
                    attention_mask=inputs['attention_mask'],
                    decode_strategy="greedy_search")
                print(ids.shape, scores.shape)
                # [1, 3] [1, 1]
                sequence_ids = ids.numpy().tolist()[0]
                sequence_ids = sequence_ids[:sequence_ids.index(tokenizer.sep_token_id)]
                response = tokenizer.convert_ids_to_string(sequence_ids, keep_space=False)
                print(response)
                # 是的
            .. code-block::
                # Generate 2 sequences by using "sampling" strategy (top_k=5)
                ids, scores = model.generate(
                    input_ids=inputs['input_ids'],
                    token_type_ids=inputs['token_type_ids'],
                    position_ids=inputs['position_ids'],
                    attention_mask=inputs['attention_mask'],
                    decode_strategy="sampling",
                    top_k=5,
                    num_return_sequences=2)
                print(ids.shape, scores.shape)
                # [2, 7] [2, 1]
                response = []
                for sequence_ids in ids.numpy().tolist():
                    sequence_ids = sequence_ids[:sequence_ids.index(tokenizer.sep_token_id)]
                    text = tokenizer.convert_ids_to_string(sequence_ids, keep_space=False)
                    response.append(text)
                print(response)
                # ['天气好,心情也好', '你也是']
            .. code-block::
                # Generate 2 sequences by using "beam_search" strategy (num_beams=5)
                ids, scores = model.generate(
                    input_ids=inputs['input_ids'],
                    token_type_ids=inputs['token_type_ids'],
                    position_ids=inputs['position_ids'],
                    attention_mask=inputs['attention_mask'],
                    decode_strategy="beam_search",
                    num_beams=5,
                    num_return_sequences=2)
                print(ids.shape, scores.shape)
                # [2, 3] [2, 1]
                response = []
                for sequence_ids in ids.numpy().tolist():
                    sequence_ids = sequence_ids[:sequence_ids.index(tokenizer.sep_token_id)]
                    text = tokenizer.convert_ids_to_string(sequence_ids, keep_space=False)
                    response.append(text)
                print(response)
                # ['是的', '嗯嗯']
        """
        # params check
        # Explicit arguments win; otherwise fall back to model-level token-id
        # attributes when they exist.
        bos_token_id = bos_token_id if bos_token_id is not None else getattr(
            self, 'bos_token_id', None)
        eos_token_id = eos_token_id if eos_token_id is not None else getattr(
            self, 'eos_token_id', None)
        pad_token_id = pad_token_id if pad_token_id is not None else getattr(
            self, 'pad_token_id', None)
        if input_ids is None:
            # Init `input_ids` with bos_token_id
            input_ids = self.prepare_input_ids_for_generation(bos_token_id)
        if model_kwargs.get("attention_mask", None) is None:
            # TODO
            # Init `attention_mask` depending on `pad_token_id`
            model_kwargs[
                "attention_mask"] = self.prepare_attention_mask_for_generation(
                    input_ids, pad_token_id, eos_token_id)
        # NOTE(review): encoder-decoder detection is purely structural
        # (presence of both attributes) — confirm this holds for all models
        # that call generate().
        self.is_encoder_decoder = hasattr(self, 'encoder') and hasattr(
            self, 'decoder')
        if self.is_encoder_decoder:
            model_kwargs = self.prepare_encoder_decoder_kwargs_for_generation(
                input_ids, model_kwargs)
            # set input_ids as decoder_input_ids
            if "decoder_input_ids" in model_kwargs:
                input_ids = model_kwargs.pop("decoder_input_ids")
            else:
                input_ids = self.prepare_input_ids_for_generation(
                    bos_token_id, model_kwargs["encoder_output"])
        if pad_token_id is None and eos_token_id is not None:
            # NOTE(review): plain print instead of warnings/logging — confirm
            # against the project's logging convention.
            print("Setting `pad_token_id` to `eos_token_id`:{} for "
                  "open-end generation.".format(eos_token_id))
            pad_token_id = eos_token_id
        model_kwargs["use_cache"] = use_cache
        # `max_length`/`min_length` are lengths of *new* tokens; convert them
        # to absolute lengths including the prompt.
        max_length += input_ids.shape[-1]
        min_length += input_ids.shape[-1]
        logits_processors = self.get_logits_processor(min_length, eos_token_id,
                                                      repetition_penalty)
        if decode_strategy == 'greedy_search':
            if num_return_sequences > 1:
                raise ValueError(
                    "`num_return_sequences` has to be 1, but is {} "
                    "when doing greedy search.".format(num_return_sequences))
            return self.greedy_search(input_ids, logits_processors, max_length,
                                      pad_token_id, eos_token_id,
                                      **model_kwargs)
        elif decode_strategy == 'sampling':
            if num_return_sequences > 1:
                # Duplicate each batch row so each gets its own sample.
                input_ids, model_kwargs = self.expand_inputs_for_generation(
                    input_ids, expand_size=num_return_sequences, **model_kwargs)
            return self.sample(input_ids, logits_processors, max_length,
                               pad_token_id, eos_token_id, top_k, top_p,
                               temperature, **model_kwargs)
        elif decode_strategy == 'beam_search':
            batch_size = input_ids.shape[0]
            if num_return_sequences > num_beams:
                raise ValueError(
                    "`num_return_sequences` has to be smaller or equal to "
                    "`num_beams`. But received `num_return_sequences` is {}, "
                    "`num_beams` is {}".format(num_return_sequences, num_beams))
            if num_beams <= 1:
                raise ValueError(
                    "`num_beams` has to be bigger than 1. But received "
                    "`num_beams` is {}. If `num_beams` is 1, `decode_strategy` "
                    "should be 'greedy_search'".format(num_beams))
            beam_scorer = BeamSearchScorer(
                batch_size=batch_size,
                max_length=max_length,
                num_beams=num_beams,
                length_penalty=length_penalty,
                do_early_stopping=early_stopping,
                num_beam_hyps_to_keep=num_return_sequences)
            # Tile inputs so each beam has its own row.
            input_ids, model_kwargs = self.expand_inputs_for_generation(
                input_ids, expand_size=num_beams, **model_kwargs)
            return self.beam_search(input_ids, beam_scorer, logits_processors,
                                    max_length, diversity_rate, pad_token_id,
                                    eos_token_id, **model_kwargs)
        else:
            raise ValueError(
                '`decode_strategy` must be one of "greedy_search", "sampling" '
                'and "beam_search".')
    def greedy_search(self, input_ids, logits_processors, max_length,
                      pad_token_id, eos_token_id, **model_kwargs):
        """Greedy decoding: pick the argmax token at every step.

        Returns (generated_ids_without_prompt, average_log_prob_scores).
        """
        batch_size, cur_len = input_ids.shape
        origin_len = cur_len
        # Per-sequence flag: True while the sequence has not emitted EOS yet.
        unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
        # Running average log-probability of the chosen tokens, per sequence.
        scores = paddle.full(
            [batch_size, 1], 0.0, dtype=paddle.get_default_dtype())
        while cur_len < max_length:
            # prepare model inputs & get model output
            model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                              **model_kwargs)
            outputs = self(**model_inputs)
            # Models that return (logits, cache, ...) tuples: logits first.
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            # [batch_size, vocab_size]
            logits = logits[:, -1, :]
            # pre-process distribution
            logits = self.adjust_logits_during_generation(logits)
            logits = logits_processors(input_ids, logits)
            # greedy: argmax over log-probs (same argmax as over logits; the
            # log-probs are also used for the score bookkeeping below)
            probs = F.softmax(logits)
            probs = paddle.log(probs)
            next_tokens = paddle.argmax(probs, axis=-1).unsqueeze(-1)
            next_scores = paddle.index_sample(probs, next_tokens)
            if eos_token_id is not None:
                # Finished sequences keep emitting pad tokens.
                next_tokens = paddle.where(unfinished_flag, next_tokens,
                                           paddle.full_like(next_tokens,
                                                            pad_token_id))
            scores = self.update_scores_for_generation(
                scores, next_scores, cur_len - origin_len, unfinished_flag)
            cur_len += 1
            input_ids = paddle.concat([input_ids, next_tokens], axis=1)
            if eos_token_id is not None:
                unfinished_flag = paddle.logical_and(
                    unfinished_flag, next_tokens != eos_token_id)
                # Stop when there is a </s> in all sentences
                if not paddle.any(unfinished_flag):
                    break
            model_kwargs = self.update_model_kwargs_for_generation(
                outputs,
                model_kwargs,
                is_encoder_decoder=self.is_encoder_decoder)
        # Strip the prompt; return only newly generated tokens.
        return input_ids[:, origin_len:], scores
    def sample(self,
               input_ids,
               logits_processors,
               max_length,
               pad_token_id,
               eos_token_id,
               top_k=None,
               top_p=None,
               temperature=None,
               min_tokens_to_keep=1,
               **model_kwargs):
        """Sampling decoding with optional temperature, top-k and top-p
        (nucleus) filtering.

        Returns (generated_ids_without_prompt, average_log_prob_scores).
        Scores are computed from the *unfiltered* distribution (before
        temperature/top-k/top-p) so they are comparable across settings.
        """

        def TopKProcess(probs, top_k, min_tokens_to_keep):
            # Clamp k into [min_tokens_to_keep, vocab_size].
            top_k = min(max(top_k, min_tokens_to_keep), probs.shape[-1])
            # Remove all tokens with a probability less than the last token of the top-k
            topk_probs, _ = paddle.topk(probs, k=top_k)
            probs = paddle.where(probs >= topk_probs[:, -1:], probs,
                                 paddle.full_like(probs, 0.0))
            return probs

        def TopPProcess(probs, top_p, min_tokens_to_keep):
            sorted_probs = paddle.sort(probs, descending=True)
            sorted_indices = paddle.argsort(probs, descending=True)
            cumulative_probs = paddle.cumsum(sorted_probs, axis=-1)
            # Remove tokens with cumulative probs above the top_p, But keep at
            # least min_tokens_to_keep tokens
            sorted_indices_to_remove = cumulative_probs > top_p
            if min_tokens_to_keep > 1:
                # Set 'min_tokens_to_keep - 1' because the first token is kept
                sorted_indices_to_remove[:, :min_tokens_to_keep - 1] = 0
            # Keep the first token
            sorted_indices_to_remove = paddle.cast(
                sorted_indices_to_remove, dtype='int64')
            # Shift the removal mask right by one so the token that crosses
            # the top_p boundary is itself kept.
            sorted_indices_to_remove[:, 1:] = (
                sorted_indices_to_remove[:, :-1].clone())
            sorted_indices_to_remove[:, 0] = 0
            # Scatter sorted tensors to original indexing
            # (flattened row offsets turn per-row indices into global ones)
            sorted_indices = sorted_indices + paddle.arange(probs.shape[
                0]).unsqueeze(-1) * probs.shape[-1]
            condition = paddle.scatter(sorted_indices_to_remove.flatten(),
                                       sorted_indices.flatten(),
                                       sorted_indices_to_remove.flatten())
            condition = paddle.cast(condition, 'bool').reshape(probs.shape)
            probs = paddle.where(condition, paddle.full_like(probs, 0.0), probs)
            return probs

        batch_size, cur_len = input_ids.shape
        origin_len = cur_len
        # True while a sequence has not emitted EOS.
        unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
        scores = paddle.full(
            [batch_size, 1], 0.0, dtype=paddle.get_default_dtype())
        while cur_len < max_length:
            # prepare model inputs & get model output
            model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                              **model_kwargs)
            outputs = self(**model_inputs)
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            # [batch_size, vocab_size]
            logits = logits[:, -1, :]
            # pre-process distribution
            logits = self.adjust_logits_during_generation(logits)
            logits = logits_processors(input_ids, logits)
            # sample
            # Keep the unfiltered log-probs for score bookkeeping.
            origin_probs = F.softmax(logits)
            origin_probs = paddle.log(origin_probs)
            if temperature is not None and temperature != 1.0:
                logits = logits / temperature
            probs = F.softmax(logits)
            if top_k is not None and top_k != 0:
                probs = TopKProcess(probs, top_k, min_tokens_to_keep)
            if top_p is not None and top_p < 1.0:
                probs = TopPProcess(probs, top_p, min_tokens_to_keep)
            # multinomial renormalizes, so zeroed-out entries are never drawn
            next_tokens = paddle.multinomial(probs)
            next_scores = paddle.index_sample(origin_probs, next_tokens)
            if eos_token_id is not None:
                # Finished sequences keep emitting pad tokens.
                next_tokens = paddle.where(unfinished_flag, next_tokens,
                                           paddle.full_like(next_tokens,
                                                            pad_token_id))
            scores = self.update_scores_for_generation(
                scores, next_scores, cur_len - origin_len, unfinished_flag)
            cur_len += 1
            input_ids = paddle.concat([input_ids, next_tokens], axis=1)
            if eos_token_id is not None:
                unfinished_flag = paddle.logical_and(
                    unfinished_flag, next_tokens != eos_token_id)
                # Stop when there is a </s> in all sentences
                if not paddle.any(unfinished_flag):
                    break
            model_kwargs = self.update_model_kwargs_for_generation(
                outputs,
                model_kwargs,
                is_encoder_decoder=self.is_encoder_decoder)
        return input_ids[:, origin_len:], scores
def beam_search(self, input_ids, beam_scorer, logits_processors, max_length,
diversity_rate, pad_token_id, eos_token_id, **model_kwargs):
batch_size = len(beam_scorer._beam_hyps)
num_beams = beam_scorer.num_beams
batch_beam_size, cur_len = input_ids.shape
origin_len = cur_len
assert (
num_beams * batch_size == batch_beam_size
), "Batch dimension of `input_ids` should be {}, but received {}.".format(
num_beams * batch_size, batch_beam_size)
beam_scores = paddle.zeros(
(batch_size, num_beams), dtype=paddle.get_default_dtype())
beam_scores[:, 1:] = -1e9
beam_scores = paddle.reshape(beam_scores, [-1])
while cur_len < max_length:
# prepare model inputs & get model output
model_inputs = self.prepare_inputs_for_generation(input_ids,
**model_kwargs)
outputs = self(**model_inputs)
logits = outputs[0] if isinstance(outputs, tuple) else outputs
# [batch_size, vocab_size]
logits = logits[:, -1, :]
# pre-process distribution
logits = self.adjust_logits_during_generation(logits)
logits = logits_processors(input_ids, logits)
# beam search
# [batch_size * num_beams, vocab_size]
next_scores = F.softmax(logits)
next_scores = paddle.log(next_scores)
next_scores = next_scores + beam_scores.unsqueeze(-1)
vocab_size = next_scores.shape[-1]
if diversity_rate == 0.0:
# reshape for beam search
next_scores = next_scores.reshape(
[batch_size, num_beams * vocab_size])
next_scores, next_tokens = paddle.topk(
next_scores, 2 * num_beams, axis=1)
next_indices = next_tokens // vocab_size
else:
next_scores, next_tokens = paddle.topk(
next_scores, 2 * num_beams, axis=1)
sibling_score = paddle.tile(
paddle.arange(1, 2 * num_beams + 1),
repeat_times=[batch_size * num_beams, 1]) * diversity_rate
diversed_score = next_scores - sibling_score
next_scores = next_scores.reshape(
[batch_size, 2 * num_beams * num_beams])
next_tokens = next_tokens.reshape(
[batch_size, 2 * num_beams * num_beams])
diversed_score = diversed_score.reshape(
[batch_size, 2 * num_beams * num_beams])
diversed_score, diversed_tokens = paddle.topk(
diversed_score, 2 * num_beams, axis=1)
# TODO
# Use gather_nd() to select origan token and score
next_scores = paddle.stack([
paddle.index_select(next_scores[i], diversed_tokens[i])
for i in range(next_scores.shape[0])
])
next_tokens = paddle.stack([
paddle.index_select(next_tokens[i], diversed_tokens[i])
for i in range(next_tokens.shape[0])
])
next_indices = next_tokens // (2 * num_beams)
next_tokens = next_tokens % vocab_size
# stateless
beam_outputs = beam_scorer.process(
input_ids,
next_scores,
next_tokens,
next_indices,
origin_len=origin_len,
pad_token_id=pad_token_id,
eos_token_id=eos_token_id, )
beam_scores = beam_outputs["next_beam_scores"]
beam_next_tokens = beam_outputs["next_beam_tokens"]
beam_idx = beam_outputs["next_beam_indices"]
cur_len += 1
input_ids = paddle.concat(
[
paddle.index_select(input_ids, beam_idx),
beam_next_tokens.unsqueeze(-1)
],
axis=-1)
if beam_scorer.is_done:
break
model_kwargs = self.update_model_kwargs_for_generation(
outputs,
model_kwargs,
is_encoder_decoder=self.is_encoder_decoder)
if model_kwargs["cache"] is not None:
# reorder the cache
model_kwargs["cache"] = map_structure(
lambda x: paddle.index_select(x, beam_idx),
model_kwargs["cache"])
pred_ids, scores = beam_scorer.finalize(
input_ids,
beam_scores,
next_tokens,
next_indices,
pad_token_id=pad_token_id,
eos_token_id=eos_token_id)
return pred_ids[:, origin_len:], scores
class LogitsProcessorList(List):
    """A list of logits processors that are applied sequentially."""

    def __call__(self, input_ids, logits):
        """Pipe `logits` through every contained processor, in order."""
        for transform in self:
            logits = transform(input_ids, logits)
        return logits
class LogitsProcessor(ABC):
    """
    Abstract base class for all logit processors that can be applied during
    generation.
    """

    def __call__(self, input_ids, logits):
        raise NotImplementedError(
            f"{self.__class__} is an abstract class. "
            "Only classes inheriting this class can be called.")


class MinLengthLogitsProcessor(LogitsProcessor):
    r"""
    Enforcing a min-length by setting EOS probability to 0.
    Args:
        min_length (int): The minimum length of generation sequence.
        eos_token_id (int): The id of the `end-of-sequence` token.
    """

    def __init__(self, min_length, eos_token_id):
        if not (isinstance(min_length, int) and min_length >= 0):
            raise ValueError(
                "`min_length` should be a positive integer, but get {}".format(
                    min_length))
        if not (isinstance(eos_token_id, int) and eos_token_id >= 0):
            raise ValueError(
                "`eos_token_id` should be a positive integer, but get {}".
                format(eos_token_id))
        self.min_length = min_length
        self.eos_token_id = eos_token_id

    def __call__(self, input_ids, logits):
        # Block EOS until the generated sequence reaches min_length.
        if input_ids.shape[-1] < self.min_length:
            logits[:, self.eos_token_id] = -1e9
        return logits
class RepetitionPenaltyLogitsProcessor(LogitsProcessor):
    r"""
    Enforcing an exponential penalty on repeated sequences.
    Args:
        repetition_penalty (float):
            The parameter for repetition penalty. 1.0 means no penalty. See `this paper
            <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details.
    """

    def __init__(self, penalty: float):
        if not isinstance(penalty, float) or not (penalty > 0):
            raise ValueError(
                f"`penalty` has to be a strictly positive float, but is {penalty}"
            )
        self.penalty = penalty

    def __call__(self, input_ids, logits):
        # Gather the current logit of every token already generated.
        score = paddle.index_sample(logits, input_ids)
        # CTRL-style penalty: divide positive scores, multiply negative ones,
        # so the penalty always lowers the probability of repeats.
        score = paddle.where(score < 0, score * self.penalty,
                             score / self.penalty)
        # Convert per-row token ids to flat indices so a single 1-D scatter
        # can write the penalized scores back into `logits`.
        input_ids = input_ids + paddle.arange(logits.shape[0]).unsqueeze(
            -1) * logits.shape[-1]
        outputs = paddle.scatter(logits.flatten(),
                                 input_ids.flatten(),
                                 score.flatten()).reshape(logits.shape)
        return outputs
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import pickle
def load_tf_checkpoint(input_path, output_path=None):
    """Read every variable from a TensorFlow checkpoint into a dict.

    Returns a mapping of variable name -> numpy array. When `output_path`
    is given, the mapping is also pickled to that path.
    """
    import tensorflow as tf
    tf_path = os.path.abspath(input_path)
    print("Converting TensorFlow checkpoint from {}".format(tf_path))
    # Load weights from TF model
    names = []
    arrays = []
    for name, shape in tf.train.list_variables(tf_path):
        print("Loading TF weight {} with shape {}".format(name, shape))
        names.append(name)
        arrays.append(tf.train.load_variable(tf_path, name))
    # Variable names must be unique for the dict below to be lossless.
    assert len(set(names)) == len(names)
    assert len(names) == len(arrays)
    name_to_array = dict(zip(names, arrays))
    if output_path:
        print("Save TF numpy weight to {}".format(output_path))
        save_pickled_tf_checkpoint(name_to_array, output_path)
    return name_to_array
def load_pickled_tf_checkpoint(input_path):
    """Load a pickled TF checkpoint (variable name -> ndarray dict)."""
    with open(input_path, "rb") as checkpoint_file:
        raw = checkpoint_file.read()
    return pickle.loads(raw)
def save_pickled_tf_checkpoint(name_to_array, output_path):
    """Pickle the variable name -> ndarray mapping to `output_path`.

    Protocol 2 is kept so the file stays readable by older tooling.
    """
    with open(output_path, "wb") as out_file:
        pickle.dump(name_to_array, out_file, protocol=2)
if __name__ == "__main__":
#input_path = "/data/zengjinle/dataset/bert_data/phase1/model.ckpt-28252"
#output_path = "tf_ckpt.pickle"
assert len(sys.argv) == 3
input_path = sys.argv[1]
output_path = sys.argv[2]
load_tf_checkpoint(input_path, output_path)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mlperf_logging import mllog
mllogger = mllog.get_mllogger()
def _paddle_bert_print(logger,
key,
val=None,
metadata=None,
stack_offset=3,
namespace="paddle_mlperf"):
logger(
key=key,
value=val,
metadata=metadata,
stack_offset=stack_offset,
namespace=namespace)
def paddle_bert_print_start(key, val=None, metadata=None):
    # MLPerf "start" interval marker (wraps mllogger.start).
    _paddle_bert_print(mllogger.start, key, val, metadata)
def paddle_bert_print_end(key, val=None, metadata=None):
    # MLPerf "end" interval marker (wraps mllogger.end).
    _paddle_bert_print(mllogger.end, key, val, metadata)
def paddle_bert_print_event(key, val=None, metadata=None):
    # MLPerf point-in-time event (wraps mllogger.event).
    _paddle_bert_print(mllogger.event, key, val, metadata)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import copy
import math
import numpy as np
import numbers
import json
import sys
import os
import paddle
import paddle.nn as nn
import paddle.tensor as tensor
import paddle.nn.functional as F
from paddle.nn import TransformerEncoder, Linear, Layer, Embedding, LayerNorm, Tanh
from paddle.nn import Layer, LayerList
from paddle.fluid.initializer import Constant
import utility
from bert_padding import generate_mask
#try:
# from custom_setup_ops import custom_fmha, custom_fused_dropout_residual_ln, custom_fused_dense
#except ImportError as e:
# print('custom_setup_ops import error: {}'.format(e))
from .load_tf_checkpoint import load_pickled_tf_checkpoint, save_pickled_tf_checkpoint
from .mlperf_logging_helper import paddle_bert_print_event
__all__ = [
'BertConfig',
'BertModel',
'BertForPretraining',
'BertPretrainingCriterion',
'BertPretrainingHeads',
]
use_nv_input = utility.use_nv_input()
GELU_APPROXIMATE = True
def get_activation(approximate=GELU_APPROXIMATE):
    """Return the activation layer used throughout the model.

    Args:
        approximate (bool, optional): Whether to use the tanh GELU
            approximation. Defaults to the module-level GELU_APPROXIMATE
            flag, so existing no-argument callers behave exactly as before.

    Returns:
        A paddle ``nn.GELU`` layer.
    """
    return nn.GELU(approximate=approximate)
def mask_gather(var, mask):
    # Gather the rows of `var` selected by boolean `mask` (used to compact
    # padded tokens). NOTE(review): paddle.fluid.layers.where is the legacy
    # spelling of paddle.nonzero — confirm against the pinned Paddle version.
    return paddle.gather_nd(var, paddle.fluid.layers.where(mask))
def gen_pos_id(input_ids):
    """Build position ids [0, 1, ..., seq_len-1] with the same shape and
    dtype as `input_ids`; gradients are not propagated through them."""
    ones = paddle.ones_like(input_ids)
    position_ids = paddle.cumsum(ones, axis=-1) - ones
    position_ids.stop_gradient = True
    return position_ids
def fuse_dense(x,
               y,
               bias,
               transx=False,
               transy=False,
               with_gelu=False,
               use_addto=False):
    """Matmul + optional bias + optional GELU in one helper.

    This is the fallback composition of plain paddle ops; the fused
    `custom_fused_dense` kernel is disabled (see the commented-out
    custom_setup_ops import at the top of the file).

    Args:
        x, y: Input tensors for the matmul.
        bias: Bias tensor added to the matmul output, or None to skip.
        transx, transy (bool): Transpose `x` / `y` inside the matmul.
        with_gelu (bool): Apply the model's GELU activation to the result.
        use_addto (bool): Unused in this fallback path; kept so the
            signature stays compatible with the fused kernel.

    Returns:
        The resulting tensor.
    """
    out = paddle.matmul(x, y, transpose_x=transx, transpose_y=transy)
    if bias is not None:
        out = out + bias
    if with_gelu:
        out = get_activation()(out)
    return out
class BertConfig(object):
    """Configuration class to store the configuration of a `BertModel`."""

    def __init__(self, vocab_size_or_config_json_file, **kwargs):
        """Constructs BertConfig.
        Args:
            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in
                `BertModel` (int), or the path to a JSON config file (str).
                When a path is given no other kwargs may be passed.
            hidden_size: Size of the encoder layers and the pooler layer.
            num_hidden_layers: Number of hidden layers in the Transformer encoder.
            num_attention_heads: Number of attention heads for each attention layer in
                the Transformer encoder.
            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
                layer in the Transformer encoder.
            hidden_act: The non-linear activation function (function or string) in the
                encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
            hidden_dropout_prob: The dropout probabilitiy for all fully connected
                layers in the embeddings, encoder, and pooler.
            attention_probs_dropout_prob: The dropout ratio for the attention
                probabilities.
            max_position_embeddings: The maximum sequence length that this model might
                ever be used with. Typically set this to something large just in case
                (e.g., 512 or 1024 or 2048).
            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
                `BertModel`.
            initializer_range: The sttdev of the truncated_normal_initializer for
                initializing all weight matrices.
        """
        # NOTE: the original Python-2 `unicode` compatibility check was
        # removed; this file already uses Python-3-only syntax (f-strings),
        # so that branch was unreachable dead code.
        if isinstance(vocab_size_or_config_json_file, str):
            with open(
                    vocab_size_or_config_json_file, "r",
                    encoding='utf-8') as reader:
                json_config = json.loads(reader.read())
            assert not kwargs, "kwargs should be empty if config json file is provided"
            self._fill_dict(json_config)
        elif isinstance(vocab_size_or_config_json_file, int):
            self.vocab_size = vocab_size_or_config_json_file
            self._fill_dict(kwargs)
        else:
            raise ValueError(
                "First argument must be either a vocabulary size (int) "
                "or the path to a pretrained model config file (str)")

    @classmethod
    def from_dict(cls, json_object):
        """Constructs a `BertConfig` from a Python dictionary of parameters."""
        config = BertConfig(vocab_size_or_config_json_file=-1)
        config._fill_dict(json_object)
        return config

    @classmethod
    def from_json_file(cls, json_file):
        """Constructs a `BertConfig` from a json file of parameters."""
        with open(json_file, "r", encoding='utf-8') as reader:
            text = reader.read()
        return cls.from_dict(json.loads(text))

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

    def _fill_dict(self, kwargs=None):
        """Populate missing attributes with defaults, then apply `kwargs`."""
        defaults = {
            "vocab_size": 30522,
            "hidden_size": 768,
            "num_hidden_layers": 12,
            "num_attention_heads": 12,
            "intermediate_size": 3072,
            "hidden_act": "gelu",
            "hidden_dropout_prob": 0.1,
            "attention_probs_dropout_prob": 0.1,
            "max_position_embeddings": 512,
            "type_vocab_size": 2,
            "initializer_range": 0.02,
            "pool_act": "tanh",
            "pad_token_id": 0,
        }
        # fill defaults (never overwrite attributes that are already set)
        for key, value in defaults.items():
            if key not in self.__dict__:
                self.__dict__[key] = value
        # fill other values (explicit settings win over defaults)
        if kwargs:
            for key, value in kwargs.items():
                self.__dict__[key] = value
        # Embedding/padding logic elsewhere assumes the pad id is 0.
        assert self.pad_token_id == 0, "pad_token_id must be 0"
def transpose_2d(x):
    """Return a C-contiguous copy of the transpose of a 2-D array."""
    assert len(x.shape) == 2
    return np.ascontiguousarray(x.T)
class TFCkptHelper:
    def __init__(self, args, config, checkpoint_path, place):
        # Maps Paddle parameter names -> TF variable names, in insertion
        # order; populated by the model-building code.
        self.pd_vars_to_tf_vars = collections.OrderedDict()
        # Paddle parameter names whose 2-D weights are stored transposed
        # relative to the TF layout.
        self.transpose_vars = set()
        self.args = args
        self.config = config
        self.place = place
        self.checkpoint_path = checkpoint_path
        # Lazily-loaded pickled TF checkpoint; see the `tf_vars` property.
        self._tf_vars = None
        # When unpadded FMHA is enabled, Q/K/V projections are fused into a
        # single weight/bias parameter per encoder layer.
        self.fuse_attn_qkv = self.args.unpad_fmha
        # Per-layer fused QKV parameter names, filled in later by the model.
        self.attn_fused_qkv_weights = [
            None for _ in range(self.config.num_hidden_layers)
        ]
        self.attn_fused_qkv_biases = [
            None for _ in range(self.config.num_hidden_layers)
        ]
    @property
    def tf_vars(self):
        # Lazy-load and memoize the pickled TF checkpoint
        # (variable name -> numpy array dict).
        if self._tf_vars is None:
            self._tf_vars = load_pickled_tf_checkpoint(self.checkpoint_path)
        return self._tf_vars
    def save(self, output_path, get_parameter_func):
        """Export the Paddle parameters back into TF-checkpoint layout and
        pickle them to `output_path`.

        Fused QKV parameters are split back into separate query/key/value
        kernels and biases; transposed weights are transposed back; any TF
        variables not produced by the Paddle model are carried over from the
        original checkpoint unchanged.
        """
        tf_vars = {}
        if self.fuse_attn_qkv:
            # Un-fuse the per-layer QKV weight/bias into the three TF vars.
            for idx in range(self.config.num_hidden_layers):
                prefix = self._enc_prefix(idx) + "attention/self/"
                pd_weight_name = self.attn_fused_qkv_weights[idx]
                pd_weight_var = self._get_fp32_param(pd_weight_name,
                                                     get_parameter_func)
                assert len(pd_weight_var.shape) == 2
                pd_bias_name = self.attn_fused_qkv_biases[idx]
                pd_bias_var = self._get_fp32_param(pd_bias_name,
                                                   get_parameter_func)
                assert len(pd_bias_var.shape) == 1
                need_transpose = pd_weight_name in self.transpose_vars
                if need_transpose:  # split along dim 0 and transpose
                    assert pd_weight_var.shape[0] == 3 * pd_weight_var.shape[1]
                    assert pd_weight_var.shape[0] == pd_bias_var.shape[0]
                    tf_weights = np.split(pd_weight_var, 3, axis=0)
                    tf_weights = [transpose_2d(w) for w in tf_weights]
                else:  # split along dim 1
                    assert pd_weight_var.shape[1] == 3 * pd_weight_var.shape[0]
                    assert pd_weight_var.shape[1] == pd_bias_var.shape[0]
                    tf_weights = np.split(pd_weight_var, 3, axis=1)
                tf_biases = np.split(pd_bias_var, 3, axis=0)
                assert len(tf_weights) == 3
                assert len(tf_biases) == 3
                for i, name in enumerate(["query", "key", "value"]):
                    tf_var_name = prefix + name + "/"
                    weight_name = tf_var_name + "kernel"
                    bias_name = tf_var_name + "bias"
                    tf_vars[weight_name] = tf_weights[i]
                    tf_vars[bias_name] = tf_biases[i]
        # Export all directly-mapped parameters.
        for pd_var_name, tf_var_name in self.pd_vars_to_tf_vars.items():
            pd_var = self._get_fp32_param(pd_var_name, get_parameter_func)
            if "output_weights" in tf_var_name or pd_var_name in self.transpose_vars:
                pd_var = transpose_2d(pd_var)
            tf_vars[tf_var_name] = pd_var
        # Merge with the original checkpoint: keep untouched TF vars, and
        # trim padded embeddings/bias back to the original TF shapes.
        for key, tf_value in self.tf_vars.items():
            if key not in tf_vars:
                tf_vars[key] = tf_value
                continue
            pd_value = tf_vars[key]
            if tf_value.shape == pd_value.shape:
                continue
            if key == 'bert/embeddings/word_embeddings':
                assert len(tf_value.shape) == 2
                assert len(pd_value.shape) == 2
                assert tf_value.shape[1] == pd_value.shape[1]
                pd_value = pd_value[0:tf_value.shape[0]]
            elif key == 'cls/predictions/output_bias':
                assert len(tf_value.shape) == 1
                assert len(pd_value.shape) == 1
                pd_value = pd_value[0:tf_value.shape[0]]
            else:
                raise ValueError("unsupported key {}".format(key))
            tf_vars[key] = pd_value
        return save_pickled_tf_checkpoint(tf_vars, output_path)
    def load(self, get_parameter_func):
        """Load the TF checkpoint tensors held in ``self.tf_vars`` into the
        Paddle parameters resolved via ``get_parameter_func``.

        Args:
            get_parameter_func: callable mapping a Paddle parameter name to a
                tensor, or to a ``(param, fp32-master-param)`` pair.

        When ``self.fuse_attn_qkv`` is set, each layer's separate TF
        query/key/value kernel+bias tensors are concatenated into the single
        fused QKV weight/bias parameter first.
        """
        tf_vars = self.tf_vars
        loaded_var_names = set()
        if self.fuse_attn_qkv:
            for idx in range(self.config.num_hidden_layers):
                weights = []
                biases = []
                prefix = self._enc_prefix(idx) + "attention/self/"
                pd_weight_name = self.attn_fused_qkv_weights[idx]
                pd_bias_name = self.attn_fused_qkv_biases[idx]
                need_transpose = pd_weight_name in self.transpose_vars
                for name in ["query", "key", "value"]:
                    tf_var_name = prefix + name + "/"
                    weight_name = tf_var_name + "kernel"
                    bias_name = tf_var_name + "bias"
                    if utility.get_trainer_id() == 0:
                        paddle_bert_print_event(key='weights_initialization', metadata={'tensor':weight_name})
                        paddle_bert_print_event(key='weights_initialization', metadata={'tensor':bias_name})
                    # Transpose each Q/K/V piece when the fused Paddle weight
                    # is stored transposed relative to the TF kernel layout.
                    if need_transpose:
                        weights.append(transpose_2d(tf_vars[weight_name]))
                    else:
                        weights.append(tf_vars[weight_name])
                    biases.append(tf_vars[bias_name])
                    loaded_var_names.add(weight_name)
                    loaded_var_names.add(bias_name)
                # Concatenate Q/K/V: along dim 0 for the transposed layout,
                # along dim 1 otherwise (mirrors the split in the save path).
                weight = np.concatenate(
                    weights, axis=0 if need_transpose else 1)
                bias = np.concatenate(biases)
                weight_pd_vars = get_parameter_func(pd_weight_name)
                bias_pd_vars = get_parameter_func(pd_bias_name)
                self._set_var_value(
                    weight_pd_vars, weight,
                    self.attn_fused_qkv_weights[idx] + "/qkv/kernel",
                    prefix + "qkv/kernel")
                self._set_var_value(
                    bias_pd_vars, bias,
                    self.attn_fused_qkv_biases[idx] + "/qkv/bias",
                    prefix + "qkv/bias")
        # Load every explicitly recorded 1:1 mapping.
        for idx, (pd_var_name,
                  tf_var_name) in enumerate(self.pd_vars_to_tf_vars.items()):
            if utility.get_trainer_id() == 0:
                paddle_bert_print_event(key='weights_initialization', metadata={'tensor':tf_var_name})
            var_value = tf_vars[tf_var_name]
            # "output_weights" is stored transposed in the TF checkpoint;
            # transpose_vars marks Paddle-side transposed weights.
            if "output_weights" in tf_var_name or pd_var_name in self.transpose_vars:
                if utility.get_trainer_id() == 0:
                    print('{} needs to transpose'.format(tf_var_name))
                var_value = transpose_2d(var_value)
            pd_vars = get_parameter_func(pd_var_name)
            self._set_var_value(pd_vars, var_value, pd_var_name, tf_var_name)
            loaded_var_names.add(tf_var_name)
        # Sanity check: every recorded mapping must have been consumed; the
        # only leftover names allowed are the 6 per-layer Q/K/V kernel+bias
        # pieces that were folded into the fused QKV parameters.
        left_var_names = set()
        for var_name in self.pd_vars_to_tf_vars.values():
            if var_name not in loaded_var_names:
                left_var_names.add(var_name)
            else:
                loaded_var_names.remove(var_name)
        if self.fuse_attn_qkv:
            assert len(loaded_var_names
                       ) == 6 * self.config.num_hidden_layers, loaded_var_names
        else:
            assert len(loaded_var_names) == 0, loaded_var_names
        assert len(left_var_names) == 0, left_var_names
def _set_var_value(self, pd_vars, var_value, pd_var_name, tf_var_name):
if isinstance(pd_vars, (list, tuple)):
assert len(pd_vars) == 2
pd_var, master_pd_var = pd_vars
else:
pd_var = pd_vars
master_pd_var = None
pd_var_shape = tuple(pd_var.shape())
tf_var_shape = tuple(var_value.shape)
if pd_var_shape != tf_var_shape:
if utility.get_trainer_id() == 0:
print("{} vs {} shape differs: {} vs {}".format(
pd_var_name, tf_var_name, pd_var_shape, tf_var_shape))
assert len(pd_var_shape) == len(tf_var_shape)
slices = []
n = len(pd_var_shape)
for i in range(n):
assert pd_var_shape[i] >= tf_var_shape[i]
slices.append(slice(0, tf_var_shape[i], 1))
new_var_value = np.zeros(pd_var_shape, dtype=var_value.dtype)
new_var_value[slices] = var_value
var_value = new_var_value
if pd_var._dtype() == paddle.float16:
assert var_value.dtype == np.float32
if master_pd_var is not None:
assert master_pd_var._dtype() == paddle.float32
if utility.get_trainer_id() == 0:
print("Set master weight for {} {}".format(pd_var_name,
tf_var_name))
self._inplace_set_tensor(master_pd_var, var_value)
self._inplace_set_tensor(pd_var, var_value.astype(np.float16))
elif pd_var._dtype() == paddle.float32:
assert var_value.dtype == np.float32
assert master_pd_var is None
self._inplace_set_tensor(pd_var, var_value)
else:
raise TypeError("unsupported data type {}".format(pd_var._dtype()))
def _inplace_set_tensor(self, tensor, value):
old_ptr = tensor._ptr()
tensor.set(value, self.place)
new_ptr = tensor._ptr()
assert old_ptr == new_ptr
def _get_fp32_param(self, pd_var_name, get_parameter_func):
pd_var_name = self._to_pd_var_name(pd_var_name)
pd_vars = get_parameter_func(pd_var_name)
assert isinstance(pd_vars, (list, tuple))
assert len(pd_vars) == 2
pd_var, master_pd_var = pd_vars
if master_pd_var is not None:
assert pd_var._dtype() == paddle.float16
assert master_pd_var._dtype() == paddle.float32
assert pd_var.shape() == master_pd_var.shape()
return np.array(master_pd_var)
else:
assert pd_var._dtype() == paddle.float32
return np.array(pd_var)
def _enc_prefix(self, idx):
return "bert/encoder/layer_{}/".format(idx)
def _to_pd_var_name(self, var):
return var if isinstance(var, (str, bytes)) else var.name
def _record_pd_vars(self,
pd_vars,
tf_vars,
tf_var_prefix="",
weight_transpose=None):
if not isinstance(pd_vars, (list, tuple)):
pd_vars = [pd_vars]
pd_vars = [self._to_pd_var_name(v) for v in pd_vars]
if not isinstance(tf_vars, (list, tuple)):
tf_vars = [tf_vars]
tf_vars = [tf_var_prefix + v for v in tf_vars]
assert len(pd_vars) == len(tf_vars)
for pd_var, tf_var in zip(pd_vars, tf_vars):
assert pd_var not in self.pd_vars_to_tf_vars, pd_var
self.pd_vars_to_tf_vars[pd_var] = tf_var
if weight_transpose:
assert len(pd_vars) == 2
self.transpose_vars.add(pd_vars[0])
def embeddings(self, pd_vars):
return self._record_pd_vars(pd_vars, [
"word_embeddings", "position_embeddings", "token_type_embeddings"
], "bert/embeddings/")
def norm_after_embeddings(self, pd_vars):
return self._record_pd_vars(pd_vars, ["gamma", "beta"],
"bert/embeddings/LayerNorm/")
def enc_attn_query_fc(self, pd_vars, idx, weight_transpose=None):
assert not self.fuse_attn_qkv
prefix = self._enc_prefix(idx) + "attention/self/query/"
return self._record_pd_vars(pd_vars, ["kernel", "bias"], prefix,
weight_transpose)
def enc_attn_key_fc(self, pd_vars, idx, weight_transpose=False):
assert not self.fuse_attn_qkv
prefix = self._enc_prefix(idx) + "attention/self/key/"
return self._record_pd_vars(pd_vars, ["kernel", "bias"], prefix,
weight_transpose)
def enc_attn_value_fc(self, pd_vars, idx, weight_transpose=False):
assert not self.fuse_attn_qkv
prefix = self._enc_prefix(idx) + "attention/self/value/"
return self._record_pd_vars(pd_vars, ["kernel", "bias"], prefix,
weight_transpose)
def enc_fused_attn_qkv_fc(self, pd_vars, idx, weight_transpose=False):
assert self.fuse_attn_qkv
assert self.attn_fused_qkv_weights[idx] is None
assert self.attn_fused_qkv_biases[idx] is None
weight, bias = pd_vars
self.attn_fused_qkv_weights[idx] = self._to_pd_var_name(weight)
self.attn_fused_qkv_biases[idx] = self._to_pd_var_name(bias)
if weight_transpose:
self.transpose_vars.add(self.attn_fused_qkv_weights[idx])
def enc_attn_proj_fc(self, pd_vars, idx, weight_transpose=False):
prefix = self._enc_prefix(idx) + "attention/output/dense/"
return self._record_pd_vars(pd_vars, ["kernel", "bias"], prefix,
weight_transpose)
def enc_attn_norm(self, pd_vars, idx):
prefix = self._enc_prefix(idx) + "attention/output/LayerNorm/"
return self._record_pd_vars(pd_vars, ["gamma", "beta"], prefix)
def enc_intermediate_fc(self, pd_vars, idx, weight_transpose=False):
prefix = self._enc_prefix(idx) + "intermediate/dense/"
return self._record_pd_vars(pd_vars, ["kernel", "bias"], prefix,
weight_transpose)
def enc_output_fc(self, pd_vars, idx, weight_transpose=False):
prefix = self._enc_prefix(idx) + "output/dense/"
return self._record_pd_vars(pd_vars, ["kernel", "bias"], prefix,
weight_transpose)
def enc_output_norm(self, pd_vars, idx):
prefix = self._enc_prefix(idx) + "output/LayerNorm/"
return self._record_pd_vars(pd_vars, ["gamma", "beta"], prefix)
def pooler_fc(self, pd_vars, weight_transpose=False):
return self._record_pd_vars(pd_vars, ["kernel", "bias"],
"bert/pooler/dense/", weight_transpose)
def cls_pred_trans_fc(self, pd_vars, weight_transpose=False):
return self._record_pd_vars(pd_vars, ["kernel", "bias"],
"cls/predictions/transform/dense/",
weight_transpose)
def cls_pred_trans_norm(self, pd_vars):
return self._record_pd_vars(pd_vars, ["gamma", "beta"],
"cls/predictions/transform/LayerNorm/")
def cls_pred_fc_bias(self, pd_vars):
return self._record_pd_vars(pd_vars, "output_bias", "cls/predictions/")
def cls_seq_relation_fc(self, pd_vars, weight_transpose=False):
return self._record_pd_vars(pd_vars, ["output_weights", "output_bias"],
"cls/seq_relationship/", weight_transpose)
class FMHA(Layer):
    """Fused multi-head attention over unpadded (variable-length) input.

    Holds one fused QKV projection (``Wqkv``/``Bqkv``) and dispatches to the
    ``custom_fmha`` kernel; the input is a flat ``[ntokens, hidden]`` tensor
    with per-sequence boundaries given by ``cu_seqlens``.
    """

    def __init__(self, config):
        super(FMHA, self).__init__()
        self.p_dropout = config.attention_probs_dropout_prob
        self.h = config.num_attention_heads
        self.hidden_size = config.hidden_size
        self.d = self.hidden_size // self.h
        self.fused_qkv_bias = config.fused_bias_mha
        self.weight_transpose = True
        self.use_unpad_fmha_mke_opt = config.unpad_fmha_mke_opt
        assert self.d * self.h == self.hidden_size, "Invalid hidden size/num_heads"
        self._dtype = self._helper.get_default_dtype()
        # Weight layout depends on whether the matmul transposes it.
        if self.weight_transpose:
            qkv_shape = [3 * config.hidden_size, config.hidden_size]
        else:
            qkv_shape = [config.hidden_size, 3 * config.hidden_size]
        self.Wqkv = self.create_parameter(
            shape=qkv_shape, attr=None, dtype=self._dtype, is_bias=False)
        self.Bqkv = self.create_parameter(
            shape=[3 * config.hidden_size],
            attr=None,
            dtype=self._dtype,
            is_bias=True)

    def forward(self,
                hidden_states,
                cu_seqlens,
                host_cu_seqlens,
                max_s,
                is_training=True):
        """Apply the fused QKV projection, then the custom FMHA kernel."""
        if self.fused_qkv_bias:
            # Single fused matmul + bias-add kernel.
            qkv = fuse_dense(
                hidden_states,
                self.Wqkv,
                self.Bqkv,
                transx=False,
                transy=self.weight_transpose)
        else:
            qkv = paddle.matmul(
                hidden_states,
                self.Wqkv,
                transpose_x=False,
                transpose_y=self.weight_transpose)
            qkv = qkv + self.Bqkv
        # [ntokens, 3, num_heads, head_dim] as expected by custom_fmha.
        qkv = paddle.reshape(qkv, [-1, 3, self.h, self.d])
        out, _ = custom_fmha(
            qkv,
            cu_seqlens,
            host_cu_seqlens,
            not is_training,
            self.p_dropout,
            zero_tensors=False,
            use_fmha_mke_opt=self.use_unpad_fmha_mke_opt)
        return paddle.reshape(out, [-1, self.hidden_size])
class BertSelfAttention(Layer):
    """Reference (non-fused) BERT self-attention."""

    def __init__(self, config):
        super(BertSelfAttention, self).__init__()
        assert config.hidden_size % config.num_attention_heads == 0
        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size /
                                       config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)
        self.weight_transpose = False
        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.softmax = nn.Softmax(axis=-1)

    def transpose_for_scores(self, x):
        # [..., hidden] -> [batch, heads, seq, head_dim]
        head_dims = [self.num_attention_heads, self.attention_head_size]
        x = paddle.reshape(x, [0] * (len(x.shape) - 1) + head_dims)
        return paddle.transpose(x, [0, 2, 1, 3])

    def transpose_key_for_scores(self, x):
        # [..., hidden] -> [batch, heads, head_dim, seq]
        # (keys come out pre-transposed for the score matmul)
        head_dims = [self.num_attention_heads, self.attention_head_size]
        x = paddle.reshape(x, [0] * (len(x.shape) - 1) + head_dims)
        return paddle.transpose(x, [0, 2, 3, 1])

    def forward(self, hidden_states, attention_mask):
        """Return the attention context, shaped back to [..., all_head_size].

        ``attention_mask`` is additive (large negative on padded positions).
        """
        q = self.transpose_for_scores(self.query(hidden_states))
        k = self.transpose_key_for_scores(self.key(hidden_states))
        v = self.transpose_for_scores(self.value(hidden_states))
        scores = paddle.matmul(q, k)
        scores = scores / math.sqrt(self.attention_head_size)
        scores = scores + attention_mask.unsqueeze(1).unsqueeze(2)
        probs = self.dropout(self.softmax(scores))
        context = paddle.matmul(probs, v)
        context = paddle.transpose(context, [0, 2, 1, 3])
        merged_shape = [0] * (len(context.shape) - 2) + [self.all_head_size]
        return paddle.reshape(context, merged_shape)
class BertAttention(Layer):
    """Self-attention followed by its output projection/residual block."""

    def __init__(self, config):
        super(BertAttention, self).__init__()
        self.self = BertSelfAttention(config)
        self.output = BertSelfOutput(config)

    def forward(self, input_tensor, attention_mask):
        context = self.self(input_tensor, attention_mask)
        return self.output(context, input_tensor)
class BertLayer(Layer):
    """One transformer encoder layer: attention block + feed-forward block."""

    def __init__(self, config):
        super(BertLayer, self).__init__()
        assert use_nv_input
        assert not config.pad_fmha
        self.unpad = config.unpad
        if config.unpad_fmha:
            # Unpadded fused MHA requires the unpad pipeline.
            assert self.unpad
            self.attention = UnpadFMHABertAttention(config)
        else:
            self.attention = BertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

    def forward(self,
                hidden_states,
                attention_mask,
                seqlen=None,
                host_seqlen=None,
                batch=None):
        if self.unpad:
            attn_out = self.attention(hidden_states, attention_mask, seqlen,
                                      host_seqlen, batch)
        else:
            attn_out = self.attention(hidden_states, attention_mask)
        ffn_out = self.intermediate(attn_out)
        return self.output(ffn_out, attn_out)
class BertEncoder(Layer):
    """Stack of BERT encoder layers.

    With ``use_nv_input`` an NV-style ``BertLayer`` stack is built (optionally
    using the unpadded fused-MHA path); otherwise a standard
    ``nn.TransformerEncoder`` is used.
    """

    def __init__(self, config):
        super(BertEncoder, self).__init__()
        if use_nv_input:
            self.layers = nn.LayerList(
                [BertLayer(config) for _ in range(config.num_hidden_layers)])
        else:
            encoder_layer = nn.TransformerEncoderLayer(
                config.hidden_size,
                config.num_attention_heads,
                config.intermediate_size,
                dropout=config.hidden_dropout_prob,
                activation=config.hidden_act,
                attn_dropout=config.attention_probs_dropout_prob,
                act_dropout=0)
            self.encoder = nn.TransformerEncoder(encoder_layer,
                                                 config.num_hidden_layers)
        self.num_attention_heads = config.num_attention_heads
        self.unpad = config.unpad
        self.unpad_embed = config.unpad_embed
        self.unpad_fmha = config.unpad_fmha
        self.pad_fmha = config.pad_fmha
        self.hidden_size = config.hidden_size
        self.maxseqlen = config.max_seq_length

    def record_ckpt_vars(self, ckpt, idx):
        """Register layer ``idx``'s parameters with the checkpoint mapper
        ``ckpt`` so they can be converted to/from TF-style variable names."""
        if use_nv_input:
            layer = self.layers[idx]
            attn = layer.attention
            if isinstance(attn, UnpadFMHABertAttention):
                # Fused QKV: single weight/bias pair covers Q, K and V.
                ckpt.enc_fused_attn_qkv_fc([attn.fmha.Wqkv, attn.fmha.Bqkv],
                                           idx, attn.fmha.weight_transpose)
            else:
                ckpt.enc_attn_query_fc(
                    [attn.self.query.weight, attn.self.query.bias], idx,
                    attn.self.weight_transpose)
                ckpt.enc_attn_key_fc(
                    [attn.self.key.weight, attn.self.key.bias], idx,
                    attn.self.weight_transpose)
                ckpt.enc_attn_value_fc(
                    [attn.self.value.weight, attn.self.value.bias], idx,
                    attn.self.weight_transpose)
            ckpt.enc_attn_proj_fc(
                [attn.output.dense.weight, attn.output.dense.bias], idx,
                attn.output.weight_transpose)
            # The LayerNorm parameters live on a different submodule depending
            # on whether the fused dropout+add+LN kernel is enabled.
            if attn.output.fused_dropout:
                ckpt.enc_attn_norm([
                    attn.output.fused_dropout_add_ln.weight,
                    attn.output.fused_dropout_add_ln.bias
                ], idx)
            else:
                ckpt.enc_attn_norm([
                    attn.output.layer_norm.weight, attn.output.layer_norm.bias
                ], idx)
            intermediate = layer.intermediate
            last_output = layer.output
            ckpt.enc_intermediate_fc(
                [intermediate.dense.weight, intermediate.dense.bias], idx,
                intermediate.weight_transpose)
            ckpt.enc_output_fc(
                [last_output.dense.weight, last_output.dense.bias], idx,
                last_output.weight_transpose)
            if last_output.fused_dropout:
                ckpt.enc_output_norm([
                    last_output.fused_dropout_add_ln.weight,
                    last_output.fused_dropout_add_ln.bias
                ], idx)
            else:
                ckpt.enc_output_norm([
                    last_output.layer_norm.weight, last_output.layer_norm.bias
                ], idx)
        else:
            # Standard paddle TransformerEncoder layer: weights are never
            # stored transposed.
            layer = self.encoder.layers[idx]
            attn = layer.self_attn
            ckpt.enc_attn_query_fc([attn.q_proj.weight, attn.q_proj.bias], idx,
                                   False)
            ckpt.enc_attn_key_fc([attn.k_proj.weight, attn.k_proj.bias], idx,
                                 False)
            ckpt.enc_attn_value_fc([attn.v_proj.weight, attn.v_proj.bias], idx,
                                   False)
            ckpt.enc_attn_proj_fc([attn.out_proj.weight, attn.out_proj.bias],
                                  idx, False)
            ckpt.enc_attn_norm([layer.norm1.weight, layer.norm1.bias], idx)
            ckpt.enc_intermediate_fc(
                [layer.linear1.weight, layer.linear1.bias], idx, False)
            ckpt.enc_output_fc([layer.linear2.weight, layer.linear2.bias], idx,
                               False)
            ckpt.enc_output_norm([layer.norm2.weight, layer.norm2.bias], idx)

    def forward(self,
                hidden_states,
                attention_mask,
                output_all_encoded_layers=False,
                batch=56,
                maxseqlen=512,
                hidden_size=1024,
                zero_tensor=None,
                attention_indices=None,
                new_attention_mask=None,
                seqlen=None,
                cu_seqlens=None,
                host_cu_seqlens=None,
                actual_seqlens=None,
                maxseqlen_in_batch=None):
        """Run the encoder stack; dispatches to the NV-input path when
        ``use_nv_input`` is set."""
        if use_nv_input:
            return self.forward_with_nv_input(
                hidden_states, attention_mask, output_all_encoded_layers, batch,
                maxseqlen, hidden_size, zero_tensor, attention_indices,
                new_attention_mask, seqlen, cu_seqlens, host_cu_seqlens,
                actual_seqlens, maxseqlen_in_batch)
        if output_all_encoded_layers:
            # Run layer-by-layer so every intermediate output can be kept.
            output = hidden_states
            encoder_outputs = []
            for mod in self.encoder.layers:
                output = mod(output, src_mask=attention_mask)
                encoder_outputs.append(output)
            if self.encoder.norm is not None:
                encoder_outputs[-1] = self.encoder.norm(encoder_outputs[-1])
            return encoder_outputs
        else:
            sequence_output = self.encoder(hidden_states, attention_mask)
            return [sequence_output]

    def forward_with_nv_input(self,
                              hidden_states,
                              attention_mask,
                              output_all_encoded_layers=False,
                              batch=56,
                              maxseqlen=512,
                              hidden_size=1024,
                              zero_tensor=None,
                              attention_indices=None,
                              new_attention_mask=None,
                              seqlen=None,
                              cu_seqlens=None,
                              host_cu_seqlens=None,
                              actual_seqlens=None,
                              maxseqlen_in_batch=None):
        # Unpad inputs and mask. It will remove tokens that are padded. Assume ntokens is total number of tokens (padded and non-padded)
        # and ntokens_unpad is total number of non-padded tokens. Then unpadding performs the following compression of the inputs:
        # hidden_states[ntokens,hidden] -> hidden_states[ntokens_unpad,hidden]
        if not self.unpad_embed and self.unpad_fmha:
            if self.unpad_fmha:
                batch = hidden_states.shape[0]
                maxseqlen = hidden_states.shape[1]
                assert maxseqlen == self.maxseqlen
                hidden_size = hidden_states.shape[2]
                assert hidden_size == self.hidden_size
                # zero_tensor is the padded-size buffer used to scatter the
                # unpadded outputs back at the end.
                zero_tensor = paddle.zeros_like(hidden_states)
                zero_tensor = paddle.reshape(zero_tensor,
                                             [-1, self.hidden_size])
                # attention_indices: indices of the non-zero entries of the
                #     flattened attention mask.
                # seqlen: per-row sum over [bs, max_seq_len], i.e. the actual
                #     sequence length of each row (1-D).
                # cu_seqlens: prefix sum of seqlen.
                # actual_seqlens: same values as seqlen.
                # maxseqlen_in_batch: the max seqlen in a batch.
                print("maxseqlen_in_batch = ", maxseqlen_in_batch)
                # Flatten and keep only the non-padded token rows.
                hidden_states = paddle.reshape(hidden_states,
                                               [-1, self.hidden_size])
                hidden_states = paddle.gather(hidden_states, attention_indices)
        elif self.unpad_fmha:
            attention_mask = new_attention_mask
        all_encoder_layers = []

        # NOTE(review): defined but not used in this path — possibly a
        # leftover hook for gradient checkpointing.
        def custom(start, end):
            def custom_forward(*inputs):
                layers = self.layers[start:end]
                x_ = inputs[0]
                for layer in layers:
                    x_ = layer(x_, inputs[1])
                return x_

            return custom_forward

        for i, layer_module in enumerate(self.layers):
            if seqlen is None and batch is None:
                hidden_states = layer_module(hidden_states, attention_mask)
            else:
                assert seqlen is not None
                assert batch is not None
                if self.unpad_fmha:
                    hidden_states = layer_module(hidden_states, cu_seqlens,
                                                 host_cu_seqlens,
                                                 maxseqlen_in_batch)
                    print("hidden_states:", hidden_states)
                else:
                    hidden_states = layer_module(hidden_states, attention_mask,
                                                 seqlen, batch)
            if output_all_encoded_layers:
                all_encoder_layers.append(hidden_states)
        # Pad inputs and mask. It will insert back zero-padded tokens. Assume ntokens is total number of tokens (padded and non-padded)
        # and ntokens_unpad is total number of non-padded tokens. Then padding performs the following de-compression:
        # hidden_states[ntokens_unpad,hidden] -> hidden_states[ntokens,hidden]
        if self.unpad_fmha:
            hidden_states = paddle.scatter(zero_tensor, attention_indices,
                                           hidden_states)
            # todo: is self.maxseqlen same as maxseqlen?
            hidden_states = paddle.reshape(
                hidden_states, [batch, self.maxseqlen, self.hidden_size])
        all_encoder_layers.append(hidden_states)
        return all_encoder_layers
class UnpadFMHABertAttention(Layer):
    """Attention block backed by the fused-MHA kernel on unpadded input."""

    def __init__(self, config):
        super(UnpadFMHABertAttention, self).__init__()
        self.fmha = FMHA(config)
        self.output = BertSelfOutput(config)

    def forward(self,
                input_tensor,
                cu_seqlens,
                host_cu_seqlens,
                max_s,
                batch_size=None):
        context = self.fmha(
            input_tensor,
            cu_seqlens,
            host_cu_seqlens,
            max_s,
            is_training=self.training)
        return self.output(context, input_tensor)
class FusedDropoutResidualLn(Layer):
    """Fused ``LayerNorm(dropout(hidden_states) + input_tensor)``.

    Wraps the ``custom_fused_dropout_residual_ln`` kernel with LayerNorm
    scale/shift parameters.
    """

    def __init__(self, config, normalized_shape, epsilon=1e-12):
        super(FusedDropoutResidualLn, self).__init__()
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = [normalized_shape]
        self._normalized_shape = list(normalized_shape)
        param_shape = [np.prod(self._normalized_shape)]
        self._weight_attr = None
        self._bias_attr = None
        self.weight = self.create_parameter(
            attr=self._weight_attr,
            shape=param_shape,
            default_initializer=Constant(1.0))
        self.bias = self.create_parameter(
            attr=self._bias_attr, shape=param_shape, is_bias=True)
        self.p = config.hidden_dropout_prob
        self.epsilon = epsilon
        # Kept for backward compatibility with external readers; forward()
        # now re-derives the flag from the current training mode (the value
        # cached here went stale after .train()/.eval() switches).
        self.is_test = not self.training
        # todo: use default configs.
        self.fix_seed = False
        self.is_upscale_in_train = True
        self.seed_val = 0

    def forward(self, hidden_states, input_tensor):
        # BUGFIX: use the *current* training mode rather than the value cached
        # at construction time, so .eval()/.train() toggles disable/enable
        # dropout as expected.
        is_test = not self.training
        out, dropout_mask, ln_mean, ln_var, dropout_residual_out = custom_fused_dropout_residual_ln(
            hidden_states, input_tensor, self.weight, self.bias, self.epsilon,
            is_test, self.fix_seed, self.seed_val,
            self.is_upscale_in_train, self.p)
        return out

    def extra_repr(self):
        return 'normalized_shape={}, epsilon={}'.format(self._normalized_shape,
                                                        self.epsilon)
# Dense layer backed by the fused matmul+bias kernel. Supports both the NN
# layout (weight stored [in, out]) and the NT layout (weight stored
# transposed, [out, in]).
class FusedDense(Layer):
    def __init__(self,
                 in_features,
                 out_features,
                 weight_transpose=False,
                 weight_attr=None,
                 bias_attr=None,
                 with_gelu=False,
                 name=None):
        super(FusedDense, self).__init__()
        self._dtype = self._helper.get_default_dtype()
        self._weight_attr = weight_attr
        self._bias_attr = bias_attr
        self.weight_transpose = weight_transpose
        # Storage layout follows the transpose flag.
        weight_shape = ([out_features, in_features] if weight_transpose else
                        [in_features, out_features])
        self.weight = self.create_parameter(
            shape=weight_shape,
            attr=self._weight_attr,
            dtype=self._dtype,
            is_bias=False)
        self.bias = self.create_parameter(
            shape=[out_features],
            attr=self._bias_attr,
            dtype=self._dtype,
            is_bias=True)
        self.with_gelu = with_gelu
        self.name = name

    def forward(self, hidden_states):
        """Fused matmul + bias add (+ optional GELU)."""
        return fuse_dense(
            hidden_states,
            self.weight,
            self.bias,
            transx=False,
            transy=self.weight_transpose,
            with_gelu=self.with_gelu)
class BertSelfOutput(Layer):
    """Attention output projection + dropout + residual + LayerNorm."""

    def __init__(self, config):
        super(BertSelfOutput, self).__init__()
        self.fused_fc_bias = config.fused_bias_fc
        self.fused_dropout = config.fused_dropout_add_ln
        if self.fused_fc_bias:
            self.dense = FusedDense(
                config.hidden_size,
                config.hidden_size,
                weight_transpose=config.weight_transpose)
            self.weight_transpose = config.weight_transpose
        else:
            self.dense = nn.Linear(config.hidden_size, config.hidden_size)
            self.weight_transpose = False
        if self.fused_dropout:
            self.fused_dropout_add_ln = FusedDropoutResidualLn(
                config, config.hidden_size, epsilon=1e-12)
        else:
            self.layer_norm = nn.LayerNorm(config.hidden_size, epsilon=1e-12)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        if self.fused_dropout:
            return self.fused_dropout_add_ln(hidden_states, input_tensor)
        hidden_states = self.dropout(hidden_states) + input_tensor
        return self.layer_norm(hidden_states)
class BertIntermediate(Layer):
    """Feed-forward expansion (hidden -> intermediate) with activation."""

    def __init__(self, config):
        super(BertIntermediate, self).__init__()
        self.fused_fc_bias = config.fused_bias_fc
        if self.fused_fc_bias:
            # The fused dense already applies GELU, so no separate activation.
            self.weight_transpose = config.weight_transpose
            self.dense = FusedDense(
                config.hidden_size,
                config.intermediate_size,
                weight_transpose=self.weight_transpose,
                with_gelu=True)
        else:
            self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
            self.weight_transpose = False
            self.intermediate_act_fn = get_activation()

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        if not self.fused_fc_bias:
            hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states
class BertOutput(Layer):
    """FFN projection back to hidden size + dropout + residual + LayerNorm."""

    def __init__(self, config):
        super(BertOutput, self).__init__()
        self.fused_fc_bias = config.fused_bias_fc
        self.fused_dropout = config.fused_dropout_add_ln
        if self.fused_fc_bias:
            self.dense = FusedDense(
                config.intermediate_size,
                config.hidden_size,
                weight_transpose=config.weight_transpose)
            self.weight_transpose = config.weight_transpose
        else:
            self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
            self.weight_transpose = False
        if self.fused_dropout:
            self.fused_dropout_add_ln = FusedDropoutResidualLn(
                config, config.hidden_size, epsilon=1e-12)
        else:
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            self.layer_norm = nn.LayerNorm(config.hidden_size, epsilon=1e-12)
        # todo: add fused_dropout opt.
        self.p = config.hidden_dropout_prob

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        if self.fused_dropout:
            return self.fused_dropout_add_ln(hidden_states, input_tensor)
        hidden_states = self.dropout(hidden_states) + input_tensor
        return self.layer_norm(hidden_states)
class BertEmbeddings(Layer):
    """
    Include embeddings from word, position and token_type embeddings.

    With ``config.unpad_embed`` enabled, padded token positions are dropped
    (gathered out) before the word/position lookups, and the padded-size
    zero buffer needed to scatter results back is returned alongside.
    """

    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 hidden_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 config=None):
        super(BertEmbeddings, self).__init__()
        self.unpad_embed = False
        if config is not None:
            self.unpad_embed = config.unpad_embed
            self.unpad_fmha = config.unpad_fmha
        if self.unpad_embed:
            self.batch_size = config.batch_size
            self.max_seq_length = config.max_seq_length
        self.hidden_size = hidden_size
        self.word_embeddings = nn.Embedding(
            vocab_size, hidden_size, padding_idx=0)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
        self.layer_norm = nn.LayerNorm(hidden_size, epsilon=1e-12)
        self.dropout = nn.Dropout(hidden_dropout_prob)

    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                attention_indices=None,
                seqlen=None,
                cu_seqlens=None,
                actual_seqlens=None,
                maxseqlen_in_batch=None):
        """Return summed+normalized embeddings; in unpad mode additionally
        return the current batch size and the padded-size zero buffer."""
        if position_ids is None:
            position_ids = gen_pos_id(input_ids)
        if token_type_ids is None:
            token_type_ids = paddle.zeros_like(input_ids)
        # todo(@limin29): in order to construct the shape of zero_tensor, we use pad method to compute token_type_embeddings.
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        if self.unpad_embed:
            assert self.unpad_fmha
            assert attention_mask is not None
            assert attention_indices is not None
            assert seqlen is not None
            assert cu_seqlens is not None
            assert maxseqlen_in_batch is not None
            cur_batch_size = input_ids.shape[0]
            # Padded-size zero buffer, used later to scatter unpadded
            # encoder outputs back to the padded layout.
            zero_tensor = paddle.zeros_like(token_type_embeddings)
            zero_tensor = paddle.reshape(zero_tensor, [-1, self.hidden_size])
            # attention_indices: indices of the non-zero entries of the
            #     flattened attention mask.
            # seqlen: per-row sum over [bs, max_seq_len], i.e. the actual
            #     sequence length of each row (1-D).
            # cu_seqlens: prefix sum of seqlen.
            # actual_seqlens: same values as seqlen.
            # maxseqlen_in_batch: the max seqlen in a batch.
            print("maxseqlen_in_batch = ", maxseqlen_in_batch)
            # Drop the padded positions before the remaining lookups.
            input_ids = paddle.reshape(input_ids, [-1])
            input_ids = paddle.gather(input_ids, attention_indices)
            position_ids = paddle.reshape(position_ids, [-1])
            position_ids = paddle.gather(position_ids, attention_indices)
            # token_type_embeddings were computed padded (see todo above), so
            # gather them down to the unpadded token set too.
            token_type_embeddings = paddle.reshape(token_type_embeddings,
                                                   [-1, self.hidden_size])
            token_type_embeddings = paddle.gather(token_type_embeddings,
                                                  attention_indices)
        input_embedings = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        embeddings = input_embedings + position_embeddings + token_type_embeddings
        embeddings = self.layer_norm(embeddings)
        embeddings = self.dropout(embeddings)
        if not self.unpad_embed:
            return embeddings
        else:
            return embeddings, cur_batch_size, zero_tensor
class BertPooler(Layer):
    """
    Pool the result of BertEncoder.
    """

    def __init__(self, hidden_size, pool_act="tanh"):
        super(BertPooler, self).__init__()
        self.dense = FusedDense(hidden_size, hidden_size)
        self.weight_transpose = self.dense.weight_transpose
        self.activation = nn.Tanh()
        self.pool_act = pool_act
        assert self.pool_act == "tanh"

    def forward(self, hidden_states):
        # "Pool" by transforming the hidden state of the first ([CLS]) token.
        cls_state = hidden_states[:, 0]
        pooled_output = self.dense(cls_state)
        if self.pool_act == "tanh":
            pooled_output = self.activation(pooled_output)
        return pooled_output
class BertModel(nn.Layer):
"""
The bare BERT Model transformer outputting raw hidden-states without any specific head on top.
This model inherits from :class:`~paddlenlp.transformers.model_utils.PretrainedModel`.
Refer to the superclass documentation for the generic methods.
This model is also a Paddle `paddle.nn.Layer <https://www.paddlepaddle.org.cn/documentation
/docs/en/api/paddle/fluid/dygraph/layers/Layer_en.html>`__ subclass. Use it as a regular Paddle Layer
and refer to the Paddle documentation for all matter related to general usage and behavior.
Args:
vocab_size (int):
Vocabulary size of `inputs_ids` in `BertModel`. Also is the vocab size of token embedding matrix.
Defines the number of different tokens that can be represented by the `inputs_ids` passed when calling `BertModel`.
hidden_size (int, optional):
Dimensionality of the embedding layer, encoder layer and pooler layer. Defaults to `768`.
num_hidden_layers (int, optional):
Number of hidden layers in the Transformer encoder. Defaults to `12`.
num_attention_heads (int, optional):
Number of attention heads for each attention layer in the Transformer encoder.
Defaults to `12`.
intermediate_size (int, optional):
Dimensionality of the feed-forward (ff) layer in the encoder. Input tensors
to ff layers are firstly projected from `hidden_size` to `intermediate_size`,
and then projected back to `hidden_size`. Typically `intermediate_size` is larger than `hidden_size`.
Defaults to `3072`.
hidden_act (str, optional):
The non-linear activation function in the feed-forward layer.
``"gelu"``, ``"relu"`` and any other paddle supported activation functions
are supported. Defaults to `"gelu"`.
hidden_dropout_prob (float, optional):
The dropout probability for all fully connected layers in the embeddings and encoder.
Defaults to `0.1`.
attention_probs_dropout_prob (float, optional):
The dropout probability used in MultiHeadAttention in all encoder layers to drop some attention target.
Defaults to `0.1`.
max_position_embeddings (int, optional):
The maximum value of the dimensionality of position encoding, which dictates the maximum supported length of an input
sequence. Defaults to `512`.
type_vocab_size (int, optional):
The vocabulary size of `token_type_ids`.
Defaults to `16`.
initializer_range (float, optional):
The standard deviation of the normal initializer.
Defaults to 0.02.
pad_token_id (int, optional):
The index of padding token in the token vocabulary.
Defaults to `0`.
pool_act (str, optional):
The non-linear activation function in the pooling layer.
Defaults to `"tanh"`.
"""
def __init__(self, config):
    """Build the BERT backbone from `config`.

    Args:
        config: model configuration carrying sizes, dropout rates and the
            padding/unpadding execution switches.
    """
    super(BertModel, self).__init__()
    # Mirror the padding/unpadding execution switches onto the instance
    # so forward() can branch on them cheaply.
    for flag in ('unpad', 'pad_fmha', 'unpad_embed', 'unpad_fmha'):
        setattr(self, flag, getattr(config, flag))
    self.pad_token_id = config.pad_token_id
    self.initializer_range = config.initializer_range
    self.num_hidden_layers = config.num_hidden_layers
    self.maxseqlen = config.max_seq_length
    self.hidden_size = config.hidden_size
    # Sub-modules: embedding lookup, transformer encoder stack, and the
    # [CLS] pooler.
    self.embeddings = BertEmbeddings(
        config.vocab_size, config.hidden_size, config.hidden_dropout_prob,
        config.max_position_embeddings, config.type_vocab_size, config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config.hidden_size, config.pool_act)
def record_ckpt_vars(self, ckpt):
    """Register the backbone's parameters with a TF checkpoint helper.

    Each call below tells `ckpt` which paddle parameters correspond to
    which TensorFlow checkpoint tensors; the actual load happens later
    via `ckpt.load(...)`.

    Args:
        ckpt: a TFCkptHelper-like recorder object.
    """
    emb = self.embeddings
    ckpt.embeddings([
        emb.word_embeddings.weight,
        emb.position_embeddings.weight,
        emb.token_type_embeddings.weight,
    ])
    ckpt.norm_after_embeddings([
        emb.layer_norm.weight,
        emb.layer_norm.bias,
    ])
    # One registration per transformer layer.
    for idx in range(self.num_hidden_layers):
        self.encoder.record_ckpt_vars(ckpt, idx)
    pooler = self.pooler
    ckpt.pooler_fc([pooler.dense.weight, pooler.dense.bias],
                   pooler.weight_transpose)
def forward(self,
            input_ids,
            token_type_ids=None,
            position_ids=None,
            attention_mask=None,
            seq_len=None,
            prefix_sum_seq_len=None,
            host_prefix_sum_seq_len=None,
            max_seq_len=None,
            nonzeros_indices=None,
            output_hidden_states=False):
    """Run embeddings -> encoder -> pooler.

    Fix: removed the two debug `print` calls that dumped the full
    last-layer activation and pooled output tensors on every forward
    pass (noisy and expensive during training).

    Args:
        input_ids (Tensor): token ids.
        token_type_ids (Tensor, optional): segment ids; defaulted to
            zeros in the NV-input path when absent.
        position_ids (Tensor, optional): position ids.
        attention_mask (Tensor, optional): padding mask. In the NV-input
            path a {0,1} mask is converted to additive form unless the
            unpad/pad_fmha kernels consume it directly; otherwise it is
            derived from `input_ids == pad_token_id`.
        seq_len, prefix_sum_seq_len, host_prefix_sum_seq_len,
        max_seq_len, nonzeros_indices: unpadding metadata; all required
            when `unpad_embed` or `unpad_fmha` is enabled.
        output_hidden_states (bool, optional): when True, return every
            encoder layer's hidden states instead of only the last.

    Returns:
        tuple: (encoder output(s), pooled [CLS] output).
    """
    # NOTE(review): `use_nv_input` is a module-level flag defined
    # elsewhere in this file — confirm it is set before model build.
    if use_nv_input:
        if attention_mask is None:
            attention_mask = paddle.ones_like(input_ids)
        if token_type_ids is None:
            token_type_ids = paddle.zeros_like(input_ids)
        extended_attention_mask = attention_mask
        if not self.unpad and not self.pad_fmha:
            # Convert the {0,1} mask to additive form: 0 keeps a
            # position, -10000 effectively masks it.
            extended_attention_mask = extended_attention_mask.cast(
                dtype=paddle.float32)
            extended_attention_mask = (
                1.0 - extended_attention_mask) * -10000.0
        attention_mask = extended_attention_mask
    else:
        if attention_mask is None:
            attention_mask = paddle.unsqueeze(
                (input_ids == self.pad_token_id
                 ).astype(self.pooler.dense.weight.dtype) * -1e9,
                axis=[1, 2])
    new_attention_mask = attention_mask
    attention_indices = nonzeros_indices
    seqlen = seq_len
    cu_seqlens = prefix_sum_seq_len
    host_cu_seqlens = host_prefix_sum_seq_len
    maxseqlen_in_batch = max_seq_len
    actual_seqlens = seqlen
    if self.unpad_embed or self.unpad_fmha:
        # Unpadded execution requires the full set of sequence metadata.
        assert attention_indices is not None
        assert seqlen is not None
        assert cu_seqlens is not None
        assert host_cu_seqlens is not None
        assert maxseqlen_in_batch is not None
    if not self.unpad_embed:
        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask)
        # Padded path: the encoder receives None for all unpad metadata.
        cur_batch_size = None
        zero_tensor = None
        attention_indices = None
        new_attention_mask = None
        seqlen = None
        cu_seqlens = None
        host_cu_seqlens = None
        actual_seqlens = None
        maxseqlen_in_batch = None
    else:
        embedding_output, cur_batch_size, zero_tensor = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
            attention_indices=attention_indices,
            seqlen=seqlen,
            cu_seqlens=cu_seqlens,
            actual_seqlens=seqlen,
            maxseqlen_in_batch=maxseqlen_in_batch)
    encoder_outputs = self.encoder(
        embedding_output, attention_mask, output_hidden_states,
        cur_batch_size, self.maxseqlen, self.hidden_size, zero_tensor,
        attention_indices, new_attention_mask, seqlen, cu_seqlens,
        host_cu_seqlens, actual_seqlens, maxseqlen_in_batch)
    pooled_output = self.pooler(encoder_outputs[-1])
    if output_hidden_states:
        return encoder_outputs, pooled_output
    else:
        return encoder_outputs[-1], pooled_output
class BertLMPredictionHead(Layer):
    """
    Masked-LM prediction head: fused dense+GELU transform, layer norm,
    then a (possibly embedding-tied) projection to vocabulary logits.
    """

    def __init__(self, config, embedding_weights=None):
        super(BertLMPredictionHead, self).__init__()
        self.transform = FusedDense(
            config.hidden_size, config.hidden_size, with_gelu=True)
        self.weight_transpose = self.transform.weight_transpose
        # The fused transform hard-codes GELU, so reject other activations.
        assert config.hidden_act == "gelu"
        self.layer_norm = nn.LayerNorm(config.hidden_size, epsilon=1e-12)
        # Tie the decoder to the word embeddings when provided; otherwise
        # allocate an independent projection matrix.
        if embedding_weights is None:
            self.decoder_weight = self.create_parameter(
                shape=[config.vocab_size, config.hidden_size],
                dtype=self.transform.weight.dtype,
                is_bias=False)
        else:
            self.decoder_weight = embedding_weights
        self.decoder_bias = self.create_parameter(
            shape=[config.vocab_size],
            dtype=self.decoder_weight.dtype,
            is_bias=True)

    def forward(self, hidden_states, masked_positions=None):
        """Project hidden states to vocabulary logits.

        When `masked_positions` is given, only those rows are scored:
        gathering before the transform avoids projecting every token.
        """
        states = hidden_states
        if masked_positions is not None:
            states = paddle.reshape(states, [-1, states.shape[-1]])
            states = paddle.tensor.gather(states, masked_positions)
        states = self.transform(states)
        states = self.layer_norm(states)
        return fuse_dense(
            states,
            self.decoder_weight,
            self.decoder_bias,
            transx=False,
            transy=True)
class BertPretrainingHeads(Layer):
    """
    Perform language modeling task and next sentence classification task.

    Fix: removed the debug `print` of the full `sequence_output` tensor
    in `nv_forward` (ran on every step) and dropped dead commented-out
    code.

    Args:
        config: model config; `hidden_size` and `dense_seq_output` are read.
        embedding_weights (Tensor, optional):
            Decoding weights used to map hidden_states to logits of the masked token prediction.
            Its data type should be float32 and its shape is [vocab_size, hidden_size].
            Defaults to `None`, which means use the same weights of the embedding layer.
    """

    def __init__(self, config, embedding_weights=None):
        super(BertPretrainingHeads, self).__init__()
        self.predictions = BertLMPredictionHead(config, embedding_weights)
        # Next-sentence-prediction binary classifier (fused dense).
        self.seq_relationship = FusedDense(config.hidden_size, 2)
        self.seq_relationship_weight_transpose = self.seq_relationship.weight_transpose
        self.dense_seq_output = config.dense_seq_output
        self.share_weight = embedding_weights is not None

    def record_ckpt_vars(self, ckpt):
        """Register head parameters with the TF checkpoint helper."""
        pred_trans = self.predictions.transform
        ckpt.cls_pred_trans_fc([pred_trans.weight, pred_trans.bias],
                               self.predictions.weight_transpose)
        norm = self.predictions.layer_norm
        ckpt.cls_pred_trans_norm([norm.weight, norm.bias])
        # Decoder weight is tied to the embedding matrix, so only the
        # bias needs loading here.
        assert self.share_weight
        ckpt.cls_pred_fc_bias(self.predictions.decoder_bias)
        seq_relation_fc = self.seq_relationship
        ckpt.cls_seq_relation_fc(
            [seq_relation_fc.weight, seq_relation_fc.bias],
            self.seq_relationship_weight_transpose)

    def forward(self, sequence_output, pooled_output, masked_positions=None):
        """
        Args:
            sequence_output(Tensor):
                Sequence of hidden-states at the last layer of the model,
                shape [batch_size, sequence_length, hidden_size], float32.
            pooled_output(Tensor):
                Hidden state of the first token (`[CLS]`), shape
                [batch_size, hidden_size], float32.
            masked_positions(Tensor, optional):
                Positions to score in masked token prediction, int64,
                shape [batch_size, mask_token_num]. Defaults to `None`
                (score every token).

        Returns:
            tuple: (`prediction_scores`, `seq_relationship_score`) —
            masked-token logits and next-sentence logits
            ([batch_size, 2]).
        """
        prediction_scores = self.predictions(sequence_output, masked_positions)
        seq_relationship_score = self.seq_relationship(pooled_output)
        return prediction_scores, seq_relationship_score

    def nv_forward(self,
                   sequence_output,
                   pooled_output,
                   masked_lm_labels,
                   num_valid=None,
                   masked_lm_ids=None,
                   masked_lm_positions=None):
        """NV-data-layout variant of `forward`.

        When `dense_seq_output` is enabled, only the masked positions are
        kept before the (expensive) vocabulary projection.
        """
        if self.dense_seq_output:
            index = masked_lm_positions
            sequence_flattened = paddle.index_select(
                sequence_output.reshape((-1, sequence_output.shape[-1])),
                index=index,
                axis=0)
            sequence_output = sequence_flattened
        prediction_scores = self.predictions(sequence_output)
        seq_relationship_score = self.seq_relationship(pooled_output)
        return prediction_scores, seq_relationship_score
class BertForPretraining(nn.Layer):
    """
    Bert Model with pretraining tasks on top.

    Args:
        bert (:class:`BertModel`):
            An instance of :class:`BertModel`.
        config: model config object, kept for TF checkpoint loading.
    """

    def __init__(self, bert, config):
        super(BertForPretraining, self).__init__()
        self.config = config
        self.bert = bert
        # Tie the MLM decoder weight to the backbone's word embeddings.
        self.cls = BertPretrainingHeads(
            config,
            embedding_weights=self.bert.embeddings.word_embeddings.weight)

    def load_tf_ckpt(self, args, get_parameter_func):
        """Load weights from a TensorFlow checkpoint.

        Registers the backbone and head parameters with a TFCkptHelper,
        then performs the actual load.

        Args:
            args: runtime args; `args.tf_ckpt_path` is the checkpoint path.
            get_parameter_func: callback the helper uses to resolve
                paddle parameters.

        Returns:
            TFCkptHelper: the helper after loading.
        """
        place = utility.get_place()
        ckpt = TFCkptHelper(args, self.config, args.tf_ckpt_path, place)
        self.bert.record_ckpt_vars(ckpt)
        self.cls.record_ckpt_vars(ckpt)
        ckpt.load(get_parameter_func)
        return ckpt

    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                masked_positions=None,
                seq_len=None,
                prefix_sum_seq_len=None,
                host_prefix_sum_seq_len=None,
                max_seq_len=None,
                nonzeros_indices=None,
                num_valid=None,
                masked_lm_ids=None,
                masked_lm_positions=None):
        r"""
        Args:
            input_ids (Tensor):
                See :class:`BertModel`.
            token_type_ids (Tensor, optional):
                See :class:`BertModel`.
            position_ids (Tensor, optional):
                See :class:`BertModel`.
            attention_mask (Tensor, optional):
                See :class:`BertModel`.
            masked_positions(Tensor, optional):
                See :class:`BertPretrainingHeads`.

        Returns:
            tuple: Returns tuple (``prediction_scores``, ``seq_relationship_score``).
            With the fields:
            - `prediction_scores` (Tensor):
                The scores of masked token prediction. Its data type should be float32.
                If `masked_positions` is None, its shape is [batch_size, sequence_length, vocab_size].
                Otherwise, its shape is [batch_size, mask_token_num, vocab_size].
            - `seq_relationship_score` (Tensor):
                The scores of next sentence prediction.
                Its data type should be float32 and its shape is [batch_size, 2].
        """
        outputs = self.bert(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            seq_len=seq_len,
            prefix_sum_seq_len=prefix_sum_seq_len,
            host_prefix_sum_seq_len=host_prefix_sum_seq_len,
            max_seq_len=max_seq_len,
            nonzeros_indices=nonzeros_indices)
        sequence_output, pooled_output = outputs
        # NOTE(review): this passes 6 positional args, which matches
        # BertPretrainingHeads.nv_forward (installed as `forward` when
        # use_nv_input is set). The 3-argument BertPretrainingHeads.forward
        # would reject this call — confirm use_nv_input is always True on
        # this path.
        prediction_scores, seq_relationship_score = self.cls(
            sequence_output, pooled_output, masked_positions, num_valid,
            masked_lm_ids, masked_lm_positions)
        return prediction_scores, seq_relationship_score
class BertPretrainingCriterion(paddle.nn.Layer):
    """
    Pretraining loss: masked-LM cross entropy plus next-sentence cross
    entropy.

    Fix: in `nv_forward`, the non-dense closure referenced
    `masked_lm_labels_flat`, which was never defined anywhere — a
    guaranteed NameError the first time the closure ran. It is now
    defined as the flattened labels, matching the loss computation.

    Args:
        config: model config; `vocab_size` and `dense_seq_output` are read.
    """

    def __init__(self, config):
        super(BertPretrainingCriterion, self).__init__()
        # CrossEntropyLoss is expensive since the inner reshape (copy)
        self.loss_fn = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
        self.vocab_size = config.vocab_size
        self.dense_seq_output = config.dense_seq_output

    def forward(self, prediction_scores, seq_relationship_score,
                masked_lm_labels, next_sentence_labels, masked_lm_scale):
        """
        Args:
            prediction_scores(Tensor): masked-token logits, float32.
            seq_relationship_score(Tensor): NSP logits, [batch_size, 2].
            masked_lm_labels(Tensor): int64 MLM labels; -1 marks ignored
                positions.
            next_sentence_labels(Tensor): int64 NSP labels, [batch_size, 1].
            masked_lm_scale(Tensor or int): normalizer for the MLM loss.

        Returns:
            callable: zero-argument closure producing
            (total_loss as float32, mlm_acc, masked_lm_scale); deferring
            the cast and accuracy lets the caller schedule them lazily.
        """
        masked_lm_loss = F.cross_entropy(
            prediction_scores,
            masked_lm_labels,
            reduction='none',
            ignore_index=-1)
        masked_lm_loss = masked_lm_loss / masked_lm_scale
        next_sentence_loss = F.cross_entropy(
            seq_relationship_score, next_sentence_labels, reduction='none')
        # -1 labels are padding and must not count toward accuracy.
        valid_mask = masked_lm_labels != -1
        total_loss_before_cast = paddle.sum(masked_lm_loss) + paddle.mean(
            next_sentence_loss)

        def func():
            total_loss = total_loss_before_cast.astype('float32')
            mlm_acc = paddle.cast(
                paddle.sum((paddle.argmax(
                    prediction_scores, axis=-1, keepdim=True) ==
                            masked_lm_labels) * valid_mask),
                dtype=masked_lm_scale.dtype) / masked_lm_scale
            return total_loss, mlm_acc, masked_lm_scale

        return func

    def nv_forward(self,
                   prediction_scores,
                   seq_relationship_score,
                   masked_lm_labels,
                   next_sentence_labels,
                   num_valid=None,
                   masked_lm_ids=None,
                   masked_lm_positions=None):
        """NV-input variant: returns a closure yielding
        (total_loss, mlm_acc, num_valid_cnt).

        In dense-seq-output mode the predictions only cover masked tokens
        (labels in `masked_lm_ids`, padded with -1); otherwise the labels
        cover the full sequence and 0 marks ignored positions.
        """
        if self.dense_seq_output:
            loss_fct = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
        else:
            loss_fct = paddle.nn.loss.CrossEntropyLoss(ignore_index=0)
        if self.dense_seq_output:
            masked_lm_labels_dense = masked_lm_ids
            masked_lm_loss = loss_fct(
                prediction_scores.reshape((-1, self.vocab_size)),
                masked_lm_labels_dense)
        else:
            masked_lm_loss = loss_fct(
                prediction_scores.reshape((-1, self.vocab_size)),
                masked_lm_labels.reshape((-1, )))
        nsp_loss_fct = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
        next_sentence_loss = nsp_loss_fct(
            seq_relationship_score.reshape([-1, 2]),
            next_sentence_labels.reshape([-1]))
        total_loss = masked_lm_loss + next_sentence_loss
        # Masked Language Model Accuracy
        # NOTE: total_loss and mlm_acc use float32 in NV
        if not self.dense_seq_output:
            # BUGFIX: was an undefined name (`masked_lm_labels_flat`) —
            # flatten the labels the same way the loss computation does.
            masked_lm_labels_flat = masked_lm_labels.reshape((-1, ))

            def func():
                valid_mask = masked_lm_labels_flat != 0
                num_valid_cnt = valid_mask.astype('int32').sum(dtype='float32')
                prediction_scores_flat = prediction_scores.reshape(
                    (-1, prediction_scores.shape[-1]))
                mlm_predictions = prediction_scores_flat.argmax(axis=-1)
                mlm_acc = ((mlm_predictions == masked_lm_labels_flat) *
                           valid_mask).sum(dtype='float32') / num_valid_cnt
                return total_loss, mlm_acc, num_valid_cnt
        else:
            # argmax must match the labels' integer dtype for ==.
            dtype = masked_lm_labels_dense.dtype
            if dtype == paddle.int32:
                dtype = 'int32'
            elif dtype == paddle.int64:
                dtype = 'int64'
            else:
                assert False
            mlm_predictions = prediction_scores.argmax(
                axis=-1, dtype=dtype, keepdim=False)
            assert len(mlm_predictions.shape) == 1
            num_valid_cnt = num_valid

            def func():
                mlm_acc = paddle.cast(
                    mlm_predictions == masked_lm_labels_dense,
                    'float32').mean()
                return total_loss, mlm_acc, num_valid_cnt
        return func
# When the NV-style input pipeline is enabled, install the NV variants as
# the active `forward` methods (different argument layout, plus
# dense-seq-output support). Done at import time so callers are unaffected.
if use_nv_input:
    BertPretrainingHeads.forward = BertPretrainingHeads.nv_forward
    BertPretrainingCriterion.forward = BertPretrainingCriterion.nv_forward
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle.optimizer.lr import LRScheduler
__all__ = ['LinearWarmupPolyDecayScheduler', ]
class LinearWarmupPolyDecayScheduler(LRScheduler):
    """Linear warmup followed by polynomial decay toward `end_lr`.

    An optional startup phase of `startup_warmup_steps` precedes the
    warmup; when there is no startup phase the schedule is shifted by
    one step via `offset_step`.
    """

    def __init__(self,
                 startup_warmup_steps,
                 warmup_steps,
                 total_steps,
                 base_lr,
                 end_lr=0.0,
                 degree=1.0,
                 last_epoch=-1):
        self.startup_warmup_steps = startup_warmup_steps
        # Shift the schedule by one step when no startup phase exists.
        self.offset_step = int(startup_warmup_steps == 0)
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.base_lr = base_lr
        self.end_lr = end_lr
        self.degree = degree
        super(LinearWarmupPolyDecayScheduler, self).__init__(
            learning_rate=base_lr, last_epoch=last_epoch)

    def get_lr(self):
        """Return the learning rate for the upcoming step."""
        step = self.last_epoch + 1
        warm_step = step - self.offset_step - self.startup_warmup_steps
        if warm_step < self.warmup_steps:
            # Linear ramp; the epsilon guards against division by zero.
            return self.base_lr * (warm_step / (self.warmup_steps + 1e-6))
        progress = min(1, (step - self.offset_step) / self.total_steps)
        decay = (1 - progress)**self.degree
        return (self.base_lr - self.end_lr) * decay + self.end_lr
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import os.path as osp
import shutil
import json
import requests
import hashlib
import tarfile
import zipfile
import time
import uuid
import threading
from collections import OrderedDict
from .env import DOWNLOAD_SERVER, SUCCESS_STATUS, FAILED_STATUS
try:
    from tqdm import tqdm
except ImportError:  # was a bare `except:`, which also hid unrelated errors
    # Minimal stand-in so progress reporting still works without tqdm:
    # counts units and prints a percentage (or raw count) to stderr.
    class tqdm(object):
        def __init__(self, total=None):
            # total: expected number of units, or None when unknown.
            self.total = total
            # n: units recorded so far.
            self.n = 0

        def update(self, n):
            """Advance the counter by `n` units and redraw the line."""
            self.n += n
            if self.total is None:
                sys.stderr.write("\r{0:.1f} bytes".format(self.n))
            else:
                sys.stderr.write("\r{0:.1f}%".format(100 * self.n / float(
                    self.total)))
            sys.stderr.flush()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Finish the in-place progress line.
            sys.stderr.write('\n')
from .log import logger
__all__ = ['get_weights_path_from_url']

# Base URL for community-contributed model weights.
COMMUNITY_MODEL_PREFIX = "https://paddlenlp.bj.bcebos.com/models/transformers/community/"
# Local cache directory for downloaded weight files.
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
# Maximum number of attempts made by _download().
DOWNLOAD_RETRY_LIMIT = 3

# Mapping from pretrained-model shorthand names to their archive URLs.
nlp_models = OrderedDict((
    ('RoBERTa-zh-base',
     'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'
     ),
    ('RoBERTa-zh-large',
     'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'
     ),
    ('ERNIE-v2-en-base',
     'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'),
    ('ERNIE-v2-en-large',
     'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'),
    ('XLNet-cased-base',
     'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'),
    ('XLNet-cased-large',
     'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'),
    ('ERNIE-v1-zh-base',
     'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'),
    ('ERNIE-v1-zh-base-max-len-512',
     'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'),
    ('BERT-en-uncased-large-whole-word-masking',
     'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-en-cased-large-whole-word-masking',
     'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-en-uncased-base',
     'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'),
    ('BERT-en-uncased-large',
     'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-en-cased-base',
     'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'),
    ('BERT-en-cased-large',
     'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-multilingual-uncased-base',
     'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'),
    ('BERT-multilingual-cased-base',
     'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'),
    ('BERT-zh-base',
     'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'), ))
def is_url(path):
    """Return True when *path* looks like an HTTP(S) URL.

    Args:
        path (string): candidate URL string.
    """
    return path.startswith(('http://', 'https://'))
def get_weights_path_from_url(url, md5sum=None):
    """Resolve *url* to a local weights file cached under WEIGHTS_HOME.

    Downloads (and verifies *md5sum*) on a cache miss.

    Args:
        url (str): download url
        md5sum (str): md5 sum of download package

    Returns:
        str: a local path to save downloaded weights.

    Examples:
        .. code-block:: python

            from paddle.utils.download import get_weights_path_from_url
            resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams'
            local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url)
    """
    return get_path_from_url(url, WEIGHTS_HOME, md5sum)
def _map_path(url, root_dir):
# parse path after download under root_dir
fname = osp.split(url)[-1]
fpath = fname
return osp.join(root_dir, fpath)
def get_path_from_url(url, root_dir, md5sum=None, check_exist=True):
    """ Download from given url to root_dir.
    if file or directory specified by url is exists under
    root_dir, return the path directly, otherwise download
    from url and decompress it, return the path.

    Args:
        url (str): download url
        root_dir (str): root dir for downloading, it should be
                        WEIGHTS_HOME or DATASET_HOME
        md5sum (str): md5 sum of download package
        check_exist (bool): when True, reuse an existing file that
                        passes the md5 check instead of re-downloading.

    Returns:
        str: a local path to save downloaded models & weights & datasets.
    """
    from paddle.fluid.dygraph.parallel import ParallelEnv
    assert is_url(url), "downloading from {} not a url".format(url)
    # parse path after download to decompress under root_dir
    fullpath = _map_path(url, root_dir)
    if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum):
        logger.info("Found {}".format(fullpath))
    else:
        # Only one rank per node (local_rank % 8 == 0) downloads; the
        # others poll the filesystem until the file shows up.
        # NOTE(review): waiters may proceed as soon as the file exists,
        # possibly before the download/verification finishes, and the
        # "% 8" assumes 8 devices per node — confirm both with callers.
        if ParallelEnv().local_rank % 8 == 0:
            fullpath = _download(url, root_dir, md5sum)
        else:
            while not os.path.exists(fullpath):
                time.sleep(1)
    # Decompression is likewise performed only by the downloading rank.
    if ParallelEnv().local_rank % 8 == 0:
        if tarfile.is_tarfile(fullpath) or zipfile.is_zipfile(fullpath):
            fullpath = _decompress(fullpath)
    return fullpath
def _download(url, path, md5sum=None):
    """
    Download from url, save to path.

    Retries until the file exists and passes the md5 check, up to
    DOWNLOAD_RETRY_LIMIT attempts.

    url (str): download url
    path (str): download to given path
    md5sum (str, optional): expected md5 of the downloaded file.

    Returns:
        str: full local path of the downloaded file.

    Raises:
        RuntimeError: when retries are exhausted or the server responds
            with a non-200 status code.
    """
    if not osp.exists(path):
        os.makedirs(path)
    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    retry_cnt = 0
    while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
            retry_cnt += 1
        else:
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))
        logger.info("Downloading {} from {}".format(fname, url))
        req = requests.get(url, stream=True)
        if req.status_code != 200:
            raise RuntimeError("Downloading from {} failed with code "
                               "{}!".format(url, req.status_code))
        # For protecting download interupted, download to
        # tmp_fullname firstly, move tmp_fullname to fullname
        # after download finished
        tmp_fullname = fullname + "_tmp"
        total_size = req.headers.get('content-length')
        with open(tmp_fullname, 'wb') as f:
            if total_size:
                # Progress bar counts 1 KiB units (matches chunk_size).
                with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
                    for chunk in req.iter_content(chunk_size=1024):
                        f.write(chunk)
                        pbar.update(1)
            else:
                # No content-length header: stream without a bar.
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        shutil.move(tmp_fullname, fullname)
    return fullname
def _md5check(fullname, md5sum=None):
if md5sum is None:
return True
logger.info("File {} md5 checking...".format(fullname))
md5 = hashlib.md5()
with open(fullname, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b""):
md5.update(chunk)
calc_md5sum = md5.hexdigest()
if calc_md5sum != md5sum:
logger.info("File {} md5 check failed, {}(calc) != "
"{}(base)".format(fullname, calc_md5sum, md5sum))
return False
return True
def _md5(text):
"""
Calculate the md5 value of the input text.
"""
md5code = hashlib.md5(text.encode())
return md5code.hexdigest()
def _decompress(fname):
    """
    Decompress for zip and tar file

    Returns the directory/file path the archive expanded to.

    Raises:
        TypeError: if *fname* is neither a tarfile nor a zipfile.
    """
    logger.info("Decompressing {}...".format(fname))
    # Delegates to the format-specific helpers, which extract into a
    # sibling location of the archive.
    if tarfile.is_tarfile(fname):
        return _uncompress_file_tar(fname)
    if zipfile.is_zipfile(fname):
        return _uncompress_file_zip(fname)
    raise TypeError("Unsupport compress file type {}".format(fname))
def _uncompress_file_zip(filepath):
    """Extract a zip archive next to itself and return the result path.

    Three layouts are handled:
      * a single bare file          -> extracted alongside the archive;
      * a single top-level directory -> extracted alongside the archive;
      * multiple top-level entries   -> extracted into a directory named
                                        after the archive (extension
                                        stripped).
    """
    file_dir = os.path.dirname(filepath)
    # `with` guarantees the archive handle is closed even if extraction
    # raises (the original only closed on the success path).
    with zipfile.ZipFile(filepath, 'r') as files:
        file_list = files.namelist()
        if _is_a_single_file(file_list):
            rootpath = file_list[0]
            uncompressed_path = os.path.join(file_dir, rootpath)
            for item in file_list:
                files.extract(item, file_dir)
        elif _is_a_single_dir(file_list):
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
            uncompressed_path = os.path.join(file_dir, rootpath)
            for item in file_list:
                files.extract(item, file_dir)
        else:
            rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
            uncompressed_path = os.path.join(file_dir, rootpath)
            if not os.path.exists(uncompressed_path):
                os.makedirs(uncompressed_path)
            for item in file_list:
                files.extract(item, os.path.join(file_dir, rootpath))
    return uncompressed_path
def _uncompress_file_tar(filepath, mode="r:*"):
    """Extract a tar archive next to itself and return the result path.

    Layout handling mirrors _uncompress_file_zip: single file, single
    top-level directory, or a catch-all directory named after the
    archive.

    NOTE(review): members are extracted without path sanitization, so a
    crafted archive containing ``..`` entries could escape file_dir (tar
    path traversal). Current sources are trusted model mirrors — flag
    only, not silently changed.
    """
    file_dir = os.path.dirname(filepath)
    # Context manager closes the archive even when extraction raises
    # (the original leaked the handle on error).
    with tarfile.open(filepath, mode) as files:
        file_list = files.getnames()
        if _is_a_single_file(file_list):
            rootpath = file_list[0]
            uncompressed_path = os.path.join(file_dir, rootpath)
            files.extractall(file_dir, files.getmembers())
        elif _is_a_single_dir(file_list):
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
            uncompressed_path = os.path.join(file_dir, rootpath)
            files.extractall(file_dir, files.getmembers())
        else:
            rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
            uncompressed_path = os.path.join(file_dir, rootpath)
            if not os.path.exists(uncompressed_path):
                os.makedirs(uncompressed_path)
            files.extractall(
                os.path.join(file_dir, rootpath), files.getmembers())
    return uncompressed_path
def _is_a_single_file(file_list):
if len(file_list) == 1 and file_list[0].find(os.sep) < -1:
return True
return False
def _is_a_single_dir(file_list):
new_file_list = []
for file_path in file_list:
if '/' in file_path:
file_path = file_path.replace('/', os.sep)
elif '\\' in file_path:
file_path = file_path.replace('\\', os.sep)
new_file_list.append(file_path)
file_name = new_file_list[0].split(os.sep)[0]
for i in range(1, len(new_file_list)):
if file_name != new_file_list[i].split(os.sep)[0]:
return False
return True
class DownloaderCheck(threading.Thread):
    """
    Check the resource applicability when downloading the models.

    Runs as a daemon-style background thread that pings the download
    server with anonymized usage metadata.
    """

    def __init__(self, task, command="taskflow", addition=None):
        threading.Thread.__init__(self)
        self.command = command
        self.task = task
        self.addition = addition
        # Anonymous per-session id: hashed uuid fragment + unix timestamp.
        self.hash_flag = _md5(str(uuid.uuid1())[9:18]) + "-" + str(
            int(time.time()))

    def uri_path(self, server_url, api):
        """Join *server_url* and *api* with exactly one '/' between."""
        base = server_url[:-1] if server_url.endswith('/') else server_url
        if not api.startswith('/'):
            api = '/' + api
        return base + api

    def request_check(self, task, command, addition):
        """Send the usage ping; returns SUCCESS_STATUS or FAILED_STATUS.

        Any network/JSON/import failure is swallowed and reported as
        FAILED_STATUS — this check must never break a download.
        """
        if task is None:
            return SUCCESS_STATUS
        payload = {'word': self.task}
        api_url = self.uri_path(DOWNLOAD_SERVER, 'search')
        cache_path = os.path.join("~")
        if os.path.exists(cache_path):
            mtime = os.stat(cache_path).st_mtime
        else:
            mtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        extra = {
            "command": self.command,
            "mtime": mtime,
            "hub_name": self.hash_flag
        }
        if addition is not None:
            extra.update({"addition": addition})
        try:
            import paddle
            payload['hub_version'] = " "
            payload['paddle_version'] = paddle.__version__.split('-')[0]
            payload['extra'] = json.dumps(extra)
            r = requests.get(api_url, payload, timeout=1).json()
            if r.get("update_cache", 0) == 1:
                return SUCCESS_STATUS
            return FAILED_STATUS
        except Exception:
            return FAILED_STATUS

    def run(self):
        self.request_check(self.task, self.command, self.addition)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
This module stores environment-variable-driven paths used by PaddleNLP.
PPNLP_HOME --> the root directory for storing PaddleNLP related data.
               Defaults to ~/.paddlenlp; users can override it through the
               PPNLP_HOME environment variable.
├─ MODEL_HOME --> Store model files.
└─ DATA_HOME --> Store automatically downloaded datasets.
'''
import os
def _get_user_home():
return os.path.expanduser('~')
def _get_ppnlp_home():
if 'PPNLP_HOME' in os.environ:
home_path = os.environ['PPNLP_HOME']
if os.path.exists(home_path):
if os.path.isdir(home_path):
return home_path
else:
raise RuntimeError(
'The environment variable PPNLP_HOME {} is not a directory.'.
format(home_path))
else:
return home_path
return os.path.join(_get_user_home(), '.paddlenlp')
def _get_sub_home(directory, parent_home=_get_ppnlp_home()):
    """Return (creating it if needed) the sub-directory *directory* under
    *parent_home*.

    NOTE: the default *parent_home* is evaluated once at import time,
    which means _get_ppnlp_home() runs as a module-import side effect.
    """
    home = os.path.join(parent_home, directory)
    # exist_ok avoids the check-then-create race of the original
    # `if not exists: makedirs` pattern (safe across concurrent workers).
    os.makedirs(home, exist_ok=True)
    return home
# Resolved once at import time; _get_sub_home also creates the model and
# dataset cache directories as a side effect.
USER_HOME = _get_user_home()
PPNLP_HOME = _get_ppnlp_home()
MODEL_HOME = _get_sub_home('models')
DATA_HOME = _get_sub_home('datasets')
# Endpoint used by DownloaderCheck for its usage ping.
DOWNLOAD_SERVER = "http://paddlepaddle.org.cn/paddlehub"
# Status codes returned by DownloaderCheck.request_check.
FAILED_STATUS = -1
SUCCESS_STATUS = 0
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import numpy as np
def static_params_to_dygraph(model, static_tensor_dict):
    """Simple tool for converting static-graph parameters to a dygraph
    state dict.

    **NOTE** The model must both support static graph and dygraph mode.

    Args:
        model (nn.Layer): the model of a neural network.
        static_tensor_dict (dict): tensors saved in static mode, usually
            loaded by `paddle.static.load_program_state`, keyed by the
            static parameter names.

    Returns:
        dict: a state dict keyed the same as ``model.state_dict()``.
    """
    return {
        name: static_tensor_dict[param.name]
        for name, param in model.state_dict().items()
    }
def dygraph_params_to_static(model, dygraph_tensor_dict, topo=None):
    """Convert a dygraph parameter dict into a static-graph state dict.

    **NOTE** The model must support both static graph and dygraph mode.

    Args:
        model (nn.Layer): the model of a neural network.
        dygraph_tensor_dict (dict): tensors saved in dygraph mode.
        topo: model-parallel topology; required when any parameter is
            distributed, so that the tensor can be sliced for this rank.

    Returns:
        dict: a state dict keyed by the static-graph parameter names.
    """
    converted = {}
    for name, param in model.state_dict().items():
        if name not in dygraph_tensor_dict:
            print("Miss \t\t", name)
            continue
        tensor = dygraph_tensor_dict[name]
        if not param.is_distributed:
            converted[param.name] = tensor
            continue
        # Distributed parameter: slice the full tensor along the first axis
        # whose size differs from this rank's local parameter shape.
        assert topo is not None
        split_axis = 0
        for axis, size in enumerate(tensor.shape):
            split_axis = axis
            if param.shape[axis] != size:
                break
        converted[param.name] = np.split(
            tensor, topo.mp_info.size, axis=split_axis)[topo.mp_info.rank]
    return converted
class TimeCostAverage(object):
    """Tracks the running average of per-step time cost during training
    and inference."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all recorded samples and restart the average from zero."""
        self.cnt = 0
        self.total_time = 0

    def record(self, usetime):
        """Accumulate one sample: the time spent in the current step."""
        self.cnt += 1
        self.total_time += usetime

    def get_average(self):
        """Return the mean recorded time, or 0 when nothing was recorded."""
        return self.total_time / self.cnt if self.cnt else 0
def get_env_device():
    """Return the device name of the running environment.

    Probes the Paddle build, in priority order, for CUDA, NPU, ROCm and
    XPU support; falls back to ``'cpu'`` when none is compiled in.
    """
    probes = (
        ('gpu', paddle.is_compiled_with_cuda),
        ('npu', paddle.is_compiled_with_npu),
        ('rocm', paddle.is_compiled_with_rocm),
        ('xpu', paddle.is_compiled_with_xpu),
    )
    for device, is_compiled in probes:
        if is_compiled():
            return device
    return 'cpu'
# Copyright (c) 2019-2021 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from argparse import ArgumentParser, REMAINDER
def parse_args():
    """Parse the command line options of the CPU-binding helper.

    Returns:
        argparse.Namespace: the parsed options.
    """
    arg_parser = ArgumentParser(
        description="The script to print PaddlePaddle CPU binding cmd.")
    # (flags, keyword arguments) for every supported option, registered in
    # the original order: launch-layout options first, then the required
    # CPU-topology options.
    option_specs = [
        (["--nnodes"],
         dict(type=int, default=1,
              help="The number of nodes to use for distributed training")),
        (["--node_rank"],
         dict(type=int, default=0,
              help="The rank of the node for multi-node distributed "
                   "training")),
        (["--local_rank"],
         dict(type=int, default=0, help="The local rank.")),
        (["--nproc_per_node"],
         dict(type=int, default=1,
              help="The number of processes to launch on each node, "
                   "for GPU training, this is recommended to be set "
                   "to the number of GPUs in your system so that "
                   "each process can be bound to a single GPU.")),
        (["--no_hyperthreads"],
         dict(action='store_true',
              help='Flag to disable binding to hyperthreads')),
        (["--no_membind"],
         dict(action='store_true', help='Flag to disable memory binding')),
        (["--nsockets_per_node"],
         dict(type=int, required=True,
              help="Number of CPU sockets on a node")),
        (["--ncores_per_socket"],
         dict(type=int, required=True,
              help="Number of CPU cores per socket")),
    ]
    for flags, kwargs in option_specs:
        arg_parser.add_argument(*flags, **kwargs)
    return arg_parser.parse_args()
def main():
    """Print a ``numactl`` command binding this local rank's process to the
    CPU cores (and optionally the memory node) nearest to its GPU.

    The command is written to stdout so a launcher script can prepend it to
    the training command.
    """
    args = parse_args()

    # Topology derived from the command line.
    NSOCKETS = args.nsockets_per_node
    # GPUs per socket, rounded up when nproc_per_node does not divide evenly.
    NGPUS_PER_SOCKET = (args.nproc_per_node // args.nsockets_per_node) + (1 if (
        args.nproc_per_node % args.nsockets_per_node) else 0)
    NCORES_PER_GPU = args.ncores_per_socket // NGPUS_PER_SOCKET

    local_rank = args.local_rank
    # NOTE(review): the original also computed dist_world_size and dist_rank
    # here, but neither was used — removed.

    # [phys_start, phys_end, ht_start, ht_end]: the physical core range for
    # this rank, then the hyperthread-sibling range offset by the total
    # number of physical cores on the node.
    cpu_ranges = [
        local_rank * NCORES_PER_GPU, (local_rank + 1) * NCORES_PER_GPU - 1,
        local_rank * NCORES_PER_GPU +
        (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS),
        (local_rank + 1) * NCORES_PER_GPU +
        (NCORES_PER_GPU * NGPUS_PER_SOCKET * NSOCKETS) - 1
    ]
    numactlargs = []
    if args.no_hyperthreads:
        # Bind only to the physical cores.
        numactlargs += ["--physcpubind={}-{}".format(*cpu_ranges[0:2])]
    else:
        # Bind to the physical cores and their hyperthread siblings.
        numactlargs += ["--physcpubind={}-{},{}-{}".format(*cpu_ranges)]
    if not args.no_membind:
        # Bind memory allocations to the NUMA node hosting this rank's cores.
        memnode = local_rank // NGPUS_PER_SOCKET
        numactlargs += ["--membind={}".format(memnode)]
    cmd = ["/usr/bin/numactl"] + numactlargs
    print(" ".join(cmd))


if __name__ == "__main__":
    main()
# Build script for the "functions" pybind11 extension module.
# NOTE(review): the include paths below are hard-coded to one user's
# environment (/public/home/zhangqha/...) — they should eventually come from
# find_package() / cache variables so the project builds elsewhere.
cmake_minimum_required(VERSION 3.4...3.18)
project(functions LANGUAGES CXX)
add_subdirectory(pybind11)
# Python / NumPy headers.
include_directories(/public/home/zhangqha/.conda/envs/hhenv/include/python3.6m)
include_directories(/public/home/zhangqha/.conda/envs/hhenv/lib/python3.6/site-packages/numpy/core/include)
# ROCm / DTK toolkit headers.
include_directories(/public/home/zhangqha/dtk-21.04/hipcub/include)
include_directories(/public/home/zhangqha/dtk-21.04/hiprand/include)
include_directories(/public/home/zhangqha/dtk-21.04/hipsparse/include)
include_directories(/public/home/zhangqha/dtk-21.04/include)
include_directories(/public/home/zhangqha/dtk-21.04/miopen/include)
include_directories(/public/home/zhangqha/dtk-21.04/rccl/include)
include_directories(/public/home/zhangqha/dtk-21.04/rocblas/include)
include_directories(/public/home/zhangqha/dtk-21.04/rocfft/include)
include_directories(/public/home/zhangqha/dtk-21.04/rocprim/include)
include_directories(/public/home/zhangqha/dtk-21.04/rocrand/include)
include_directories(/public/home/zhangqha/dtk-21.04/rocsparse/include)
include_directories(/public/home/zhangqha/dtk-21.04/rocthrust/include)
# PaddlePaddle source tree and its third-party build products.
# (Duplicate include_directories entries from the original — the build dir,
# mklml and xxhash — have been removed; each path is now listed once.)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/paddle/fluid/platform)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/dlpack/src/extern_dlpack/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/eigen3/src/extern_eigen3)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/brpc/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/gflags/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/glog/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/gloo/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/gtest/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/leveldb/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/libmct/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/mklml/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/protobuf/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/rocksdb/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/snappy/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/utf8proc/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/warpctc/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/xbyak/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/xbyak/include/xbyak)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/xxhash/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/install/zlib/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/pocketfft/src)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/pybind/src/extern_pybind/include)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/build/third_party/threadpool/src/extern_threadpool)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/paddle/fluid/framework/io)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/paddle/fluid/platform)
include_directories(/public/home/zhangqha/for_baidu/Paddle-develop/patches/thrust)
# Compile definitions matching the Paddle ROCm build configuration.
add_definitions(-DPADDLE_WITH_RCCL)
add_definitions(-DEIGEN_USE_HIP)
add_definitions(-DTHRUST_IGNORE_CUB_VERSION_CHECK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -D_GLIBCXX_USE_CXX11_ABI=1 -fPIC -D__HIP_PLATFORM_HCC__=1 ")
set(extension_name "functions")
add_definitions("-DMLPERF_EXTENSION_NAME=${extension_name}")
pybind11_add_module(${extension_name} functions.cc)
# Link directly against Paddle's core extension module.
target_link_libraries(${extension_name} PRIVATE /usr/local/lib/python3.6/site-packages/paddle/fluid/core_avx.so)
# How to compile
Set `COMPILE_DIR` to your PaddlePaddle build directory, then run the compile script:
```shell
export COMPILE_DIR=<PaddlePaddle build directory>
python compile.py
```
# This is the CMakeCache file.
# For build in directory: /public/home/zhangqha/bert/pybind/build
# It was generated by CMake: /opt/cmake/bin/cmake
# You can edit this file to change values found and used by cmake.
# If you do not want to change any of the values, simply exit the editor.
# If you do want to change a value, simply edit, save, and exit the editor.
# The syntax for the file is as follows:
# KEY:TYPE=VALUE
# KEY is the name of a variable in the cache.
# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
# VALUE is the current value for the KEY.
########################
# EXTERNAL cache entries
########################
//Path to a program.
CMAKE_ADDR2LINE:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/addr2line
//Path to a program.
CMAKE_AR:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/ar
//Choose the type of build, options are: None Debug Release RelWithDebInfo
// MinSizeRel ...
CMAKE_BUILD_TYPE:STRING=Release
//Enable/Disable color output during build.
CMAKE_COLOR_MAKEFILE:BOOL=ON
//CXX compiler
CMAKE_CXX_COMPILER:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/c++
//A wrapper around 'ar' adding the appropriate '--plugin' option
// for the GCC compiler
CMAKE_CXX_COMPILER_AR:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/gcc-ar
//A wrapper around 'ranlib' adding the appropriate '--plugin' option
// for the GCC compiler
CMAKE_CXX_COMPILER_RANLIB:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/gcc-ranlib
//Flags used by the CXX compiler during all build types.
CMAKE_CXX_FLAGS:STRING=
//Flags used by the CXX compiler during DEBUG builds.
CMAKE_CXX_FLAGS_DEBUG:STRING=-g
//Flags used by the CXX compiler during MINSIZEREL builds.
CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
//Flags used by the CXX compiler during RELEASE builds.
CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
//Flags used by the CXX compiler during RELWITHDEBINFO builds.
CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG
//Path to a program.
CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND
//Flags used by the linker during all build types.
CMAKE_EXE_LINKER_FLAGS:STRING=
//Flags used by the linker during DEBUG builds.
CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=
//Flags used by the linker during MINSIZEREL builds.
CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=
//Flags used by the linker during RELEASE builds.
CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=
//Flags used by the linker during RELWITHDEBINFO builds.
CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
//Enable/Disable output of compile commands during generation.
CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=OFF
//User executables (bin)
CMAKE_INSTALL_BINDIR:PATH=bin
//Read-only architecture-independent data (DATAROOTDIR)
CMAKE_INSTALL_DATADIR:PATH=
//Read-only architecture-independent data root (share)
CMAKE_INSTALL_DATAROOTDIR:PATH=share
//Documentation root (DATAROOTDIR/doc/PROJECT_NAME)
CMAKE_INSTALL_DOCDIR:PATH=
//C header files (include)
CMAKE_INSTALL_INCLUDEDIR:PATH=include
//Info documentation (DATAROOTDIR/info)
CMAKE_INSTALL_INFODIR:PATH=
//Object code libraries (lib64)
CMAKE_INSTALL_LIBDIR:PATH=lib64
//Program executables (libexec)
CMAKE_INSTALL_LIBEXECDIR:PATH=libexec
//Locale-dependent data (DATAROOTDIR/locale)
CMAKE_INSTALL_LOCALEDIR:PATH=
//Modifiable single-machine data (var)
CMAKE_INSTALL_LOCALSTATEDIR:PATH=var
//Man documentation (DATAROOTDIR/man)
CMAKE_INSTALL_MANDIR:PATH=
//C header files for non-gcc (/usr/include)
CMAKE_INSTALL_OLDINCLUDEDIR:PATH=/usr/include
//Install path prefix, prepended onto install directories.
CMAKE_INSTALL_PREFIX:PATH=/usr/local
//Run-time variable data (LOCALSTATEDIR/run)
CMAKE_INSTALL_RUNSTATEDIR:PATH=
//System admin executables (sbin)
CMAKE_INSTALL_SBINDIR:PATH=sbin
//Modifiable architecture-independent data (com)
CMAKE_INSTALL_SHAREDSTATEDIR:PATH=com
//Read-only single-machine data (etc)
CMAKE_INSTALL_SYSCONFDIR:PATH=etc
//Path to a program.
CMAKE_LINKER:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/ld
//Path to a program.
CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake
//Flags used by the linker during the creation of modules during
// all build types.
CMAKE_MODULE_LINKER_FLAGS:STRING=
//Flags used by the linker during the creation of modules during
// DEBUG builds.
CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=
//Flags used by the linker during the creation of modules during
// MINSIZEREL builds.
CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=
//Flags used by the linker during the creation of modules during
// RELEASE builds.
CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=
//Flags used by the linker during the creation of modules during
// RELWITHDEBINFO builds.
CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
//Path to a program.
CMAKE_NM:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/nm
//Path to a program.
CMAKE_OBJCOPY:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/objcopy
//Path to a program.
CMAKE_OBJDUMP:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/objdump
//Value Computed by CMake
CMAKE_PROJECT_DESCRIPTION:STATIC=
//Value Computed by CMake
CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
//Value Computed by CMake
CMAKE_PROJECT_NAME:STATIC=functions
//Path to a program.
CMAKE_RANLIB:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/ranlib
//Path to a program.
CMAKE_READELF:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/readelf
//Flags used by the linker during the creation of shared libraries
// during all build types.
CMAKE_SHARED_LINKER_FLAGS:STRING=
//Flags used by the linker during the creation of shared libraries
// during DEBUG builds.
CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=
//Flags used by the linker during the creation of shared libraries
// during MINSIZEREL builds.
CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=
//Flags used by the linker during the creation of shared libraries
// during RELEASE builds.
CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=
//Flags used by the linker during the creation of shared libraries
// during RELWITHDEBINFO builds.
CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=
//If set, runtime paths are not added when installing shared libraries,
// but are added when building.
CMAKE_SKIP_INSTALL_RPATH:BOOL=NO
//If set, runtime paths are not added when using shared libraries.
CMAKE_SKIP_RPATH:BOOL=NO
//Flags used by the linker during the creation of static libraries
// during all build types.
CMAKE_STATIC_LINKER_FLAGS:STRING=
//Flags used by the linker during the creation of static libraries
// during DEBUG builds.
CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING=
//Flags used by the linker during the creation of static libraries
// during MINSIZEREL builds.
CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING=
//Flags used by the linker during the creation of static libraries
// during RELEASE builds.
CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING=
//Flags used by the linker during the creation of static libraries
// during RELWITHDEBINFO builds.
CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING=
//Path to a program.
CMAKE_STRIP:FILEPATH=/opt/rh/devtoolset-7/root/usr/bin/strip
//If this value is on, makefiles will be generated without the
// .SILENT directive, and all commands will be echoed to the console
// during the make. This is useful for debugging only. With Visual
// Studio IDE projects all commands are done without /nologo.
CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE
//C++ standard flag, e.g. -std=c++11, -std=c++14, /std:c++14.
// Defaults to C++14 mode.
PYBIND11_CPP_STANDARD:STRING=-std=c++14
//Install pybind11 header files?
PYBIND11_INSTALL:BOOL=OFF
//No help, variable specified on the command line.
PYBIND11_PYTHON_VERSION:UNINITIALIZED=3.6
//Build pybind11 test suite?
PYBIND11_TEST:BOOL=OFF
//Path to a program.
PYTHON_EXECUTABLE:FILEPATH=/usr/local/bin/python3.6
//Path to a library.
PYTHON_LIBRARY:FILEPATH=/usr/local/lib/libpython3.6m.so
//Install pybind11 headers in Python include directory instead
// of default installation prefix
USE_PYTHON_INCLUDE_DIR:BOOL=OFF
//Value Computed by CMake
functions_BINARY_DIR:STATIC=/public/home/zhangqha/bert/pybind/build
//Dependencies for the target
functions_LIB_DEPENDS:STATIC=general;/usr/local/lib/python3.6/site-packages/paddle/fluid/core_avx.so;
//Value Computed by CMake
functions_SOURCE_DIR:STATIC=/public/home/zhangqha/bert/pybind
//Value Computed by CMake
pybind11_BINARY_DIR:STATIC=/public/home/zhangqha/bert/pybind/build/pybind11
//Value Computed by CMake
pybind11_SOURCE_DIR:STATIC=/public/home/zhangqha/bert/pybind/pybind11
########################
# INTERNAL cache entries
########################
//ADVANCED property for variable: CMAKE_ADDR2LINE
CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_AR
CMAKE_AR-ADVANCED:INTERNAL=1
//This is the directory where this CMakeCache.txt was created
CMAKE_CACHEFILE_DIR:INTERNAL=/public/home/zhangqha/bert/pybind/build
//Major version of cmake used to create the current loaded cache
CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
//Minor version of cmake used to create the current loaded cache
CMAKE_CACHE_MINOR_VERSION:INTERNAL=16
//Patch version of cmake used to create the current loaded cache
CMAKE_CACHE_PATCH_VERSION:INTERNAL=3
//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE
CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1
//Path to CMake executable.
CMAKE_COMMAND:INTERNAL=/opt/cmake/bin/cmake
//Path to cpack program executable.
CMAKE_CPACK_COMMAND:INTERNAL=/opt/cmake/bin/cpack
//Path to ctest program executable.
CMAKE_CTEST_COMMAND:INTERNAL=/opt/cmake/bin/ctest
//ADVANCED property for variable: CMAKE_CXX_COMPILER
CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_COMPILER_AR
CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB
CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_DLLTOOL
CMAKE_DLLTOOL-ADVANCED:INTERNAL=1
//Path to cache edit program executable.
CMAKE_EDIT_COMMAND:INTERNAL=/opt/cmake/bin/ccmake
//Executable file format
CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG
CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL
CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE
CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS
CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1
//Name of external makefile project generator.
CMAKE_EXTRA_GENERATOR:INTERNAL=
//Name of generator.
CMAKE_GENERATOR:INTERNAL=Unix Makefiles
//Generator instance identifier.
CMAKE_GENERATOR_INSTANCE:INTERNAL=
//Name of generator platform.
CMAKE_GENERATOR_PLATFORM:INTERNAL=
//Name of generator toolset.
CMAKE_GENERATOR_TOOLSET:INTERNAL=
//Source directory with the top level CMakeLists.txt file for this
// project
CMAKE_HOME_DIRECTORY:INTERNAL=/public/home/zhangqha/bert/pybind
//ADVANCED property for variable: CMAKE_INSTALL_BINDIR
CMAKE_INSTALL_BINDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_DATADIR
CMAKE_INSTALL_DATADIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_DATAROOTDIR
CMAKE_INSTALL_DATAROOTDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_DOCDIR
CMAKE_INSTALL_DOCDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_INCLUDEDIR
CMAKE_INSTALL_INCLUDEDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_INFODIR
CMAKE_INSTALL_INFODIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_LIBDIR
CMAKE_INSTALL_LIBDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_LIBEXECDIR
CMAKE_INSTALL_LIBEXECDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_LOCALEDIR
CMAKE_INSTALL_LOCALEDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_LOCALSTATEDIR
CMAKE_INSTALL_LOCALSTATEDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_MANDIR
CMAKE_INSTALL_MANDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_OLDINCLUDEDIR
CMAKE_INSTALL_OLDINCLUDEDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_RUNSTATEDIR
CMAKE_INSTALL_RUNSTATEDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_SBINDIR
CMAKE_INSTALL_SBINDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_INSTALL_SHAREDSTATEDIR
CMAKE_INSTALL_SHAREDSTATEDIR-ADVANCED:INTERNAL=1
//Install .so files without execute permission.
CMAKE_INSTALL_SO_NO_EXE:INTERNAL=0
//ADVANCED property for variable: CMAKE_INSTALL_SYSCONFDIR
CMAKE_INSTALL_SYSCONFDIR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_LINKER
CMAKE_LINKER-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS
CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG
CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL
CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE
CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_NM
CMAKE_NM-ADVANCED:INTERNAL=1
//number of local generators
CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=2
//ADVANCED property for variable: CMAKE_OBJCOPY
CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_OBJDUMP
CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
//Platform information initialized
CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
//ADVANCED property for variable: CMAKE_RANLIB
CMAKE_RANLIB-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_READELF
CMAKE_READELF-ADVANCED:INTERNAL=1
//Path to CMake installation.
CMAKE_ROOT:INTERNAL=/opt/cmake/share/cmake-3.16
//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG
CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE
CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH
CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_SKIP_RPATH
CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS
CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG
CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL
CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE
CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_STRIP
CMAKE_STRIP-ADVANCED:INTERNAL=1
//uname command
CMAKE_UNAME:INTERNAL=/usr/bin/uname
//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE
CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
//Details about finding PYTHON
FIND_PACKAGE_MESSAGE_DETAILS_PYTHON:INTERNAL=/usr/local/bin/python3.6
//Details about finding PythonInterp
FIND_PACKAGE_MESSAGE_DETAILS_PythonInterp:INTERNAL=[/usr/local/bin/python3.6][v3.6.8(3.6)]
//Test HAS_CPP14_FLAG
HAS_CPP14_FLAG:INTERNAL=1
//Test HAS_FLTO
HAS_FLTO:INTERNAL=1
PYBIND11_INCLUDE_DIR:INTERNAL=/public/home/zhangqha/bert/pybind/pybind11/include
PYBIND11_LTO_CXX_FLAGS:INTERNAL=-flto;-fno-fat-lto-objects
PYBIND11_LTO_LINKER_FLAGS:INTERNAL=-flto
PYBIND11_VERSION_MAJOR:INTERNAL=2
PYBIND11_VERSION_MINOR:INTERNAL=4
PYBIND11_VERSION_PATCH:INTERNAL=3
//ADVANCED property for variable: PYTHON_EXECUTABLE
PYTHON_EXECUTABLE-ADVANCED:INTERNAL=1
PYTHON_INCLUDE_DIRS:INTERNAL=/usr/local/include/python3.6m
PYTHON_LIBRARIES:INTERNAL=/usr/local/lib/libpython3.6m.so
//ADVANCED property for variable: PYTHON_LIBRARY
PYTHON_LIBRARY-ADVANCED:INTERNAL=1
PYTHON_MODULE_EXTENSION:INTERNAL=.cpython-36m-x86_64-linux-gnu.so
PYTHON_MODULE_PREFIX:INTERNAL=
PYTHON_VERSION_MAJOR:INTERNAL=3
PYTHON_VERSION_MINOR:INTERNAL=6
//CMAKE_INSTALL_PREFIX during last run
_GNUInstallDirs_LAST_CMAKE_INSTALL_PREFIX:INTERNAL=/usr/local
set(CMAKE_CXX_COMPILER "/opt/rh/devtoolset-7/root/usr/bin/c++")
set(CMAKE_CXX_COMPILER_ARG1 "")
set(CMAKE_CXX_COMPILER_ID "GNU")
set(CMAKE_CXX_COMPILER_VERSION "7.3.1")
set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
set(CMAKE_CXX_COMPILER_WRAPPER "")
set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "14")
set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17")
set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters")
set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17")
set(CMAKE_CXX20_COMPILE_FEATURES "")
set(CMAKE_CXX_PLATFORM_ID "Linux")
set(CMAKE_CXX_SIMULATE_ID "")
set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "")
set(CMAKE_CXX_SIMULATE_VERSION "")
set(CMAKE_AR "/opt/rh/devtoolset-7/root/usr/bin/ar")
set(CMAKE_CXX_COMPILER_AR "/opt/rh/devtoolset-7/root/usr/bin/gcc-ar")
set(CMAKE_RANLIB "/opt/rh/devtoolset-7/root/usr/bin/ranlib")
set(CMAKE_CXX_COMPILER_RANLIB "/opt/rh/devtoolset-7/root/usr/bin/gcc-ranlib")
set(CMAKE_LINKER "/opt/rh/devtoolset-7/root/usr/bin/ld")
set(CMAKE_MT "")
set(CMAKE_COMPILER_IS_GNUCXX 1)
set(CMAKE_CXX_COMPILER_LOADED 1)
set(CMAKE_CXX_COMPILER_WORKS TRUE)
set(CMAKE_CXX_ABI_COMPILED TRUE)
set(CMAKE_COMPILER_IS_MINGW )
set(CMAKE_COMPILER_IS_CYGWIN )
if(CMAKE_COMPILER_IS_CYGWIN)
set(CYGWIN 1)
set(UNIX 1)
endif()
set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
if(CMAKE_COMPILER_IS_MINGW)
set(MINGW 1)
endif()
set(CMAKE_CXX_COMPILER_ID_RUN 1)
set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;CPP)
set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
foreach (lang C OBJC OBJCXX)
if (CMAKE_${lang}_COMPILER_ID_RUN)
foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
endforeach()
endif()
endforeach()
set(CMAKE_CXX_LINKER_PREFERENCE 30)
set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
# Save compiler ABI information.
set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
set(CMAKE_CXX_COMPILER_ABI "ELF")
set(CMAKE_CXX_LIBRARY_ARCHITECTURE "")
if(CMAKE_CXX_SIZEOF_DATA_PTR)
set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
endif()
if(CMAKE_CXX_COMPILER_ABI)
set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
endif()
if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
set(CMAKE_LIBRARY_ARCHITECTURE "")
endif()
set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
endif()
set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/opt/dtk/include;/opt/dtk/llvm/include;/opt/rh/devtoolset-7/root/usr/include/c++/7;/opt/rh/devtoolset-7/root/usr/include/c++/7/x86_64-redhat-linux;/opt/rh/devtoolset-7/root/usr/include/c++/7/backward;/opt/rh/devtoolset-7/root/usr/lib/gcc/x86_64-redhat-linux/7/include;/usr/local/include;/opt/rh/devtoolset-7/root/usr/include;/usr/include")
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/opt/rh/devtoolset-7/root/usr/lib/gcc/x86_64-redhat-linux/7;/opt/rh/devtoolset-7/root/usr/lib64;/lib64;/usr/lib64;/opt/rh/devtoolset-7/root/usr/lib")
set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
set(CMAKE_HOST_SYSTEM "Linux-3.10.0-957.el7.x86_64")
set(CMAKE_HOST_SYSTEM_NAME "Linux")
set(CMAKE_HOST_SYSTEM_VERSION "3.10.0-957.el7.x86_64")
set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
set(CMAKE_SYSTEM "Linux-3.10.0-957.el7.x86_64")
set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_SYSTEM_VERSION "3.10.0-957.el7.x86_64")
set(CMAKE_SYSTEM_PROCESSOR "x86_64")
set(CMAKE_CROSSCOMPILING "FALSE")
set(CMAKE_SYSTEM_LOADED 1)
/* NOTE(review): this is CMake's auto-generated compiler-identification
   program (CMakeCXXCompilerId.cpp).  CMake compiles it once, then scans
   the resulting binary for the "INFO:..." string literals defined below
   to learn the compiler id, version, platform and architecture.  It is a
   build artifact shipped with / generated by CMake and should live in the
   build directory, not in version control; do not hand-edit. */

/* This source file must have a .cpp extension so that all C++ compilers
   recognize the extension without flags. Borland does not know .cxx for
   example. */
#ifndef __cplusplus
# error "A C compiler has been selected for C++."
#endif

/* Version number components: V=Version, R=Revision, P=Patch
   Version date components: YYYY=Year, MM=Month, DD=Day */
#if defined(__COMO__)
# define COMPILER_ID "Comeau"
  /* __COMO_VERSION__ = VRR */
# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100)
# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100)

#elif defined(__INTEL_COMPILER) || defined(__ICC)
# define COMPILER_ID "Intel"
# if defined(_MSC_VER)
#  define SIMULATE_ID "MSVC"
# endif
# if defined(__GNUC__)
#  define SIMULATE_ID "GNU"
# endif
  /* __INTEL_COMPILER = VRP */
# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
# if defined(__INTEL_COMPILER_UPDATE)
#  define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
# else
#  define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
# endif
# if defined(__INTEL_COMPILER_BUILD_DATE)
  /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
#  define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
# endif
# if defined(_MSC_VER)
   /* _MSC_VER = VVRR */
#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
# if defined(__GNUC__)
#  define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
# elif defined(__GNUG__)
#  define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
# endif
# if defined(__GNUC_MINOR__)
#  define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
# if defined(__GNUC_PATCHLEVEL__)
#  define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif

#elif defined(__PATHCC__)
# define COMPILER_ID "PathScale"
# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
# if defined(__PATHCC_PATCHLEVEL__)
#  define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
# endif

#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
# define COMPILER_ID "Embarcadero"
# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)

#elif defined(__BORLANDC__)
# define COMPILER_ID "Borland"
  /* __BORLANDC__ = 0xVRR */
# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)

#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
# define COMPILER_ID "Watcom"
  /* __WATCOMC__ = VVRR */
# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
# if (__WATCOMC__ % 10) > 0
#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
# endif

#elif defined(__WATCOMC__)
# define COMPILER_ID "OpenWatcom"
  /* __WATCOMC__ = VVRP + 1100 */
# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
# if (__WATCOMC__ % 10) > 0
#  define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
# endif

#elif defined(__SUNPRO_CC)
# define COMPILER_ID "SunPro"
# if __SUNPRO_CC >= 0x5100
   /* __SUNPRO_CC = 0xVRRP */
#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
# else
   /* __SUNPRO_CC = 0xVRP */
#  define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
#  define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
#  define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
# endif

#elif defined(__HP_aCC)
# define COMPILER_ID "HP"
  /* __HP_aCC = VVRRPP */
# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100)

#elif defined(__DECCXX)
# define COMPILER_ID "Compaq"
  /* __DECCXX_VER = VVRRTPPPP */
# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100)
# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000)

#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
# define COMPILER_ID "zOS"
  /* __IBMCPP__ = VRP */
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)

#elif defined(__ibmxl__) && defined(__clang__)
# define COMPILER_ID "XLClang"
# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)

#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
# define COMPILER_ID "XL"
  /* __IBMCPP__ = VRP */
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)

#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
# define COMPILER_ID "VisualAge"
  /* __IBMCPP__ = VRP */
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)

#elif defined(__PGI)
# define COMPILER_ID "PGI"
# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
# if defined(__PGIC_PATCHLEVEL__)
#  define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
# endif

#elif defined(_CRAYC)
# define COMPILER_ID "Cray"
# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)

#elif defined(__TI_COMPILER_VERSION__)
# define COMPILER_ID "TI"
  /* __TI_COMPILER_VERSION__ = VVVRRRPPP */
# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)

#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version)
# define COMPILER_ID "Fujitsu"

#elif defined(__ghs__)
# define COMPILER_ID "GHS"
/* __GHS_VERSION_NUMBER = VVVVRP */
# ifdef __GHS_VERSION_NUMBER
# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
# endif

#elif defined(__SCO_VERSION__)
# define COMPILER_ID "SCO"

#elif defined(__ARMCC_VERSION) && !defined(__clang__)
# define COMPILER_ID "ARMCC"
#if __ARMCC_VERSION >= 1000000
  /* __ARMCC_VERSION = VRRPPPP */
  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
#else
  /* __ARMCC_VERSION = VRPPPP */
  # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
  # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
  # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
#endif

#elif defined(__clang__) && defined(__apple_build_version__)
# define COMPILER_ID "AppleClang"
# if defined(_MSC_VER)
#  define SIMULATE_ID "MSVC"
# endif
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
   /* _MSC_VER = VVRR */
#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)

#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
# define COMPILER_ID "ARMClang"
  # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
  # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
  # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000)
# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)

#elif defined(__clang__)
# define COMPILER_ID "Clang"
# if defined(_MSC_VER)
#  define SIMULATE_ID "MSVC"
# endif
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
   /* _MSC_VER = VVRR */
#  define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
#  define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif

#elif defined(__GNUC__) || defined(__GNUG__)
# define COMPILER_ID "GNU"
# if defined(__GNUC__)
#  define COMPILER_VERSION_MAJOR DEC(__GNUC__)
# else
#  define COMPILER_VERSION_MAJOR DEC(__GNUG__)
# endif
# if defined(__GNUC_MINOR__)
#  define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
# if defined(__GNUC_PATCHLEVEL__)
#  define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif

#elif defined(_MSC_VER)
# define COMPILER_ID "MSVC"
  /* _MSC_VER = VVRR */
# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
# if defined(_MSC_FULL_VER)
#  if _MSC_VER >= 1400
    /* _MSC_FULL_VER = VVRRPPPPP */
#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
#  else
    /* _MSC_FULL_VER = VVRRPPPP */
#   define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
#  endif
# endif
# if defined(_MSC_BUILD)
#  define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
# endif

#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
# define COMPILER_ID "ADSP"
#if defined(__VISUALDSPVERSION__)
  /* __VISUALDSPVERSION__ = 0xVVRRPP00 */
# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24)
# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF)
# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF)
#endif

#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
# define COMPILER_ID "IAR"
# if defined(__VER__) && defined(__ICCARM__)
#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
#  define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
#  define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__))
#  define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
#  define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
#  define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
#  define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
# endif

/* These compilers are either not known or too old to define an
  identification macro.  Try to identify the platform and guess that
  it is the native compiler.  */
#elif defined(__hpux) || defined(__hpua)
# define COMPILER_ID "HP"

#else /* unknown compiler */
# define COMPILER_ID ""
#endif

/* Construct the string literal in pieces to prevent the source from
   getting matched.  Store it in a pointer rather than an array
   because some compilers will just produce instructions to fill the
   array rather than assigning a pointer to a static array.  */
char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
#ifdef SIMULATE_ID
char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
#endif

#ifdef __QNXNTO__
char const* qnxnto = "INFO" ":" "qnxnto[]";
#endif

#if defined(__CRAYXE) || defined(__CRAYXC)
char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
#endif

#define STRINGIFY_HELPER(X) #X
#define STRINGIFY(X) STRINGIFY_HELPER(X)

/* Identify known platforms by name.  */
#if defined(__linux) || defined(__linux__) || defined(linux)
# define PLATFORM_ID "Linux"

#elif defined(__CYGWIN__)
# define PLATFORM_ID "Cygwin"

#elif defined(__MINGW32__)
# define PLATFORM_ID "MinGW"

#elif defined(__APPLE__)
# define PLATFORM_ID "Darwin"

#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
# define PLATFORM_ID "Windows"

#elif defined(__FreeBSD__) || defined(__FreeBSD)
# define PLATFORM_ID "FreeBSD"

#elif defined(__NetBSD__) || defined(__NetBSD)
# define PLATFORM_ID "NetBSD"

#elif defined(__OpenBSD__) || defined(__OPENBSD)
# define PLATFORM_ID "OpenBSD"

#elif defined(__sun) || defined(sun)
# define PLATFORM_ID "SunOS"

#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
# define PLATFORM_ID "AIX"

#elif defined(__hpux) || defined(__hpux__)
# define PLATFORM_ID "HP-UX"

#elif defined(__HAIKU__)
# define PLATFORM_ID "Haiku"

#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
# define PLATFORM_ID "BeOS"

#elif defined(__QNX__) || defined(__QNXNTO__)
# define PLATFORM_ID "QNX"

#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
# define PLATFORM_ID "Tru64"

#elif defined(__riscos) || defined(__riscos__)
# define PLATFORM_ID "RISCos"

#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
# define PLATFORM_ID "SINIX"

#elif defined(__UNIX_SV__)
# define PLATFORM_ID "UNIX_SV"

#elif defined(__bsdos__)
# define PLATFORM_ID "BSDOS"

#elif defined(_MPRAS) || defined(MPRAS)
# define PLATFORM_ID "MP-RAS"

#elif defined(__osf) || defined(__osf__)
# define PLATFORM_ID "OSF1"

#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
# define PLATFORM_ID "SCO_SV"

#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
# define PLATFORM_ID "ULTRIX"

#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
# define PLATFORM_ID "Xenix"

#elif defined(__WATCOMC__)
# if defined(__LINUX__)
#  define PLATFORM_ID "Linux"

# elif defined(__DOS__)
#  define PLATFORM_ID "DOS"

# elif defined(__OS2__)
#  define PLATFORM_ID "OS2"

# elif defined(__WINDOWS__)
#  define PLATFORM_ID "Windows3x"

# else /* unknown platform */
#  define PLATFORM_ID
# endif

#elif defined(__INTEGRITY)
# if defined(INT_178B)
#  define PLATFORM_ID "Integrity178"

# else /* regular Integrity */
#  define PLATFORM_ID "Integrity"
# endif

#else /* unknown platform */
# define PLATFORM_ID

#endif

/* For windows compilers MSVC and Intel we can determine
   the architecture of the compiler being used.  This is because
   the compilers do not have flags that can change the architecture,
   but rather depend on which compiler is being used
*/
#if defined(_WIN32) && defined(_MSC_VER)
# if defined(_M_IA64)
#  define ARCHITECTURE_ID "IA64"

# elif defined(_M_X64) || defined(_M_AMD64)
#  define ARCHITECTURE_ID "x64"

# elif defined(_M_IX86)
#  define ARCHITECTURE_ID "X86"

# elif defined(_M_ARM64)
#  define ARCHITECTURE_ID "ARM64"

# elif defined(_M_ARM)
#  if _M_ARM == 4
#   define ARCHITECTURE_ID "ARMV4I"
#  elif _M_ARM == 5
#   define ARCHITECTURE_ID "ARMV5I"
#  else
#   define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
#  endif

# elif defined(_M_MIPS)
#  define ARCHITECTURE_ID "MIPS"

# elif defined(_M_SH)
#  define ARCHITECTURE_ID "SHx"

# else /* unknown architecture */
#  define ARCHITECTURE_ID ""
# endif

#elif defined(__WATCOMC__)
# if defined(_M_I86)
#  define ARCHITECTURE_ID "I86"

# elif defined(_M_IX86)
#  define ARCHITECTURE_ID "X86"

# else /* unknown architecture */
#  define ARCHITECTURE_ID ""
# endif

#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
# if defined(__ICCARM__)
#  define ARCHITECTURE_ID "ARM"

# elif defined(__ICCRX__)
#  define ARCHITECTURE_ID "RX"

# elif defined(__ICCRH850__)
#  define ARCHITECTURE_ID "RH850"

# elif defined(__ICCRL78__)
#  define ARCHITECTURE_ID "RL78"

# elif defined(__ICCRISCV__)
#  define ARCHITECTURE_ID "RISCV"

# elif defined(__ICCAVR__)
#  define ARCHITECTURE_ID "AVR"

# elif defined(__ICC430__)
#  define ARCHITECTURE_ID "MSP430"

# elif defined(__ICCV850__)
#  define ARCHITECTURE_ID "V850"

# elif defined(__ICC8051__)
#  define ARCHITECTURE_ID "8051"

# else /* unknown architecture */
#  define ARCHITECTURE_ID ""
# endif

#elif defined(__ghs__)
# if defined(__PPC64__)
#  define ARCHITECTURE_ID "PPC64"

# elif defined(__ppc__)
#  define ARCHITECTURE_ID "PPC"

# elif defined(__ARM__)
#  define ARCHITECTURE_ID "ARM"

# elif defined(__x86_64__)
#  define ARCHITECTURE_ID "x64"

# elif defined(__i386__)
#  define ARCHITECTURE_ID "X86"

# else /* unknown architecture */
#  define ARCHITECTURE_ID ""
# endif
#else
#  define ARCHITECTURE_ID
#endif

/* Convert integer to decimal digit literals.  */
/* NOTE(review): each DEC/HEX expansion yields eight comma-separated char
   literals; they are spliced directly into the char arrays below, which is
   why the version strings are built as arrays rather than string literals. */
#define DEC(n)                   \
  ('0' + (((n) / 10000000)%10)), \
  ('0' + (((n) / 1000000)%10)),  \
  ('0' + (((n) / 100000)%10)),   \
  ('0' + (((n) / 10000)%10)),    \
  ('0' + (((n) / 1000)%10)),     \
  ('0' + (((n) / 100)%10)),      \
  ('0' + (((n) / 10)%10)),       \
  ('0' +  ((n) % 10))

/* Convert integer to hex digit literals.  */
#define HEX(n)             \
  ('0' + ((n)>>28 & 0xF)), \
  ('0' + ((n)>>24 & 0xF)), \
  ('0' + ((n)>>20 & 0xF)), \
  ('0' + ((n)>>16 & 0xF)), \
  ('0' + ((n)>>12 & 0xF)), \
  ('0' + ((n)>>8 & 0xF)),  \
  ('0' + ((n)>>4 & 0xF)),  \
  ('0' + ((n) & 0xF))

/* Construct a string literal encoding the version number components. */
#ifdef COMPILER_VERSION_MAJOR
char const info_version[] = {
  'I', 'N', 'F', 'O', ':',
  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
  COMPILER_VERSION_MAJOR,
# ifdef COMPILER_VERSION_MINOR
  '.', COMPILER_VERSION_MINOR,
#  ifdef COMPILER_VERSION_PATCH
   '.', COMPILER_VERSION_PATCH,
#   ifdef COMPILER_VERSION_TWEAK
    '.', COMPILER_VERSION_TWEAK,
#   endif
#  endif
# endif
  ']','\0'};
#endif

/* Construct a string literal encoding the internal version number. */
#ifdef COMPILER_VERSION_INTERNAL
char const info_version_internal[] = {
  'I', 'N', 'F', 'O', ':',
  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
  'i','n','t','e','r','n','a','l','[',
  COMPILER_VERSION_INTERNAL,']','\0'};
#endif

/* Construct a string literal encoding the version number components. */
#ifdef SIMULATE_VERSION_MAJOR
char const info_simulate_version[] = {
  'I', 'N', 'F', 'O', ':',
  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
  SIMULATE_VERSION_MAJOR,
# ifdef SIMULATE_VERSION_MINOR
  '.', SIMULATE_VERSION_MINOR,
#  ifdef SIMULATE_VERSION_PATCH
   '.', SIMULATE_VERSION_PATCH,
#   ifdef SIMULATE_VERSION_TWEAK
    '.', SIMULATE_VERSION_TWEAK,
#   endif
#  endif
# endif
  ']','\0'};
#endif

/* Construct the string literal in pieces to prevent the source from
   getting matched.  Store it in a pointer rather than an array
   because some compilers will just produce instructions to fill the
   array rather than assigning a pointer to a static array.  */
char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";

/* NOTE(review): old Intel compilers report a stale _MSVC_LANG; derive the
   effective C++ standard from Intel-specific macros in that case. */
#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L
# if defined(__INTEL_CXX11_MODE__)
#  if defined(__cpp_aggregate_nsdmi)
#   define CXX_STD 201402L
#  else
#   define CXX_STD 201103L
#  endif
# else
#  define CXX_STD 199711L
# endif
#elif defined(_MSC_VER) && defined(_MSVC_LANG)
# define CXX_STD _MSVC_LANG
#else
# define CXX_STD __cplusplus
#endif

const char* info_language_dialect_default = "INFO" ":" "dialect_default["
#if CXX_STD > 201703L
  "20"
#elif CXX_STD >= 201703L
  "17"
#elif CXX_STD >= 201402L
  "14"
#elif CXX_STD >= 201103L
  "11"
#else
  "98"
#endif
"]";
/*--------------------------------------------------------------------------*/

/* NOTE(review): generated by CMake (tail of CMakeCXXCompilerId.cpp).
   main() does no real work: it reads one character out of each "INFO:..."
   string, indexed by the runtime value argc, so the compiler cannot prove
   the strings unused and strip them from the binary.  CMake never runs the
   executable — it greps the binary for the INFO literals — so the return
   value is irrelevant. */
int main(int argc, char* argv[])
{
  int require = 0;
  require += info_compiler[argc];
  require += info_platform[argc];
#ifdef COMPILER_VERSION_MAJOR
  require += info_version[argc];
#endif
#ifdef COMPILER_VERSION_INTERNAL
  require += info_version_internal[argc];
#endif
#ifdef SIMULATE_ID
  require += info_simulate[argc];
#endif
#ifdef SIMULATE_VERSION_MAJOR
  require += info_simulate_version[argc];
#endif
#if defined(__CRAYXE) || defined(__CRAYXC)
  require += info_cray[argc];
#endif
  require += info_language_dialect_default[argc];
  (void)argv;  /* unused, silenced deliberately */
  return require;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment