Commit 01bc05b7 authored by Pan,Huiwen

Update GNMT-v2

parent 20291e9d
@@ -22,7 +22,7 @@
 set -e
-DATASET_DIR='../wmt16_de_en/'
+DATASET_DIR='data/wmt16_de_en'
 ACTUAL_SRC_TRAIN=`cat ${DATASET_DIR}/train.tok.clean.bpe.32000.en |md5sum`
 EXPECTED_SRC_TRAIN='b7482095b787264a310d4933d197a134 -'
@@ -64,9 +64,7 @@ wget -nc -nv -O ${OUTPUT_DIR_DATA}/dev.tgz \
 wget -nc -nv -O ${OUTPUT_DIR_DATA}/test.tgz \
   http://data.statmt.org/wmt16/translation-task/test.tgz
-OUTPUT_DIR=${1:-"/public/home/aiss/code/mlperf/wmt16_de_en"}
-OUTPUT_DIR_DATA="${OUTPUT_DIR}/data"
-## Extract everything
+# Extract everything
 echo "Extracting all files..."
 mkdir -p "${OUTPUT_DIR_DATA}/europarl-v7-de-en"
 tar -xvzf "${OUTPUT_DIR_DATA}/europarl-v7-de-en.tgz" -C "${OUTPUT_DIR_DATA}/europarl-v7-de-en"
@@ -160,10 +158,10 @@ cat "${OUTPUT_DIR}/newstest2015.tok.clean.de" \
   > "${OUTPUT_DIR}/newstest_dev.tok.clean.de"
 # Filter datasets
-python3 `pwd`/scripts/filter_dataset.py \
+python3 scripts/filter_dataset.py \
   -f1 ${OUTPUT_DIR}/train.tok.clean.en \
   -f2 ${OUTPUT_DIR}/train.tok.clean.de
-python3 `pwd`/scripts/filter_dataset.py \
+python3 scripts/filter_dataset.py \
   -f1 ${OUTPUT_DIR}/newstest_dev.tok.clean.en \
   -f2 ${OUTPUT_DIR}/newstest_dev.tok.clean.de
@@ -173,23 +171,20 @@ python3 `pwd`/scripts/filter_dataset.py \
 for merge_ops in 32000; do
   echo "Learning BPE with merge_ops=${merge_ops}. This may take a while..."
   cat "${OUTPUT_DIR}/train.tok.clean.de" "${OUTPUT_DIR}/train.tok.clean.en" | \
-    #subword-nmt learn-bpe -s $merge_ops > "${OUTPUT_DIR}/bpe.${merge_ops}"
-    ${OUTPUT_DIR}/subword-nmt/learn_bpe.py -s $merge_ops > "${OUTPUT_DIR}/bpe.${merge_ops}"
+    subword-nmt learn-bpe -s $merge_ops > "${OUTPUT_DIR}/bpe.${merge_ops}"
   echo "Apply BPE with merge_ops=${merge_ops} to tokenized files..."
   for lang in en de; do
     for f in ${OUTPUT_DIR}/*.tok.${lang} ${OUTPUT_DIR}/*.tok.clean.${lang}; do
       outfile="${f%.*}.bpe.${merge_ops}.${lang}"
-      #subword-nmt apply-bpe -c "${OUTPUT_DIR}/bpe.${merge_ops}" < $f > "${outfile}"
-      ${OUTPUT_DIR}/subword-nmt/apply_bpe.py -c "${OUTPUT_DIR}/bpe.${merge_ops}" < $f > "${outfile}"
+      subword-nmt apply-bpe -c "${OUTPUT_DIR}/bpe.${merge_ops}" < $f > "${outfile}"
       echo ${outfile}
     done
   done
   # Create vocabulary file for BPE
   cat "${OUTPUT_DIR}/train.tok.clean.bpe.${merge_ops}.en" "${OUTPUT_DIR}/train.tok.clean.bpe.${merge_ops}.de" | \
-    #subword-nmt get-vocab | cut -f1 -d ' ' > "${OUTPUT_DIR}/vocab.bpe.${merge_ops}"
-    ${OUTPUT_DIR}/subword-nmt/get_vocab.py | cut -f1 -d ' ' > "${OUTPUT_DIR}/vocab.bpe.${merge_ops}"
+    subword-nmt get-vocab | cut -f1 -d ' ' > "${OUTPUT_DIR}/vocab.bpe.${merge_ops}"
 done
#include <torch/torch.h>

#include <vector>

// CUDA forward declarations
at::Tensor attn_score_forward_cuda(
    const at::Tensor &attn_query,
    const at::Tensor &attn_keys,
    const at::Tensor &bias,
    const at::Tensor &linear_attn);

std::vector<at::Tensor> attn_score_backward_cuda(
    const at::Tensor &grad_output,
    const at::Tensor &attn_query,
    const at::Tensor &attn_keys,
    const at::Tensor &bias,
    const at::Tensor &linear_attn);

// C++ interface
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

at::Tensor attn_score_forward(
    const at::Tensor &attn_query,
    const at::Tensor &attn_keys,
    const at::Tensor &bias,
    const at::Tensor &linear_attn) {
  CHECK_INPUT(attn_query);
  CHECK_INPUT(attn_keys);
  CHECK_INPUT(bias);
  CHECK_INPUT(linear_attn);
  return attn_score_forward_cuda(attn_query, attn_keys, bias, linear_attn);
}

std::vector<at::Tensor> attn_score_backward(
    const at::Tensor &grad_output,
    const at::Tensor &attn_query,
    const at::Tensor &attn_keys,
    const at::Tensor &bias,
    const at::Tensor &linear_attn) {
  CHECK_INPUT(grad_output);
  CHECK_INPUT(attn_query);
  CHECK_INPUT(attn_keys);
  CHECK_INPUT(bias);
  CHECK_INPUT(linear_attn);
  return attn_score_backward_cuda(grad_output, attn_query, attn_keys, bias, linear_attn);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &attn_score_forward, "Attention score calculation forward (CUDA)");
  m.def("backward", &attn_score_backward, "Attention score calculation backward (CUDA)");
}
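The binding above only validates inputs and dispatches to the CUDA kernels; the kernel math itself is not part of this commit. For orientation, GNMT-v2 uses additive (Bahdanau-style) attention, so the fused forward is presumably computing linear_attn · tanh(attn_query + attn_keys + bias) over all query/key pairs. A minimal CPU sketch with stock ATen ops, assuming [batch, len, dim] layouts (the function name and shapes are illustrative, not the kernel's actual contract):

#include <torch/torch.h>

// Hypothetical reference, not the fused kernel:
//   score[b, q, k] = sum_d linear_attn[d] * tanh(attn_query[b, q, d] + attn_keys[b, k, d] + bias[d])
at::Tensor attn_score_reference(
    const at::Tensor &attn_query,   // assumed [batch, q_len, dim]
    const at::Tensor &attn_keys,    // assumed [batch, k_len, dim]
    const at::Tensor &bias,         // assumed [dim]
    const at::Tensor &linear_attn)  // assumed [dim]
{
  // Broadcast to [batch, q_len, k_len, dim], squash with tanh, then contract dim.
  auto summed = attn_query.unsqueeze(2) + attn_keys.unsqueeze(1) + bias;
  return at::matmul(at::tanh(summed), linear_attn);
}

A fused kernel avoids materializing the [batch, q_len, k_len, dim] intermediate, which is the usual motivation for shipping this as a custom extension.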
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <torch/torch.h>

namespace at {
namespace native {

at::Tensor revert_varlen_tensor(const Tensor &input, const Tensor &offsets);
at::Tensor get_offsets(const Tensor &input, const Tensor &lengths);
void checkLongTensor(const Tensor &tensor);

at::Tensor set_mask_cpp(const Tensor &_lengths) {
  at::native::checkLongTensor(_lengths);
  int64_t batch_size = _lengths.size(0);
  int64_t *lengths = _lengths.data_ptr<int64_t>();
  // The mask is sized by lengths[0], so callers must pass lengths sorted in
  // decreasing order (lengths[0] is the longest sequence in the batch).
  int64_t seq_length = (lengths == NULL) ? 0 : lengths[0];
  auto output = torch::empty({seq_length, batch_size}, torch::CPU(at::kByte));
  auto output_data = output.data_ptr<uint8_t>();
  // output[t][i] = 1 while timestep t is inside sequence i, 0 once it is padding.
  for (int64_t t = 0; t < seq_length; t++) {
    for (int64_t i = 0; i < batch_size; i++) {
      if (lengths[i] > t) {
        output_data[t * batch_size + i] = 1;
      } else {
        output_data[t * batch_size + i] = 0;
      }
    }
  }
  return output;
}

} // namespace native
} // namespace at

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("revert_varlen_tensor", &at::native::revert_varlen_tensor);
  m.def("set_mask_cpp", &at::native::set_mask_cpp);
  m.def("get_offsets", &at::native::get_offsets);
}
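For intuition, the mask that set_mask_cpp builds is expressible in two broadcast ops. A sketch under the same assumption as the loop above (lengths sorted in decreasing order, so lengths[0] is the maximum; the helper name is illustrative):

#include <torch/torch.h>

// Hypothetical equivalent of set_mask_cpp:
//   mask[t, i] = 1 iff timestep t < lengths[i]
at::Tensor set_mask_reference(const at::Tensor &lengths) {
  int64_t seq_length = lengths.numel() > 0 ? lengths[0].item<int64_t>() : 0;
  auto steps = at::arange(seq_length, lengths.options()).unsqueeze(1); // [seq_length, 1]
  return (steps < lengths.unsqueeze(0)).to(at::kByte);                 // [seq_length, batch]
}

The resulting [seq_length, batch] byte mask is time-major, matching the default (non-batch-first) layout of PyTorch RNN outputs, so it can be applied directly to padded decoder states.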