Unverified Commit 9ceb96c0 authored by moto's avatar moto Committed by GitHub
Browse files

Add base test image with Kaldi featbin and codecs (#581)

* Add test image with Kaldi executable and third_parties

* Use Ubuntu as base image since pytorch/many-linux is huge
parent 13057829
...@@ -236,35 +236,34 @@ jobs: ...@@ -236,35 +236,34 @@ jobs:
unittest_linux: unittest_linux:
<<: *binary_common <<: *binary_common
docker: docker:
- image: "pytorch/manylinux-cuda100" - image: "pytorch/torchaudio_unittest_base:manylinux"
resource_class: 2xlarge+ resource_class: 2xlarge+
steps: steps:
- checkout - checkout
- restore_cache: - restore_cache:
keys: keys:
- env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum "./packaging/build_from_source.sh" }}-{{ checksum ".circleci/unittest/environment.yml" }} - env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}
- run: - run:
name: Setup name: Setup
command: .circleci/unittest/setup_env.sh command: .circleci/unittest/scripts/setup_env.sh
- save_cache: - save_cache:
key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum "./packaging/build_from_source.sh" }}-{{ checksum ".circleci/unittest/environment.yml" }} key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}
paths: paths:
- conda - conda
- env - env
- third_party
- run: - run:
name: Installation name: Install torchaudio
command: .circleci/unittest/install.sh command: .circleci/unittest/scripts/install.sh
- run: - run:
name: Run test name: Run tests
command: .circleci/unittest/run_test.sh command: .circleci/unittest/scripts/run_test.sh
- run: - run:
name: Post process name: Post process
command: .circleci/unittest/post_process.sh command: .circleci/unittest/scripts/post_process.sh
- store_test_results: - store_test_results:
path: test-results path: test-results
......
...@@ -236,35 +236,34 @@ jobs: ...@@ -236,35 +236,34 @@ jobs:
unittest_linux: unittest_linux:
<<: *binary_common <<: *binary_common
docker: docker:
- image: "pytorch/manylinux-cuda100" - image: "pytorch/torchaudio_unittest_base:manylinux"
resource_class: 2xlarge+ resource_class: 2xlarge+
steps: steps:
- checkout - checkout
- restore_cache: - restore_cache:
{% raw %} {% raw %}
keys: keys:
- env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum "./packaging/build_from_source.sh" }}-{{ checksum ".circleci/unittest/environment.yml" }} - env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}
{% endraw %} {% endraw %}
- run: - run:
name: Setup name: Setup
command: .circleci/unittest/setup_env.sh command: .circleci/unittest/scripts/setup_env.sh
- save_cache: - save_cache:
{% raw %} {% raw %}
key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum "./packaging/build_from_source.sh" }}-{{ checksum ".circleci/unittest/environment.yml" }} key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/scripts/environment.yml" }}
{% endraw %} {% endraw %}
paths: paths:
- conda - conda
- env - env
- third_party
- run: - run:
name: Installation name: Install torchaudio
command: .circleci/unittest/install.sh command: .circleci/unittest/scripts/install.sh
- run: - run:
name: Run test name: Run tests
command: .circleci/unittest/run_test.sh command: .circleci/unittest/scripts/run_test.sh
- run: - run:
name: Post process name: Post process
command: .circleci/unittest/post_process.sh command: .circleci/unittest/scripts/post_process.sh
- store_test_results: - store_test_results:
path: test-results path: test-results
......
This directory contains:
- docker
Docker image definition and scripts to build and update Docker image for unittest.
- scripts
Scripts used by CircleCI to run unit tests.
scripts/build_third_parties.sh
# Base image for unit tests.
#
# Stage 1 ("builder") compiles Kaldi's featbin executables and runs the
# third-party build script. Stage 2 starts from a clean Ubuntu image and copies
# in only the resulting /kaldi and /third_party directories.
FROM ubuntu:18.04 AS builder

################################################################################
# Build Kaldi
################################################################################
# `apt-get update` runs in the same layer as `install` so that a cached (stale)
# package index is never reused by a later install step.
RUN apt-get update -q && apt-get install -q -y \
        autoconf \
        automake \
        bzip2 \
        g++ \
        gfortran \
        git \
        libatlas-base-dev \
        libtool \
        make \
        python2.7 \
        python3 \
        sox \
        subversion \
        unzip \
        wget \
        zlib1g-dev

# KALDI uses MKL as a default math library, but we are going to copy featbin binaries and dependent
# shared libraries to the final image, so we use ATLAS, which is easy to reinstall in the final image.
RUN git clone --depth 1 https://github.com/kaldi-asr/kaldi.git /opt/kaldi && \
    cd /opt/kaldi/tools && \
    make -j $(nproc) && \
    cd /opt/kaldi/src && \
    ./configure --shared --mathlib=ATLAS --use-cuda=no && \
    make featbin -j $(nproc)

# Copy featbins and dependent libraries
# (COPY rather than ADD: this is a plain local directory, no extraction or URL fetch is needed.)
COPY ./scripts /scripts
RUN bash /scripts/copy_kaldi_executables.sh /opt/kaldi /kaldi

################################################################################
# Build third party dependencies
################################################################################
# Refresh the package index in this layer too, so the step does not depend on
# index files left behind by an earlier (possibly cached) layer.
RUN apt-get update -q && apt-get install -q -y curl
# NOTE(review): this script is presumably what creates /third_party, which the
# final stage copies below -- confirm against build_third_parties.sh.
RUN bash /scripts/build_third_parties.sh /

################################################################################
# Build the final image
################################################################################
FROM ubuntu:18.04

# libatlas3-base provides the ATLAS runtime that the Kaldi binaries were configured against.
RUN apt-get update && apt-get install -y \
        g++ \
        gfortran \
        git \
        libatlas3-base \
        wget \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /kaldi /kaldi
COPY --from=builder /third_party /third_party

# LD_LIBRARY_PATH is not set in the base image, so "${LD_LIBRARY_PATH}:/kaldi/lib"
# would expand to ":/kaldi/lib". The empty leading entry makes the dynamic loader
# search the current working directory, so the path is set explicitly instead.
ENV PATH="${PATH}:/kaldi/bin" LD_LIBRARY_PATH="/kaldi/lib"
#!/usr/bin/env bash
# Build the unit-test base Docker image and push it to the registry.
#
# The third-party build script is kept under packaging/ at the repository top
# level; a copy is placed under ./scripts so that it is part of the Docker
# build context.

set -euo pipefail

# Run from the directory containing this script so the relative paths below
# (./scripts and the build context ".") resolve regardless of the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

repo_top="$(git rev-parse --show-toplevel)"
cp "${repo_top}/packaging/build_from_source.sh" ./scripts/build_third_parties.sh

image_ref="pytorch/torchaudio_unittest_base:manylinux"

docker build -t "${image_ref}" .
docker push "${image_ref}"
#!/usr/bin/env bash
# Copy Kaldi featbin executables, and the shared libraries inside the Kaldi
# tree that they link against, into a standalone directory.
#
# Usage: copy_kaldi_executables.sh <kaldi_root> <target_dir>
#   <kaldi_root>  Kaldi tree; executables are taken from <kaldi_root>/src/featbin.
#   <target_dir>  Destination; executables are copied to <target_dir>/bin and
#                 libraries to <target_dir>/lib.

list_executables() {
    # List up executables in the given directory ($1), one path per line.
    find "$1" -type f -executable
}

list_kaldi_libraries() {
    # List up shared libraries used by executables found in the given directory ($1)
    # that reside in Kaldi directory ($2). Output is de-duplicated.
    local file
    while IFS= read -r file; do
        # grep exits non-zero when an executable links no library under $2.
        # Swallow that so errexit/pipefail cannot cut the listing short.
        { ldd "${file}" | grep -o "${2}.* "; } || true
    done < <(list_executables "$1") | sort -u
}

set -euo pipefail

if [ "$#" -ne 2 ]; then
    printf "Usage: %s <kaldi_root> <target_dir>\n" "$0" >&2
    exit 1
fi

kaldi_root="$(realpath "$1")"
target_dir="$(realpath "$2")"

bin_dir="${target_dir}/bin"
lib_dir="${target_dir}/lib"

mkdir -p "${bin_dir}" "${lib_dir}"

# 1. Copy featbins
printf "Copying executables to %s\n" "${bin_dir}"
while IFS= read -r file; do
    printf "  %s\n" "${file}"
    cp "${file}" "${bin_dir}"
done < <(list_executables "${kaldi_root}/src/featbin")

# 2. Copy dependent libraries from Kaldi
printf "Copying libraries to %s\n" "${lib_dir}"
# Default IFS is kept on purpose here: list_kaldi_libraries emits each path with
# a trailing space (from the grep pattern), and `read` trims it.
while read -r file; do
    printf "  %s\n" "$file"
    # If it is not symlink, just copy to the target directory
    if [ ! -L "${file}" ]; then
        cp "${file}" "${lib_dir}"
        continue
    fi

    # If it is symlink,
    # 1. Copy the actual library to the target directory.
    library="$(realpath "${file}")"
    cp "${library}" "${lib_dir}"
    # 2. then if the name of the symlink is different from the actual library name,
    #    create the symlink in the target directory.
    lib_name="$(basename "${library}")"
    link_name="$(basename "${file}")"
    if [ "${lib_name}" != "${link_name}" ]; then
        printf "    Linking %s -> %s\n" "${lib_name}" "${link_name}"
        (
            # Subshell keeps the cd local; the link target is relative so the
            # lib directory stays relocatable.
            cd "${lib_dir}"
            ln -sf "${lib_name}" "${link_name}"
        )
    fi
done < <(list_kaldi_libraries "${bin_dir}" "${kaldi_root}")
#!/usr/bin/env bash #!/usr/bin/env bash
unset PYTORCH_VERSION unset PYTORCH_VERSION
# For unittest, nightly PyTorch is used, and we do not want to fixiate on the version # For unittest, nightly PyTorch is used as the following section,
# so no need to set PYTORCH_VERSION.
# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config.
set -e set -e
...@@ -9,7 +11,9 @@ eval "$(./conda/bin/conda shell.bash hook)" ...@@ -9,7 +11,9 @@ eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env conda activate ./env
printf "* Installing PyTorch nightly build" printf "* Installing PyTorch nightly build"
conda install -c pytorch-nightly pytorch cpuonly conda install -y -c pytorch-nightly pytorch cpuonly
printf "* Setting up torchaudio\n" printf "* Installing torchaudio\n"
# Link codecs present at /third_party. See Dockerfile for how this is built
ln -fs /third_party ./third_party
IS_CONDA=true python setup.py develop IS_CONDA=true python setup.py develop
#!/usr/bin/env bash #!/usr/bin/env bash
# This script is for setting up environment for running unit test on CircleCI. # This script is for setting up environment in which unit test is ran.
# To speed up the CI time, the result of environment is cached. # To speed up the CI time, the resulting environment is cached.
# PyTorch is not included here, so that it won't be cached. #
# Do not install PyTorch and torchaudio here, otherwise they also get cached.
set -e set -e
...@@ -19,21 +20,15 @@ if [ ! -d "${conda_dir}" ]; then ...@@ -19,21 +20,15 @@ if [ ! -d "${conda_dir}" ]; then
wget -O miniconda.sh http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh wget -O miniconda.sh http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash ./miniconda.sh -b -f -p "${conda_dir}" bash ./miniconda.sh -b -f -p "${conda_dir}"
fi fi
printf "* Checking conda update\n"
eval "$(${conda_dir}/bin/conda shell.bash hook)" eval "$(${conda_dir}/bin/conda shell.bash hook)"
conda update -n base -c defaults conda
# 2. Create test environment at ./env # 2. Create test environment at ./env
if [ ! -d "${env_dir}" ]; then if [ ! -d "${env_dir}" ]; then
printf "* Creating a test environment\n" printf "* Creating a test environment\n"
conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
fi fi
printf "* Installing dependencies (except PyTorch)\n"
conda activate "${env_dir}" conda activate "${env_dir}"
conda env update --file "${this_dir}/environment.yml" --prune
# 3. Build codecs at ./third_party # 3. Install Conda dependencies
if [ ! -d "./third_party" ]; then printf "* Installing dependencies (except PyTorch)\n"
printf "* Building Codecs" conda env update --file "${this_dir}/environment.yml" --prune
./packaging/build_from_source.sh "$PWD"
fi
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment