# Test image for huggingface/transformers: CPU-only PyTorch, Juman++ built from
# source, the dependencies of the transformers extras listed below, UniDic data
# and pre-fetched hub objects under /test_data. transformers itself is
# uninstalled again at the end; only its dependencies stay in the image.
FROM python:3.10-slim

# PYTHONDONTWRITEBYTECODE=1 keeps Python from writing .pyc files.
# UV_PYTHON names the interpreter the `uv` commands below operate on.
ENV PYTHONDONTWRITEBYTECODE=1 \
    UV_PYTHON=/usr/local/bin/python

USER root

# System packages: runtime tools/libraries plus the toolchain used to build
# Juman++. The apt lists are removed in the same layer that created them,
# because deleting them in a later layer would not shrink the image.
# NOTE(review): `g++5` is not an obvious Debian package name - confirm what apt
# resolves it to before touching it; it is kept exactly as it was.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        cmake \
        curl \
        espeak-ng \
        g++5 \
        git \
        git-lfs \
        libprotobuf-dev \
        libsndfile1-dev \
        protobuf-compiler \
        time \
        wget \
        xz-utils && \
    rm -rf /var/lib/apt/lists/*

# Install uv with pip, then use uv to upgrade pip and setuptools.
RUN pip --no-cache-dir install uv && \
    uv pip install --no-cache-dir -U pip setuptools

# Build and install Juman++ into /usr/local.
# Download, build, install and cleanup happen in one layer so that neither the
# tarball nor the source/build tree is stored in the image. Catch2's
# single-header `catch.hpp` is dropped into the source tree's `libs/` directory
# before configuring.
ARG JUMANPP_VERSION=2.0.0-rc3
ARG CATCH2_VERSION=2.13.8
RUN wget -O /tmp/jumanpp.tar.xz "https://github.com/ku-nlp/jumanpp/releases/download/v${JUMANPP_VERSION}/jumanpp-${JUMANPP_VERSION}.tar.xz" && \
    tar -xf /tmp/jumanpp.tar.xz -C /tmp && \
    wget -O "/tmp/jumanpp-${JUMANPP_VERSION}/libs/catch.hpp" "https://github.com/catchorg/Catch2/releases/download/v${CATCH2_VERSION}/catch.hpp" && \
    cmake -S "/tmp/jumanpp-${JUMANPP_VERSION}" -B "/tmp/jumanpp-${JUMANPP_VERSION}/bld" -DCMAKE_INSTALL_PREFIX=/usr/local && \
    make -C "/tmp/jumanpp-${JUMANPP_VERSION}/bld" install -j 10 && \
    rm -rf /tmp/jumanpp.tar.xz "/tmp/jumanpp-${JUMANPP_VERSION}"

WORKDIR /

# torch comes from the CPU-only wheel index; accelerate is installed without
# resolving its dependencies (--no-deps).
RUN uv pip install --no-cache --upgrade 'torch<2.9' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu

# Git ref (branch, tag or commit) of transformers to install from and to fetch
# the CI helper script from. Declared here, right before its first use, so that
# changing it does not invalidate the layers above.
ARG REF=main
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,spacy,ftfy,rjieba]" unidic unidic-lite
# spacy is not used so not tested. Causes failures. TODO fix later
RUN uv run python -m unidic download

# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
WORKDIR /test_data
# -f makes curl fail on an HTTP error instead of saving the error page as the script.
RUN curl -fsSLO "https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py" && \
    python3 fetch_hub_objects_for_ci.py
WORKDIR /

# Remove transformers itself; the dependencies installed alongside it remain.
RUN uv pip uninstall transformers

# Drop build-only packages from the final filesystem. This cannot reclaim the
# space used by the earlier layer that installed them; it only changes what is
# present in the resulting image.
RUN apt-get remove -y g++ cmake xz-utils libprotobuf-dev protobuf-compiler && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*