"...lm-evaluation-harness.git" did not exist on "5cb7af7e961f0c46803b49eb6ba4d8da99cff0af"
custom-tokenizers.dockerfile 1.29 KB
Newer Older
1
2
3
4
5
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
ENV VIRTUAL_ENV=/usr/local
Yih-Dar's avatar
Yih-Dar committed
6
RUN pip --no-cache-dir install uv==0.1.45 && uv venv && uv pip install --no-cache-dir -U pip setuptools
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz
RUN tar xvf jumanpp-2.0.0-rc3.tar.xz
RUN mkdir jumanpp-2.0.0-rc3/bld
WORKDIR ./jumanpp-2.0.0-rc3/bld
RUN wget -LO catch.hpp https://github.com/catchorg/Catch2/releases/download/v2.13.8/catch.hpp
RUN mv catch.hpp ../libs/
RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
RUN make install -j 10


RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
RUN uv pip install  --no-cache-dir "transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
# spacy is not used so not tested. Causes to failures. TODO fix later
RUN python3 -m unidic download
RUN pip uninstall -y transformers

RUN apt-get clean && rm -rf /var/lib/apt/lists/*
RUN apt remove -y g++ cmake  xz-utils libprotobuf-dev protobuf-compiler