Commit fa43fb71 authored by OlivierDehaene

fix(server): Fix Transformers fork version

parent 4236e41b
.gitignore
 aml
 target
+server/transformers
\ No newline at end of file
Dockerfile
@@ -2,6 +2,7 @@ FROM rust:1.64 as router-builder
 WORKDIR /usr/src
+COPY rust-toolchain.toml rust-toolchain.toml
 COPY proto proto
 COPY router router
@@ -13,6 +14,7 @@ FROM rust:1.64 as launcher-builder
 WORKDIR /usr/src
+COPY rust-toolchain.toml rust-toolchain.toml
 COPY launcher launcher
 WORKDIR /usr/src/launcher
aml/deployment.yaml
@@ -8,7 +8,7 @@ environment_variables:
   MODEL_NAME: bigscience/bloom
   NUM_GPUS: 8
 environment:
-  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2
+  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
 inference_config:
   liveness_route:
     port: 3000
@@ -25,14 +25,14 @@ request_settings:
   max_concurrent_requests_per_instance: 256
 liveness_probe:
   initial_delay: 600
-  timeout: 20
+  timeout: 90
   period: 120
   success_threshold: 1
-  failure_threshold: 3
+  failure_threshold: 5
 readiness_probe:
   initial_delay: 600
-  timeout: 20
+  timeout: 90
   period: 120
   success_threshold: 1
-  failure_threshold: 3
+  failure_threshold: 5
 instance_count: 1
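The probe changes give the server more headroom: a slow response now has 90s instead of 20s before an attempt counts as failed, and five consecutive failures instead of three are needed before the container is recycled. A rough sketch of the widened tolerance window, assuming these settings follow Kubernetes-style probe semantics (the helper name is illustrative, not part of this repo):

```python
# Illustrative helper (not part of this repo): with Kubernetes-style probe
# semantics, a container survives roughly `failure_threshold` consecutive
# failed attempts spaced `period` seconds apart before it is restarted.
def failure_tolerance_s(period_s: int, failure_threshold: int) -> int:
    return period_s * failure_threshold

print(failure_tolerance_s(120, 3))  # 360 -> old settings
print(failure_tolerance_s(120, 5))  # 600 -> new settings
```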
server/Makefile
@@ -7,13 +7,13 @@ gen-server:
 	touch text_generation/pb/__init__.py

 install-transformers:
-	# Install specific version of transformers
+	# Install specific version of transformers with custom cuda kernels
 	rm transformers || true
-	rm transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 || true
-	curl -L -O https://github.com/OlivierDehaene/transformers/archive/7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	unzip 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	rm 7302a24535e8dc5637ea5b4e4572fc971d404098.zip
-	mv transformers-7302a24535e8dc5637ea5b4e4572fc971d404098 transformers
+	rm transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 || true
+	curl -L -O https://github.com/OlivierDehaene/transformers/archive/b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	unzip b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	rm b55f16c5b71aeef47a66a4270e19c154f050a7a7.zip
+	mv transformers-b55f16c5b71aeef47a66a4270e19c154f050a7a7 transformers
 	cd transformers && python setup.py install

 install-torch:
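Because the fork is fetched as a source archive and installed with setup.py rather than pinned through pip, the commit SHA is not recorded anywhere pip can report. One way to sanity-check what actually got installed (an illustrative snippet, not part of the repo):

```python
# Illustrative check (not part of this repo): confirm which transformers
# build Python imports after `make install-transformers`.
import transformers

print(transformers.__version__)  # base version the fork was cut from
print(transformers.__file__)     # should resolve to the fork's installed copy
```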
server/text_generation/models/bloom.py
@@ -38,7 +38,7 @@ class BLOOMSharded(CausalLM):
         self.master = self.rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{self.rank}")
-            dtype = torch.float16
+            dtype = torch.bfloat16
         else:
             device = torch.device("cpu")
             dtype = torch.float32
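The dtype switch is the functional core of the server change: float16 saturates near 65504, while bfloat16 keeps float32's exponent range at reduced precision, which matters for BLOOM-scale activations. A minimal standalone sketch of the same selection logic (rank hardcoded for illustration, not taken from this repo):

```python
import torch

# Same device/dtype selection as the diff above, shown standalone.
rank = 0  # hypothetical single-process rank
if torch.cuda.is_available():
    device = torch.device(f"cuda:{rank}")
    dtype = torch.bfloat16  # float32's exponent range, reduced mantissa
else:
    device = torch.device("cpu")
    dtype = torch.float32

# Why bfloat16: float16 tops out near 6.5e4, bfloat16 near 3.4e38.
print(torch.finfo(torch.float16).max)
print(torch.finfo(torch.bfloat16).max)
```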