Unverified commit f9d0ec37, authored by OlivierDehaene, committed by GitHub
Browse files

feat(docker): Make the image compatible with api-inference (#29)

parent 1f570d18
...@@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 ...@@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ENV LANG=C.UTF-8 \ ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \ LC_ALL=C.UTF-8 \
DEBIAN_FRONTEND=noninteractive \ DEBIAN_FRONTEND=noninteractive \
MODEL_BASE_PATH=/var/azureml-model \ MODEL_BASE_PATH=/data \
MODEL_NAME=bigscience/bloom \ MODEL_ID=bigscience/bloom-560m \
QUANTIZE=false \ QUANTIZE=false \
NUM_GPUS=8 \ NUM_GPUS=1 \
SAFETENSORS_FAST_GPU=1 \ SAFETENSORS_FAST_GPU=1 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NCCL_ASYNC_ERROR_HANDLING=1 \ NCCL_ASYNC_ERROR_HANDLING=1 \
......
...@@ -5,7 +5,7 @@ model: azureml:bloom:1 ...@@ -5,7 +5,7 @@ model: azureml:bloom:1
model_mount_path: /var/azureml-model model_mount_path: /var/azureml-model
environment_variables: environment_variables:
MODEL_BASE_PATH: /var/azureml-model/bloom MODEL_BASE_PATH: /var/azureml-model/bloom
MODEL_NAME: bigscience/bloom MODEL_ID: bigscience/bloom
NUM_GPUS: 8 NUM_GPUS: 8
environment: environment:
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1 image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
......
...@@ -197,6 +197,7 @@ pub async fn run( ...@@ -197,6 +197,7 @@ pub async fn run(
let app = Router::new() let app = Router::new()
.route("/generate", post(generate)) .route("/generate", post(generate))
.layer(Extension(shared_state.clone())) .layer(Extension(shared_state.clone()))
.route("/", get(health))
.route("/health", get(health)) .route("/health", get(health))
.layer(Extension(shared_state.clone())); .layer(Extension(shared_state.clone()));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment