# Composite action: builds a Dynamo container image with container/build.sh,
# optionally sanity-checks the runtime image, and publishes build metrics.
#
# All `inputs.*` values are handed to shell scripts through step-level `env:`
# rather than being expanded with `${{ }}` inside `run:`; expression expansion
# happens before the shell parses the script, so inline expansion would let an
# input value inject shell code.
name: 'Docker Build'
description: 'Build Dynamo container images'

inputs:
  framework:
    description: 'Framework to build'
    required: true
    default: 'vllm'
  target:
    description: 'Target to build'
    required: false
    default: 'runtime'
  platform:
    description: 'Docker platform to build on, ie. linux/amd64'
    required: false
    default: 'linux/amd64'
  image_tag:
    description: 'Custom image tag (optional, defaults to framework:latest)'
    required: false
  ci_token:
    description: 'CI Token'
    required: false
  aws_default_region:
    description: 'AWS Default Region'
    required: false
  sccache_s3_bucket:
    description: 'SCCache S3 Bucket'
    required: false
  aws_account_id:
    description: 'AWS Account ID'
    required: false
  aws_access_key_id:
    description: 'AWS Access Key ID'
    required: false
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
  base_image_tag:
    description: 'Optional override for base image tag passed to build.sh'
    required: false
  runtime_image_tag:
    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
    required: false
  cuda_version:
    description: 'CUDA version: passed as the CUDA_VERSION build-arg; its major version is part of the registry cache ref'
    required: true
  enable_kvbm:
    description: 'Enable KVBM support (optional)'
    required: false
  dynamo_base_image:
    description: 'Pre-built Dynamo base image to use instead of building from scratch'
    required: false

outputs:
  image_tag:
    description: 'Image Tag'
    value: ${{ steps.build.outputs.image_tag }}

runs:
  using: 'composite'
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435  # v3.11.1
      with:
        driver: docker-container
        # Enable BuildKit for enhanced metadata
        buildkitd-flags: --debug

    # Runs before the build: removes unused Docker images, containers,
    # networks and build cache left on the runner.
    - name: Cleanup
      if: always()
      shell: bash
      run: |
        docker system prune -af

    - name: Build image
      id: build
      shell: bash
      env:
        GITHUB_TOKEN: ${{ inputs.ci_token }}
        AWS_DEFAULT_REGION: ${{ inputs.aws_default_region }}
        SCCACHE_S3_BUCKET: ${{ inputs.sccache_s3_bucket }}
        AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
        PLATFORM: ${{ inputs.platform }}
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_JOB: ${{ github.job }}
        GITHUB_REF_NAME: ${{ github.ref_name }}
        CUDA_VERSION: ${{ inputs.cuda_version }}
        # Inputs are prefixed with INPUT_ so they cannot collide with variable
        # names that build.sh may read from its environment.
        INPUT_IMAGE_TAG: ${{ inputs.image_tag }}
        INPUT_FRAMEWORK: ${{ inputs.framework }}
        INPUT_TARGET: ${{ inputs.target }}
        INPUT_BASE_IMAGE_TAG: ${{ inputs.base_image_tag }}
        INPUT_RUNTIME_IMAGE_TAG: ${{ inputs.runtime_image_tag }}
        INPUT_DYNAMO_BASE_IMAGE: ${{ inputs.dynamo_base_image }}
        INPUT_ENABLE_KVBM: ${{ inputs.enable_kvbm }}
      run: |
        set -x

        # Export the architecture first so the always() metrics/upload steps
        # can name their files even when the build itself fails.
        PLATFORM_ARCH=$(echo "$PLATFORM" | sed 's/linux\///')
        echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> "$GITHUB_ENV"

        # Determine image tag
        if [ -n "$INPUT_IMAGE_TAG" ]; then
          IMAGE_TAG="$INPUT_IMAGE_TAG"
        else
          IMAGE_TAG="${INPUT_FRAMEWORK}:latest"
        fi

        CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}

        BUILD_START_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        echo "BUILD_START_TIME=${BUILD_START_TIME}" >> "$GITHUB_ENV"
        echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"

        # Create build logs directory
        mkdir -p build-logs
        BUILD_LOG_FILE="build-logs/build-${INPUT_FRAMEWORK}-${PLATFORM_ARCH}.log"
        echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> "$GITHUB_ENV"
        echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"

        # Base cache arguments: always read from the per-framework cache ref
        # and from the main image; inline cache metadata is always written.
        CACHE_REPO="${ECR_HOSTNAME}/ai-dynamo/dynamo"
        CACHE_SUFFIX="cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}"
        EXTRA_ARGS=(
          --cache-to "type=inline"
          --cache-from "type=registry,ref=${CACHE_REPO}:${INPUT_FRAMEWORK}-${CACHE_SUFFIX}-cache"
          --cache-from "type=registry,ref=${CACHE_REPO}:main-${INPUT_FRAMEWORK}-${CACHE_SUFFIX}"
        )
        # Only builds of main publish the registry cache.
        if [[ "$GITHUB_REF_NAME" == "main" ]]; then
          EXTRA_ARGS+=(--cache-to "type=registry,ref=${CACHE_REPO}:${INPUT_FRAMEWORK}-${CACHE_SUFFIX}-cache,mode=max")
        fi
        echo "${EXTRA_ARGS[*]}"

        # Collect optional overrides provided by the workflow
        if [ -n "$INPUT_BASE_IMAGE_TAG" ]; then
          EXTRA_ARGS+=(--base-image-tag "$INPUT_BASE_IMAGE_TAG")
        fi
        if [ -n "$INPUT_RUNTIME_IMAGE_TAG" ]; then
          EXTRA_ARGS+=(--build-arg "RUNTIME_IMAGE_TAG=${INPUT_RUNTIME_IMAGE_TAG}")
        fi
        if [ -n "$CUDA_VERSION" ]; then
          EXTRA_ARGS+=(--build-arg "CUDA_VERSION=${CUDA_VERSION}")
        fi
        if [ -n "$INPUT_DYNAMO_BASE_IMAGE" ]; then
          EXTRA_ARGS+=(--dynamo-base-image "$INPUT_DYNAMO_BASE_IMAGE")
        fi
        if [ -n "$INPUT_ENABLE_KVBM" ]; then
          EXTRA_ARGS+=(--build-arg "ENABLE_KVBM=${INPUT_ENABLE_KVBM}")
        fi

        # Execute build and capture output (show on console AND save to file).
        # `shell: bash` runs with -e and pipefail, so errexit is suspended
        # around the pipeline; otherwise a failed build would abort the script
        # before the exit code and end time below are recorded.
        set +e
        ./container/build.sh --tag "$IMAGE_TAG" \
          --target "$INPUT_TARGET" \
          --vllm-max-jobs 10 \
          --framework "$INPUT_FRAMEWORK" \
          --platform "$PLATFORM" \
          --use-sccache \
          --sccache-bucket "$SCCACHE_S3_BUCKET" \
          --sccache-region "$AWS_DEFAULT_REGION" \
          "${EXTRA_ARGS[@]}" 2>&1 | tee "${BUILD_LOG_FILE}"
        BUILD_EXIT_CODE=${PIPESTATUS[0]}
        set -e

        BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        echo "BUILD_END_TIME=${BUILD_END_TIME}" >> "$GITHUB_ENV"

        # Exit with the build's exit code
        exit "${BUILD_EXIT_CODE}"

    - name: Run Sanity Check on Runtime Image
      if: inputs.target == 'runtime'
      shell: bash
      env:
        IMAGE_TAG: ${{ steps.build.outputs.image_tag }}
      run: |
        echo "Running sanity check on image: $IMAGE_TAG"

        # Run the sanity check script inside the container
        # The script is located in /workspace/deploy/sanity_check.py in runtime containers
        export WORKSPACE=/workspace

        # errexit is suspended so the failure message below can be printed.
        set +e
        docker run --rm "$IMAGE_TAG" python "${WORKSPACE}/deploy/sanity_check.py" --runtime-check --no-gpu-check
        SANITY_CHECK_EXIT_CODE=$?
        set -e

        if [ "${SANITY_CHECK_EXIT_CODE}" -ne 0 ]; then
          echo "ERROR: Sanity check failed - ai-dynamo packages not properly installed"
          exit "${SANITY_CHECK_EXIT_CODE}"
        else
          echo "✅ Sanity check passed"
        fi

    - name: Capture Build Metrics
      id: metrics
      shell: bash
      env:
        IMAGE_TAG: ${{ steps.build.outputs.image_tag }}
        INPUT_FRAMEWORK: ${{ inputs.framework }}
        INPUT_TARGET: ${{ inputs.target }}
        INPUT_PLATFORM: ${{ inputs.platform }}
        RUN_ID: ${{ github.run_id }}
        CHECK_RUN_ID: ${{ job.check_run_id }}
      run: |
        # Create metrics directory
        mkdir -p build-metrics

        # BUILD_START_TIME / BUILD_END_TIME were written to GITHUB_ENV by the
        # "Build image" step and are therefore present in this environment.
        START_EPOCH=$(date -d "$BUILD_START_TIME" +%s)
        END_EPOCH=$(date -d "$BUILD_END_TIME" +%s)
        BUILD_DURATION_SEC=$((END_EPOCH - START_EPOCH))

        echo "🕐 Build timing:"
        echo "   Start: ${BUILD_START_TIME}"
        echo "   End: ${BUILD_END_TIME}"
        echo "   Duration: ${BUILD_DURATION_SEC} seconds"

        # Get image size using docker inspect; report 0 if inspect fails
        if [ -n "$IMAGE_TAG" ]; then
          IMAGE_SIZE_BYTES=$(docker image inspect "$IMAGE_TAG" --format='{{.Size}}' 2>/dev/null || echo "0")
          echo "📦 Image size: ${IMAGE_SIZE_BYTES} bytes"
        else
          IMAGE_SIZE_BYTES=0
          echo "⚠️  No image tag available"
        fi

        PLATFORM_ARCH=$(echo "$INPUT_PLATFORM" | sed 's/linux\///')
        echo "   Architecture: ${PLATFORM_ARCH}"
        echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> "$GITHUB_ENV"

        JOB_KEY="${INPUT_FRAMEWORK}-${PLATFORM_ARCH}"
        echo "   Job Key: ${JOB_KEY}"

        # Create the job-specific metrics file
        METRICS_FILE="build-metrics/metrics-${INPUT_FRAMEWORK}-${PLATFORM_ARCH}-${RUN_ID}-${CHECK_RUN_ID}.json"
        cat > "$METRICS_FILE" << EOF
        {
          "framework": "${INPUT_FRAMEWORK}",
          "target": "${INPUT_TARGET}",
          "platform": "${INPUT_PLATFORM}",
          "platform_arch": "${PLATFORM_ARCH}",
          "image_size_bytes": ${IMAGE_SIZE_BYTES},
          "build_start_time": "${BUILD_START_TIME}",
          "build_end_time": "${BUILD_END_TIME}",
          "build_duration_sec": ${BUILD_DURATION_SEC}
        }
        EOF

        cat "$METRICS_FILE"

    - name: Generate Comprehensive Build Metrics
      id: comprehensive-metrics
      if: always()
      shell: bash
      env:
        INPUT_FRAMEWORK: ${{ inputs.framework }}
        INPUT_PLATFORM: ${{ inputs.platform }}
        WORKFLOW_ID: ${{ github.run_id }}
        JOB_ID: ${{ job.check_run_id }}
      run: |
        echo "=========================================="
        echo "📊 GENERATING COMPREHENSIVE BUILD METRICS"
        echo "=========================================="

        # Create metrics directory
        mkdir -p build-metrics

        # PLATFORM_ARCH normally arrives via GITHUB_ENV from the build step;
        # derive and export it here if that step never ran, so the upload
        # step below resolves the same file name.
        if [ -z "${PLATFORM_ARCH:-}" ]; then
          PLATFORM_ARCH=$(echo "$INPUT_PLATFORM" | sed 's/linux\///')
          echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> "$GITHUB_ENV"
        fi

        # Make parser executable
        chmod +x .github/scripts/parse_buildkit_output.py

        # Check for build logs and build stage arguments dynamically.
        # NOTE(review): nothing in this action writes single-stage-build.log;
        # presumably container/build.sh does - confirm. If it is absent, fall
        # back to the log tee'd by the "Build image" step.
        BUILD_LOG="build-logs/single-stage-build.log"
        if [ ! -f "$BUILD_LOG" ] && [ -n "${BUILD_LOG_FILE:-}" ] && [ -f "$BUILD_LOG_FILE" ]; then
          BUILD_LOG="$BUILD_LOG_FILE"
        fi

        # Path to container metadata created in previous step
        CONTAINER_METADATA="build-metrics/metrics-${INPUT_FRAMEWORK}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"

        # Output single comprehensive JSON with all build stages
        COMPREHENSIVE_JSON="build-metrics/build-${INPUT_FRAMEWORK}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"

        echo "🚀 Parsing BuildKit outputs and merging with container metrics..."

        # Build stage arguments dynamically based on which logs exist
        STAGE_ARGS=()

        if [ -f "$BUILD_LOG" ]; then
          echo "  ✓ Found base image log: ${BUILD_LOG}"
          STAGE_ARGS+=("runtime:${BUILD_LOG}")
        else
          echo "  ℹ️  No image log found"
        fi

        # Check for any additional stage logs (e.g., build-logs/stage3-*.log)
        for extra_log in build-logs/stage*.log; do
          # The -f test also skips the literal pattern when nothing matches.
          if [ -f "$extra_log" ]; then
            stage_name=$(basename "$extra_log" .log)
            echo "  ✓ Found additional stage log: ${extra_log} (${stage_name})"
            STAGE_ARGS+=("${stage_name}:${extra_log}")
          fi
        done

        echo "Container Metadata: ${CONTAINER_METADATA}"
        echo "Output: ${COMPREHENSIVE_JSON}"
        echo ""

        # Run parser with all discovered stages
        # Usage: parse_buildkit_output.py <output_json> [stage_name:log_file] ... [--metadata=<file>]
        # Parser failures are reported but never fail the job.
        set +e
        python3 .github/scripts/parse_buildkit_output.py \
          "$COMPREHENSIVE_JSON" \
          "${STAGE_ARGS[@]}" \
          "--metadata=${CONTAINER_METADATA}"
        PARSER_EXIT_CODE=$?
        set -e

        echo ""
        echo "📊 Parser exit code: ${PARSER_EXIT_CODE}"

        if [ "${PARSER_EXIT_CODE}" -eq 0 ] && [ -f "$COMPREHENSIVE_JSON" ]; then
          echo "✅ Comprehensive build metrics generated successfully"
          echo "📄 Output file: ${COMPREHENSIVE_JSON}"
        else
          echo "⚠️  Metrics generation had issues but continuing..."
        fi

    # Upload comprehensive build metrics as artifact
    # NOTE(review): unlike setup-buildx above, this action is referenced by a
    # mutable tag; consider pinning it to a commit SHA.
    - name: Upload Comprehensive Build Metrics
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: build-metrics-${{ inputs.framework }}-${{ inputs.target }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
        path: build-metrics/build-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
        retention-days: 7