Unverified Commit 1ddb62b4 authored by nv-nmailhot's avatar nv-nmailhot Committed by GitHub
Browse files

feat: add test metrics upload (#3648)


Signed-off-by: default avatarnv-nmailhot <nmailhot@nvidia.com>
parent fdcc8d5b
......@@ -12,11 +12,37 @@ inputs:
description: 'Maximum number of cores available to docker'
required: false
default: '10'
framework:
description: 'Framework name for test metrics'
required: false
default: 'unknown'
test_type:
description: 'Test type (unit, e2e, integration)'
required: false
default: 'e2e'
platform_arch:
description: 'Platform architecture (amd64, arm64)'
required: false
default: 'amd64'
runs:
using: "composite"
steps:
- name: Setup Test Environment
shell: bash
run: |
# Setup test directories
mkdir -p test-results
# Set platform architecture from input
PLATFORM_ARCH="${{ inputs.platform_arch }}"
if [[ -z "${PLATFORM_ARCH}" ]]; then
PLATFORM_ARCH="amd64"
fi
echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> $GITHUB_ENV
echo "🏗️ Platform architecture: ${PLATFORM_ARCH}"
- name: Run tests
shell: bash
env:
......@@ -25,9 +51,64 @@ runs:
PYTEST_XML_FILE: pytest_test_report.xml
HF_HOME: /runner/_work/_temp
run: |
# Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures
docker run --runtime=nvidia --rm --gpus all -w /workspace \
--cpus=${NUM_CPUS} \
--network host \
--name ${{ env.CONTAINER_ID }}_pytest \
-v "$(pwd)/test-results:/test-results" \
${{ inputs.image_tag }} \
bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ inputs.pytest_marks }}\""
bash -c "pytest -v --tb=short --basetemp=/tmp --junitxml=/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 -m \"${{ inputs.pytest_marks }}\""
TEST_EXIT_CODE=$?
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}"
# Always continue to results processing
exit 0
- name: Process Test Results
shell: bash
run: |
# Check for JUnit XML file and determine test status
JUNIT_FILE="test-results/pytest_test_report.xml"
if [[ -f "$JUNIT_FILE" ]]; then
echo "✅ JUnit XML generated successfully"
# Extract basic test counts for status determination
TOTAL_TESTS=$(grep -o 'tests="[0-9]*"' "$JUNIT_FILE" | grep -o '[0-9]*' | head -1 || echo "0")
FAILED_TESTS=$(grep -o 'failures="[0-9]*"' "$JUNIT_FILE" | grep -o '[0-9]*' | head -1 || echo "0")
ERROR_TESTS=$(grep -o 'errors="[0-9]*"' "$JUNIT_FILE" | grep -o '[0-9]*' | head -1 || echo "0")
echo "📊 ${TOTAL_TESTS} tests completed (${FAILED_TESTS} failed, ${ERROR_TESTS} errors)"
# Create metadata file with step context information
METADATA_FILE="test-results/test_metadata.json"
echo '{' > "$METADATA_FILE"
echo ' "job_name": "${{ github.job }}",' >> "$METADATA_FILE"
echo ' "framework": "${{ inputs.framework }}",' >> "$METADATA_FILE"
echo ' "test_type": "${{ inputs.test_type }}",' >> "$METADATA_FILE"
echo ' "platform_arch": "${{ inputs.platform_arch }}",' >> "$METADATA_FILE"
echo ' "junit_xml_file": "pytest_test_report.xml",' >> "$METADATA_FILE"
echo ' "step_name": "Run ${{ inputs.test_type }} tests"' >> "$METADATA_FILE"
echo '}' >> "$METADATA_FILE"
echo "📝 Created test metadata file"
else
echo "⚠️ JUnit XML file not found - test results may not be available for upload"
TOTAL_TESTS=0
FAILED_TESTS=1 # Treat missing XML as failure
ERROR_TESTS=0
fi
# Exit with original test result to maintain workflow behavior
exit ${TEST_EXIT_CODE}
- name: Upload Test Results
uses: actions/upload-artifact@v4
if: always() # Always upload test results, even if tests failed
with:
name: test-results-${{ inputs.framework }}-${{ inputs.test_type }}-${{ env.PLATFORM_ARCH }}
path: test-results/${{ env.PYTEST_XML_FILE }}
retention-days: 7
\ No newline at end of file
......@@ -126,12 +126,18 @@ jobs:
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "unit and vllm and gpu_1"
framework: "vllm"
test_type: "unit"
platform_arch: ${{ matrix.platform.arch }}
- name: Run e2e tests
if: ${{ matrix.platform.arch != 'arm64' }}
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and vllm and gpu_1 and not slow"
framework: "vllm"
test_type: "e2e, gpu_1"
platform_arch: ${{ matrix.platform.arch }}
sglang:
needs: changed-files
......@@ -190,6 +196,9 @@ jobs:
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "unit and sglang and gpu_1"
framework: "sglang"
test_type: "unit"
platform_arch: ${{ matrix.platform.arch }}
- name: Run e2e tests
# OPS-1140: Uncomment the below line
# if: ${{ matrix.platform.arch != 'arm64' }}
......@@ -197,6 +206,9 @@ jobs:
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and sglang and gpu_1"
framework: "sglang"
test_type: "e2e, gpu_1"
platform_arch: ${{ matrix.platform.arch }}
trtllm:
needs: changed-files
......@@ -247,12 +259,18 @@ jobs:
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "unit and trtllm_marker and gpu_1"
framework: "trtllm"
test_type: "unit"
platform_arch: ${{ matrix.platform.arch }}
- name: Run e2e tests
if: ${{ matrix.platform.arch != 'arm64' }}
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow"
framework: "trtllm"
test_type: "e2e, gpu_1"
platform_arch: ${{ matrix.platform.arch }}
# Upload metrics for this workflow and all its jobs
upload-workflow-metrics:
......@@ -283,14 +301,23 @@ jobs:
merge-multiple: true
continue-on-error: true # Don't fail if artifacts don't exist
- name: Download test results
uses: actions/download-artifact@v4
with:
pattern: test-results-*
path: test-results/
merge-multiple: true
continue-on-error: true # Don't fail if artifacts don't exist
- name: Upload Complete Workflow Metrics
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
WORKFLOW_INDEX: ${{ secrets.WORKFLOW_INDEX }}
JOB_INDEX: ${{ secrets.JOB_INDEX }}
STEPS_INDEX: ${{ secrets.STEPS_INDEX }}
# Container index configuration
# Container and test index configuration
CONTAINER_INDEX: ${{ secrets.CONTAINER_INDEX }}
TEST_INDEX: ${{ secrets.TEST_INDEX }}
run: |
# Upload complete workflow metrics including container metrics
python3 .github/workflows/upload_complete_workflow_metrics.py
......@@ -6,10 +6,12 @@ This version runs as the final job in a workflow and captures metrics for
the entire workflow including all previous jobs.
"""
import glob
import json
import os
import re
import time
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from urllib.parse import urlparse
......@@ -66,6 +68,16 @@ FIELD_BUILD_TARGET = "s_build_target"
FIELD_BUILD_FRAMEWORK = "s_build_framework"
FIELD_BUILD_SIZE_BYTES = "l_build_size_bytes"
# Test Info
FIELD_FRAMEWORK = "s_framework"
FIELD_ERROR_MESSAGE = "s_error_message"
FIELD_TEST_NAME = "s_test_name" # Test name (e.g., test_sglang_deployment[aggregated])
FIELD_TEST_CLASSNAME = (
"s_test_classname" # Test class name (e.g., tests.serve.test_sglang)
)
FIELD_TEST_DURATION = "l_test_duration_ms"
FIELD_TEST_STATUS = "s_test_status" # Test status (passed, failed, error, skipped)
class BuildMetricsReader:
"""Reader for build metrics from environment variables and artifacts"""
......@@ -630,6 +642,8 @@ class WorkflowMetricsUploader:
if is_framework_job:
self._upload_container_metrics(job_data)
# Also upload test metrics if available for this framework job
self._upload_test_metrics(job_data)
def _upload_job_step_metrics(self, job_data: Dict[str, Any]) -> int:
"""Extract and post metrics for all steps in a job"""
......@@ -801,6 +815,180 @@ class WorkflowMetricsUploader:
except Exception as e:
print(f"❌ Failed to upload container metrics: {e}")
def _upload_test_metrics(self, job_data: Dict[str, Any]) -> None:
"""Upload individual test metrics by parsing JUnit XML directly from test-results"""
test_index = os.getenv("TEST_INDEX")
if not test_index:
print("⚠️ TEST_INDEX not configured, skipping test metrics upload")
return
job_name = job_data.get("name", "")
job_id = str(job_data["id"])
print(f"🧪 Looking for test results for job '{job_name}'")
# Look for test results directory
test_results_dir = "test-results"
if not os.path.exists(test_results_dir):
print(f"⚠️ Test results directory not found: {test_results_dir}")
return
# Look for metadata files to get accurate step and framework info
metadata_files = glob.glob(f"{test_results_dir}/test_metadata.json")
if not metadata_files:
print(f"⚠️ No test metadata files found in {test_results_dir}")
return
print(f"📄 Found {len(metadata_files)} test metadata files")
total_tests_processed = 0
# Process each metadata file
for metadata_file in metadata_files:
try:
# Read metadata to get accurate step and framework info
with open(metadata_file, "r") as f:
metadata = json.load(f)
framework = metadata.get("framework", "unknown")
test_type = metadata.get("test_type", "unknown")
step_name = metadata.get("step_name", "Run tests")
junit_xml_file = metadata.get(
"junit_xml_file", "pytest_test_report.xml"
)
# Construct step ID from metadata
test_step_id = f"{job_id}_{step_name.lower().replace(' ', '_')}"
print("📋 Processing test results:")
print(f" Framework: {framework}")
print(f" Test Type: {test_type}")
print(f" Step Name: {step_name}")
print(f" Step ID: {test_step_id}")
# Find the corresponding XML file
xml_file = f"{test_results_dir}/{junit_xml_file}"
if not os.path.exists(xml_file):
print(f"⚠️ JUnit XML file not found: {xml_file}")
continue
print(f"📄 Processing JUnit XML: {xml_file}")
# Parse JUnit XML using xml.etree.ElementTree
tree = ET.parse(xml_file)
root = tree.getroot()
# Process each test case
for testsuite in root.findall(".//testsuite"):
for testcase in testsuite.findall("testcase"):
# Extract test case information
test_classname = testcase.get("classname", "")
test_name = testcase.get("name", "")
test_time = float(testcase.get("time", 0))
test_status = "passed" # Default status
# Create individual test data payload
test_data = {}
# Identity & Context
test_full_name = (
f"{test_classname}::{test_name}"
if test_classname
else test_name
)
test_data[
FIELD_ID
] = f"github-test-{job_id}-{hash(test_full_name) & 0x7FFFFFFF}" # Use hash for unique ID
test_data[FIELD_STEP_ID] = test_step_id
test_data[FIELD_JOB_ID] = job_id
# Test Info
test_data[FIELD_FRAMEWORK] = framework
test_data[FIELD_TEST_NAME] = test_name
test_data[FIELD_TEST_CLASSNAME] = test_classname
test_data[FIELD_TEST_DURATION] = int(
test_time * 1000
) # Convert to milliseconds
# Check for failure, error, or skipped elements
error_msg = ""
if testcase.find("failure") is not None:
test_status = "failed"
failure_elem = testcase.find("failure")
error_msg = (
failure_elem.get("message", "")
if failure_elem is not None
else ""
)
if (
not error_msg
and failure_elem is not None
and failure_elem.text
):
error_msg = failure_elem.text
elif testcase.find("error") is not None:
test_status = "error"
error_elem = testcase.find("error")
error_msg = (
error_elem.get("message", "")
if error_elem is not None
else ""
)
if (
not error_msg
and error_elem is not None
and error_elem.text
):
error_msg = error_elem.text
elif testcase.find("skipped") is not None:
test_status = "skipped"
skipped_elem = testcase.find("skipped")
error_msg = (
skipped_elem.get("message", "")
if skipped_elem is not None
else ""
)
test_data[FIELD_TEST_STATUS] = test_status
test_data[
FIELD_STATUS
] = test_status # Also set general status field
if error_msg:
test_data[FIELD_ERROR_MESSAGE] = error_msg[
:1000
] # Limit error message length
# Add timing (use job completion time as more accurate timestamp)
job_completed_at = job_data.get("completed_at")
if job_completed_at:
test_data["@timestamp"] = job_completed_at
else:
# Fallback to current time if job completion time not available
test_data["@timestamp"] = datetime.now(
timezone.utc
).isoformat()
# Add common context fields (repo, branch, pr_id, etc.)
self.add_common_context_fields(test_data)
# Upload individual test
try:
self.post_to_db(test_index, test_data)
print(
f"✅ Uploaded test: {test_full_name} ({test_status}, {test_time:.3f}s)"
)
total_tests_processed += 1
except Exception as e:
print(f"❌ Failed to upload test {test_full_name}: {e}")
except Exception as e:
print(f"❌ Failed to process metadata file {metadata_file}: {e}")
print(f"📊 Processed {total_tests_processed} individual tests total")
print(" " + "=" * 50)
def main():
"""Main function to upload complete GitHub Actions workflow metrics"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment