Unverified commit a28d9f44, authored by Divakar Verma, committed by GitHub
Browse files

[ROCm][CI] Handle pytest status code 5 when a shard isn't allocated any tests (#32040)


Signed-off-by: Divakar Verma <divakar.verma@amd.com>
parent 629584bf
......@@ -209,12 +209,21 @@ if [[ $commands == *"--shard-id="* ]]; then
 wait "${pid}"
 STATUS+=($?)
 done
+at_least_one_shard_with_tests=0
 for st in "${STATUS[@]}"; do
-if [[ ${st} -ne 0 ]]; then
+if [[ ${st} -ne 0 ]] && [[ ${st} -ne 5 ]]; then
 echo "One of the processes failed with $st"
 exit "${st}"
+elif [[ ${st} -eq 5 ]]; then
+echo "Shard exited with status 5 (no tests collected) - treating as success"
+else # This means st is 0
+at_least_one_shard_with_tests=1
 fi
 done
+if [[ ${#STATUS[@]} -gt 0 && ${at_least_one_shard_with_tests} -eq 0 ]]; then
+echo "All shards reported no tests collected. Failing the build."
+exit 1
+fi
 else
 echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
 docker run \
......
......@@ -870,7 +870,7 @@ steps:
 - label: Language Models Tests (Extra Standard) %N
 timeout_in_minutes: 45
 mirror_hardwares: [amdexperimental]
-agent_pool: mi325_2
+agent_pool: mi325_8
 # grade: Blocking
 torch_nightly: true
 source_file_dependencies:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment