Unverified commit 0cea8d55, authored by Yih-Dar and committed by GitHub
Browse files

Add offline runners info in the Slack report (#19169)



* send slack report for offline runners
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 49bf5698
...@@ -19,6 +19,8 @@ jobs: ...@@ -19,6 +19,8 @@ jobs:
check_runner_status: check_runner_status:
name: Check Runner Status name: Check Runner Status
runs-on: ubuntu-latest runs-on: ubuntu-latest
outputs:
offline_runners: ${{ steps.set-offline_runners.outputs.offline_runners }}
steps: steps:
- name: Checkout transformers - name: Checkout transformers
uses: actions/checkout@v2 uses: actions/checkout@v2
...@@ -26,7 +28,14 @@ jobs: ...@@ -26,7 +28,14 @@ jobs:
fetch-depth: 2 fetch-depth: 2
- name: Check Runner Status - name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners single-gpu-ci-runner-docker,multi-gpu-ci-runner-docker,single-gpu-scheduled-ci-runner-docker,multi-scheduled-scheduled-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} run: python utils/check_self_hosted_runner.py --target_runners single-gpu-ci-runner-docker,multi-gpu-ci-runner-docker,single-gpu-scheduled-ci-runner-docker,multi-scheduled-scheduled-ci-runner-docker,single-gpu-doctest-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
- id: set-offline_runners
name: Set output for offline runners
if: ${{ always() }}
run: |
offline_runners=$(python3 -c 'fp = open("offline_runners.txt"); failed = fp.read(); fp.close(); print(failed)')
echo "::set-output name=offline_runners::$offline_runners"
send_results: send_results:
name: Send results to webhook name: Send results to webhook
...@@ -50,6 +59,7 @@ jobs: ...@@ -50,6 +59,7 @@ jobs:
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_EVENT: runner status check CI_EVENT: runner status check
RUNNER_STATUS: ${{ needs.check_runner_status.result }} RUNNER_STATUS: ${{ needs.check_runner_status.result }}
OFFLINE_RUNNERS: ${{ needs.check_runner_status.outputs.offline_runners }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: | run: |
......
...@@ -5,6 +5,8 @@ import subprocess ...@@ -5,6 +5,8 @@ import subprocess
def get_runner_status(target_runners, token): def get_runner_status(target_runners, token):
offline_runners = []
cmd = ( cmd = (
f'curl -H "Accept: application/vnd.github+json" -H "Authorization: Bearer {token}"' f'curl -H "Accept: application/vnd.github+json" -H "Authorization: Bearer {token}"'
" https://api.github.com/repos/huggingface/transformers/actions/runners" " https://api.github.com/repos/huggingface/transformers/actions/runners"
...@@ -17,7 +19,15 @@ def get_runner_status(target_runners, token): ...@@ -17,7 +19,15 @@ def get_runner_status(target_runners, token):
for runner in runners: for runner in runners:
if runner["name"] in target_runners: if runner["name"] in target_runners:
if runner["status"] == "offline": if runner["status"] == "offline":
raise ValueError(f"{runner['name']} is offline!") offline_runners.append(runner)
# save the result so we can report them on Slack
with open("offline_runners.txt", "w") as fp:
fp.write(json.dumps(offline_runners))
if len(offline_runners) > 0:
failed = "\n".join(offline_runners)
raise ValueError(f"The following runners are offline:\n{failed}")
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -397,8 +397,12 @@ class Message: ...@@ -397,8 +397,12 @@ class Message:
ci_title_block = {"type": "section", "text": {"type": "mrkdwn", "text": ci_title}} ci_title_block = {"type": "section", "text": {"type": "mrkdwn", "text": ci_title}}
blocks.append(ci_title_block) blocks.append(ci_title_block)
offline_runners = []
if runner_not_available: if runner_not_available:
text = "💔 CI runners are not available! Tests are not run. 😭" text = "💔 CI runners are not available! Tests are not run. 😭"
result = os.environ.get("OFFLINE_RUNNERS")
if result is not None:
offline_runners = json.loads(result)
elif runner_failed: elif runner_failed:
text = "💔 CI runners have problems! Tests are not run. 😭" text = "💔 CI runners have problems! Tests are not run. 😭"
elif setup_failed: elif setup_failed:
...@@ -413,11 +417,18 @@ class Message: ...@@ -413,11 +417,18 @@ class Message:
"text": text, "text": text,
}, },
} }
text = ""
if len(offline_runners) > 0:
text = "\n • " + "\n • ".join(offline_runners)
text = f"The following runners are offline:\n{text}\n\n"
text += "🙏 Let's fix it ASAP! 🙏"
error_block_2 = { error_block_2 = {
"type": "section", "type": "section",
"text": { "text": {
"type": "plain_text", "type": "plain_text",
"text": "🙏 Let's fix it ASAP! 🙏", "text": text,
}, },
"accessory": { "accessory": {
"type": "button", "type": "button",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment