Unverified Commit 2c5747ed authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Update notification service (#17921)


Co-authored-by: default avatarydshieh <ydshieh@users.noreply.github.com>
parent 07575e86
...@@ -164,6 +164,11 @@ jobs: ...@@ -164,6 +164,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
# Create a directory to store test failure tables in the next step
- name: Create directory
run: mkdir test_failure_tables
- name: Send message to Slack - name: Send message to Slack
env: env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
...@@ -176,4 +181,12 @@ jobs: ...@@ -176,4 +181,12 @@ jobs:
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: | run: |
pip install slack_sdk pip install slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
\ No newline at end of file
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
path: test_failure_tables
\ No newline at end of file
...@@ -233,15 +233,11 @@ class Message: ...@@ -233,15 +233,11 @@ class Message:
individual_reports.append(key) individual_reports.append(key)
header = "Single | Multi | Category\n" header = "Single | Multi | Category\n"
category_failures_report = header + "\n".join(sorted(individual_reports)) category_failures_report = prepare_reports(
title="The following modeling categories had failures", header=header, reports=individual_reports
)
return { return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
"type": "section",
"text": {
"type": "mrkdwn",
"text": f"The following modeling categories had failures:\n\n```\n{category_failures_report}\n```",
},
}
@property @property
def model_failures(self) -> Dict: def model_failures(self) -> Dict:
...@@ -302,21 +298,44 @@ class Message: ...@@ -302,21 +298,44 @@ class Message:
model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n" model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
sorted_model_reports = sorted(model_reports, key=lambda s: s.split("] ")[-1]) sorted_model_reports = sorted(model_reports, key=lambda s: s.split("] ")[-1])
model_failures_report = model_header + "\n".join(sorted_model_reports) model_failures_report = prepare_reports(
title="These following model modules had failures", header=model_header, reports=sorted_model_reports
)
module_header = "Single | Multi | Category\n" module_header = "Single | Multi | Category\n"
sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("] ")[-1]) sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("] ")[-1])
module_failures_report = module_header + "\n".join(sorted_module_reports) module_failures_report = prepare_reports(
title="The following non-model modules had failures", header=module_header, reports=sorted_module_reports
report = "" )
if len(model_reports): model_failure_sections = [
report += f"These following model modules had failures:\n```\n{model_failures_report}\n```\n\n" {"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}},
{"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}},
]
if len(other_module_reports): # Save complete tables (for past CI) - to be uploaded as artifacts
report += f"The following non-model modules had failures:\n```\n{module_failures_report}\n```\n\n" if ci_event.startswith("Past CI"):
model_failures_report = prepare_reports(
title="These following model modules had failures",
header=model_header,
reports=sorted_model_reports,
to_truncate=False,
)
file_path = os.path.join(os.getcwd(), "test_failure_tables/model_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(model_failures_report)
module_failures_report = prepare_reports(
title="The following non-model modules had failures",
header=module_header,
reports=sorted_module_reports,
to_truncate=False,
)
file_path = os.path.join(os.getcwd(), "test_failure_tables/module_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(module_failures_report)
return {"type": "section", "text": {"type": "mrkdwn", "text": report}} return model_failure_sections
@property @property
def additional_failures(self) -> Dict: def additional_failures(self) -> Dict:
...@@ -337,15 +356,11 @@ class Message: ...@@ -337,15 +356,11 @@ class Message:
individual_reports.append(report) individual_reports.append(report)
header = "Single | Multi | Category\n" header = "Single | Multi | Category\n"
failures_report = header + "\n".join(sorted(individual_reports)) failures_report = prepare_reports(
title="The following non-modeling tests had failures", header=header, reports=individual_reports
)
return { return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}}
"type": "section",
"text": {
"type": "mrkdwn",
"text": f"The following non-modeling tests had failures:\n```\n{failures_report}\n```",
},
}
@property @property
def payload(self) -> str: def payload(self) -> str:
...@@ -358,7 +373,10 @@ class Message: ...@@ -358,7 +373,10 @@ class Message:
blocks.append(self.failures) blocks.append(self.failures)
if self.n_model_failures > 0: if self.n_model_failures > 0:
blocks.extend([self.category_failures, self.model_failures]) blocks.append(self.category_failures)
for block in self.model_failures:
if block["text"]["text"]:
blocks.append(block)
if self.n_additional_failures > 0: if self.n_additional_failures > 0:
blocks.append(self.additional_failures) blocks.append(self.additional_failures)
...@@ -582,6 +600,29 @@ def retrieve_available_artifacts(): ...@@ -582,6 +600,29 @@ def retrieve_available_artifacts():
return _available_artifacts return _available_artifacts
def prepare_reports(title, header, reports, to_truncate=True):
report = ""
MAX_ERROR_TEXT = 3000 - len("[Truncated]")
if not to_truncate:
MAX_ERROR_TEXT = float("inf")
if len(reports) > 0:
# `text` must be less than 3001 characters in Slack SDK
# keep some room for adding "[Truncated]" when necessary
for idx in range(len(reports)):
_report = header + "\n".join(reports[: idx + 1])
new_report = f"{title}:\n```\n{_report}\n```\n"
if len(new_report) > MAX_ERROR_TEXT:
# `report` here has length <= 3000
report = report + "[Truncated]"
break
report = new_report
return report
if __name__ == "__main__": if __name__ == "__main__":
org = "huggingface" org = "huggingface"
...@@ -686,16 +727,24 @@ if __name__ == "__main__": ...@@ -686,16 +727,24 @@ if __name__ == "__main__":
unclassified_model_failures = [] unclassified_model_failures = []
# This prefix is used to get job links below. For past CI, we use `workflow_call`, which changes the job names from
# `Model tests (...)` to `PyTorch 1.5 / Model tests (...)` for example.
job_name_prefix = ""
if ci_event.startswith("Past CI - "):
framework, version = ci_event.replace("Past CI - ", "").split("-")
framework = "PyTorch" if framework == "pytorch" else "TensorFlow"
job_name_prefix = f"{framework} {version}"
for model in model_results.keys(): for model in model_results.keys():
for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths: for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"]) artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
if "stats" in artifact: if "stats" in artifact:
# Link to the GitHub Action job # Link to the GitHub Action job
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get( # The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert` job_name = f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)" if job_name_prefix:
) job_name = f"{job_name_prefix} / {job_name}"
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name)
failed, success, time_spent = handle_test_results(artifact["stats"]) failed, success, time_spent = handle_test_results(artifact["stats"])
model_results[model]["success"] += success model_results[model]["success"] += success
model_results[model]["time_spent"] += time_spent[1:-1] + ", " model_results[model]["time_spent"] += time_spent[1:-1] + ", "
...@@ -809,7 +858,7 @@ if __name__ == "__main__": ...@@ -809,7 +858,7 @@ if __name__ == "__main__":
message = Message(title, ci_title, model_results, additional_results) message = Message(title, ci_title, model_results, additional_results)
# send report only if there is any failure # send report only if there is any failure (for push CI)
if message.n_failures: if message.n_failures or ci_event != "push":
message.post() message.post()
message.post_reply() message.post_reply()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment