mirror of https://github.com/zulip/zulip.git
integration: Update topic & content of grafana webhook.
Choosing the topic from the number of firing alerts makes no sense: it leaves conversations and alerts scattered across topics depending on how many alerts happen to be firing at the moment. Instead, send a separate message for each alert in the Grafana webhook payload, with the alert's name as its topic; if no alert name can be found, fall back to the alert's fingerprint. Also include all alert values in the body of the message, along with links to the alert generator, silence, and image, if available. Co-authored-by: Alex Vandiver <alexmv@zulip.com>
This commit is contained in:
parent
18067794ae
commit
fc2aac6baa
|
@ -758,7 +758,7 @@ DOC_SCREENSHOT_CONFIG: Dict[str, List[BaseScreenshotConfig]] = {
|
|||
"gocd": [ScreenshotConfig("pipeline.json")],
|
||||
"gogs": [ScreenshotConfig("pull_request__opened.json")],
|
||||
"gosquared": [ScreenshotConfig("traffic_spike.json", image_name="000.png")],
|
||||
"grafana": [ScreenshotConfig("alert_v7.json")],
|
||||
"grafana": [ScreenshotConfig("alert_values_v11.json")],
|
||||
"greenhouse": [ScreenshotConfig("candidate_stage_change.json", image_name="000.png")],
|
||||
"groove": [ScreenshotConfig("ticket_started.json")],
|
||||
"harbor": [ScreenshotConfig("scanning_completed.json")],
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
{
|
||||
"receiver": "Debug webhook",
|
||||
"status": "firing",
|
||||
"alerts": [
|
||||
{
|
||||
"status": "firing",
|
||||
"labels": {
|
||||
"debug": "true",
|
||||
"grafana_folder": "device"
|
||||
},
|
||||
"annotations": {
|
||||
"summary": "High memory usage"
|
||||
},
|
||||
"startsAt": "2024-03-01T02:09:00Z",
|
||||
"endsAt": "0001-01-01T00:00:00Z",
|
||||
"generatorURL": "https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1",
|
||||
"fingerprint": "e6349a25f5ef0e9e",
|
||||
"silenceURL": "https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1",
|
||||
"dashboardURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1",
|
||||
"panelURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2",
|
||||
"values": {
|
||||
"A": 2473545728,
|
||||
"B": 0,
|
||||
"C": 1,
|
||||
"minute": 9
|
||||
},
|
||||
"valueString": "[ var='A' labels={instance=node_exporter:9100, job=node} value=2.473545728e+09 ], [ var='B' labels={instance=node_exporter:9100, job=node} value=0 ], [ var='C' labels={} value=1 ], [ var='minute' labels={} value=9 ]",
|
||||
"imageURL": "https://grafana.com/assets/img/blog/mixed_styles.png"
|
||||
}
|
||||
],
|
||||
"groupLabels": {
|
||||
"alertname": "Memory (copy)",
|
||||
"grafana_folder": "device"
|
||||
},
|
||||
"commonLabels": {
|
||||
"alertname": "Memory (copy)",
|
||||
"debug": "true",
|
||||
"grafana_folder": "device"
|
||||
},
|
||||
"commonAnnotations": {
|
||||
"summary": "High memory usage"
|
||||
},
|
||||
"externalURL": "https://play.grafana.org/",
|
||||
"version": "1",
|
||||
"groupKey": "{}/{debug=\"true\"}:{alertname=\"Memory (copy)\", grafana_folder=\"device\"}",
|
||||
"truncatedAlerts": 0,
|
||||
"orgId": 1,
|
||||
"title": "[FIRING:1] Memory (copy) device (true)",
|
||||
"state": "alerting",
|
||||
"message": "**Firing**\n\nValue: A=2.473545728e+09, B=0, C=1, minute=9\nLabels:\n - alertname = Memory (copy)\n - debug = true\n - grafana_folder = device\nAnnotations:\n - summary = High memory usage\nSource: https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1\nSilence: https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1\nDashboard: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\nPanel: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2\n"
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
{
|
||||
"receiver": "Debug webhook",
|
||||
"status": "firing",
|
||||
"alerts": [
|
||||
{
|
||||
"status": "firing",
|
||||
"labels": {
|
||||
"alertname": "Memory (copy)",
|
||||
"debug": "true",
|
||||
"grafana_folder": "device"
|
||||
},
|
||||
"annotations": {
|
||||
"summary": "High memory usage"
|
||||
},
|
||||
"startsAt": "2024-03-01T02:09:00Z",
|
||||
"endsAt": "0001-01-01T00:00:00Z",
|
||||
"generatorURL": "https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1",
|
||||
"fingerprint": "e6349a25f5ef0e9e",
|
||||
"silenceURL": "https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1",
|
||||
"dashboardURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1",
|
||||
"panelURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2",
|
||||
"values": {
|
||||
"A": 2473545728,
|
||||
"B": 0,
|
||||
"C": 1,
|
||||
"minute": 9
|
||||
},
|
||||
"valueString": "[ var='A' labels={instance=node_exporter:9100, job=node} value=2.473545728e+09 ], [ var='B' labels={instance=node_exporter:9100, job=node} value=0 ], [ var='C' labels={} value=1 ], [ var='minute' labels={} value=9 ]",
|
||||
"imageURL": "https://grafana.com/assets/img/blog/mixed_styles.png"
|
||||
}
|
||||
],
|
||||
"groupLabels": {
|
||||
"alertname": "Memory (copy)",
|
||||
"grafana_folder": "device"
|
||||
},
|
||||
"commonLabels": {
|
||||
"alertname": "Memory (copy)",
|
||||
"debug": "true",
|
||||
"grafana_folder": "device"
|
||||
},
|
||||
"commonAnnotations": {
|
||||
"summary": "High memory usage"
|
||||
},
|
||||
"externalURL": "https://play.grafana.org/",
|
||||
"version": "1",
|
||||
"groupKey": "{}/{debug=\"true\"}:{alertname=\"Memory (copy)\", grafana_folder=\"device\"}",
|
||||
"truncatedAlerts": 0,
|
||||
"orgId": 1,
|
||||
"title": "[FIRING:1] Memory (copy) device (true)",
|
||||
"state": "alerting",
|
||||
"message": "**Firing**\n\nValue: A=2.473545728e+09, B=0, C=1, minute=9\nLabels:\n - alertname = Memory (copy)\n - debug = true\n - grafana_folder = device\nAnnotations:\n - summary = High memory usage\nSource: https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1\nSilence: https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1\nDashboard: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\nPanel: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2\n"
|
||||
}
|
|
@ -128,14 +128,11 @@ Someone is testing the alert notification within grafana.
|
|||
)
|
||||
|
||||
def test_alert_v8(self) -> None:
|
||||
expected_topic_name = "[RESOLVED:1]"
|
||||
expected_topic_name = "[TestAlert]"
|
||||
expected_message = """
|
||||
:checkbox: **RESOLVED**
|
||||
|
||||
Webhook test message.
|
||||
|
||||
---
|
||||
**Alert 1**: TestAlert.
|
||||
**TestAlert**
|
||||
|
||||
This alert was fired at <time:2022-08-31T05:54:04.52289368Z>.
|
||||
|
||||
|
@ -145,10 +142,13 @@ Labels:
|
|||
- alertname: TestAlert
|
||||
- instance: Grafana
|
||||
|
||||
Values:
|
||||
[ metric='foo' labels={instance=bar} value=10 ]
|
||||
|
||||
Annotations:
|
||||
- summary: Notification test
|
||||
|
||||
1 alert(s) truncated.
|
||||
[Silence](https://zuliptestingwh2.grafana.net/alerting/silence/new?alertmanager=grafana&matcher=alertname%3DTestAlert&matcher=instance%3DGrafana)
|
||||
""".strip()
|
||||
|
||||
self.check_webhook(
|
||||
|
@ -159,44 +159,143 @@ Annotations:
|
|||
)
|
||||
|
||||
def test_alert_multiple_v8(self) -> None:
|
||||
expected_topic_name = "[FIRING:2]"
|
||||
expected_message = """
|
||||
expected_topic_name_1 = "[High memory usage]"
|
||||
expected_topic_name_2 = "[High CPU usage]"
|
||||
expected_message_1 = """
|
||||
:alert: **FIRING**
|
||||
|
||||
Webhook test message.
|
||||
|
||||
---
|
||||
**Alert 1**: High memory usage.
|
||||
**High memory usage**
|
||||
|
||||
This alert was fired at <time:2021-10-12T09:51:03.157076+02:00>.
|
||||
|
||||
Labels:
|
||||
- alertname: High memory usage
|
||||
- team: blue
|
||||
- zone: us-1
|
||||
|
||||
Values:
|
||||
[ metric='' labels={} value=14151.331895396988 ]
|
||||
|
||||
Annotations:
|
||||
- description: The system has high memory usage
|
||||
- runbook_url: https://myrunbook.com/runbook/1234
|
||||
- summary: This alert was triggered for zone us-1
|
||||
|
||||
[Generator](https://play.grafana.org/alerting/1afz29v7z/edit)
|
||||
[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana&matchers=alertname%3DT2%2Cteam%3Dblue%2Czone%3Dus-1)
|
||||
""".strip()
|
||||
expected_message_2 = """
|
||||
:alert: **FIRING**
|
||||
|
||||
---
|
||||
**Alert 2**: High CPU usage.
|
||||
**High CPU usage**
|
||||
|
||||
This alert was fired at <time:2021-10-12T09:56:03.157076+02:00>.
|
||||
|
||||
Labels:
|
||||
- alertname: High CPU usage
|
||||
- team: blue
|
||||
- zone: eu-1
|
||||
|
||||
Values:
|
||||
[ metric='' labels={} value=47043.702386305304 ]
|
||||
|
||||
Annotations:
|
||||
- description: The system has high CPU usage
|
||||
- runbook_url: https://myrunbook.com/runbook/1234
|
||||
- summary: This alert was triggered for zone eu-1
|
||||
|
||||
[Generator](https://play.grafana.org/alerting/d1rdpdv7k/edit)
|
||||
[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana&matchers=alertname%3DT1%2Cteam%3Dblue%2Czone%3Deu-1)
|
||||
""".strip()
|
||||
|
||||
self.subscribe(self.test_user, self.CHANNEL_NAME)
|
||||
payload = self.get_body("alert_multiple_v8")
|
||||
|
||||
msg = self.send_webhook_payload(
|
||||
self.test_user,
|
||||
self.url,
|
||||
payload,
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
msg = self.get_second_to_last_message()
|
||||
self.assert_channel_message(
|
||||
message=msg,
|
||||
channel_name=self.CHANNEL_NAME,
|
||||
topic_name=expected_topic_name_1,
|
||||
content=expected_message_1,
|
||||
)
|
||||
|
||||
msg = self.get_last_message()
|
||||
self.assert_channel_message(
|
||||
message=msg,
|
||||
channel_name=self.CHANNEL_NAME,
|
||||
topic_name=expected_topic_name_2,
|
||||
content=expected_message_2,
|
||||
)
|
||||
|
||||
def test_alert_values_v11(self) -> None:
|
||||
expected_topic_name = "[Memory (copy)]" # alertname
|
||||
expected_message = """
|
||||
:alert: **FIRING**
|
||||
|
||||
**Memory (copy)**
|
||||
|
||||
This alert was fired at <time:2024-03-01T02:09:00Z>.
|
||||
|
||||
Labels:
|
||||
- alertname: Memory (copy)
|
||||
- debug: true
|
||||
- grafana_folder: device
|
||||
|
||||
Values:
|
||||
- A: 2473545728
|
||||
- B: 0
|
||||
- C: 1
|
||||
- minute: 9
|
||||
|
||||
Annotations:
|
||||
- summary: High memory usage
|
||||
|
||||
[Generator](https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1)
|
||||
[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1)
|
||||
[Image](https://grafana.com/assets/img/blog/mixed_styles.png)
|
||||
""".strip()
|
||||
|
||||
self.check_webhook(
|
||||
"alert_multiple_v8",
|
||||
"alert_values_v11",
|
||||
expected_topic_name,
|
||||
expected_message,
|
||||
content_type="application/x-www-form-urlencoded",
|
||||
)
|
||||
|
||||
def test_alert_no_alertname_v11(self) -> None:
|
||||
expected_topic_name = "[e6349a25f5ef0e9e]" # fingerprint
|
||||
expected_message = """
|
||||
:alert: **FIRING**
|
||||
|
||||
This alert was fired at <time:2024-03-01T02:09:00Z>.
|
||||
|
||||
Labels:
|
||||
- debug: true
|
||||
- grafana_folder: device
|
||||
|
||||
Values:
|
||||
- A: 2473545728
|
||||
- B: 0
|
||||
- C: 1
|
||||
- minute: 9
|
||||
|
||||
Annotations:
|
||||
- summary: High memory usage
|
||||
|
||||
[Generator](https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1)
|
||||
[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1)
|
||||
[Image](https://grafana.com/assets/img/blog/mixed_styles.png)
|
||||
""".strip()
|
||||
|
||||
self.check_webhook(
|
||||
"alert_no_alertname_v11",
|
||||
expected_topic_name,
|
||||
expected_message,
|
||||
content_type="application/x-www-form-urlencoded",
|
||||
|
|
|
@ -5,6 +5,7 @@ from zerver.lib.response import json_success
|
|||
from zerver.lib.typed_endpoint import JsonBodyPayload, typed_endpoint
|
||||
from zerver.lib.validator import (
|
||||
WildValue,
|
||||
check_anything,
|
||||
check_float,
|
||||
check_int,
|
||||
check_none_or,
|
||||
|
@ -21,20 +22,23 @@ ALERT_STATUS_TEMPLATE = "{alert_icon} **{alert_state}**\n\n"
|
|||
|
||||
OLD_MESSAGE_TEMPLATE = "{alert_status}[{rule_name}]({rule_url})\n\n{alert_message}{eval_matches}"
|
||||
|
||||
NEW_TOPIC_TEMPLATE = "[{alert_status}:{alert_count}]"
|
||||
NEW_TOPIC_TEMPLATE = "[{alertname}]"
|
||||
|
||||
ALERT_HEADER_TEMPLATE = """\n---
|
||||
**Alert {count}**"""
|
||||
START_TIME_TEMPLATE = "This alert was fired at <time:{start_time}>."
|
||||
|
||||
START_TIME_TEMPLATE = "\n\nThis alert was fired at <time:{start_time}>.\n"
|
||||
END_TIME_TEMPLATE = "\n\nThis alert was resolved at <time:{end_time}>."
|
||||
|
||||
END_TIME_TEMPLATE = "\nThis alert was resolved at <time:{end_time}>.\n\n"
|
||||
MESSAGE_LABELS_TEMPLATE = "\n\nLabels:\n{label_information}\n"
|
||||
|
||||
MESSAGE_LABELS_TEMPLATE = "Labels:\n{label_information}\n"
|
||||
MESSAGE_VALUES_TEMPLATE = "Values:\n{value_information}\n"
|
||||
|
||||
MESSAGE_ANNOTATIONS_TEMPLATE = "Annotations:\n{annotation_information}\n"
|
||||
MESSAGE_ANNOTATIONS_TEMPLATE = "Annotations:\n{annotation_information}"
|
||||
|
||||
TRUNCATED_ALERTS_TEMPLATE = "{count} alert(s) truncated.\n"
|
||||
MESSAGE_GENERATOR_TEMPLATE = "\n[Generator]({generator_url})"
|
||||
|
||||
MESSAGE_SILENCE_TEMPLATE = "\n[Silence]({silence_url})"
|
||||
|
||||
MESSAGE_IMAGE_TEMPLATE = "\n[Image]({image_url})"
|
||||
|
||||
LEGACY_EVENT_TYPES = ["ok", "pending", "alerting", "paused"]
|
||||
|
||||
|
@ -53,24 +57,31 @@ def api_grafana_webhook(
|
|||
) -> HttpResponse:
|
||||
# Grafana alerting system.
|
||||
if "alerts" in payload:
|
||||
status = payload["status"].tame(check_string_in(["firing", "resolved"]))
|
||||
alert_count = len(payload["alerts"])
|
||||
|
||||
topic_name = NEW_TOPIC_TEMPLATE.format(alert_status=status.upper(), alert_count=alert_count)
|
||||
|
||||
# Grafana 8.0 and above alerting; works for:
|
||||
# - https://grafana.com/docs/grafana/v8.0/alerting/unified-alerting/message-templating/template-data/
|
||||
# - https://grafana.com/docs/grafana/v9.0/alerting/contact-points/notifiers/webhook-notifier/
|
||||
# - https://grafana.com/docs/grafana/v10.0/alerting/alerting-rules/manage-contact-points/webhook-notifier/
|
||||
# - https://grafana.com/docs/grafana/v11.0/alerting/configure-notifications/manage-contact-points/integrations/webhook-notifier/
|
||||
for alert in payload["alerts"]:
|
||||
status = alert["status"].tame(check_string_in(["firing", "resolved"]))
|
||||
if status == "firing":
|
||||
body = ALERT_STATUS_TEMPLATE.format(alert_icon=":alert:", alert_state=status.upper())
|
||||
body = ALERT_STATUS_TEMPLATE.format(
|
||||
alert_icon=":alert:", alert_state=status.upper()
|
||||
)
|
||||
else:
|
||||
body = ALERT_STATUS_TEMPLATE.format(alert_icon=":checkbox:", alert_state=status.upper())
|
||||
|
||||
if payload["message"]:
|
||||
body += payload["message"].tame(check_string) + "\n"
|
||||
|
||||
for index, alert in enumerate(payload["alerts"], 1):
|
||||
body += ALERT_HEADER_TEMPLATE.format(count=index)
|
||||
body = ALERT_STATUS_TEMPLATE.format(
|
||||
alert_icon=":checkbox:", alert_state=status.upper()
|
||||
)
|
||||
|
||||
if "alertname" in alert["labels"] and alert["labels"]["alertname"]:
|
||||
body += ": " + alert["labels"]["alertname"].tame(check_string) + "."
|
||||
alertname = alert["labels"]["alertname"].tame(check_string)
|
||||
topic_name = NEW_TOPIC_TEMPLATE.format(alertname=alertname)
|
||||
body += "**" + alertname + "**\n\n"
|
||||
else:
|
||||
# if no alertname, fallback to the alert fingerprint
|
||||
topic_name = NEW_TOPIC_TEMPLATE.format(
|
||||
alertname=alert["fingerprint"].tame(check_string)
|
||||
)
|
||||
|
||||
body += START_TIME_TEMPLATE.format(start_time=alert["startsAt"].tame(check_string))
|
||||
|
||||
|
@ -84,6 +95,19 @@ def api_grafana_webhook(
|
|||
label_information += "- " + key + ": " + value.tame(check_string) + "\n"
|
||||
body += MESSAGE_LABELS_TEMPLATE.format(label_information=label_information)
|
||||
|
||||
if alert.get("values"):
|
||||
value_information = ""
|
||||
for key, value in alert["values"].items():
|
||||
value_information += "- " + key + ": " + str(value.tame(check_anything)) + "\n"
|
||||
body += MESSAGE_VALUES_TEMPLATE.format(value_information=value_information)
|
||||
elif alert.get("valueString"):
|
||||
body += (
|
||||
MESSAGE_VALUES_TEMPLATE.format(
|
||||
value_information=alert["valueString"].tame(check_string)
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
if alert["annotations"]:
|
||||
annotation_information = ""
|
||||
for key, value in alert["annotations"].items():
|
||||
|
@ -92,17 +116,30 @@ def api_grafana_webhook(
|
|||
annotation_information=annotation_information
|
||||
)
|
||||
|
||||
if payload["truncatedAlerts"]:
|
||||
body += TRUNCATED_ALERTS_TEMPLATE.format(
|
||||
count=payload["truncatedAlerts"].tame(check_int)
|
||||
if alert["generatorURL"]:
|
||||
body += MESSAGE_GENERATOR_TEMPLATE.format(
|
||||
generator_url=alert["generatorURL"].tame(check_string)
|
||||
)
|
||||
|
||||
if alert["silenceURL"]:
|
||||
body += MESSAGE_SILENCE_TEMPLATE.format(
|
||||
silence_url=alert["silenceURL"].tame(check_string)
|
||||
)
|
||||
|
||||
if alert.get("imageURL"):
|
||||
body += MESSAGE_IMAGE_TEMPLATE.format(
|
||||
image_url=alert["imageURL"].tame(check_string)
|
||||
)
|
||||
|
||||
body += "\n"
|
||||
|
||||
check_send_webhook_message(request, user_profile, topic_name, body, status)
|
||||
|
||||
return json_success(request)
|
||||
|
||||
# Legacy Grafana alerts.
|
||||
else:
|
||||
# Grafana 7.0 alerts:
|
||||
# https://grafana.com/docs/grafana/v7.0/alerting/notifications/#webhook
|
||||
topic_name = OLD_TOPIC_TEMPLATE.format(alert_title=payload["title"].tame(check_string))
|
||||
|
||||
eval_matches_text = ""
|
||||
|
|
Loading…
Reference in New Issue