7 changes: 7 additions & 0 deletions README.md
@@ -153,6 +153,8 @@ Examples:
| total=2 firing=1 pending=0 inactive=1

Flags:
-S, --label-key-state string Use the given AlertRule label to override the exit state for firing alerts.
If this flag is set, the plugin looks for warning/critical/ok in the provided label key
--exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex.
--exclude-label stringArray The label of one or more specific alerts to exclude.
This parameter can be repeated e.g.: '--exclude-label prio=high --exclude-label another=example'
@@ -170,6 +172,11 @@ Flags:
-P, --problems Display only alerts whose status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
```

The `--label-key-state` flag can be used to override the exit code for firing alerts.
When the flag is set, the plugin looks for the given label key on the AlertRule and uses
the label's value (`warning`, `critical` or `ok`) as the exit code.
An invalid value will result in an UNKNOWN exit code.
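
A minimal sketch of how such a rule could look: the label key `icinga_state` below is only an illustration, not a required name; any key you then pass via `--label-key-state icinga_state` (or `-S icinga_state`) works the same way.

```yaml
# Illustrative Prometheus alerting rule; the "icinga_state" label key is an
# arbitrary example chosen by the operator.
groups:
  - name: example
    rules:
      - alert: PrometheusTargetMissing
        expr: up == 0
        for: 0m
        labels:
          severity: critical
          # With --label-key-state icinga_state, a firing alert exits WARNING
          # instead of the default CRITICAL.
          icinga_state: warning
        annotations:
          summary: Prometheus target missing (instance {{ $labels.instance }})
```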

#### Checking all defined alerts

```bash
19 changes: 13 additions & 6 deletions cmd/alert.go
@@ -22,6 +22,7 @@ type AlertConfig struct {
ExcludeLabels []string
IncludeLabels []string
ProblemsOnly bool
StateLabelKey string
NoAlertsState string
}

@@ -144,8 +145,9 @@ inactive = 0`,

// Handle Inactive Alerts
if len(rl.AlertingRule.Alerts) == 0 {
// Counting states for perfdata
switch rl.GetStatus() {
// Counting states for perfdata. The state-label override is not applied here,
// so the counts reflect the actual states from Prometheus
switch rl.GetStatus("") {
case 0:
counterInactive++
case 1:
@@ -156,7 +158,7 @@ inactive = 0`,

sc := result.NewPartialResult()

_ = sc.SetState(rl.GetStatus())
_ = sc.SetState(rl.GetStatus(cliAlertConfig.StateLabelKey))
sc.Output = rl.GetOutput()
overall.AddSubcheck(sc)
}
@@ -165,8 +167,9 @@
if len(rl.AlertingRule.Alerts) > 0 {
// Handle Pending or Firing Alerts
for _, alert := range rl.AlertingRule.Alerts {
// Counting states for perfdata
switch rl.GetStatus() {
// Counting states for perfdata. The state-label override is not applied here,
// so the counts reflect the actual states from Prometheus
switch rl.GetStatus("") {
case 0:
counterInactive++
case 1:
@@ -177,7 +180,7 @@

sc := result.NewPartialResult()

_ = sc.SetState(rl.GetStatus())
_ = sc.SetState(rl.GetStatus(cliAlertConfig.StateLabelKey))
// Set the alert in the internal Type to generate the output
rl.Alert = alert
sc.Output = rl.GetOutput()
@@ -248,6 +251,10 @@ func init() {

fs.BoolVarP(&cliAlertConfig.ProblemsOnly, "problems", "P", false,
"Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed")

fs.StringVarP(&cliAlertConfig.StateLabelKey, "label-key-state", "S", "",
"Use the given AlertRule label to override the exit state for firing alerts."+
"\nIf this flag is set the plugin looks for warning/critical/ok in the provided label key")
}

// Function to convert state to integer.
16 changes: 16 additions & 0 deletions cmd/alert_test.go
@@ -304,6 +304,22 @@ exit status 2
args: []string{"run", "../main.go", "alert", "--exclude-label", "team=database", "--exclude-label", "severity=critical"},
expected: "[OK] - 0 Alerts: 0 Firing - 0 Pending - 0 Inactive\n\\_ [OK] No alerts retrieved\n|total=0 firing=0 pending=0 inactive=0\n\n",
},
{
name: "alert-state-label",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write(loadTestdata(alertTestDataSet1))
})),
args: []string{"run", "../main.go", "alert", "--label-key-state=icinga"},
expected: `[WARNING] - 3 Alerts: 1 Firing - 1 Pending - 1 Inactive
\_ [OK] [HostOutOfMemory] is inactive
\_ [WARNING] [SqlAccessDeniedRate] - Job: [mysql] on Instance: [localhost] is pending - value: 0.40 - {"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"}
\_ [OK] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 - {"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"}
|total=3 firing=1 pending=1 inactive=1

exit status 1
`,
},
}

for _, test := range tests {
26 changes: 24 additions & 2 deletions internal/alert/alert.go
@@ -58,8 +58,10 @@ func FlattenRules(groups []v1.RuleGroup, wantedGroups []string) []Rule {
return rules
}

func (a *Rule) GetStatus() (status int) {
switch a.AlertingRule.State {
func (a *Rule) GetStatus(labelKey string) (status int) {
state := a.AlertingRule.State

switch state {
case string(v1.AlertStateFiring):
status = check.Critical
case string(v1.AlertStatePending):
@@ -70,6 +72,26 @@ func (a *Rule) GetStatus() (status int) {
status = check.Unknown
}

if state == string(v1.AlertStateFiring) && labelKey != "" {
stateLabel, ok := a.AlertingRule.Labels[model.LabelName(labelKey)]
// If there is no such label key, we're done
if !ok {
return status
}

lb := strings.ToLower(string(stateLabel))
switch lb {
case "warning":
status = check.Warning
case "critical":
status = check.Critical
case "ok":
status = check.OK
default:
status = check.Unknown
}
}

return status
}

53 changes: 50 additions & 3 deletions internal/alert/alert_test.go
@@ -10,7 +10,6 @@ import (
)

func TestGetStatus(t *testing.T) {

testTime := time.Now()

ar := v1.AlertingRule{
@@ -49,19 +48,67 @@ func TestGetStatus(t *testing.T) {
Alert: ar.Alerts[0],
}

actual := r.GetStatus()
actual := r.GetStatus("")
if actual != check.Critical {
t.Error("\nActual: ", actual, "\nExpected: ", check.Critical)
}

r.AlertingRule.State = "pending"
actual = r.GetStatus()
actual = r.GetStatus("")
if actual != check.Warning {
t.Error("\nActual: ", actual, "\nExpected: ", check.Warning)
}

}

func TestGetStatus_WithLabel(t *testing.T) {
ar := v1.AlertingRule{
Alerts: []*v1.Alert{
{
Annotations: model.LabelSet{
"summary": "High request latency",
},
Labels: model.LabelSet{
"alertname": "HighRequestLatency",
"severity": "page",
},
State: v1.AlertStateFiring,
Value: "1e+00",
},
},
Annotations: model.LabelSet{
"summary": "High request latency",
},
Labels: model.LabelSet{
"severity": "page",
"icingaState": "OK",
},
Duration: 600,
Health: v1.RuleHealthGood,
Name: "HighRequestLatency",
Query: "job:request_latency_seconds:mean5m{job=\"myjob\"} > 0.5",
LastError: "",
EvaluationTime: 0.5,
State: "firing",
}

r := Rule{
AlertingRule: ar,
Alert: ar.Alerts[0],
}

actual := r.GetStatus("icingaState")
if actual != check.OK {
t.Error("\nActual: ", actual, "\nExpected: ", check.Critical)
}

r.AlertingRule.State = "pending"
actual = r.GetStatus("icingaState")
if actual != check.Warning {
t.Error("\nActual: ", actual, "\nExpected: ", check.Warning)
}
}

func TestGetOutput(t *testing.T) {

testTime := time.Now()
2 changes: 2 additions & 0 deletions testdata/alertmanager/alert.rules
@@ -7,6 +7,7 @@ groups:
for: 0m
labels:
severity: critical
icingaState: warning
annotations:
summary: Prometheus target missing (instance {{ $labels.instance }})
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
@@ -16,6 +17,7 @@
for: 0m
labels:
severity: low
icingaState: warning
annotations:
summary: Prometheus AlertManager job missing (instance {{ $labels.instance }})
description: "A Prometheus AlertManager job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
1 change: 1 addition & 0 deletions testdata/unittest/alertDataset1.json
@@ -87,6 +87,7 @@
"duration": 0,
"labels": {
"severity": "critical",
"icinga": "ok",
"team": "network"
},
"annotations": {