Unverified Commit b4e2580c authored by David Ansari's avatar David Ansari Committed by GitHub
Browse files

Add support for RabbitMQ alerting without depending on Prometheus Operator (#676)

* Integrate RabbitMQ Alerting without Prometheus Operator

Before this commit, RabbitMQ Alerting was set up with the assumption
that Prometheus Operator is installed.
However, many K8s clusters have Prometheus installed without using
Prometheus Operator.
Therefore, support RabbitMQ alerting integration with and without using Prometheus Operator.

Auto-generate single Prometheus rule file from PrometheusRules.
Add Prometheus RabbitMQ scrape targets (do not rely on outdated
annotation approach).

* Remove line to set dev image since v1.7.0 will be cut very soon

* Make clear that Prometheus CRDs can be put in any namespace

...NamespaceSelector {} means any namespace will be selected.

This is not to be confused with value `nil` which means "only check own
namespace".

* Skip SSL verification in dev setup
parent ae3231cf
Showing with 620 additions and 20 deletions
+620 -20
name: "Prometheus Rules"
on:
push:
branches:
- main
paths:
- observability/prometheus/rules/**/*.y*ml
jobs:
rules:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Create Prometheus rule file
run: |
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
make install-tools
cd observability/prometheus/
echo "# This file got auto-generated by GitHub workflow '$GITHUB_WORKFLOW'" > rule-file.yml
cat >> rule-file.yml << EOF
---
groups:
- name: rabbitmq-cluster-operator
rules: []
- name: rabbitmq
rules: []
EOF
find rules/rabbitmq-cluster-operator -name "*.y*ml" -exec yq eval-all --inplace --no-colors --prettyPrint 'select(fileIndex==0).groups.[0].rules = select(fileIndex==0).groups.[0].rules + select(fileIndex==1).spec.groups.[0].rules | select(fileIndex==0)' rule-file.yml {} ';'
append_rabbitmq_rules='select(fileIndex==0).groups.[1].rules = select(fileIndex==0).groups.[1].rules + select(fileIndex==1).spec.groups.[0].rules | select(fileIndex==0)'
find rules/rabbitmq -name "*.y*ml" ! -name recording-rules.yml -exec yq eval-all --inplace --no-colors --prettyPrint "$append_rabbitmq_rules" rule-file.yml {} ';'
yq eval-all --inplace --no-colors --prettyPrint "$append_rabbitmq_rules" rule-file.yml rules/rabbitmq/recording-rules.yml
- name: Check Prometheus rule file
run: |
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
# need to use @main because of https://github.com/prometheus/prometheus/issues/8586#issuecomment-796976710
GO111MODULE=on go get github.com/prometheus/prometheus/cmd/promtool@main
promtool check rules observability/prometheus/rule-file.yml
- name: Commit Prometheus rule file
working-directory: observability/prometheus
run: |
if [[ `git status --porcelain -- rule-file.yml` ]]; then
git config --global user.name 'RabbitMQ CI Bot'
git config --global user.email 'rabbitmq-ci@users.noreply.github.com'
git add -- rule-file.yml
git commit -m "Update Prometheus rule file"
git push
fi
......@@ -10,6 +10,7 @@ require (
github.com/go-logr/logr v0.3.0
github.com/go-stomp/stomp v2.1.4+incompatible
github.com/michaelklishin/rabbit-hole/v2 v2.8.0
github.com/mikefarah/yq/v4 v4.7.1
github.com/onsi/ginkgo v1.16.2
github.com/onsi/gomega v1.11.0
github.com/rabbitmq/rabbitmq-stream-go-client v0.0.0-20210422170636-520637be5dde
......
......@@ -162,6 +162,8 @@ github.com/elastic/crd-ref-docs v0.0.7/go.mod h1:osieo9JUDPSestb0X9RsantkSvWqIvh
github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153 h1:yUdfgN0XgIJw7foRItutHYUIhlcKzcSf5vDpdhQAKTc=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/elliotchance/orderedmap v1.4.0 h1:wZtfeEONCbx6in1CZyE6bELEt/vFayMvsxqI5SgsR+A=
github.com/elliotchance/orderedmap v1.4.0/go.mod h1:wsDwEaX5jEoyhbs7x93zk2H/qv0zwuhg4inXhDkYqys=
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
......@@ -173,8 +175,9 @@ github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
github.com/evanphx/json-patch/v5 v5.1.0 h1:B0aXl1o/1cP8NbviYiBMkcHBtUjIJ1/Ccg6b+SwCLQg=
github.com/evanphx/json-patch/v5 v5.1.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk=
github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
......@@ -260,10 +263,13 @@ github.com/go-openapi/validate v0.18.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+
github.com/go-openapi/validate v0.19.2/go.mod h1:1tRCw7m3jtI8eNWEEliiAqUIcBztB2KDnRCRMUi7GTA=
github.com/go-openapi/validate v0.19.8 h1:YFzsdWIDfVuLvIOF+ZmKjVg1MbPJ1QgY9PihMwei1ys=
github.com/go-openapi/validate v0.19.8/go.mod h1:8DJv2CVJQ6kGNpFW6eV9N3JviE1C85nY1c2z52x1Gk4=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE=
github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
......@@ -289,8 +295,9 @@ github.com/gobuffalo/flect v0.2.2 h1:PAVD7sp0KOdfswjAw9BpLCU9hXo7wFSzgpQ+zNeks/A
github.com/gobuffalo/flect v0.2.2/go.mod h1:vmkQwuZYhN5Pc4ljYQZzP+1sq+NEkK+lh20jmEmX3jc=
github.com/gobuffalo/here v0.6.0/go.mod h1:wAG085dHOYqUpf+Ap+WOdrPTp5IYcDAs/x7PLa8Y5fM=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/goccy/go-yaml v1.1.5 h1:b8mLuSkAaBDntdh7UUL2aU/PXZ7dPfsrNNG13VhvKGs=
github.com/goccy/go-yaml v1.1.5/go.mod h1:wS4gNoLalDSJxo/SpngzPQ2BN4uuZVLCmbM4S3vd4+Y=
github.com/goccy/go-yaml v1.8.9 h1:4AEXg2qx+/w29jXnXpMY6mTckmYu1TMoHteKuMf0HFg=
github.com/goccy/go-yaml v1.8.9/go.mod h1:U/jl18uSupI5rdI2jmuCswEA2htH9eXfferR3KfscvA=
github.com/gofrs/flock v0.0.0-20190320160742-5135e617513b/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
......@@ -435,6 +442,8 @@ github.com/imdario/mergo v0.3.10/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jinzhu/copier v0.2.8 h1:N8MbL5niMwE3P4dOwurJixz5rMkKfujmMRFmAanSzWE=
github.com/jinzhu/copier v0.2.8/go.mod h1:24xnZezI2Yqac9J61UC6/dG/k76ttpq0DdJI3QmUvro=
github.com/jmhodges/clock v0.0.0-20160418191101-880ee4c33548/go.mod h1:hGT6jSUVzF6no3QaDSMLGLEHtHSBSefs+MgcDWnmhmo=
github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
......@@ -513,6 +522,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182aff
github.com/michaelklishin/rabbit-hole/v2 v2.8.0 h1:5tehiLwdVtCeDcrxOlvoveRqU/AJMOcMeQntSf63fdc=
github.com/michaelklishin/rabbit-hole/v2 v2.8.0/go.mod h1:VZQTDutXFmoyrLvlRjM79MEPb0+xCLLhV5yBTjwMWkM=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/mikefarah/yq/v4 v4.7.1 h1:sl381SkedGntGwSPLjkpnUqCMEGGjf3zUyeAvK/Oqtg=
github.com/mikefarah/yq/v4 v4.7.1/go.mod h1:lhxVpMWdGvoeon5cMtBD3MKRuLKPHgRidR8oDFRNCsw=
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
......@@ -695,12 +706,17 @@ github.com/stretchr/testify v1.2.3-0.20181224173747-660f15d67dbb/go.mod h1:a8OnR
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk=
github.com/timtadh/data-structures v0.5.3 h1:F2tEjoG9qWIyUjbvXVgJqEOGJPMIiYn7U5W5mE+i/vQ=
github.com/timtadh/data-structures v0.5.3/go.mod h1:9R4XODhJ8JdWFEI8P/HJKqxuJctfBQw6fDibMQny2oU=
github.com/timtadh/lexmachine v0.2.2 h1:g55RnjdYazm5wnKv59pwFcBJHOyvTPfDEoz21s4PHmY=
github.com/timtadh/lexmachine v0.2.2/go.mod h1:GBJvD5OAfRn/gnp92zb9KTgHLB7akKyxmVivoYCcjQI=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
......@@ -938,8 +954,9 @@ golang.org/x/sys v0.0.0-20200928205150-006507a75852/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4 h1:myAQVi0cGEoqQVR5POX+8RR2mrocKqNN1hmeMqhX27k=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e h1:XNp2Flc/1eWQGk5BLzqTAN7fQIwIbfyVTuVxXxZh73M=
golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
......@@ -1090,7 +1107,6 @@ gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qS
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE=
gopkg.in/go-playground/validator.v9 v9.30.0 h1:Wk0Z37oBmKj9/n+tPyBHZmeL19LaCoK3Qq48VwYENss=
gopkg.in/go-playground/validator.v9 v9.30.0/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ=
gopkg.in/inf.v0 v0.9.0/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
......@@ -1099,6 +1115,8 @@ gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU=
gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473 h1:6D+BvnJ/j6e222UW8s2qTSe3wGBtvo0MbVQG/c5k8RE=
gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473/go.mod h1:N1eN2tsCx0Ydtgjl4cqmbRCsY4/+z4cYDeqwZTk6zog=
gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=
gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
......@@ -1115,8 +1133,9 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20190905181640-827449938966/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
......
# Observability
This directory contains scrape targets, RabbitMQ alerting rules, Alertmanager configuration, and RabbitMQ dashboards published in a format that is compatible with [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator).
This directory contains scrape targets, RabbitMQ alerting rules, Alertmanager configuration, and RabbitMQ dashboards.
## Quick Start
If you don't have Prometheus and Grafana installed, the quickest way to try out RabbitMQ observability is as follows:
......@@ -25,3 +27,4 @@ Learn more on RabbitMQ monitoring in:
* [RabbitMQ Prometheus documentation](https://www.rabbitmq.com/prometheus.html)
* [Operator monitoring documentation](https://www.rabbitmq.com/kubernetes/operator/operator-monitoring.html)
* [TGIR S01E07: How to monitor RabbitMQ?](https://youtu.be/NWISW6AwpOE)
* [Notify me when RabbitMQ has a problem](https://blog.rabbitmq.com/posts/2021/05/alerting/)
# Prometheus configuration
RabbitMQ alerting rules depend on [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). Refer to the kube-state-metrics documentation to deploy and scrape kube-state-metrics.
## With Prometheus Operator
If Prometheus and Alertmanager are installed by [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator), apply the YAML files in [alertmanager](./alertmanager), [monitors](./monitors), and [rules](./rules) directories. They contain K8s objects watched by Prometheus Operator configuring Prometheus.
## Without Prometheus Operator
If Prometheus and Alertmanager are not installed by Prometheus Operator, use [config-file.yml](./config-file.yml) and [rule-file.yml](./rule-file.yml) as a starting point for RabbitMQ monitoring and alerting.
`rule-file.yml` is an auto-generated file containing the same rules as the [rules](./rules/) directory.
For the [Alertmanager configuration file](https://prometheus.io/docs/alerting/latest/configuration/#configuration-file), use the same `alertmanager.yaml` as provided in [alertmanager/slack.yml](alertmanager/slack.yml).
# Scheme described in
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file
global:
evaluation_interval: 30s
# Change this value to the path where file
# https://github.com/rabbitmq/cluster-operator/blob/main/observability/prometheus/rule-file.yml
# is located on the Prometheus instance.
#
# rule_files:
# - /etc/prometheus/rules/*.yml
# Specify Alertmanager instances.
# See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config
#
# alerting:
# alertmanagers:
# <alertmanager_config>
# Below scrape configs include RabbitMQ nodes (HTTP and HTTPS) and RabbitMQ Cluster Operator (HTTP).
# The config was copied from a Prometheus installed by Prometheus Operator and applied with the RabbitMQ ServiceMonitor and
# RabbitMQ Cluster Operator PodMonitor. The config is slightly modified (e.g. Helm Chart specifics are removed).
# Make sure to additionally include the scrape config for kube-state-metrics if you deploy RabbitMQ alerting rules.
scrape_configs:
- job_name: rabbitmq-http
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 14s
metrics_path: /metrics
scheme: http
follow_redirects: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_component]
separator: ;
regex: rabbitmq
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_port_name]
separator: ;
regex: prometheus
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Node;(.*)
target_label: node
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Pod;(.*)
target_label: pod
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: service
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: job
replacement: ${1}
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: prometheus
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: endpoints
follow_redirects: true
- job_name: rabbitmq-https
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 14s
metrics_path: /metrics
scheme: https
tls_config:
insecure_skip_verify: true
follow_redirects: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_component]
separator: ;
regex: rabbitmq
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_port_name]
separator: ;
regex: prometheus-tls
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Node;(.*)
target_label: node
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
separator: ;
regex: Pod;(.*)
target_label: pod
replacement: ${1}
action: replace
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: service
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_name]
separator: ;
regex: (.*)
target_label: job
replacement: ${1}
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: prometheus-tls
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: endpoints
follow_redirects: true
- job_name: rabbitmq-cluster-operator
honor_timestamps: true
scrape_interval: 30s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
follow_redirects: true
relabel_configs:
- source_labels: [job]
separator: ;
regex: (.*)
target_label: __tmp_prometheus_job_name
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component]
separator: ;
regex: rabbitmq-cluster-operator
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
separator: ;
regex: metrics
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: namespace
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: container
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_pod_name]
separator: ;
regex: (.*)
target_label: pod
replacement: $1
action: replace
- separator: ;
regex: (.*)
target_label: job
replacement: rabbitmq-cluster-operator
action: replace
- separator: ;
regex: (.*)
target_label: endpoint
replacement: metrics
action: replace
- source_labels: [__address__]
separator: ;
regex: (.*)
modulus: 1
target_label: __tmp_hash
replacement: $1
action: hashmod
- source_labels: [__tmp_hash]
separator: ;
regex: "0"
replacement: $1
action: keep
kubernetes_sd_configs:
- role: pod
follow_redirects: true
namespaces:
names:
- rabbitmq-system
......@@ -10,12 +10,12 @@ apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
...
podMonitorNamespaceSelector: {}
podMonitorNamespaceSelector: {} # auto discover pod monitors across all namespaces
podMonitorSelector:
matchLabels:
release: my-prometheus
...
serviceMonitorNamespaceSelector: {}
serviceMonitorNamespaceSelector: {} # auto discover service monitors across all namespaces
serviceMonitorSelector:
matchLabels:
release: my-prometheus
......@@ -24,7 +24,6 @@ metadata:
```
Given the `matchLabels` fields from the Prometheus spec above, you would need to add the label `release: my-prometheus` to the `PodMonitor` and `ServiceMonitor` objects.
Since the `NamespaceSelector` is empty, deploy the objects into the same namespace where Prometheus is running.
File [rabbitmq-servicemonitor.yml](./rabbitmq-servicemonitor.yml) contains scrape targets for RabbitMQ.
Metrics listed in [RabbitMQ metrics](https://github.com/rabbitmq/rabbitmq-server/blob/master/deps/rabbitmq_prometheus/metrics.md) will be scraped from all RabbitMQ nodes.
......
# This file got auto-generated by GitHub workflow 'Prometheus Rules'
groups:
- name: rabbitmq-cluster-operator
rules:
- alert: RabbitMQClusterOperatorUnavailableReplicas
expr: |
kube_deployment_status_replicas_unavailable{deployment="rabbitmq-cluster-operator"}
>
0
for: 5m
annotations:
description: |
`{{ $value }}` replicas are unavailable in Deployment `rabbitmq-cluster-operator`
in namespace `{{ $labels.namespace }}`.
summary: |
There are pods that are either running but not yet available or pods that still have not been created.
Check the status of the deployment: `kubectl -n {{ $labels.namespace }} describe deployment rabbitmq-cluster-operator`
Check the status of the pod: `kubectl -n {{ $labels.namespace }} describe pod -l app.kubernetes.io/component=rabbitmq-cluster-operator`
labels:
rulesgroup: rabbitmq-operator
severity: warning
- name: rabbitmq
rules:
- alert: InsufficientEstablishedErlangDistributionLinks
# erlang_vm_dist_node_state: 1=pending, 2=up_pending, 3=up
expr: |
count by (namespace, rabbitmq_cluster) (erlang_vm_dist_node_state * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info == 3)
<
count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info)
*
(count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) -1 )
for: 10m
annotations:
description: |
There are only `{{ $value }}` established Erlang distribution links
in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` in namespace `{{ $labels.namespace }}`.
summary: |
RabbitMQ clusters have a full mesh topology.
All RabbitMQ nodes connect to all other RabbitMQ nodes in both directions.
The expected number of established Erlang distribution links is therefore `n*(n-1)` where `n` is the number of RabbitMQ nodes in the cluster.
Therefore, the expected number of distribution links is `0` for a 1-node cluster, `6` for a 3-node cluster, and `20` for a 5-node cluster.
This alert reports that the number of established distribution links is less than the expected number.
Some reasons for this alert include failed network links, network partitions, failed clustering (i.e. nodes can't join the cluster).
Check the panels `All distribution links`, `Established distribution links`, `Connecting distributions links`, `Waiting distribution links`, and `distribution links`
of the Grafana dashboard `Erlang-Distribution`.
Check the logs of the RabbitMQ nodes: `kubectl -n {{ $labels.namespace }} logs -l app.kubernetes.io/component=rabbitmq,app.kubernetes.io/name={{ $labels.rabbitmq_cluster }}`
labels:
rulesgroup: rabbitmq
severity: warning
- alert: LowDiskWatermarkPredicted
# The 2nd condition ensures that data points are available until 24 hours ago such that no false positive alerts are triggered for newly created RabbitMQ clusters.
expr: |
(
predict_linear(rabbitmq_disk_space_available_bytes[24h], 60*60*24) * on (instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info
<
rabbitmq_disk_space_available_limit_bytes * on (instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info
)
and
(
count_over_time(rabbitmq_disk_space_available_limit_bytes[2h] offset 22h) * on (instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info
>
0
)
for: 60m
annotations:
description: |
The predicted free disk space in 24 hours from now is `{{ $value | humanize1024 }}B`
in RabbitMQ node `{{ $labels.rabbitmq_node }}`, pod `{{ $labels.pod }}`,
RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`, namespace `{{ $labels.namespace }}`.
summary: |
Based on the trend of available disk space over the past 24 hours, it's predicted that, in 24 hours from now, a disk alarm will be triggered since the free disk space will drop below the free disk space limit.
This alert is reported for the partition where the RabbitMQ data directory is stored.
When the disk alarm is triggered, all publishing connections across all cluster nodes will be blocked.
See
https://www.rabbitmq.com/alarms.html,
https://www.rabbitmq.com/disk-alarms.html,
https://www.rabbitmq.com/production-checklist.html#resource-limits-disk-space,
https://www.rabbitmq.com/persistence-conf.html,
https://www.rabbitmq.com/connection-blocked.html.
labels:
rulesgroup: rabbitmq
severity: warning
- alert: HighConnectionChurn
expr: |
(
sum(rate(rabbitmq_connections_closed_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info) by(namespace, rabbitmq_cluster)
+
sum(rate(rabbitmq_connections_opened_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info) by(namespace, rabbitmq_cluster)
)
/
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) by (namespace, rabbitmq_cluster)
> 0.1
unless
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) by (namespace, rabbitmq_cluster)
< 100
for: 10m
annotations:
description: |
Over the last 5 minutes, `{{ $value | humanizePercentage }}`
of total connections are closed or opened per second in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`
in namespace `{{ $labels.namespace }}`.
summary: |
More than 10% of total connections are churning.
This means that client application connections are short-lived instead of long-lived.
Read https://www.rabbitmq.com/connections.html#high-connection-churn to understand why this is an anti-pattern.
labels:
rulesgroup: rabbitmq
severity: warning
- alert: NoMajorityOfNodesReady
expr: |
kube_statefulset_status_replicas_ready * on (namespace, statefulset) group_left(label_app_kubernetes_io_name) kube_statefulset_labels{label_app_kubernetes_io_component="rabbitmq"}
<=
kube_statefulset_replicas * on (namespace, statefulset) group_left(label_app_kubernetes_io_name) kube_statefulset_labels{label_app_kubernetes_io_component="rabbitmq"}
/ 2
unless
kube_statefulset_replicas * on (namespace, statefulset) group_left(label_app_kubernetes_io_name) kube_statefulset_labels{label_app_kubernetes_io_component="rabbitmq"}
== 0
for: 5m
annotations:
description: |
Only `{{ $value }}` replicas are ready in StatefulSet `{{ $labels.statefulset }}`
of RabbitMQ cluster `{{ $labels.label_app_kubernetes_io_name }}` in namespace `{{ $labels.namespace }}`.
summary: |
No majority of nodes have been ready for the last 5 minutes.
Check the details of the pods:
`kubectl -n {{ $labels.namespace }} describe pods -l app.kubernetes.io/component=rabbitmq,app.kubernetes.io/name={{ $labels.label_app_kubernetes_io_name }}`
labels:
rabbitmq_cluster: '{{ $labels.label_app_kubernetes_io_name }}'
rulesgroup: rabbitmq
severity: warning
- alert: PersistentVolumeMissing
expr: |
kube_persistentvolumeclaim_status_phase{phase="Bound"} * on (namespace, persistentvolumeclaim) group_left(label_app_kubernetes_io_name) kube_persistentvolumeclaim_labels{label_app_kubernetes_io_component="rabbitmq"}
==
0
for: 10m
annotations:
description: |
PersistentVolumeClaim `{{ $labels.persistentvolumeclaim }}` of
RabbitMQ cluster `{{ $labels.label_app_kubernetes_io_name }}` in namespace
`{{ $labels.namespace }}` is not bound.
summary: |
RabbitMQ needs a PersistentVolume for its data.
However, there is no PersistentVolume bound to the PersistentVolumeClaim.
This means the requested storage could not be provisioned.
Check the status of the PersistentVolumeClaim: `kubectl -n {{ $labels.namespace }} describe pvc {{ $labels.persistentvolumeclaim }}`.
labels:
rabbitmq_cluster: '{{ $labels.label_app_kubernetes_io_name }}'
rulesgroup: rabbitmq
severity: critical
- alert: UnroutableMessages
expr: |
sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_dropped_total[5m]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info)
>= 1
or
sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_returned_total[5m]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info)
>= 1
annotations:
description: |
There were `{{ $value | printf "%.0f" }}` unroutable messages within the last
5 minutes in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` in namespace
`{{ $labels.namespace }}`.
summary: |
There are messages published into an exchange which cannot be routed and are either dropped silently, or returned to publishers.
Is your routing topology set up correctly?
Check your application code and bindings between exchanges and queues.
See
https://www.rabbitmq.com/publishers.html#unroutable,
https://www.rabbitmq.com/confirms.html#when-publishes-are-confirmed.
labels:
rulesgroup: rabbitmq
severity: warning
- alert: FileDescriptorsNearLimit
expr: |
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_fds[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info)
/
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_tcp_sockets * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info)
> 0.8
for: 10m
annotations:
description: |
`{{ $value | humanizePercentage }}` file descriptors of file
descriptor limit are used in RabbitMQ node `{{ $labels.rabbitmq_node }}`,
pod `{{ $labels.pod }}`, RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`,
namespace `{{ $labels.namespace }}`.
summary: |
More than 80% of file descriptors are used on the RabbitMQ node.
When this value reaches 100%, new connections will not be accepted and disk write operations may fail.
Client libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available file descriptors.
See https://www.rabbitmq.com/production-checklist.html#resource-limits-file-handle-limit.
labels:
rulesgroup: rabbitmq
severity: warning
- alert: ContainerRestarts
expr: |
increase(kube_pod_container_status_restarts_total[10m]) * on(namespace, pod, container) group_left(rabbitmq_cluster) rabbitmq_identity_info
>=
1
for: 5m
annotations:
description: |
Over the last 10 minutes, container `{{ $labels.container }}`
restarted `{{ $value | printf "%.0f" }}` times in pod `{{ $labels.pod }}` of RabbitMQ cluster
`{{ $labels.rabbitmq_cluster }}` in namespace `{{ $labels.namespace }}`.
summary: |
Investigate why the container got restarted.
Check the logs of the current container: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }}`
Check the logs of the previous container: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }} --previous`
Check the last state of the container: `kubectl -n {{ $labels.namespace }} get pod {{ $labels.pod }} -o jsonpath='{.status.containerStatuses[].lastState}'`
labels:
rabbitmq_cluster: '{{ $labels.rabbitmq_cluster }}'
rulesgroup: rabbitmq
severity: warning
- alert: TCPSocketsNearLimit
expr: |
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_tcp_sockets[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info)
/
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_tcp_sockets * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info)
> 0.8
for: 10m
annotations:
description: |
`{{ $value | humanizePercentage }}` TCP sockets of TCP socket
limit are open in RabbitMQ node `{{ $labels.rabbitmq_node }}`, pod `{{ $labels.pod }}`,
RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`, namespace `{{ $labels.namespace }}`.
summary: |
More than 80% of TCP sockets are open on the RabbitMQ node.
When this value reaches 100%, new connections will not be accepted.
Client libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available TCP sockets.
See https://www.rabbitmq.com/networking.html.
labels:
rulesgroup: rabbitmq
severity: warning
# The first 2 rules create a metric ALERTS:rabbitmq_alert_state_numeric which has value 1 for alertstate pending and value 2 for alertstate firing
- expr: |
ALERTS{rulesgroup="rabbitmq", alertstate="pending"} * 0 + 1
record: ALERTS:rabbitmq_alert_state_numeric
- expr: |
ALERTS{rulesgroup="rabbitmq", alertstate="firing"} * 0 + 2
record: ALERTS:rabbitmq_alert_state_numeric
# The 3rd rule creates a metric ALERTS:rabbitmq_alert_state_discrete with label alert_state_numeric.
# The label value is either 1 for pending or 2 for firing.
# The metric value is binary: 1 means the alert is active in that state (pending or firing), 0 means there is no such alert in that state.
- expr: |
count_values
by (namespace, rabbitmq_cluster, alertname, severity, instance, endpoint, pod, container, persistentvolumeclaim)
("alert_state_numeric", ALERTS:rabbitmq_alert_state_numeric)
record: ALERTS:rabbitmq_alert_state_discrete
......@@ -12,7 +12,7 @@ metadata:
...
spec:
...
ruleNamespaceSelector: {}
ruleNamespaceSelector: {} # auto discover PrometheusRules across all namespaces
ruleSelector:
matchLabels:
release: my-prometheus
......@@ -21,4 +21,3 @@ spec:
```
Given the `matchLabels` field from the Prometheus spec above, you would need to add the label `release: my-prometheus` to the `PrometheusRule` objects.
Since the `ruleNamespaceSelector` is empty (`{}`), Prometheus discovers `PrometheusRule` objects across all namespaces, so you can deploy them into any namespace.
......@@ -26,4 +26,4 @@ spec:
Check the status of the pod: `kubectl -n {{ $labels.namespace }} describe pod -l app.kubernetes.io/component=rabbitmq-cluster-operator`
labels:
rulesgroup: rabbitmq-operator
severity: critical
severity: warning
# RabbitMQ Prometheus Rules
This directory splits Prometheus rules into different files so that you can apply rules individually.
Although the [rule groups](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/#rule_group) in this directory have the same name `rabbitmq`, they are in fact different rule groups evaluated at different points in time without clear evaluation order.
## Adding new Prometheus Rules
To allow filtering in the [RabbitMQ-Alerts Grafana dashboard](../../../grafana/dashboards/rabbitmq-alerts.yml) and to allow [configuration of Alertmanager](../../alertmanager), all RabbitMQ alerts must output at least the following labels:
......
......@@ -35,4 +35,4 @@ spec:
Check the logs of the RabbitMQ nodes: `kubectl -n {{ $labels.namespace }} logs -l app.kubernetes.io/component=rabbitmq,app.kubernetes.io/name={{ $labels.rabbitmq_cluster }}`
labels:
rulesgroup: rabbitmq
severity: critical
severity: warning
......@@ -31,4 +31,4 @@ spec:
labels:
rabbitmq_cluster: '{{ $labels.label_app_kubernetes_io_name }}'
rulesgroup: rabbitmq
severity: critical
severity: warning
......@@ -59,9 +59,7 @@ kubectl -n "$KUBE_PROMETHEUS_STACK_NAMESPACE" apply --filename "$DIR"/grafana/da
printf "%bInstalling RabbitMQ Cluster Operator...%b\n" "$GREEN" "$NO_COLOR"
kubectl apply --filename https://github.com/rabbitmq/cluster-operator/releases/latest/download/cluster-operator.yml
#TODO ansd remove below line once cluster-operator >v1.6.0 is cut (which will include https://github.com/rabbitmq/cluster-operator/pull/664)
kubectl -n rabbitmq-system set image deployment/rabbitmq-cluster-operator operator="rabbitmqoperator/cluster-operator-dev:1.6.0-build.10"
printf "\n%bTo open Prometheus UI execute \nkubectl -n $KUBE_PROMETHEUS_STACK_NAMESPACE port-forward svc/${KUBE_PROMETHEUS_STACK_NAME}-kube-prometheus-stack-prometheus 9090\nand open your browser at http://localhost:9090\n\n" "$GREEN"
printf "To open Alertmanager UI execute \nkubectl -n $KUBE_PROMETHEUS_STACK_NAMESPACE port-forward svc/${KUBE_PROMETHEUS_STACK_NAME}-kube-prometheus-stack-alertmanager 9093\nand open your browser at http://localhost:9093\n\n"
printf "To open Grafana UI execute \nkubectl -n $KUBE_PROMETHEUS_STACK_NAMESPACE port-forward svc/${KUBE_PROMETHEUS_STACK_NAME}-grafana 3000:80\nand open your browser at http://localhost:3000\nusername: admin, password: admin%b\n" "$NO_COLOR"
printf "\n%bTo open Prometheus UI execute \nkubectl -n %s port-forward svc/%s-kube-prometheus-stack-prometheus 9090\nand open your browser at http://localhost:9090\n\n" "$GREEN" "$KUBE_PROMETHEUS_STACK_NAMESPACE" "$KUBE_PROMETHEUS_STACK_NAME"
printf "To open Alertmanager UI execute \nkubectl -n %s port-forward svc/%s-kube-prometheus-stack-alertmanager 9093\nand open your browser at http://localhost:9093\n\n" "$KUBE_PROMETHEUS_STACK_NAMESPACE" "$KUBE_PROMETHEUS_STACK_NAME"
printf "To open Grafana UI execute \nkubectl -n %s port-forward svc/%s-grafana 3000:80\nand open your browser at http://localhost:3000\nusername: admin, password: admin%b\n" "$KUBE_PROMETHEUS_STACK_NAMESPACE" "$KUBE_PROMETHEUS_STACK_NAME" "$NO_COLOR"
......@@ -5,6 +5,7 @@ package tools
import (
_ "github.com/elastic/crd-ref-docs"
_ "github.com/go-delve/delve/cmd/dlv"
_ "github.com/mikefarah/yq/v4"
_ "github.com/onsi/ginkgo/ginkgo"
_ "github.com/sclevine/yj"
_ "sigs.k8s.io/controller-tools/cmd/controller-gen"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment