Skip to content

Commit

Permalink
feat: add prometheusRule
Browse files Browse the repository at this point in the history
  • Loading branch information
micborens committed Feb 8, 2024
1 parent fda5004 commit c706f62
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 12 deletions.
26 changes: 26 additions & 0 deletions README.md
Expand Up @@ -378,10 +378,36 @@ The following table lists the configurable parameters of the Harbor chart and th
| `metrics.exporter.path` | the url path for exporter metrics | `/metrics` |
| `metrics.exporter.port` | the port for exporter metrics | `8001` |
| `metrics.serviceMonitor.enabled` | create prometheus serviceMonitor. Requires prometheus CRD's | `false` |
| `metrics.serviceMonitor.matchLabels` | labels the ServiceMonitor selector uses to discover services; when empty, the chart's default match labels are used | `{}` |
| `metrics.serviceMonitor.additionalLabels` | additional labels to upsert to the manifest | `""` |
| `metrics.serviceMonitor.interval` | scrape period for harbor metrics | `""` |
| `metrics.serviceMonitor.metricRelabelings` | metrics relabel to add/mod/del before ingestion | `[]` |
| `metrics.serviceMonitor.relabelings` | relabels to add/mod/del to sample before scrape | `[]` |
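| `metrics.rules.enabled` | create a Prometheus Operator `PrometheusRule` resource. Requires the Prometheus Operator CRDs, and `metrics.enabled` and `metrics.serviceMonitor.enabled` set to `true` | `false` |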
| `metrics.rules.disabled` | specify which individual alerts should be disabled. | `{}` |
| `metrics.rules.alerting` | set to `false` to skip creating the `PrometheusRule` altogether, instead of disabling each alert one by one | `true` |
| `metrics.rules.additionalAggregationLabels` | additional label names added to the `sum by (...)` aggregations in the alert expressions | `{}` |
| `metrics.rules.additionalLabels` | additional labels for PrometheusRule resource | `{}` |
| `metrics.rules.additionalRuleLabels` | additional labels added to every PrometheusRule alert | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborCoreDown` | Additional labels for specific PrometheusRule alert groups HarborCoreDown | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborDatabaseDown` | Additional labels for specific PrometheusRule alert groups HarborDatabaseDown | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborRegistryDown` | Additional labels for specific PrometheusRule alert groups HarborRegistryDown | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborRedisDown` | Additional labels for specific PrometheusRule alert groups HarborRedisDown | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborTrivyDown` | Additional labels for specific PrometheusRule alert groups HarborTrivyDown | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborJobServiceDown` | Additional labels for specific PrometheusRule alert groups HarborJobServiceDown | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborLatency99` | Additional labels for specific PrometheusRule alert groups HarborLatency99 | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborRateErrors` | Additional labels for specific PrometheusRule alert groups HarborRateErrors | `{}` |
| `metrics.rules.additionalRuleGroupLabels.HarborQuotaProjectLimit` | Additional labels for specific PrometheusRule alert groups HarborQuotaProjectLimit | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborCoreDown` | Additional annotations for specific PrometheusRule alert groups HarborCoreDown | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborDatabaseDown` | Additional annotations for specific PrometheusRule alert groups HarborDatabaseDown | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborRegistryDown` | Additional annotations for specific PrometheusRule alert groups HarborRegistryDown | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborRedisDown` | Additional annotations for specific PrometheusRule alert groups HarborRedisDown | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborTrivyDown` | Additional annotations for specific PrometheusRule alert groups HarborTrivyDown | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborJobServiceDown` | Additional annotations for specific PrometheusRule alert groups HarborJobServiceDown | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborLatency99` | Additional annotations for specific PrometheusRule alert groups HarborLatency99 | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborRateErrors` | Additional annotations for specific PrometheusRule alert groups HarborRateErrors | `{}` |
| `metrics.rules.additionalRuleGroupAnnotations.HarborQuotaProjectLimit` | Additional annotations for specific PrometheusRule alert groups HarborQuotaProjectLimit | `{}` |
| `metrics.rules.additionalGroups` | additional groups to add to the rules file | `[]` |
| **Trace** | | |
| `trace.enabled` | Enable tracing or not | `false` |
| `trace.provider` | The tracing provider: `jaeger` or `otel`. `jaeger` should be 1.26+ | `jaeger` |
Expand Down
205 changes: 205 additions & 0 deletions templates/metrics/_alerts.yaml.tpl
@@ -0,0 +1,205 @@
{{/*
Built-in Harbor alerting rules.

Renders a YAML document with a single "harbor_alerts" group under the
top-level "groups" key.

Values read (all under .Values.metrics.rules):
  disabled.<AlertName>                       - a truthy value omits that alert
  additionalRuleLabels                       - labels appended to every alert
  additionalRuleGroupLabels.<AlertName>      - labels appended to that alert only
  additionalRuleGroupAnnotations.<AlertName> - annotations appended to that alert only
  additionalAggregationLabels                - label names added to the "sum by (...)" clauses

"nindent 8" is used throughout because the entries of each alert's
"annotations" and "labels" maps sit at an indentation of 8 spaces.
*/}}
{{- define "harbor.rules" -}}
groups:
- name: harbor_alerts
  rules:
    {{- if not (.Values.metrics.rules.disabled.HarborCoreDown | default false) }}
    - alert: HarborCoreDown
      annotations:
        summary: Harbor core is down.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborCoreDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        harbor_up{component="core"} == 0
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborCoreDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborDatabaseDown | default false) }}
    - alert: HarborDatabaseDown
      annotations:
        summary: Harbor database is down.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborDatabaseDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        harbor_up{component="database"} == 0
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborDatabaseDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborRegistryDown | default false) }}
    - alert: HarborRegistryDown
      annotations:
        summary: Harbor registry is down.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborRegistryDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        harbor_up{component="registry"} == 0
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborRegistryDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborRedisDown | default false) }}
    - alert: HarborRedisDown
      annotations:
        summary: Harbor redis is down.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborRedisDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        harbor_up{component="redis"} == 0
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborRedisDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborTrivyDown | default false) }}
    - alert: HarborTrivyDown
      annotations:
        summary: Harbor trivy is down.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborTrivyDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        harbor_up{component="trivy"} == 0
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborTrivyDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborJobServiceDown | default false) }}
    - alert: HarborJobServiceDown
      annotations:
        summary: Harbor job service is down.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborJobServiceDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        harbor_up{component="jobservice"} == 0
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborJobServiceDown }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborLatency99 | default false) }}
    {{- /* "le" must survive the aggregation: histogram_quantile needs the bucket boundaries. */}}
    - alert: HarborLatency99
      annotations:
        summary: Harbor p99 latency is higher than 10 seconds.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborLatency99 }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        histogram_quantile(0.99,
          sum by ({{ range $.Values.metrics.rules.additionalAggregationLabels }}{{ . }},{{ end }}le)(
            rate(registry_http_request_duration_seconds_bucket[30m])))
        > 10
      for: 5m
      labels:
        severity: warning
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborLatency99 }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborRateErrors | default false) }}
    - alert: HarborRateErrors
      annotations:
        summary: Harbor Error Rate is High.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborRateErrors }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        sum by ({{ range $.Values.metrics.rules.additionalAggregationLabels }}{{ . }},{{ end }})(
          rate(registry_http_requests_total{code=~"4..|5.."}[5m])
        )
        /
        sum by ({{ range $.Values.metrics.rules.additionalAggregationLabels }}{{ . }},{{ end }})(
          rate(registry_http_requests_total[5m])
        )
        > 0.15
      for: 5m
      labels:
        severity: warning
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborRateErrors }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- if not (.Values.metrics.rules.disabled.HarborQuotaProjectLimit | default false) }}
    {{- /* NOTE(review): confirm that harbor_quotas_size_bytes is exported by the Harbor exporter in use. */}}
    - alert: HarborQuotaProjectLimit
      annotations:
        summary: Project Quota Is Raising The Limit.
        {{- with .Values.metrics.rules.additionalRuleGroupAnnotations.HarborQuotaProjectLimit }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      expr: |-
        ((harbor_project_quota_usage_byte > 0) / harbor_quotas_size_bytes) > 0.95
      for: 5m
      labels:
        severity: critical
        {{- with .Values.metrics.rules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.metrics.rules.additionalRuleGroupLabels.HarborQuotaProjectLimit }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    {{- end }}
{{- end }}
18 changes: 18 additions & 0 deletions templates/metrics/_helpers-monitoring.tpl
@@ -0,0 +1,18 @@
{{/*
Convert a list of rule groups to a YAML sequence.

Input (.): a list of maps, each with a "name" and a "rules" key.
Output: one "- name: ... / rules: ..." entry per group, each preceded by a
newline and starting at column 0; callers pipe the result through "indent".

The group name is quoted so that names resembling booleans or numbers stay
strings. A group without rules is rendered as "rules: []" rather than
"rules: null".
*/}}
{{- define "harbor.ruleGroupToYaml" -}}
{{- range . }}
- name: {{ .name | quote }}
  {{- if .rules }}
  rules:
  {{- toYaml .rules | nindent 2 }}
  {{- else }}
  rules: []
  {{- end }}
{{- end }}
{{- end }}

{{/*
Selector labels for the ServiceMonitor.

Emits .Values.metrics.serviceMonitor.matchLabels as YAML when it is set,
otherwise falls back to the output of the "harbor.matchLabels" template.
*/}}
{{- define "harbor.serviceMonitorMatchLabels" -}}
{{- with .Values.metrics.serviceMonitor.matchLabels -}}
{{ toYaml . }}
{{- else -}}
{{ include "harbor.matchLabels" $ }}
{{- end -}}
{{- end -}}
16 changes: 16 additions & 0 deletions templates/metrics/metrics-rules.yaml
@@ -0,0 +1,16 @@
{{- /*
PrometheusRule carrying the built-in Harbor alerts ("harbor.rules") followed
by any user-supplied groups (.Values.metrics.rules.additionalGroups).
Rendered only when metrics, the ServiceMonitor, the rules and alerting are
all enabled.
*/ -}}
{{- $metrics := .Values.metrics }}
{{- $rules := $metrics.rules }}
{{- if and $metrics.enabled $metrics.serviceMonitor.enabled $rules.enabled $rules.alerting }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    {{- include "harbor.labels" . | nindent 4 }}
    {{- with $rules.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  name: {{ include "harbor.fullname" . }}
spec:
  groups:
  {{- $builtin := include "harbor.rules" . | fromYaml }}
  {{- include "harbor.ruleGroupToYaml" $builtin.groups | indent 2 }}
  {{- include "harbor.ruleGroupToYaml" $rules.additionalGroups | indent 2 }}
{{- end }}
2 changes: 1 addition & 1 deletion templates/metrics/metrics-svcmon.yaml
Expand Up @@ -24,5 +24,5 @@ spec:
{{ toYaml .Values.metrics.serviceMonitor.relabelings | indent 4 }}
{{- end }}
selector:
matchLabels: {{ include "harbor.matchLabels" . | nindent 6 }}
matchLabels: {{ include "harbor.serviceMonitorMatchLabels" . | nindent 6 }}
{{- end }}

0 comments on commit c706f62

Please sign in to comment.