Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CHAOSPLT-531: Add a config for the minimum cron frequency #941

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions api/v1beta1/disruption_cron_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ var (
disruptionCronPermittedUserGroups map[string]struct{}
disruptionCronPermittedUserGroupString string
defaultCronDelayedStartTolerance time.Duration
minimumCronFrequency time.Duration
)

func (d *DisruptionCron) SetupWebhookWithManager(setupWebhookConfig utils.SetupWebhookWithManagerConfig) error {
Expand All @@ -55,6 +56,8 @@ func (d *DisruptionCron) SetupWebhookWithManager(setupWebhookConfig utils.SetupW

disruptionCronPermittedUserGroupString = strings.Join(setupWebhookConfig.PermittedUserGroups, ",")
defaultCronDelayedStartTolerance = setupWebhookConfig.DefaultCronDelayedStartTolerance
minimumCronFrequency = setupWebhookConfig.MinimumCronFrequency
defaultDuration = setupWebhookConfig.DefaultDurationFlag

return ctrl.NewWebhookManagedBy(setupWebhookConfig.Manager).
For(d).
Expand Down Expand Up @@ -100,6 +103,10 @@ func (d *DisruptionCron) ValidateCreate() (admission.Warnings, error) {
return nil, err
}

if err := d.validateMinimumFrequency(minimumCronFrequency); err != nil {
return nil, err
}

// send informative event to disruption cron to broadcast
d.emitEvent(EventDisruptionCronCreated)

Expand Down Expand Up @@ -196,3 +203,36 @@ func (d *DisruptionCron) validateDisruptionCronSpec() error {

return nil
}

// validateMinimumFrequency rejects a disruption cron whose schedule leaves less than minFrequency of
// undisrupted time between the completion of one disruption and the start of the following one.
//
// The duration of each disruption is taken from spec.disruptionTemplate.duration when it is positive,
// and from the package-level defaultDuration otherwise.
//
// It returns an error when spec.Schedule is not a valid standard cron expression, or when the computed
// gap between disruptions is shorter than minFrequency. It returns nil otherwise.
func (d *DisruptionCron) validateMinimumFrequency(minFrequency time.Duration) error {
	schedule, err := cron.ParseStandard(d.Spec.Schedule)
	if err != nil {
		return fmt.Errorf("spec.Schedule must follow the standard cron syntax: %w", err)
	}

	specDuration := defaultDuration
	if templateDuration := d.Spec.DisruptionTemplate.Duration.Duration(); templateDuration > 0 {
		specDuration = templateDuration
	}

	now := time.Now()
	nextDisruptionStarts := schedule.Next(now)
	nextDisruptionCompletes := nextDisruptionStarts.Add(specDuration)

	// "frequency" is the time between the starts of the next two scheduled disruptions.
	// We don't measure from "now", because cron runs at whole intervals: a schedule for "every 15 minutes",
	// created at 1:05, first runs at 1:15. So we take the next two activations, 1:15 and 1:30, and use their difference.
	frequency := schedule.Next(nextDisruptionStarts).Sub(nextDisruptionStarts)

	// "interval" is the time from when the next disruption completes until the following disruption would start,
	// i.e. how long the target stays undisrupted between two disruptions.
	//
	// schedule.Next only returns activations strictly later than its argument. If a disruption completes exactly
	// on a scheduled activation (e.g. "*/15 * * * *" with a 15m duration), searching from the completion time
	// would skip that activation and report a full period of rest where there is none. For a positive duration we
	// therefore search from just before the completion time, so a coinciding activation yields an interval of zero.
	// NOTE(review): this assumes an activation coinciding with the completion starts a new disruption — confirm
	// against the cron controller's handling of a still-running prior disruption.
	searchFrom := nextDisruptionCompletes
	if specDuration > 0 {
		searchFrom = nextDisruptionCompletes.Add(-time.Nanosecond)
	}

	interval := schedule.Next(searchFrom).Sub(nextDisruptionCompletes)
	if interval < minFrequency {
		return fmt.Errorf("this cron's spec.Schedule is \"%s\", which will create disruptions that last %s every %s. This leaves only %s between disruptions, but the minimum tolerated frequency is %s",
			d.Spec.Schedule, specDuration.String(), frequency.String(), interval.String(), minFrequency.String())
	}

	return nil
}
15 changes: 15 additions & 0 deletions api/v1beta1/disruption_cron_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package v1beta1

import (
"encoding/json"
"time"

"github.com/DataDog/chaos-controller/mocks"
"github.com/stretchr/testify/mock"
Expand Down Expand Up @@ -42,6 +43,7 @@ var _ = Describe("DisruptionCron Webhook", func() {
disruptionCronPermittedUserGroups = nil
defaultUserGroups = nil
defaultUserGroupsStr = ""
minimumCronFrequency = time.Second
})

Describe("ValidateCreate", func() {
Expand Down Expand Up @@ -201,6 +203,19 @@ var _ = Describe("DisruptionCron Webhook", func() {
})
})

When("disruption cron schedule is too brief", func() {
	It("should return an error", func() {
		// Arrange: raise the minimum to one year (8760h)
		minimumCronFrequency = 365 * 24 * time.Hour
		cronUnderTest := makeValidDisruptionCron()

		// Act
		gotWarnings, gotErr := cronUnderTest.ValidateCreate()

		// Assert
		Expect(gotWarnings).To(BeNil())
		Expect(gotErr).To(HaveOccurred())
		Expect(gotErr).To(MatchError(ContainSubstring("between disruptions, but the minimum tolerated frequency is 8760h")))
	})
})

When("disruption cron spec.delayedStartTolerance is invalid", func() {
It("should return an error", func() {
// Arrange
Expand Down
1 change: 1 addition & 0 deletions chart/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ data:
deleteOnly: {{ .Values.controller.deleteOnly }}
defaultDuration: {{ .Values.controller.defaultDuration }}
defaultCronDelayedStartTolerance: {{ .Values.controller.defaultCronDelayedStartTolerance }}
minimumCronFrequency: {{ .Values.controller.minimumCronFrequency }}
maxDuration: {{ .Values.controller.maxDuration }}
finalizerDeletionDelay: {{ .Values.controller.finalizerDeletionDelay }}
expiredDisruptionGCDelay: {{ .Values.controller.expiredDisruptionGCDelay }}
Expand Down
1 change: 1 addition & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ controller:
defaultDuration: 5m # default spec.duration for a disruption with none specified
maxDuration: 2h # maximum spec.duration for a disruption
defaultCronDelayedStartTolerance: 15m
minimumCronFrequency: 15m # minimum undisrupted time required between the end of one disruption and the start of the next; a disruption cron whose spec.schedule leaves less than this will be rejected.
finalizerDeletionDelay: 20s
expiredDisruptionGCDelay: 10m # time after a disruption expires before deleting it
userInfoHook: true
Expand Down
1 change: 1 addition & 0 deletions chart/values/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ controller:
defaultDuration: 5m
finalizerDeletionDelay: 2s
expiredDisruptionGCDelay: 30s
minimumCronFrequency: 1m
resources: # resources assigned to the controller pod. may need to be increased when deploying to larger scale clusters
cpu: 1
memory: 2Gi
Expand Down
8 changes: 8 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024 Datadog, Inc.

package config

import (
Expand Down Expand Up @@ -36,6 +37,7 @@ type controllerConfig struct {
MaxDuration time.Duration `json:"maxDuration,omitempty" yaml:"maxDuration,omitempty"`
DefaultDuration time.Duration `json:"defaultDuration" yaml:"defaultDuration"`
DefaultCronDelayedStartTolerance time.Duration `json:"defaultCronDelayedStartTolerance" yaml:"defaultCronDelayedStartTolerance"`
MinimumCronFrequency time.Duration `json:"minimumCronFrequency" yaml:"minimumCronFrequency"`
DeleteOnly bool `json:"deleteOnly" yaml:"deleteOnly"`
EnableSafeguards bool `json:"enableSafeguards" yaml:"enableSafeguards"`
EnableObserver bool `json:"enableObserver" yaml:"enableObserver"`
Expand Down Expand Up @@ -186,6 +188,12 @@ func New(client corev1client.ConfigMapInterface, logger *zap.SugaredLogger, osAr
return cfg, err
}

mainFS.DurationVar(&cfg.Controller.MinimumCronFrequency, "minimum-cron-frequency", time.Minute, "Minimum frequency for a disruption cron schedule")

if err := viper.BindPFlag("controller.minimumCronFrequency", mainFS.Lookup("minimum-cron-frequency")); err != nil {
return cfg, err
}

mainFS.StringVar(&cfg.Controller.Notifiers.Common.ClusterName, "notifiers-common-clustername", "", "Cluster Name for notifiers output")

if err := viper.BindPFlag("controller.notifiers.common.clusterName", mainFS.Lookup("notifiers-common-clustername")); err != nil {
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,8 @@ func main() {
DeleteOnlyFlag: cfg.Controller.DeleteOnly,
PermittedUserGroups: cfg.Controller.SafeMode.PermittedUserGroups,
DefaultCronDelayedStartTolerance: cfg.Controller.DefaultCronDelayedStartTolerance,
MinimumCronFrequency: cfg.Controller.MinimumCronFrequency,
DefaultDurationFlag: cfg.Controller.DefaultDuration,
}

if err = (&chaosv1beta1.DisruptionCron{}).SetupWebhookWithManager(disruptionCronSetupWebhookConfig); err != nil {
Expand Down
1 change: 1 addition & 0 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type SetupWebhookWithManagerConfig struct {
DefaultDurationFlag time.Duration
MaxDurationFlag time.Duration
DefaultCronDelayedStartTolerance time.Duration
MinimumCronFrequency time.Duration
ChaosNamespace string
CloudServicesProvidersManager cloudservice.CloudServicesProvidersManager
Environment string
Expand Down