diff --git a/README.md b/README.md index 6af01c3b..a16d05c3 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,9 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh * `--worker-threads`: Number of worker threads. Default value is 10. +* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second. + +*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes. #### Other recognized arguments * `--kubeconfig `: Path to Kubernetes client configuration that the snapshot controller uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the snapshot controller does not run as a Kubernetes pod, e.g. for debugging. diff --git a/cmd/csi-snapshotter/main.go b/cmd/csi-snapshotter/main.go index afdfcad2..a3287549 100644 --- a/cmd/csi-snapshotter/main.go +++ b/cmd/csi-snapshotter/main.go @@ -32,6 +32,7 @@ import ( "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/workqueue" klog "k8s.io/klog/v2" "github.com/container-storage-interface/spec/lib/go/csi" @@ -57,7 +58,7 @@ const ( var ( kubeconfig = flag.String("kubeconfig", "", "Absolute path to the kubeconfig file. Required only when running out of cluster.") csiAddress = flag.String("csi-address", "/run/csi/socket", "Address of the CSI driver socket.") - resyncPeriod = flag.Duration("resync-period", 15*time.Minute, "Resync interval of the controller.") + resyncPeriod = flag.Duration("resync-period", 15*time.Minute, "Resync interval of the controller. Default is 15 minutes") snapshotNamePrefix = flag.String("snapshot-name-prefix", "snapshot", "Prefix to apply to the name of a created snapshot") snapshotNameUUIDLength = flag.Int("snapshot-name-uuid-length", -1, "Length in characters for the generated uuid of a created snapshot. Defaults behavior is to NOT truncate.") showVersion = flag.Bool("version", false, "Show version.") @@ -68,9 +69,11 @@ var ( leaderElection = flag.Bool("leader-election", false, "Enables leader election.") leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.") - metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.") - httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.") - metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.") + metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.") + httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.") + metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.") + retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second") + retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes") ) var ( @@ -198,6 +201,7 @@ func main() { *snapshotNamePrefix, *snapshotNameUUIDLength, *extraCreateMetadata, + workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax), ) run := func(context.Context) { diff --git a/pkg/sidecar-controller/framework_test.go b/pkg/sidecar-controller/framework_test.go index 7a8f0f0e..078cfd0a 100644 --- a/pkg/sidecar-controller/framework_test.go +++ b/pkg/sidecar-controller/framework_test.go @@ -46,6 +46,7 @@ import ( core "k8s.io/client-go/testing" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" klog "k8s.io/klog/v2" ) @@ -522,6 +523,7 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte "snapshot", -1, true, + workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute), ) ctrl.eventRecorder = record.NewFakeRecorder(1000) diff --git a/pkg/sidecar-controller/snapshot_controller_base.go b/pkg/sidecar-controller/snapshot_controller_base.go index 56b9849b..0d203065 100644 --- a/pkg/sidecar-controller/snapshot_controller_base.go +++ b/pkg/sidecar-controller/snapshot_controller_base.go @@ -72,6 +72,7 @@ func NewCSISnapshotSideCarController( snapshotNamePrefix string, snapshotNameUUIDLength int, extraCreateMetadata bool, + contentRateLimiter workqueue.RateLimiter, ) *csiSnapshotSideCarController { broadcaster := record.NewBroadcaster() broadcaster.StartLogging(klog.Infof) @@ -87,7 +88,7 @@ func NewCSISnapshotSideCarController( handler: NewCSIHandler(snapshotter, timeout, snapshotNamePrefix, snapshotNameUUIDLength), resyncPeriod: resyncPeriod, contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc), - contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "csi-snapshotter-content"), + contentQueue: workqueue.NewNamedRateLimitingQueue(contentRateLimiter, "csi-snapshotter-content"), extraCreateMetadata: extraCreateMetadata, }