Allow tuning Common-controller Ratelimiter with retryIntervalStart & retryIntervalMax
This patch adds two new parameters, `retryIntervalStart` and `retryIntervalMax`, which can be configured to adjust the rate limiters of the snapshot queue and content queue in the controller. Signed-off-by: Humble Chirammal <hchiramm@redhat.com> ```release-note The `retry-interval-start` and `retry-interval-max` arguments are added to the common-controller; they control the retry interval of failed volume snapshot creation and deletion. These values set the rate limiter for the snapshot and content queues. ``` Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
This commit is contained in:
@@ -156,6 +156,9 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh
|
|||||||
|
|
||||||
* `--worker-threads`: Number of worker threads for running create snapshot and delete snapshot operations. Default value is 10.
|
* `--worker-threads`: Number of worker threads for running create snapshot and delete snapshot operations. Default value is 10.
|
||||||
|
|
||||||
|
* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second.
|
||||||
|
|
||||||
|
* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
|
||||||
#### Other recognized arguments
|
#### Other recognized arguments
|
||||||
* `--kubeconfig <path>`: Path to Kubernetes client configuration that the CSI external-snapshotter uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the external-snapshotter does not run as a Kubernetes pod, e.g. for debugging.
|
* `--kubeconfig <path>`: Path to Kubernetes client configuration that the CSI external-snapshotter uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the external-snapshotter does not run as a Kubernetes pod, e.g. for debugging.
|
||||||
|
|
||||||
|
@@ -72,8 +72,8 @@ var (
|
|||||||
metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
|
metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
|
||||||
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
|
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
|
||||||
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
|
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
|
||||||
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second")
|
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
|
||||||
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes")
|
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@@ -20,6 +20,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"k8s.io/client-go/util/workqueue"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -55,8 +56,10 @@ var (
|
|||||||
leaderElection = flag.Bool("leader-election", false, "Enables leader election.")
|
leaderElection = flag.Bool("leader-election", false, "Enables leader election.")
|
||||||
leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.")
|
leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.")
|
||||||
|
|
||||||
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
|
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
|
||||||
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
|
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
|
||||||
|
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
|
||||||
|
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -170,6 +173,8 @@ func main() {
|
|||||||
coreFactory.Core().V1().PersistentVolumeClaims(),
|
coreFactory.Core().V1().PersistentVolumeClaims(),
|
||||||
metricsManager,
|
metricsManager,
|
||||||
*resyncPeriod,
|
*resyncPeriod,
|
||||||
|
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
|
||||||
|
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
|
||||||
)
|
)
|
||||||
|
|
||||||
if err := ensureCustomResourceDefinitionsExist(snapClient); err != nil {
|
if err := ensureCustomResourceDefinitionsExist(snapClient); err != nil {
|
||||||
|
@@ -19,6 +19,7 @@ package common_controller
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"k8s.io/client-go/util/workqueue"
|
||||||
"reflect"
|
"reflect"
|
||||||
sysruntime "runtime"
|
sysruntime "runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -749,6 +750,8 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
|
|||||||
coreFactory.Core().V1().PersistentVolumeClaims(),
|
coreFactory.Core().V1().PersistentVolumeClaims(),
|
||||||
metricsManager,
|
metricsManager,
|
||||||
60*time.Second,
|
60*time.Second,
|
||||||
|
workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute),
|
||||||
|
workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute),
|
||||||
)
|
)
|
||||||
|
|
||||||
ctrl.eventRecorder = record.NewFakeRecorder(1000)
|
ctrl.eventRecorder = record.NewFakeRecorder(1000)
|
||||||
|
@@ -76,6 +76,8 @@ func NewCSISnapshotCommonController(
|
|||||||
pvcInformer coreinformers.PersistentVolumeClaimInformer,
|
pvcInformer coreinformers.PersistentVolumeClaimInformer,
|
||||||
metricsManager metrics.MetricsManager,
|
metricsManager metrics.MetricsManager,
|
||||||
resyncPeriod time.Duration,
|
resyncPeriod time.Duration,
|
||||||
|
snapshotRateLimiter workqueue.RateLimiter,
|
||||||
|
contentRateLimiter workqueue.RateLimiter,
|
||||||
) *csiSnapshotCommonController {
|
) *csiSnapshotCommonController {
|
||||||
broadcaster := record.NewBroadcaster()
|
broadcaster := record.NewBroadcaster()
|
||||||
broadcaster.StartLogging(klog.Infof)
|
broadcaster.StartLogging(klog.Infof)
|
||||||
@@ -90,8 +92,8 @@ func NewCSISnapshotCommonController(
|
|||||||
resyncPeriod: resyncPeriod,
|
resyncPeriod: resyncPeriod,
|
||||||
snapshotStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
|
snapshotStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
|
||||||
contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
|
contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
|
||||||
snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"),
|
snapshotQueue: workqueue.NewNamedRateLimitingQueue(snapshotRateLimiter, "snapshot-controller-snapshot"),
|
||||||
contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"),
|
contentQueue: workqueue.NewNamedRateLimitingQueue(contentRateLimiter, "snapshot-controller-content"),
|
||||||
metricsManager: metricsManager,
|
metricsManager: metricsManager,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user