diff --git a/pkg/common-controller/snapshot_controller.go b/pkg/common-controller/snapshot_controller.go index b814770b..70e059ce 100644 --- a/pkg/common-controller/snapshot_controller.go +++ b/pkg/common-controller/snapshot_controller.go @@ -337,10 +337,16 @@ func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.Vol } // update snapshot status - klog.V(5).Infof("syncUnreadySnapshot [%s]: trying to update snapshot status", utils.SnapshotKey(snapshot)) - _, err = ctrl.updateSnapshotStatus(snapshot, newContent) - if err != nil { + for i := 0; i < ctrl.createSnapshotContentRetryCount; i++ { + klog.V(5).Infof("syncUnreadySnapshot [%s]: trying to update snapshot status", utils.SnapshotKey(snapshot)) + _, err = ctrl.updateSnapshotStatus(snapshot, newContent) + if err == nil { + break + } klog.V(4).Infof("failed to update snapshot %s status: %v", utils.SnapshotKey(snapshot), err) + } + + if err != nil { // update snapshot status failed ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", fmt.Sprintf("Snapshot status update failed, %v", err)) return err @@ -393,10 +399,16 @@ func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.Vol } // Update snapshot status with BoundVolumeSnapshotContentName - klog.V(5).Infof("syncUnreadySnapshot [%s]: trying to update snapshot status", utils.SnapshotKey(snapshot)) - _, err = ctrl.updateSnapshotStatus(snapshot, content) - if err != nil { + for i := 0; i < ctrl.createSnapshotContentRetryCount; i++ { + klog.V(5).Infof("syncUnreadySnapshot [%s]: trying to update snapshot status", utils.SnapshotKey(snapshot)) + _, err = ctrl.updateSnapshotStatus(snapshot, content) + if err == nil { + break + } klog.V(4).Infof("failed to update snapshot %s status: %v", utils.SnapshotKey(snapshot), err) + } + + if err != nil { // update snapshot status failed ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", 
fmt.Sprintf("Snapshot status update failed, %v", err)) return err @@ -489,17 +501,23 @@ func (ctrl *csiSnapshotCommonController) createSnapshotContent(snapshot *crdv1.V var updateContent *crdv1.VolumeSnapshotContent klog.V(3).Infof("volume snapshot content %#v", snapshotContent) - // Try to create the VolumeSnapshotContent object - klog.V(5).Infof("createSnapshotContent [%s]: trying to save volume snapshot content %s", utils.SnapshotKey(snapshot), snapshotContent.Name) - if updateContent, err = ctrl.clientset.SnapshotV1beta1().VolumeSnapshotContents().Create(snapshotContent); err == nil || apierrs.IsAlreadyExists(err) { - // Save succeeded. - if err != nil { - klog.V(3).Infof("volume snapshot content %q for snapshot %q already exists, reusing", snapshotContent.Name, utils.SnapshotKey(snapshot)) - err = nil - updateContent = snapshotContent - } else { - klog.V(3).Infof("volume snapshot content %q for snapshot %q saved, %v", snapshotContent.Name, utils.SnapshotKey(snapshot), snapshotContent) + // Try to create the VolumeSnapshotContent object several times + for i := 0; i < ctrl.createSnapshotContentRetryCount; i++ { + klog.V(5).Infof("createSnapshotContent [%s]: trying to save volume snapshot content %s", utils.SnapshotKey(snapshot), snapshotContent.Name) + if updateContent, err = ctrl.clientset.SnapshotV1beta1().VolumeSnapshotContents().Create(snapshotContent); err == nil || apierrs.IsAlreadyExists(err) { + // Save succeeded. + if err != nil { + klog.V(3).Infof("volume snapshot content %q for snapshot %q already exists, reusing", snapshotContent.Name, utils.SnapshotKey(snapshot)) + err = nil + updateContent = snapshotContent + } else { + klog.V(3).Infof("volume snapshot content %q for snapshot %q saved, %v", snapshotContent.Name, utils.SnapshotKey(snapshot), snapshotContent) + } + break } + // Save failed, try again after a while. 
+ klog.V(3).Infof("failed to save volume snapshot content %q for snapshot %q: %v", snapshotContent.Name, utils.SnapshotKey(snapshot), err) + time.Sleep(ctrl.createSnapshotContentInterval) } if err != nil { @@ -849,14 +867,19 @@ func (ctrl *csiSnapshotCommonController) bindandUpdateVolumeSnapshot(snapshotCon snapshotCopy := snapshotObj.DeepCopy() // update snapshot status - klog.V(5).Infof("bindandUpdateVolumeSnapshot [%s]: trying to update snapshot status", utils.SnapshotKey(snapshotCopy)) - updateSnapshot, err := ctrl.updateSnapshotStatus(snapshotCopy, snapshotContent) - if err == nil { - snapshotCopy = updateSnapshot + for i := 0; i < ctrl.createSnapshotContentRetryCount; i++ { + klog.V(5).Infof("bindandUpdateVolumeSnapshot [%s]: trying to update snapshot status", utils.SnapshotKey(snapshotCopy)) + // Assign to the function-level err instead of shadowing it with := so the check after the loop sees the last failure. + updateSnapshot, updateErr := ctrl.updateSnapshotStatus(snapshotCopy, snapshotContent) + err = updateErr + if err == nil { + snapshotCopy = updateSnapshot + break + } + klog.V(4).Infof("failed to update snapshot %s status: %v", utils.SnapshotKey(snapshot), err) } if err != nil { - klog.V(4).Infof("failed to update snapshot %s status: %v", utils.SnapshotKey(snapshot), err) // update snapshot status failed ctrl.updateSnapshotErrorStatusWithEvent(snapshotCopy, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", fmt.Sprintf("Snapshot status update failed, %v", err)) return nil, err diff --git a/pkg/common-controller/snapshot_controller_base.go b/pkg/common-controller/snapshot_controller_base.go index 66c73e8c..2518dad8 100644 --- a/pkg/common-controller/snapshot_controller_base.go +++ b/pkg/common-controller/snapshot_controller_base.go @@ -42,6 +42,12 @@ import ( "k8s.io/kubernetes/pkg/util/goroutinemap" ) +// Number of retries when we create a VolumeSnapshotContent object +const createSnapshotContentRetryCount = 5 + +// Interval between retries when we create a VolumeSnapshotContent object +const createSnapshotContentInterval = 10 * time.Second + type csiSnapshotCommonController struct { clientset 
clientset.Interface client kubernetes.Interface @@ -64,7 +70,9 @@ type csiSnapshotCommonController struct { // Map of scheduled/running operations. runningOperations goroutinemap.GoRoutineMap - resyncPeriod time.Duration + createSnapshotContentRetryCount int + createSnapshotContentInterval time.Duration + resyncPeriod time.Duration } // NewCSISnapshotController returns a new *csiSnapshotCommonController @@ -84,15 +92,17 @@ func NewCSISnapshotCommonController( eventRecorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: fmt.Sprintf("snapshot-controller")}) ctrl := &csiSnapshotCommonController{ - clientset: clientset, - client: client, - eventRecorder: eventRecorder, - runningOperations: goroutinemap.NewGoRoutineMap(true), - resyncPeriod: resyncPeriod, - snapshotStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc), - contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc), - snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"), - contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"), + clientset: clientset, + client: client, + eventRecorder: eventRecorder, + runningOperations: goroutinemap.NewGoRoutineMap(true), + createSnapshotContentRetryCount: createSnapshotContentRetryCount, + createSnapshotContentInterval: createSnapshotContentInterval, + resyncPeriod: resyncPeriod, + snapshotStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc), + contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc), + snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"), + contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"), } ctrl.pvcLister = pvcInformer.Lister()