Add snapshot controller metrics

Signed-off-by: Grant Griffiths <grant@portworx.com>
This commit is contained in:
Grant Griffiths
2020-10-07 14:44:02 -07:00
parent 684a1d3566
commit 941821bf99
7 changed files with 655 additions and 282 deletions

View File

@@ -22,6 +22,7 @@ import (
"fmt" "fmt"
"os" "os"
"os/signal" "os/signal"
"sync"
"time" "time"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
@@ -32,6 +33,7 @@ import (
"github.com/kubernetes-csi/csi-lib-utils/leaderelection" "github.com/kubernetes-csi/csi-lib-utils/leaderelection"
controller "github.com/kubernetes-csi/external-snapshotter/v3/pkg/common-controller" controller "github.com/kubernetes-csi/external-snapshotter/v3/pkg/common-controller"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned" clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme" snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme"
@@ -48,6 +50,9 @@ var (
leaderElection = flag.Bool("leader-election", false, "Enables leader election.") leaderElection = flag.Bool("leader-election", false, "Enables leader election.")
leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.") leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
) )
var ( var (
@@ -87,6 +92,28 @@ func main() {
factory := informers.NewSharedInformerFactory(snapClient, *resyncPeriod) factory := informers.NewSharedInformerFactory(snapClient, *resyncPeriod)
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, *resyncPeriod) coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, *resyncPeriod)
// Create and register metrics manager
metricsManager := metrics.NewMetricsManager()
wg := &sync.WaitGroup{}
wg.Add(1)
if *httpEndpoint != "" {
srv, err := metricsManager.StartMetricsEndpoint(*metricsPath, *httpEndpoint, promklog{}, wg)
if err != nil {
klog.Errorf("Failed to start metrics server: %s", err.Error())
os.Exit(1)
}
defer func() {
err := srv.Shutdown(context.Background())
if err != nil {
klog.Errorf("Failed to shutdown metrics server: %s", err.Error())
}
klog.Infof("Metrics server successfully shutdown")
wg.Done()
}()
klog.Infof("Metrics server successfully started on %s, %s", *httpEndpoint, *metricsPath)
}
// Add Snapshot types to the default Kubernetes so events can be logged for them // Add Snapshot types to the default Kubernetes so events can be logged for them
snapshotscheme.AddToScheme(scheme.Scheme) snapshotscheme.AddToScheme(scheme.Scheme)
@@ -99,6 +126,7 @@ func main() {
factory.Snapshot().V1beta1().VolumeSnapshotContents(), factory.Snapshot().V1beta1().VolumeSnapshotContents(),
factory.Snapshot().V1beta1().VolumeSnapshotClasses(), factory.Snapshot().V1beta1().VolumeSnapshotClasses(),
coreFactory.Core().V1().PersistentVolumeClaims(), coreFactory.Core().V1().PersistentVolumeClaims(),
metricsManager,
*resyncPeriod, *resyncPeriod,
) )
@@ -142,3 +170,9 @@ func buildConfig(kubeconfig string) (*rest.Config, error) {
} }
return rest.InClusterConfig() return rest.InClusterConfig()
} }
type promklog struct{}
func (pl promklog) Println(v ...interface{}) {
klog.Error(v...)
}

View File

@@ -34,6 +34,7 @@ import (
snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme" snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme"
informers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions" informers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions"
storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1" storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils" "github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
@@ -734,6 +735,10 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
} }
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, utils.NoResyncPeriodFunc()) coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, utils.NoResyncPeriodFunc())
metricsManager := metrics.NewMetricsManager()
wg := &sync.WaitGroup{}
wg.Add(1)
metricsManager.StartMetricsEndpoint("/metrics", "localhost:0", nil, wg)
ctrl := NewCSISnapshotCommonController( ctrl := NewCSISnapshotCommonController(
clientset, clientset,
@@ -742,6 +747,7 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
informerFactory.Snapshot().V1beta1().VolumeSnapshotContents(), informerFactory.Snapshot().V1beta1().VolumeSnapshotContents(),
informerFactory.Snapshot().V1beta1().VolumeSnapshotClasses(), informerFactory.Snapshot().V1beta1().VolumeSnapshotClasses(),
coreFactory.Core().V1().PersistentVolumeClaims(), coreFactory.Core().V1().PersistentVolumeClaims(),
metricsManager,
60*time.Second, 60*time.Second,
) )

View File

@@ -32,6 +32,7 @@ import (
klog "k8s.io/klog/v2" klog "k8s.io/klog/v2"
crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v3/apis/volumesnapshot/v1beta1" crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v3/apis/volumesnapshot/v1beta1"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils" "github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
) )
@@ -235,6 +236,20 @@ func (ctrl *csiSnapshotCommonController) syncSnapshot(snapshot *crdv1.VolumeSnap
// 2. Call checkandRemoveSnapshotFinalizersAndCheckandDeleteContent() with information obtained from step 1. This function name is very long but the name suggests what it does. It determines whether to remove finalizers on snapshot and whether to delete content. // 2. Call checkandRemoveSnapshotFinalizersAndCheckandDeleteContent() with information obtained from step 1. This function name is very long but the name suggests what it does. It determines whether to remove finalizers on snapshot and whether to delete content.
func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(snapshot *crdv1.VolumeSnapshot) error { func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(snapshot *crdv1.VolumeSnapshot) error {
klog.V(5).Infof("processSnapshotWithDeletionTimestamp VolumeSnapshot[%s]: %s", utils.SnapshotKey(snapshot), utils.GetSnapshotStatusForLogging(snapshot)) klog.V(5).Infof("processSnapshotWithDeletionTimestamp VolumeSnapshot[%s]: %s", utils.SnapshotKey(snapshot), utils.GetSnapshotStatusForLogging(snapshot))
driverName, err := ctrl.getSnapshotDriverName(snapshot)
if err != nil {
klog.Errorf("failed to getSnapshotDriverName while recording metrics for snapshot %q: %v", utils.SnapshotKey(snapshot), err)
}
snapshotProvisionType := metrics.DynamicSnapshotType
if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
snapshotProvisionType = metrics.PreProvisionedSnapshotType
}
// Processing delete, start operation metric
deleteOperationKey := metrics.NewOperationKey(metrics.DeleteSnapshotOperationName, snapshot.UID)
deleteOperationValue := metrics.NewOperationValue(driverName, snapshotProvisionType)
ctrl.metricsManager.OperationStart(deleteOperationKey, deleteOperationValue)
var contentName string var contentName string
if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil { if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil {
@@ -269,6 +284,7 @@ func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(sn
} }
klog.V(5).Infof("processSnapshotWithDeletionTimestamp[%s]: delete snapshot content and remove finalizer from snapshot if needed", utils.SnapshotKey(snapshot)) klog.V(5).Infof("processSnapshotWithDeletionTimestamp[%s]: delete snapshot content and remove finalizer from snapshot if needed", utils.SnapshotKey(snapshot))
return ctrl.checkandRemoveSnapshotFinalizersAndCheckandDeleteContent(snapshot, content, deleteContent) return ctrl.checkandRemoveSnapshotFinalizersAndCheckandDeleteContent(snapshot, content, deleteContent)
} }
@@ -388,6 +404,7 @@ func (ctrl *csiSnapshotCommonController) syncReadySnapshot(snapshot *crdv1.Volum
// snapshot is bound but content is not pointing to the snapshot // snapshot is bound but content is not pointing to the snapshot
return ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotMisbound", "VolumeSnapshotContent is not bound to the VolumeSnapshot correctly") return ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotMisbound", "VolumeSnapshotContent is not bound to the VolumeSnapshot correctly")
} }
// everything is verified, return // everything is verified, return
return nil return nil
} }
@@ -396,6 +413,28 @@ func (ctrl *csiSnapshotCommonController) syncReadySnapshot(snapshot *crdv1.Volum
func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.VolumeSnapshot) error { func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.VolumeSnapshot) error {
uniqueSnapshotName := utils.SnapshotKey(snapshot) uniqueSnapshotName := utils.SnapshotKey(snapshot)
klog.V(5).Infof("syncUnreadySnapshot %s", uniqueSnapshotName) klog.V(5).Infof("syncUnreadySnapshot %s", uniqueSnapshotName)
driverName, err := ctrl.getSnapshotDriverName(snapshot)
if err != nil {
klog.Errorf("failed to getSnapshotDriverName while recording metrics for snapshot %q: %s", utils.SnapshotKey(snapshot), err)
}
snapshotProvisionType := metrics.DynamicSnapshotType
if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
snapshotProvisionType = metrics.PreProvisionedSnapshotType
}
// Start metrics operations
if !utils.IsSnapshotCreated(snapshot) {
// Only start CreateSnapshot operation if the snapshot has not been cut
ctrl.metricsManager.OperationStart(
metrics.NewOperationKey(metrics.CreateSnapshotOperationName, snapshot.UID),
metrics.NewOperationValue(driverName, snapshotProvisionType),
)
}
ctrl.metricsManager.OperationStart(
metrics.NewOperationKey(metrics.CreateSnapshotAndReadyOperationName, snapshot.UID),
metrics.NewOperationValue(driverName, snapshotProvisionType),
)
// Pre-provisioned snapshot // Pre-provisioned snapshot
if snapshot.Spec.Source.VolumeSnapshotContentName != nil { if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
@@ -403,13 +442,16 @@ func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.Vol
if err != nil { if err != nil {
return err return err
} }
// if no content found yet, update status and return // if no content found yet, update status and return
if content == nil { if content == nil {
// can not find the desired VolumeSnapshotContent from cache store // can not find the desired VolumeSnapshotContent from cache store
ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotContentMissing", "VolumeSnapshotContent is missing") ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotContentMissing", "VolumeSnapshotContent is missing")
klog.V(4).Infof("syncUnreadySnapshot[%s]: snapshot content %q requested but not found, will try again", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName) klog.V(4).Infof("syncUnreadySnapshot[%s]: snapshot content %q requested but not found, will try again", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName)
return fmt.Errorf("snapshot %s requests an non-existing content %s", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName) return fmt.Errorf("snapshot %s requests an non-existing content %s", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName)
} }
// Set VolumeSnapshotRef UID // Set VolumeSnapshotRef UID
newContent, err := ctrl.checkandBindSnapshotContent(snapshot, content) newContent, err := ctrl.checkandBindSnapshotContent(snapshot, content)
if err != nil { if err != nil {
@@ -426,8 +468,10 @@ func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.Vol
ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", fmt.Sprintf("Snapshot status update failed, %v", err)) ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", fmt.Sprintf("Snapshot status update failed, %v", err))
return err return err
} }
return nil return nil
} }
// snapshot.Spec.Source.VolumeSnapshotContentName == nil - dynamically creating snapshot // snapshot.Spec.Source.VolumeSnapshotContentName == nil - dynamically creating snapshot
klog.V(5).Infof("getDynamicallyProvisionedContentFromStore for snapshot %s", uniqueSnapshotName) klog.V(5).Infof("getDynamicallyProvisionedContentFromStore for snapshot %s", uniqueSnapshotName)
contentObj, err := ctrl.getDynamicallyProvisionedContentFromStore(snapshot) contentObj, err := ctrl.getDynamicallyProvisionedContentFromStore(snapshot)
@@ -1094,10 +1138,30 @@ func (ctrl *csiSnapshotCommonController) updateSnapshotStatus(snapshot *crdv1.Vo
if updated { if updated {
snapshotClone := snapshotObj.DeepCopy() snapshotClone := snapshotObj.DeepCopy()
snapshotClone.Status = newStatus snapshotClone.Status = newStatus
// We need to record metrics before updating the status due to a bug causing cache entries after a failed UpdateStatus call.
// Must meet the following criteria to emit a successful CreateSnapshot status
// 1. Previous status was nil OR Previous status had a nil CreationTime
// 2. New status must be non-nil with a non-nil CreationTime
driverName := content.Spec.Driver
createOperationKey := metrics.NewOperationKey(metrics.CreateSnapshotOperationName, snapshot.UID)
if !utils.IsSnapshotCreated(snapshotObj) && utils.IsSnapshotCreated(snapshotClone) {
ctrl.metricsManager.RecordMetrics(createOperationKey, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName)
}
// Must meet the following criteria to emit a successful CreateSnapshotAndReady status
// 1. Previous status was nil OR Previous status had a nil ReadyToUse OR Previous status had a false ReadyToUse
// 2. New status must be non-nil with a ReadyToUse as true
if !utils.IsSnapshotReady(snapshotObj) && utils.IsSnapshotReady(snapshotClone) {
createAndReadyOperation := metrics.NewOperationKey(metrics.CreateSnapshotAndReadyOperationName, snapshot.UID)
ctrl.metricsManager.RecordMetrics(createAndReadyOperation, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName)
}
newSnapshotObj, err := ctrl.clientset.SnapshotV1beta1().VolumeSnapshots(snapshotClone.Namespace).UpdateStatus(context.TODO(), snapshotClone, metav1.UpdateOptions{}) newSnapshotObj, err := ctrl.clientset.SnapshotV1beta1().VolumeSnapshots(snapshotClone.Namespace).UpdateStatus(context.TODO(), snapshotClone, metav1.UpdateOptions{})
if err != nil { if err != nil {
return nil, newControllerUpdateError(utils.SnapshotKey(snapshot), err.Error()) return nil, newControllerUpdateError(utils.SnapshotKey(snapshot), err.Error())
} }
return newSnapshotObj, nil return newSnapshotObj, nil
} }
@@ -1177,6 +1241,45 @@ func (ctrl *csiSnapshotCommonController) getSnapshotClass(className string) (*cr
return class, nil return class, nil
} }
// getSnapshotDriverName is a helper function to get snapshot driver from the VolumeSnapshot.
// We try to get the driverName in multiple ways, as snapshot controller metrics depend on the correct driverName.
func (ctrl *csiSnapshotCommonController) getSnapshotDriverName(vs *crdv1.VolumeSnapshot) (string, error) {
klog.V(5).Infof("getSnapshotDriverName: VolumeSnapshot[%s]", vs.Name)
var driverName string
// Pre-Provisioned snapshots have contentName as source
var contentName string
if vs.Spec.Source.VolumeSnapshotContentName != nil {
contentName = *vs.Spec.Source.VolumeSnapshotContentName
}
// Get Driver name from SnapshotContent if we found a contentName
if contentName != "" {
content, err := ctrl.contentLister.Get(contentName)
if err != nil {
klog.Errorf("getSnapshotDriverName: failed to get snapshotContent: %v", contentName)
} else {
driverName = content.Spec.Driver
}
if driverName != "" {
return driverName, nil
}
}
// Dynamic snapshots will have a snapshotclass with a driver
if vs.Spec.VolumeSnapshotClassName != nil {
class, err := ctrl.getSnapshotClass(*vs.Spec.VolumeSnapshotClassName)
if err != nil {
klog.Errorf("getSnapshotDriverName: failed to get snapshotClass: %v", *vs.Spec.VolumeSnapshotClassName)
} else {
driverName = class.Driver
}
}
return driverName, nil
}
// SetDefaultSnapshotClass is a helper function to figure out the default snapshot class. // SetDefaultSnapshotClass is a helper function to figure out the default snapshot class.
// For pre-provisioned case, it's an no-op. // For pre-provisioned case, it's an no-op.
// For dynamic provisioning, it gets the default SnapshotClasses in the system if there is any(could be multiple), // For dynamic provisioning, it gets the default SnapshotClasses in the system if there is any(could be multiple),

View File

@@ -24,6 +24,7 @@ import (
clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned" clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
storageinformers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions/volumesnapshot/v1beta1" storageinformers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions/volumesnapshot/v1beta1"
storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1" storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils" "github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
@@ -60,6 +61,8 @@ type csiSnapshotCommonController struct {
snapshotStore cache.Store snapshotStore cache.Store
contentStore cache.Store contentStore cache.Store
metricsManager metrics.MetricsManager
resyncPeriod time.Duration resyncPeriod time.Duration
} }
@@ -71,6 +74,7 @@ func NewCSISnapshotCommonController(
volumeSnapshotContentInformer storageinformers.VolumeSnapshotContentInformer, volumeSnapshotContentInformer storageinformers.VolumeSnapshotContentInformer,
volumeSnapshotClassInformer storageinformers.VolumeSnapshotClassInformer, volumeSnapshotClassInformer storageinformers.VolumeSnapshotClassInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer, pvcInformer coreinformers.PersistentVolumeClaimInformer,
metricsManager metrics.MetricsManager,
resyncPeriod time.Duration, resyncPeriod time.Duration,
) *csiSnapshotCommonController { ) *csiSnapshotCommonController {
broadcaster := record.NewBroadcaster() broadcaster := record.NewBroadcaster()
@@ -88,6 +92,7 @@ func NewCSISnapshotCommonController(
contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc), contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"), snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"),
contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"), contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"),
metricsManager: metricsManager,
} }
ctrl.pvcLister = pvcInformer.Lister() ctrl.pvcLister = pvcInformer.Lister()
@@ -363,6 +368,7 @@ func (ctrl *csiSnapshotCommonController) updateSnapshot(snapshot *crdv1.VolumeSn
if !newSnapshot { if !newSnapshot {
return nil return nil
} }
err = ctrl.syncSnapshot(snapshot) err = ctrl.syncSnapshot(snapshot)
if err != nil { if err != nil {
if errors.IsConflict(err) { if errors.IsConflict(err) {
@@ -407,6 +413,13 @@ func (ctrl *csiSnapshotCommonController) updateContent(content *crdv1.VolumeSnap
func (ctrl *csiSnapshotCommonController) deleteSnapshot(snapshot *crdv1.VolumeSnapshot) { func (ctrl *csiSnapshotCommonController) deleteSnapshot(snapshot *crdv1.VolumeSnapshot) {
_ = ctrl.snapshotStore.Delete(snapshot) _ = ctrl.snapshotStore.Delete(snapshot)
klog.V(4).Infof("snapshot %q deleted", utils.SnapshotKey(snapshot)) klog.V(4).Infof("snapshot %q deleted", utils.SnapshotKey(snapshot))
driverName, err := ctrl.getSnapshotDriverName(snapshot)
if err != nil {
klog.Errorf("failed to getSnapshotDriverName while recording metrics for snapshot %q: %s", utils.SnapshotKey(snapshot), err)
} else {
deleteOperationKey := metrics.NewOperationKey(metrics.DeleteSnapshotOperationName, snapshot.UID)
ctrl.metricsManager.RecordMetrics(deleteOperationKey, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName)
}
snapshotContentName := "" snapshotContentName := ""
if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil { if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil {
@@ -416,6 +429,7 @@ func (ctrl *csiSnapshotCommonController) deleteSnapshot(snapshot *crdv1.VolumeSn
klog.V(5).Infof("deleteSnapshot[%q]: content not bound", utils.SnapshotKey(snapshot)) klog.V(5).Infof("deleteSnapshot[%q]: content not bound", utils.SnapshotKey(snapshot))
return return
} }
// sync the content when its snapshot is deleted. Explicitly sync'ing the // sync the content when its snapshot is deleted. Explicitly sync'ing the
// content here in response to snapshot deletion prevents the content from // content here in response to snapshot deletion prevents the content from
// waiting until the next sync period for its Release. // waiting until the next sync period for its Release.

View File

@@ -30,13 +30,48 @@ import (
) )
const ( const (
opStatusUnknown = "Unknown"
labelDriverName = "driver_name" labelDriverName = "driver_name"
labelOperationName = "operation_name" labelOperationName = "operation_name"
labelOperationStatus = "operation_status" labelOperationStatus = "operation_status"
labelSnapshotType = "snapshot_type"
subSystem = "snapshot_controller" subSystem = "snapshot_controller"
metricName = "operation_total_seconds" operationLatencyMetricName = "operation_total_seconds"
metricHelpMsg = "Total number of seconds spent by the controller on an operation from end to end" operationLatencyMetricHelpMsg = "Total number of seconds spent by the controller on an operation"
unknownDriverName = "unknown"
// CreateSnapshotOperationName is the operation that tracks how long the controller takes to create a snapshot.
// Specifically, the operation metric is emitted based on the following timestamps:
// - Start_time: controller notices the first time that there is a new VolumeSnapshot CR to dynamically provision a snapshot
// - End_time: controller notices that the CR has a status with CreationTime field set to be non-nil
CreateSnapshotOperationName = "CreateSnapshot"
// CreateSnapshotAndReadyOperationName is the operation that tracks how long the controller takes to create a snapshot and for it to be ready.
// Specifically, the operation metric is emitted based on the following timestamps:
// - Start_time: controller notices the first time that there is a new VolumeSnapshot CR(both dynamic and pre-provisioned cases)
// - End_time: controller notices that the CR has a status with Ready field set to be true
CreateSnapshotAndReadyOperationName = "CreateSnapshotAndReady"
// DeleteSnapshotOperationName is the operation that tracks how long a snapshot deletion takes.
// Specifically, the operation metric is emitted based on the following timestamps:
// - Start_time: controller notices the first time that there is a deletion timestamp placed on the VolumeSnapshot CR and the CR is ready to be deleted. Note that if the CR is being used by a PVC for rehydration, the controller should *NOT* set the start_time.
// - End_time: controller removed all finalizers on the VolumeSnapshot CR such that the CR is ready to be removed in the API server.
DeleteSnapshotOperationName = "DeleteSnapshot"
// DynamicSnapshotType represents a snapshot that is being dynamically provisioned
DynamicSnapshotType = snapshotProvisionType("dynamic")
// PreProvisionedSnapshotType represents a snapshot that is pre-provisioned
PreProvisionedSnapshotType = snapshotProvisionType("pre-provisioned")
// SnapshotStatusTypeUnknown represents that the status is unknown
SnapshotStatusTypeUnknown snapshotStatusType = "unknown"
// Success and Cancel are statuses for operation time (operation_total_seconds) as seen by snapshot controller
// SnapshotStatusTypeSuccess represents that a CreateSnapshot, CreateSnapshotAndReady,
// or DeleteSnapshot has finished successfully.
// Individual reconciliations (reconciliation_total_seconds) also use this status.
SnapshotStatusTypeSuccess snapshotStatusType = "success"
// SnapshotStatusTypeCancel represents that a CreateSnapshot, CreateSnapshotAndReady,
// or DeleteSnapshot has been deleted before finishing.
SnapshotStatusTypeCancel snapshotStatusType = "cancel"
) )
// OperationStatus is the interface type for representing an operation's execution // OperationStatus is the interface type for representing an operation's execution
@@ -57,11 +92,11 @@ type MetricsManager interface {
// OperationStart takes in an operation and caches its start time. // OperationStart takes in an operation and caches its start time.
// if the operation already exists, it's an no-op. // if the operation already exists, it's an no-op.
OperationStart(op Operation) OperationStart(key OperationKey, val OperationValue)
// DropOperation removes an operation from cache. // DropOperation removes an operation from cache.
// if the operation does not exist, it's an no-op. // if the operation does not exist, it's an no-op.
DropOperation(op Operation) DropOperation(op OperationKey)
// RecordMetrics records a metric point. Note that it will be an no-op if an // RecordMetrics records a metric point. Note that it will be an no-op if an
// operation has NOT been marked "Started" previously via invoking "OperationStart". // operation has NOT been marked "Started" previously via invoking "OperationStart".
@@ -69,20 +104,49 @@ type MetricsManager interface {
// op - the operation which the metric is associated with. // op - the operation which the metric is associated with.
// status - the operation status, if not specified, i.e., status == nil, an // status - the operation status, if not specified, i.e., status == nil, an
// "Unknown" status of the passed-in operation is assumed. // "Unknown" status of the passed-in operation is assumed.
RecordMetrics(op Operation, status OperationStatus) RecordMetrics(op OperationKey, status OperationStatus, driverName string)
} }
// Operation is a structure which holds information to identify a snapshot // OperationKey is a structure which holds information to
// related operation // uniquely identify a snapshot related operation
type Operation struct { type OperationKey struct {
// the name of the operation, for example: "CreateSnapshot", "DeleteSnapshot" // Name is the name of the operation, for example: "CreateSnapshot", "DeleteSnapshot"
Name string Name string
// the name of the driver which executes the operation // ResourceID is the resource UID to which the operation has been executed against
Driver string
// the resource UID to which the operation has been executed against
ResourceID types.UID ResourceID types.UID
} }
// OperationValue is a structure which holds operation metadata
type OperationValue struct {
// Driver is the driver name which executes the operation
Driver string
// SnapshotType represents the snapshot type, for example: "dynamic", "pre-provisioned"
SnapshotType string
// startTime is the time when the operation first started
startTime time.Time
}
// NewOperationKey initializes a new OperationKey
func NewOperationKey(name string, snapshotUID types.UID) OperationKey {
return OperationKey{
Name: name,
ResourceID: snapshotUID,
}
}
// NewOperationValue initializes a new OperationValue
func NewOperationValue(driver string, snapshotType snapshotProvisionType) OperationValue {
if driver == "" {
driver = unknownDriverName
}
return OperationValue{
Driver: driver,
SnapshotType: string(snapshotType),
}
}
type operationMetricsManager struct { type operationMetricsManager struct {
// cache is a concurrent-safe map which stores start timestamps for all // cache is a concurrent-safe map which stores start timestamps for all
// ongoing operations. // ongoing operations.
@@ -93,10 +157,11 @@ type operationMetricsManager struct {
// registry is a wrapper around Prometheus Registry // registry is a wrapper around Prometheus Registry
registry k8smetrics.KubeRegistry registry k8smetrics.KubeRegistry
// opLatencyMetrics is a Histogram metrics // opLatencyMetrics is a Histogram metrics for operation time per request
opLatencyMetrics *k8smetrics.HistogramVec opLatencyMetrics *k8smetrics.HistogramVec
} }
// NewMetricsManager creates a new MetricsManager instance
func NewMetricsManager() MetricsManager { func NewMetricsManager() MetricsManager {
mgr := &operationMetricsManager{ mgr := &operationMetricsManager{
cache: sync.Map{}, cache: sync.Map{},
@@ -105,34 +170,83 @@ func NewMetricsManager() MetricsManager {
return mgr return mgr
} }
func (opMgr *operationMetricsManager) OperationStart(op Operation) { // OperationStart starts a new operation
opMgr.cache.LoadOrStore(op, time.Now()) func (opMgr *operationMetricsManager) OperationStart(key OperationKey, val OperationValue) {
val.startTime = time.Now()
opMgr.cache.LoadOrStore(key, val)
} }
func (opMgr *operationMetricsManager) DropOperation(op Operation) { // OperationStart drops an operation
func (opMgr *operationMetricsManager) DropOperation(op OperationKey) {
opMgr.cache.Delete(op) opMgr.cache.Delete(op)
} }
func (opMgr *operationMetricsManager) RecordMetrics(op Operation, status OperationStatus) { // RecordMetrics emits operation metrics
obj, exists := opMgr.cache.Load(op) func (opMgr *operationMetricsManager) RecordMetrics(opKey OperationKey, opStatus OperationStatus, driverName string) {
obj, exists := opMgr.cache.Load(opKey)
if !exists { if !exists {
// the operation has not been cached, return directly // the operation has not been cached, return directly
return return
} }
ts, ok := obj.(time.Time) opVal, ok := obj.(OperationValue)
if !ok { if !ok {
// the cached item is not a time.Time, should NEVER happen, clean and return // the cached item is not a OperationValue, should NEVER happen, clean and return
klog.Errorf("Invalid cache entry for key %v", op) klog.Errorf("Invalid cache entry for key %v", opKey)
opMgr.cache.Delete(op) opMgr.cache.Delete(opKey)
return return
} }
strStatus := opStatusUnknown status := string(SnapshotStatusTypeUnknown)
if status != nil { if opStatus != nil {
strStatus = status.String() status = opStatus.String()
} }
duration := time.Since(ts).Seconds()
opMgr.opLatencyMetrics.WithLabelValues(op.Driver, op.Name, strStatus).Observe(duration) // if we do not know the driverName while recording metrics,
opMgr.cache.Delete(op) // refer to the cached version instead.
if driverName == "" || driverName == unknownDriverName {
driverName = opVal.Driver
}
operationDuration := time.Since(opVal.startTime).Seconds()
opMgr.opLatencyMetrics.WithLabelValues(driverName, opKey.Name, opVal.SnapshotType, status).Observe(operationDuration)
// Report cancel metrics if we are deleting an unfinished VolumeSnapshot
if opKey.Name == DeleteSnapshotOperationName {
// check if we have a CreateSnapshot operation pending for this
createKey := NewOperationKey(CreateSnapshotOperationName, opKey.ResourceID)
obj, exists := opMgr.cache.Load(createKey)
if exists {
// record a cancel metric if found
opMgr.recordCancelMetric(obj, createKey, operationDuration)
}
// check if we have a CreateSnapshotAndReady operation pending for this
createAndReadyKey := NewOperationKey(CreateSnapshotAndReadyOperationName, opKey.ResourceID)
obj, exists = opMgr.cache.Load(createAndReadyKey)
if exists {
// record a cancel metric if found
opMgr.recordCancelMetric(obj, createAndReadyKey, operationDuration)
}
}
opMgr.cache.Delete(opKey)
}
// recordCancelMetric records a metric for a create operation that hasn't finished
func (opMgr *operationMetricsManager) recordCancelMetric(obj interface{}, key OperationKey, duration float64) {
// record a cancel metric if found
val, ok := obj.(OperationValue)
if !ok {
klog.Errorf("Invalid cache entry for key %v", key)
opMgr.cache.Delete(key)
return
}
opMgr.opLatencyMetrics.WithLabelValues(
val.Driver,
key.Name,
val.SnapshotType,
string(SnapshotStatusTypeCancel),
).Observe(duration)
opMgr.cache.Delete(key)
} }
func (opMgr *operationMetricsManager) init() { func (opMgr *operationMetricsManager) init() {
@@ -140,11 +254,11 @@ func (opMgr *operationMetricsManager) init() {
opMgr.opLatencyMetrics = k8smetrics.NewHistogramVec( opMgr.opLatencyMetrics = k8smetrics.NewHistogramVec(
&k8smetrics.HistogramOpts{ &k8smetrics.HistogramOpts{
Subsystem: subSystem, Subsystem: subSystem,
Name: metricName, Name: operationLatencyMetricName,
Help: metricHelpMsg, Help: operationLatencyMetricHelpMsg,
Buckets: metricBuckets, Buckets: metricBuckets,
}, },
[]string{labelDriverName, labelOperationName, labelOperationStatus}, []string{labelDriverName, labelOperationName, labelSnapshotType, labelOperationStatus},
) )
opMgr.registry.MustRegister(opMgr.opLatencyMetrics) opMgr.registry.MustRegister(opMgr.opLatencyMetrics)
} }
@@ -175,3 +289,25 @@ func (opMgr *operationMetricsManager) StartMetricsEndpoint(pattern, addr string,
}() }()
return srv, nil return srv, nil
} }
// snapshotProvisionType represents which kind of snapshot a metric is
type snapshotProvisionType string
// snapshotStatusType represents the type of snapshot status to report
type snapshotStatusType string
// SnapshotOperationStatus represents the status for a snapshot controller operation
type SnapshotOperationStatus struct {
status snapshotStatusType
}
// NewSnapshotOperationStatus returns a new SnapshotOperationStatus
func NewSnapshotOperationStatus(status snapshotStatusType) SnapshotOperationStatus {
return SnapshotOperationStatus{
status: status,
}
}
func (sos SnapshotOperationStatus) String() string {
return string(sos.status)
}

View File

@@ -19,10 +19,12 @@ package metrics
import ( import (
"context" "context"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"log" "log"
"net/http" "net/http"
"reflect" "reflect"
"sort"
"strings" "strings"
"sync" "sync"
"testing" "testing"
@@ -86,9 +88,8 @@ func TestNew(t *testing.T) {
func TestDropNonExistingOperation(t *testing.T) { func TestDropNonExistingOperation(t *testing.T) {
mgr, wg, srv := initMgr() mgr, wg, srv := initMgr()
defer shutdown(srv, wg) defer shutdown(srv, wg)
op := Operation{ op := OperationKey{
Name: "drop-non-existing-operation-should-be-noop", Name: "drop-non-existing-operation-should-be-noop",
Driver: "driver",
ResourceID: types.UID("uid"), ResourceID: types.UID("uid"),
} }
mgr.DropOperation(op) mgr.DropOperation(op)
@@ -98,12 +99,11 @@ func TestRecordMetricsForNonExistingOperation(t *testing.T) {
mgr, wg, srv := initMgr() mgr, wg, srv := initMgr()
srvAddr := "http://" + srv.Addr + httpPattern srvAddr := "http://" + srv.Addr + httpPattern
defer shutdown(srv, wg) defer shutdown(srv, wg)
op := Operation{ opKey := OperationKey{
Name: "no-metrics-should-be-recorded-as-operation-did-not-start", Name: "no-metrics-should-be-recorded-as-operation-did-not-start",
Driver: "driver",
ResourceID: types.UID("uid"), ResourceID: types.UID("uid"),
} }
mgr.RecordMetrics(op, nil) mgr.RecordMetrics(opKey, nil, "driver")
rsp, err := http.Get(srvAddr) rsp, err := http.Get(srvAddr)
if err != nil || rsp.StatusCode != http.StatusOK { if err != nil || rsp.StatusCode != http.StatusOK {
t.Errorf("failed to get response from server %v, %v", err, rsp) t.Errorf("failed to get response from server %v, %v", err, rsp)
@@ -112,8 +112,8 @@ func TestRecordMetricsForNonExistingOperation(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("failed to read response body %v", err) t.Errorf("failed to read response body %v", err)
} }
if strings.Contains(string(r), op.Name) { if strings.Contains(string(r), opKey.Name) {
t.Errorf("found metric should have been dropped for operation [%s] [%s]", op.Name, string(r)) t.Errorf("found metric should have been dropped for operation [%s] [%s]", opKey.Name, string(r))
} }
} }
@@ -121,13 +121,13 @@ func TestDropOperation(t *testing.T) {
mgr, wg, srv := initMgr() mgr, wg, srv := initMgr()
srvAddr := "http://" + srv.Addr + httpPattern srvAddr := "http://" + srv.Addr + httpPattern
defer shutdown(srv, wg) defer shutdown(srv, wg)
op := Operation{ opKey := OperationKey{
Name: "should-have-been-dropped", Name: "should-have-been-dropped",
Driver: "driver",
ResourceID: types.UID("uid"), ResourceID: types.UID("uid"),
} }
mgr.OperationStart(op) opVal := NewOperationValue("driver", DynamicSnapshotType)
mgr.DropOperation(op) mgr.OperationStart(opKey, opVal)
mgr.DropOperation(opKey)
time.Sleep(300 * time.Millisecond) time.Sleep(300 * time.Millisecond)
rsp, err := http.Get(srvAddr) rsp, err := http.Get(srvAddr)
if err != nil || rsp.StatusCode != http.StatusOK { if err != nil || rsp.StatusCode != http.StatusOK {
@@ -137,36 +137,36 @@ func TestDropOperation(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("failed to read response body %v", err) t.Errorf("failed to read response body %v", err)
} }
if strings.Contains(string(r), op.Name) { if strings.Contains(string(r), opKey.Name) {
t.Errorf("found metric should have been dropped for operation [%s] [%s]", op.Name, string(r)) t.Errorf("found metric should have been dropped for operation [%s] [%s]", opKey.Name, string(r))
} }
// re-add with a different name // re-add with a different name
op.Name = "should-have-been-added" opKey.Name = "should-have-been-added"
mgr.OperationStart(op) mgr.OperationStart(opKey, opVal)
time.Sleep(300 * time.Millisecond) time.Sleep(300 * time.Millisecond)
opStatus := &fakeOpStatus{ opStatus := &fakeOpStatus{
statusCode: 0, statusCode: 0,
} }
mgr.RecordMetrics(op, opStatus) mgr.RecordMetrics(opKey, opStatus, "driver")
expected := expected :=
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end `# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
# TYPE snapshot_controller_operation_total_seconds histogram # TYPE snapshot_controller_operation_total_seconds histogram
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="0.25"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="0.25"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="should-have-been-added",operation_status="Success"} 0.3 snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type=""} 0.3
snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="should-have-been-added",operation_status="Success"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type=""} 1
` `
if err := verifyMetric(expected, srvAddr); err != nil { if err := verifyMetric(expected, srvAddr); err != nil {
@@ -178,34 +178,33 @@ func TestUnknownStatus(t *testing.T) {
mgr, wg, srv := initMgr() mgr, wg, srv := initMgr()
srvAddr := "http://" + srv.Addr + httpPattern srvAddr := "http://" + srv.Addr + httpPattern
defer shutdown(srv, wg) defer shutdown(srv, wg)
op := Operation{ opKey := OperationKey{
Name: "unknown-status-operation", Name: "unknown-status-operation",
Driver: "driver",
ResourceID: types.UID("uid"), ResourceID: types.UID("uid"),
} }
mgr.OperationStart(op) mgr.OperationStart(opKey, NewOperationValue("driver", DynamicSnapshotType))
// should create a Unknown data point with latency ~300ms // should create a Unknown data point with latency ~300ms
time.Sleep(300 * time.Millisecond) time.Sleep(300 * time.Millisecond)
mgr.RecordMetrics(op, nil) mgr.RecordMetrics(opKey, nil, "driver")
expected := expected :=
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end `# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
# TYPE snapshot_controller_operation_total_seconds histogram # TYPE snapshot_controller_operation_total_seconds histogram
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="0.25"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="0.25"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown"} 0.3 snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type=""} 0.3
snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type=""} 1
` `
if err := verifyMetric(expected, srvAddr); err != nil { if err := verifyMetric(expected, srvAddr); err != nil {
@@ -218,66 +217,65 @@ func TestRecordMetrics(t *testing.T) {
srvAddr := "http://" + srv.Addr + httpPattern srvAddr := "http://" + srv.Addr + httpPattern
defer shutdown(srv, wg) defer shutdown(srv, wg)
// add an operation // add an operation
op := Operation{ opKey := OperationKey{
Name: "op1", Name: "op1",
Driver: "driver1",
ResourceID: types.UID("uid1"), ResourceID: types.UID("uid1"),
} }
mgr.OperationStart(op) opVal := NewOperationValue("driver", DynamicSnapshotType)
mgr.OperationStart(opKey, opVal)
// should create a Success data point with latency ~ 1100ms // should create a Success data point with latency ~ 1100ms
time.Sleep(1100 * time.Millisecond) time.Sleep(1100 * time.Millisecond)
success := &fakeOpStatus{ success := &fakeOpStatus{
statusCode: 0, statusCode: 0,
} }
mgr.RecordMetrics(op, success) mgr.RecordMetrics(opKey, success, "driver")
// add another operation metric // add another operation metric
op.Name = "op2" opKey.Name = "op2"
op.Driver = "driver2" opKey.ResourceID = types.UID("uid2")
op.ResourceID = types.UID("uid2") mgr.OperationStart(opKey, opVal)
mgr.OperationStart(op)
// should create a Failure data point with latency ~ 100ms // should create a Failure data point with latency ~ 100ms
time.Sleep(100 * time.Millisecond) time.Sleep(100 * time.Millisecond)
failure := &fakeOpStatus{ failure := &fakeOpStatus{
statusCode: 1, statusCode: 1,
} }
mgr.RecordMetrics(op, failure) mgr.RecordMetrics(opKey, failure, "driver")
expected := expected :=
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end `# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
# TYPE snapshot_controller_operation_total_seconds histogram # TYPE snapshot_controller_operation_total_seconds histogram
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="0.25"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="0.25"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="0.5"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="0.5"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="op1",operation_status="Success"} 1.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type=""} 1.1
snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="op1",operation_status="Success"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type=""} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="0.25"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="0.25"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="op2",operation_status="Failure"} 0.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type=""} 0.1
snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="op2",operation_status="Failure"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type=""} 1
` `
if err := verifyMetric(expected, srvAddr); err != nil { if err := verifyMetric(expected, srvAddr); err != nil {
t.Errorf("failed testing [%v]", err) t.Errorf("failed testing [%v]", err)
@@ -295,15 +293,14 @@ func TestConcurrency(t *testing.T) {
statusCode: 1, statusCode: 1,
} }
ops := []struct { ops := []struct {
op Operation op OperationKey
desiredLatencyMs time.Duration desiredLatencyMs time.Duration
status OperationStatus status OperationStatus
drop bool drop bool
}{ }{
{ {
Operation{ OperationKey{
Name: "success1", Name: "success1",
Driver: "driver1",
ResourceID: types.UID("uid1"), ResourceID: types.UID("uid1"),
}, },
100, 100,
@@ -311,9 +308,8 @@ func TestConcurrency(t *testing.T) {
false, false,
}, },
{ {
Operation{ OperationKey{
Name: "success2", Name: "success2",
Driver: "driver2",
ResourceID: types.UID("uid2"), ResourceID: types.UID("uid2"),
}, },
100, 100,
@@ -321,9 +317,8 @@ func TestConcurrency(t *testing.T) {
false, false,
}, },
{ {
Operation{ OperationKey{
Name: "failure1", Name: "failure1",
Driver: "driver3",
ResourceID: types.UID("uid3"), ResourceID: types.UID("uid3"),
}, },
100, 100,
@@ -331,9 +326,8 @@ func TestConcurrency(t *testing.T) {
false, false,
}, },
{ {
Operation{ OperationKey{
Name: "failure2", Name: "failure2",
Driver: "driver4",
ResourceID: types.UID("uid4"), ResourceID: types.UID("uid4"),
}, },
100, 100,
@@ -341,9 +335,8 @@ func TestConcurrency(t *testing.T) {
false, false,
}, },
{ {
Operation{ OperationKey{
Name: "unknown", Name: "unknown",
Driver: "driver5",
ResourceID: types.UID("uid5"), ResourceID: types.UID("uid5"),
}, },
100, 100,
@@ -351,9 +344,8 @@ func TestConcurrency(t *testing.T) {
false, false,
}, },
{ {
Operation{ OperationKey{
Name: "drop-from-cache", Name: "drop-from-cache",
Driver: "driver6",
ResourceID: types.UID("uid6"), ResourceID: types.UID("uid6"),
}, },
100, 100,
@@ -363,15 +355,20 @@ func TestConcurrency(t *testing.T) {
} }
for i := range ops { for i := range ops {
mgr.OperationStart(ops[i].op) mgr.OperationStart(ops[i].op, OperationValue{
Driver: fmt.Sprintf("driver%v", i),
SnapshotType: string(DynamicSnapshotType),
})
} }
// add an extra operation which should remain in the cache // add an extra operation which should remain in the cache
remaining := Operation{ remaining := OperationKey{
Name: "remaining-in-cache", Name: "remaining-in-cache",
Driver: "driver7",
ResourceID: types.UID("uid7"), ResourceID: types.UID("uid7"),
} }
mgr.OperationStart(remaining) mgr.OperationStart(remaining, OperationValue{
Driver: "driver7",
SnapshotType: string(DynamicSnapshotType),
})
var wgMetrics sync.WaitGroup var wgMetrics sync.WaitGroup
@@ -385,7 +382,7 @@ func TestConcurrency(t *testing.T) {
if ops[i].drop { if ops[i].drop {
mgr.DropOperation(ops[i].op) mgr.DropOperation(ops[i].op)
} else { } else {
mgr.RecordMetrics(ops[i].op, ops[i].status) mgr.RecordMetrics(ops[i].op, ops[i].status, fmt.Sprintf("driver%v", i))
} }
}(i) }(i)
} }
@@ -395,86 +392,86 @@ func TestConcurrency(t *testing.T) {
expected := expected :=
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end `# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
# TYPE snapshot_controller_operation_total_seconds histogram # TYPE snapshot_controller_operation_total_seconds histogram
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="0.25"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="0.25"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="success1",operation_status="Success"} 0.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type=""} 0.1
snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="success1",operation_status="Success"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type=""} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="0.25"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="0.25"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="success2",operation_status="Success"} 0.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type=""} 0.1
snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="success2",operation_status="Success"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type=""} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="0.25"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="0.25"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver3",operation_name="failure1",operation_status="Failure"} 0.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type=""} 0.1
snapshot_controller_operation_total_seconds_count{driver_name="driver3",operation_name="failure1",operation_status="Failure"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type=""} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="0.25"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="0.25"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver4",operation_name="failure2",operation_status="Failure"} 0.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type=""} 0.1
snapshot_controller_operation_total_seconds_count{driver_name="driver4",operation_name="failure2",operation_status="Failure"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type=""} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="0.1"} 0 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="0.1"} 0
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="0.25"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="0.25"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="0.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="0.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="1"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="1"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="2.5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="2.5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="5"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="5"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="10"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="10"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="15"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="15"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="30"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="30"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="60"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="60"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="120"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="120"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="300"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="300"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="600"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="600"} 1
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="+Inf"} 1 snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="+Inf"} 1
snapshot_controller_operation_total_seconds_sum{driver_name="driver5",operation_name="unknown",operation_status="Unknown"} 0.1 snapshot_controller_operation_total_seconds_sum{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type=""} 0.1
snapshot_controller_operation_total_seconds_count{driver_name="driver5",operation_name="unknown",operation_status="Unknown"} 1 snapshot_controller_operation_total_seconds_count{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type=""} 1
` `
if err := verifyMetric(expected, srvAddr); err != nil { if err := verifyMetric(expected, srvAddr); err != nil {
t.Errorf("failed testing [%v]", err) t.Errorf("failed testing [%v]", err)
@@ -494,63 +491,141 @@ func verifyMetric(expected, srvAddr string) error {
return err return err
} }
format := expfmt.ResponseFormat(rsp.Header) format := expfmt.ResponseFormat(rsp.Header)
reader := strings.NewReader(string(r)) gotReader := strings.NewReader(string(r))
decoder := expfmt.NewDecoder(reader, format) gotDecoder := expfmt.NewDecoder(gotReader, format)
mf := &cmg.MetricFamily{} expectedReader := strings.NewReader(expected)
if err := decoder.Decode(mf); err != nil { expectedDecoder := expfmt.NewDecoder(expectedReader, format)
return err
} gotMfs := []*cmg.MetricFamily{}
reader = strings.NewReader(expected) expectedMfs := []*cmg.MetricFamily{}
decoder = expfmt.NewDecoder(reader, format) for {
gotMf := &cmg.MetricFamily{}
gotErr := gotDecoder.Decode(gotMf)
expectedMf := &cmg.MetricFamily{} expectedMf := &cmg.MetricFamily{}
if err := decoder.Decode(expectedMf); err != nil { if expectedErr := expectedDecoder.Decode(expectedMf); expectedErr != nil {
// return correctly if both are EOF
if expectedErr == io.EOF && gotErr == io.EOF {
break
} else {
return err return err
} }
if !containsMetrics(expectedMf, mf) { }
gotMfs = append(gotMfs, gotMf)
expectedMfs = append(expectedMfs, expectedMf)
}
if !containsMetrics(expectedMfs, gotMfs) {
return fmt.Errorf("failed testing, expected\n%s\n, got\n%s\n", expected, string(r)) return fmt.Errorf("failed testing, expected\n%s\n, got\n%s\n", expected, string(r))
} }
return nil return nil
} }
func containsMetrics(expected, got *cmg.MetricFamily) bool { // sortMfs, sorts metric families in alphabetical order by type.
// currently only supports counter and histogram
func sortMfs(mfs []*cmg.MetricFamily) []*cmg.MetricFamily {
var sortedMfs []*cmg.MetricFamily
// Sort first by type
sort.Slice(mfs, func(i, j int) bool {
return *mfs[i].Type < *mfs[j].Type
})
// Next, sort by length of name
sort.Slice(mfs, func(i, j int) bool {
return len(*mfs[i].Name) < len(*mfs[j].Name)
})
return sortedMfs
}
func containsMetrics(expectedMfs, gotMfs []*cmg.MetricFamily) bool {
if len(gotMfs) != len(expectedMfs) {
fmt.Printf("Not same length: expected and got metrics families: %v vs. %v\n", len(expectedMfs), len(gotMfs))
return false
}
// sort metric families for deterministic comparison.
sortedExpectedMfs := sortMfs(expectedMfs)
sortedGotMfs := sortMfs(gotMfs)
// compare expected vs. sorted actual metrics
for k, got := range sortedGotMfs {
matchCount := 0
expected := sortedExpectedMfs[k]
if (got.Name == nil || *(got.Name) != *(expected.Name)) || if (got.Name == nil || *(got.Name) != *(expected.Name)) ||
(got.Type == nil || *(got.Type) != *(expected.Type)) || (got.Type == nil || *(got.Type) != *(expected.Type)) ||
(got.Help == nil || *(got.Help) != *(expected.Help)) { (got.Help == nil || *(got.Help) != *(expected.Help)) {
fmt.Printf("invalid header info: got: %v, expected: %v\n", *got.Name, *expected.Name)
fmt.Printf("invalid header info: got: %v, expected: %v\n", *got.Type, *expected.Type)
fmt.Printf("invalid header info: got: %v, expected: %v\n", *got.Help, *expected.Help)
return false return false
} }
numRecords := len(expected.Metric) numRecords := len(expected.Metric)
if len(got.Metric) < numRecords { if len(got.Metric) < numRecords {
fmt.Printf("Not the same number of records: got.Metric: %v, numRecords: %v\n", len(got.Metric), numRecords)
return false return false
} }
matchCount := 0
for i := 0; i < len(got.Metric); i++ { for i := 0; i < len(got.Metric); i++ {
for j := 0; j < numRecords; j++ { for j := 0; j < numRecords; j++ {
if got.Metric[i].Histogram == nil && expected.Metric[j].Histogram != nil ||
got.Metric[i].Histogram != nil && expected.Metric[j].Histogram == nil {
fmt.Printf("got metric and expected metric histogram type mismatch")
return false
}
// labels should be the same // labels should be the same
if !reflect.DeepEqual(got.Metric[i].Label, expected.Metric[j].Label) { if !reflect.DeepEqual(got.Metric[i].Label, expected.Metric[j].Label) {
continue continue
} }
// metric type specific checks
switch {
case got.Metric[i].Histogram != nil && expected.Metric[j].Histogram != nil:
gh := got.Metric[i].Histogram gh := got.Metric[i].Histogram
eh := expected.Metric[j].Histogram eh := expected.Metric[j].Histogram
if gh == nil { if gh == nil || eh == nil {
continue continue
} }
if !reflect.DeepEqual(gh.Bucket, eh.Bucket) { if !reflect.DeepEqual(gh.Bucket, eh.Bucket) {
fmt.Println("got and expected histogram bucket not equal")
continue continue
} }
// this is a sum record, expecting a latency which is more than the // this is a sum record, expecting a latency which is more than the
// expected one. If the sum is smaller than expected, it will be considered // expected one. If the sum is smaller than expected, it will be considered
// as NOT a match // as NOT a match
if gh.SampleSum == nil || *(gh.SampleSum) < *(eh.SampleSum) { if gh.SampleSum == nil || eh.SampleSum == nil || *(gh.SampleSum) < *(eh.SampleSum) {
fmt.Println("difference in sample sum")
continue continue
} }
if gh.SampleCount == nil || *(gh.SampleCount) != *(eh.SampleCount) { if gh.SampleCount == nil || eh.SampleCount == nil || *(gh.SampleCount) != *(eh.SampleCount) {
fmt.Println("difference in sample count")
continue continue
} }
case got.Metric[i].Counter != nil && expected.Metric[j].Counter != nil:
gc := got.Metric[i].Counter
ec := expected.Metric[j].Counter
if gc.Value == nil || *(gc.Value) != *(ec.Value) {
fmt.Println("difference in counter values")
continue
}
}
// this is a match // this is a match
matchCount = matchCount + 1 matchCount = matchCount + 1
break break
} }
} }
return matchCount == numRecords
if matchCount != numRecords {
fmt.Printf("matchCount %v, numRecords %v\n", matchCount, numRecords)
return false
}
}
return true
} }

View File

@@ -519,3 +519,8 @@ func IsSnapshotReady(snapshot *crdv1.VolumeSnapshot) bool {
} }
return true return true
} }
// IsSnapshotCreated indicates that the snapshot has been cut on a storage system
func IsSnapshotCreated(snapshot *crdv1.VolumeSnapshot) bool {
return snapshot.Status != nil && snapshot.Status.CreationTime != nil
}