Add snapshot controller metrics
Signed-off-by: Grant Griffiths <grant@portworx.com>
This commit is contained in:
@@ -22,6 +22,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
@@ -32,6 +33,7 @@ import (
|
|||||||
|
|
||||||
"github.com/kubernetes-csi/csi-lib-utils/leaderelection"
|
"github.com/kubernetes-csi/csi-lib-utils/leaderelection"
|
||||||
controller "github.com/kubernetes-csi/external-snapshotter/v3/pkg/common-controller"
|
controller "github.com/kubernetes-csi/external-snapshotter/v3/pkg/common-controller"
|
||||||
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
|
||||||
|
|
||||||
clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
|
clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
|
||||||
snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme"
|
snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme"
|
||||||
@@ -48,6 +50,9 @@ var (
|
|||||||
|
|
||||||
leaderElection = flag.Bool("leader-election", false, "Enables leader election.")
|
leaderElection = flag.Bool("leader-election", false, "Enables leader election.")
|
||||||
leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.")
|
leaderElectionNamespace = flag.String("leader-election-namespace", "", "The namespace where the leader election resource exists. Defaults to the pod namespace if not set.")
|
||||||
|
|
||||||
|
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
|
||||||
|
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -87,6 +92,28 @@ func main() {
|
|||||||
factory := informers.NewSharedInformerFactory(snapClient, *resyncPeriod)
|
factory := informers.NewSharedInformerFactory(snapClient, *resyncPeriod)
|
||||||
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, *resyncPeriod)
|
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, *resyncPeriod)
|
||||||
|
|
||||||
|
// Create and register metrics manager
|
||||||
|
metricsManager := metrics.NewMetricsManager()
|
||||||
|
wg := &sync.WaitGroup{}
|
||||||
|
wg.Add(1)
|
||||||
|
if *httpEndpoint != "" {
|
||||||
|
srv, err := metricsManager.StartMetricsEndpoint(*metricsPath, *httpEndpoint, promklog{}, wg)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("Failed to start metrics server: %s", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
err := srv.Shutdown(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("Failed to shutdown metrics server: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
klog.Infof("Metrics server successfully shutdown")
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
klog.Infof("Metrics server successfully started on %s, %s", *httpEndpoint, *metricsPath)
|
||||||
|
}
|
||||||
|
|
||||||
// Add Snapshot types to the default Kubernetes so events can be logged for them
|
// Add Snapshot types to the default Kubernetes so events can be logged for them
|
||||||
snapshotscheme.AddToScheme(scheme.Scheme)
|
snapshotscheme.AddToScheme(scheme.Scheme)
|
||||||
|
|
||||||
@@ -99,6 +126,7 @@ func main() {
|
|||||||
factory.Snapshot().V1beta1().VolumeSnapshotContents(),
|
factory.Snapshot().V1beta1().VolumeSnapshotContents(),
|
||||||
factory.Snapshot().V1beta1().VolumeSnapshotClasses(),
|
factory.Snapshot().V1beta1().VolumeSnapshotClasses(),
|
||||||
coreFactory.Core().V1().PersistentVolumeClaims(),
|
coreFactory.Core().V1().PersistentVolumeClaims(),
|
||||||
|
metricsManager,
|
||||||
*resyncPeriod,
|
*resyncPeriod,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -142,3 +170,9 @@ func buildConfig(kubeconfig string) (*rest.Config, error) {
|
|||||||
}
|
}
|
||||||
return rest.InClusterConfig()
|
return rest.InClusterConfig()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type promklog struct{}
|
||||||
|
|
||||||
|
func (pl promklog) Println(v ...interface{}) {
|
||||||
|
klog.Error(v...)
|
||||||
|
}
|
||||||
|
@@ -34,6 +34,7 @@ import (
|
|||||||
snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme"
|
snapshotscheme "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned/scheme"
|
||||||
informers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions"
|
informers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions"
|
||||||
storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1"
|
storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1"
|
||||||
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
|
||||||
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
@@ -734,6 +735,10 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
|
|||||||
}
|
}
|
||||||
|
|
||||||
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, utils.NoResyncPeriodFunc())
|
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, utils.NoResyncPeriodFunc())
|
||||||
|
metricsManager := metrics.NewMetricsManager()
|
||||||
|
wg := &sync.WaitGroup{}
|
||||||
|
wg.Add(1)
|
||||||
|
metricsManager.StartMetricsEndpoint("/metrics", "localhost:0", nil, wg)
|
||||||
|
|
||||||
ctrl := NewCSISnapshotCommonController(
|
ctrl := NewCSISnapshotCommonController(
|
||||||
clientset,
|
clientset,
|
||||||
@@ -742,6 +747,7 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
|
|||||||
informerFactory.Snapshot().V1beta1().VolumeSnapshotContents(),
|
informerFactory.Snapshot().V1beta1().VolumeSnapshotContents(),
|
||||||
informerFactory.Snapshot().V1beta1().VolumeSnapshotClasses(),
|
informerFactory.Snapshot().V1beta1().VolumeSnapshotClasses(),
|
||||||
coreFactory.Core().V1().PersistentVolumeClaims(),
|
coreFactory.Core().V1().PersistentVolumeClaims(),
|
||||||
|
metricsManager,
|
||||||
60*time.Second,
|
60*time.Second,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -32,6 +32,7 @@ import (
|
|||||||
klog "k8s.io/klog/v2"
|
klog "k8s.io/klog/v2"
|
||||||
|
|
||||||
crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v3/apis/volumesnapshot/v1beta1"
|
crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v3/apis/volumesnapshot/v1beta1"
|
||||||
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
|
||||||
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -235,6 +236,20 @@ func (ctrl *csiSnapshotCommonController) syncSnapshot(snapshot *crdv1.VolumeSnap
|
|||||||
// 2. Call checkandRemoveSnapshotFinalizersAndCheckandDeleteContent() with information obtained from step 1. This function name is very long but the name suggests what it does. It determines whether to remove finalizers on snapshot and whether to delete content.
|
// 2. Call checkandRemoveSnapshotFinalizersAndCheckandDeleteContent() with information obtained from step 1. This function name is very long but the name suggests what it does. It determines whether to remove finalizers on snapshot and whether to delete content.
|
||||||
func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(snapshot *crdv1.VolumeSnapshot) error {
|
func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(snapshot *crdv1.VolumeSnapshot) error {
|
||||||
klog.V(5).Infof("processSnapshotWithDeletionTimestamp VolumeSnapshot[%s]: %s", utils.SnapshotKey(snapshot), utils.GetSnapshotStatusForLogging(snapshot))
|
klog.V(5).Infof("processSnapshotWithDeletionTimestamp VolumeSnapshot[%s]: %s", utils.SnapshotKey(snapshot), utils.GetSnapshotStatusForLogging(snapshot))
|
||||||
|
driverName, err := ctrl.getSnapshotDriverName(snapshot)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("failed to getSnapshotDriverName while recording metrics for snapshot %q: %v", utils.SnapshotKey(snapshot), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshotProvisionType := metrics.DynamicSnapshotType
|
||||||
|
if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
|
||||||
|
snapshotProvisionType = metrics.PreProvisionedSnapshotType
|
||||||
|
}
|
||||||
|
|
||||||
|
// Processing delete, start operation metric
|
||||||
|
deleteOperationKey := metrics.NewOperationKey(metrics.DeleteSnapshotOperationName, snapshot.UID)
|
||||||
|
deleteOperationValue := metrics.NewOperationValue(driverName, snapshotProvisionType)
|
||||||
|
ctrl.metricsManager.OperationStart(deleteOperationKey, deleteOperationValue)
|
||||||
|
|
||||||
var contentName string
|
var contentName string
|
||||||
if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil {
|
if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil {
|
||||||
@@ -269,6 +284,7 @@ func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(sn
|
|||||||
}
|
}
|
||||||
|
|
||||||
klog.V(5).Infof("processSnapshotWithDeletionTimestamp[%s]: delete snapshot content and remove finalizer from snapshot if needed", utils.SnapshotKey(snapshot))
|
klog.V(5).Infof("processSnapshotWithDeletionTimestamp[%s]: delete snapshot content and remove finalizer from snapshot if needed", utils.SnapshotKey(snapshot))
|
||||||
|
|
||||||
return ctrl.checkandRemoveSnapshotFinalizersAndCheckandDeleteContent(snapshot, content, deleteContent)
|
return ctrl.checkandRemoveSnapshotFinalizersAndCheckandDeleteContent(snapshot, content, deleteContent)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -388,6 +404,7 @@ func (ctrl *csiSnapshotCommonController) syncReadySnapshot(snapshot *crdv1.Volum
|
|||||||
// snapshot is bound but content is not pointing to the snapshot
|
// snapshot is bound but content is not pointing to the snapshot
|
||||||
return ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotMisbound", "VolumeSnapshotContent is not bound to the VolumeSnapshot correctly")
|
return ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotMisbound", "VolumeSnapshotContent is not bound to the VolumeSnapshot correctly")
|
||||||
}
|
}
|
||||||
|
|
||||||
// everything is verified, return
|
// everything is verified, return
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -396,6 +413,28 @@ func (ctrl *csiSnapshotCommonController) syncReadySnapshot(snapshot *crdv1.Volum
|
|||||||
func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.VolumeSnapshot) error {
|
func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.VolumeSnapshot) error {
|
||||||
uniqueSnapshotName := utils.SnapshotKey(snapshot)
|
uniqueSnapshotName := utils.SnapshotKey(snapshot)
|
||||||
klog.V(5).Infof("syncUnreadySnapshot %s", uniqueSnapshotName)
|
klog.V(5).Infof("syncUnreadySnapshot %s", uniqueSnapshotName)
|
||||||
|
driverName, err := ctrl.getSnapshotDriverName(snapshot)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("failed to getSnapshotDriverName while recording metrics for snapshot %q: %s", utils.SnapshotKey(snapshot), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshotProvisionType := metrics.DynamicSnapshotType
|
||||||
|
if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
|
||||||
|
snapshotProvisionType = metrics.PreProvisionedSnapshotType
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start metrics operations
|
||||||
|
if !utils.IsSnapshotCreated(snapshot) {
|
||||||
|
// Only start CreateSnapshot operation if the snapshot has not been cut
|
||||||
|
ctrl.metricsManager.OperationStart(
|
||||||
|
metrics.NewOperationKey(metrics.CreateSnapshotOperationName, snapshot.UID),
|
||||||
|
metrics.NewOperationValue(driverName, snapshotProvisionType),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
ctrl.metricsManager.OperationStart(
|
||||||
|
metrics.NewOperationKey(metrics.CreateSnapshotAndReadyOperationName, snapshot.UID),
|
||||||
|
metrics.NewOperationValue(driverName, snapshotProvisionType),
|
||||||
|
)
|
||||||
|
|
||||||
// Pre-provisioned snapshot
|
// Pre-provisioned snapshot
|
||||||
if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
|
if snapshot.Spec.Source.VolumeSnapshotContentName != nil {
|
||||||
@@ -403,13 +442,16 @@ func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.Vol
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// if no content found yet, update status and return
|
// if no content found yet, update status and return
|
||||||
if content == nil {
|
if content == nil {
|
||||||
// can not find the desired VolumeSnapshotContent from cache store
|
// can not find the desired VolumeSnapshotContent from cache store
|
||||||
ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotContentMissing", "VolumeSnapshotContent is missing")
|
ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotContentMissing", "VolumeSnapshotContent is missing")
|
||||||
klog.V(4).Infof("syncUnreadySnapshot[%s]: snapshot content %q requested but not found, will try again", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName)
|
klog.V(4).Infof("syncUnreadySnapshot[%s]: snapshot content %q requested but not found, will try again", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName)
|
||||||
|
|
||||||
return fmt.Errorf("snapshot %s requests an non-existing content %s", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName)
|
return fmt.Errorf("snapshot %s requests an non-existing content %s", utils.SnapshotKey(snapshot), *snapshot.Spec.Source.VolumeSnapshotContentName)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set VolumeSnapshotRef UID
|
// Set VolumeSnapshotRef UID
|
||||||
newContent, err := ctrl.checkandBindSnapshotContent(snapshot, content)
|
newContent, err := ctrl.checkandBindSnapshotContent(snapshot, content)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -426,8 +468,10 @@ func (ctrl *csiSnapshotCommonController) syncUnreadySnapshot(snapshot *crdv1.Vol
|
|||||||
ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", fmt.Sprintf("Snapshot status update failed, %v", err))
|
ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotStatusUpdateFailed", fmt.Sprintf("Snapshot status update failed, %v", err))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// snapshot.Spec.Source.VolumeSnapshotContentName == nil - dynamically creating snapshot
|
// snapshot.Spec.Source.VolumeSnapshotContentName == nil - dynamically creating snapshot
|
||||||
klog.V(5).Infof("getDynamicallyProvisionedContentFromStore for snapshot %s", uniqueSnapshotName)
|
klog.V(5).Infof("getDynamicallyProvisionedContentFromStore for snapshot %s", uniqueSnapshotName)
|
||||||
contentObj, err := ctrl.getDynamicallyProvisionedContentFromStore(snapshot)
|
contentObj, err := ctrl.getDynamicallyProvisionedContentFromStore(snapshot)
|
||||||
@@ -1094,10 +1138,30 @@ func (ctrl *csiSnapshotCommonController) updateSnapshotStatus(snapshot *crdv1.Vo
|
|||||||
if updated {
|
if updated {
|
||||||
snapshotClone := snapshotObj.DeepCopy()
|
snapshotClone := snapshotObj.DeepCopy()
|
||||||
snapshotClone.Status = newStatus
|
snapshotClone.Status = newStatus
|
||||||
|
|
||||||
|
// We need to record metrics before updating the status due to a bug causing cache entries after a failed UpdateStatus call.
|
||||||
|
// Must meet the following criteria to emit a successful CreateSnapshot status
|
||||||
|
// 1. Previous status was nil OR Previous status had a nil CreationTime
|
||||||
|
// 2. New status must be non-nil with a non-nil CreationTime
|
||||||
|
driverName := content.Spec.Driver
|
||||||
|
createOperationKey := metrics.NewOperationKey(metrics.CreateSnapshotOperationName, snapshot.UID)
|
||||||
|
if !utils.IsSnapshotCreated(snapshotObj) && utils.IsSnapshotCreated(snapshotClone) {
|
||||||
|
ctrl.metricsManager.RecordMetrics(createOperationKey, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must meet the following criteria to emit a successful CreateSnapshotAndReady status
|
||||||
|
// 1. Previous status was nil OR Previous status had a nil ReadyToUse OR Previous status had a false ReadyToUse
|
||||||
|
// 2. New status must be non-nil with a ReadyToUse as true
|
||||||
|
if !utils.IsSnapshotReady(snapshotObj) && utils.IsSnapshotReady(snapshotClone) {
|
||||||
|
createAndReadyOperation := metrics.NewOperationKey(metrics.CreateSnapshotAndReadyOperationName, snapshot.UID)
|
||||||
|
ctrl.metricsManager.RecordMetrics(createAndReadyOperation, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName)
|
||||||
|
}
|
||||||
|
|
||||||
newSnapshotObj, err := ctrl.clientset.SnapshotV1beta1().VolumeSnapshots(snapshotClone.Namespace).UpdateStatus(context.TODO(), snapshotClone, metav1.UpdateOptions{})
|
newSnapshotObj, err := ctrl.clientset.SnapshotV1beta1().VolumeSnapshots(snapshotClone.Namespace).UpdateStatus(context.TODO(), snapshotClone, metav1.UpdateOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, newControllerUpdateError(utils.SnapshotKey(snapshot), err.Error())
|
return nil, newControllerUpdateError(utils.SnapshotKey(snapshot), err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
return newSnapshotObj, nil
|
return newSnapshotObj, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1177,6 +1241,45 @@ func (ctrl *csiSnapshotCommonController) getSnapshotClass(className string) (*cr
|
|||||||
return class, nil
|
return class, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getSnapshotDriverName is a helper function to get snapshot driver from the VolumeSnapshot.
|
||||||
|
// We try to get the driverName in multiple ways, as snapshot controller metrics depend on the correct driverName.
|
||||||
|
func (ctrl *csiSnapshotCommonController) getSnapshotDriverName(vs *crdv1.VolumeSnapshot) (string, error) {
|
||||||
|
klog.V(5).Infof("getSnapshotDriverName: VolumeSnapshot[%s]", vs.Name)
|
||||||
|
var driverName string
|
||||||
|
|
||||||
|
// Pre-Provisioned snapshots have contentName as source
|
||||||
|
var contentName string
|
||||||
|
if vs.Spec.Source.VolumeSnapshotContentName != nil {
|
||||||
|
contentName = *vs.Spec.Source.VolumeSnapshotContentName
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Driver name from SnapshotContent if we found a contentName
|
||||||
|
if contentName != "" {
|
||||||
|
content, err := ctrl.contentLister.Get(contentName)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("getSnapshotDriverName: failed to get snapshotContent: %v", contentName)
|
||||||
|
} else {
|
||||||
|
driverName = content.Spec.Driver
|
||||||
|
}
|
||||||
|
|
||||||
|
if driverName != "" {
|
||||||
|
return driverName, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dynamic snapshots will have a snapshotclass with a driver
|
||||||
|
if vs.Spec.VolumeSnapshotClassName != nil {
|
||||||
|
class, err := ctrl.getSnapshotClass(*vs.Spec.VolumeSnapshotClassName)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("getSnapshotDriverName: failed to get snapshotClass: %v", *vs.Spec.VolumeSnapshotClassName)
|
||||||
|
} else {
|
||||||
|
driverName = class.Driver
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return driverName, nil
|
||||||
|
}
|
||||||
|
|
||||||
// SetDefaultSnapshotClass is a helper function to figure out the default snapshot class.
|
// SetDefaultSnapshotClass is a helper function to figure out the default snapshot class.
|
||||||
// For pre-provisioned case, it's an no-op.
|
// For pre-provisioned case, it's an no-op.
|
||||||
// For dynamic provisioning, it gets the default SnapshotClasses in the system if there is any(could be multiple),
|
// For dynamic provisioning, it gets the default SnapshotClasses in the system if there is any(could be multiple),
|
||||||
|
@@ -24,6 +24,7 @@ import (
|
|||||||
clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
|
clientset "github.com/kubernetes-csi/external-snapshotter/client/v3/clientset/versioned"
|
||||||
storageinformers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions/volumesnapshot/v1beta1"
|
storageinformers "github.com/kubernetes-csi/external-snapshotter/client/v3/informers/externalversions/volumesnapshot/v1beta1"
|
||||||
storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1"
|
storagelisters "github.com/kubernetes-csi/external-snapshotter/client/v3/listers/volumesnapshot/v1beta1"
|
||||||
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/metrics"
|
||||||
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
|
"github.com/kubernetes-csi/external-snapshotter/v3/pkg/utils"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
@@ -60,6 +61,8 @@ type csiSnapshotCommonController struct {
|
|||||||
snapshotStore cache.Store
|
snapshotStore cache.Store
|
||||||
contentStore cache.Store
|
contentStore cache.Store
|
||||||
|
|
||||||
|
metricsManager metrics.MetricsManager
|
||||||
|
|
||||||
resyncPeriod time.Duration
|
resyncPeriod time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -71,6 +74,7 @@ func NewCSISnapshotCommonController(
|
|||||||
volumeSnapshotContentInformer storageinformers.VolumeSnapshotContentInformer,
|
volumeSnapshotContentInformer storageinformers.VolumeSnapshotContentInformer,
|
||||||
volumeSnapshotClassInformer storageinformers.VolumeSnapshotClassInformer,
|
volumeSnapshotClassInformer storageinformers.VolumeSnapshotClassInformer,
|
||||||
pvcInformer coreinformers.PersistentVolumeClaimInformer,
|
pvcInformer coreinformers.PersistentVolumeClaimInformer,
|
||||||
|
metricsManager metrics.MetricsManager,
|
||||||
resyncPeriod time.Duration,
|
resyncPeriod time.Duration,
|
||||||
) *csiSnapshotCommonController {
|
) *csiSnapshotCommonController {
|
||||||
broadcaster := record.NewBroadcaster()
|
broadcaster := record.NewBroadcaster()
|
||||||
@@ -88,6 +92,7 @@ func NewCSISnapshotCommonController(
|
|||||||
contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
|
contentStore: cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc),
|
||||||
snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"),
|
snapshotQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-snapshot"),
|
||||||
contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"),
|
contentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "snapshot-controller-content"),
|
||||||
|
metricsManager: metricsManager,
|
||||||
}
|
}
|
||||||
|
|
||||||
ctrl.pvcLister = pvcInformer.Lister()
|
ctrl.pvcLister = pvcInformer.Lister()
|
||||||
@@ -363,6 +368,7 @@ func (ctrl *csiSnapshotCommonController) updateSnapshot(snapshot *crdv1.VolumeSn
|
|||||||
if !newSnapshot {
|
if !newSnapshot {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ctrl.syncSnapshot(snapshot)
|
err = ctrl.syncSnapshot(snapshot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.IsConflict(err) {
|
if errors.IsConflict(err) {
|
||||||
@@ -407,6 +413,13 @@ func (ctrl *csiSnapshotCommonController) updateContent(content *crdv1.VolumeSnap
|
|||||||
func (ctrl *csiSnapshotCommonController) deleteSnapshot(snapshot *crdv1.VolumeSnapshot) {
|
func (ctrl *csiSnapshotCommonController) deleteSnapshot(snapshot *crdv1.VolumeSnapshot) {
|
||||||
_ = ctrl.snapshotStore.Delete(snapshot)
|
_ = ctrl.snapshotStore.Delete(snapshot)
|
||||||
klog.V(4).Infof("snapshot %q deleted", utils.SnapshotKey(snapshot))
|
klog.V(4).Infof("snapshot %q deleted", utils.SnapshotKey(snapshot))
|
||||||
|
driverName, err := ctrl.getSnapshotDriverName(snapshot)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("failed to getSnapshotDriverName while recording metrics for snapshot %q: %s", utils.SnapshotKey(snapshot), err)
|
||||||
|
} else {
|
||||||
|
deleteOperationKey := metrics.NewOperationKey(metrics.DeleteSnapshotOperationName, snapshot.UID)
|
||||||
|
ctrl.metricsManager.RecordMetrics(deleteOperationKey, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName)
|
||||||
|
}
|
||||||
|
|
||||||
snapshotContentName := ""
|
snapshotContentName := ""
|
||||||
if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil {
|
if snapshot.Status != nil && snapshot.Status.BoundVolumeSnapshotContentName != nil {
|
||||||
@@ -416,6 +429,7 @@ func (ctrl *csiSnapshotCommonController) deleteSnapshot(snapshot *crdv1.VolumeSn
|
|||||||
klog.V(5).Infof("deleteSnapshot[%q]: content not bound", utils.SnapshotKey(snapshot))
|
klog.V(5).Infof("deleteSnapshot[%q]: content not bound", utils.SnapshotKey(snapshot))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// sync the content when its snapshot is deleted. Explicitly sync'ing the
|
// sync the content when its snapshot is deleted. Explicitly sync'ing the
|
||||||
// content here in response to snapshot deletion prevents the content from
|
// content here in response to snapshot deletion prevents the content from
|
||||||
// waiting until the next sync period for its Release.
|
// waiting until the next sync period for its Release.
|
||||||
|
@@ -30,13 +30,48 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
opStatusUnknown = "Unknown"
|
|
||||||
labelDriverName = "driver_name"
|
labelDriverName = "driver_name"
|
||||||
labelOperationName = "operation_name"
|
labelOperationName = "operation_name"
|
||||||
labelOperationStatus = "operation_status"
|
labelOperationStatus = "operation_status"
|
||||||
|
labelSnapshotType = "snapshot_type"
|
||||||
subSystem = "snapshot_controller"
|
subSystem = "snapshot_controller"
|
||||||
metricName = "operation_total_seconds"
|
operationLatencyMetricName = "operation_total_seconds"
|
||||||
metricHelpMsg = "Total number of seconds spent by the controller on an operation from end to end"
|
operationLatencyMetricHelpMsg = "Total number of seconds spent by the controller on an operation"
|
||||||
|
unknownDriverName = "unknown"
|
||||||
|
|
||||||
|
// CreateSnapshotOperationName is the operation that tracks how long the controller takes to create a snapshot.
|
||||||
|
// Specifically, the operation metric is emitted based on the following timestamps:
|
||||||
|
// - Start_time: controller notices the first time that there is a new VolumeSnapshot CR to dynamically provision a snapshot
|
||||||
|
// - End_time: controller notices that the CR has a status with CreationTime field set to be non-nil
|
||||||
|
CreateSnapshotOperationName = "CreateSnapshot"
|
||||||
|
|
||||||
|
// CreateSnapshotAndReadyOperationName is the operation that tracks how long the controller takes to create a snapshot and for it to be ready.
|
||||||
|
// Specifically, the operation metric is emitted based on the following timestamps:
|
||||||
|
// - Start_time: controller notices the first time that there is a new VolumeSnapshot CR(both dynamic and pre-provisioned cases)
|
||||||
|
// - End_time: controller notices that the CR has a status with Ready field set to be true
|
||||||
|
CreateSnapshotAndReadyOperationName = "CreateSnapshotAndReady"
|
||||||
|
|
||||||
|
// DeleteSnapshotOperationName is the operation that tracks how long a snapshot deletion takes.
|
||||||
|
// Specifically, the operation metric is emitted based on the following timestamps:
|
||||||
|
// - Start_time: controller notices the first time that there is a deletion timestamp placed on the VolumeSnapshot CR and the CR is ready to be deleted. Note that if the CR is being used by a PVC for rehydration, the controller should *NOT* set the start_time.
|
||||||
|
// - End_time: controller removed all finalizers on the VolumeSnapshot CR such that the CR is ready to be removed in the API server.
|
||||||
|
DeleteSnapshotOperationName = "DeleteSnapshot"
|
||||||
|
|
||||||
|
// DynamicSnapshotType represents a snapshot that is being dynamically provisioned
|
||||||
|
DynamicSnapshotType = snapshotProvisionType("dynamic")
|
||||||
|
// PreProvisionedSnapshotType represents a snapshot that is pre-provisioned
|
||||||
|
PreProvisionedSnapshotType = snapshotProvisionType("pre-provisioned")
|
||||||
|
|
||||||
|
// SnapshotStatusTypeUnknown represents that the status is unknown
|
||||||
|
SnapshotStatusTypeUnknown snapshotStatusType = "unknown"
|
||||||
|
// Success and Cancel are statuses for operation time (operation_total_seconds) as seen by snapshot controller
|
||||||
|
// SnapshotStatusTypeSuccess represents that a CreateSnapshot, CreateSnapshotAndReady,
|
||||||
|
// or DeleteSnapshot has finished successfully.
|
||||||
|
// Individual reconciliations (reconciliation_total_seconds) also use this status.
|
||||||
|
SnapshotStatusTypeSuccess snapshotStatusType = "success"
|
||||||
|
// SnapshotStatusTypeCancel represents that a CreateSnapshot, CreateSnapshotAndReady,
|
||||||
|
// or DeleteSnapshot has been deleted before finishing.
|
||||||
|
SnapshotStatusTypeCancel snapshotStatusType = "cancel"
|
||||||
)
|
)
|
||||||
|
|
||||||
// OperationStatus is the interface type for representing an operation's execution
|
// OperationStatus is the interface type for representing an operation's execution
|
||||||
@@ -57,11 +92,11 @@ type MetricsManager interface {
|
|||||||
|
|
||||||
// OperationStart takes in an operation and caches its start time.
|
// OperationStart takes in an operation and caches its start time.
|
||||||
// if the operation already exists, it's an no-op.
|
// if the operation already exists, it's an no-op.
|
||||||
OperationStart(op Operation)
|
OperationStart(key OperationKey, val OperationValue)
|
||||||
|
|
||||||
// DropOperation removes an operation from cache.
|
// DropOperation removes an operation from cache.
|
||||||
// if the operation does not exist, it's an no-op.
|
// if the operation does not exist, it's an no-op.
|
||||||
DropOperation(op Operation)
|
DropOperation(op OperationKey)
|
||||||
|
|
||||||
// RecordMetrics records a metric point. Note that it will be an no-op if an
|
// RecordMetrics records a metric point. Note that it will be an no-op if an
|
||||||
// operation has NOT been marked "Started" previously via invoking "OperationStart".
|
// operation has NOT been marked "Started" previously via invoking "OperationStart".
|
||||||
@@ -69,20 +104,49 @@ type MetricsManager interface {
|
|||||||
// op - the operation which the metric is associated with.
|
// op - the operation which the metric is associated with.
|
||||||
// status - the operation status, if not specified, i.e., status == nil, an
|
// status - the operation status, if not specified, i.e., status == nil, an
|
||||||
// "Unknown" status of the passed-in operation is assumed.
|
// "Unknown" status of the passed-in operation is assumed.
|
||||||
RecordMetrics(op Operation, status OperationStatus)
|
RecordMetrics(op OperationKey, status OperationStatus, driverName string)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Operation is a structure which holds information to identify a snapshot
|
// OperationKey is a structure which holds information to
|
||||||
// related operation
|
// uniquely identify a snapshot related operation
|
||||||
type Operation struct {
|
type OperationKey struct {
|
||||||
// the name of the operation, for example: "CreateSnapshot", "DeleteSnapshot"
|
// Name is the name of the operation, for example: "CreateSnapshot", "DeleteSnapshot"
|
||||||
Name string
|
Name string
|
||||||
// the name of the driver which executes the operation
|
// ResourceID is the resource UID to which the operation has been executed against
|
||||||
Driver string
|
|
||||||
// the resource UID to which the operation has been executed against
|
|
||||||
ResourceID types.UID
|
ResourceID types.UID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OperationValue is a structure which holds operation metadata
|
||||||
|
type OperationValue struct {
|
||||||
|
// Driver is the driver name which executes the operation
|
||||||
|
Driver string
|
||||||
|
// SnapshotType represents the snapshot type, for example: "dynamic", "pre-provisioned"
|
||||||
|
SnapshotType string
|
||||||
|
|
||||||
|
// startTime is the time when the operation first started
|
||||||
|
startTime time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewOperationKey initializes a new OperationKey
|
||||||
|
func NewOperationKey(name string, snapshotUID types.UID) OperationKey {
|
||||||
|
return OperationKey{
|
||||||
|
Name: name,
|
||||||
|
ResourceID: snapshotUID,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewOperationValue initializes a new OperationValue
|
||||||
|
func NewOperationValue(driver string, snapshotType snapshotProvisionType) OperationValue {
|
||||||
|
if driver == "" {
|
||||||
|
driver = unknownDriverName
|
||||||
|
}
|
||||||
|
|
||||||
|
return OperationValue{
|
||||||
|
Driver: driver,
|
||||||
|
SnapshotType: string(snapshotType),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type operationMetricsManager struct {
|
type operationMetricsManager struct {
|
||||||
// cache is a concurrent-safe map which stores start timestamps for all
|
// cache is a concurrent-safe map which stores start timestamps for all
|
||||||
// ongoing operations.
|
// ongoing operations.
|
||||||
@@ -93,10 +157,11 @@ type operationMetricsManager struct {
|
|||||||
// registry is a wrapper around Prometheus Registry
|
// registry is a wrapper around Prometheus Registry
|
||||||
registry k8smetrics.KubeRegistry
|
registry k8smetrics.KubeRegistry
|
||||||
|
|
||||||
// opLatencyMetrics is a Histogram metrics
|
// opLatencyMetrics is a Histogram metrics for operation time per request
|
||||||
opLatencyMetrics *k8smetrics.HistogramVec
|
opLatencyMetrics *k8smetrics.HistogramVec
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewMetricsManager creates a new MetricsManager instance
|
||||||
func NewMetricsManager() MetricsManager {
|
func NewMetricsManager() MetricsManager {
|
||||||
mgr := &operationMetricsManager{
|
mgr := &operationMetricsManager{
|
||||||
cache: sync.Map{},
|
cache: sync.Map{},
|
||||||
@@ -105,34 +170,83 @@ func NewMetricsManager() MetricsManager {
|
|||||||
return mgr
|
return mgr
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opMgr *operationMetricsManager) OperationStart(op Operation) {
|
// OperationStart starts a new operation
|
||||||
opMgr.cache.LoadOrStore(op, time.Now())
|
func (opMgr *operationMetricsManager) OperationStart(key OperationKey, val OperationValue) {
|
||||||
|
val.startTime = time.Now()
|
||||||
|
opMgr.cache.LoadOrStore(key, val)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opMgr *operationMetricsManager) DropOperation(op Operation) {
|
// OperationStart drops an operation
|
||||||
|
func (opMgr *operationMetricsManager) DropOperation(op OperationKey) {
|
||||||
opMgr.cache.Delete(op)
|
opMgr.cache.Delete(op)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opMgr *operationMetricsManager) RecordMetrics(op Operation, status OperationStatus) {
|
// RecordMetrics emits operation metrics
|
||||||
obj, exists := opMgr.cache.Load(op)
|
func (opMgr *operationMetricsManager) RecordMetrics(opKey OperationKey, opStatus OperationStatus, driverName string) {
|
||||||
|
obj, exists := opMgr.cache.Load(opKey)
|
||||||
if !exists {
|
if !exists {
|
||||||
// the operation has not been cached, return directly
|
// the operation has not been cached, return directly
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ts, ok := obj.(time.Time)
|
opVal, ok := obj.(OperationValue)
|
||||||
if !ok {
|
if !ok {
|
||||||
// the cached item is not a time.Time, should NEVER happen, clean and return
|
// the cached item is not a OperationValue, should NEVER happen, clean and return
|
||||||
klog.Errorf("Invalid cache entry for key %v", op)
|
klog.Errorf("Invalid cache entry for key %v", opKey)
|
||||||
opMgr.cache.Delete(op)
|
opMgr.cache.Delete(opKey)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
strStatus := opStatusUnknown
|
status := string(SnapshotStatusTypeUnknown)
|
||||||
if status != nil {
|
if opStatus != nil {
|
||||||
strStatus = status.String()
|
status = opStatus.String()
|
||||||
}
|
}
|
||||||
duration := time.Since(ts).Seconds()
|
|
||||||
opMgr.opLatencyMetrics.WithLabelValues(op.Driver, op.Name, strStatus).Observe(duration)
|
// if we do not know the driverName while recording metrics,
|
||||||
opMgr.cache.Delete(op)
|
// refer to the cached version instead.
|
||||||
|
if driverName == "" || driverName == unknownDriverName {
|
||||||
|
driverName = opVal.Driver
|
||||||
|
}
|
||||||
|
|
||||||
|
operationDuration := time.Since(opVal.startTime).Seconds()
|
||||||
|
opMgr.opLatencyMetrics.WithLabelValues(driverName, opKey.Name, opVal.SnapshotType, status).Observe(operationDuration)
|
||||||
|
|
||||||
|
// Report cancel metrics if we are deleting an unfinished VolumeSnapshot
|
||||||
|
if opKey.Name == DeleteSnapshotOperationName {
|
||||||
|
// check if we have a CreateSnapshot operation pending for this
|
||||||
|
createKey := NewOperationKey(CreateSnapshotOperationName, opKey.ResourceID)
|
||||||
|
obj, exists := opMgr.cache.Load(createKey)
|
||||||
|
if exists {
|
||||||
|
// record a cancel metric if found
|
||||||
|
opMgr.recordCancelMetric(obj, createKey, operationDuration)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if we have a CreateSnapshotAndReady operation pending for this
|
||||||
|
createAndReadyKey := NewOperationKey(CreateSnapshotAndReadyOperationName, opKey.ResourceID)
|
||||||
|
obj, exists = opMgr.cache.Load(createAndReadyKey)
|
||||||
|
if exists {
|
||||||
|
// record a cancel metric if found
|
||||||
|
opMgr.recordCancelMetric(obj, createAndReadyKey, operationDuration)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
opMgr.cache.Delete(opKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
// recordCancelMetric records a metric for a create operation that hasn't finished
|
||||||
|
func (opMgr *operationMetricsManager) recordCancelMetric(obj interface{}, key OperationKey, duration float64) {
|
||||||
|
// record a cancel metric if found
|
||||||
|
val, ok := obj.(OperationValue)
|
||||||
|
if !ok {
|
||||||
|
klog.Errorf("Invalid cache entry for key %v", key)
|
||||||
|
opMgr.cache.Delete(key)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
opMgr.opLatencyMetrics.WithLabelValues(
|
||||||
|
val.Driver,
|
||||||
|
key.Name,
|
||||||
|
val.SnapshotType,
|
||||||
|
string(SnapshotStatusTypeCancel),
|
||||||
|
).Observe(duration)
|
||||||
|
opMgr.cache.Delete(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opMgr *operationMetricsManager) init() {
|
func (opMgr *operationMetricsManager) init() {
|
||||||
@@ -140,11 +254,11 @@ func (opMgr *operationMetricsManager) init() {
|
|||||||
opMgr.opLatencyMetrics = k8smetrics.NewHistogramVec(
|
opMgr.opLatencyMetrics = k8smetrics.NewHistogramVec(
|
||||||
&k8smetrics.HistogramOpts{
|
&k8smetrics.HistogramOpts{
|
||||||
Subsystem: subSystem,
|
Subsystem: subSystem,
|
||||||
Name: metricName,
|
Name: operationLatencyMetricName,
|
||||||
Help: metricHelpMsg,
|
Help: operationLatencyMetricHelpMsg,
|
||||||
Buckets: metricBuckets,
|
Buckets: metricBuckets,
|
||||||
},
|
},
|
||||||
[]string{labelDriverName, labelOperationName, labelOperationStatus},
|
[]string{labelDriverName, labelOperationName, labelSnapshotType, labelOperationStatus},
|
||||||
)
|
)
|
||||||
opMgr.registry.MustRegister(opMgr.opLatencyMetrics)
|
opMgr.registry.MustRegister(opMgr.opLatencyMetrics)
|
||||||
}
|
}
|
||||||
@@ -175,3 +289,25 @@ func (opMgr *operationMetricsManager) StartMetricsEndpoint(pattern, addr string,
|
|||||||
}()
|
}()
|
||||||
return srv, nil
|
return srv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// snapshotProvisionType represents which kind of snapshot a metric is
|
||||||
|
type snapshotProvisionType string
|
||||||
|
|
||||||
|
// snapshotStatusType represents the type of snapshot status to report
|
||||||
|
type snapshotStatusType string
|
||||||
|
|
||||||
|
// SnapshotOperationStatus represents the status for a snapshot controller operation
|
||||||
|
type SnapshotOperationStatus struct {
|
||||||
|
status snapshotStatusType
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewSnapshotOperationStatus returns a new SnapshotOperationStatus
|
||||||
|
func NewSnapshotOperationStatus(status snapshotStatusType) SnapshotOperationStatus {
|
||||||
|
return SnapshotOperationStatus{
|
||||||
|
status: status,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sos SnapshotOperationStatus) String() string {
|
||||||
|
return string(sos.status)
|
||||||
|
}
|
||||||
|
@@ -19,10 +19,12 @@ package metrics
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
@@ -86,9 +88,8 @@ func TestNew(t *testing.T) {
|
|||||||
func TestDropNonExistingOperation(t *testing.T) {
|
func TestDropNonExistingOperation(t *testing.T) {
|
||||||
mgr, wg, srv := initMgr()
|
mgr, wg, srv := initMgr()
|
||||||
defer shutdown(srv, wg)
|
defer shutdown(srv, wg)
|
||||||
op := Operation{
|
op := OperationKey{
|
||||||
Name: "drop-non-existing-operation-should-be-noop",
|
Name: "drop-non-existing-operation-should-be-noop",
|
||||||
Driver: "driver",
|
|
||||||
ResourceID: types.UID("uid"),
|
ResourceID: types.UID("uid"),
|
||||||
}
|
}
|
||||||
mgr.DropOperation(op)
|
mgr.DropOperation(op)
|
||||||
@@ -98,12 +99,11 @@ func TestRecordMetricsForNonExistingOperation(t *testing.T) {
|
|||||||
mgr, wg, srv := initMgr()
|
mgr, wg, srv := initMgr()
|
||||||
srvAddr := "http://" + srv.Addr + httpPattern
|
srvAddr := "http://" + srv.Addr + httpPattern
|
||||||
defer shutdown(srv, wg)
|
defer shutdown(srv, wg)
|
||||||
op := Operation{
|
opKey := OperationKey{
|
||||||
Name: "no-metrics-should-be-recorded-as-operation-did-not-start",
|
Name: "no-metrics-should-be-recorded-as-operation-did-not-start",
|
||||||
Driver: "driver",
|
|
||||||
ResourceID: types.UID("uid"),
|
ResourceID: types.UID("uid"),
|
||||||
}
|
}
|
||||||
mgr.RecordMetrics(op, nil)
|
mgr.RecordMetrics(opKey, nil, "driver")
|
||||||
rsp, err := http.Get(srvAddr)
|
rsp, err := http.Get(srvAddr)
|
||||||
if err != nil || rsp.StatusCode != http.StatusOK {
|
if err != nil || rsp.StatusCode != http.StatusOK {
|
||||||
t.Errorf("failed to get response from server %v, %v", err, rsp)
|
t.Errorf("failed to get response from server %v, %v", err, rsp)
|
||||||
@@ -112,8 +112,8 @@ func TestRecordMetricsForNonExistingOperation(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("failed to read response body %v", err)
|
t.Errorf("failed to read response body %v", err)
|
||||||
}
|
}
|
||||||
if strings.Contains(string(r), op.Name) {
|
if strings.Contains(string(r), opKey.Name) {
|
||||||
t.Errorf("found metric should have been dropped for operation [%s] [%s]", op.Name, string(r))
|
t.Errorf("found metric should have been dropped for operation [%s] [%s]", opKey.Name, string(r))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,13 +121,13 @@ func TestDropOperation(t *testing.T) {
|
|||||||
mgr, wg, srv := initMgr()
|
mgr, wg, srv := initMgr()
|
||||||
srvAddr := "http://" + srv.Addr + httpPattern
|
srvAddr := "http://" + srv.Addr + httpPattern
|
||||||
defer shutdown(srv, wg)
|
defer shutdown(srv, wg)
|
||||||
op := Operation{
|
opKey := OperationKey{
|
||||||
Name: "should-have-been-dropped",
|
Name: "should-have-been-dropped",
|
||||||
Driver: "driver",
|
|
||||||
ResourceID: types.UID("uid"),
|
ResourceID: types.UID("uid"),
|
||||||
}
|
}
|
||||||
mgr.OperationStart(op)
|
opVal := NewOperationValue("driver", DynamicSnapshotType)
|
||||||
mgr.DropOperation(op)
|
mgr.OperationStart(opKey, opVal)
|
||||||
|
mgr.DropOperation(opKey)
|
||||||
time.Sleep(300 * time.Millisecond)
|
time.Sleep(300 * time.Millisecond)
|
||||||
rsp, err := http.Get(srvAddr)
|
rsp, err := http.Get(srvAddr)
|
||||||
if err != nil || rsp.StatusCode != http.StatusOK {
|
if err != nil || rsp.StatusCode != http.StatusOK {
|
||||||
@@ -137,36 +137,36 @@ func TestDropOperation(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("failed to read response body %v", err)
|
t.Errorf("failed to read response body %v", err)
|
||||||
}
|
}
|
||||||
if strings.Contains(string(r), op.Name) {
|
if strings.Contains(string(r), opKey.Name) {
|
||||||
t.Errorf("found metric should have been dropped for operation [%s] [%s]", op.Name, string(r))
|
t.Errorf("found metric should have been dropped for operation [%s] [%s]", opKey.Name, string(r))
|
||||||
}
|
}
|
||||||
// re-add with a different name
|
// re-add with a different name
|
||||||
op.Name = "should-have-been-added"
|
opKey.Name = "should-have-been-added"
|
||||||
mgr.OperationStart(op)
|
mgr.OperationStart(opKey, opVal)
|
||||||
time.Sleep(300 * time.Millisecond)
|
time.Sleep(300 * time.Millisecond)
|
||||||
opStatus := &fakeOpStatus{
|
opStatus := &fakeOpStatus{
|
||||||
statusCode: 0,
|
statusCode: 0,
|
||||||
}
|
}
|
||||||
mgr.RecordMetrics(op, opStatus)
|
mgr.RecordMetrics(opKey, opStatus, "driver")
|
||||||
expected :=
|
expected :=
|
||||||
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
||||||
# TYPE snapshot_controller_operation_total_seconds histogram
|
# TYPE snapshot_controller_operation_total_seconds histogram
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="0.25"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="0.25"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="should-have-been-added",operation_status="Success"} 0.3
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type=""} 0.3
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="should-have-been-added",operation_status="Success"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="should-have-been-added",operation_status="Success",snapshot_type=""} 1
|
||||||
`
|
`
|
||||||
|
|
||||||
if err := verifyMetric(expected, srvAddr); err != nil {
|
if err := verifyMetric(expected, srvAddr); err != nil {
|
||||||
@@ -178,34 +178,33 @@ func TestUnknownStatus(t *testing.T) {
|
|||||||
mgr, wg, srv := initMgr()
|
mgr, wg, srv := initMgr()
|
||||||
srvAddr := "http://" + srv.Addr + httpPattern
|
srvAddr := "http://" + srv.Addr + httpPattern
|
||||||
defer shutdown(srv, wg)
|
defer shutdown(srv, wg)
|
||||||
op := Operation{
|
opKey := OperationKey{
|
||||||
Name: "unknown-status-operation",
|
Name: "unknown-status-operation",
|
||||||
Driver: "driver",
|
|
||||||
ResourceID: types.UID("uid"),
|
ResourceID: types.UID("uid"),
|
||||||
}
|
}
|
||||||
mgr.OperationStart(op)
|
mgr.OperationStart(opKey, NewOperationValue("driver", DynamicSnapshotType))
|
||||||
// should create a Unknown data point with latency ~300ms
|
// should create a Unknown data point with latency ~300ms
|
||||||
time.Sleep(300 * time.Millisecond)
|
time.Sleep(300 * time.Millisecond)
|
||||||
mgr.RecordMetrics(op, nil)
|
mgr.RecordMetrics(opKey, nil, "driver")
|
||||||
expected :=
|
expected :=
|
||||||
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
||||||
# TYPE snapshot_controller_operation_total_seconds histogram
|
# TYPE snapshot_controller_operation_total_seconds histogram
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="0.25"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="0.25"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown"} 0.3
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type=""} 0.3
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="unknown-status-operation",operation_status="Unknown",snapshot_type=""} 1
|
||||||
`
|
`
|
||||||
|
|
||||||
if err := verifyMetric(expected, srvAddr); err != nil {
|
if err := verifyMetric(expected, srvAddr); err != nil {
|
||||||
@@ -218,66 +217,65 @@ func TestRecordMetrics(t *testing.T) {
|
|||||||
srvAddr := "http://" + srv.Addr + httpPattern
|
srvAddr := "http://" + srv.Addr + httpPattern
|
||||||
defer shutdown(srv, wg)
|
defer shutdown(srv, wg)
|
||||||
// add an operation
|
// add an operation
|
||||||
op := Operation{
|
opKey := OperationKey{
|
||||||
Name: "op1",
|
Name: "op1",
|
||||||
Driver: "driver1",
|
|
||||||
ResourceID: types.UID("uid1"),
|
ResourceID: types.UID("uid1"),
|
||||||
}
|
}
|
||||||
mgr.OperationStart(op)
|
opVal := NewOperationValue("driver", DynamicSnapshotType)
|
||||||
|
mgr.OperationStart(opKey, opVal)
|
||||||
// should create a Success data point with latency ~ 1100ms
|
// should create a Success data point with latency ~ 1100ms
|
||||||
time.Sleep(1100 * time.Millisecond)
|
time.Sleep(1100 * time.Millisecond)
|
||||||
success := &fakeOpStatus{
|
success := &fakeOpStatus{
|
||||||
statusCode: 0,
|
statusCode: 0,
|
||||||
}
|
}
|
||||||
mgr.RecordMetrics(op, success)
|
mgr.RecordMetrics(opKey, success, "driver")
|
||||||
|
|
||||||
// add another operation metric
|
// add another operation metric
|
||||||
op.Name = "op2"
|
opKey.Name = "op2"
|
||||||
op.Driver = "driver2"
|
opKey.ResourceID = types.UID("uid2")
|
||||||
op.ResourceID = types.UID("uid2")
|
mgr.OperationStart(opKey, opVal)
|
||||||
mgr.OperationStart(op)
|
|
||||||
// should create a Failure data point with latency ~ 100ms
|
// should create a Failure data point with latency ~ 100ms
|
||||||
time.Sleep(100 * time.Millisecond)
|
time.Sleep(100 * time.Millisecond)
|
||||||
failure := &fakeOpStatus{
|
failure := &fakeOpStatus{
|
||||||
statusCode: 1,
|
statusCode: 1,
|
||||||
}
|
}
|
||||||
mgr.RecordMetrics(op, failure)
|
mgr.RecordMetrics(opKey, failure, "driver")
|
||||||
|
|
||||||
expected :=
|
expected :=
|
||||||
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
||||||
# TYPE snapshot_controller_operation_total_seconds histogram
|
# TYPE snapshot_controller_operation_total_seconds histogram
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="0.25"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="0.25"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="0.5"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="0.5"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="op1",operation_status="Success"} 1.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type=""} 1.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="op1",operation_status="Success"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type=""} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="0.25"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="0.25"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="op2",operation_status="Failure"} 0.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type=""} 0.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="op2",operation_status="Failure"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type=""} 1
|
||||||
`
|
`
|
||||||
if err := verifyMetric(expected, srvAddr); err != nil {
|
if err := verifyMetric(expected, srvAddr); err != nil {
|
||||||
t.Errorf("failed testing [%v]", err)
|
t.Errorf("failed testing [%v]", err)
|
||||||
@@ -295,15 +293,14 @@ func TestConcurrency(t *testing.T) {
|
|||||||
statusCode: 1,
|
statusCode: 1,
|
||||||
}
|
}
|
||||||
ops := []struct {
|
ops := []struct {
|
||||||
op Operation
|
op OperationKey
|
||||||
desiredLatencyMs time.Duration
|
desiredLatencyMs time.Duration
|
||||||
status OperationStatus
|
status OperationStatus
|
||||||
drop bool
|
drop bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
Operation{
|
OperationKey{
|
||||||
Name: "success1",
|
Name: "success1",
|
||||||
Driver: "driver1",
|
|
||||||
ResourceID: types.UID("uid1"),
|
ResourceID: types.UID("uid1"),
|
||||||
},
|
},
|
||||||
100,
|
100,
|
||||||
@@ -311,9 +308,8 @@ func TestConcurrency(t *testing.T) {
|
|||||||
false,
|
false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Operation{
|
OperationKey{
|
||||||
Name: "success2",
|
Name: "success2",
|
||||||
Driver: "driver2",
|
|
||||||
ResourceID: types.UID("uid2"),
|
ResourceID: types.UID("uid2"),
|
||||||
},
|
},
|
||||||
100,
|
100,
|
||||||
@@ -321,9 +317,8 @@ func TestConcurrency(t *testing.T) {
|
|||||||
false,
|
false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Operation{
|
OperationKey{
|
||||||
Name: "failure1",
|
Name: "failure1",
|
||||||
Driver: "driver3",
|
|
||||||
ResourceID: types.UID("uid3"),
|
ResourceID: types.UID("uid3"),
|
||||||
},
|
},
|
||||||
100,
|
100,
|
||||||
@@ -331,9 +326,8 @@ func TestConcurrency(t *testing.T) {
|
|||||||
false,
|
false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Operation{
|
OperationKey{
|
||||||
Name: "failure2",
|
Name: "failure2",
|
||||||
Driver: "driver4",
|
|
||||||
ResourceID: types.UID("uid4"),
|
ResourceID: types.UID("uid4"),
|
||||||
},
|
},
|
||||||
100,
|
100,
|
||||||
@@ -341,9 +335,8 @@ func TestConcurrency(t *testing.T) {
|
|||||||
false,
|
false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Operation{
|
OperationKey{
|
||||||
Name: "unknown",
|
Name: "unknown",
|
||||||
Driver: "driver5",
|
|
||||||
ResourceID: types.UID("uid5"),
|
ResourceID: types.UID("uid5"),
|
||||||
},
|
},
|
||||||
100,
|
100,
|
||||||
@@ -351,9 +344,8 @@ func TestConcurrency(t *testing.T) {
|
|||||||
false,
|
false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Operation{
|
OperationKey{
|
||||||
Name: "drop-from-cache",
|
Name: "drop-from-cache",
|
||||||
Driver: "driver6",
|
|
||||||
ResourceID: types.UID("uid6"),
|
ResourceID: types.UID("uid6"),
|
||||||
},
|
},
|
||||||
100,
|
100,
|
||||||
@@ -363,15 +355,20 @@ func TestConcurrency(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i := range ops {
|
for i := range ops {
|
||||||
mgr.OperationStart(ops[i].op)
|
mgr.OperationStart(ops[i].op, OperationValue{
|
||||||
|
Driver: fmt.Sprintf("driver%v", i),
|
||||||
|
SnapshotType: string(DynamicSnapshotType),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
// add an extra operation which should remain in the cache
|
// add an extra operation which should remain in the cache
|
||||||
remaining := Operation{
|
remaining := OperationKey{
|
||||||
Name: "remaining-in-cache",
|
Name: "remaining-in-cache",
|
||||||
Driver: "driver7",
|
|
||||||
ResourceID: types.UID("uid7"),
|
ResourceID: types.UID("uid7"),
|
||||||
}
|
}
|
||||||
mgr.OperationStart(remaining)
|
mgr.OperationStart(remaining, OperationValue{
|
||||||
|
Driver: "driver7",
|
||||||
|
SnapshotType: string(DynamicSnapshotType),
|
||||||
|
})
|
||||||
|
|
||||||
var wgMetrics sync.WaitGroup
|
var wgMetrics sync.WaitGroup
|
||||||
|
|
||||||
@@ -385,7 +382,7 @@ func TestConcurrency(t *testing.T) {
|
|||||||
if ops[i].drop {
|
if ops[i].drop {
|
||||||
mgr.DropOperation(ops[i].op)
|
mgr.DropOperation(ops[i].op)
|
||||||
} else {
|
} else {
|
||||||
mgr.RecordMetrics(ops[i].op, ops[i].status)
|
mgr.RecordMetrics(ops[i].op, ops[i].status, fmt.Sprintf("driver%v", i))
|
||||||
}
|
}
|
||||||
}(i)
|
}(i)
|
||||||
}
|
}
|
||||||
@@ -395,86 +392,86 @@ func TestConcurrency(t *testing.T) {
|
|||||||
expected :=
|
expected :=
|
||||||
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
`# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end
|
||||||
# TYPE snapshot_controller_operation_total_seconds histogram
|
# TYPE snapshot_controller_operation_total_seconds histogram
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="0.25"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="0.25"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="success1",operation_status="Success"} 0.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type=""} 0.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="success1",operation_status="Success"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="success1",operation_status="Success",snapshot_type=""} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="0.25"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="0.25"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="success2",operation_status="Success"} 0.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type=""} 0.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="success2",operation_status="Success"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="success2",operation_status="Success",snapshot_type=""} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="0.25"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="0.25"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver3",operation_name="failure1",operation_status="Failure"} 0.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type=""} 0.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver3",operation_name="failure1",operation_status="Failure"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver3",operation_name="failure1",operation_status="Failure",snapshot_type=""} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="0.25"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="0.25"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver4",operation_name="failure2",operation_status="Failure"} 0.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type=""} 0.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver4",operation_name="failure2",operation_status="Failure"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver4",operation_name="failure2",operation_status="Failure",snapshot_type=""} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="0.1"} 0
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="0.1"} 0
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="0.25"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="0.25"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="0.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="0.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="1"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="1"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="2.5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="2.5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="5"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="5"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="10"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="10"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="15"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="15"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="30"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="30"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="60"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="60"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="120"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="120"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="300"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="300"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="600"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="600"} 1
|
||||||
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",le="+Inf"} 1
|
snapshot_controller_operation_total_seconds_bucket{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type="",le="+Inf"} 1
|
||||||
snapshot_controller_operation_total_seconds_sum{driver_name="driver5",operation_name="unknown",operation_status="Unknown"} 0.1
|
snapshot_controller_operation_total_seconds_sum{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type=""} 0.1
|
||||||
snapshot_controller_operation_total_seconds_count{driver_name="driver5",operation_name="unknown",operation_status="Unknown"} 1
|
snapshot_controller_operation_total_seconds_count{driver_name="driver5",operation_name="unknown",operation_status="Unknown",snapshot_type=""} 1
|
||||||
`
|
`
|
||||||
if err := verifyMetric(expected, srvAddr); err != nil {
|
if err := verifyMetric(expected, srvAddr); err != nil {
|
||||||
t.Errorf("failed testing [%v]", err)
|
t.Errorf("failed testing [%v]", err)
|
||||||
@@ -494,63 +491,141 @@ func verifyMetric(expected, srvAddr string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
format := expfmt.ResponseFormat(rsp.Header)
|
format := expfmt.ResponseFormat(rsp.Header)
|
||||||
reader := strings.NewReader(string(r))
|
gotReader := strings.NewReader(string(r))
|
||||||
decoder := expfmt.NewDecoder(reader, format)
|
gotDecoder := expfmt.NewDecoder(gotReader, format)
|
||||||
mf := &cmg.MetricFamily{}
|
expectedReader := strings.NewReader(expected)
|
||||||
if err := decoder.Decode(mf); err != nil {
|
expectedDecoder := expfmt.NewDecoder(expectedReader, format)
|
||||||
return err
|
|
||||||
}
|
gotMfs := []*cmg.MetricFamily{}
|
||||||
reader = strings.NewReader(expected)
|
expectedMfs := []*cmg.MetricFamily{}
|
||||||
decoder = expfmt.NewDecoder(reader, format)
|
for {
|
||||||
|
gotMf := &cmg.MetricFamily{}
|
||||||
|
gotErr := gotDecoder.Decode(gotMf)
|
||||||
expectedMf := &cmg.MetricFamily{}
|
expectedMf := &cmg.MetricFamily{}
|
||||||
if err := decoder.Decode(expectedMf); err != nil {
|
if expectedErr := expectedDecoder.Decode(expectedMf); expectedErr != nil {
|
||||||
|
// return correctly if both are EOF
|
||||||
|
if expectedErr == io.EOF && gotErr == io.EOF {
|
||||||
|
break
|
||||||
|
} else {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !containsMetrics(expectedMf, mf) {
|
}
|
||||||
|
gotMfs = append(gotMfs, gotMf)
|
||||||
|
expectedMfs = append(expectedMfs, expectedMf)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !containsMetrics(expectedMfs, gotMfs) {
|
||||||
return fmt.Errorf("failed testing, expected\n%s\n, got\n%s\n", expected, string(r))
|
return fmt.Errorf("failed testing, expected\n%s\n, got\n%s\n", expected, string(r))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func containsMetrics(expected, got *cmg.MetricFamily) bool {
|
// sortMfs, sorts metric families in alphabetical order by type.
|
||||||
|
// currently only supports counter and histogram
|
||||||
|
func sortMfs(mfs []*cmg.MetricFamily) []*cmg.MetricFamily {
|
||||||
|
var sortedMfs []*cmg.MetricFamily
|
||||||
|
|
||||||
|
// Sort first by type
|
||||||
|
sort.Slice(mfs, func(i, j int) bool {
|
||||||
|
return *mfs[i].Type < *mfs[j].Type
|
||||||
|
})
|
||||||
|
|
||||||
|
// Next, sort by length of name
|
||||||
|
sort.Slice(mfs, func(i, j int) bool {
|
||||||
|
return len(*mfs[i].Name) < len(*mfs[j].Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
return sortedMfs
|
||||||
|
}
|
||||||
|
|
||||||
|
func containsMetrics(expectedMfs, gotMfs []*cmg.MetricFamily) bool {
|
||||||
|
if len(gotMfs) != len(expectedMfs) {
|
||||||
|
fmt.Printf("Not same length: expected and got metrics families: %v vs. %v\n", len(expectedMfs), len(gotMfs))
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// sort metric families for deterministic comparison.
|
||||||
|
sortedExpectedMfs := sortMfs(expectedMfs)
|
||||||
|
sortedGotMfs := sortMfs(gotMfs)
|
||||||
|
|
||||||
|
// compare expected vs. sorted actual metrics
|
||||||
|
for k, got := range sortedGotMfs {
|
||||||
|
matchCount := 0
|
||||||
|
expected := sortedExpectedMfs[k]
|
||||||
|
|
||||||
if (got.Name == nil || *(got.Name) != *(expected.Name)) ||
|
if (got.Name == nil || *(got.Name) != *(expected.Name)) ||
|
||||||
(got.Type == nil || *(got.Type) != *(expected.Type)) ||
|
(got.Type == nil || *(got.Type) != *(expected.Type)) ||
|
||||||
(got.Help == nil || *(got.Help) != *(expected.Help)) {
|
(got.Help == nil || *(got.Help) != *(expected.Help)) {
|
||||||
|
fmt.Printf("invalid header info: got: %v, expected: %v\n", *got.Name, *expected.Name)
|
||||||
|
fmt.Printf("invalid header info: got: %v, expected: %v\n", *got.Type, *expected.Type)
|
||||||
|
fmt.Printf("invalid header info: got: %v, expected: %v\n", *got.Help, *expected.Help)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
numRecords := len(expected.Metric)
|
numRecords := len(expected.Metric)
|
||||||
if len(got.Metric) < numRecords {
|
if len(got.Metric) < numRecords {
|
||||||
|
fmt.Printf("Not the same number of records: got.Metric: %v, numRecords: %v\n", len(got.Metric), numRecords)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
matchCount := 0
|
|
||||||
for i := 0; i < len(got.Metric); i++ {
|
for i := 0; i < len(got.Metric); i++ {
|
||||||
for j := 0; j < numRecords; j++ {
|
for j := 0; j < numRecords; j++ {
|
||||||
|
if got.Metric[i].Histogram == nil && expected.Metric[j].Histogram != nil ||
|
||||||
|
got.Metric[i].Histogram != nil && expected.Metric[j].Histogram == nil {
|
||||||
|
fmt.Printf("got metric and expected metric histogram type mismatch")
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// labels should be the same
|
// labels should be the same
|
||||||
if !reflect.DeepEqual(got.Metric[i].Label, expected.Metric[j].Label) {
|
if !reflect.DeepEqual(got.Metric[i].Label, expected.Metric[j].Label) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// metric type specific checks
|
||||||
|
switch {
|
||||||
|
case got.Metric[i].Histogram != nil && expected.Metric[j].Histogram != nil:
|
||||||
gh := got.Metric[i].Histogram
|
gh := got.Metric[i].Histogram
|
||||||
eh := expected.Metric[j].Histogram
|
eh := expected.Metric[j].Histogram
|
||||||
if gh == nil {
|
if gh == nil || eh == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if !reflect.DeepEqual(gh.Bucket, eh.Bucket) {
|
if !reflect.DeepEqual(gh.Bucket, eh.Bucket) {
|
||||||
|
fmt.Println("got and expected histogram bucket not equal")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is a sum record, expecting a latency which is more than the
|
// this is a sum record, expecting a latency which is more than the
|
||||||
// expected one. If the sum is smaller than expected, it will be considered
|
// expected one. If the sum is smaller than expected, it will be considered
|
||||||
// as NOT a match
|
// as NOT a match
|
||||||
if gh.SampleSum == nil || *(gh.SampleSum) < *(eh.SampleSum) {
|
if gh.SampleSum == nil || eh.SampleSum == nil || *(gh.SampleSum) < *(eh.SampleSum) {
|
||||||
|
fmt.Println("difference in sample sum")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if gh.SampleCount == nil || *(gh.SampleCount) != *(eh.SampleCount) {
|
if gh.SampleCount == nil || eh.SampleCount == nil || *(gh.SampleCount) != *(eh.SampleCount) {
|
||||||
|
fmt.Println("difference in sample count")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case got.Metric[i].Counter != nil && expected.Metric[j].Counter != nil:
|
||||||
|
gc := got.Metric[i].Counter
|
||||||
|
ec := expected.Metric[j].Counter
|
||||||
|
if gc.Value == nil || *(gc.Value) != *(ec.Value) {
|
||||||
|
fmt.Println("difference in counter values")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// this is a match
|
// this is a match
|
||||||
matchCount = matchCount + 1
|
matchCount = matchCount + 1
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return matchCount == numRecords
|
|
||||||
|
if matchCount != numRecords {
|
||||||
|
fmt.Printf("matchCount %v, numRecords %v\n", matchCount, numRecords)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
@@ -519,3 +519,8 @@ func IsSnapshotReady(snapshot *crdv1.VolumeSnapshot) bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsSnapshotCreated indicates that the snapshot has been cut on a storage system
|
||||||
|
func IsSnapshotCreated(snapshot *crdv1.VolumeSnapshot) bool {
|
||||||
|
return snapshot.Status != nil && snapshot.Status.CreationTime != nil
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user