Add generated file

This PR adds generated files under pkg/client and vendor folder.
This commit is contained in:
xing-yang
2018-07-12 10:55:15 -07:00
parent 36b1de0341
commit e213d1890d
17729 changed files with 5090889 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
# Every target in this package is public unless it sets its own visibility.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library for the e2e monitoring tests.
# srcs lists the Go files of the package; deps lists the Bazel targets they depend on
# (in-tree //test targets first, then vendored third-party packages).
go_library(
name = "go_default_library",
srcs = [
"accelerator.go",
"cadvisor.go",
"custom_metrics_deployments.go",
"custom_metrics_stackdriver.go",
"influxdb.go",
"metrics_grabber.go",
"prometheus.go",
"stackdriver.go",
"stackdriver_metadata_agent.go",
],
importpath = "k8s.io/kubernetes/test/e2e/instrumentation/monitoring",
deps = [
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e/framework/metrics:go_default_library",
"//test/e2e/instrumentation/common:go_default_library",
"//test/e2e/scheduling:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/github.com/influxdata/influxdb/client/v2:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/github.com/prometheus/common/model:go_default_library",
"//vendor/golang.org/x/oauth2/google:go_default_library",
"//vendor/google.golang.org/api/monitoring/v3:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/api/rbac/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/discovery:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/metrics/pkg/client/custom_metrics:go_default_library",
"//vendor/k8s.io/metrics/pkg/client/external_metrics:go_default_library",
],
)
# Collects every file under this package (glob "**"); visible only inside this package.
# NOTE(review): the "automanaged" tag suggests this target is maintained by tooling - confirm before hand-editing.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Exposes the files of :package-srcs under the name all-srcs. No visibility is set here,
# so the package-level default applies.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@@ -0,0 +1,134 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"context"
"os"
"time"
. "github.com/onsi/ginkgo"
"golang.org/x/oauth2/google"
gcm "google.golang.org/api/monitoring/v3"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
"k8s.io/kubernetes/test/e2e/scheduling"
"k8s.io/kubernetes/test/utils/image"
)
// acceleratorMetrics lists the Stackdriver container accelerator metrics, as described here:
// https://cloud.google.com/monitoring/api/metrics_gcp#gcp-container
// checkForAcceleratorMetrics fetches a time series for each entry of this list.
var acceleratorMetrics = []string{
"accelerator/duty_cycle",
"accelerator/memory_total",
"accelerator/memory_used",
}
// Registers the "Stackdriver Monitoring" accelerator spec.
var _ = instrumentation.SIGDescribe("Stackdriver Monitoring", func() {
// Skip the spec unless the provider is gce or gke.
BeforeEach(func() {
framework.SkipUnlessProviderIs("gce", "gke")
})
f := framework.NewDefaultFramework("stackdriver-monitoring")
It("should have accelerator metrics [Feature:StackdriverAcceleratorMonitoring]", func() {
testStackdriverAcceleratorMonitoring(f)
})
})
// testStackdriverAcceleratorMonitoring sets up an NVIDIA GPU node, creates a pod that
// requests one GPU, and polls Stackdriver until a time series has been received for
// every metric in acceleratorMetrics. Any error along the way fails the test.
func testStackdriverAcceleratorMonitoring(f *framework.Framework) {
	projectId := framework.TestContext.CloudConfig.ProjectID

	ctx := context.Background()
	client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
	// Check the credential lookup on its own; otherwise its error is overwritten by
	// gcm.New below and the real cause of the failure is lost.
	framework.ExpectNoError(err)

	gcmService, err := gcm.New(client)
	framework.ExpectNoError(err)

	// set this env var if accessing Stackdriver test endpoint (default is prod):
	// $ export STACKDRIVER_API_ENDPOINT_OVERRIDE=https://test-monitoring.sandbox.googleapis.com/
	basePathOverride := os.Getenv("STACKDRIVER_API_ENDPOINT_OVERRIDE")
	if basePathOverride != "" {
		gcmService.BasePath = basePathOverride
	}

	scheduling.SetupNVIDIAGPUNode(f, false)

	// TODO: remove this after cAdvisor race is fixed.
	time.Sleep(time.Minute)

	// The pod runs nvidia-smi once and then sleeps forever while holding one GPU.
	f.PodClient().Create(&v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: rcName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Name:    rcName,
					Image:   image.GetE2EImage(image.CudaVectorAdd),
					Command: []string{"/bin/sh", "-c"},
					Args:    []string{"nvidia-smi && sleep infinity"},
					Resources: v1.ResourceRequirements{
						Limits: v1.ResourceList{
							framework.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
						},
					},
				},
			},
		},
	})

	// metricsMap is filled in by the polling function so that the metrics still
	// missing can be logged if the poll times out.
	metricsMap := map[string]bool{}
	pollingFunction := checkForAcceleratorMetrics(projectId, gcmService, time.Now(), metricsMap)
	err = wait.Poll(pollFrequency, pollTimeout, pollingFunction)
	if err != nil {
		framework.Logf("Missing metrics: %+v", metricsMap)
	}
	framework.ExpectNoError(err)
}
// checkForAcceleratorMetrics returns a condition function suitable for wait.Poll.
// Each call resets every entry of metricsMap to false, fetches the time series of each
// metric in acceleratorMetrics between start and the current time, and sets the entry
// of every metric that returned at least one time series to true. The condition
// reports true once all metrics have data; a fetch error fails the test.
func checkForAcceleratorMetrics(projectId string, gcmService *gcm.Service, start time.Time, metricsMap map[string]bool) func() (bool, error) {
	return func() (bool, error) {
		for _, metric := range acceleratorMetrics {
			metricsMap[metric] = false
		}

		received := 0
		for _, metric := range acceleratorMetrics {
			// TODO: check only for metrics from this cluster
			ts, err := fetchTimeSeries(projectId, gcmService, metric, start, time.Now())
			framework.ExpectNoError(err)
			if len(ts) == 0 {
				framework.Logf("No timeseries for metric %v", metric)
				continue
			}
			received++
			metricsMap[metric] = true
			framework.Logf("Received %v timeseries for metric %v", len(ts), metric)
		}

		// Compare against the list length instead of a hard-coded 3 so the check
		// stays correct when acceleratorMetrics gains or loses entries.
		return received >= len(acceleratorMetrics), nil
	}
}

View File

@@ -0,0 +1,88 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"fmt"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
. "github.com/onsi/ginkgo"
)
// Registers the "Cadvisor" spec, which checks cadvisor health on all nodes with a
// 5 minute timeout per stats request.
var _ = instrumentation.SIGDescribe("Cadvisor", func() {
f := framework.NewDefaultFramework("cadvisor")
It("should be healthy on every node.", func() {
CheckCadvisorHealthOnAllNodes(f.ClientSet, 5*time.Minute)
})
})
// CheckCadvisorHealthOnAllNodes queries the kubelet '/stats/' endpoint of every node
// through the API server proxy and fails the test if some node still returns an error
// after the configured number of attempts.
//
// timeout is applied to each individual stats request. The number of attempts and the
// pause between them come from framework.TestContext.Cadvisor, defaulting to 6
// attempts and 10000 milliseconds when the configured values are zero.
func CheckCadvisorHealthOnAllNodes(c clientset.Interface, timeout time.Duration) {
	// It should be OK to list unschedulable Nodes here.
	By("getting list of nodes")
	nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err)

	// returns maxRetries, sleepDuration
	readConfig := func() (int, time.Duration) {
		// Read in configuration settings, reasonable defaults.
		retry := framework.TestContext.Cadvisor.MaxRetries
		if retry == 0 {
			retry = 6
			framework.Logf("Overriding default retry value of zero to %d", retry)
		}
		sleepDurationMS := framework.TestContext.Cadvisor.SleepDurationMS
		if sleepDurationMS == 0 {
			sleepDurationMS = 10000
			framework.Logf("Overriding default milliseconds value of zero to %d", sleepDurationMS)
		}
		return retry, time.Duration(sleepDurationMS) * time.Millisecond
	}
	maxRetries, sleepDuration := readConfig()

	var errs []error
	// Count attempts upwards instead of decrementing maxRetries, so the failure
	// message below reports how many passes were made rather than a counter that
	// has already reached zero.
	attempts := 0
	for {
		attempts++
		errs = []error{}
		for _, node := range nodeList.Items {
			// cadvisor is not accessible directly unless its port (4194 by default) is exposed.
			// Here, we access '/stats/' REST endpoint on the kubelet which polls cadvisor internally.
			statsResource := fmt.Sprintf("api/v1/nodes/%s/proxy/stats/", node.Name)
			By(fmt.Sprintf("Querying stats from node %s using url %s", node.Name, statsResource))
			_, err = c.CoreV1().RESTClient().Get().AbsPath(statsResource).Timeout(timeout).Do().Raw()
			if err != nil {
				errs = append(errs, err)
			}
		}
		if len(errs) == 0 {
			return
		}
		if attempts >= maxRetries {
			break
		}
		framework.Logf("failed to retrieve kubelet stats -\n %v", errs)
		time.Sleep(sleepDuration)
	}
	framework.Failf("Failed after retrying %d times for cadvisor to be healthy on all nodes. Errors:\n%v", attempts, errs)
}

View File

@@ -0,0 +1,327 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"fmt"
"strings"
gcm "google.golang.org/api/monitoring/v3"
corev1 "k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
rbac "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/test/e2e/framework"
"os/exec"
)
var (
// CustomMetricName is the name of the custom metric exported by
// SimpleStackdriverExporterDeployment and PrometheusExporterDeployment.
CustomMetricName = "foo"
// UnusedMetricName is the name of the second metric for which CreateDescriptors
// creates a descriptor.
UnusedMetricName = "unused"
// CustomMetricValue is a fixed metric value.
// NOTE(review): presumably the value exported for CustomMetricName - confirm against callers.
CustomMetricValue = int64(448)
// UnusedMetricValue is a fixed metric value.
// NOTE(review): presumably the value exported for UnusedMetricName - confirm against callers.
UnusedMetricValue = int64(446)
// StackdriverExporter is the container name used for the Stackdriver exporter.
StackdriverExporter = "stackdriver-exporter"
// HPAPermissions is a ClusterRoleBinding that, for testing purposes, binds the
// system:unauthenticated group to the system:controller:horizontal-pod-autoscaler
// ClusterRole, i.e. it should grant permission to read custom metrics.
HPAPermissions = &rbac.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "custom-metrics-reader",
},
RoleRef: rbac.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: "system:controller:horizontal-pod-autoscaler",
},
Subjects: []rbac.Subject{
{
APIGroup: "rbac.authorization.k8s.io",
Kind: "Group",
Name: "system:unauthenticated",
},
},
}
// StagingDeploymentsLocation is the base URL that CreateAdapter prepends to the
// adapter deployment file name.
StagingDeploymentsLocation = "https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/deploy/staging/"
// AdapterForOldResourceModel is the adapter deployment file for the old resource model.
AdapterForOldResourceModel = "adapter_old_resource_model.yaml"
// AdapterForNewResourceModel is the adapter deployment file for the new resource model.
AdapterForNewResourceModel = "adapter_new_resource_model.yaml"
// AdapterDefault is the adapter deployment file used by default (the old resource model).
AdapterDefault = AdapterForOldResourceModel
// ClusterAdminBinding is the name of the cluster role binding created by
// createClusterAdminBinding and removed by cleanupClusterAdminBinding.
ClusterAdminBinding = "e2e-test-cluster-admin-binding"
)
// CustomMetricContainerSpec allows to specify a config for StackdriverExporterDeployment
// with multiple containers exporting different metrics.
type CustomMetricContainerSpec struct {
// Name is the name given to the exporter container.
Name string
// MetricName is passed to the exporter as its --metric-name flag.
MetricName string
// MetricValue is passed to the exporter as its --metric-value flag.
MetricValue int64
}
// SimpleStackdriverExporterDeployment builds a Deployment of a simple application with a
// single StackdriverExporter container that exports CustomMetricName with the fixed
// value metricValue to Stackdriver in a loop.
func SimpleStackdriverExporterDeployment(name, namespace string, replicas int32, metricValue int64) *extensions.Deployment {
	exporter := CustomMetricContainerSpec{
		Name:        StackdriverExporter,
		MetricName:  CustomMetricName,
		MetricValue: metricValue,
	}
	return StackdriverExporterDeployment(name, namespace, replicas, []CustomMetricContainerSpec{exporter})
}
// StackdriverExporterDeployment builds a Deployment of an application that can expose
// an arbitrary number of fixed-value metrics to Stackdriver in a loop. Every entry of
// containers becomes one exporter container in the pod template, so each metric is
// exposed by a different container of the same pod. Pods are labelled and selected
// with "name": name.
func StackdriverExporterDeployment(name, namespace string, replicas int32, containers []CustomMetricContainerSpec) *extensions.Deployment {
	exporters := make([]corev1.Container, 0, len(containers))
	for i := range containers {
		spec := containers[i]
		exporters = append(exporters, stackdriverExporterContainerSpec(spec.Name, namespace, spec.MetricName, spec.MetricValue))
	}

	deployment := &extensions.Deployment{}
	deployment.ObjectMeta = metav1.ObjectMeta{
		Name:      name,
		Namespace: namespace,
	}
	deployment.Spec = extensions.DeploymentSpec{
		Selector: &metav1.LabelSelector{
			MatchLabels: map[string]string{"name": name},
		},
		Template: corev1.PodTemplateSpec{
			ObjectMeta: metav1.ObjectMeta{
				Labels: map[string]string{"name": name},
			},
			Spec: corev1.PodSpec{Containers: exporters},
		},
		Replicas: &replicas,
	}
	return deployment
}
// StackdriverExporterPod builds a Pod of a simple application that exports the metric
// metricName with the fixed value metricValue to Stackdriver in a loop. The pod is
// labelled "name": podLabel and runs one container named StackdriverExporter.
func StackdriverExporterPod(podName, namespace, podLabel, metricName string, metricValue int64) *corev1.Pod {
	exporter := stackdriverExporterContainerSpec(StackdriverExporter, namespace, metricName, metricValue)

	meta := metav1.ObjectMeta{
		Name:      podName,
		Namespace: namespace,
		Labels:    map[string]string{"name": podLabel},
	}
	return &corev1.Pod{
		ObjectMeta: meta,
		Spec:       corev1.PodSpec{Containers: []corev1.Container{exporter}},
	}
}
// stackdriverExporterContainerSpec returns the container definition for the
// sd-dummy-exporter image. The exporter is started through /bin/sh with flags for the
// pod id, pod name, namespace, metric name and metric value; POD_ID and POD_NAME are
// injected from the pod's metadata.uid and metadata.name. Both resource-model flags
// are passed.
func stackdriverExporterContainerSpec(name string, namespace string, metricName string, metricValue int64) corev1.Container {
	// fieldEnv builds an env var whose value is taken from a field of the pod.
	fieldEnv := func(envName, fieldPath string) corev1.EnvVar {
		return corev1.EnvVar{
			Name: envName,
			ValueFrom: &corev1.EnvVarSource{
				FieldRef: &corev1.ObjectFieldSelector{
					FieldPath: fieldPath,
				},
			},
		}
	}

	exporterArgs := []string{
		"./sd_dummy_exporter",
		"--pod-id=$(POD_ID)",
		"--pod-name=$(POD_NAME)",
		"--namespace=" + namespace,
		"--metric-name=" + metricName,
		fmt.Sprintf("--metric-value=%v", metricValue),
		"--use-old-resource-model",
		"--use-new-resource-model",
	}
	shellCommand := strings.Join(exporterArgs, " ")

	return corev1.Container{
		Name:            name,
		Image:           "k8s.gcr.io/sd-dummy-exporter:v0.2.0",
		ImagePullPolicy: corev1.PullPolicy("Always"),
		Command:         []string{"/bin/sh", "-c", shellCommand},
		Env: []corev1.EnvVar{
			fieldEnv("POD_ID", "metadata.uid"),
			fieldEnv("POD_NAME", "metadata.name"),
		},
		Ports: []corev1.ContainerPort{{ContainerPort: 80}},
	}
}
// PrometheusExporterDeployment builds a Deployment of a simple application with two
// containers: one exposing CustomMetricName in Prometheus format, and a
// prometheus-to-sd container that scrapes the metric and pushes it to Stackdriver.
// The exporter port is fixed at 8080.
func PrometheusExporterDeployment(name, namespace string, replicas int32, metricValue int64) *extensions.Deployment {
	template := corev1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{
			Labels: map[string]string{"name": name},
		},
		Spec: prometheusExporterPodSpec(CustomMetricName, metricValue, 8080),
	}
	selector := &metav1.LabelSelector{
		MatchLabels: map[string]string{"name": name},
	}

	return &extensions.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
		},
		Spec: extensions.DeploymentSpec{
			Selector: selector,
			Template: template,
			Replicas: &replicas,
		},
	}
}
// prometheusExporterPodSpec returns a PodSpec with two containers:
//   - "prometheus-exporter", started with the metric name, the fixed metric value and
//     the port, and declaring that port as its container port;
//   - "prometheus-to-sd", pointed at http://localhost:<port> and configured with the
//     custom.googleapis.com Stackdriver prefix. POD_ID and POD_NAMESPACE are injected
//     from the pod's metadata.uid and metadata.namespace.
func prometheusExporterPodSpec(metricName string, metricValue int64, port int32) corev1.PodSpec {
	return corev1.PodSpec{
		Containers: []corev1.Container{
			{
				Name:            "prometheus-exporter",
				Image:           "k8s.gcr.io/prometheus-dummy-exporter:v0.1.0",
				ImagePullPolicy: corev1.PullPolicy("Always"),
				Command: []string{
					"/prometheus_dummy_exporter",
					"--metric-name=" + metricName,
					fmt.Sprintf("--metric-value=%v", metricValue),
					// The flag used to be spelled "=--port=%d". An argument that does
					// not start with '-' is not parsed as a flag, so the requested
					// port was silently ignored.
					fmt.Sprintf("--port=%d", port),
				},
				Ports: []corev1.ContainerPort{{ContainerPort: port}},
			},
			{
				Name:            "prometheus-to-sd",
				Image:           "k8s.gcr.io/prometheus-to-sd:v0.2.3",
				ImagePullPolicy: corev1.PullPolicy("Always"),
				Command: []string{
					"/monitor",
					fmt.Sprintf("--source=:http://localhost:%d", port),
					"--stackdriver-prefix=custom.googleapis.com",
					"--pod-id=$(POD_ID)",
					"--namespace-id=$(POD_NAMESPACE)",
				},
				Env: []corev1.EnvVar{
					{
						Name: "POD_ID",
						ValueFrom: &corev1.EnvVarSource{
							FieldRef: &corev1.ObjectFieldSelector{
								FieldPath: "metadata.uid",
							},
						},
					},
					{
						Name: "POD_NAMESPACE",
						ValueFrom: &corev1.EnvVarSource{
							FieldRef: &corev1.ObjectFieldSelector{
								FieldPath: "metadata.namespace",
							},
						},
					},
				},
			},
		},
	}
}
// CreateAdapter creates the Custom Metrics - Stackdriver adapter.
// adapterDeploymentFile should be the file name of an adapter deployment located in
// StagingDeploymentsLocation. It returns the first error hit while creating the
// cluster admin binding, downloading the manifest, or running kubectl create.
func CreateAdapter(adapterDeploymentFile string) error {
	// A workaround to make this work on GKE. GKE doesn't normally allow to create cluster roles,
	// which the adapter deployment does. The solution is to create cluster role binding for
	// cluster-admin role and currently used service account.
	if err := createClusterAdminBinding(); err != nil {
		return err
	}

	adapterURL := StagingDeploymentsLocation + adapterDeploymentFile
	// Download a local copy of the manifest; CleanupAdapter later deletes the
	// resources and the file by adapterDeploymentFile.
	if err := exec.Command("wget", adapterURL).Run(); err != nil {
		return err
	}

	stat, err := framework.RunKubectl("create", "-f", adapterURL)
	// Log the kubectl output as data, not as a format string, so that any '%' in
	// it is printed verbatim.
	framework.Logf("%s", stat)
	return err
}
// createClusterAdminBinding binds the cluster-admin ClusterRole to the account that
// gcloud reports as core/account, under the name ClusterAdminBinding. It returns the
// error of the gcloud or kubectl invocation, if any.
func createClusterAdminBinding() error {
	stdout, stderr, err := framework.RunCmd("gcloud", "config", "get-value", "core/account")
	if err != nil {
		// Command output is logged as data, never used as the format string.
		framework.Logf("%s", stderr)
		return err
	}
	serviceAccount := strings.TrimSpace(stdout)
	framework.Logf("current service account: %q", serviceAccount)

	stat, err := framework.RunKubectl("create", "clusterrolebinding", ClusterAdminBinding, "--clusterrole=cluster-admin", "--user="+serviceAccount)
	framework.Logf("%s", stat)
	return err
}
// CreateDescriptors creates INT64 GAUGE metric descriptors of type
// custom.googleapis.com/<name> for CustomMetricName and UnusedMetricName, in that
// order, in the given project. It stops at and returns the first error.
func CreateDescriptors(service *gcm.Service, projectId string) error {
	for _, metricName := range []string{CustomMetricName, UnusedMetricName} {
		descriptor := &gcm.MetricDescriptor{
			Name:       metricName,
			ValueType:  "INT64",
			Type:       "custom.googleapis.com/" + metricName,
			MetricKind: "GAUGE",
		}
		parent := fmt.Sprintf("projects/%s", projectId)
		if _, err := service.Projects.MetricDescriptors.Create(parent, descriptor).Do(); err != nil {
			return err
		}
	}
	return nil
}
// CleanupDescriptors deletes descriptors for metrics: CustomMetricName and UnusedMetricName.
// Deletion is best effort: a failure is logged with the name of the metric whose
// descriptor could not be deleted, and the remaining descriptor is still attempted.
// TODO: Cleanup time series as well
func CleanupDescriptors(service *gcm.Service, projectId string) {
	for _, metricName := range []string{CustomMetricName, UnusedMetricName} {
		descriptor := fmt.Sprintf("projects/%s/metricDescriptors/custom.googleapis.com/%s", projectId, metricName)
		if _, err := service.Projects.MetricDescriptors.Delete(descriptor).Do(); err != nil {
			// Report the metric that actually failed; the second log used to name
			// CustomMetricName even when UnusedMetricName was the one that failed.
			framework.Logf("Failed to delete descriptor for metric '%s': %v", metricName, err)
		}
	}
}
// CleanupAdapter deletes Custom Metrics - Stackdriver adapter deployments.
// It runs kubectl delete on adapterDeploymentFile, removes that file, and then
// removes the cluster admin binding. Every step is best effort: failures are logged
// and the remaining steps still run.
func CleanupAdapter(adapterDeploymentFile string) {
	stat, err := framework.RunKubectl("delete", "-f", adapterDeploymentFile)
	// Log the kubectl output as data, not as a format string.
	framework.Logf("%s", stat)
	if err != nil {
		framework.Logf("Failed to delete adapter deployments: %s", err)
	}

	if err := exec.Command("rm", adapterDeploymentFile).Run(); err != nil {
		framework.Logf("Failed to delete adapter deployment file: %s", err)
	}

	cleanupClusterAdminBinding()
}
// cleanupClusterAdminBinding deletes the cluster role binding named
// ClusterAdminBinding. A failure is only logged.
func cleanupClusterAdminBinding() {
	stat, err := framework.RunKubectl("delete", "clusterrolebinding", ClusterAdminBinding)
	// Log the kubectl output as data, not as a format string.
	framework.Logf("%s", stat)
	if err != nil {
		framework.Logf("Failed to delete cluster admin binding: %s", err)
	}
}

View File

@@ -0,0 +1,259 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"context"
"time"
"golang.org/x/oauth2/google"
clientset "k8s.io/client-go/kubernetes"
. "github.com/onsi/ginkgo"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
gcm "google.golang.org/api/monitoring/v3"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/client-go/discovery"
"k8s.io/kubernetes/test/e2e/framework"
customclient "k8s.io/metrics/pkg/client/custom_metrics"
externalclient "k8s.io/metrics/pkg/client/external_metrics"
)
const (
// stackdriverExporterPod1 is the pod created to export CustomMetricName.
stackdriverExporterPod1 = "stackdriver-exporter-1"
// stackdriverExporterPod2 is the pod created to export UnusedMetricName.
stackdriverExporterPod2 = "stackdriver-exporter-2"
// stackdriverExporterLabel is the value of the "name" label set on both exporter pods.
stackdriverExporterLabel = "stackdriver-exporter"
)
// Registers the "Stackdriver Monitoring" specs for the Custom Metrics - Stackdriver
// adapter: custom metrics with the old and the new resource model, and external metrics.
var _ = instrumentation.SIGDescribe("Stackdriver Monitoring", func() {
// Skip the specs unless the provider is gce or gke.
BeforeEach(func() {
framework.SkipUnlessProviderIs("gce", "gke")
})
f := framework.NewDefaultFramework("stackdriver-monitoring")
// Custom metrics API served by the adapter deployed for the old resource model.
It("should run Custom Metrics - Stackdriver Adapter for old resource model [Feature:StackdriverCustomMetrics]", func() {
kubeClient := f.ClientSet
config, err := framework.LoadConfig()
if err != nil {
framework.Failf("Failed to load config: %s", err)
}
customMetricsClient := customclient.NewForConfigOrDie(config)
discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(config)
testCustomMetrics(f, kubeClient, customMetricsClient, discoveryClient, AdapterForOldResourceModel)
})
// Custom metrics API served by the adapter deployed for the new resource model.
It("should run Custom Metrics - Stackdriver Adapter for new resource model [Feature:StackdriverCustomMetrics]", func() {
kubeClient := f.ClientSet
config, err := framework.LoadConfig()
if err != nil {
framework.Failf("Failed to load config: %s", err)
}
customMetricsClient := customclient.NewForConfigOrDie(config)
discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(config)
testCustomMetrics(f, kubeClient, customMetricsClient, discoveryClient, AdapterForNewResourceModel)
})
// External metrics API served by the adapter.
It("should run Custom Metrics - Stackdriver Adapter for external metrics [Feature:StackdriverExternalMetrics]", func() {
kubeClient := f.ClientSet
config, err := framework.LoadConfig()
if err != nil {
framework.Failf("Failed to load config: %s", err)
}
externalMetricsClient := externalclient.NewForConfigOrDie(config)
testExternalMetrics(f, kubeClient, externalMetricsClient)
})
})
// testCustomMetrics creates the metric descriptors, deploys the adapter given by
// adapterDeployment, grants HPAPermissions, starts the exporter pods, waits 60
// seconds for metrics to be exported, and then verifies the responses of the Custom
// Metrics API. Everything it creates is cleaned up through deferred calls; any setup
// error fails the test.
func testCustomMetrics(f *framework.Framework, kubeClient clientset.Interface, customMetricsClient customclient.CustomMetricsClient, discoveryClient *discovery.DiscoveryClient, adapterDeployment string) {
	projectId := framework.TestContext.CloudConfig.ProjectID

	ctx := context.Background()
	client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
	// Check the credential lookup on its own; otherwise its error is overwritten by
	// gcm.New below and the real cause of the failure is lost.
	if err != nil {
		framework.Failf("Failed to create default Google client, %v", err)
	}

	gcmService, err := gcm.New(client)
	if err != nil {
		framework.Failf("Failed to create gcm service, %v", err)
	}

	// Set up a cluster: create a custom metric and set up k8s-sd adapter
	err = CreateDescriptors(gcmService, projectId)
	if err != nil {
		framework.Failf("Failed to create metric descriptor: %s", err)
	}
	defer CleanupDescriptors(gcmService, projectId)

	err = CreateAdapter(adapterDeployment)
	if err != nil {
		framework.Failf("Failed to set up: %s", err)
	}
	defer CleanupAdapter(adapterDeployment)

	// The result of this call used to be dropped, which hid permission problems
	// until the metric queries failed later on.
	_, err = kubeClient.RbacV1().ClusterRoleBindings().Create(HPAPermissions)
	if err != nil {
		framework.Failf("Failed to create ClusterRoleBindings: %v", err)
	}
	defer kubeClient.RbacV1().ClusterRoleBindings().Delete("custom-metrics-reader", &metav1.DeleteOptions{})

	// Run application that exports the metric
	_, err = createSDExporterPods(f, kubeClient)
	if err != nil {
		framework.Failf("Failed to create stackdriver-exporter pod: %s", err)
	}
	defer cleanupSDExporterPod(f, kubeClient)

	// Wait a short amount of time to create a pod and export some metrics
	// TODO: add some events to wait for instead of fixed amount of time
	// i.e. pod creation, first time series exported
	time.Sleep(60 * time.Second)

	verifyResponsesFromCustomMetricsAPI(f, customMetricsClient, discoveryClient)
}
// testExternalMetrics creates the metric descriptors, deploys the adapter for the old
// resource model, grants HPAPermissions, starts the exporter pods, waits 60 seconds
// for metrics to be exported, and then verifies the response of the External Metrics
// API for the first exporter pod. Everything it creates is cleaned up through
// deferred calls; any setup error fails the test.
// TODO(kawych): migrate this test to new resource model
func testExternalMetrics(f *framework.Framework, kubeClient clientset.Interface, externalMetricsClient externalclient.ExternalMetricsClient) {
	projectId := framework.TestContext.CloudConfig.ProjectID

	ctx := context.Background()
	client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
	// Check the credential lookup on its own; otherwise its error is overwritten by
	// gcm.New below and the real cause of the failure is lost.
	if err != nil {
		framework.Failf("Failed to create default Google client, %v", err)
	}

	gcmService, err := gcm.New(client)
	if err != nil {
		framework.Failf("Failed to create gcm service, %v", err)
	}

	// Set up a cluster: create a custom metric and set up k8s-sd adapter
	err = CreateDescriptors(gcmService, projectId)
	if err != nil {
		framework.Failf("Failed to create metric descriptor: %s", err)
	}
	defer CleanupDescriptors(gcmService, projectId)

	// Both deployments - for old and new resource model - expose External Metrics API.
	err = CreateAdapter(AdapterForOldResourceModel)
	if err != nil {
		framework.Failf("Failed to set up: %s", err)
	}
	defer CleanupAdapter(AdapterForOldResourceModel)

	// The result of this call used to be dropped, which hid permission problems
	// until the metric queries failed later on.
	_, err = kubeClient.RbacV1().ClusterRoleBindings().Create(HPAPermissions)
	if err != nil {
		framework.Failf("Failed to create ClusterRoleBindings: %v", err)
	}
	defer kubeClient.RbacV1().ClusterRoleBindings().Delete("custom-metrics-reader", &metav1.DeleteOptions{})

	// Run application that exports the metric
	pod, err := createSDExporterPods(f, kubeClient)
	if err != nil {
		framework.Failf("Failed to create stackdriver-exporter pod: %s", err)
	}
	defer cleanupSDExporterPod(f, kubeClient)

	// Wait a short amount of time to create a pod and export some metrics
	// TODO: add some events to wait for instead of fixed amount of time
	// i.e. pod creation, first time series exported
	time.Sleep(60 * time.Second)

	verifyResponseFromExternalMetricsAPI(f, externalMetricsClient, pod)
}
// verifyResponsesFromCustomMetricsAPI fails the test unless the Custom Metrics API
//   - lists both CustomMetricName and UnusedMetricName under custom.metrics.k8s.io/v1beta1,
//   - returns CustomMetricValue for CustomMetricName on pod stackdriverExporterPod1, and
//   - returns exactly one result, for that same pod and value, when CustomMetricName is
//     queried for pods labelled "name": stackdriverExporterLabel.
func verifyResponsesFromCustomMetricsAPI(f *framework.Framework, customMetricsClient customclient.CustomMetricsClient, discoveryClient *discovery.DiscoveryClient) {
	resources, err := discoveryClient.ServerResourcesForGroupVersion("custom.metrics.k8s.io/v1beta1")
	if err != nil {
		framework.Failf("Failed to retrieve a list of supported metrics: %s", err)
	}
	for _, metricName := range []string{CustomMetricName, UnusedMetricName} {
		if !containsResource(resources.APIResources, "*/custom.googleapis.com|"+metricName) {
			framework.Failf("Metric '%s' expected but not received", metricName)
		}
	}

	podKind := schema.GroupKind{Group: "", Kind: "Pod"}

	// Query the metric for a single, named pod.
	value, err := customMetricsClient.NamespacedMetrics(f.Namespace.Name).GetForObject(podKind, stackdriverExporterPod1, CustomMetricName)
	if err != nil {
		framework.Failf("Failed query: %s", err)
	}
	if value.Value.Value() != CustomMetricValue {
		framework.Failf("Unexpected metric value for metric %s: expected %v but received %v", CustomMetricName, CustomMetricValue, value.Value)
	}

	// Query the metric for all pods carrying the exporter label.
	filter, err := labels.NewRequirement("name", selection.Equals, []string{stackdriverExporterLabel})
	if err != nil {
		framework.Failf("Couldn't create a label filter")
	}
	podSelector := labels.NewSelector().Add(*filter)
	values, err := customMetricsClient.NamespacedMetrics(f.Namespace.Name).GetForObjects(podKind, podSelector, CustomMetricName)
	if err != nil {
		framework.Failf("Failed query: %s", err)
	}
	if len(values.Items) != 1 {
		framework.Failf("Expected results for exactly 1 pod, but %v results received", len(values.Items))
	}
	first := &values.Items[0]
	if first.DescribedObject.Name != stackdriverExporterPod1 || first.Value.Value() != CustomMetricValue {
		framework.Failf("Unexpected metric value for metric %s and pod %s: %v", CustomMetricName, first.DescribedObject.Name, first.Value.Value())
	}
}
// containsResource reports whether resourcesList has an entry whose Name equals
// resourceName.
func containsResource(resourcesList []metav1.APIResource, resourceName string) bool {
	for i := range resourcesList {
		if resourcesList[i].Name != resourceName {
			continue
		}
		return true
	}
	return false
}
// verifyResponseFromExternalMetricsAPI queries the External Metrics API for
// custom.googleapis.com|CustomMetricName, filtered to the given pod, and fails the
// test unless exactly one value is returned with the expected metric name, value
// CustomMetricValue and a resource.labels.pod_id label equal to the pod's UID.
func verifyResponseFromExternalMetricsAPI(f *framework.Framework, externalMetricsClient externalclient.ExternalMetricsClient, pod *v1.Pod) {
	// The requirements below are built from constant, valid inputs, so their errors
	// are deliberately ignored. Together they exercise every selection operator.
	req1, _ := labels.NewRequirement("resource.type", selection.Equals, []string{"gke_container"})
	// It's important to filter out only metrics from the right namespace, since multiple e2e tests
	// may run in the same project concurrently. "dummy" is added to test
	req2, _ := labels.NewRequirement("resource.labels.pod_id", selection.In, []string{string(pod.UID), "dummy"})
	req3, _ := labels.NewRequirement("resource.labels.namespace_id", selection.Exists, []string{})
	req4, _ := labels.NewRequirement("resource.labels.zone", selection.NotEquals, []string{"dummy"})
	req5, _ := labels.NewRequirement("resource.labels.cluster_name", selection.NotIn, []string{"foo", "bar"})

	values, err := externalMetricsClient.
		NamespacedMetrics("dummy").
		List("custom.googleapis.com|"+CustomMetricName, labels.NewSelector().Add(*req1, *req2, *req3, *req4, *req5))
	if err != nil {
		framework.Failf("Failed query: %s", err)
	}
	if len(values.Items) != 1 {
		// The verb used to be written "% values", which fmt parses as "% v" followed
		// by the literal "alues".
		framework.Failf("Expected exactly one external metric value, but %v values received", len(values.Items))
	}
	if values.Items[0].MetricName != "custom.googleapis.com|"+CustomMetricName ||
		values.Items[0].Value.Value() != CustomMetricValue ||
		// Check one label just to make sure labels are included
		values.Items[0].MetricLabels["resource.labels.pod_id"] != string(pod.UID) {
		framework.Failf("Unexpected result for metric %s: %v", CustomMetricName, values.Items[0])
	}
}
// cleanupSDExporterPod deletes both exporter pods from the framework's namespace.
// A failed deletion is logged and does not stop the other pod from being deleted.
func cleanupSDExporterPod(f *framework.Framework, cs clientset.Interface) {
	for _, podName := range []string{stackdriverExporterPod1, stackdriverExporterPod2} {
		if err := cs.CoreV1().Pods(f.Namespace.Name).Delete(podName, &metav1.DeleteOptions{}); err != nil {
			framework.Logf("Failed to delete %s pod: %v", podName, err)
		}
	}
}
// createSDExporterPods creates the two exporter pods in the framework's namespace:
// stackdriverExporterPod1 exporting CustomMetricName and stackdriverExporterPod2
// exporting UnusedMetricName. It returns the first pod as created by the API server.
// If the first creation fails it returns nil and that error; otherwise it returns the
// first pod together with the error (if any) of the second creation.
func createSDExporterPods(f *framework.Framework, cs clientset.Interface) (*v1.Pod, error) {
	namespace := f.Namespace.Name

	firstSpec := StackdriverExporterPod(stackdriverExporterPod1, namespace, stackdriverExporterLabel, CustomMetricName, CustomMetricValue)
	created, err := cs.CoreV1().Pods(namespace).Create(firstSpec)
	if err != nil {
		return nil, err
	}

	secondSpec := StackdriverExporterPod(stackdriverExporterPod2, namespace, stackdriverExporterLabel, UnusedMetricName, UnusedMetricValue)
	_, err = cs.CoreV1().Pods(namespace).Create(secondSpec)
	return created, err
}

View File

@@ -0,0 +1,323 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"bytes"
"context"
"encoding/json"
"fmt"
"time"
influxdb "github.com/influxdata/influxdb/client/v2"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
. "github.com/onsi/ginkgo"
)
// Registers the "Monitoring" spec for Heapster with InfluxDB.
var _ = instrumentation.SIGDescribe("Monitoring", func() {
f := framework.NewDefaultFramework("monitoring")
// Skip the spec unless the provider is gce and the cluster monitoring mode is influxdb.
BeforeEach(func() {
framework.SkipUnlessProviderIs("gce")
framework.SkipUnlessClusterMonitoringModeIs("influxdb")
})
It("should verify monitoring pods and all cluster nodes are available on influxdb using heapster [Feature:InfluxdbMonitoring]", func() {
testMonitoringUsingHeapsterInfluxdb(f.ClientSet)
})
})
const (
// influxdbService is the name of the kube-system service whose "api" port Query proxies to.
influxdbService = "monitoring-influxdb"
// influxdbDatabaseName is sent by Query as the "db" request parameter.
influxdbDatabaseName = "k8s"
// podlistQuery is an InfluxDB query for the pod_name tag values of "cpu/usage".
podlistQuery = "show tag values from \"cpu/usage\" with key = pod_name"
// nodelistQuery is an InfluxDB query for the nodename tag values of "cpu/usage".
nodelistQuery = "show tag values from \"cpu/usage\" with key = nodename"
// NOTE(review): the durations below are presumably the retry interval and the
// overall timeouts of the monitoring test - confirm against their users.
sleepBetweenAttempts = 5 * time.Second
testTimeout = 5 * time.Minute
initializationTimeout = 5 * time.Minute
)
var (
// rcLabels holds the "k8s-app" label values that identify the monitoring
// workloads to look for in the system namespace.
rcLabels = []string{"heapster", "influxGrafana"}
// expectedServices is keyed by the names of the services that must exist
// in the system namespace.
expectedServices = map[string]bool{
influxdbService: false,
"monitoring-grafana": false,
}
)
// Query sends the given query string to the monitoring-influxdb service
// through the API server's service proxy and returns the decoded response.
// The call is bounded by framework.SingleCallTimeout; if the request fails
// and that deadline has expired, the test is failed outright.
func Query(c clientset.Interface, query string) (*influxdb.Response, error) {
	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()

	request := c.CoreV1().RESTClient().Get().
		Context(ctx).
		Namespace("kube-system").
		Resource("services").
		Name(influxdbService + ":api").
		SubResource("proxy").
		Suffix("query").
		Param("q", query).
		Param("db", influxdbDatabaseName).
		Param("epoch", "s")

	raw, err := request.Do().Raw()
	if err != nil {
		if ctx.Err() != nil {
			framework.Failf("Failed to query influx db: %v", err)
		}
		return nil, err
	}

	// UseNumber keeps numeric values as json.Number instead of float64.
	decoder := json.NewDecoder(bytes.NewReader(raw))
	decoder.UseNumber()
	response := new(influxdb.Response)
	if err := decoder.Decode(response); err != nil {
		return nil, err
	}
	return response, nil
}
// verifyExpectedRcsExistAndGetExpectedPods checks that, for every label in
// rcLabels, exactly one workload (replication controller, deployment or
// stateful set) carrying that "k8s-app" label exists in the system namespace.
// It returns the names of the pods selected by those workloads that are not
// being deleted.
func verifyExpectedRcsExistAndGetExpectedPods(c clientset.Interface) ([]string, error) {
	expectedPods := []string{}
	// Iterate over the labels that identify the replication controllers that we
	// want to check. The rcLabels contains the values for the k8s-app key
	// that identify the replication controllers that we want to check. Using a label
	// rather than an explicit name is preferred because the names will typically have
	// a version suffix e.g. heapster-monitoring-v1 and this will change after a rolling
	// update e.g. to heapster-monitoring-v2. By using a label query we can check for the
	// situation when a heapster-monitoring-v1 and heapster-monitoring-v2 replication controller
	// is running (which would be an error except during a rolling update).
	for _, rcLabel := range rcLabels {
		selector := labels.Set{"k8s-app": rcLabel}.AsSelector()
		options := metav1.ListOptions{LabelSelector: selector.String()}
		deploymentList, err := c.ExtensionsV1beta1().Deployments(metav1.NamespaceSystem).List(options)
		if err != nil {
			return nil, err
		}
		rcList, err := c.CoreV1().ReplicationControllers(metav1.NamespaceSystem).List(options)
		if err != nil {
			return nil, err
		}
		psList, err := c.AppsV1().StatefulSets(metav1.NamespaceSystem).List(options)
		if err != nil {
			return nil, err
		}
		// Report the same total that is being checked, not just the RC count.
		total := len(rcList.Items) + len(deploymentList.Items) + len(psList.Items)
		if total != 1 {
			return nil, fmt.Errorf("expected to find one replica for RC or deployment with label %s but got %d",
				rcLabel, total)
		}

		// Gather the pod selectors in the original order: replication
		// controllers, then deployments, then stateful sets.
		podSelectors := make([]map[string]string, 0, total)
		for _, rc := range rcList.Items {
			podSelectors = append(podSelectors, rc.Spec.Selector)
		}
		for _, deployment := range deploymentList.Items {
			podSelectors = append(podSelectors, deployment.Spec.Selector.MatchLabels)
		}
		for _, ps := range psList.Items {
			podSelectors = append(podSelectors, ps.Spec.Selector.MatchLabels)
		}
		for _, podLabels := range podSelectors {
			expectedPods, err = appendLivePodNames(c, expectedPods, podLabels)
			if err != nil {
				return nil, err
			}
		}
	}
	return expectedPods, nil
}

// appendLivePodNames lists the system-namespace pods matching podLabels and
// appends to names the name of every pod that has no deletion timestamp.
func appendLivePodNames(c clientset.Interface, names []string, podLabels map[string]string) ([]string, error) {
	options := metav1.ListOptions{LabelSelector: labels.Set(podLabels).AsSelector().String()}
	podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(options)
	if err != nil {
		return nil, err
	}
	for _, pod := range podList.Items {
		if pod.DeletionTimestamp != nil {
			continue
		}
		names = append(names, pod.Name)
	}
	return names, nil
}
// expectedServicesExist returns nil when every service named in
// expectedServices currently exists in the system namespace, and an error
// naming a missing service otherwise.
//
// The result is computed from the current service list only; the
// package-level expectedServices map is read but never modified, so a service
// seen by an earlier call is not remembered once it disappears.
func expectedServicesExist(c clientset.Interface) error {
	serviceList, err := c.CoreV1().Services(metav1.NamespaceSystem).List(metav1.ListOptions{})
	if err != nil {
		return err
	}
	present := make(map[string]bool, len(serviceList.Items))
	for _, service := range serviceList.Items {
		present[service.Name] = true
	}
	for service := range expectedServices {
		if !present[service] {
			return fmt.Errorf("Service %q not found", service)
		}
	}
	return nil
}
// getAllNodesInCluster returns the names of all nodes known to the API
// server, including unschedulable ones.
func getAllNodesInCluster(c clientset.Interface) ([]string, error) {
	// It should be OK to list unschedulable Nodes here.
	nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	names := make([]string, len(nodeList.Items))
	for i := range nodeList.Items {
		names[i] = nodeList.Items[i].Name
	}
	return names, nil
}
// getInfluxdbData runs query against InfluxDB and returns the set of values
// found in the second column of the single series it expects back.
//
// The tag parameter is accepted for the benefit of existing callers but is
// not used. An unexpected number of results or series, or a row that does not
// hold a string in its second column, is returned as an error so the caller
// can retry; an unexpected column count fails the test.
func getInfluxdbData(c clientset.Interface, query string, tag string) (map[string]bool, error) {
	response, err := Query(c, query)
	if err != nil {
		return nil, err
	}
	if len(response.Results) != 1 {
		return nil, fmt.Errorf("expected only one result from Influxdb for query %q. Got %+v", query, response)
	}
	if len(response.Results[0].Series) != 1 {
		return nil, fmt.Errorf("expected exactly one series for query %q.", query)
	}
	series := response.Results[0].Series[0]
	if len(series.Columns) != 2 {
		framework.Failf("Expected two columns for query %q. Found %v", query, series.Columns)
	}
	result := make(map[string]bool, len(series.Values))
	for _, value := range series.Values {
		// Guard the index and the type assertion so a malformed row is
		// reported as an error rather than a panic.
		if len(value) < 2 {
			return nil, fmt.Errorf("expected two values per row for query %q, got %v", query, value)
		}
		name, ok := value[1].(string)
		if !ok {
			return nil, fmt.Errorf("expected a string value for query %q, got %T", query, value[1])
		}
		result[name] = true
	}
	return result, nil
}
// expectedItemsExist reports whether every entry of expectedItems is a key of
// actualItems. It returns false straight away when actualItems has fewer
// entries than expectedItems.
func expectedItemsExist(expectedItems []string, actualItems map[string]bool) bool {
	if len(expectedItems) > len(actualItems) {
		return false
	}
	for i := 0; i < len(expectedItems); i++ {
		_, present := actualItems[expectedItems[i]]
		if present {
			continue
		}
		return false
	}
	return true
}
// validatePodsAndNodes queries InfluxDB for the pods and nodes it knows about
// and reports whether all expectedPods and expectedNodes are among them.
// Query failures and missing items are logged and reported as false.
func validatePodsAndNodes(c clientset.Interface, expectedPods, expectedNodes []string) bool {
	pods, podErr := getInfluxdbData(c, podlistQuery, "pod_id")
	if podErr != nil {
		// We don't fail the test here because the influxdb service might still not be running.
		framework.Logf("failed to query list of pods from influxdb. Query: %q, Err: %v", podlistQuery, podErr)
		return false
	}
	nodes, nodeErr := getInfluxdbData(c, nodelistQuery, "hostname")
	if nodeErr != nil {
		framework.Logf("failed to query list of nodes from influxdb. Query: %q, Err: %v", nodelistQuery, nodeErr)
		return false
	}

	switch {
	case !expectedItemsExist(expectedPods, pods):
		framework.Logf("failed to find all expected Pods.\nExpected: %v\nActual: %v", expectedPods, pods)
		return false
	case !expectedItemsExist(expectedNodes, nodes):
		framework.Logf("failed to find all expected Nodes.\nExpected: %v\nActual: %v", expectedNodes, nodes)
		return false
	}
	return true
}
// testMonitoringUsingHeapsterInfluxdb waits for the monitoring workloads and
// services to exist, then polls InfluxDB until it reports every expected pod
// and node or testTimeout elapses, in which case the test is failed.
func testMonitoringUsingHeapsterInfluxdb(c clientset.Interface) {
	// Check if heapster pods and services are up.
	var expectedPods []string
	rcErr := fmt.Errorf("failed to verify expected RCs within timeout")
	serviceErr := fmt.Errorf("failed to verify expected services within timeout")
	componentsReady := func() (bool, error) {
		if expectedPods, rcErr = verifyExpectedRcsExistAndGetExpectedPods(c); rcErr != nil {
			framework.Logf("Waiting for expected RCs (got error: %v)", rcErr)
			return false, nil
		}
		if serviceErr = expectedServicesExist(c); serviceErr != nil {
			framework.Logf("Waiting for expected services (got error: %v)", serviceErr)
			return false, nil
		}
		return true, nil
	}
	if pollErr := wait.PollImmediate(sleepBetweenAttempts, initializationTimeout, componentsReady); pollErr != nil {
		// Surface the most recent underlying error before the generic failure.
		framework.ExpectNoError(rcErr)
		framework.ExpectNoError(serviceErr)
		framework.Failf("Failed to verify RCs and services within timeout: %v", pollErr)
	}

	expectedNodes, err := getAllNodesInCluster(c)
	framework.ExpectNoError(err)

	began := time.Now()
	for !validatePodsAndNodes(c, expectedPods, expectedNodes) {
		if time.Since(began) >= testTimeout {
			// temporary workaround to help debug issue #12765
			printDebugInfo(c)
			framework.Failf("monitoring using heapster and influxdb test failed")
			return
		}
		time.Sleep(sleepBetweenAttempts)
	}
}
// printDebugInfo logs the kubectl log output of the "heapster" container for
// every system-namespace pod labelled k8s-app=heapster.
func printDebugInfo(c clientset.Interface) {
	heapsterSelector := labels.Set{"k8s-app": "heapster"}.AsSelector().String()
	podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(metav1.ListOptions{LabelSelector: heapsterSelector})
	if err != nil {
		framework.Logf("Error while listing pods %v", err)
		return
	}
	for i := range podList.Items {
		output := framework.RunKubectlOrDie("log", podList.Items[i].Name, "--namespace=kube-system", "--container=heapster")
		framework.Logf("Kubectl output:\n%v", output)
	}
}

View File

@@ -0,0 +1,102 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"strings"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/metrics"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
gin "github.com/onsi/ginkgo"
gom "github.com/onsi/gomega"
)
// Registers the "MetricsGrabber" e2e suite, which checks that metrics can be
// grabbed from the API server, a kubelet, the scheduler and the controller
// manager.
var _ = instrumentation.SIGDescribe("MetricsGrabber", func() {
	f := framework.NewDefaultFramework("metrics-grabber")
	var c, ec clientset.Interface
	var grabber *metrics.MetricsGrabber

	// masterRegistered reports whether any node's name ends in "master".
	// The scheduler and controller-manager tests are skipped when it is false.
	masterRegistered := func() bool {
		nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
		framework.ExpectNoError(err)
		for _, node := range nodes.Items {
			if strings.HasSuffix(node.Name, "master") {
				return true
			}
		}
		return false
	}

	gin.BeforeEach(func() {
		var err error
		c = f.ClientSet
		ec = f.KubemarkExternalClusterClientSet
		grabber, err = metrics.NewMetricsGrabber(c, ec, true, true, true, true, true)
		framework.ExpectNoError(err)
	})

	gin.It("should grab all metrics from API server.", func() {
		gin.By("Connecting to /metrics endpoint")
		response, err := grabber.GrabFromApiServer()
		framework.ExpectNoError(err)
		gom.Expect(response).NotTo(gom.BeEmpty())
	})

	gin.It("should grab all metrics from a Kubelet.", func() {
		gin.By("Proxying to Node through the API server")
		nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
		gom.Expect(nodes.Items).NotTo(gom.BeEmpty())
		response, err := grabber.GrabFromKubelet(nodes.Items[0].Name)
		framework.ExpectNoError(err)
		gom.Expect(response).NotTo(gom.BeEmpty())
	})

	gin.It("should grab all metrics from a Scheduler.", func() {
		gin.By("Proxying to Pod through the API server")
		// Check if master Node is registered
		if !masterRegistered() {
			framework.Logf("Master is not registered. Skipping testing Scheduler metrics.")
			return
		}
		response, err := grabber.GrabFromScheduler()
		framework.ExpectNoError(err)
		gom.Expect(response).NotTo(gom.BeEmpty())
	})

	gin.It("should grab all metrics from a ControllerManager.", func() {
		gin.By("Proxying to Pod through the API server")
		// Check if master Node is registered
		if !masterRegistered() {
			framework.Logf("Master is not registered. Skipping testing ControllerManager metrics.")
			return
		}
		response, err := grabber.GrabFromControllerManager()
		framework.ExpectNoError(err)
		gom.Expect(response).NotTo(gom.BeEmpty())
	})
})

View File

@@ -0,0 +1,382 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"context"
"encoding/json"
"fmt"
"math"
"time"
"github.com/prometheus/common/model"
. "github.com/onsi/ginkgo"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
)
const (
// prometheusQueryStep is the resolution step passed to range queries.
prometheusQueryStep = time.Minute
// prometheusMetricErrorTolerance is the relative error at or above which a
// sample is rejected by the value-validating tests.
prometheusMetricErrorTolerance = 0.25
// prometheusMetricValidationDuration is how far back fetchQueryValues is
// asked to look when validating query values.
prometheusMetricValidationDuration = time.Minute * 2
// prometheusRate is the range used for irate in the container CPU query.
prometheusRate = time.Minute * 2
// prometheusRequiredNodesUpDuration is how far back the per-node metric
// availability check looks.
prometheusRequiredNodesUpDuration = time.Minute * 5
// prometheusService is the kube-system service proxied to on port 9090.
prometheusService = "prometheus"
// prometheusSleepBetweenAttempts is the pause between validation retries.
prometheusSleepBetweenAttempts = time.Second * 30
// prometheusTestTimeout bounds each retry loop in the tests.
prometheusTestTimeout = time.Minute * 5
// customMetricValue is both the value handed to the metric exporters and
// the value the queries are expected to return.
customMetricValue = 1000
// targetCPUUsage is the expected result of the CPU query; the consumer is
// started with targetCPUUsage*1000.
targetCPUUsage = 0.1
)
// Registers the Prometheus monitoring e2e suite. Every test retries its
// validation until it succeeds or prometheusTestTimeout elapses.
var _ = instrumentation.SIGDescribe("[Feature:PrometheusMonitoring] Prometheus", func() {
BeforeEach(func() {
framework.SkipUnlessPrometheusMonitoringIsEnabled()
})
f := framework.NewDefaultFramework("prometheus-monitoring")
It("should scrape container metrics from all nodes.", func() {
expectedNodes, err := getAllNodes(f.ClientSet)
framework.ExpectNoError(err)
retryUntilSucceeds(func() error {
return validateMetricAvailableForAllNodes(f.ClientSet, `container_cpu_usage_seconds_total`, expectedNodes)
}, prometheusTestTimeout)
})
It("should successfully scrape all targets", func() {
retryUntilSucceeds(func() error {
return validateAllActiveTargetsAreHealthy(f.ClientSet)
}, prometheusTestTimeout)
})
It("should contain correct container CPU metric.", func() {
query := prometheusCPUQuery(f.Namespace.Name, "prometheus-cpu-consumer", prometheusRate)
// Start a CPU consumer, then expect at least 3 samples close to targetCPUUsage.
consumer := consumeCPUResources(f, "prometheus-cpu-consumer", targetCPUUsage*1000)
defer consumer.CleanUp()
retryUntilSucceeds(func() error {
return validateQueryReturnsCorrectValues(f.ClientSet, query, targetCPUUsage, 3, prometheusMetricErrorTolerance)
}, prometheusTestTimeout)
})
It("should scrape metrics from annotated pods.", func() {
query := prometheusPodCustomMetricQuery(f.Namespace.Name, "prometheus-custom-pod-metric")
// The exporter is created with scrape annotations on its pods.
consumer := exportCustomMetricFromPod(f, "prometheus-custom-pod-metric", customMetricValue)
defer consumer.CleanUp()
retryUntilSucceeds(func() error {
return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
}, prometheusTestTimeout)
})
It("should scrape metrics from annotated services.", func() {
query := prometheusServiceCustomMetricQuery(f.Namespace.Name, "prometheus-custom-service-metric")
// The exporter is created with scrape annotations on its service.
consumer := exportCustomMetricFromService(f, "prometheus-custom-service-metric", customMetricValue)
defer consumer.CleanUp()
retryUntilSucceeds(func() error {
return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
}, prometheusTestTimeout)
})
})
// prometheusCPUQuery builds the query summing the irate of
// container_cpu_usage_seconds_total over containers in namespace whose pod
// name starts with podNamePrefix. The irate range is rate truncated to whole
// minutes.
func prometheusCPUQuery(namespace, podNamePrefix string, rate time.Duration) string {
	const queryTemplate = `sum(irate(container_cpu_usage_seconds_total{namespace="%v",pod_name=~"%v.*",image!=""}[%vm]))`
	wholeMinutes := int64(rate.Minutes())
	return fmt.Sprintf(queryTemplate, namespace, podNamePrefix, wholeMinutes)
}
// prometheusServiceCustomMetricQuery builds the query summing the QPS metric
// for the given service name in namespace.
func prometheusServiceCustomMetricQuery(namespace, service string) string {
	const queryTemplate = `sum(QPS{kubernetes_namespace="%v",kubernetes_name="%v"})`
	query := fmt.Sprintf(queryTemplate, namespace, service)
	return query
}
// prometheusPodCustomMetricQuery builds the query summing the QPS metric over
// pods in namespace whose name starts with podNamePrefix.
func prometheusPodCustomMetricQuery(namespace, podNamePrefix string) string {
	const queryTemplate = `sum(QPS{kubernetes_namespace="%s",kubernetes_pod_name=~"%s.*"})`
	query := fmt.Sprintf(queryTemplate, namespace, podNamePrefix)
	return query
}
// consumeCPUResources starts a single-replica deployment-based resource
// consumer named consumerName in the test namespace, passing cpuUsage both as
// the int CPU argument and, widened to int64, as the following CPU argument.
// The caller is responsible for cleaning the returned consumer up.
func consumeCPUResources(f *framework.Framework, consumerName string, cpuUsage int) *common.ResourceConsumer {
	cpuUsage64 := int64(cpuUsage)
	return common.NewDynamicResourceConsumer(
		consumerName,
		f.Namespace.Name,
		common.KindDeployment,
		1,
		cpuUsage,
		memoryUsed,
		0,
		cpuUsage64,
		memoryLimit,
		f.ClientSet,
		f.InternalClientset,
		f.ScalesGetter,
	)
}
// exportCustomMetricFromPod starts a metric exporter named consumerName in the
// test namespace, passing the Prometheus scrape annotations in the
// pod-annotations argument and nil for the service annotations.
func exportCustomMetricFromPod(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
	scrapeAnnotations := make(map[string]string, 3)
	scrapeAnnotations["prometheus.io/scrape"] = "true"
	scrapeAnnotations["prometheus.io/path"] = "/Metrics"
	scrapeAnnotations["prometheus.io/port"] = "8080"
	return common.NewMetricExporter(
		consumerName,
		f.Namespace.Name,
		scrapeAnnotations,
		nil,
		metricValue,
		f.ClientSet,
		f.InternalClientset,
		f.ScalesGetter,
	)
}
// exportCustomMetricFromService starts a metric exporter named consumerName in
// the test namespace, passing the Prometheus scrape annotations in the
// service-annotations argument and nil for the pod annotations.
func exportCustomMetricFromService(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
	scrapeAnnotations := make(map[string]string, 3)
	scrapeAnnotations["prometheus.io/scrape"] = "true"
	scrapeAnnotations["prometheus.io/path"] = "/Metrics"
	scrapeAnnotations["prometheus.io/port"] = "8080"
	return common.NewMetricExporter(
		consumerName,
		f.Namespace.Name,
		nil,
		scrapeAnnotations,
		metricValue,
		f.ClientSet,
		f.InternalClientset,
		f.ScalesGetter,
	)
}
// validateMetricAvailableForAllNodes returns an error unless every name in
// expectedNodesNames appears as an "instance" label value for metric within
// the last prometheusRequiredNodesUpDuration.
func validateMetricAvailableForAllNodes(c clientset.Interface, metric string, expectedNodesNames []string) error {
	instanceLabels, err := getInstanceLabelsAvailableForMetric(c, prometheusRequiredNodesUpDuration, metric)
	if err != nil {
		return err
	}

	reporting := make(map[string]bool)
	for i := range instanceLabels {
		reporting[instanceLabels[i]] = true
	}

	var missing int
	for _, nodeName := range expectedNodesNames {
		if !reporting[nodeName] {
			missing++
		}
	}
	if missing == 0 {
		return nil
	}
	return fmt.Errorf("Metric not found for %v out of %v nodes", missing, len(expectedNodesNames))
}
// validateAllActiveTargetsAreHealthy returns an error when Prometheus reports
// no active targets or when any active target's health is not HealthGood.
func validateAllActiveTargetsAreHealthy(c clientset.Interface) error {
	discovery, err := fetchPrometheusTargetDiscovery(c)
	if err != nil {
		return err
	}
	active := discovery.ActiveTargets
	if len(active) == 0 {
		return fmt.Errorf("Prometheus is not scraping any targets, at least one target is required")
	}
	for i := range active {
		if target := active[i]; target.Health != HealthGood {
			return fmt.Errorf("Target health not good. Target: %v", target)
		}
	}
	return nil
}
// validateQueryReturnsCorrectValues runs query over the last
// prometheusMetricValidationDuration and returns an error if fewer than
// minSamplesCount samples come back or if any sample's relative deviation from
// expectedValue is at or above errorTolerance.
func validateQueryReturnsCorrectValues(c clientset.Interface, query string, expectedValue float64, minSamplesCount int, errorTolerance float64) error {
	samples, err := fetchQueryValues(c, query, prometheusMetricValidationDuration)
	if err != nil {
		return err
	}
	if minSamplesCount > len(samples) {
		return fmt.Errorf("Not enough samples for query '%v', got %v", query, samples)
	}
	framework.Logf("Executed query '%v' returned %v", query, samples)
	for i := range samples {
		relativeError := math.Abs(samples[i]-expectedValue) / expectedValue
		if relativeError < errorTolerance {
			continue
		}
		return fmt.Errorf("Query result values outside expected value tolerance. Expected error below %v, got %v", errorTolerance, relativeError)
	}
	return nil
}
// fetchQueryValues runs query as a range query covering the last duration and
// returns every sample value from every returned stream as a flat slice.
// It returns an error if the query fails or the response is not a matrix.
func fetchQueryValues(c clientset.Interface, query string, duration time.Duration) ([]float64, error) {
	now := time.Now()
	response, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
	if err != nil {
		return nil, err
	}
	m, ok := response.(model.Matrix)
	if !ok {
		return nil, fmt.Errorf("Expected matrix response, got: %T", response)
	}
	values := make([]float64, 0)
	for _, stream := range m {
		for _, sample := range stream.Values {
			values = append(values, float64(sample.Value))
		}
	}
	return values, nil
}
// getInstanceLabelsAvailableForMetric queries sum(metric)by(instance) over the
// last duration and returns the "instance" label value of every returned
// stream that has one. A non-matrix response fails the test.
func getInstanceLabelsAvailableForMetric(c clientset.Interface, duration time.Duration, metric string) ([]string, error) {
	query := fmt.Sprintf(`sum(%v)by(instance)`, metric)
	end := time.Now()
	result, err := queryPrometheus(c, query, end.Add(-duration), end, prometheusQueryStep)
	if err != nil {
		return nil, err
	}

	instanceLabels := make([]string, 0)
	matrix, isMatrix := result.(model.Matrix)
	if !isMatrix {
		framework.Failf("Expected matrix response for query '%v', got: %T", query, result)
		return instanceLabels, nil
	}
	for _, stream := range matrix {
		if instance, found := stream.Metric["instance"]; found {
			instanceLabels = append(instanceLabels, string(instance))
		}
	}
	return instanceLabels, nil
}
// fetchPrometheusTargetDiscovery fetches api/v1/targets from the prometheus
// service through the API server's service proxy and returns the decoded
// target discovery data. The call is bounded by framework.SingleCallTimeout.
//
// On a request failure the raw response is printed and the error returned; a
// response that cannot be decoded is also returned as an error.
func fetchPrometheusTargetDiscovery(c clientset.Interface) (TargetDiscovery, error) {
	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()
	response, err := c.CoreV1().RESTClient().Get().
		Context(ctx).
		Namespace("kube-system").
		Resource("services").
		Name(prometheusService+":9090").
		SubResource("proxy").
		Suffix("api", "v1", "targets").
		Do().
		Raw()
	if err != nil {
		// Print, not Printf: the body is data and may contain '%'.
		fmt.Print(string(response))
		return TargetDiscovery{}, err
	}
	var qres promTargetsResponse
	if err := json.Unmarshal(response, &qres); err != nil {
		return TargetDiscovery{}, err
	}
	return qres.Data, nil
}
// promTargetsResponse is the JSON envelope decoded from the response of the
// Prometheus api/v1/targets endpoint.
type promTargetsResponse struct {
Status string `json:"status"`
Data TargetDiscovery `json:"data"`
}
// TargetDiscovery is the "data" payload of a targets response: the active
// and the dropped targets.
type TargetDiscovery struct {
ActiveTargets []*Target `json:"activeTargets"`
DroppedTargets []*DroppedTarget `json:"droppedTargets"`
}
// Target describes one active target as decoded from a targets response.
// Only Health is inspected by the tests in this file.
type Target struct {
DiscoveredLabels map[string]string `json:"discoveredLabels"`
Labels map[string]string `json:"labels"`
ScrapeURL string `json:"scrapeUrl"`
LastError string `json:"lastError"`
LastScrape time.Time `json:"lastScrape"`
Health TargetHealth `json:"health"`
}
// DroppedTarget describes one dropped target as decoded from a targets response.
type DroppedTarget struct {
// Labels before any processing.
DiscoveredLabels map[string]string `json:"discoveredLabels"`
}
// Values of the "health" field of a target. Only HealthGood is accepted by
// validateAllActiveTargetsAreHealthy.
const (
HealthUnknown TargetHealth = "unknown"
HealthGood TargetHealth = "up"
HealthBad TargetHealth = "down"
)
// TargetHealth is the string form of a target's health as decoded from the
// "health" JSON field.
type TargetHealth string
// queryPrometheus runs query as an api/v1/query_range request against the
// prometheus service through the API server's service proxy, covering
// [start, end] at the given step, and returns the decoded result value. The
// call is bounded by framework.SingleCallTimeout.
//
// On a request failure the raw response is printed and the error returned; a
// response that cannot be decoded yields a nil value and the decode error.
func queryPrometheus(c clientset.Interface, query string, start, end time.Time, step time.Duration) (model.Value, error) {
	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()
	response, err := c.CoreV1().RESTClient().Get().
		Context(ctx).
		Namespace("kube-system").
		Resource("services").
		Name(prometheusService+":9090").
		SubResource("proxy").
		Suffix("api", "v1", "query_range").
		Param("query", query).
		Param("start", fmt.Sprintf("%v", start.Unix())).
		Param("end", fmt.Sprintf("%v", end.Unix())).
		Param("step", fmt.Sprintf("%vs", step.Seconds())).
		Do().
		Raw()
	if err != nil {
		// Print, not Printf: the body is data and may contain '%'.
		fmt.Print(string(response))
		return nil, err
	}
	var qres promQueryResponse
	if err := json.Unmarshal(response, &qres); err != nil {
		return nil, err
	}
	return model.Value(qres.Data.v), nil
}
// promQueryResponse is the JSON envelope decoded from the response of the
// Prometheus api/v1/query_range endpoint.
type promQueryResponse struct {
Status string `json:"status"`
Data responseData `json:"data"`
}
// responseData is the "data" payload of a query response. Its UnmarshalJSON
// method decodes the result into v according to the result type; that method
// does not assign Type or Result.
type responseData struct {
Type model.ValueType `json:"resultType"`
Result interface{} `json:"result"`
// The decoded value.
v model.Value
}
// UnmarshalJSON decodes a query "data" payload: it reads resultType, then
// decodes result into the matching model type (scalar, vector or matrix) and
// stores it in qr.v. An unrecognised resultType is an error.
func (qr *responseData) UnmarshalJSON(b []byte) error {
	var envelope struct {
		Type   model.ValueType `json:"resultType"`
		Result json.RawMessage `json:"result"`
	}
	if err := json.Unmarshal(b, &envelope); err != nil {
		return err
	}

	// qr.v is assigned even when decoding the result fails, as before.
	switch envelope.Type {
	case model.ValScalar:
		scalar := new(model.Scalar)
		err := json.Unmarshal(envelope.Result, scalar)
		qr.v = scalar
		return err
	case model.ValVector:
		var vector model.Vector
		err := json.Unmarshal(envelope.Result, &vector)
		qr.v = vector
		return err
	case model.ValMatrix:
		var matrix model.Matrix
		err := json.Unmarshal(envelope.Result, &matrix)
		qr.v = matrix
		return err
	}
	return fmt.Errorf("unexpected value type %q", envelope.Type)
}
// retryUntilSucceeds calls validator repeatedly, sleeping
// prometheusSleepBetweenAttempts between attempts, until it returns nil.
// If timeout has elapsed after a failed attempt, the test is failed with the
// last error.
func retryUntilSucceeds(validator func() error, timeout time.Duration) {
	startTime := time.Now()
	var err error
	for {
		err = validator()
		if err == nil {
			return
		}
		if time.Since(startTime) >= timeout {
			break
		}
		// Pass the error as an argument, not as the format string, so that
		// any '%' in its text is logged verbatim.
		framework.Logf("%v", err)
		time.Sleep(prometheusSleepBetweenAttempts)
	}
	framework.Failf("%v", err)
}
// getAllNodes returns the names of all nodes known to the API server.
func getAllNodes(c clientset.Interface) ([]string, error) {
	nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	names := make([]string, len(nodeList.Items))
	for i := range nodeList.Items {
		names[i] = nodeList.Items[i].Name
	}
	return names, nil
}

View File

@@ -0,0 +1,189 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"context"
"fmt"
"math"
"os"
"time"
"golang.org/x/oauth2/google"
. "github.com/onsi/ginkgo"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
gcm "google.golang.org/api/monitoring/v3"
)
var (
// Stackdriver container metrics, as described here:
// https://cloud.google.com/monitoring/api/metrics#gcp-container
stackdriverMetrics = []string{
"uptime",
"memory/bytes_total",
"memory/bytes_used",
"cpu/reserved_cores",
"cpu/usage_time",
"memory/page_fault_count",
"disk/bytes_used",
"disk/bytes_total",
"cpu/utilization",
}
// pollFrequency and pollTimeout control how often and for how long the
// metrics check is polled.
pollFrequency = time.Second * 5
pollTimeout = time.Minute * 7
// rcName is the resource consumer's name; it is also used as the
// container_name in the time-series filter.
rcName = "resource-consumer"
// memoryUsed and memoryLimit are the memory arguments handed to the
// resource consumer.
memoryUsed = 64
memoryLimit int64 = 200
// tolerance is the allowed relative deviation of the measured CPU usage
// from the requested one.
tolerance = 0.25
)
// Registers the Stackdriver cluster-metrics e2e test; it is skipped unless the
// provider is gce or gke.
var _ = instrumentation.SIGDescribe("Stackdriver Monitoring", func() {
BeforeEach(func() {
framework.SkipUnlessProviderIs("gce", "gke")
})
f := framework.NewDefaultFramework("stackdriver-monitoring")
It("should have cluster metrics [Feature:StackdriverMonitoring]", func() {
// Arguments are pods, allPodsCPU and perPodCPU of testStackdriverMonitoring.
testStackdriverMonitoring(f, 1, 100, 200)
})
})
// testStackdriverMonitoring starts a resource consumer with the given number
// of pods and CPU settings, then polls Stackdriver until every metric in
// stackdriverMetrics has time series and the reported CPU utilization matches
// the requested usage, failing the test when pollTimeout elapses first.
func testStackdriverMonitoring(f *framework.Framework, pods, allPodsCPU int, perPodCPU int64) {
	projectID := framework.TestContext.CloudConfig.ProjectID

	ctx := context.Background()
	client, err := google.DefaultClient(ctx, gcm.CloudPlatformScope)
	// Check immediately: err is reassigned by gcm.New below.
	framework.ExpectNoError(err)

	// Hack for running tests locally
	// If this is your use case, create application default credentials:
	// $ gcloud auth application-default login
	// and uncomment following lines (comment out the client creation above): (DON'T set the env var below)
	/*
		ts, err := google.DefaultTokenSource(oauth2.NoContext)
		framework.Logf("Couldn't get application default credentials, %v", err)
		if err != nil {
			framework.Failf("Error accessing application default credentials, %v", err)
		}
		client := oauth2.NewClient(oauth2.NoContext, ts)
	*/

	gcmService, err := gcm.New(client)
	// Check before touching gcmService, which may be nil on error.
	framework.ExpectNoError(err)

	// set this env var if accessing Stackdriver test endpoint (default is prod):
	// $ export STACKDRIVER_API_ENDPOINT_OVERRIDE=https://test-monitoring.sandbox.googleapis.com/
	basePathOverride := os.Getenv("STACKDRIVER_API_ENDPOINT_OVERRIDE")
	if basePathOverride != "" {
		gcmService.BasePath = basePathOverride
	}

	rc := common.NewDynamicResourceConsumer(rcName, f.Namespace.Name, common.KindDeployment, pods, allPodsCPU, memoryUsed, 0, perPodCPU, memoryLimit, f.ClientSet, f.InternalClientset, f.ScalesGetter)
	defer rc.CleanUp()

	rc.WaitForReplicas(pods, 15*time.Minute)

	metricsMap := map[string]bool{}
	pollingFunction := checkForMetrics(projectID, gcmService, time.Now(), metricsMap, allPodsCPU, perPodCPU)
	err = wait.Poll(pollFrequency, pollTimeout, pollingFunction)
	if err != nil {
		framework.Logf("Missing metrics: %+v\n", metricsMap)
	}
	framework.ExpectNoError(err)
}
// checkForMetrics returns a polling function that fetches, for every metric in
// stackdriverMetrics, the time series recorded since start, and records in
// metricsMap whether any were found.
//
// The polling function reports true only when every metric has at least one
// time series and the sum of the most recent cpu/utilization points,
// multiplied by cpuLimit, is within tolerance of cpuUsed. A fetch error fails
// the test.
func checkForMetrics(projectId string, gcmService *gcm.Service, start time.Time, metricsMap map[string]bool, cpuUsed int, cpuLimit int64) func() (bool, error) {
	return func() (bool, error) {
		counter := 0
		correctUtilization := false
		for _, metric := range stackdriverMetrics {
			metricsMap[metric] = false
		}
		for _, metric := range stackdriverMetrics {
			// TODO: check only for metrics from this cluster
			ts, err := fetchTimeSeries(projectId, gcmService, metric, start, time.Now())
			framework.ExpectNoError(err)
			if len(ts) > 0 {
				counter++
				metricsMap[metric] = true
				framework.Logf("Received %v timeseries for metric %v\n", len(ts), metric)
			} else {
				framework.Logf("No timeseries for metric %v\n", metric)
			}
			if metric != "cpu/utilization" {
				continue
			}

			var sum float64
			for _, t := range ts {
				// A series without points has no latest value to add.
				if len(t.Points) == 0 {
					framework.Logf("Received %v points for metric %v\n", len(t.Points), metric)
					continue
				}
				// Pick the point with the latest interval end time.
				max := t.Points[0]
				maxEnd, _ := time.Parse(time.RFC3339, max.Interval.EndTime)
				for _, p := range t.Points {
					pEnd, _ := time.Parse(time.RFC3339, p.Interval.EndTime)
					if pEnd.After(maxEnd) {
						max = p
						maxEnd = pEnd
					}
				}
				sum = sum + *max.Value.DoubleValue
				framework.Logf("Received %v points for metric %v\n",
					len(t.Points), metric)
			}
			framework.Logf("Most recent cpu/utilization sum*cpu/limit: %v\n", sum*float64(cpuLimit))
			if math.Abs(sum*float64(cpuLimit)-float64(cpuUsed)) > tolerance*float64(cpuUsed) {
				return false, nil
			}
			correctUtilization = true
		}
		// Require every listed metric rather than a hard-coded count.
		if counter < len(stackdriverMetrics) || !correctUtilization {
			return false, nil
		}
		return true, nil
	}
}
// createMetricFilter builds the time-series filter selecting the given
// container metric for the container named container_name.
func createMetricFilter(metric string, container_name string) string {
	const filterTemplate = "metric.type=\"container.googleapis.com/container/%s\" AND\nresource.label.container_name=\"%s\""
	return fmt.Sprintf(filterTemplate, metric, container_name)
}
// fetchTimeSeries lists the time series of metric for the rcName container in
// the given project, restricted to the [start, end] interval.
func fetchTimeSeries(projectId string, gcmService *gcm.Service, metric string, start time.Time, end time.Time) ([]*gcm.TimeSeries, error) {
	listCall := gcmService.Projects.TimeSeries.
		List(fullProjectName(projectId)).
		Filter(createMetricFilter(metric, rcName)).
		IntervalStartTime(start.Format(time.RFC3339)).
		IntervalEndTime(end.Format(time.RFC3339))
	response, err := listCall.Do()
	if err != nil {
		return nil, err
	}
	return response.TimeSeries, nil
}
// fullProjectName returns name prefixed with "projects/".
func fullProjectName(name string) string {
	const projectTemplate = "projects/%s"
	qualified := fmt.Sprintf(projectTemplate, name)
	return qualified
}

View File

@@ -0,0 +1,169 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"time"
"golang.org/x/oauth2/google"
clientset "k8s.io/client-go/kubernetes"
"context"
"encoding/json"
"fmt"
. "github.com/onsi/ginkgo"
"io/ioutil"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/test/e2e/framework"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
"reflect"
)
const (
// metadataWaitTime is the time to wait after a pod's creation for its
// metadata to be exported.
metadataWaitTime = 120 * time.Second
// MonitoringScope is the OAuth scope used for the Stackdriver Metadata API.
MonitoringScope = "https://www.googleapis.com/auth/monitoring"
)
// Registers the Stackdriver Metadata Agent e2e test; it is skipped unless the
// provider is gce or gke.
var _ = instrumentation.SIGDescribe("Stackdriver Monitoring", func() {
BeforeEach(func() {
framework.SkipUnlessProviderIs("gce", "gke")
})
f := framework.NewDefaultFramework("stackdriver-monitoring")
var kubeClient clientset.Interface
It("should run Stackdriver Metadata Agent [Feature:StackdriverMetadataAgent]", func() {
kubeClient = f.ClientSet
testAgent(f, kubeClient)
})
})
// testAgent creates a pod whose container has a unique name, waits
// metadataWaitTime, and then checks that the Stackdriver Metadata API returns
// a k8s_container resource with that container name. Any failure along the way
// fails the test.
func testAgent(f *framework.Framework, kubeClient clientset.Interface) {
	projectId := framework.TestContext.CloudConfig.ProjectID
	resourceType := "k8s_container"
	uniqueContainerName := fmt.Sprintf("test-container-%v", time.Now().Unix())
	endpoint := fmt.Sprintf(
		"https://stackdriver.googleapis.com/v1beta2/projects/%v/resourceMetadata?filter=resource.type%%3D%v+AND+resource.label.container_name%%3D%v",
		projectId,
		resourceType,
		uniqueContainerName)

	oauthClient, err := google.DefaultClient(context.Background(), MonitoringScope)
	if err != nil {
		framework.Failf("Failed to create oauth client: %s", err)
	}

	// Create test pod with unique name.
	framework.CreateExecPodOrFail(kubeClient, f.Namespace.Name, uniqueContainerName, func(pod *v1.Pod) {
		pod.Spec.Containers[0].Name = uniqueContainerName
	})
	defer kubeClient.CoreV1().Pods(f.Namespace.Name).Delete(uniqueContainerName, &metav1.DeleteOptions{})

	// Wait a short amount of time for Metadata Agent to be created and metadata to be exported
	time.Sleep(metadataWaitTime)

	resp, err := oauthClient.Get(endpoint)
	if err != nil {
		framework.Failf("Failed to call Stackdriver Metadata API %s", err)
	}
	// Release the connection on every exit path, including test failure.
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		framework.Failf("Stackdriver Metadata API returned error status: %s", resp.Status)
	}
	metadataAPIResponse, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		framework.Failf("Failed to read response from Stackdriver Metadata API: %s", err)
	}

	exists, err := verifyPodExists(metadataAPIResponse, uniqueContainerName)
	if err != nil {
		framework.Failf("Failed to process response from Stackdriver Metadata API: %s", err)
	}
	if !exists {
		framework.Failf("Missing Metadata for container %q", uniqueContainerName)
	}
}
// Metadata is the decoded body of a Stackdriver Metadata API response; each
// entry of Results is one loosely-typed result object.
type Metadata struct {
Results []map[string]interface{}
}
// Resource is the parsed form of a result's "resource" entry: its "type" and
// its string-valued "labels".
type Resource struct {
resourceType string
resourceLabels map[string]string
}
// verifyPodExists decodes a Metadata API response and reports whether any
// result is a "k8s_container" resource whose container_name label equals
// containerName. A result without a parseable "resource" entry is an error.
func verifyPodExists(response []byte, containerName string) (bool, error) {
	metadata := Metadata{}
	if err := json.Unmarshal(response, &metadata); err != nil {
		return false, fmt.Errorf("Failed to unmarshall: %s", err)
	}

	for i := range metadata.Results {
		rawResource, present := metadata.Results[i]["resource"]
		if !present {
			return false, fmt.Errorf("No resource entry in response from Stackdriver Metadata API")
		}
		resource, err := parseResource(rawResource)
		if err != nil {
			return false, fmt.Errorf("No 'resource' label: %s", err)
		}
		if resource.resourceType != "k8s_container" {
			continue
		}
		if resource.resourceLabels["container_name"] == containerName {
			return true, nil
		}
	}
	return false, nil
}
// parseResource converts a loosely-typed "resource" entry into a Resource.
// The entry must be a map with a string "type" and a "labels" map whose
// values are all strings; anything else is reported as an error.
func parseResource(resource interface{}) (*Resource, error) {
	resourceMap, isMap := resource.(map[string]interface{})
	if !isMap {
		return nil, fmt.Errorf("Resource entry is of type %s, expected map[string]interface{}", reflect.TypeOf(resource))
	}

	rawType, hasType := resourceMap["type"]
	if !hasType {
		return nil, fmt.Errorf("Resource entry doesn't have a type specified")
	}
	typeName, isString := rawType.(string)
	if !isString {
		return nil, fmt.Errorf("Resource type is of type %s, expected string", reflect.TypeOf(rawType))
	}

	rawLabels, hasLabels := resourceMap["labels"]
	if !hasLabels {
		return nil, fmt.Errorf("Resource entry doesn't have any labels specified")
	}
	labelMap, isLabelMap := rawLabels.(map[string]interface{})
	if !isLabelMap {
		return nil, fmt.Errorf("Resource labels entry is of type %s, expected map[string]interface{}", reflect.TypeOf(rawLabels))
	}

	parsed := &Resource{
		resourceType:   typeName,
		resourceLabels: map[string]string{},
	}
	for label, val := range labelMap {
		text, isText := val.(string)
		if !isText {
			return nil, fmt.Errorf("Resource label %q is of type %s, expected string", label, reflect.TypeOf(val))
		}
		parsed.resourceLabels[label] = text
	}
	return parsed, nil
}