Add generated file

This PR adds generated files under pkg/client and the vendor folder.
Committed by xing-yang on 2018-07-12 10:55:15 -07:00
parent 36b1de0341
commit e213d1890d
17729 changed files with 5090889 additions and 0 deletions
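For orientation, the generated clientset and informer packages added under pkg/client, and the vendored k8s.io/client-go packages, all follow the standard client-go construction pattern. The sketch below is illustrative only and is not part of the commit; it uses the vendored external client-go packages, and the in-cluster config and 30-second resync period are assumptions for the example (the generated internalversion factory under pkg/client is expected to follow the same shape).

package main

import (
    "time"

    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
)

func main() {
    // Load a REST config; in-cluster config is assumed here for brevity.
    cfg, err := rest.InClusterConfig()
    if err != nil {
        panic(err)
    }
    // Build a typed clientset from the config.
    cs, err := kubernetes.NewForConfig(cfg)
    if err != nil {
        panic(err)
    }
    // Shared informer factory with an assumed 30s resync period.
    factory := informers.NewSharedInformerFactory(cs, 30*time.Second)
    // Request the informer/lister before Start so it gets registered.
    podLister := factory.Core().V1().Pods().Lister()

    stop := make(chan struct{})
    defer close(stop)
    factory.Start(stop)
    factory.WaitForCacheSync(stop)
    _ = podLister
}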


@@ -0,0 +1,121 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
size = "large",
srcs = [
"extender_test.go",
"main_test.go",
"predicates_test.go",
"preemption_test.go",
"priorities_test.go",
"scheduler_test.go",
"taint_test.go",
"volume_binding_test.go",
],
embed = [":go_default_library"],
tags = ["integration"],
deps = [
"//cmd/kube-scheduler/app:go_default_library",
"//cmd/kube-scheduler/app/config:go_default_library",
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/componentconfig:go_default_library",
"//pkg/client/clientset_generated/internalclientset:go_default_library",
"//pkg/client/informers/informers_generated/internalversion:go_default_library",
"//pkg/controller/nodelifecycle:go_default_library",
"//pkg/controller/volume/persistentvolume:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubeapiserver/admission:go_default_library",
"//pkg/scheduler:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithmprovider:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
"//test/integration/framework:go_default_library",
"//test/utils:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["util.go"],
importpath = "k8s.io/kubernetes/test/integration/scheduler",
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/api/v1/pod:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/disruption:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler:go_default_library",
"//pkg/scheduler/algorithmprovider:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//test/integration/framework:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/admission:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)


@@ -0,0 +1,4 @@
approvers:
- sig-scheduling-maintainers
reviewers:
- sig-scheduling


@@ -0,0 +1,428 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the scheduler extender.
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
_ "k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
imageutils "k8s.io/kubernetes/test/utils/image"
)
const (
filter = "filter"
prioritize = "prioritize"
bind = "bind"
extendedResourceName = "foo.com/bar"
)
type fitPredicate func(pod *v1.Pod, node *v1.Node) (bool, error)
type priorityFunc func(pod *v1.Pod, nodes *v1.NodeList) (*schedulerapi.HostPriorityList, error)
type priorityConfig struct {
function priorityFunc
weight int
}
type Extender struct {
name string
predicates []fitPredicate
prioritizers []priorityConfig
nodeCacheCapable bool
Client clientset.Interface
}
func (e *Extender) serveHTTP(t *testing.T, w http.ResponseWriter, req *http.Request) {
decoder := json.NewDecoder(req.Body)
defer req.Body.Close()
encoder := json.NewEncoder(w)
if strings.Contains(req.URL.Path, filter) || strings.Contains(req.URL.Path, prioritize) {
var args schedulerapi.ExtenderArgs
if err := decoder.Decode(&args); err != nil {
http.Error(w, "Decode error", http.StatusBadRequest)
return
}
if strings.Contains(req.URL.Path, filter) {
resp, err := e.Filter(&args)
if err != nil {
resp.Error = err.Error()
}
if err := encoder.Encode(resp); err != nil {
t.Fatalf("Failed to encode %v", resp)
}
} else if strings.Contains(req.URL.Path, prioritize) {
// Prioritize errors are ignored. Default k8s priorities or another extender's
// priorities may be applied.
priorities, _ := e.Prioritize(&args)
if err := encoder.Encode(priorities); err != nil {
t.Fatalf("Failed to encode %+v", priorities)
}
}
} else if strings.Contains(req.URL.Path, bind) {
var args schedulerapi.ExtenderBindingArgs
if err := decoder.Decode(&args); err != nil {
http.Error(w, "Decode error", http.StatusBadRequest)
return
}
resp := &schedulerapi.ExtenderBindingResult{}
if err := e.Bind(&args); err != nil {
resp.Error = err.Error()
}
if err := encoder.Encode(resp); err != nil {
t.Fatalf("Failed to encode %+v", resp)
}
} else {
http.Error(w, "Unknown method", http.StatusNotFound)
}
}
func (e *Extender) filterUsingNodeCache(args *schedulerapi.ExtenderArgs) (*schedulerapi.ExtenderFilterResult, error) {
nodeSlice := make([]string, 0)
failedNodesMap := schedulerapi.FailedNodesMap{}
for _, nodeName := range *args.NodeNames {
fits := true
for _, predicate := range e.predicates {
fit, err := predicate(args.Pod,
&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}})
if err != nil {
return &schedulerapi.ExtenderFilterResult{
Nodes: nil,
NodeNames: nil,
FailedNodes: schedulerapi.FailedNodesMap{},
Error: err.Error(),
}, err
}
if !fit {
fits = false
break
}
}
if fits {
nodeSlice = append(nodeSlice, nodeName)
} else {
failedNodesMap[nodeName] = fmt.Sprintf("extender failed: %s", e.name)
}
}
return &schedulerapi.ExtenderFilterResult{
Nodes: nil,
NodeNames: &nodeSlice,
FailedNodes: failedNodesMap,
}, nil
}
func (e *Extender) Filter(args *schedulerapi.ExtenderArgs) (*schedulerapi.ExtenderFilterResult, error) {
filtered := []v1.Node{}
failedNodesMap := schedulerapi.FailedNodesMap{}
if e.nodeCacheCapable {
return e.filterUsingNodeCache(args)
} else {
for _, node := range args.Nodes.Items {
fits := true
for _, predicate := range e.predicates {
fit, err := predicate(args.Pod, &node)
if err != nil {
return &schedulerapi.ExtenderFilterResult{
Nodes: &v1.NodeList{},
NodeNames: nil,
FailedNodes: schedulerapi.FailedNodesMap{},
Error: err.Error(),
}, err
}
if !fit {
fits = false
break
}
}
if fits {
filtered = append(filtered, node)
} else {
failedNodesMap[node.Name] = fmt.Sprintf("extender failed: %s", e.name)
}
}
return &schedulerapi.ExtenderFilterResult{
Nodes: &v1.NodeList{Items: filtered},
NodeNames: nil,
FailedNodes: failedNodesMap,
}, nil
}
}
func (e *Extender) Prioritize(args *schedulerapi.ExtenderArgs) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
combinedScores := map[string]int{}
var nodes = &v1.NodeList{Items: []v1.Node{}}
if e.nodeCacheCapable {
for _, nodeName := range *args.NodeNames {
nodes.Items = append(nodes.Items, v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}})
}
} else {
nodes = args.Nodes
}
for _, prioritizer := range e.prioritizers {
weight := prioritizer.weight
if weight == 0 {
continue
}
priorityFunc := prioritizer.function
prioritizedList, err := priorityFunc(args.Pod, nodes)
if err != nil {
return &schedulerapi.HostPriorityList{}, err
}
for _, hostEntry := range *prioritizedList {
combinedScores[hostEntry.Host] += hostEntry.Score * weight
}
}
for host, score := range combinedScores {
result = append(result, schedulerapi.HostPriority{Host: host, Score: score})
}
return &result, nil
}
func (e *Extender) Bind(binding *schedulerapi.ExtenderBindingArgs) error {
b := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: binding.PodNamespace, Name: binding.PodName, UID: binding.PodUID},
Target: v1.ObjectReference{
Kind: "Node",
Name: binding.Node,
},
}
return e.Client.CoreV1().Pods(b.Namespace).Bind(b)
}
func machine_1_2_3_Predicate(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine1" || node.Name == "machine2" || node.Name == "machine3" {
return true, nil
}
return false, nil
}
func machine_2_3_5_Predicate(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine2" || node.Name == "machine3" || node.Name == "machine5" {
return true, nil
}
return false, nil
}
func machine_2_Prioritizer(pod *v1.Pod, nodes *v1.NodeList) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
score := 1
if node.Name == "machine2" {
score = 10
}
result = append(result, schedulerapi.HostPriority{
Host: node.Name,
Score: score,
})
}
return &result, nil
}
func machine_3_Prioritizer(pod *v1.Pod, nodes *v1.NodeList) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
score := 1
if node.Name == "machine3" {
score = 10
}
result = append(result, schedulerapi.HostPriority{
Host: node.Name,
Score: score,
})
}
return &result, nil
}
func TestSchedulerExtender(t *testing.T) {
context := initTestMaster(t, "scheduler-extender", nil)
clientSet := context.clientSet
extender1 := &Extender{
name: "extender1",
predicates: []fitPredicate{machine_1_2_3_Predicate},
prioritizers: []priorityConfig{{machine_2_Prioritizer, 1}},
}
es1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
extender1.serveHTTP(t, w, req)
}))
defer es1.Close()
extender2 := &Extender{
name: "extender2",
predicates: []fitPredicate{machine_2_3_5_Predicate},
prioritizers: []priorityConfig{{machine_3_Prioritizer, 1}},
Client: clientSet,
}
es2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
extender2.serveHTTP(t, w, req)
}))
defer es2.Close()
extender3 := &Extender{
name: "extender3",
predicates: []fitPredicate{machine_1_2_3_Predicate},
prioritizers: []priorityConfig{{machine_2_Prioritizer, 5}},
nodeCacheCapable: true,
}
es3 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
extender3.serveHTTP(t, w, req)
}))
defer es3.Close()
policy := schedulerapi.Policy{
ExtenderConfigs: []schedulerapi.ExtenderConfig{
{
URLPrefix: es1.URL,
FilterVerb: filter,
PrioritizeVerb: prioritize,
Weight: 3,
EnableHTTPS: false,
},
{
URLPrefix: es2.URL,
FilterVerb: filter,
PrioritizeVerb: prioritize,
BindVerb: bind,
Weight: 4,
EnableHTTPS: false,
ManagedResources: []schedulerapi.ExtenderManagedResource{
{
Name: extendedResourceName,
IgnoredByScheduler: true,
},
},
},
{
URLPrefix: es3.URL,
FilterVerb: filter,
PrioritizeVerb: prioritize,
Weight: 10,
EnableHTTPS: false,
NodeCacheCapable: true,
},
},
}
policy.APIVersion = "v1"
context = initTestScheduler(t, context, nil, false, &policy)
defer cleanupTest(t, context)
DoTestPodScheduling(context.ns, t, clientSet)
}
func DoTestPodScheduling(ns *v1.Namespace, t *testing.T, cs clientset.Interface) {
// NOTE: This test cannot run in parallel, because it is creating and deleting
// non-namespaced objects (Nodes).
defer cs.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
goodCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: "schedulable condition",
LastHeartbeatTime: metav1.Time{Time: time.Now()},
}
node := &v1.Node{
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{goodCondition},
},
}
for ii := 0; ii < 5; ii++ {
node.Name = fmt.Sprintf("machine%d", ii+1)
if _, err := cs.CoreV1().Nodes().Create(node); err != nil {
t.Fatalf("Failed to create nodes: %v", err)
}
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "extender-test-pod"},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: imageutils.GetPauseImageName(),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
extendedResourceName: *resource.NewQuantity(1, resource.DecimalSI),
},
},
},
},
},
}
myPod, err := cs.CoreV1().Pods(ns.Name).Create(pod)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
err = wait.Poll(time.Second, wait.ForeverTestTimeout, podScheduled(cs, myPod.Namespace, myPod.Name))
if err != nil {
t.Fatalf("Failed to schedule pod: %v", err)
}
myPod, err = cs.CoreV1().Pods(ns.Name).Get(myPod.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get pod: %v", err)
} else if myPod.Spec.NodeName != "machine2" {
t.Fatalf("Failed to schedule using extender, expected machine2, got %v", myPod.Spec.NodeName)
}
var gracePeriod int64
if err := cs.CoreV1().Pods(ns.Name).Delete(myPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod}); err != nil {
t.Fatalf("Failed to delete pod: %v", err)
}
_, err = cs.CoreV1().Pods(ns.Name).Get(myPod.Name, metav1.GetOptions{})
if err == nil {
t.Fatalf("Failed to delete pod: %v", err)
}
t.Logf("Scheduled pod using extenders")
}
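For context, the extenders above speak plain JSON over HTTP: the scheduler POSTs an ExtenderArgs payload to <URLPrefix>/<verb> and decodes the corresponding result type, which is exactly what serveHTTP answers. Below is a minimal client-side sketch of the filter exchange; it is not part of the commit, and the URL layout, transport, and error handling are simplified assumptions.

package extenderclient

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"

    schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
)

// callFilterVerb mirrors the scheduler side of the filter call served by the
// test's serveHTTP handler. Illustrative sketch only.
func callFilterVerb(urlPrefix string, args *schedulerapi.ExtenderArgs) (*schedulerapi.ExtenderFilterResult, error) {
    body, err := json.Marshal(args)
    if err != nil {
        return nil, err
    }
    // The test registers its filter handler under the "filter" verb.
    resp, err := http.Post(urlPrefix+"/filter", "application/json", bytes.NewReader(body))
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()
    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("extender returned status %d", resp.StatusCode)
    }
    result := &schedulerapi.ExtenderFilterResult{}
    if err := json.NewDecoder(resp.Body).Decode(result); err != nil {
        return nil, err
    }
    return result, nil
}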


@@ -0,0 +1,27 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"k8s.io/kubernetes/test/integration/framework"
)
func TestMain(m *testing.M) {
framework.EtcdMain(m.Run)
}


@@ -0,0 +1,922 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
testutils "k8s.io/kubernetes/test/utils"
imageutils "k8s.io/kubernetes/test/utils/image"
)
// This file tests the scheduler predicates functionality.
const pollInterval = 100 * time.Millisecond
// TestInterPodAffinity verifies that the scheduler's inter-pod affinity and
// anti-affinity predicates work correctly.
func TestInterPodAffinity(t *testing.T) {
context := initTest(t, "inter-pod-affinity")
defer cleanupTest(t, context)
// Add a few nodes.
nodes, err := createNodes(context.clientSet, "testnode", nil, 2)
if err != nil {
t.Fatalf("Cannot create nodes: %v", err)
}
// Add labels to the nodes.
labels1 := map[string]string{
"region": "r1",
"zone": "z11",
}
for _, node := range nodes {
if err = testutils.AddLabelsToNode(context.clientSet, node.Name, labels1); err != nil {
t.Fatalf("Cannot add labels to node: %v", err)
}
if err = waitForNodeLabels(context.clientSet, node.Name, labels1); err != nil {
t.Fatalf("Adding labels to node didn't succeed: %v", err)
}
}
cs := context.clientSet
podLabel := map[string]string{"service": "securityscan"}
// podLabel2 := map[string]string{"security": "S1"}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
node *v1.Node
fits bool
errorType string
test string
}{
/*{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpDoesNotExist,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
node: nodes[0],
fits: false,
errorType: "invalidPod",
test: "validates that a pod with an invalid podAffinity is rejected because of the LabelSelectorRequirement is invalid",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
node: nodes[0],
fits: false,
test: "validates that Inter-pod-Affinity is respected if not matching",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name,
},
},
},
node: nodes[0],
fits: true,
test: "validates that InterPodAffinity is respected if matching. requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using In operator that matches the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"securityscan3", "value3"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: true,
test: "validates that InterPodAffinity is respected if matching. requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using not in operator in labelSelector that matches the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
Namespaces: []string{"diff-namespace"},
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel, Namespace: "ns"}}},
node: nodes[0],
fits: false,
test: "validates that inter-pod-affinity is respected when pods have different Namespaces",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "Doesn't satisfy the PodAffinity because of unmatching labelSelector with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
}, {
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
}, {
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"WrongValue"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: true,
test: "validates that InterPodAffinity is respected if matching with multiple affinities in multiple RequiredDuringSchedulingIgnoredDuringExecution ",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: podLabel2,
Name: "fakename",
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
}, {
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan2"},
}, {
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"WrongValue"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "The labelSelector requirements(items of matchExpressions) are ANDed, the pod cannot schedule onto the node because one of the matchExpression items doesn't match.",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: true,
test: "validates that InterPod Affinity and AntiAffinity is respected if matching",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel},
},
},
node: nodes[0],
fits: true,
test: "satisfies the PodAffinity and PodAntiAffinity and PodAntiAffinity symmetry with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "satisfies the PodAffinity but doesn't satisfies the PodAntiAffinity with the existing pod",
},*/
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{
NodeName: nodes[0].Name,
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value3"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel},
},
},
node: nodes[0],
fits: false,
test: "satisfies the PodAffinity and PodAntiAffinity but doesn't satisfies PodAntiAffinity symmetry with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "pod matches its own Label in PodAffinity and that matches the existing pod Labels",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}}},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{NodeName: nodes[0].Name,
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel},
},
},
node: nodes[0],
fits: false,
test: "Verify that PodAntiAffinity of an existing pod is respected when PodAntiAffinity symmetry is not satisfied with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-name",
Labels: podLabel,
},
Spec: v1.PodSpec{Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}}},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{NodeName: nodes[0].Name,
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fake-name2",
Labels: podLabel},
},
},
node: nodes[0],
fits: true,
test: "Verify that PodAntiAffinity from existing pod is respected when pod statisfies PodAntiAffinity symmetry with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "fake-name2"},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeSelector: map[string]string{"region": "r1"},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"abc"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{
{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: imageutils.GetPauseImageName()}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{Name: "fakename", Labels: map[string]string{"foo": "abc"}}},
},
fits: false,
test: "nodes[0] and nodes[1] have same topologyKey and label value. nodes[0] has an existing pod that matches the inter pod affinity rule. The new pod can not be scheduled onto either of the two nodes.",
},
}
for _, test := range tests {
for _, pod := range test.pods {
var nsName string
if pod.Namespace != "" {
nsName = pod.Namespace
} else {
nsName = context.ns.Name
}
createdPod, err := cs.CoreV1().Pods(nsName).Create(pod)
if err != nil {
t.Fatalf("Test Failed: error, %v, while creating pod during test: %v", err, test.test)
}
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podScheduled(cs, createdPod.Namespace, createdPod.Name))
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for pod during test, %v", err, test)
}
}
testPod, err := cs.CoreV1().Pods(context.ns.Name).Create(test.pod)
if err != nil {
if !(test.errorType == "invalidPod" && errors.IsInvalid(err)) {
t.Fatalf("Test Failed: error, %v, while creating pod during test: %v", err, test.test)
}
}
if test.fits {
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podScheduled(cs, testPod.Namespace, testPod.Name))
} else {
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podUnschedulable(cs, testPod.Namespace, testPod.Name))
}
if err != nil {
t.Errorf("Test Failed: %v, err %v, test.fits %v", test.test, err, test.fits)
}
err = cs.CoreV1().Pods(context.ns.Name).Delete(test.pod.Name, metav1.NewDeleteOptions(0))
if err != nil {
t.Errorf("Test Failed: error, %v, while deleting pod during test: %v", err, test.test)
}
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podDeleted(cs, context.ns.Name, test.pod.Name))
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for pod to get deleted, %v", err, test.test)
}
for _, pod := range test.pods {
var nsName string
if pod.Namespace != "" {
nsName = pod.Namespace
} else {
nsName = context.ns.Name
}
err = cs.CoreV1().Pods(nsName).Delete(pod.Name, metav1.NewDeleteOptions(0))
if err != nil {
t.Errorf("Test Failed: error, %v, while deleting pod during test: %v", err, test.test)
}
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podDeleted(cs, nsName, pod.Name))
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for pod to get deleted, %v", err, test.test)
}
}
}
}
// TestNodePIDPressure verifies that the scheduler's CheckNodePIDPressurePredicate
// predicate works correctly.
func TestNodePIDPressure(t *testing.T) {
context := initTest(t, "node-pid-pressure")
defer cleanupTest(t, context)
// Add a node.
node, err := createNode(context.clientSet, "testnode", nil)
if err != nil {
t.Fatalf("Cannot create node: %v", err)
}
cs := context.clientSet
// Adds PID pressure condition to the node.
node.Status.Conditions = []v1.NodeCondition{
{
Type: v1.NodePIDPressure,
Status: v1.ConditionTrue,
},
}
// Update node condition.
err = updateNodeStatus(context.clientSet, node)
if err != nil {
t.Fatalf("Cannot update node: %v", err)
}
// Create a test pod.
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pidpressure-fake-name"},
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: "container", Image: imageutils.GetPauseImageName()},
},
},
}
testPod, err = cs.CoreV1().Pods(context.ns.Name).Create(testPod)
if err != nil {
t.Fatalf("Test Failed: error: %v, while creating pod", err)
}
err = waitForPodUnschedulable(cs, testPod)
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for scheduled", err)
}
cleanupPods(cs, t, []*v1.Pod{testPod})
}
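The polling helpers used throughout these tests (podScheduled, podUnschedulable, podDeleted, waitForPodUnschedulable, and friends) live in util.go, which is not shown in this hunk. The sketch below illustrates the kind of wait.ConditionFunc podScheduled is assumed to be, detecting scheduling via Spec.NodeName; the real helper may differ in detail.

package scheduler

import (
    "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    clientset "k8s.io/client-go/kubernetes"
)

// podScheduledSketch returns a condition that reports true once the pod has
// been bound to a node. Illustrative only; not part of the commit.
func podScheduledSketch(cs clientset.Interface, namespace, name string) wait.ConditionFunc {
    return func() (bool, error) {
        pod, err := cs.CoreV1().Pods(namespace).Get(name, metav1.GetOptions{})
        if errors.IsNotFound(err) {
            // Pod not created yet; keep polling.
            return false, nil
        }
        if err != nil {
            return false, err
        }
        // A non-empty NodeName means the pod has been bound.
        return pod.Spec.NodeName != "", nil
    }
}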


@@ -0,0 +1,880 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This file tests preemption functionality of the scheduler.
package scheduler
import (
"fmt"
"testing"
"time"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/features"
_ "k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
testutils "k8s.io/kubernetes/test/utils"
"github.com/golang/glog"
)
var lowPriority, mediumPriority, highPriority = int32(100), int32(200), int32(300)
func waitForNominatedNodeNameWithTimeout(cs clientset.Interface, pod *v1.Pod, timeout time.Duration) error {
if err := wait.Poll(100*time.Millisecond, timeout, func() (bool, error) {
pod, err := cs.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{})
if err != nil {
return false, err
}
if len(pod.Status.NominatedNodeName) > 0 {
return true, nil
}
return false, err
}); err != nil {
return fmt.Errorf("Pod %v annotation did not get set: %v", pod.Name, err)
}
return nil
}
func waitForNominatedNodeName(cs clientset.Interface, pod *v1.Pod) error {
return waitForNominatedNodeNameWithTimeout(cs, pod, wait.ForeverTestTimeout)
}
// TestPreemption tests a few preemption scenarios.
func TestPreemption(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption")
defer cleanupTest(t, context)
cs := context.clientSet
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
}
tests := []struct {
description string
existingPods []*v1.Pod
pod *v1.Pod
preemptedPodIndexes map[int]struct{}
}{
{
description: "basic pod preemption",
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "victim-pod",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{0: {}},
},
{
description: "preemption is performed to satisfy anti-affinity",
existingPods: []*v1.Pod{
initPausePod(cs, &pausePodConfig{
Name: "pod-0", Namespace: context.ns.Name,
Priority: &mediumPriority,
Labels: map[string]string{"pod": "p0"},
Resources: defaultPodRes,
}),
initPausePod(cs, &pausePodConfig{
Name: "pod-1", Namespace: context.ns.Name,
Priority: &lowPriority,
Labels: map[string]string{"pod": "p1"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"preemptor"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
},
// A higher priority pod with anti-affinity.
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "preemptor"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"p0"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
preemptedPodIndexes: map[int]struct{}{0: {}, 1: {}},
},
{
// This is similar to the previous case only pod-1 is high priority.
description: "preemption is not performed when anti-affinity is not satisfied",
existingPods: []*v1.Pod{
initPausePod(cs, &pausePodConfig{
Name: "pod-0", Namespace: context.ns.Name,
Priority: &mediumPriority,
Labels: map[string]string{"pod": "p0"},
Resources: defaultPodRes,
}),
initPausePod(cs, &pausePodConfig{
Name: "pod-1", Namespace: context.ns.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "p1"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"preemptor"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
},
// A higher priority pod with anti-affinity.
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "preemptor"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"p0"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
preemptedPodIndexes: map[int]struct{}{},
},
}
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
node, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
nodeLabels := map[string]string{"node": node.Name}
if err = testutils.AddLabelsToNode(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Cannot add labels to node: %v", err)
}
if err = waitForNodeLabels(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Adding labels to node didn't succeed: %v", err)
}
for _, test := range tests {
pods := make([]*v1.Pod, len(test.existingPods))
// Create and run existingPods.
for i, p := range test.existingPods {
pods[i], err = runPausePod(cs, p)
if err != nil {
t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
}
}
// Create the "pod".
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Wait for preemption of pods and make sure the other ones are not preempted.
for i, p := range pods {
if _, found := test.preemptedPodIndexes[i]; found {
if err = wait.Poll(time.Second, wait.ForeverTestTimeout, podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Test [%v]: Pod %v is not getting evicted.", test.description, p.Name)
}
} else {
if p.DeletionTimestamp != nil {
t.Errorf("Test [%v]: Didn't expect pod %v to get preempted.", test.description, p.Name)
}
}
}
// Also check that the preemptor pod gets the annotation for nominated node name.
if len(test.preemptedPodIndexes) > 0 {
if err := waitForNominatedNodeName(cs, preemptor); err != nil {
t.Errorf("Test [%v]: NominatedNodeName annotation was not set for pod %v: %v", test.description, preemptor.Name, err)
}
}
// Cleanup
pods = append(pods, preemptor)
cleanupPods(cs, t, pods)
}
}
// TestDisablePreemption tests that pod preemption does not happen when preemption is disabled in the scheduler.
func TestDisablePreemption(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler, and disable preemption.
context := initTestDisablePreemption(t, "disable-preemption")
defer cleanupTest(t, context)
cs := context.clientSet
tests := []struct {
description string
existingPods []*v1.Pod
pod *v1.Pod
}{
{
description: "pod preemption will not happen",
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "victim-pod",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
},
}
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
_, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
for _, test := range tests {
pods := make([]*v1.Pod, len(test.existingPods))
// Create and run existingPods.
for i, p := range test.existingPods {
pods[i], err = runPausePod(cs, p)
if err != nil {
t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
}
}
// Create the "pod".
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Ensure the preemptor stays unschedulable.
if err := waitForPodUnschedulable(cs, preemptor); err != nil {
t.Errorf("Test [%v]: Preemptor %v should not become scheduled",
test.description, preemptor.Name)
}
// Ensure the preemptor does not get nominated.
if err := waitForNominatedNodeNameWithTimeout(cs, preemptor, 5*time.Second); err == nil {
t.Errorf("Test [%v]: Preemptor %v should not be nominated",
test.description, preemptor.Name)
}
// Cleanup
pods = append(pods, preemptor)
cleanupPods(cs, t, pods)
}
}
func mkPriorityPodWithGrace(tc *TestContext, name string, priority int32, grace int64) *v1.Pod {
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
}
pod := initPausePod(tc.clientSet, &pausePodConfig{
Name: name,
Namespace: tc.ns.Name,
Priority: &priority,
Labels: map[string]string{"pod": name},
Resources: defaultPodRes,
})
// Setting grace period to zero. Otherwise, we may never see the actual deletion
// of the pods in integration tests.
pod.Spec.TerminationGracePeriodSeconds = &grace
return pod
}
// This test ensures that while the preempting pod is waiting for its victims to
// terminate, other pending lower priority pods are not scheduled into the space
// freed by preemption before the higher priority pod is scheduled.
func TestPreemptionStarvation(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption")
defer cleanupTest(t, context)
cs := context.clientSet
tests := []struct {
description string
numExistingPod int
numExpectedPending int
preemptor *v1.Pod
}{
{
// This test ensures that while the preempting pod is waiting for its victims
// to terminate, other lower priority pods are not scheduled into the space
// freed by preemption before the higher priority pod is scheduled.
description: "starvation test: higher priority pod is scheduled before the lower priority ones",
numExistingPod: 10,
numExpectedPending: 5,
preemptor: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
},
}
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
_, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
for _, test := range tests {
pendingPods := make([]*v1.Pod, test.numExpectedPending)
numRunningPods := test.numExistingPod - test.numExpectedPending
runningPods := make([]*v1.Pod, numRunningPods)
// Create and run existingPods.
for i := 0; i < numRunningPods; i++ {
runningPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(context, fmt.Sprintf("rpod-%v", i), mediumPriority, 0))
if err != nil {
t.Fatalf("Test [%v]: Error creating pause pod: %v", test.description, err)
}
}
// make sure that runningPods are all scheduled.
for _, p := range runningPods {
if err := waitForPodToSchedule(cs, p); err != nil {
t.Fatalf("Pod %v didn't get scheduled: %v", p.Name, err)
}
}
// Create pending pods.
for i := 0; i < test.numExpectedPending; i++ {
pendingPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(context, fmt.Sprintf("ppod-%v", i), mediumPriority, 0))
if err != nil {
t.Fatalf("Test [%v]: Error creating pending pod: %v", test.description, err)
}
}
// Make sure that all pending pods are being marked unschedulable.
for _, p := range pendingPods {
if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout,
podUnschedulable(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Pod %v didn't get marked unschedulable: %v", p.Name, err)
}
}
// Create the preemptor.
preemptor, err := createPausePod(cs, test.preemptor)
if err != nil {
t.Errorf("Error while creating the preempting pod: %v", err)
}
// Check that the preemptor pod gets the annotation for nominated node name.
if err := waitForNominatedNodeName(cs, preemptor); err != nil {
t.Errorf("Test [%v]: NominatedNodeName annotation was not set for pod %v: %v", test.description, preemptor.Name, err)
}
// Make sure that preemptor is scheduled after preemptions.
if err := waitForPodToScheduleWithTimeout(cs, preemptor, 60*time.Second); err != nil {
t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err)
}
// Cleanup
glog.Info("Cleaning up all pods...")
allPods := pendingPods
allPods = append(allPods, runningPods...)
allPods = append(allPods, preemptor)
cleanupPods(cs, t, allPods)
}
}
// TestNominatedNodeCleanUp checks that when there are nominated pods on a
// node and a higher priority pod is nominated to run on the node, the nominated
// node name of the lower priority pods is cleared.
// Test scenario:
// 1. Create a few low priority pods with a long grace period that fill up a node.
// 2. Create a medium priority pod that preempts some of those pods.
// 3. Check that nominated node name of the medium priority pod is set.
// 4. Create a high priority pod that preempts some pods on that node.
// 5. Check that nominated node name of the high priority pod is set and nominated
// node name of the medium priority pod is cleared.
func TestNominatedNodeCleanUp(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption")
defer cleanupTest(t, context)
cs := context.clientSet
defer cleanupPodsInNamespace(cs, t, context.ns.Name)
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
_, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
// Step 1. Create a few low priority pods.
lowPriPods := make([]*v1.Pod, 4)
for i := 0; i < len(lowPriPods); i++ {
lowPriPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(context, fmt.Sprintf("lpod-%v", i), lowPriority, 60))
if err != nil {
t.Fatalf("Error creating pause pod: %v", err)
}
}
// make sure that the pods are all scheduled.
for _, p := range lowPriPods {
if err := waitForPodToSchedule(cs, p); err != nil {
t.Fatalf("Pod %v didn't get scheduled: %v", p.Name, err)
}
}
// Step 2. Create a medium priority pod.
podConf := initPausePod(cs, &pausePodConfig{
Name: "medium-priority",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(400, resource.BinarySI)},
},
})
medPriPod, err := createPausePod(cs, podConf)
if err != nil {
t.Errorf("Error while creating the medium priority pod: %v", err)
}
// Step 3. Check that nominated node name of the medium priority pod is set.
if err := waitForNominatedNodeName(cs, medPriPod); err != nil {
t.Errorf("NominatedNodeName annotation was not set for pod %v: %v", medPriPod.Name, err)
}
// Step 4. Create a high priority pod.
podConf = initPausePod(cs, &pausePodConfig{
Name: "high-priority",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
})
highPriPod, err := createPausePod(cs, podConf)
if err != nil {
t.Errorf("Error while creating the high priority pod: %v", err)
}
// Step 5. Check that nominated node name of the high priority pod is set.
if err := waitForNominatedNodeName(cs, highPriPod); err != nil {
t.Errorf("NominatedNodeName annotation was not set for pod %v: %v", highPriPod.Name, err)
}
// And the nominated node name of the medium priority pod is cleared.
if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) {
pod, err := cs.CoreV1().Pods(medPriPod.Namespace).Get(medPriPod.Name, metav1.GetOptions{})
if err != nil {
t.Errorf("Error getting the medium priority pod info: %v", err)
}
if len(pod.Status.NominatedNodeName) == 0 {
return true, nil
}
return false, err
}); err != nil {
t.Errorf("The nominated node name of the medium priority pod was not cleared: %v", err)
}
}
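// mkMinAvailablePDB builds a PodDisruptionBudget with the given minAvailable count and a selector matching matchLabels.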
func mkMinAvailablePDB(name, namespace string, uid types.UID, minAvailable int, matchLabels map[string]string) *policy.PodDisruptionBudget {
intMinAvailable := intstr.FromInt(minAvailable)
return &policy.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
},
Spec: policy.PodDisruptionBudgetSpec{
MinAvailable: &intMinAvailable,
Selector: &metav1.LabelSelector{MatchLabels: matchLabels},
},
}
}
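// addPodConditionReady marks the pod as Running and Ready so that the disruption controller counts it as healthy for its PDB.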
func addPodConditionReady(pod *v1.Pod) {
pod.Status = v1.PodStatus{
Phase: v1.PodRunning,
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
Status: v1.ConditionTrue,
},
},
}
}
// TestPDBInPreemption tests PodDisruptionBudget support in preemption.
func TestPDBInPreemption(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption-pdb")
defer cleanupTest(t, context)
cs := context.clientSet
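// Start the disruption controller so that PDB status (e.g. currently healthy pods) is kept up to date while the test marks pods ready.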
initDisruptionController(context)
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
}
defaultNodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
type nodeConfig struct {
name string
res *v1.ResourceList
}
tests := []struct {
description string
nodes []*nodeConfig
pdbs []*policy.PodDisruptionBudget
pdbPodNum []int32
existingPods []*v1.Pod
pod *v1.Pod
preemptedPodIndexes map[int]struct{}
}{
{
description: "A non-PDB violating pod is preempted despite its higher priority",
nodes: []*nodeConfig{{name: "node-1", res: defaultNodeRes}},
pdbs: []*policy.PodDisruptionBudget{
mkMinAvailablePDB("pdb-1", context.ns.Name, types.UID("pdb-1-uid"), 2, map[string]string{"foo": "bar"}),
},
pdbPodNum: []int32{2},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod1",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
Labels: map[string]string{"foo": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod2",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
Labels: map[string]string{"foo": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod3",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: defaultPodRes,
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{2: {}},
},
{
description: "A node without any PDB violating pods is preferred for preemption",
nodes: []*nodeConfig{
{name: "node-1", res: defaultNodeRes},
{name: "node-2", res: defaultNodeRes},
},
pdbs: []*policy.PodDisruptionBudget{
mkMinAvailablePDB("pdb-1", context.ns.Name, types.UID("pdb-1-uid"), 2, map[string]string{"foo": "bar"}),
},
pdbPodNum: []int32{1},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod1",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-1",
Labels: map[string]string{"foo": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod2",
Namespace: context.ns.Name,
Priority: &mediumPriority,
NodeName: "node-2",
Resources: defaultPodRes,
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{1: {}},
},
{
description: "A node with fewer PDB violating pods is preferred for preemption",
nodes: []*nodeConfig{
{name: "node-1", res: defaultNodeRes},
{name: "node-2", res: defaultNodeRes},
{name: "node-3", res: defaultNodeRes},
},
pdbs: []*policy.PodDisruptionBudget{
mkMinAvailablePDB("pdb-1", context.ns.Name, types.UID("pdb-1-uid"), 2, map[string]string{"foo1": "bar"}),
mkMinAvailablePDB("pdb-2", context.ns.Name, types.UID("pdb-2-uid"), 2, map[string]string{"foo2": "bar"}),
},
pdbPodNum: []int32{1, 5},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod1",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-1",
Labels: map[string]string{"foo1": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod1",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: defaultPodRes,
NodeName: "node-1",
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod2",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-2",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod2",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: defaultPodRes,
NodeName: "node-2",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod4",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-3",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod5",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-3",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod6",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-3",
Labels: map[string]string{"foo2": "bar"},
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(400, resource.BinarySI)},
},
}),
// Node-3 is chosen because preempting its pods does not violate its PDB and its victims have lower priority than those on node-2.
preemptedPodIndexes: map[int]struct{}{4: {}, 5: {}, 6: {}},
},
}
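// For each test case: create the nodes and existing pods, wait for the pods and PDBs to be reflected in the scheduler cache, then create the preemptor and verify that exactly the expected victims are evicted.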
for _, test := range tests {
t.Logf("================ Running test: %v\n", test.description)
for _, nodeConf := range test.nodes {
_, err := createNode(cs, nodeConf.name, nodeConf.res)
if err != nil {
t.Fatalf("Error creating node %v: %v", nodeConf.name, err)
}
}
pods := make([]*v1.Pod, len(test.existingPods))
var err error
// Create and run existingPods.
for i, p := range test.existingPods {
if pods[i], err = runPausePod(cs, p); err != nil {
t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
}
// Add pod condition ready so that PDB is updated.
addPodConditionReady(p)
if _, err := context.clientSet.CoreV1().Pods(context.ns.Name).UpdateStatus(p); err != nil {
t.Fatal(err)
}
}
// Wait for Pods to be stable in scheduler cache.
if err := waitCachedPodsStable(context, test.existingPods); err != nil {
t.Fatalf("Not all pods are stable in the cache: %v", err)
}
// Create PDBs.
for _, pdb := range test.pdbs {
_, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Create(pdb)
if err != nil {
t.Fatalf("Failed to create PDB: %v", err)
}
}
// Wait for PDBs to show up in the scheduler's cache and become stable.
if err := waitCachedPDBsStable(context, test.pdbs, test.pdbPodNum); err != nil {
t.Fatalf("Not all pdbs are stable in the cache: %v", err)
}
// Create the "pod".
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Wait for preemption of pods and make sure the other ones are not preempted.
for i, p := range pods {
if _, found := test.preemptedPodIndexes[i]; found {
if err = wait.Poll(time.Second, wait.ForeverTestTimeout, podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Test [%v]: Pod %v is not getting evicted.", test.description, p.Name)
}
} else {
if p.DeletionTimestamp != nil {
t.Errorf("Test [%v]: Didn't expect pod %v to get preempted.", test.description, p.Name)
}
}
}
// Also check that the preemptor pod gets the annotation for nominated node name.
if len(test.preemptedPodIndexes) > 0 {
if err := waitForNominatedNodeName(cs, preemptor); err != nil {
t.Errorf("Test [%v]: NominatedNodeName annotation was not set for pod %v: %v", test.description, preemptor.Name, err)
}
}
// Cleanup
pods = append(pods, preemptor)
cleanupPods(cs, t, pods)
cs.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).DeleteCollection(nil, metav1.ListOptions{})
cs.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
}
}

View File

@@ -0,0 +1,174 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
testutils "k8s.io/kubernetes/test/utils"
)
// This file tests the scheduler priority functions.
// TestNodeAffinity verifies that the scheduler's node affinity priority function
// works correctly.
func TestNodeAffinity(t *testing.T) {
context := initTest(t, "node-affinity")
defer cleanupTest(t, context)
// Add a few nodes.
nodes, err := createNodes(context.clientSet, "testnode", nil, 5)
if err != nil {
t.Fatalf("Cannot create nodes: %v", err)
}
// Add a label to one of the nodes.
labeledNode := nodes[1]
labelKey := "kubernetes.io/node-topologyKey"
labelValue := "topologyvalue"
labels := map[string]string{
labelKey: labelValue,
}
if err = testutils.AddLabelsToNode(context.clientSet, labeledNode.Name, labels); err != nil {
t.Fatalf("Cannot add labels to node: %v", err)
}
if err = waitForNodeLabels(context.clientSet, labeledNode.Name, labels); err != nil {
t.Fatalf("Adding labels to node didn't succeed: %v", err)
}
// Create a pod with node affinity.
podName := "pod-with-node-affinity"
pod, err := runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
Name: podName,
Namespace: context.ns.Name,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
{
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: labelKey,
Operator: v1.NodeSelectorOpIn,
Values: []string{labelValue},
},
},
},
Weight: 20,
},
},
},
},
}))
if err != nil {
t.Fatalf("Error running pause pod: %v", err)
}
if pod.Spec.NodeName != labeledNode.Name {
t.Errorf("Pod %v got scheduled on an unexpected node: %v. Expected node: %v.", podName, pod.Spec.NodeName, labeledNode.Name)
} else {
t.Logf("Pod %v got successfully scheduled on node %v.", podName, pod.Spec.NodeName)
}
}
// TestPodAffinity verifies that the scheduler's pod affinity priority function
// works correctly.
func TestPodAffinity(t *testing.T) {
context := initTest(t, "pod-affinity")
defer cleanupTest(t, context)
// Add a few nodes.
nodesInTopology, err := createNodes(context.clientSet, "in-topology", nil, 5)
if err != nil {
t.Fatalf("Cannot create nodes: %v", err)
}
topologyKey := "node-topologykey"
topologyValue := "topologyvalue"
nodeLabels := map[string]string{
topologyKey: topologyValue,
}
for _, node := range nodesInTopology {
// Add topology key to all the nodes.
if err = testutils.AddLabelsToNode(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Cannot add labels to node %v: %v", node.Name, err)
}
if err = waitForNodeLabels(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Adding labels to node %v didn't succeed: %v", node.Name, err)
}
}
// Add a pod with a label and wait for it to schedule.
labelKey := "service"
labelValue := "S1"
_, err = runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
Name: "attractor-pod",
Namespace: context.ns.Name,
Labels: map[string]string{labelKey: labelValue},
}))
if err != nil {
t.Fatalf("Error running the attractor pod: %v", err)
}
// Add a few more nodes without the topology label.
_, err = createNodes(context.clientSet, "other-node", nil, 5)
if err != nil {
t.Fatalf("Cannot create the second set of nodes: %v", err)
}
// Add a new pod with affinity to the attractor pod.
podName := "pod-with-podaffinity"
pod, err := runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
Name: podName,
Namespace: context.ns.Name,
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: labelKey,
Operator: metav1.LabelSelectorOpIn,
Values: []string{labelValue, "S3"},
},
{
Key: labelKey,
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"S2"},
}, {
Key: labelKey,
Operator: metav1.LabelSelectorOpExists,
},
},
},
TopologyKey: topologyKey,
Namespaces: []string{context.ns.Name},
},
Weight: 50,
},
},
},
},
}))
if err != nil {
t.Fatalf("Error running pause pod: %v", err)
}
// The new pod must be scheduled on one of the nodes with the same topology
// key-value as the attractor pod.
for _, node := range nodesInTopology {
if node.Name == pod.Spec.NodeName {
t.Logf("Pod %v got successfully scheduled on node %v.", podName, pod.Spec.NodeName)
return
}
}
t.Errorf("Pod %v got scheduled on an unexpected node: %v.", podName, pod.Spec.NodeName)
}

View File

@@ -0,0 +1,848 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the scheduler.
import (
"fmt"
"reflect"
"testing"
"time"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
clientv1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
schedulerapp "k8s.io/kubernetes/cmd/kube-scheduler/app"
schedulerappconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/scheduler"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
_ "k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/factory"
"k8s.io/kubernetes/test/integration/framework"
)
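// nodeStateManager pairs the two node mutations exercised by TestUnschedulableNodes: one that makes a node unschedulable and one that makes it schedulable again.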
type nodeMutationFunc func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface)
type nodeStateManager struct {
makeSchedulable nodeMutationFunc
makeUnSchedulable nodeMutationFunc
}
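// PredicateOne and PredicateTwo are trivial fit predicates that always pass; they exist only to be registered and referenced from the policy ConfigMaps below.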
func PredicateOne(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PredicateTwo(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
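// PriorityOne and PriorityTwo are no-op priority functions registered for the same purpose.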
func PriorityOne(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
func PriorityTwo(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
// TestSchedulerCreationFromConfigMap verifies that the scheduler can be created
// from a policy provided by a ConfigMap object, and that the resulting
// configuration is applied correctly.
func TestSchedulerCreationFromConfigMap(t *testing.T) {
_, s, closeFn := framework.RunAMaster(nil)
defer closeFn()
ns := framework.CreateTestingNamespace("configmap", s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
defer clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
// Pre-register some predicate and priority functions
factory.RegisterFitPredicate("PredicateOne", PredicateOne)
factory.RegisterFitPredicate("PredicateTwo", PredicateTwo)
factory.RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
factory.RegisterPriorityFunction("PriorityTwo", PriorityTwo, 1)
for i, test := range []struct {
policy string
expectedPredicates sets.String
expectedPrioritizers sets.String
}{
{
policy: `{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [
{"name" : "PredicateOne"},
{"name" : "PredicateTwo"}
],
"priorities" : [
{"name" : "PriorityOne", "weight" : 1},
{"name" : "PriorityTwo", "weight" : 5}
]
}`,
expectedPredicates: sets.NewString(
"CheckNodeCondition", // mandatory predicate
"PredicateOne",
"PredicateTwo",
),
expectedPrioritizers: sets.NewString(
"PriorityOne",
"PriorityTwo",
),
},
{
policy: `{
"kind" : "Policy",
"apiVersion" : "v1"
}`,
expectedPredicates: sets.NewString(
"CheckNodeCondition", // mandatory predicate
"CheckNodeDiskPressure",
"CheckNodeMemoryPressure",
"CheckNodePIDPressure",
"CheckVolumeBinding",
"GeneralPredicates",
"MatchInterPodAffinity",
"MaxAzureDiskVolumeCount",
"MaxEBSVolumeCount",
"MaxGCEPDVolumeCount",
"NoDiskConflict",
"NoVolumeZoneConflict",
"PodToleratesNodeTaints",
),
expectedPrioritizers: sets.NewString(
"BalancedResourceAllocation",
"InterPodAffinityPriority",
"LeastRequestedPriority",
"NodeAffinityPriority",
"NodePreferAvoidPodsPriority",
"SelectorSpreadPriority",
"TaintTolerationPriority",
),
},
{
policy: `{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [],
"priorities" : []
}`,
expectedPredicates: sets.NewString(
"CheckNodeCondition", // mandatory predicate
),
expectedPrioritizers: sets.NewString(),
},
} {
// Add a ConfigMap object.
configPolicyName := fmt.Sprintf("scheduler-custom-policy-config-%d", i)
policyConfigMap := v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceSystem, Name: configPolicyName},
Data: map[string]string{componentconfig.SchedulerPolicyConfigMapKey: test.policy},
}
policyConfigMap.APIVersion = "v1"
clientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(&policyConfigMap)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientSet.CoreV1().Events("")})
ss := &schedulerappconfig.Config{
ComponentConfig: componentconfig.KubeSchedulerConfiguration{
HardPodAffinitySymmetricWeight: v1.DefaultHardPodAffinitySymmetricWeight,
SchedulerName: v1.DefaultSchedulerName,
AlgorithmSource: componentconfig.SchedulerAlgorithmSource{
Policy: &componentconfig.SchedulerPolicySource{
ConfigMap: &componentconfig.SchedulerPolicyConfigMapSource{
Namespace: policyConfigMap.Namespace,
Name: policyConfigMap.Name,
},
},
},
},
Client: clientSet,
InformerFactory: informerFactory,
PodInformer: factory.NewPodInformer(clientSet, 0),
EventClient: clientSet.CoreV1(),
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: v1.DefaultSchedulerName}),
Broadcaster: eventBroadcaster,
}
config, err := schedulerapp.NewSchedulerConfig(ss.Complete())
if err != nil {
t.Fatalf("couldn't make scheduler config: %v", err)
}
// Verify that the config is applied correctly.
schedPredicates := sets.NewString()
for k := range config.Algorithm.Predicates() {
schedPredicates.Insert(k)
}
schedPrioritizers := sets.NewString()
for _, p := range config.Algorithm.Prioritizers() {
schedPrioritizers.Insert(p.Name)
}
if !schedPredicates.Equal(test.expectedPredicates) {
t.Errorf("Expected predicates %v, got %v", test.expectedPredicates, schedPredicates)
}
if !schedPrioritizers.Equal(test.expectedPrioritizers) {
t.Errorf("Expected priority functions %v, got %v", test.expectedPrioritizers, schedPrioritizers)
}
}
}
// TestSchedulerCreationFromNonExistentConfigMap ensures that creation of the
// scheduler from a non-existent ConfigMap fails.
func TestSchedulerCreationFromNonExistentConfigMap(t *testing.T) {
_, s, closeFn := framework.RunAMaster(nil)
defer closeFn()
ns := framework.CreateTestingNamespace("configmap", s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
defer clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientSet.CoreV1().Events("")})
ss := &schedulerappconfig.Config{
ComponentConfig: componentconfig.KubeSchedulerConfiguration{
SchedulerName: v1.DefaultSchedulerName,
AlgorithmSource: componentconfig.SchedulerAlgorithmSource{
Policy: &componentconfig.SchedulerPolicySource{
ConfigMap: &componentconfig.SchedulerPolicyConfigMapSource{
Namespace: "non-existent-config",
Name: "non-existent-config",
},
},
},
HardPodAffinitySymmetricWeight: v1.DefaultHardPodAffinitySymmetricWeight,
},
Client: clientSet,
InformerFactory: informerFactory,
PodInformer: factory.NewPodInformer(clientSet, 0),
EventClient: clientSet.CoreV1(),
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: v1.DefaultSchedulerName}),
Broadcaster: eventBroadcaster,
}
_, err := schedulerapp.NewSchedulerConfig(ss.Complete())
if err == nil {
t.Fatalf("Creation of scheduler didn't fail while the policy ConfigMap didn't exist.")
}
}
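// TestUnschedulableNodes verifies that pods are not scheduled onto nodes that are unschedulable, either via Spec.Unschedulable or via a Ready condition that is not True, and that they are scheduled once the node becomes schedulable again.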
func TestUnschedulableNodes(t *testing.T) {
context := initTest(t, "unschedulable-nodes")
defer cleanupTest(t, context)
nodeLister := context.schedulerConfigFactory.GetNodeLister()
// NOTE: This test cannot run in parallel, because it is creating and deleting
// non-namespaced objects (Nodes).
defer context.clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
goodCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason:            "schedulable condition",
LastHeartbeatTime: metav1.Time{Time: time.Now()},
}
badCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
Reason:            "unschedulable condition",
LastHeartbeatTime: metav1.Time{Time: time.Now()},
}
// Create a new schedulable node, since we're first going to apply
// the unschedulable condition and verify that pods aren't scheduled.
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-scheduling-test-node"},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{goodCondition},
},
}
nodeKey, err := cache.MetaNamespaceKeyFunc(node)
if err != nil {
t.Fatalf("Couldn't retrieve key for node %v", node.Name)
}
// The test does the following for each nodeStateManager in this list:
// 1. Create a new node
// 2. Apply the makeUnSchedulable function
// 3. Create a new pod
// 4. Check that the pod doesn't get assigned to the node
// 5. Apply the schedulable function
// 6. Check that the pod *does* get assigned to the node
// 7. Delete the pod and node.
nodeModifications := []nodeStateManager{
// Test node.Spec.Unschedulable=true/false
{
makeUnSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Spec.Unschedulable = true
if _, err := c.CoreV1().Nodes().Update(n); err != nil {
t.Fatalf("Failed to update node with unschedulable=true: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
// An unschedulable node should still be present in the store.
// Nodes that are unschedulable, not ready, or under disk pressure
// (Node.Status.Conditions) are excluded by NodeConditionPredicate,
// which is a separate check from this one.
return node != nil && node.(*v1.Node).Spec.Unschedulable == true
})
if err != nil {
t.Fatalf("Failed to observe reflected update for setting unschedulable=true: %v", err)
}
},
makeSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Spec.Unschedulable = false
if _, err := c.CoreV1().Nodes().Update(n); err != nil {
t.Fatalf("Failed to update node with unschedulable=false: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
return node != nil && node.(*v1.Node).Spec.Unschedulable == false
})
if err != nil {
t.Fatalf("Failed to observe reflected update for setting unschedulable=false: %v", err)
}
},
},
// Test node.Status.Conditions=ConditionTrue/Unknown
{
makeUnSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Status = v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{badCondition},
}
if _, err = c.CoreV1().Nodes().UpdateStatus(n); err != nil {
t.Fatalf("Failed to update node with bad status condition: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
return node != nil && node.(*v1.Node).Status.Conditions[0].Status == v1.ConditionUnknown
})
if err != nil {
t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
}
},
makeSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Status = v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{goodCondition},
}
if _, err = c.CoreV1().Nodes().UpdateStatus(n); err != nil {
t.Fatalf("Failed to update node with healthy status condition: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
return node != nil && node.(*v1.Node).Status.Conditions[0].Status == v1.ConditionTrue
})
if err != nil {
t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
}
},
},
}
for i, mod := range nodeModifications {
unSchedNode, err := context.clientSet.CoreV1().Nodes().Create(node)
if err != nil {
t.Fatalf("Failed to create node: %v", err)
}
// Apply the unschedulable modification to the node, and wait for the reflection
mod.makeUnSchedulable(t, unSchedNode, nodeLister, context.clientSet)
// Create the new pod; note that this must happen after the unschedulable
// modification, or the test has a race.
myPod, err := createPausePodWithResource(context.clientSet, "node-scheduling-test-pod", context.ns.Name, nil)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
// There are no schedulable nodes - the pod shouldn't be scheduled.
err = waitForPodToScheduleWithTimeout(context.clientSet, myPod, 2*time.Second)
if err == nil {
t.Errorf("Pod scheduled successfully on unschedulable nodes")
}
if err != wait.ErrWaitTimeout {
t.Errorf("Test %d: failed while trying to confirm the pod does not get scheduled on the node: %v", i, err)
} else {
t.Logf("Test %d: Pod did not get scheduled on an unschedulable node", i)
}
// Apply the schedulable modification to the node, and wait for the reflection
schedNode, err := context.clientSet.CoreV1().Nodes().Get(unSchedNode.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get node: %v", err)
}
mod.makeSchedulable(t, schedNode, nodeLister, context.clientSet)
// Wait until the pod is scheduled.
if err := waitForPodToSchedule(context.clientSet, myPod); err != nil {
t.Errorf("Test %d: failed to schedule a pod: %v", i, err)
} else {
t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
}
// Clean up.
if err := deletePod(context.clientSet, myPod.Name, myPod.Namespace); err != nil {
t.Errorf("Failed to delete pod: %v", err)
}
err = context.clientSet.CoreV1().Nodes().Delete(schedNode.Name, nil)
if err != nil {
t.Errorf("Failed to delete node: %v", err)
}
}
}
func TestMultiScheduler(t *testing.T) {
/*
This integration test exercises the multi-scheduler feature as follows:
1. create a default scheduler
2. create a node
3. create 3 pods: testPod, testPodFitsDefault and testPodFitsFoo
- note: the first two should be picked up and scheduled by the default scheduler, while the last one
should be picked up by a scheduler named "foo-scheduler", which does not exist yet.
4. **check point-1**:
- testPod and testPodFitsDefault should be scheduled
- testPodFitsFoo should NOT be scheduled
5. create a scheduler named "foo-scheduler"
6. **check point-2**:
- testPodFitsFoo should be scheduled
7. stop the default scheduler
8. create 2 pods: testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2
- note: these two pods belong to the default scheduler, which no longer exists
9. **check point-3**:
- testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2 should NOT be scheduled
*/
// 1. create and start default-scheduler
context := initTest(t, "multi-scheduler")
defer cleanupTest(t, context)
// 2. create a node
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-multi-scheduler-test-node"},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
},
}
context.clientSet.CoreV1().Nodes().Create(node)
// 3. create 3 pods for testing
t.Logf("create 3 pods for testing")
testPod, err := createPausePodWithResource(context.clientSet, "pod-without-scheduler-name", context.ns.Name, nil)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
defaultScheduler := "default-scheduler"
testPodFitsDefault, err := createPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-default", Namespace: context.ns.Name, SchedulerName: defaultScheduler}))
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
fooScheduler := "foo-scheduler"
testPodFitsFoo, err := createPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-foo", Namespace: context.ns.Name, SchedulerName: fooScheduler}))
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
// 4. **check point-1**:
// - testPod, testPodFitsDefault should be scheduled
// - testPodFitsFoo should NOT be scheduled
t.Logf("wait for pods scheduled")
if err := waitForPodToSchedule(context.clientSet, testPod); err != nil {
t.Errorf("Test MultiScheduler: %s Pod not scheduled: %v", testPod.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPod.Name)
}
if err := waitForPodToSchedule(context.clientSet, testPodFitsDefault); err != nil {
t.Errorf("Test MultiScheduler: %s Pod not scheduled: %v", testPodFitsDefault.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPodFitsDefault.Name)
}
if err := waitForPodToScheduleWithTimeout(context.clientSet, testPodFitsFoo, time.Second*5); err == nil {
t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodFitsFoo.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod not scheduled", testPodFitsFoo.Name)
}
// 5. create and start a scheduler with name "foo-scheduler"
clientSet2 := clientset.NewForConfigOrDie(&restclient.Config{Host: context.httpServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
informerFactory2 := informers.NewSharedInformerFactory(context.clientSet, 0)
podInformer2 := factory.NewPodInformer(context.clientSet, 0)
schedulerConfigFactory2 := createConfiguratorWithPodInformer(fooScheduler, clientSet2, podInformer2, informerFactory2)
schedulerConfig2, err := schedulerConfigFactory2.Create()
if err != nil {
t.Errorf("Couldn't create scheduler config: %v", err)
}
eventBroadcaster2 := record.NewBroadcaster()
schedulerConfig2.Recorder = eventBroadcaster2.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: fooScheduler})
eventBroadcaster2.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientSet2.CoreV1().Events("")})
go podInformer2.Informer().Run(schedulerConfig2.StopEverything)
informerFactory2.Start(schedulerConfig2.StopEverything)
sched2, _ := scheduler.NewFromConfigurator(&scheduler.FakeConfigurator{Config: schedulerConfig2}, nil...)
sched2.Run()
defer close(schedulerConfig2.StopEverything)
// 6. **check point-2**:
// - testPodFitsFoo should be scheduled
err = waitForPodToSchedule(context.clientSet, testPodFitsFoo)
if err != nil {
t.Errorf("Test MultiScheduler: %s Pod not scheduled, %v", testPodFitsFoo.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPodFitsFoo.Name)
}
// 7. delete the pods that were scheduled by the default scheduler, and stop the default scheduler
if err := deletePod(context.clientSet, testPod.Name, context.ns.Name); err != nil {
t.Errorf("Failed to delete pod: %v", err)
}
if err := deletePod(context.clientSet, testPodFitsDefault.Name, context.ns.Name); err != nil {
t.Errorf("Failed to delete pod: %v", err)
}
// The rest of this test assumes that closing StopEverything will cause the
// scheduler thread to stop immediately. It won't, and in fact it will often
// schedule 1 more pod before finally exiting. Comment out until we fix that.
//
// See https://github.com/kubernetes/kubernetes/issues/23715 for more details.
/*
close(schedulerConfig.StopEverything)
// 8. create 2 pods: testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2
// - note: these two pods belong to default scheduler which no longer exists
podWithNoAnnotation2 := createPod("pod-with-no-annotation2", nil)
podWithAnnotationFitsDefault2 := createPod("pod-with-annotation-fits-default2", schedulerAnnotationFitsDefault)
testPodNoAnnotation2, err := clientSet.CoreV1().Pods(ns.Name).Create(podWithNoAnnotation2)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
testPodWithAnnotationFitsDefault2, err := clientSet.CoreV1().Pods(ns.Name).Create(podWithAnnotationFitsDefault2)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
// 9. **check point-3**:
// - testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2 should NOT be scheduled
err = wait.Poll(time.Second, time.Second*5, podScheduled(clientSet, testPodNoAnnotation2.Namespace, testPodNoAnnotation2.Name))
if err == nil {
t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodNoAnnotation2.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod not scheduled", testPodNoAnnotation2.Name)
}
err = wait.Poll(time.Second, time.Second*5, podScheduled(clientSet, testPodWithAnnotationFitsDefault2.Namespace, testPodWithAnnotationFitsDefault2.Name))
if err == nil {
t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodWithAnnotationFitsDefault2.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod not scheduled", testPodWithAnnotationFitsDefault2.Name)
}
*/
}
// TestAllocatable verifies that the scheduler works correctly regardless of whether the kubelet is allocatable-aware.
func TestAllocatable(t *testing.T) {
context := initTest(t, "allocatable")
defer cleanupTest(t, context)
// 2. create a node without allocatable awareness
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(30, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
}
allocNode, err := createNode(context.clientSet, "node-allocatable-scheduler-test-node", nodeRes)
if err != nil {
t.Fatalf("Failed to create node: %v", err)
}
// 3. create resource pod which requires less than Capacity
podName := "pod-test-allocatable"
podRes := &v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(20, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(20, resource.BinarySI),
}
testAllocPod, err := createPausePodWithResource(context.clientSet, podName, context.ns.Name, podRes)
if err != nil {
t.Fatalf("Test allocatable unawareness failed to create pod: %v", err)
}
// 4. Test: this pod should be scheduled since the API server defaults Allocatable to Capacity
err = waitForPodToScheduleWithTimeout(context.clientSet, testAllocPod, time.Second*5)
if err != nil {
t.Errorf("Test allocatable unawareness: %s Pod not scheduled: %v", testAllocPod.Name, err)
} else {
t.Logf("Test allocatable unawareness: %s Pod scheduled", testAllocPod.Name)
}
// 5. Change the node status to be allocatable-aware; note that Allocatable is less than the pod's request
allocNode.Status = v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(30, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(10, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10, resource.BinarySI),
},
}
if _, err := context.clientSet.CoreV1().Nodes().UpdateStatus(allocNode); err != nil {
t.Fatalf("Failed to update node with Status.Allocatable: %v", err)
}
if err := deletePod(context.clientSet, testAllocPod.Name, context.ns.Name); err != nil {
t.Fatalf("Failed to remove the first pod: %v", err)
}
// 6. Make another pod with different name, same resource request
podName2 := "pod-test-allocatable2"
testAllocPod2, err := createPausePodWithResource(context.clientSet, podName2, context.ns.Name, podRes)
if err != nil {
t.Fatalf("Test allocatable awareness failed to create pod: %v", err)
}
// 7. Test: this pod should not be scheduled since it requests more than Allocatable
if err := waitForPodToScheduleWithTimeout(context.clientSet, testAllocPod2, time.Second*5); err == nil {
t.Errorf("Test allocatable awareness: %s Pod got scheduled unexpectedly, %v", testAllocPod2.Name, err)
} else {
t.Logf("Test allocatable awareness: %s Pod not scheduled as expected", testAllocPod2.Name)
}
}
// TestPDBCache verifies that the scheduler cache works as expected when handling
// PodDisruptionBudget.
func TestPDBCache(t *testing.T) {
context := initTest(t, "pdbcache")
defer cleanupTest(t, context)
intstrMin := intstr.FromInt(4)
pdb := &policy.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Namespace: context.ns.Name,
Name: "test-pdb",
UID: types.UID("test-pdb-uid"),
Labels: map[string]string{"tkey1": "tval1", "tkey2": "tval2"},
},
Spec: policy.PodDisruptionBudgetSpec{
MinAvailable: &intstrMin,
Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"tkey": "tvalue"}},
},
}
createdPDB, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Create(pdb)
if err != nil {
t.Errorf("Failed to create PDB: %v", err)
}
// Wait for PDB to show up in the scheduler's cache.
if err = wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs) > 0, err
}); err != nil {
t.Fatalf("No PDB was added to the cache: %v", err)
}
// Read PDB from the cache and compare it.
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if len(cachedPDBs) != 1 {
t.Fatalf("Expected to have 1 pdb in cache, but found %d.", len(cachedPDBs))
}
if !reflect.DeepEqual(createdPDB, cachedPDBs[0]) {
t.Errorf("Got different PDB than expected.\nDifference detected on:\n%s", diff.ObjectReflectDiff(createdPDB, cachedPDBs[0]))
}
// Update PDB and change its labels.
pdbCopy := *cachedPDBs[0]
pdbCopy.Labels = map[string]string{}
updatedPDB, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Update(&pdbCopy)
if err != nil {
t.Errorf("Failed to update PDB: %v", err)
}
// Wait for PDB to be updated in the scheduler's cache.
if err = wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs[0].Labels) == 0, err
}); err != nil {
t.Fatalf("No PDB was updated in the cache: %v", err)
}
// Read PDB from the cache and compare it.
cachedPDBs, err = context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if len(cachedPDBs) != 1 {
t.Errorf("Expected to have 1 pdb in cache, but found %d.", len(cachedPDBs))
}
if !reflect.DeepEqual(updatedPDB, cachedPDBs[0]) {
t.Errorf("Got different PDB than expected.\nDifference detected on:\n%s", diff.ObjectReflectDiff(updatedPDB, cachedPDBs[0]))
}
// Delete PDB.
err = context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Delete(pdb.Name, &metav1.DeleteOptions{})
if err != nil {
t.Errorf("Failed to delete PDB: %v", err)
}
// Wait for PDB to be deleted from the scheduler's cache.
if err = wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs) == 0, err
}); err != nil {
t.Errorf("No PDB was deleted from the cache: %v", err)
}
}
// TestSchedulerInformers tests that the scheduler receives informer events and updates its cache when
// pods are scheduled by other schedulers.
func TestSchedulerInformers(t *testing.T) {
// Initialize scheduler.
context := initTest(t, "scheduler-informer")
defer cleanupTest(t, context)
cs := context.clientSet
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
}
defaultNodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
type nodeConfig struct {
name string
res *v1.ResourceList
}
tests := []struct {
description string
nodes []*nodeConfig
existingPods []*v1.Pod
pod *v1.Pod
preemptedPodIndexes map[int]struct{}
}{
{
description: "Pod cannot be scheduled when node is occupied by pods scheduled by other schedulers",
nodes: []*nodeConfig{{name: "node-1", res: defaultNodeRes}},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "pod1",
Namespace: context.ns.Name,
Resources: defaultPodRes,
Labels: map[string]string{"foo": "bar"},
NodeName: "node-1",
SchedulerName: "foo-scheduler",
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "pod2",
Namespace: context.ns.Name,
Resources: defaultPodRes,
Labels: map[string]string{"foo": "bar"},
NodeName: "node-1",
SchedulerName: "bar-scheduler",
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "unschedulable-pod",
Namespace: context.ns.Name,
Resources: defaultPodRes,
}),
preemptedPodIndexes: map[int]struct{}{2: {}},
},
}
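// For each test case: create the nodes and the pods owned by other schedulers, then verify that the default scheduler sees those pods through its informers and leaves the new pod unschedulable.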
for _, test := range tests {
for _, nodeConf := range test.nodes {
_, err := createNode(cs, nodeConf.name, nodeConf.res)
if err != nil {
t.Fatalf("Error creating node %v: %v", nodeConf.name, err)
}
}
pods := make([]*v1.Pod, len(test.existingPods))
var err error
// Create and run existingPods.
for i, p := range test.existingPods {
if pods[i], err = runPausePod(cs, p); err != nil {
t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
}
}
// Create the new "pod".
unschedulable, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating new pod: %v", err)
}
if err := waitForPodUnschedulable(cs, unschedulable); err != nil {
t.Errorf("Pod %v got scheduled: %v", unschedulable.Name, err)
}
// Cleanup
pods = append(pods, unschedulable)
cleanupPods(cs, t, pods)
cs.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).DeleteCollection(nil, metav1.ListOptions{})
cs.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
}
}

View File

@@ -0,0 +1,313 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the Taint feature.
import (
"reflect"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
utilfeature "k8s.io/apiserver/pkg/util/feature"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
internalinformers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion"
"k8s.io/kubernetes/pkg/controller/nodelifecycle"
kubeadmission "k8s.io/kubernetes/pkg/kubeapiserver/admission"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
)
// TestTaintNodeByCondition verifies that:
// 1. a MemoryPressure toleration is added to non-BestEffort pods by PodTolerationRestriction
// 2. the NodeLifecycleController taints nodes based on node conditions
// 3. the scheduler allows pods to tolerate node condition taints, e.g. network unavailable
func TestTaintNodeByCondition(t *testing.T) {
enabled := utilfeature.DefaultFeatureGate.Enabled("TaintNodesByCondition")
defer func() {
if !enabled {
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
}
}()
// Enable TaintNodeByCondition
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
// Build PodToleration Admission.
admission := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
context := initTestMaster(t, "default", admission)
// Build clientset and informers for controllers.
internalClientset := internalclientset.NewForConfigOrDie(&restclient.Config{
QPS: -1,
Host: context.httpServer.URL,
ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
internalInformers := internalinformers.NewSharedInformerFactory(internalClientset, time.Second)
kubeadmission.WantsInternalKubeClientSet(admission).SetInternalKubeClientSet(internalClientset)
kubeadmission.WantsInternalKubeInformerFactory(admission).SetInternalKubeInformerFactory(internalInformers)
controllerCh := make(chan struct{})
defer close(controllerCh)
// Apply feature gates to enable TaintNodesByCondition
algorithmprovider.ApplyFeatureGates()
context = initTestScheduler(t, context, controllerCh, false, nil)
clientset := context.clientSet
informers := context.informerFactory
nsName := context.ns.Name
// Start NodeLifecycleController for taint.
nc, err := nodelifecycle.NewNodeLifecycleController(
informers.Core().V1().Pods(),
informers.Core().V1().Nodes(),
informers.Extensions().V1beta1().DaemonSets(),
nil, // CloudProvider
clientset,
time.Second, // Node monitor grace period
time.Second, // Node startup grace period
time.Second, // Node monitor period
time.Second, // Pod eviction timeout
100, // Eviction limiter QPS
100, // Secondary eviction limiter QPS
100, // Large cluster threshold
100, // Unhealthy zone threshold
true, // Run taint manager
true, // Use taint based evictions
true, // Enabled TaintNodeByCondition feature
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)
return
}
go nc.Run(controllerCh)
// Wait for all controllers to sync.
internalInformers.Start(controllerCh)
internalInformers.WaitForCacheSync(controllerCh)
// -------------------------------------------
// Test TaintNodeByCondition feature.
// -------------------------------------------
memoryPressureToleration := v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
}
// Case 1: Add MemoryPressure Toleration for non-BestEffort pod.
burstablePod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "burstable-pod",
Namespace: nsName,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "busybox",
Image: "busybox",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
},
},
},
},
},
}
burstablePodInServ, err := clientset.CoreV1().Pods(nsName).Create(burstablePod)
if err != nil {
t.Errorf("Case 1: Failed to create pod: %v", err)
} else if !reflect.DeepEqual(burstablePodInServ.Spec.Tolerations, []v1.Toleration{memoryPressureToleration}) {
t.Errorf("Case 1: Unexpected toleration of non-BestEffort pod, expected: %+v, got: %v",
[]v1.Toleration{memoryPressureToleration},
burstablePodInServ.Spec.Tolerations)
}
// Case 2: No MemoryPressure Toleration for BestEffort pod.
besteffortPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "best-effort-pod",
Namespace: nsName,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "busybox",
Image: "busybox",
},
},
},
}
besteffortPodInServ, err := clientset.CoreV1().Pods(nsName).Create(besteffortPod)
if err != nil {
t.Errorf("Case 2: Failed to create pod: %v", err)
} else if len(besteffortPodInServ.Spec.Tolerations) != 0 {
t.Errorf("Case 2: Unexpected toleration # of BestEffort pod, expected: 0, got: %v",
len(besteffortPodInServ.Spec.Tolerations))
}
// Case 3: Taint Node by NetworkUnavailable condition.
networkUnavailableNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
},
Conditions: []v1.NodeCondition{
{
Type: v1.NodeNetworkUnavailable,
Status: v1.ConditionTrue,
},
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
}
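// Watch node-1 and signal once the NodeLifecycleController adds the NetworkUnavailable NoSchedule taint.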
nodeInformerCh := make(chan bool)
nodeInformer := informers.Core().V1().Nodes().Informer()
nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: func(old, cur interface{}) {
curNode := cur.(*v1.Node)
if curNode.Name != "node-1" {
return
}
for _, taint := range curNode.Spec.Taints {
if taint.Key == algorithm.TaintNodeNetworkUnavailable &&
taint.Effect == v1.TaintEffectNoSchedule {
nodeInformerCh <- true
break
}
}
},
})
if _, err := clientset.CoreV1().Nodes().Create(networkUnavailableNode); err != nil {
t.Errorf("Case 3: Failed to create node: %v", err)
} else {
select {
case <-time.After(60 * time.Second):
t.Errorf("Case 3: Failed to taint node after 60s.")
case <-nodeInformerCh:
}
}
// Case 4: Schedule Pod with NetworkUnavailable toleration.
networkDaemonPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "network-daemon-pod",
Namespace: nsName,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "busybox",
Image: "busybox",
},
},
Tolerations: []v1.Toleration{
{
Key: algorithm.TaintNodeNetworkUnavailable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
},
},
},
}
if _, err := clientset.CoreV1().Pods(nsName).Create(networkDaemonPod); err != nil {
t.Errorf("Case 4: Failed to create pod for network daemon: %v", err)
} else {
if err := waitForPodToScheduleWithTimeout(clientset, networkDaemonPod, time.Second*60); err != nil {
t.Errorf("Case 4: Failed to schedule network daemon pod in 60s.")
}
}
// Case 5: Taint node by unschedulable condition
unschedulableNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-2",
},
Spec: v1.NodeSpec{
Unschedulable: true,
},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
},
},
}
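// Watch node-2 and signal once the NodeLifecycleController adds the unschedulable NoSchedule taint.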
nodeInformerCh2 := make(chan bool)
nodeInformer2 := informers.Core().V1().Nodes().Informer()
nodeInformer2.AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: func(old, cur interface{}) {
curNode := cur.(*v1.Node)
if curNode.Name != "node-2" {
return
}
for _, taint := range curNode.Spec.Taints {
if taint.Key == algorithm.TaintNodeUnschedulable &&
taint.Effect == v1.TaintEffectNoSchedule {
nodeInformerCh2 <- true
break
}
}
},
})
if _, err := clientset.CoreV1().Nodes().Create(unschedulableNode); err != nil {
t.Errorf("Case 5: Failed to create node: %v", err)
} else {
select {
case <-time.After(60 * time.Second):
t.Errorf("Case 5: Failed to taint node after 60s.")
case <-nodeInformerCh2:
}
}
}

View File

@@ -0,0 +1,640 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/admission"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/client-go/informers"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
clientv1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/disruption"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler"
_ "k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/factory"
"k8s.io/kubernetes/test/integration/framework"
imageutils "k8s.io/kubernetes/test/utils/image"
)
type TestContext struct {
closeFn framework.CloseFunc
httpServer *httptest.Server
ns *v1.Namespace
clientSet *clientset.Clientset
informerFactory informers.SharedInformerFactory
schedulerConfigFactory scheduler.Configurator
schedulerConfig *scheduler.Config
scheduler *scheduler.Scheduler
}
// createConfiguratorWithPodInformer creates a configurator for the scheduler.
func createConfiguratorWithPodInformer(
schedulerName string,
clientSet clientset.Interface,
podInformer coreinformers.PodInformer,
informerFactory informers.SharedInformerFactory,
) scheduler.Configurator {
return factory.NewConfigFactory(
schedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
podInformer,
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
informerFactory.Extensions().V1beta1().ReplicaSets(),
informerFactory.Apps().V1beta1().StatefulSets(),
informerFactory.Core().V1().Services(),
informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
utilfeature.DefaultFeatureGate.Enabled(features.EnableEquivalenceClassCache),
false,
)
}
// initTestMaster initializes a test environment and creates a master with default
// configuration.
func initTestMaster(t *testing.T, nsPrefix string, admission admission.Interface) *TestContext {
var context TestContext
// 1. Create master
h := &framework.MasterHolder{Initialized: make(chan struct{})}
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
<-h.Initialized
h.M.GenericAPIServer.Handler.ServeHTTP(w, req)
}))
masterConfig := framework.NewIntegrationTestMasterConfig()
if admission != nil {
masterConfig.GenericConfig.AdmissionControl = admission
}
_, context.httpServer, context.closeFn = framework.RunAMasterUsingServer(masterConfig, s, h)
if nsPrefix != "default" {
context.ns = framework.CreateTestingNamespace(nsPrefix+string(uuid.NewUUID()), s, t)
} else {
context.ns = framework.CreateTestingNamespace("default", s, t)
}
// 2. Create kubeclient
context.clientSet = clientset.NewForConfigOrDie(
&restclient.Config{
QPS: -1, Host: s.URL,
ContentConfig: restclient.ContentConfig{
GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"},
},
},
)
return &context
}
// initTestScheduler initializes a test environment and creates a scheduler with default
// configuration.
func initTestScheduler(
t *testing.T,
context *TestContext,
controllerCh chan struct{},
setPodInformer bool,
policy *schedulerapi.Policy,
) *TestContext {
// Pod preemption is enabled by the default scheduler configuration, but preemption only
// happens when the PodPriority feature gate is enabled as well.
return initTestSchedulerWithOptions(t, context, controllerCh, setPodInformer, policy, false)
}
// initTestSchedulerWithOptions initializes a test environment and creates a scheduler with default
// configuration and other options.
func initTestSchedulerWithOptions(
t *testing.T,
context *TestContext,
controllerCh chan struct{},
setPodInformer bool,
policy *schedulerapi.Policy,
disablePreemption bool,
) *TestContext {
// Enable EnableEquivalenceClassCache for all integration tests.
defer utilfeaturetesting.SetFeatureGateDuringTest(
t,
utilfeature.DefaultFeatureGate,
features.EnableEquivalenceClassCache, true)()
// 1. Create scheduler
context.informerFactory = informers.NewSharedInformerFactory(context.clientSet, time.Second)
var podInformer coreinformers.PodInformer
// create independent pod informer if required
if setPodInformer {
podInformer = factory.NewPodInformer(context.clientSet, 12*time.Hour)
} else {
podInformer = context.informerFactory.Core().V1().Pods()
}
context.schedulerConfigFactory = createConfiguratorWithPodInformer(
v1.DefaultSchedulerName, context.clientSet, podInformer, context.informerFactory)
var err error
if policy != nil {
context.schedulerConfig, err = context.schedulerConfigFactory.CreateFromConfig(*policy)
} else {
context.schedulerConfig, err = context.schedulerConfigFactory.Create()
}
if err != nil {
t.Fatalf("Couldn't create scheduler config: %v", err)
}
// set controllerCh if provided.
if controllerCh != nil {
context.schedulerConfig.StopEverything = controllerCh
}
// set DisablePreemption option
context.schedulerConfig.DisablePreemption = disablePreemption
// start the independent pod informer if one was created.
if setPodInformer {
go podInformer.Informer().Run(context.schedulerConfig.StopEverything)
controller.WaitForCacheSync("scheduler", context.schedulerConfig.StopEverything, podInformer.Informer().HasSynced)
}
eventBroadcaster := record.NewBroadcaster()
context.schedulerConfig.Recorder = eventBroadcaster.NewRecorder(
legacyscheme.Scheme,
v1.EventSource{Component: v1.DefaultSchedulerName},
)
eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{
Interface: context.clientSet.CoreV1().Events(""),
})
context.informerFactory.Start(context.schedulerConfig.StopEverything)
context.informerFactory.WaitForCacheSync(context.schedulerConfig.StopEverything)
context.scheduler, err = scheduler.NewFromConfigurator(&scheduler.FakeConfigurator{
Config: context.schedulerConfig},
nil...)
if err != nil {
t.Fatalf("Couldn't create scheduler: %v", err)
}
context.scheduler.Run()
return context
}
// initDisruptionController initializes and runs a Disruption Controller to properly
// update PodDisruptionBudget objects.
func initDisruptionController(context *TestContext) *disruption.DisruptionController {
informers := informers.NewSharedInformerFactory(context.clientSet, 12*time.Hour)
dc := disruption.NewDisruptionController(
informers.Core().V1().Pods(),
informers.Policy().V1beta1().PodDisruptionBudgets(),
informers.Core().V1().ReplicationControllers(),
informers.Extensions().V1beta1().ReplicaSets(),
informers.Extensions().V1beta1().Deployments(),
informers.Apps().V1beta1().StatefulSets(),
context.clientSet)
informers.Start(context.schedulerConfig.StopEverything)
informers.WaitForCacheSync(context.schedulerConfig.StopEverything)
go dc.Run(context.schedulerConfig.StopEverything)
return dc
}
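// Illustrative sketch (not part of the original file): running the disruption
// controller and waiting for a hypothetical PodDisruptionBudget to be reflected
// in the scheduler cache with zero healthy pods. Names are example values only.
func exampleWaitForPDBInCache(t *testing.T, context *TestContext) {
	initDisruptionController(context)
	pdb := &policy.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: "example-pdb", Namespace: context.ns.Name},
		Spec: policy.PodDisruptionBudgetSpec{
			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "example"}},
		},
	}
	if _, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Create(pdb); err != nil {
		t.Fatalf("Failed to create PDB: %v", err)
	}
	// No pods match the selector yet, so CurrentHealthy is expected to be 0.
	if err := waitCachedPDBsStable(context, []*policy.PodDisruptionBudget{pdb}, []int32{0}); err != nil {
		t.Errorf("PDB was not reflected in the scheduler cache: %v", err)
	}
}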
// initTest initializes a test environment and creates master and scheduler with default
// configuration.
func initTest(t *testing.T, nsPrefix string) *TestContext {
return initTestScheduler(t, initTestMaster(t, nsPrefix, nil), nil, true, nil)
}
// initTestDisablePreemption initializes a test environment and creates master and scheduler with default
// configuration but with pod preemption disabled.
func initTestDisablePreemption(t *testing.T, nsPrefix string) *TestContext {
return initTestSchedulerWithOptions(
t, initTestMaster(t, nsPrefix, nil), nil, true, nil, true)
}
// cleanupTest shuts down the scheduler, deletes the created nodes and the test
// namespace, and tears down the master. It should be called at the end of a test.
func cleanupTest(t *testing.T, context *TestContext) {
// Kill the scheduler.
close(context.schedulerConfig.StopEverything)
// Cleanup nodes.
context.clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
framework.DeleteTestingNamespace(context.ns, context.httpServer, t)
context.closeFn()
}
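// Illustrative sketch (not part of the original file): the typical skeleton a
// test in this package follows, combining the helpers defined here. The test
// name, namespace prefix, node name and pod name are all hypothetical.
func exampleSchedulePausePod(t *testing.T) {
	context := initTest(t, "example")
	defer cleanupTest(t, context)
	// Create a node with the default resource list so the pod has a place to land.
	if _, err := createNode(context.clientSet, "example-node-1", nil); err != nil {
		t.Fatalf("Failed to create node: %v", err)
	}
	// Create a pause pod and wait for it to be scheduled.
	pod := initPausePod(context.clientSet, &pausePodConfig{Name: "example-pod", Namespace: context.ns.Name})
	if _, err := runPausePod(context.clientSet, pod); err != nil {
		t.Errorf("Pause pod was not scheduled: %v", err)
	}
}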
// waitForReflection waits until the passFunc confirms that the object it expects
// to see is in the store. Used to observe reflected events.
func waitForReflection(t *testing.T, nodeLister corelisters.NodeLister, key string,
passFunc func(n interface{}) bool) error {
nodes := []*v1.Node{}
err := wait.Poll(time.Millisecond*100, wait.ForeverTestTimeout, func() (bool, error) {
n, err := nodeLister.Get(key)
switch {
case err == nil && passFunc(n):
return true, nil
case errors.IsNotFound(err):
nodes = append(nodes, nil)
case err != nil:
t.Errorf("Unexpected error: %v", err)
default:
nodes = append(nodes, n)
}
return false, nil
})
if err != nil {
t.Logf("Logging consecutive node versions received from store:")
for i, n := range nodes {
t.Logf("%d: %#v", i, n)
}
}
return err
}
// nodeHasLabels returns a function that checks if a node has all the given labels.
func nodeHasLabels(cs clientset.Interface, nodeName string, labels map[string]string) wait.ConditionFunc {
return func() (bool, error) {
node, err := cs.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
if err != nil {
// This could be a connection error so we want to retry.
return false, nil
}
for k, v := range labels {
if node.Labels == nil || node.Labels[k] != v {
return false, nil
}
}
return true, nil
}
}
// waitForNodeLabels waits for the given node to have all the given labels.
func waitForNodeLabels(cs clientset.Interface, nodeName string, labels map[string]string) error {
return wait.Poll(time.Millisecond*100, wait.ForeverTestTimeout, nodeHasLabels(cs, nodeName, labels))
}
// createNode creates a node with the given resource list and returns a pointer
// to it and an error status. If 'res' is nil, a predefined amount of resources
// is used.
func createNode(cs clientset.Interface, name string, res *v1.ResourceList) (*v1.Node, error) {
// if resource is nil, we use a default amount of resources for the node.
if res == nil {
res = &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
}
}
n := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: name},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: *res,
},
}
return cs.CoreV1().Nodes().Create(n)
}
// updateNodeStatus updates the status of node.
func updateNodeStatus(cs clientset.Interface, node *v1.Node) error {
_, err := cs.CoreV1().Nodes().UpdateStatus(node)
return err
}
// createNodes creates `numNodes` nodes. The created node names will be in the
// form of "`prefix`-X" where X is an ordinal.
func createNodes(cs clientset.Interface, prefix string, res *v1.ResourceList, numNodes int) ([]*v1.Node, error) {
nodes := make([]*v1.Node, numNodes)
for i := 0; i < numNodes; i++ {
nodeName := fmt.Sprintf("%v-%d", prefix, i)
node, err := createNode(cs, nodeName, res)
if err != nil {
return nodes[:], err
}
nodes[i] = node
}
return nodes[:], nil
}
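// Illustrative sketch (not part of the original file): creating three identically
// sized nodes with an explicit resource list. The prefix and quantities are
// arbitrary example values.
func exampleCreateSmallNodes(cs clientset.Interface) ([]*v1.Node, error) {
	res := &v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("500m"),
		v1.ResourceMemory: resource.MustParse("1Gi"),
		v1.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
	}
	// Produces nodes named small-node-0, small-node-1 and small-node-2.
	return createNodes(cs, "small-node", res, 3)
}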
type pausePodConfig struct {
Name string
Namespace string
Affinity *v1.Affinity
Annotations, Labels, NodeSelector map[string]string
Resources *v1.ResourceRequirements
Tolerations []v1.Toleration
NodeName string
SchedulerName string
Priority *int32
}
// initPausePod initializes a pod API object from the given config. It is used
// mainly in the pod creation process.
func initPausePod(cs clientset.Interface, conf *pausePodConfig) *v1.Pod {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: conf.Name,
Namespace: conf.Namespace,
Labels: conf.Labels,
Annotations: conf.Annotations,
},
Spec: v1.PodSpec{
NodeSelector: conf.NodeSelector,
Affinity: conf.Affinity,
Containers: []v1.Container{
{
Name: conf.Name,
Image: imageutils.GetPauseImageName(),
},
},
Tolerations: conf.Tolerations,
NodeName: conf.NodeName,
SchedulerName: conf.SchedulerName,
Priority: conf.Priority,
},
}
if conf.Resources != nil {
pod.Spec.Containers[0].Resources = *conf.Resources
}
return pod
}
// createPausePod creates a pod with "Pause" image and the given config and
// returns its pointer and error status.
func createPausePod(cs clientset.Interface, p *v1.Pod) (*v1.Pod, error) {
return cs.CoreV1().Pods(p.Namespace).Create(p)
}
// createPausePodWithResource creates a pod with "Pause" image and the given
// resources and returns its pointer and error status. The resource list can be
// nil.
func createPausePodWithResource(cs clientset.Interface, podName string,
nsName string, res *v1.ResourceList) (*v1.Pod, error) {
var conf pausePodConfig
if res == nil {
conf = pausePodConfig{
Name: podName,
Namespace: nsName,
}
} else {
conf = pausePodConfig{
Name: podName,
Namespace: nsName,
Resources: &v1.ResourceRequirements{
Requests: *res,
},
}
}
return createPausePod(cs, initPausePod(cs, &conf))
}
// runPausePod creates a pod with "Pause" image and the given config and waits
// until it is scheduled. It returns its pointer and error status.
func runPausePod(cs clientset.Interface, pod *v1.Pod) (*v1.Pod, error) {
pod, err := cs.CoreV1().Pods(pod.Namespace).Create(pod)
if err != nil {
return nil, fmt.Errorf("Error creating pause pod: %v", err)
}
if err = waitForPodToSchedule(cs, pod); err != nil {
return pod, fmt.Errorf("Pod %v didn't schedule successfully. Error: %v", pod.Name, err)
}
if pod, err = cs.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}); err != nil {
return pod, fmt.Errorf("Error getting pod %v info: %v", pod.Name, err)
}
return pod, nil
}
// podDeleted returns true if a pod is not found in the given namespace.
func podDeleted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return true, nil
}
if pod.DeletionTimestamp != nil {
return true, nil
}
return false, nil
}
}
// podIsGettingEvicted returns true if the pod's deletion timestamp is set.
func podIsGettingEvicted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if err != nil {
return false, err
}
if pod.DeletionTimestamp != nil {
return true, nil
}
return false, nil
}
}
// podScheduled returns true if a node is assigned to the given pod.
func podScheduled(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
if err != nil {
// This could be a connection error so we want to retry.
return false, nil
}
if pod.Spec.NodeName == "" {
return false, nil
}
return true, nil
}
}
// podUnschedulable returns a condition function that returns true if the given pod
// gets unschedulable status.
func podUnschedulable(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
if err != nil {
// This could be a connection error so we want to retry.
return false, nil
}
_, cond := podutil.GetPodCondition(&pod.Status, v1.PodScheduled)
return cond != nil && cond.Status == v1.ConditionFalse &&
cond.Reason == v1.PodReasonUnschedulable, nil
}
}
// waitForPodToScheduleWithTimeout waits for a pod to get scheduled and returns
// an error if it does not get scheduled within the given timeout.
func waitForPodToScheduleWithTimeout(cs clientset.Interface, pod *v1.Pod, timeout time.Duration) error {
return wait.Poll(100*time.Millisecond, timeout, podScheduled(cs, pod.Namespace, pod.Name))
}
// waitForPodToSchedule waits for a pod to get scheduled and returns an error if
// it does not get scheduled within the timeout duration (30 seconds).
func waitForPodToSchedule(cs clientset.Interface, pod *v1.Pod) error {
return waitForPodToScheduleWithTimeout(cs, pod, 30*time.Second)
}
// waitForPodUnschedulableWithTimeout waits for a pod to fail scheduling and returns
// an error if it does not become unschedulable within the given timeout.
func waitForPodUnschedulableWithTimeout(cs clientset.Interface, pod *v1.Pod, timeout time.Duration) error {
return wait.Poll(100*time.Millisecond, timeout, podUnschedulable(cs, pod.Namespace, pod.Name))
}
// waitForPodUnschedulable waits for a pod to fail scheduling and returns
// an error if it does not become unschedulable within the timeout duration (30 seconds).
func waitForPodUnschedulable(cs clientset.Interface, pod *v1.Pod) error {
return waitForPodUnschedulableWithTimeout(cs, pod, 30*time.Second)
}
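// Illustrative sketch (not part of the original file): asserting that a pod
// requesting more CPU than any node offers stays unschedulable. The pod name,
// namespace and quantity are example values.
func exampleAssertUnschedulable(t *testing.T, cs clientset.Interface, ns string) {
	pod, err := createPausePodWithResource(cs, "example-oversized-pod", ns, &v1.ResourceList{
		v1.ResourceCPU: resource.MustParse("1000"), // far more CPU than the test nodes provide
	})
	if err != nil {
		t.Fatalf("Failed to create pod: %v", err)
	}
	if err := waitForPodUnschedulable(cs, pod); err != nil {
		t.Errorf("Pod %v did not become unschedulable: %v", pod.Name, err)
	}
}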
// waitCachedPDBsStable waits for PDBs in scheduler cache to have "CurrentHealthy" status equal to
// the expected values.
func waitCachedPDBsStable(context *TestContext, pdbs []*policy.PodDisruptionBudget, pdbPodNum []int32) error {
return wait.Poll(time.Second, 60*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
return false, err
}
if len(cachedPDBs) != len(pdbs) {
return false, nil
}
for i, pdb := range pdbs {
found := false
for _, cpdb := range cachedPDBs {
if pdb.Name == cpdb.Name && pdb.Namespace == cpdb.Namespace {
found = true
if cpdb.Status.CurrentHealthy != pdbPodNum[i] {
return false, nil
}
}
}
if !found {
return false, nil
}
}
return true, nil
})
}
// waitCachedPodsStable waits until scheduler cache has the given pods.
func waitCachedPodsStable(context *TestContext, pods []*v1.Pod) error {
return wait.Poll(time.Second, 30*time.Second, func() (bool, error) {
cachedPods, err := context.scheduler.Config().SchedulerCache.List(labels.Everything())
if err != nil {
return false, err
}
if len(pods) != len(cachedPods) {
return false, nil
}
for _, p := range pods {
actualPod, err1 := context.clientSet.CoreV1().Pods(p.Namespace).Get(p.Name, metav1.GetOptions{})
if err1 != nil {
return false, err1
}
cachedPod, err2 := context.scheduler.Config().SchedulerCache.GetPod(actualPod)
if err2 != nil || cachedPod == nil {
return false, err2
}
}
return true, nil
})
}
// deletePod deletes the given pod in the given namespace.
func deletePod(cs clientset.Interface, podName string, nsName string) error {
return cs.CoreV1().Pods(nsName).Delete(podName, metav1.NewDeleteOptions(0))
}
// cleanupPods deletes the given pods and waits for them to be actually deleted.
func cleanupPods(cs clientset.Interface, t *testing.T, pods []*v1.Pod) {
for _, p := range pods {
err := cs.CoreV1().Pods(p.Namespace).Delete(p.Name, metav1.NewDeleteOptions(0))
if err != nil && !errors.IsNotFound(err) {
t.Errorf("error while deleting pod %v/%v: %v", p.Namespace, p.Name, err)
}
}
for _, p := range pods {
if err := wait.Poll(time.Second, wait.ForeverTestTimeout,
podDeleted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("error while waiting for pod %v/%v to get deleted: %v", p.Namespace, p.Name, err)
}
}
}
// noPodsInNamespace returns true if there are no pods left in the given namespace.
func noPodsInNamespace(c clientset.Interface, podNamespace string) wait.ConditionFunc {
return func() (bool, error) {
pods, err := c.CoreV1().Pods(podNamespace).List(metav1.ListOptions{})
if err != nil {
return false, err
}
return len(pods.Items) == 0, nil
}
}
// cleanupPodsInNamespace deletes the pods in the given namespace and waits for them to
// be actually deleted.
func cleanupPodsInNamespace(cs clientset.Interface, t *testing.T, ns string) {
if err := cs.CoreV1().Pods(ns).DeleteCollection(nil, metav1.ListOptions{}); err != nil {
t.Errorf("error while listing pod in namespace %v: %v", ns, err)
return
}
if err := wait.Poll(time.Second, wait.ForeverTestTimeout,
noPodsInNamespace(cs, ns)); err != nil {
t.Errorf("error while waiting for pods in namespace %v: %v", ns, err)
}
}

View File

@@ -0,0 +1,622 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the VolumeScheduling feature.
import (
"fmt"
"strconv"
"strings"
"testing"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
)
type testConfig struct {
client clientset.Interface
ns string
stop <-chan struct{}
teardown func()
}
var (
// Delete API objects immediately
deletePeriod = int64(0)
deleteOption = &metav1.DeleteOptions{GracePeriodSeconds: &deletePeriod}
modeWait = storagev1.VolumeBindingWaitForFirstConsumer
modeImmediate = storagev1.VolumeBindingImmediate
classWait = "wait"
classImmediate = "immediate"
)
const (
node1 = "node-1"
node2 = "node-2"
podLimit = 100
volsPerPod = 5
nodeAffinityLabelKey = "kubernetes.io/hostname"
)
type testPV struct {
name string
scMode storagev1.VolumeBindingMode
preboundPVC string
node string
}
type testPVC struct {
name string
scMode storagev1.VolumeBindingMode
preboundPV string
}
func TestVolumeBinding(t *testing.T) {
config := setupCluster(t, "volume-scheduling", 2)
defer config.teardown()
cases := map[string]struct {
pod *v1.Pod
pvs []*testPV
pvcs []*testPVC
// Create these, but they should not be bound in the end
unboundPvcs []*testPVC
unboundPvs []*testPV
shouldFail bool
}{
"immediate can bind": {
pod: makePod("pod-i-canbind", config.ns, []string{"pvc-i-canbind"}),
pvs: []*testPV{{"pv-i-canbind", modeImmediate, "", node1}},
pvcs: []*testPVC{{"pvc-i-canbind", modeImmediate, ""}},
},
"immediate cannot bind": {
pod: makePod("pod-i-cannotbind", config.ns, []string{"pvc-i-cannotbind"}),
unboundPvcs: []*testPVC{{"pvc-i-cannotbind", modeImmediate, ""}},
shouldFail: true,
},
"immediate pvc prebound": {
pod: makePod("pod-i-pvc-prebound", config.ns, []string{"pvc-i-prebound"}),
pvs: []*testPV{{"pv-i-pvc-prebound", modeImmediate, "", node1}},
pvcs: []*testPVC{{"pvc-i-prebound", modeImmediate, "pv-i-pvc-prebound"}},
},
"immediate pv prebound": {
pod: makePod("pod-i-pv-prebound", config.ns, []string{"pvc-i-pv-prebound"}),
pvs: []*testPV{{"pv-i-prebound", modeImmediate, "pvc-i-pv-prebound", node1}},
pvcs: []*testPVC{{"pvc-i-pv-prebound", modeImmediate, ""}},
},
"wait can bind": {
pod: makePod("pod-w-canbind", config.ns, []string{"pvc-w-canbind"}),
pvs: []*testPV{{"pv-w-canbind", modeWait, "", node1}},
pvcs: []*testPVC{{"pvc-w-canbind", modeWait, ""}},
},
"wait cannot bind": {
pod: makePod("pod-w-cannotbind", config.ns, []string{"pvc-w-cannotbind"}),
unboundPvcs: []*testPVC{{"pvc-w-cannotbind", modeWait, ""}},
shouldFail: true,
},
"wait pvc prebound": {
pod: makePod("pod-w-pvc-prebound", config.ns, []string{"pvc-w-prebound"}),
pvs: []*testPV{{"pv-w-pvc-prebound", modeWait, "", node1}},
pvcs: []*testPVC{{"pvc-w-prebound", modeWait, "pv-w-pvc-prebound"}},
},
"wait pv prebound": {
pod: makePod("pod-w-pv-prebound", config.ns, []string{"pvc-w-pv-prebound"}),
pvs: []*testPV{{"pv-w-prebound", modeWait, "pvc-w-pv-prebound", node1}},
pvcs: []*testPVC{{"pvc-w-pv-prebound", modeWait, ""}},
},
"wait can bind two": {
pod: makePod("pod-w-canbind-2", config.ns, []string{"pvc-w-canbind-2", "pvc-w-canbind-3"}),
pvs: []*testPV{
{"pv-w-canbind-2", modeWait, "", node2},
{"pv-w-canbind-3", modeWait, "", node2},
},
pvcs: []*testPVC{
{"pvc-w-canbind-2", modeWait, ""},
{"pvc-w-canbind-3", modeWait, ""},
},
unboundPvs: []*testPV{
{"pv-w-canbind-5", modeWait, "", node1},
},
},
"wait cannot bind two": {
pod: makePod("pod-w-cannotbind-2", config.ns, []string{"pvc-w-cannotbind-1", "pvc-w-cannotbind-2"}),
unboundPvcs: []*testPVC{
{"pvc-w-cannotbind-1", modeWait, ""},
{"pvc-w-cannotbind-2", modeWait, ""},
},
unboundPvs: []*testPV{
{"pv-w-cannotbind-1", modeWait, "", node2},
{"pv-w-cannotbind-2", modeWait, "", node1},
},
shouldFail: true,
},
"mix immediate and wait": {
pod: makePod("pod-mix-bound", config.ns, []string{"pvc-w-canbind-4", "pvc-i-canbind-2"}),
pvs: []*testPV{
{"pv-w-canbind-4", modeWait, "", node1},
{"pv-i-canbind-2", modeImmediate, "", node1},
},
pvcs: []*testPVC{
{"pvc-w-canbind-4", modeWait, ""},
{"pvc-i-canbind-2", modeImmediate, ""},
},
},
}
for name, test := range cases {
glog.Infof("Running test %v", name)
// Create two StorageClasses
suffix := rand.String(4)
classes := map[storagev1.VolumeBindingMode]*storagev1.StorageClass{}
classes[modeImmediate] = makeStorageClass(fmt.Sprintf("immediate-%v", suffix), &modeImmediate)
classes[modeWait] = makeStorageClass(fmt.Sprintf("wait-%v", suffix), &modeWait)
for _, sc := range classes {
if _, err := config.client.StorageV1().StorageClasses().Create(sc); err != nil {
t.Fatalf("Failed to create StorageClass %q: %v", sc.Name, err)
}
}
// Create PVs
for _, pvConfig := range test.pvs {
pv := makePV(pvConfig.name, classes[pvConfig.scMode].Name, pvConfig.preboundPVC, config.ns, pvConfig.node)
if _, err := config.client.CoreV1().PersistentVolumes().Create(pv); err != nil {
t.Fatalf("Failed to create PersistentVolume %q: %v", pv.Name, err)
}
}
for _, pvConfig := range test.unboundPvs {
pv := makePV(pvConfig.name, classes[pvConfig.scMode].Name, pvConfig.preboundPVC, config.ns, pvConfig.node)
if _, err := config.client.CoreV1().PersistentVolumes().Create(pv); err != nil {
t.Fatalf("Failed to create PersistentVolume %q: %v", pv.Name, err)
}
}
// Create PVCs
for _, pvcConfig := range test.pvcs {
pvc := makePVC(pvcConfig.name, config.ns, &classes[pvcConfig.scMode].Name, pvcConfig.preboundPV)
if _, err := config.client.CoreV1().PersistentVolumeClaims(config.ns).Create(pvc); err != nil {
t.Fatalf("Failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
}
}
for _, pvcConfig := range test.unboundPvcs {
pvc := makePVC(pvcConfig.name, config.ns, &classes[pvcConfig.scMode].Name, pvcConfig.preboundPV)
if _, err := config.client.CoreV1().PersistentVolumeClaims(config.ns).Create(pvc); err != nil {
t.Fatalf("Failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
}
}
// Create Pod
if _, err := config.client.CoreV1().Pods(config.ns).Create(test.pod); err != nil {
t.Fatalf("Failed to create Pod %q: %v", test.pod.Name, err)
}
if test.shouldFail {
if err := waitForPodUnschedulable(config.client, test.pod); err != nil {
t.Errorf("Pod %q was not unschedulable: %v", test.pod.Name, err)
}
} else {
if err := waitForPodToSchedule(config.client, test.pod); err != nil {
t.Errorf("Failed to schedule Pod %q: %v", test.pod.Name, err)
}
}
// Validate PVC/PV binding
for _, pvc := range test.pvcs {
validatePVCPhase(t, config.client, pvc.name, config.ns, v1.ClaimBound)
}
for _, pvc := range test.unboundPvcs {
validatePVCPhase(t, config.client, pvc.name, config.ns, v1.ClaimPending)
}
for _, pv := range test.pvs {
validatePVPhase(t, config.client, pv.name, v1.VolumeBound)
}
for _, pv := range test.unboundPvs {
validatePVPhase(t, config.client, pv.name, v1.VolumeAvailable)
}
// Force delete objects, but they still may not be immediately removed
deleteTestObjects(config.client, config.ns, deleteOption)
}
}
// TestVolumeBindingStress creates <podLimit> pods, each with <volsPerPod> unbound PVCs.
func TestVolumeBindingStress(t *testing.T) {
config := setupCluster(t, "volume-binding-stress", 1)
defer config.teardown()
// Create enough PVs and PVCs for all the pods
pvs := []*v1.PersistentVolume{}
pvcs := []*v1.PersistentVolumeClaim{}
for i := 0; i < podLimit*volsPerPod; i++ {
pv := makePV(fmt.Sprintf("pv-stress-%v", i), classWait, "", "", node1)
pvc := makePVC(fmt.Sprintf("pvc-stress-%v", i), config.ns, &classWait, "")
if pv, err := config.client.CoreV1().PersistentVolumes().Create(pv); err != nil {
t.Fatalf("Failed to create PersistentVolume %q: %v", pv.Name, err)
}
if pvc, err := config.client.CoreV1().PersistentVolumeClaims(config.ns).Create(pvc); err != nil {
t.Fatalf("Failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
}
pvs = append(pvs, pv)
pvcs = append(pvcs, pvc)
}
pods := []*v1.Pod{}
for i := 0; i < podLimit; i++ {
// Collect the names of all the PVCs for the pod
podPvcs := []string{}
for j := i * volsPerPod; j < (i+1)*volsPerPod; j++ {
podPvcs = append(podPvcs, pvcs[j].Name)
}
pod := makePod(fmt.Sprintf("pod%v", i), config.ns, podPvcs)
if pod, err := config.client.CoreV1().Pods(config.ns).Create(pod); err != nil {
t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
}
pods = append(pods, pod)
}
// Validate Pods scheduled
for _, pod := range pods {
// Use increased timeout for stress test because there is a higher chance of
// PV sync error
if err := waitForPodToScheduleWithTimeout(config.client, pod, 60*time.Second); err != nil {
t.Errorf("Failed to schedule Pod %q: %v", pod.Name, err)
}
}
// Validate PVC/PV binding
for _, pvc := range pvcs {
validatePVCPhase(t, config.client, pvc.Name, config.ns, v1.ClaimBound)
}
for _, pv := range pvs {
validatePVPhase(t, config.client, pv.Name, v1.VolumeBound)
}
}
func TestPVAffinityConflict(t *testing.T) {
config := setupCluster(t, "volume-scheduling", 3)
defer config.teardown()
pv := makePV("local-pv", classImmediate, "", "", node1)
pvc := makePVC("local-pvc", config.ns, &classImmediate, "")
// Create PV
if _, err := config.client.CoreV1().PersistentVolumes().Create(pv); err != nil {
t.Fatalf("Failed to create PersistentVolume %q: %v", pv.Name, err)
}
// Create PVC
if _, err := config.client.CoreV1().PersistentVolumeClaims(config.ns).Create(pvc); err != nil {
t.Fatalf("Failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
}
// Wait for PVC bound
if err := waitForPVCBound(config.client, pvc); err != nil {
t.Fatalf("PVC %q failed to bind: %v", pvc.Name, err)
}
nodeMarkers := []interface{}{
markNodeAffinity,
markNodeSelector,
}
for i := 0; i < len(nodeMarkers); i++ {
podName := "local-pod-" + strconv.Itoa(i+1)
pod := makePod(podName, config.ns, []string{"local-pvc"})
nodeMarkers[i].(func(*v1.Pod, string))(pod, "node-2")
// Create Pod
if _, err := config.client.CoreV1().Pods(config.ns).Create(pod); err != nil {
t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
}
// Give the scheduler time to attempt to schedule the pod
if err := waitForPodUnschedulable(config.client, pod); err != nil {
t.Errorf("Failed as Pod %s was not unschedulable: %v", pod.Name, err)
}
// Check pod conditions
p, err := config.client.CoreV1().Pods(config.ns).Get(podName, metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to access Pod %s status: %v", podName, err)
}
if p.Status.Phase != v1.PodPending {
t.Fatalf("Failed as Pod %s was in %s state, expected Pending", podName, p.Status.Phase)
}
if p.Status.Conditions[0].Reason != "Unschedulable" {
t.Fatalf("Failed as Pod %s reason was %s, expected Unschedulable", podName, p.Status.Conditions[0].Reason)
}
if !strings.Contains(p.Status.Conditions[0].Message, "node(s) didn't match node selector") || !strings.Contains(p.Status.Conditions[0].Message, "node(s) had volume node affinity conflict") {
t.Fatalf("Failed as Pod's %s failure message does not contain expected message: node(s) didn't match node selector, node(s) had volume node affinity conflict. Got message %q", podName, p.Status.Conditions[0].Message)
}
// Delete the test pod
if err := config.client.CoreV1().Pods(config.ns).Delete(podName, &metav1.DeleteOptions{}); err != nil {
t.Fatalf("Failed to delete Pod %s: %v", podName, err)
}
}
}
func setupCluster(t *testing.T, nsName string, numberOfNodes int) *testConfig {
// Enable feature gates
utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true,PersistentLocalVolumes=true")
controllerCh := make(chan struct{})
context := initTestScheduler(t, initTestMaster(t, nsName, nil), controllerCh, false, nil)
clientset := context.clientSet
ns := context.ns.Name
informers := context.informerFactory
// Start PV controller for volume binding.
params := persistentvolume.ControllerParameters{
KubeClient: clientset,
SyncPeriod: time.Hour, // test shouldn't need to resync
VolumePlugins: nil, // TODO: needed later for dynamic provisioning
Cloud: nil,
ClusterName: "volume-test-cluster",
VolumeInformer: informers.Core().V1().PersistentVolumes(),
ClaimInformer: informers.Core().V1().PersistentVolumeClaims(),
ClassInformer: informers.Storage().V1().StorageClasses(),
PodInformer: informers.Core().V1().Pods(),
NodeInformer: informers.Core().V1().Nodes(),
EnableDynamicProvisioning: true,
}
ctrl, err := persistentvolume.NewController(params)
if err != nil {
t.Fatalf("Failed to create PV controller: %v", err)
}
go ctrl.Run(controllerCh)
// Create shared objects
// Create nodes
for i := 0; i < numberOfNodes; i++ {
testNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("node-%d", i+1),
Labels: map[string]string{nodeAffinityLabelKey: fmt.Sprintf("node-%d", i+1)},
},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(podLimit, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: fmt.Sprintf("schedulable condition"),
LastHeartbeatTime: metav1.Time{Time: time.Now()},
},
},
},
}
if _, err := clientset.CoreV1().Nodes().Create(testNode); err != nil {
t.Fatalf("Failed to create Node %q: %v", testNode.Name, err)
}
}
// Create SCs
scs := []*storagev1.StorageClass{
makeStorageClass(classImmediate, &modeImmediate),
makeStorageClass(classWait, &modeWait),
}
for _, sc := range scs {
if _, err := clientset.StorageV1().StorageClasses().Create(sc); err != nil {
t.Fatalf("Failed to create StorageClass %q: %v", sc.Name, err)
}
}
return &testConfig{
client: clientset,
ns: ns,
stop: controllerCh,
teardown: func() {
deleteTestObjects(clientset, ns, nil)
cleanupTest(t, context)
utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false,LocalPersistentVolumes=false")
},
}
}
func deleteTestObjects(client clientset.Interface, ns string, option *metav1.DeleteOptions) {
client.CoreV1().Pods(ns).DeleteCollection(option, metav1.ListOptions{})
client.CoreV1().PersistentVolumeClaims(ns).DeleteCollection(option, metav1.ListOptions{})
client.CoreV1().PersistentVolumes().DeleteCollection(option, metav1.ListOptions{})
client.StorageV1().StorageClasses().DeleteCollection(option, metav1.ListOptions{})
}
func makeStorageClass(name string, mode *storagev1.VolumeBindingMode) *storagev1.StorageClass {
return &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Provisioner: "kubernetes.io/no-provisioner",
VolumeBindingMode: mode,
}
}
func makePV(name, scName, pvcName, ns, node string) *v1.PersistentVolume {
pv := &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Annotations: map[string]string{},
},
Spec: v1.PersistentVolumeSpec{
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("5Gi"),
},
AccessModes: []v1.PersistentVolumeAccessMode{
v1.ReadWriteOnce,
},
StorageClassName: scName,
PersistentVolumeSource: v1.PersistentVolumeSource{
Local: &v1.LocalVolumeSource{
Path: "/test-path",
},
},
NodeAffinity: &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeAffinityLabelKey,
Operator: v1.NodeSelectorOpIn,
Values: []string{node},
},
},
},
},
},
},
},
}
if pvcName != "" {
pv.Spec.ClaimRef = &v1.ObjectReference{Name: pvcName, Namespace: ns}
}
return pv
}
func makePVC(name, ns string, scName *string, volumeName string) *v1.PersistentVolumeClaim {
return &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
},
Spec: v1.PersistentVolumeClaimSpec{
AccessModes: []v1.PersistentVolumeAccessMode{
v1.ReadWriteOnce,
},
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("5Gi"),
},
},
StorageClassName: scName,
VolumeName: volumeName,
},
}
}
func makePod(name, ns string, pvcs []string) *v1.Pod {
volumes := []v1.Volume{}
for i, pvc := range pvcs {
volumes = append(volumes, v1.Volume{
Name: fmt.Sprintf("vol%v", i),
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: pvc,
},
},
})
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "write-pod",
Image: "k8s.gcr.io/busybox:1.24",
Command: []string{"/bin/sh"},
Args: []string{"-c", "while true; do sleep 1; done"},
},
},
Volumes: volumes,
},
}
}
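// Illustrative sketch (not part of the original file): building a wait-mode
// StorageClass, a matching local PV on node-1, an unbound PVC and a pod that
// consumes it, using the constructors above. All object names are example values.
func exampleWaitModeObjects(ns string) (*storagev1.StorageClass, *v1.PersistentVolume, *v1.PersistentVolumeClaim, *v1.Pod) {
	sc := makeStorageClass("example-wait", &modeWait)
	pv := makePV("example-pv", sc.Name, "", ns, node1)
	pvc := makePVC("example-pvc", ns, &sc.Name, "")
	// The pod references the PVC by name; binding happens at scheduling time
	// because the class uses VolumeBindingWaitForFirstConsumer.
	pod := makePod("example-pod", ns, []string{pvc.Name})
	return sc, pv, pvc, pod
}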
func validatePVCPhase(t *testing.T, client clientset.Interface, pvcName string, ns string, phase v1.PersistentVolumeClaimPhase) {
claim, err := client.CoreV1().PersistentVolumeClaims(ns).Get(pvcName, metav1.GetOptions{})
if err != nil {
t.Errorf("Failed to get PVC %v/%v: %v", ns, pvcName, err)
}
if claim.Status.Phase != phase {
t.Errorf("PVC %v/%v phase not %v, got %v", ns, pvcName, phase, claim.Status.Phase)
}
}
func validatePVPhase(t *testing.T, client clientset.Interface, pvName string, phase v1.PersistentVolumePhase) {
pv, err := client.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
if err != nil {
t.Errorf("Failed to get PV %v: %v", pvName, err)
}
if pv.Status.Phase != phase {
t.Errorf("PV %v phase not %v, got %v", pvName, phase, pv.Status.Phase)
}
}
func waitForPVCBound(client clientset.Interface, pvc *v1.PersistentVolumeClaim) error {
return wait.Poll(time.Second, 30*time.Second, func() (bool, error) {
claim, err := client.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pvc.Name, metav1.GetOptions{})
if err != nil {
return false, err
}
if claim.Status.Phase == v1.ClaimBound {
return true, nil
}
return false, nil
})
}
func markNodeAffinity(pod *v1.Pod, node string) {
affinity := &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeAffinityLabelKey,
Operator: v1.NodeSelectorOpIn,
Values: []string{node},
},
},
},
},
},
},
}
pod.Spec.Affinity = affinity
}
func markNodeSelector(pod *v1.Pod, node string) {
ns := map[string]string{
nodeAffinityLabelKey: node,
}
pod.Spec.NodeSelector = ns
}