Bumping k8s dependencies to 1.13

Cheng Xing
2018-11-16 14:08:25 -08:00
parent 305407125c
commit b4c0b68ec7
8002 changed files with 884099 additions and 276228 deletions

View File

@@ -15,20 +15,21 @@ go_test(
"//pkg/controller/volume/persistentvolume:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/tools/record:go_default_library",
],
)
@@ -46,18 +47,19 @@ go_library(
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/core/equivalence:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/tools/record:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
@@ -75,6 +77,7 @@ filegroup(
"//pkg/scheduler/algorithm:all-srcs",
"//pkg/scheduler/algorithmprovider:all-srcs",
"//pkg/scheduler/api:all-srcs",
"//pkg/scheduler/apis/config:all-srcs",
"//pkg/scheduler/cache:all-srcs",
"//pkg/scheduler/core:all-srcs",
"//pkg/scheduler/factory:all-srcs",

View File

@@ -19,24 +19,20 @@ go_library(
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"scheduler_interface_test.go",
"types_test.go",
],
srcs = ["types_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)

View File

@@ -9,6 +9,7 @@ load(
go_library(
name = "go_default_library",
srcs = [
"csi_volume_predicate.go",
"error.go",
"metadata.go",
"predicates.go",
@@ -27,25 +28,26 @@ go_library(
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/volume/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"csi_volume_predicate_test.go",
"max_attachable_volume_predicate_test.go",
"metadata_test.go",
"predicates_test.go",
@@ -60,14 +62,14 @@ go_test(
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
],
)

View File

@@ -0,0 +1,157 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"github.com/golang/glog"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// CSIMaxVolumeLimitChecker defines the predicate needed for counting CSI volumes
type CSIMaxVolumeLimitChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
}
// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
func NewCSIMaxVolumeLimitPredicate(
pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
c := &CSIMaxVolumeLimitChecker{
pvInfo: pvInfo,
pvcInfo: pvcInfo,
}
return c.attachableLimitPredicate
}
func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
// if the feature gate is disabled we return
if !utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
return true, nil, nil
}
// If a pod doesn't have any volume attached to it, the predicate will always be true.
// Thus we make a fast path for it, to avoid unnecessary computations in this case.
if len(pod.Spec.Volumes) == 0 {
return true, nil, nil
}
nodeVolumeLimits := nodeInfo.VolumeLimits()
// if the node does not have volume limits, this predicate should exit
if len(nodeVolumeLimits) == 0 {
return true, nil, nil
}
// a map of unique volume name/csi volume handle and volume limit key
newVolumes := make(map[string]string)
if err := c.filterAttachableVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
return false, nil, err
}
if len(newVolumes) == 0 {
return true, nil, nil
}
// a map of unique volume name/csi volume handle and volume limit key
attachedVolumes := make(map[string]string)
for _, existingPod := range nodeInfo.Pods() {
if err := c.filterAttachableVolumes(existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
return false, nil, err
}
}
newVolumeCount := map[string]int{}
attachedVolumeCount := map[string]int{}
for volumeName, volumeLimitKey := range attachedVolumes {
if _, ok := newVolumes[volumeName]; ok {
delete(newVolumes, volumeName)
}
attachedVolumeCount[volumeLimitKey]++
}
for _, volumeLimitKey := range newVolumes {
newVolumeCount[volumeLimitKey]++
}
for volumeLimitKey, count := range newVolumeCount {
maxVolumeLimit, ok := nodeVolumeLimits[v1.ResourceName(volumeLimitKey)]
if ok {
currentVolumeCount := attachedVolumeCount[volumeLimitKey]
if currentVolumeCount+count > int(maxVolumeLimit) {
return false, []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
}
}
}
return true, nil, nil
}
func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
volumes []v1.Volume, namespace string, result map[string]string) error {
for _, vol := range volumes {
// CSI volumes can only be used as persistent volumes
if vol.PersistentVolumeClaim == nil {
continue
}
pvcName := vol.PersistentVolumeClaim.ClaimName
if pvcName == "" {
return fmt.Errorf("PersistentVolumeClaim had no name")
}
pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
if err != nil {
glog.V(4).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
continue
}
pvName := pvc.Spec.VolumeName
// TODO - the actual handling of unbound PVCs will be fixed by late binding design.
if pvName == "" {
glog.V(4).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
continue
}
pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
if err != nil {
glog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
continue
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
glog.V(4).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
continue
}
driverName := csiSource.Driver
volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
result[csiSource.VolumeHandle] = volumeLimitKey
}
return nil
}
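
Aside: the counting step in attachableLimitPredicate above can be summarized in a small self-contained sketch. Volumes already attached on the node are dropped from the incoming pod's set, the remainder is counted per limit key, and the pod does not fit once attached plus new volumes exceed the node's limit. The function name fitsCSILimits and the "attachable-volumes-csi-ebs" key below are illustrative only, not the scheduler's real identifiers.

package main

import "fmt"

// fitsCSILimits mirrors the counting logic of attachableLimitPredicate on plain maps:
// volume handle -> limit key for new and attached volumes, limit key -> max for the node.
func fitsCSILimits(newVolumes, attachedVolumes map[string]string, nodeLimits map[string]int64) bool {
    newCount := map[string]int{}
    attachedCount := map[string]int{}
    for handle, limitKey := range attachedVolumes {
        delete(newVolumes, handle) // a volume already attached to the node is not counted again
        attachedCount[limitKey]++
    }
    for _, limitKey := range newVolumes {
        newCount[limitKey]++
    }
    for limitKey, count := range newCount {
        if limit, ok := nodeLimits[limitKey]; ok && attachedCount[limitKey]+count > int(limit) {
            return false
        }
    }
    return true
}

func main() {
    attached := map[string]string{"vol-1": "attachable-volumes-csi-ebs", "vol-2": "attachable-volumes-csi-ebs"}
    incoming := map[string]string{"vol-2": "attachable-volumes-csi-ebs", "vol-3": "attachable-volumes-csi-ebs"}
    limits := map[string]int64{"attachable-volumes-csi-ebs": 2}
    fmt.Println(fitsCSILimits(incoming, attached, limits)) // false: 2 attached + 1 new > limit of 2
}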

View File

@@ -0,0 +1,179 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
)
func TestCSIVolumeCountPredicate(t *testing.T) {
// for pods with CSI pvcs
oneVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "csi-ebs",
},
},
},
},
},
}
twoVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "cs-ebs-1",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "csi-ebs-2",
},
},
},
},
},
}
runningPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "csi-ebs-3",
},
},
},
},
},
}
tests := []struct {
newPod *v1.Pod
existingPods []*v1.Pod
filterName string
maxVols int
fits bool
test string
}{
{
newPod: oneVolPod,
existingPods: []*v1.Pod{runningPod, twoVolPod},
filterName: "csi-ebs",
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pods CSI volume",
},
{
newPod: oneVolPod,
existingPods: []*v1.Pod{runningPod, twoVolPod},
filterName: "csi-ebs",
maxVols: 2,
fits: false,
test: "doesn't when node capacity <= pods CSI volume",
},
}
defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.AttachVolumeLimit, true)()
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
// running attachable predicate tests with feature gate and limit present on nodes
for _, test := range tests {
node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName)
pred := NewCSIMaxVolumeLimitPredicate(getFakeCSIPVInfo("csi-ebs", "csi-ebs"), getFakeCSIPVCInfo("csi-ebs"))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), node)
if err != nil {
t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("Using allocatable [%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("Using allocatable [%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
}
}
}
func getFakeCSIPVInfo(volumeName, driverName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
Driver: driverName,
VolumeHandle: volumeName,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-2"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
Driver: driverName,
VolumeHandle: volumeName + "-2",
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-3"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
Driver: driverName,
VolumeHandle: volumeName + "-3",
},
},
},
},
}
}
func getFakeCSIPVCInfo(volumeName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-2"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName + "-2"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-3"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName + "-3"},
},
}
}

View File

@@ -29,6 +29,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
@@ -741,60 +742,12 @@ func TestVolumeCountConflicts(t *testing.T) {
},
}
pvInfo := func(filterName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: strings.ToLower(filterName) + "Vol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
}
pvcInfo := func(filterName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "some" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNon" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
// running attachable predicate tests without the feature gate and with no limit present on nodes
for _, test := range tests {
os.Setenv(KubeMaxPDVols, strconv.Itoa(test.maxVols))
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
pred := NewMaxPDVolumeCountPredicate(test.filterName, getFakePVInfo(test.filterName), getFakePVCInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
t.Errorf("[%s]%s: unexpected error: %v", test.filterName, test.test, err)
@@ -812,7 +765,7 @@ func TestVolumeCountConflicts(t *testing.T) {
// running attachable predicate tests with feature gate and limit present on nodes
for _, test := range tests {
node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName)
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
pred := NewMaxPDVolumeCountPredicate(test.filterName, getFakePVInfo(test.filterName), getFakePVCInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), node)
if err != nil {
t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
@@ -826,6 +779,122 @@ func TestVolumeCountConflicts(t *testing.T) {
}
}
func getFakePVInfo(filterName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: strings.ToLower(filterName) + "Vol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
}
func getFakePVCInfo(filterName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "some" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNon" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
}
func TestMaxVolumeFuncM5(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-m5-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "m5.large",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSNitroVolumeLimit {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSNitroVolumeLimit, maxVolume)
}
}
func TestMaxVolumeFuncT3(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-t3-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "t3.medium",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSNitroVolumeLimit {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSNitroVolumeLimit, maxVolume)
}
}
func TestMaxVolumeFuncR5(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-r5-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "r5d.xlarge",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSNitroVolumeLimit {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSNitroVolumeLimit, maxVolume)
}
}
func TestMaxVolumeFuncM4(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-m4-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "m4.2xlarge",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSVolumes {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSVolumes, maxVolume)
}
}
func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo {
nodeInfo := schedulercache.NewNodeInfo(pods...)
node := &v1.Node{
@@ -849,6 +918,6 @@ func getVolumeLimitKey(filterType string) v1.ResourceName {
case AzureDiskVolumeFilterType:
return v1.ResourceName(volumeutil.AzureVolumeLimitKey)
default:
return ""
return v1.ResourceName(volumeutil.GetCSIAttachLimitKey(filterType))
}
}
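
For context on getNodeWithPodAndVolumeLimits (its body is elided from this hunk): the limits the predicate reads come from the node's allocatable resources. A hedged sketch, assuming the vendored v1 and resource packages, of a node that advertises such a limit; the "attachable-volumes-aws-ebs" key is illustrative, the real keys come from volumeutil:

package main

import (
    "fmt"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
    // Illustrative limit key; the scheduler derives real keys via getVolumeLimitKey / GetCSIAttachLimitKey.
    limitKey := v1.ResourceName("attachable-volumes-aws-ebs")
    node := &v1.Node{
        ObjectMeta: metav1.ObjectMeta{Name: "node-with-limits"},
        Status: v1.NodeStatus{
            Allocatable: v1.ResourceList{
                limitKey: *resource.NewQuantity(25, resource.DecimalSI),
            },
        },
    }
    // This allocatable entry is roughly what nodeInfo.VolumeLimits() surfaces to the predicates.
    q := node.Status.Allocatable[limitKey]
    fmt.Println(q.Value()) // 25
}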

View File

@@ -38,6 +38,12 @@ type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
// topologyPair is a topology key/value pair used in predicate metadata, e.g. an AntiAffinityTerm's topology key and its value on a node
type topologyPair struct {
key string
value string
}
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
// due to the way declarations are processed in predicate declaration unit tests.
type matchingPodAntiAffinityTerm struct {
@@ -45,6 +51,17 @@ type matchingPodAntiAffinityTerm struct {
node *v1.Node
}
type podSet map[*v1.Pod]struct{}
type topologyPairSet map[topologyPair]struct{}
// topologyPairsMaps keeps topologyPairToPods and podToTopologyPairs in sync
// as they are the inverse of each other.
type topologyPairsMaps struct {
topologyPairToPods map[topologyPair]podSet
podToTopologyPairs map[string]topologyPairSet
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
@@ -52,17 +69,17 @@ type predicateMetadata struct {
podBestEffort bool
podRequest *schedulercache.Resource
podPorts []*v1.ContainerPort
//key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
// A map of node name to a list of Pods on the node that can potentially match
// the affinity rules of the "pod".
nodeNameToMatchingAffinityPods map[string][]*v1.Pod
// A map of node name to a list of Pods on the node that can potentially match
// the anti-affinity rules of the "pod".
nodeNameToMatchingAntiAffinityPods map[string][]*v1.Pod
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
topologyPairsAntiAffinityPodsMap *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the affinity terms of the "pod" and its inverse.
topologyPairsPotentialAffinityPods *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the anti-affinity terms of the "pod" and its inverse.
topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
// ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate.
//
@@ -113,23 +130,26 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
if pod == nil {
return nil
}
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
// incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity
// incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity
incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, nodeNameToInfoMap)
if err != nil {
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
nodeNameToMatchingAffinityPods: affinityPods,
nodeNameToMatchingAntiAffinityPods: antiAffinityPods,
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
topologyPairsPotentialAffinityPods: incomingPodAffinityMap,
topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
topologyPairsAntiAffinityPodsMap: existingPodAntiAffinityMap,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName)
@@ -138,6 +158,46 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
return predicateMetadata
}
// returns a pointer to a new topologyPairsMaps
func newTopologyPairsMaps() *topologyPairsMaps {
return &topologyPairsMaps{topologyPairToPods: make(map[topologyPair]podSet),
podToTopologyPairs: make(map[string]topologyPairSet)}
}
func (topologyPairsMaps *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
podFullName := schedutil.GetPodFullName(pod)
if topologyPairsMaps.topologyPairToPods[pair] == nil {
topologyPairsMaps.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
}
topologyPairsMaps.topologyPairToPods[pair][pod] = struct{}{}
if topologyPairsMaps.podToTopologyPairs[podFullName] == nil {
topologyPairsMaps.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
}
topologyPairsMaps.podToTopologyPairs[podFullName][pair] = struct{}{}
}
func (topologyPairsMaps *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
for pair := range topologyPairsMaps.podToTopologyPairs[deletedPodFullName] {
delete(topologyPairsMaps.topologyPairToPods[pair], deletedPod)
if len(topologyPairsMaps.topologyPairToPods[pair]) == 0 {
delete(topologyPairsMaps.topologyPairToPods, pair)
}
}
delete(topologyPairsMaps.podToTopologyPairs, deletedPodFullName)
}
func (topologyPairsMaps *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
if toAppend == nil {
return
}
for pair := range toAppend.topologyPairToPods {
for pod := range toAppend.topologyPairToPods[pair] {
topologyPairsMaps.addTopologyPair(pair, pod)
}
}
}
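A minimal standalone sketch of the inverse-map bookkeeping that addTopologyPair, removePod and appendMaps maintain; it keys pods by name strings rather than *v1.Pod pointers purely to keep the example self-contained:

package main

import "fmt"

type pair struct{ key, value string }

// pairsMaps keeps the two maps below as inverses of each other, like topologyPairsMaps above.
type pairsMaps struct {
    pairToPods map[pair]map[string]struct{}
    podToPairs map[string]map[pair]struct{}
}

func newPairsMaps() *pairsMaps {
    return &pairsMaps{
        pairToPods: map[pair]map[string]struct{}{},
        podToPairs: map[string]map[pair]struct{}{},
    }
}

func (m *pairsMaps) add(p pair, pod string) {
    if m.pairToPods[p] == nil {
        m.pairToPods[p] = map[string]struct{}{}
    }
    m.pairToPods[p][pod] = struct{}{}
    if m.podToPairs[pod] == nil {
        m.podToPairs[pod] = map[pair]struct{}{}
    }
    m.podToPairs[pod][p] = struct{}{}
}

func (m *pairsMaps) removePod(pod string) {
    for p := range m.podToPairs[pod] {
        delete(m.pairToPods[p], pod)
        if len(m.pairToPods[p]) == 0 {
            delete(m.pairToPods, p)
        }
    }
    delete(m.podToPairs, pod)
}

func main() {
    m := newPairsMaps()
    m.add(pair{"zone", "us-east-1a"}, "p1")
    m.add(pair{"zone", "us-east-1a"}, "p2")
    m.removePod("p1")
    fmt.Println(len(m.pairToPods[pair{"zone", "us-east-1a"}])) // 1: only p2 remains under the pair
}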
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
@@ -145,35 +205,10 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
// Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// Delete pod from the matching affinity or anti-affinity pods if exists.
affinity := meta.pod.Spec.Affinity
podNodeName := deletedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
if affinity.PodAffinity != nil {
for i, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAffinityPods[podNodeName] = s
break
}
}
}
if affinity.PodAntiAffinity != nil {
for i, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAntiAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = s
break
}
}
}
}
meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
// Delete pod from the matching affinity or anti-affinity topology pairs maps.
meta.topologyPairsPotentialAffinityPods.removePod(deletedPod)
meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
@@ -203,46 +238,36 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
return fmt.Errorf("invalid node in nodeInfo")
}
// Add matching anti-affinity terms of the addedPod to the map.
podMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(meta.pod, addedPod, nodeInfo.Node())
topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(meta.pod, addedPod, nodeInfo.Node())
if err != nil {
return err
}
if len(podMatchingTerms) > 0 {
existingTerms, found := meta.matchingAntiAffinityTerms[addedPodFullName]
if found {
meta.matchingAntiAffinityTerms[addedPodFullName] = append(existingTerms,
podMatchingTerms...)
} else {
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
}
}
meta.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := meta.pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
podNode := nodeInfo.Node()
// It is assumed that when the added pod matches affinity of the meta.pod, all the terms must match;
// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties
// is changed
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
for _, term := range affinityTerms {
if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
meta.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
}
}
if !found {
meta.nodeNameToMatchingAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAffinityPods[podNodeName], addedPod)
}
}
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
for _, term := range antiAffinityTerms {
if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
meta.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
}
}
if !found {
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAntiAffinityPods[podNodeName], addedPod)
}
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
@@ -268,18 +293,12 @@ func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
ignoredExtendedResources: meta.ignoredExtendedResources,
}
newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...)
newPredMeta.matchingAntiAffinityTerms = map[string][]matchingPodAntiAffinityTerm{}
for k, v := range meta.matchingAntiAffinityTerms {
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
}
newPredMeta.nodeNameToMatchingAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAffinityPods {
newPredMeta.nodeNameToMatchingAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.nodeNameToMatchingAntiAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAntiAffinityPods {
newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps()
newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods)
newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps()
newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods)
newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps()
newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap)
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
@@ -310,8 +329,8 @@ func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (propert
return properties, nil
}
// podMatchesAffinityTermProperties return true IFF the given pod matches all the given properties.
func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
// podMatchesAllAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAllAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
@@ -323,16 +342,76 @@ func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermPro
return true
}
// getPodsMatchingAffinity finds existing Pods that match affinity terms of the given "pod".
// It ignores topology. It returns a set of Pods that are checked later by the affinity
// predicate. With this set of pods available, the affinity predicate does not
// podMatchesAnyAffinityTermProperties returns true if the given pod matches any given property.
func podMatchesAnyAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return true
}
}
return false
}
// getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
// (1) Whether it has PodAntiAffinity
// (2) Whether any of its anti-affinity terms match the incoming pod
func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (*topologyPairsMaps, error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
topologyMaps := newTopologyPairsMaps()
appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
topologyMaps.appendMaps(toAppend)
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
}
}
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("node not found"))
return
}
for _, existingPod := range nodeInfo.PodsWithAffinity() {
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
if err != nil {
catchError(err)
return
}
appendTopologyPairsMaps(existingPodTopologyMaps)
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return topologyMaps, firstError
}
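The fan-out pattern above (workqueue.Parallelize plus a mutex-guarded accumulator and first-error capture) can be exercised on its own. A small sketch, assuming the vendored k8s.io/client-go/util/workqueue package, with the per-node work reduced to a stand-in computation:

package main

import (
    "fmt"
    "sync"

    "k8s.io/client-go/util/workqueue"
)

func main() {
    names := []string{"nodeA", "nodeB", "nodeC"}
    var lock sync.Mutex
    var firstError error
    results := map[string]int{}

    processNode := func(i int) {
        value := len(names[i]) // stand-in for real per-node work
        lock.Lock()
        defer lock.Unlock()
        if firstError != nil {
            return
        }
        results[names[i]] = value
    }
    // 16 workers over len(names) pieces, same call shape as in the scheduler's metadata helpers.
    workqueue.Parallelize(16, len(names), processNode)
    fmt.Println(results, firstError)
}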
// getTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match the affinity and anti-affinity terms of the given "pod".
// It returns topologyPairsMaps that are checked later by the affinity and anti-affinity
// predicates. With these topologyPairsMaps available, the predicates do not
// need to check all the pods in the cluster.
func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) {
func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return nil, nil, nil
return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
}
for name := range nodeInfoMap {
@@ -341,16 +420,16 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
var lock sync.Mutex
var firstError error
affinityPods = make(map[string][]*v1.Pod)
antiAffinityPods = make(map[string][]*v1.Pod)
appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) {
topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
if len(affPods) > 0 {
affinityPods[nodeName] = affPods
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAffinityPodsMaps.appendMaps(nodeTopologyPairsAffinityPodsMaps)
}
if len(antiAffPods) > 0 {
antiAffinityPods[nodeName] = antiAffPods
if len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAntiAffinityPodsMaps.appendMaps(nodeTopologyPairsAntiAffinityPodsMaps)
}
}
@@ -362,14 +441,12 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
}
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
return nil, nil, err
}
antiAffinityProperties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
affinityProperties, err := getAffinityTermProperties(pod, affinityTerms)
if err != nil {
return nil, nil, err
}
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
@@ -378,27 +455,43 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
catchError(fmt.Errorf("nodeInfo.Node is nil"))
return
}
affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
nodeTopologyPairsAntiAffinityPodsMaps := newTopologyPairsMaps()
for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties.
if podMatchesAffinityTermProperties(existingPod, affinityProperties) {
affPods = append(affPods, existingPod)
if podMatchesAllAffinityTermProperties(existingPod, affinityProperties) {
for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
// Check anti-affinity properties.
if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) {
antiAffPods = append(antiAffPods, existingPod)
for _, term := range antiAffinityTerms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
catchError(err)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAntiAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
}
if len(antiAffPods) > 0 || len(affPods) > 0 {
appendResult(node.Name, affPods, antiAffPods)
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return affinityPods, antiAffinityPods, firstError
return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError
}
// podMatchesAffinity returns true if "targetPod" matches any affinity rule of
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches ALL affinity terms of
// "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
@@ -412,11 +505,11 @@ func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
glog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, affinityProperties)
return podMatchesAllAffinityTermProperties(targetPod, affinityProperties)
}
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches any anti-affinity
// rule of "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches ANY anti-affinity
// term of "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
@@ -429,5 +522,5 @@ func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
glog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, properties)
return podMatchesAnyAffinityTermProperties(targetPod, properties)
}
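
The renamed helpers encode an ALL-vs-ANY distinction: affinity requires the target pod to satisfy every term, while anti-affinity triggers as soon as it satisfies any single term. A minimal illustration over plain label maps (not the scheduler's affinityTermProperties), just to show the semantics:

package main

import "fmt"

type termMatcher func(labels map[string]string) bool

// matchesAll mirrors podMatchesAllAffinityTermProperties: false when there are no terms, every term must match.
func matchesAll(labels map[string]string, terms []termMatcher) bool {
    if len(terms) == 0 {
        return false
    }
    for _, t := range terms {
        if !t(labels) {
            return false
        }
    }
    return true
}

// matchesAny mirrors podMatchesAnyAffinityTermProperties: true as soon as one term matches.
func matchesAny(labels map[string]string, terms []termMatcher) bool {
    for _, t := range terms {
        if t(labels) {
            return true
        }
    }
    return false
}

func main() {
    hasKey := func(k string) termMatcher {
        return func(labels map[string]string) bool { _, ok := labels[k]; return ok }
    }
    terms := []termMatcher{hasKey("aaa"), hasKey("bbb")}
    pod := map[string]string{"aaa": ""}
    fmt.Println(matchesAll(pod, terms), matchesAny(pod, terms)) // false true
}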

View File

@@ -28,42 +28,6 @@ import (
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
// sortableAntiAffinityTerms lets us sort anti-affinity terms.
type sortableAntiAffinityTerms []matchingPodAntiAffinityTerm
// Less establishes some ordering between two matchingPodAntiAffinityTerms for
// sorting.
func (s sortableAntiAffinityTerms) Less(i, j int) bool {
t1, t2 := s[i], s[j]
if t1.node.Name != t2.node.Name {
return t1.node.Name < t2.node.Name
}
if len(t1.term.Namespaces) != len(t2.term.Namespaces) {
return len(t1.term.Namespaces) < len(t2.term.Namespaces)
}
if t1.term.TopologyKey != t2.term.TopologyKey {
return t1.term.TopologyKey < t2.term.TopologyKey
}
if len(t1.term.LabelSelector.MatchLabels) != len(t2.term.LabelSelector.MatchLabels) {
return len(t1.term.LabelSelector.MatchLabels) < len(t2.term.LabelSelector.MatchLabels)
}
return false
}
func (s sortableAntiAffinityTerms) Len() int { return len(s) }
func (s sortableAntiAffinityTerms) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
var _ = sort.Interface(sortableAntiAffinityTerms{})
func sortAntiAffinityTerms(terms map[string][]matchingPodAntiAffinityTerm) {
for k, v := range terms {
sortableTerms := sortableAntiAffinityTerms(v)
sort.Sort(sortableTerms)
terms[k] = sortableTerms
}
}
// sortablePods lets us sort pods.
type sortablePods []*v1.Pod
@@ -88,13 +52,6 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{})
func sortNodePodMap(np map[string][]*v1.Pod) {
for _, pl := range np {
sortablePods := sortablePods(pl)
sort.Sort(sortablePods)
}
}
// predicateMetadataEquivalent returns true if the two metadata are equivalent.
// Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
@@ -113,20 +70,19 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
if !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) {
return fmt.Errorf("podPorts are not equal")
}
sortAntiAffinityTerms(meta1.matchingAntiAffinityTerms)
sortAntiAffinityTerms(meta2.matchingAntiAffinityTerms)
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
return fmt.Errorf("matchingAntiAffinityTerms are not euqal")
if !reflect.DeepEqual(meta1.topologyPairsPotentialAffinityPods, meta2.topologyPairsPotentialAffinityPods) {
return fmt.Errorf("topologyPairsPotentialAffinityPods are not equal")
}
sortNodePodMap(meta1.nodeNameToMatchingAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAffinityPods, meta2.nodeNameToMatchingAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAffinityPods are not euqal")
if !reflect.DeepEqual(meta1.topologyPairsPotentialAntiAffinityPods, meta2.topologyPairsPotentialAntiAffinityPods) {
return fmt.Errorf("topologyPairsPotentialAntiAffinityPods are not equal")
}
sortNodePodMap(meta1.nodeNameToMatchingAntiAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAntiAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAntiAffinityPods, meta2.nodeNameToMatchingAntiAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAntiAffinityPods are not euqal")
if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.podToTopologyPairs,
meta2.topologyPairsAntiAffinityPodsMap.podToTopologyPairs) {
return fmt.Errorf("topologyPairsAntiAffinityPodsMap.podToTopologyPairs are not equal")
}
if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.topologyPairToPods,
meta2.topologyPairsAntiAffinityPodsMap.topologyPairToPods) {
return fmt.Errorf("topologyPairsAntiAffinityPodsMap.topologyPairToPods are not equal")
}
if meta1.serviceAffinityInUse {
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
@@ -236,7 +192,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
}
tests := []struct {
description string
name string
pendingPod *v1.Pod
addedPod *v1.Pod
existingPods []*v1.Pod
@@ -244,7 +200,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
services []*v1.Service
}{
{
description: "no anti-affinity or service affinity exist",
name: "no anti-affinity or service affinity exist",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -267,7 +223,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
},
},
{
description: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
name: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -300,7 +256,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
},
},
{
description: "metadata service-affinity data are updated correctly after adding and removing a pod",
name: "metadata service-affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -324,7 +280,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
},
},
{
description: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
name: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -358,7 +314,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
},
},
{
description: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
name: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -395,44 +351,46 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
}
for _, test := range tests {
allPodLister := schedulertesting.FakePodLister(append(test.existingPods, test.addedPod))
// getMeta creates predicate meta data given the list of pods.
getMeta := func(lister schedulertesting.FakePodLister) (*predicateMetadata, map[string]*schedulercache.NodeInfo) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(lister, test.nodes)
// nodeList is a list of non-pointer nodes to feed to FakeNodeListInfo.
nodeList := []v1.Node{}
for _, n := range test.nodes {
nodeList = append(nodeList, *n)
t.Run(test.name, func(t *testing.T) {
allPodLister := schedulertesting.FakePodLister(append(test.existingPods, test.addedPod))
// getMeta creates predicate meta data given the list of pods.
getMeta := func(lister schedulertesting.FakePodLister) (*predicateMetadata, map[string]*schedulercache.NodeInfo) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(lister, test.nodes)
// nodeList is a list of non-pointer nodes to feed to FakeNodeListInfo.
nodeList := []v1.Node{}
for _, n := range test.nodes {
nodeList = append(nodeList, *n)
}
_, precompute := NewServiceAffinityPredicate(lister, schedulertesting.FakeServiceLister(test.services), FakeNodeListInfo(nodeList), nil)
RegisterPredicateMetadataProducer("ServiceAffinityMetaProducer", precompute)
pmf := PredicateMetadataFactory{lister}
meta := pmf.GetMetadata(test.pendingPod, nodeInfoMap)
return meta.(*predicateMetadata), nodeInfoMap
}
_, precompute := NewServiceAffinityPredicate(lister, schedulertesting.FakeServiceLister(test.services), FakeNodeListInfo(nodeList), nil)
RegisterPredicateMetadataProducer("ServiceAffinityMetaProducer", precompute)
pmf := PredicateMetadataFactory{lister}
meta := pmf.GetMetadata(test.pendingPod, nodeInfoMap)
return meta.(*predicateMetadata), nodeInfoMap
}
// allPodsMeta is meta data produced when all pods, including test.addedPod
// are given to the metadata producer.
allPodsMeta, _ := getMeta(allPodLister)
// existingPodsMeta1 is meta data produced for test.existingPods (without test.addedPod).
existingPodsMeta1, nodeInfoMap := getMeta(schedulertesting.FakePodLister(test.existingPods))
// Add test.addedPod to existingPodsMeta1 and make sure meta is equal to allPodsMeta
nodeInfo := nodeInfoMap[test.addedPod.Spec.NodeName]
if err := existingPodsMeta1.AddPod(test.addedPod, nodeInfo); err != nil {
t.Errorf("test [%v]: error adding pod to meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(allPodsMeta, existingPodsMeta1); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
// Remove the added pod from existingPodsMeta1 and make sure it is equal
// to meta generated for existing pods.
existingPodsMeta2, _ := getMeta(schedulertesting.FakePodLister(test.existingPods))
if err := existingPodsMeta1.RemovePod(test.addedPod); err != nil {
t.Errorf("test [%v]: error removing pod from meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(existingPodsMeta1, existingPodsMeta2); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
// allPodsMeta is meta data produced when all pods, including test.addedPod
// are given to the metadata producer.
allPodsMeta, _ := getMeta(allPodLister)
// existingPodsMeta1 is meta data produced for test.existingPods (without test.addedPod).
existingPodsMeta1, nodeInfoMap := getMeta(schedulertesting.FakePodLister(test.existingPods))
// Add test.addedPod to existingPodsMeta1 and make sure meta is equal to allPodsMeta
nodeInfo := nodeInfoMap[test.addedPod.Spec.NodeName]
if err := existingPodsMeta1.AddPod(test.addedPod, nodeInfo); err != nil {
t.Errorf("error adding pod to meta: %v", err)
}
if err := predicateMetadataEquivalent(allPodsMeta, existingPodsMeta1); err != nil {
t.Errorf("meta data are not equivalent: %v", err)
}
// Remove the added pod from existingPodsMeta1 and make sure it is equal
// to meta generated for existing pods.
existingPodsMeta2, _ := getMeta(schedulertesting.FakePodLister(test.existingPods))
if err := existingPodsMeta1.RemovePod(test.addedPod); err != nil {
t.Errorf("error removing pod from meta: %v", err)
}
if err := predicateMetadataEquivalent(existingPodsMeta1, existingPodsMeta2); err != nil {
t.Errorf("meta data are not equivalent: %v", err)
}
})
}
}
@@ -463,52 +421,93 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
HostIP: "1.2.3.4",
},
},
matchingAntiAffinityTerms: map[string][]matchingPodAntiAffinityTerm{
"term1": {
{
term: &v1.PodAffinityTerm{TopologyKey: "node"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
},
topologyPairsAntiAffinityPodsMap: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "name", value: "machine1"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
}: struct{}{},
},
{key: "name", value: "machine2"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
}: struct{}{},
},
},
podToTopologyPairs: map[string]topologyPairSet{
"p2_": {
topologyPair{key: "name", value: "machine1"}: struct{}{},
},
"p1_": {
topologyPair{key: "name", value: "machine2"}: struct{}{},
},
},
},
nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{
"nodeA": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
topologyPairsPotentialAffinityPods: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "name", value: "nodeA"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
}: struct{}{},
},
{key: "name", value: "nodeC"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
}: struct{}{},
},
},
"nodeC": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
podToTopologyPairs: map[string]topologyPairSet{
"p1_": {
topologyPair{key: "name", value: "nodeA"}: struct{}{},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
"p2_": {
topologyPair{key: "name", value: "nodeC"}: struct{}{},
},
"p6_": {
topologyPair{key: "name", value: "nodeC"}: struct{}{},
},
},
},
nodeNameToMatchingAntiAffinityPods: map[string][]*v1.Pod{
"nodeN": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
topologyPairsPotentialAntiAffinityPods: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "name", value: "nodeN"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}: struct{}{},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
{key: "name", value: "nodeM"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeM"},
}: struct{}{},
},
},
"nodeM": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeM"},
podToTopologyPairs: map[string]topologyPairSet{
"p1_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p2_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p3_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p6_": {
topologyPair{key: "name", value: "nodeM"}: struct{}{},
},
},
},
@@ -526,3 +525,269 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
t.Errorf("Copy is not equal to source!")
}
}
// TestGetTPMapMatchingIncomingAffinityAntiAffinity tests the method getTPMapMatchingIncomingAffinityAntiAffinity
// for pod affinity and anti-affinity cases
func TestGetTPMapMatchingIncomingAffinityAntiAffinity(t *testing.T) {
newPodAffinityTerms := func(keys ...string) []v1.PodAffinityTerm {
var terms []v1.PodAffinityTerm
for _, key := range keys {
terms = append(terms, v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: key,
Operator: metav1.LabelSelectorOpExists,
},
},
},
TopologyKey: "hostname",
})
}
return terms
}
newPod := func(labels ...string) *v1.Pod {
labelMap := make(map[string]string)
for _, l := range labels {
labelMap[l] = ""
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "normal", Labels: labelMap},
Spec: v1.PodSpec{NodeName: "nodeA"},
}
}
normalPodA := newPod("aaa")
normalPodB := newPod("bbb")
normalPodAB := newPod("aaa", "bbb")
nodeA := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"hostname": "nodeA"}}}
tests := []struct {
name string
existingPods []*v1.Pod
nodes []*v1.Node
pod *v1.Pod
wantAffinityPodsMaps *topologyPairsMaps
wantAntiAffinityPodsMaps *topologyPairsMaps
wantErr bool
}{
{
name: "nil test",
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "aaa-normal"},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: newTopologyPairsMaps(),
},
{
name: "incoming pod without affinity/anti-affinity causes a no-op",
existingPods: []*v1.Pod{normalPodA},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "aaa-normal"},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: newTopologyPairsMaps(),
},
{
name: "no pod has label that violates incoming pod's affinity and anti-affinity",
existingPods: []*v1.Pod{normalPodB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "aaa-anti"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: newTopologyPairsMaps(),
},
{
name: "existing pod matches incoming pod's affinity and anti-affinity - single term case",
existingPods: []*v1.Pod{normalPodA},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
},
},
},
wantAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodA: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodA: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "existing pod matches incoming pod's affinity and anti-affinity - mutiple terms case",
existingPods: []*v1.Pod{normalPodAB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
},
},
},
wantAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodAB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodAB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "existing pod not match incoming pod's affinity but matches anti-affinity",
existingPods: []*v1.Pod{normalPodA},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodA: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "incoming pod's anti-affinity has more than one term - existing pod violates partial term - case 1",
existingPods: []*v1.Pod{normalPodAB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "anaffi-antiaffiti"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "ccc"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "ccc"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodAB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "incoming pod's anti-affinity has more than one term - existing pod violates partial term - case 2",
existingPods: []*v1.Pod{normalPodB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(tt.existingPods, tt.nodes)
gotAffinityPodsMaps, gotAntiAffinityPodsMaps, err := getTPMapMatchingIncomingAffinityAntiAffinity(tt.pod, nodeInfoMap)
if (err != nil) != tt.wantErr {
t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(gotAffinityPodsMaps, tt.wantAffinityPodsMaps) {
t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() gotAffinityPodsMaps = %#v, want %#v", gotAffinityPodsMaps, tt.wantAffinityPodsMaps)
}
if !reflect.DeepEqual(gotAntiAffinityPodsMaps, tt.wantAntiAffinityPodsMaps) {
t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() gotAntiAffinityPodsMaps = %#v, want %#v", gotAntiAffinityPodsMaps, tt.wantAntiAffinityPodsMaps)
}
})
}
}

View File

@@ -20,8 +20,8 @@ import (
"errors"
"fmt"
"os"
"regexp"
"strconv"
"sync"
"github.com/golang/glog"
@@ -36,7 +36,6 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
corelisters "k8s.io/client-go/listers/core/v1"
storagelisters "k8s.io/client-go/listers/storage/v1"
"k8s.io/client-go/util/workqueue"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
@@ -84,6 +83,8 @@ const (
MaxGCEPDVolumeCountPred = "MaxGCEPDVolumeCount"
// MaxAzureDiskVolumeCountPred defines the name of predicate MaxAzureDiskVolumeCount.
MaxAzureDiskVolumeCountPred = "MaxAzureDiskVolumeCount"
// MaxCSIVolumeCountPred defines the predicate that decides how many CSI volumes should be attached
MaxCSIVolumeCountPred = "MaxCSIVolumeCountPred"
// NoVolumeZoneConflictPred defines the name of predicate NoVolumeZoneConflict.
NoVolumeZoneConflictPred = "NoVolumeZoneConflict"
// CheckNodeMemoryPressurePred defines the name of predicate CheckNodeMemoryPressure.
@@ -93,10 +94,6 @@ const (
// CheckNodePIDPressurePred defines the name of predicate CheckNodePIDPressure.
CheckNodePIDPressurePred = "CheckNodePIDPressure"
// DefaultMaxEBSVolumes is the limit for volumes attached to an instance.
// Amazon recommends no more than 40; the system root volume uses at least one.
// See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits
DefaultMaxEBSVolumes = 39
// DefaultMaxGCEPDVolumes defines the maximum number of PD Volumes for GCE
// GCE instances can have up to 16 PD volumes attached.
DefaultMaxGCEPDVolumes = 16
@@ -134,7 +131,7 @@ var (
GeneralPred, HostNamePred, PodFitsHostPortsPred,
MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred,
PodToleratesNodeTaintsPred, PodToleratesNodeNoExecuteTaintsPred, CheckNodeLabelPresencePred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred, MaxCSIVolumeCountPred,
MaxAzureDiskVolumeCountPred, CheckVolumeBindingPred, NoVolumeZoneConflictPred,
CheckNodeMemoryPressurePred, CheckNodePIDPressurePred, CheckNodeDiskPressurePred, MatchInterPodAffinityPred}
)
@@ -291,7 +288,7 @@ func NoDiskConflict(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *sch
type MaxPDVolumeCountChecker struct {
filter VolumeFilter
volumeLimitKey v1.ResourceName
maxVolumes int
maxVolumeFunc func(node *v1.Node) int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
@@ -317,7 +314,6 @@ type VolumeFilter struct {
func NewMaxPDVolumeCountPredicate(
filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
var filter VolumeFilter
var maxVolumes int
var volumeLimitKey v1.ResourceName
switch filterName {
@@ -325,15 +321,12 @@ func NewMaxPDVolumeCountPredicate(
case EBSVolumeFilterType:
filter = EBSVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxEBSVolumes)
case GCEPDVolumeFilterType:
filter = GCEPDVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxGCEPDVolumes)
case AzureDiskVolumeFilterType:
filter = AzureDiskVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxAzureDiskVolumes)
default:
glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
GCEPDVolumeFilterType, AzureDiskVolumeFilterType)
@@ -343,7 +336,7 @@ func NewMaxPDVolumeCountPredicate(
c := &MaxPDVolumeCountChecker{
filter: filter,
volumeLimitKey: volumeLimitKey,
maxVolumes: maxVolumes,
maxVolumeFunc: getMaxVolumeFunc(filterName),
pvInfo: pvInfo,
pvcInfo: pvcInfo,
randomVolumeIDPrefix: rand.String(32),
@@ -352,19 +345,52 @@ func NewMaxPDVolumeCountPredicate(
return c.predicate
}
// getMaxVols checks the max PD volumes environment variable, otherwise returning a default value
func getMaxVols(defaultVal int) int {
func getMaxVolumeFunc(filterName string) func(node *v1.Node) int {
return func(node *v1.Node) int {
maxVolumesFromEnv := getMaxVolLimitFromEnv()
if maxVolumesFromEnv > 0 {
return maxVolumesFromEnv
}
var nodeInstanceType string
for k, v := range node.ObjectMeta.Labels {
if k == kubeletapis.LabelInstanceType {
nodeInstanceType = v
}
}
switch filterName {
case EBSVolumeFilterType:
return getMaxEBSVolume(nodeInstanceType)
case GCEPDVolumeFilterType:
return DefaultMaxGCEPDVolumes
case AzureDiskVolumeFilterType:
return DefaultMaxAzureDiskVolumes
default:
return -1
}
}
}
func getMaxEBSVolume(nodeInstanceType string) int {
if ok, _ := regexp.MatchString(volumeutil.EBSNitroLimitRegex, nodeInstanceType); ok {
return volumeutil.DefaultMaxEBSNitroVolumeLimit
}
return volumeutil.DefaultMaxEBSVolumes
}
// getMaxVolLimitFromEnv checks the max PD volumes environment variable; it returns -1 when the variable is unset or invalid.
func getMaxVolLimitFromEnv() int {
if rawMaxVols := os.Getenv(KubeMaxPDVols); rawMaxVols != "" {
if parsedMaxVols, err := strconv.Atoi(rawMaxVols); err != nil {
glog.Errorf("Unable to parse maximum PD volumes value, using default of %v: %v", defaultVal, err)
glog.Errorf("Unable to parse maximum PD volumes value, using default: %v", err)
} else if parsedMaxVols <= 0 {
glog.Errorf("Maximum PD volumes must be a positive value, using default of %v", defaultVal)
glog.Errorf("Maximum PD volumes must be a positive value, using default ")
} else {
return parsedMaxVols
}
}
return defaultVal
return -1
}
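As an illustrative aside (not part of the diff), here is a self-contained sketch of the override behavior above: honor KUBE_MAX_PD_VOLS when it parses to a positive integer, otherwise signal "no override" with -1 so the per-filter default applies. The environment variable name is assumed to be what the upstream KubeMaxPDVols constant resolves to.

package main

import (
	"fmt"
	"os"
	"strconv"
)

// maxVolLimitFromEnv mirrors the shape of getMaxVolLimitFromEnv above:
// a positive integer in KUBE_MAX_PD_VOLS wins, anything else means -1.
func maxVolLimitFromEnv() int {
	raw := os.Getenv("KUBE_MAX_PD_VOLS")
	if raw == "" {
		return -1 // variable not set; caller falls back to per-filter defaults
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil || parsed <= 0 {
		return -1 // unparsable or non-positive values are ignored
	}
	return parsed
}

func main() {
	os.Setenv("KUBE_MAX_PD_VOLS", "25")
	fmt.Println(maxVolLimitFromEnv()) // 25
	os.Unsetenv("KUBE_MAX_PD_VOLS")
	fmt.Println(maxVolLimitFromEnv()) // -1
}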
func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error {
@@ -454,7 +480,7 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat
}
numNewVolumes := len(newVolumes)
maxAttachLimit := c.maxVolumes
maxAttachLimit := c.maxVolumeFunc(nodeInfo.Node())
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
volumeLimits := nodeInfo.VolumeLimits()
@@ -1159,7 +1185,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
// targetPod matches all the terms and their topologies, 2) whether targetPod
// matches all the terms label selector and namespaces (AKA term properties),
// 3) any error.
func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
if len(terms) == 0 {
return false, false, fmt.Errorf("terms array is empty")
}
@@ -1167,7 +1193,7 @@ func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *
if err != nil {
return false, false, err
}
if !podMatchesAffinityTermProperties(targetPod, props) {
if !podMatchesAllAffinityTermProperties(targetPod, props) {
return false, false, nil
}
// Namespace and selector of the terms have matched. Now we check topology of the terms.
@@ -1214,120 +1240,63 @@ func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.Po
return terms
}
func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (map[string][]matchingPodAntiAffinityTerm, error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
// getMatchingAntiAffinityTopologyPairsOfPod calculates the following for "existingPod" on the given node:
// (1) Whether it has PodAntiAffinity
// (2) Whether any of its anti-affinity terms match the incoming pod
func getMatchingAntiAffinityTopologyPairsOfPod(newPod *v1.Pod, existingPod *v1.Pod, node *v1.Node) (*topologyPairsMaps, error) {
affinity := existingPod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return nil, nil
}
var lock sync.Mutex
var firstError error
result := make(map[string][]matchingPodAntiAffinityTerm)
appendResult := func(toAppend map[string][]matchingPodAntiAffinityTerm) {
lock.Lock()
defer lock.Unlock()
for uid, terms := range toAppend {
result[uid] = append(result[uid], terms...)
}
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
topologyMaps := newTopologyPairsMaps()
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
topologyMaps.addTopologyPair(pair, existingPod)
}
}
}
return topologyMaps, nil
}
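For orientation only, a simplified stand-in for the topologyPairsMaps bookkeeping that the new code threads through predicate metadata and the tests: a set of pods per (topologyKey, value) pair plus the reverse index keyed by pod name. The real type additionally works with pod full names and pointer identity.

package main

import "fmt"

type topologyPair struct{ key, value string }

// pairsMaps is a toy version of topologyPairsMaps: pods indexed by topology
// pair, and topology pairs indexed by pod name.
type pairsMaps struct {
	pairToPods map[topologyPair]map[string]struct{}
	podToPairs map[string]map[topologyPair]struct{}
}

func newPairsMaps() *pairsMaps {
	return &pairsMaps{
		pairToPods: map[topologyPair]map[string]struct{}{},
		podToPairs: map[string]map[topologyPair]struct{}{},
	}
}

// add records that podName matches some term whose topology key/value on the
// pod's node is pair, keeping both indexes in sync.
func (m *pairsMaps) add(pair topologyPair, podName string) {
	if m.pairToPods[pair] == nil {
		m.pairToPods[pair] = map[string]struct{}{}
	}
	m.pairToPods[pair][podName] = struct{}{}
	if m.podToPairs[podName] == nil {
		m.podToPairs[podName] = map[topologyPair]struct{}{}
	}
	m.podToPairs[podName][pair] = struct{}{}
}

func main() {
	m := newPairsMaps()
	m.add(topologyPair{key: "hostname", value: "nodeA"}, "p1")
	_, found := m.pairToPods[topologyPair{key: "hostname", value: "nodeA"}]["p1"]
	fmt.Println(found) // true
}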
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("node not found"))
return
}
nodeResult := make(map[string][]matchingPodAntiAffinityTerm)
for _, existingPod := range nodeInfo.PodsWithAffinity() {
affinity := existingPod.Spec.Affinity
if affinity == nil {
func (c *PodAffinityChecker) getMatchingAntiAffinityTopologyPairsOfPods(pod *v1.Pod, existingPods []*v1.Pod) (*topologyPairsMaps, error) {
topologyMaps := newTopologyPairsMaps()
for _, existingPod := range existingPods {
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
glog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
continue
}
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
catchError(err)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) {
existingPodFullName := schedutil.GetPodFullName(existingPod)
nodeResult[existingPodFullName] = append(
nodeResult[existingPodFullName],
matchingPodAntiAffinityTerm{term: &term, node: node})
}
}
return nil, err
}
if len(nodeResult) > 0 {
appendResult(nodeResult)
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, existingPodNode)
if err != nil {
return nil, err
}
topologyMaps.appendMaps(existingPodTopologyMaps)
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return result, firstError
}
func getMatchingAntiAffinityTermsOfExistingPod(newPod *v1.Pod, existingPod *v1.Pod, node *v1.Node) ([]matchingPodAntiAffinityTerm, error) {
var result []matchingPodAntiAffinityTerm
affinity := existingPod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) {
result = append(result, matchingPodAntiAffinityTerm{term: &term, node: node})
}
}
}
return result, nil
}
func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods []*v1.Pod) (map[string][]matchingPodAntiAffinityTerm, error) {
result := make(map[string][]matchingPodAntiAffinityTerm)
for _, existingPod := range allPods {
affinity := existingPod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
glog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
continue
}
return nil, err
}
existingPodMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(pod, existingPod, existingPodNode)
if err != nil {
return nil, err
}
if len(existingPodMatchingTerms) > 0 {
existingPodFullName := schedutil.GetPodFullName(existingPod)
result[existingPodFullName] = existingPodMatchingTerms
}
}
}
return result, nil
return topologyMaps, nil
}
// Checks if scheduling the pod onto this node would break any anti-affinity
// rules indicated by the existing pods.
// terms indicated by the existing pods.
func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (algorithm.PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return ErrExistingPodsAntiAffinityRulesNotMatch, fmt.Errorf("Node is nil")
}
var matchingTerms map[string][]matchingPodAntiAffinityTerm
var topologyMaps *topologyPairsMaps
if predicateMeta, ok := meta.(*predicateMetadata); ok {
matchingTerms = predicateMeta.matchingAntiAffinityTerms
topologyMaps = predicateMeta.topologyPairsAntiAffinityPodsMap
} else {
// Filter out pods whose nodeName is equal to nodeInfo.node.Name, but are not
// present in nodeInfo. Pods on other nodes pass the filter.
@@ -1337,63 +1306,63 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
if matchingTerms, err = c.getMatchingAntiAffinityTerms(pod, filteredPods); err != nil {
if topologyMaps, err = c.getMatchingAntiAffinityTopologyPairsOfPods(pod, filteredPods); err != nil {
errMessage := fmt.Sprintf("Failed to get all terms that pod %+v matches, err: %+v", podName(pod), err)
glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
}
for _, terms := range matchingTerms {
for i := range terms {
term := &terms[i]
if len(term.term.TopologyKey) == 0 {
errMessage := fmt.Sprintf("Empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
if priorityutil.NodesHaveSameTopologyKey(node, term.node, term.term.TopologyKey) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v",
podName(pod), node.Name, term.term)
return ErrExistingPodsAntiAffinityRulesNotMatch, nil
}
// Iterate over the node's topology labels to check whether any existing pod's matching
// anti-affinity terms would be violated by scheduling the incoming pod onto this node
for topologyKey, topologyValue := range node.Labels {
if topologyMaps.topologyPairToPods[topologyPair{key: topologyKey, value: topologyValue}] != nil {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v", podName(pod), node.Name)
return ErrExistingPodsAntiAffinityRulesNotMatch, nil
}
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("Schedule Pod %+v on Node %+v is allowed, existing pods anti-affinity rules satisfied.",
glog.Infof("Schedule Pod %+v on Node %+v is allowed, existing pods anti-affinity terms satisfied.",
podName(pod), node.Name)
}
return nil, nil
}
// anyPodsMatchingTopologyTerms checks whether any of the nodes given via
// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo".
func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) {
for nodeName, targetPods := range targetPods {
targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
if err != nil {
return false, err
}
if len(targetPods) > 0 {
allTermsMatched := true
for _, term := range terms {
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) {
allTermsMatched = false
break
}
// nodeMatchesAllTopologyTerms checks whether "nodeInfo" matches
// topology of all the "terms" for the given "pod".
func (c *PodAffinityChecker) nodeMatchesAllTopologyTerms(pod *v1.Pod, topologyPairs *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) bool {
node := nodeInfo.Node()
for _, term := range terms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
if _, ok := topologyPairs.topologyPairToPods[pair]; !ok {
return false
}
if allTermsMatched {
// We have 1 or more pods on the target node that have already matched namespace and selector
// and all of the terms topologies matched the target node. So, there is at least 1 matching pod on the node.
return true, nil
} else {
return false
}
}
return true
}
// nodeMatchesAnyTopologyTerm checks whether "nodeInfo" matches
// topology of any "term" for the given "pod".
func (c *PodAffinityChecker) nodeMatchesAnyTopologyTerm(pod *v1.Pod, topologyPairs *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) bool {
node := nodeInfo.Node()
for _, term := range terms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
if _, ok := topologyPairs.topologyPairToPods[pair]; ok {
return true
}
}
}
return false, nil
return false
}
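A hedged, standalone sketch of the two lookups above: every affinity term must have a matching (topologyKey, node label value) pair recorded in the metadata, while a single matching pair is enough to fail anti-affinity. Simplified types stand in for *schedulercache.NodeInfo and the predicate metadata maps.

package main

import "fmt"

type topologyPair struct{ key, value string }

// matchesAllTerms: every term's topology key must resolve to a node label whose
// (key, value) pair is present in pairs — mirrors nodeMatchesAllTopologyTerms.
func matchesAllTerms(nodeLabels map[string]string, termKeys []string, pairs map[topologyPair]bool) bool {
	for _, key := range termKeys {
		value, ok := nodeLabels[key]
		if !ok || !pairs[topologyPair{key: key, value: value}] {
			return false
		}
	}
	return true
}

// matchesAnyTerm: one matching pair suffices — mirrors nodeMatchesAnyTopologyTerm.
func matchesAnyTerm(nodeLabels map[string]string, termKeys []string, pairs map[topologyPair]bool) bool {
	for _, key := range termKeys {
		if value, ok := nodeLabels[key]; ok && pairs[topologyPair{key: key, value: value}] {
			return true
		}
	}
	return false
}

func main() {
	labels := map[string]string{"hostname": "nodeA"}
	pairs := map[topologyPair]bool{{key: "hostname", value: "nodeA"}: true}
	fmt.Println(matchesAllTerms(labels, []string{"hostname"}, pairs)) // true
	fmt.Println(matchesAnyTerm(labels, []string{"zone"}, pairs))      // false
}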
// Checks if scheduling the pod onto this node would break any rules of this pod.
// Checks if scheduling the pod onto this node would break any term of this pod.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo,
affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
@@ -1403,20 +1372,15 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
if predicateMeta, ok := meta.(*predicateMetadata); ok {
// Check all affinity terms.
matchingPods := predicateMeta.nodeNameToMatchingAffinityPods
topologyPairsPotentialAffinityPods := predicateMeta.topologyPairsPotentialAffinityPods
if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Errorf(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
matchExists := c.nodeMatchesAllTopologyTerms(pod, topologyPairsPotentialAffinityPods, nodeInfo, affinityTerms)
if !matchExists {
// This pod may be the first pod in a series that have affinity to themselves. In order
// to not leave such pods in pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check.
if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
if !(len(topologyPairsPotentialAffinityPods.topologyPairToPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
@@ -1425,16 +1389,16 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
// Check all anti-affinity terms.
matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods
topologyPairsPotentialAntiAffinityPods := predicateMeta.topologyPairsPotentialAntiAffinityPods
if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms)
if err != nil || matchExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v",
podName(pod), node.Name, err)
matchExists := c.nodeMatchesAnyTopologyTerm(pod, topologyPairsPotentialAntiAffinityPods, nodeInfo, antiAffinityTerms)
if matchExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity",
podName(pod), node.Name)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity rules.
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity terms.
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
return ErrPodAffinityRulesNotMatch, err
@@ -1472,7 +1436,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
if !matchFound && len(affinityTerms) > 0 {
// We have not been able to find any matches for the pod's affinity rules.
// We have not been able to find any matches for the pod's affinity terms.
// This pod may be the first pod in a series that have affinity to themselves. In order
// to not leave such pods in pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
@@ -1506,7 +1470,14 @@ func CheckNodeUnschedulablePredicate(pod *v1.Pod, meta algorithm.PredicateMetada
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}
if nodeInfo.Node().Spec.Unschedulable {
// If the pod tolerates the unschedulable taint, it also tolerates `node.Spec.Unschedulable`.
podToleratesUnschedulable := v1helper.TolerationsTolerateTaint(pod.Spec.Tolerations, &v1.Taint{
Key: algorithm.TaintNodeUnschedulable,
Effect: v1.TaintEffectNoSchedule,
})
// TODO (k82cn): deprecate `node.Spec.Unschedulable` in 1.13.
if nodeInfo.Node().Spec.Unschedulable && !podToleratesUnschedulable {
return false, []algorithm.PredicateFailureReason{ErrNodeUnschedulable}, nil
}
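Illustrative only: a pod carrying a toleration that would make podToleratesUnschedulable true in the check above. The taint key literal is what algorithm.TaintNodeUnschedulable is expected to resolve to and should be treated as an assumption here.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// A pod that opts in to unschedulable (cordoned) nodes; the key literal
	// is assumed to match algorithm.TaintNodeUnschedulable.
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Tolerations: []v1.Toleration{{
				Key:      "node.kubernetes.io/unschedulable",
				Operator: v1.TolerationOpExists,
				Effect:   v1.TaintEffectNoSchedule,
			}},
		},
	}
	fmt.Println(len(pod.Spec.Tolerations)) // 1
}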

File diff suppressed because it is too large

View File

@@ -37,14 +37,14 @@ go_library(
"//pkg/scheduler/cache:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/parsers:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
@@ -74,13 +74,12 @@ go_test(
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/util/parsers:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@@ -32,7 +32,7 @@ var (
// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together
// with LeastRequestedPriority. It calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// Detail: score = 10 - variance(cpuFraction,memoryFraction,volumeFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
// Resource Utilization"
BalancedResourceAllocationMap = balancedResourcePriority.PriorityMap
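As a rough, standalone illustration of the reworked formula in the comment above (the scheduler's actual variance helper and volume handling may differ): identical cpu/memory/volume fractions score 10, and the score drops as the fractions diverge.

package main

import "fmt"

// balancedScore computes 10 - variance(fractions)*10, the shape of the
// formula quoted above; illustrative only.
func balancedScore(fractions ...float64) float64 {
	mean := 0.0
	for _, f := range fractions {
		mean += f
	}
	mean /= float64(len(fractions))
	variance := 0.0
	for _, f := range fractions {
		variance += (f - mean) * (f - mean)
	}
	variance /= float64(len(fractions))
	return 10 - variance*10
}

func main() {
	fmt.Println(balancedScore(0.5, 0.5, 0.5)) // 10: perfectly balanced node
	fmt.Println(balancedScore(0.9, 0.1, 0.5)) // ~8.9: skewed usage scores lower
}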

View File

@@ -26,11 +26,12 @@ import (
"k8s.io/kubernetes/pkg/util/parsers"
)
// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
// The two thresholds are used as bounds for the image score range. They correspond to a reasonable size range for
// container images compressed and stored in registries; 90%ile of images on dockerhub drops into this range.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
mb int64 = 1024 * 1024
minThreshold int64 = 23 * mb
maxThreshold int64 = 1000 * mb
)
// ImageLocalityPriorityMap is a priority function that favors nodes that already have the requested pod's container images.
@@ -44,44 +45,55 @@ func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *scheduler
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
sumSize := totalImageSize(nodeInfo, pod.Spec.Containers)
var score int
if priorityMeta, ok := meta.(*priorityMetadata); ok {
score = calculatePriority(sumImageScores(nodeInfo, pod.Spec.Containers, priorityMeta.totalNumNodes))
} else {
// if we are not able to parse priority meta data, skip this priority
score = 0
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: calculateScoreFromSize(sumSize),
Score: score,
}, nil
}
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
switch {
case sumSize == 0 || sumSize < minImgSize:
// 0 means none of the images required by this pod are present on this
// node or the total size of the images present is too small to be taken into further consideration.
return 0
case sumSize >= maxImgSize:
// If existing images' total size is larger than max, just make it highest priority.
return schedulerapi.MaxPriority
// calculatePriority returns the priority of a node. Given the sumScores of requested images on the node, the node's
// priority is obtained by scaling the maximum priority value with a ratio proportional to the sumScores.
func calculatePriority(sumScores int64) int {
if sumScores < minThreshold {
sumScores = minThreshold
} else if sumScores > maxThreshold {
sumScores = maxThreshold
}
return int((int64(schedulerapi.MaxPriority) * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
return int(int64(schedulerapi.MaxPriority) * (sumScores - minThreshold) / (maxThreshold - minThreshold))
}
// totalImageSize returns the total image size of all the containers that are already on the node.
func totalImageSize(nodeInfo *schedulercache.NodeInfo, containers []v1.Container) int64 {
var total int64
// sumImageScores returns the sum of image scores of all the containers that are already on the node.
// Each image receives a raw score of its size, scaled by scaledImageScore. The raw scores are later used to calculate
// the final score. Note that the init containers are not considered because it is rare for users to deploy huge init containers.
func sumImageScores(nodeInfo *schedulercache.NodeInfo, containers []v1.Container, totalNumNodes int) int64 {
var sum int64
imageStates := nodeInfo.ImageStates()
imageSizes := nodeInfo.ImageSizes()
for _, container := range containers {
if size, ok := imageSizes[normalizedImageName(container.Image)]; ok {
total += size
if state, ok := imageStates[normalizedImageName(container.Image)]; ok {
sum += scaledImageScore(state, totalNumNodes)
}
}
return total
return sum
}
// scaledImageScore returns an adaptively scaled score for the given state of an image.
// The size of the image is used as the base score, scaled by a factor which considers how many nodes the image has "spread" to.
// This heuristic aims to mitigate the undesirable "node heating problem", i.e., pods get assigned to the same or
// a few nodes due to image locality.
func scaledImageScore(imageState *schedulercache.ImageStateSummary, totalNumNodes int) int64 {
spread := float64(imageState.NumNodes) / float64(totalNumNodes)
return int64(float64(imageState.Size) * spread)
}
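To tie the two helpers above to the updated test expectations below, here is a self-contained sketch that reproduces the arithmetic with local copies of the thresholds (schedulerapi.MaxPriority is assumed to be 10): with two nodes and an image present on one of them, the spread factor is 1/2, so each such image contributes half its size to sumScores.

package main

import "fmt"

const (
	mb           int64 = 1024 * 1024
	minThreshold int64 = 23 * mb
	maxThreshold int64 = 1000 * mb
	maxPriority  int64 = 10 // assumed value of schedulerapi.MaxPriority
)

// priorityFromScores clamps sumScores into [minThreshold, maxThreshold] and
// scales it to 0..maxPriority, matching the shape of calculatePriority above.
func priorityFromScores(sumScores int64) int {
	if sumScores < minThreshold {
		sumScores = minThreshold
	} else if sumScores > maxThreshold {
		sumScores = maxThreshold
	}
	return int(maxPriority * (sumScores - minThreshold) / (maxThreshold - minThreshold))
}

func main() {
	// Image present on 1 of 2 nodes => spread = 1/2, so each image scores size/2.
	fmt.Println(priorityFromScores((40*mb + 300*mb) / 2)) // 1
	fmt.Println(priorityFromScores(10 * mb / 2))          // 0: below min threshold
	fmt.Println(priorityFromScores(2000 * mb / 2))        // 10: clamped at max threshold
}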
// normalizedImageName returns the CRI compliant name for a given image.

View File

@@ -42,13 +42,13 @@ func TestImageLocalityPriority(t *testing.T) {
},
}
test40140 := v1.PodSpec{
test40300 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
Image: "gcr.io/300",
},
},
}
@@ -64,7 +64,7 @@ func TestImageLocalityPriority(t *testing.T) {
},
}
node401402000 := v1.NodeStatus{
node403002000 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
@@ -76,10 +76,10 @@ func TestImageLocalityPriority(t *testing.T) {
},
{
Names: []string{
"gcr.io/140:" + parsers.DefaultImageTag,
"gcr.io/140:v1",
"gcr.io/300:" + parsers.DefaultImageTag,
"gcr.io/300:v1",
},
SizeBytes: int64(140 * mb),
SizeBytes: int64(300 * mb),
},
{
Names: []string{
@@ -120,29 +120,29 @@ func TestImageLocalityPriority(t *testing.T) {
// Node1
// Image: gcr.io/40:latest 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Score: 0 (40M/2 < 23M, min-threshold)
// Node2
// Image: gcr.io/250:latest 250MB
// Score: (250M-23M)/97.7M + 1 = 3
// Score: 10 * (250M/2 - 23M)/(1000M - 23M) = 1
pod: &v1.Pod{Spec: test40250},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
nodes: []*v1.Node{makeImageNode("machine1", node403002000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
name: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Pod: gcr.io/40 gcr.io/300
// Node1
// Image: gcr.io/40:latest 40MB, gcr.io/140:latest 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Image: gcr.io/40:latest 40MB, gcr.io/300:latest 300MB
// Score: 10 * ((40M + 300M)/2 - 23M)/(1000M - 23M) = 1
// Node2
// Image: not present
// Score: 0
pod: &v1.Pod{Spec: test40140},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
pod: &v1.Pod{Spec: test40300},
nodes: []*v1.Node{makeImageNode("machine1", node403002000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
name: "two images on one node, prefer this node",
},
{
@@ -150,13 +150,13 @@ func TestImageLocalityPriority(t *testing.T) {
// Node1
// Image: gcr.io/2000:latest 2000MB
// Score: 2000 > max score = 10
// Score: 10 (2000M/2 >= 1000M, max-threshold)
// Node2
// Image: gcr.io/10:latest 10MB
// Score: 10 < min score = 0
// Score: 0 (10M/2 < 23M, min-threshold)
pod: &v1.Pod{Spec: testMinMax},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
nodes: []*v1.Node{makeImageNode("machine1", node403002000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
name: "if exceed limit, use limit",
},
@@ -165,7 +165,7 @@ func TestImageLocalityPriority(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, &priorityMetadata{totalNumNodes: len(test.nodes)})(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}

View File

@@ -29,7 +29,7 @@ var (
// prioritizes based on the minimum of the average of the fraction of requested to capacity.
//
// Details:
// cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity)/2
// (cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity))/2
LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
)
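A quick standalone sketch of the corrected formula above, with the per-resource term guarded the way the scheduler's unused-score helper is commonly described (illustrative, not the exact upstream function):

package main

import "fmt"

// unusedScore models one term of the formula: (capacity - requested) * 10 / capacity.
func unusedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 10 / capacity
}

func main() {
	// Node with 4000 millicores and 8Gi memory; pods request 1000m and 2Gi.
	cpu := unusedScore(1000, 4000)                            // 7
	memory := unusedScore(2*1024*1024*1024, 8*1024*1024*1024) // 7
	fmt.Println((cpu + memory) / 2)                           // 7
}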

View File

@@ -21,7 +21,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@@ -52,6 +51,7 @@ type priorityMetadata struct {
podSelectors []labels.Selector
controllerRef *metav1.OwnerReference
podFirstServiceSelector labels.Selector
totalNumNodes int
}
// PriorityMetadata is a PriorityMetadataProducer. Node info can be nil.
@@ -65,8 +65,9 @@ func (pmf *PriorityMetadataFactory) PriorityMetadata(pod *v1.Pod, nodeNameToInfo
podTolerations: getAllTolerationPreferNoSchedule(pod.Spec.Tolerations),
affinity: pod.Spec.Affinity,
podSelectors: getSelectors(pod, pmf.serviceLister, pmf.controllerLister, pmf.replicaSetLister, pmf.statefulSetLister),
controllerRef: priorityutil.GetControllerRef(pod),
controllerRef: metav1.GetControllerOf(pod),
podFirstServiceSelector: getFirstServiceSelector(pod, pmf.serviceLister),
totalNumNodes: len(nodeNameToInfo),
}
}

View File

@@ -20,9 +20,8 @@ import (
"reflect"
"testing"
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
@@ -156,7 +155,7 @@ func TestPriorityMetadata(t *testing.T) {
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeReplicaSetLister([]*apps.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {

View File

@@ -39,10 +39,10 @@ func mostResourceScorer(requested, allocable *schedulercache.Resource, includeVo
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculatUnusedScore
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUSedScore.
// in their default scheduling policies) calculateUsedScore.
func mostRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
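To make the rounding remark in the comment above concrete, a small sketch compares the "used" score computed directly against 10 minus the unused score (assumed helper shapes, not the exact upstream functions):

package main

import "fmt"

func usedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return requested * 10 / capacity
}

func unusedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 10 / capacity
}

func main() {
	// 1000m requested out of 4000m capacity: 2.5 "used" vs 7.5 "unused".
	fmt.Println(usedScore(1000, 4000))        // 2 (floors 2.5)
	fmt.Println(10 - unusedScore(1000, 4000)) // 3 (10 minus the floored 7.5)
}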

View File

@@ -22,7 +22,6 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@@ -39,7 +38,7 @@ func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, node
controllerRef = priorityMeta.controllerRef
} else {
// We couldn't parse metadata - fallback to the podspec.
controllerRef = priorityutil.GetControllerRef(pod)
controllerRef = metav1.GetControllerOf(pod)
}
if controllerRef != nil {

View File

@@ -209,19 +209,19 @@ func TestRequestedToCapacityRatio(t *testing.T) {
for _, test := range tests {
nodeNames := make([]string, 0)
var nodeNames []string
for nodeName := range test.nodes {
nodeNames = append(nodeNames, nodeName)
}
sort.Strings(nodeNames)
nodes := make([]*v1.Node, 0)
var nodes []*v1.Node
for _, nodeName := range nodeNames {
node := test.nodes[nodeName]
nodes = append(nodes, makeNode(nodeName, node.capacity.cpu, node.capacity.mem))
}
scheduledPods := make([]*v1.Pod, 0)
var scheduledPods []*v1.Pod
for name, node := range test.nodes {
scheduledPods = append(scheduledPods,
buildResourcesPod(name, node.used))

View File

@@ -21,6 +21,8 @@ import (
"github.com/golang/glog"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
@@ -56,20 +58,31 @@ func (r *ResourceAllocationPriority) PriorityMap(
requested.Memory += nodeInfo.NonZeroRequest().Memory
var score int64
// Check if the pod has volumes and this could be added to scorer function for balanced resource allocation.
if len(pod.Spec.Volumes) >= 0 && nodeInfo.TransientInfo != nil {
if len(pod.Spec.Volumes) >= 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
score = r.scorer(&requested, &allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
} else {
score = r.scorer(&requested, &allocatable, false, 0, 0)
}
if glog.V(10) {
glog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory,
requested.MilliCPU+allocatable.MilliCPU, requested.Memory+allocatable.Memory,
score,
)
if len(pod.Spec.Volumes) >= 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
glog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, %d volumes, total request %d millicores %d memory bytes %d volumes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount,
requested.MilliCPU, requested.Memory,
nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes,
score,
)
} else {
glog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory,
requested.MilliCPU, requested.Memory,
score,
)
}
}
return schedulerapi.HostPriority{

View File

@@ -70,7 +70,7 @@ func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedule
}, nil
}
// computeScore return 1 if limit value is less than or equal to allocable
// computeScore returns 1 if limit value is less than or equal to allocatable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
if limit != 0 && allocatable != 0 && limit <= allocatable {

View File

@@ -97,16 +97,12 @@ func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{
glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
continue
}
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
matches = true
count++
break
}
}
if matches {
count++
}
}
return schedulerapi.HostPriority{
Host: node.Name,

View File

@@ -21,9 +21,8 @@ import (
"sort"
"testing"
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
@@ -60,7 +59,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
rss []*apps.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
@@ -200,7 +199,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
name: "service with partial pod label matches with service and replica set",
@@ -241,7 +240,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
name: "disjoined service and replica set should be treated equally",
@@ -280,7 +279,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
name: "Replica set with partial pod label matches",
@@ -318,7 +317,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
name: "Another replication set with partial pod label matches",
@@ -410,7 +409,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
pod *v1.Pod
pods []*v1.Pod
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
rss []*apps.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
@@ -764,7 +763,7 @@ func TestZoneSpreadPriority(t *testing.T) {
// when constructing mataDataProducer
sss := []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
rcs := []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}}
rss := []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
rss := []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {

View File

@@ -11,17 +11,16 @@ go_test(
srcs = [
"non_zero_test.go",
"topologies_test.go",
"util_test.go",
],
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/selection:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
@@ -30,14 +29,12 @@ go_library(
srcs = [
"non_zero.go",
"topologies.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)

View File

@@ -26,7 +26,7 @@ import (
)
func TestGetNonzeroRequests(t *testing.T) {
tds := []struct {
tests := []struct {
name string
requests v1.ResourceList
expectedCPU int64
@@ -65,9 +65,11 @@ func TestGetNonzeroRequests(t *testing.T) {
},
}
for _, td := range tds {
realCPU, realMemory := GetNonzeroRequests(&td.requests)
assert.EqualValuesf(t, td.expectedCPU, realCPU, "Failed to test: %s", td.name)
assert.EqualValuesf(t, td.expectedMemory, realMemory, "Failed to test: %s", td.name)
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
realCPU, realMemory := GetNonzeroRequests(&test.requests)
assert.EqualValuesf(t, test.expectedCPU, realCPU, "Failed to test: %s", test.name)
assert.EqualValuesf(t, test.expectedMemory, realMemory, "Failed to test: %s", test.name)
})
}
}

View File

@@ -59,8 +59,10 @@ func TestGetNamespacesFromPodAffinityTerm(t *testing.T) {
}
for _, test := range tests {
realValue := GetNamespacesFromPodAffinityTerm(fakePod(), test.podAffinityTerm)
assert.EqualValuesf(t, test.expectedValue, realValue, "Failed to test: %s", test.name)
t.Run(test.name, func(t *testing.T) {
realValue := GetNamespacesFromPodAffinityTerm(fakePod(), test.podAffinityTerm)
assert.EqualValuesf(t, test.expectedValue, realValue, "Failed to test: %s", test.name)
})
}
}
@@ -96,12 +98,14 @@ func TestPodMatchesTermsNamespaceAndSelector(t *testing.T) {
}
for _, test := range tests {
fakeTestPod := fakePod()
fakeTestPod.Namespace = test.podNamespaces
fakeTestPod.Labels = test.podLabels
t.Run(test.name, func(t *testing.T) {
fakeTestPod := fakePod()
fakeTestPod.Namespace = test.podNamespaces
fakeTestPod.Labels = test.podLabels
realValue := PodMatchesTermsNamespaceAndSelector(fakeTestPod, fakeNamespaces, fakeSelector)
assert.EqualValuesf(t, test.expectedResult, realValue, "Failed to test: %s", test.name)
realValue := PodMatchesTermsNamespaceAndSelector(fakeTestPod, fakeNamespaces, fakeSelector)
assert.EqualValuesf(t, test.expectedResult, realValue, "Failed to test: %s", test.name)
})
}
}
@@ -248,7 +252,9 @@ func TestNodesHaveSameTopologyKey(t *testing.T) {
}
for _, test := range tests {
got := NodesHaveSameTopologyKey(test.nodeA, test.nodeB, test.topologyKey)
assert.Equalf(t, test.expected, got, "Failed to test: %s", test.name)
t.Run(test.name, func(t *testing.T) {
got := NodesHaveSameTopologyKey(test.nodeA, test.nodeB, test.topologyKey)
assert.Equalf(t, test.expected, got, "Failed to test: %s", test.name)
})
}
}

View File

@@ -1,119 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func TestGetControllerRef(t *testing.T) {
fakeBlockOwnerDeletion := true
fakeFalseController := false
fakeTrueController := true
fakeEmptyOwnerReference := metav1.OwnerReference{}
tds := []struct {
name string
pod v1.Pod
expectedNil bool
expectedOR metav1.OwnerReference
}{
{
"ownerreference_not_exist",
v1.Pod{},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_nil",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
},
},
},
},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_false",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
Controller: &fakeFalseController,
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
},
},
},
},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_true",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
Controller: &fakeTrueController,
},
},
},
},
false,
metav1.OwnerReference{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
Controller: &fakeTrueController,
},
},
}
for _, td := range tds {
realOR := GetControllerRef(&td.pod)
if td.expectedNil {
assert.Nilf(t, realOR, "Failed to test: %s", td.name)
} else {
assert.Equalf(t, &td.expectedOR, realOR, "Failed to test: %s", td.name)
}
}
}

View File

@@ -1,60 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/api/core/v1"
)
// Some functions used by multiple scheduler tests.
type schedulerTester struct {
t *testing.T
scheduler ScheduleAlgorithm
nodeLister NodeLister
}
// Call if you know exactly where pod should get scheduled.
func (st *schedulerTester) expectSchedule(pod *v1.Pod, expected string) {
actual, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
if actual != expected {
st.t.Errorf("Unexpected scheduling value: %v, expected %v", actual, expected)
}
}
// Call if you can't predict where pod will be scheduled.
func (st *schedulerTester) expectSuccess(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
}
// Call if pod should *not* schedule.
func (st *schedulerTester) expectFailure(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err == nil {
st.t.Error("Unexpected non-error")
}
}

View File

@@ -17,9 +17,8 @@ limitations under the License.
package algorithm
import (
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
@@ -120,7 +119,7 @@ type ControllerLister interface {
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*extensions.ReplicaSet, error)
GetPodReplicaSets(*v1.Pod) ([]*apps.ReplicaSet, error)
}
var _ ControllerLister = &EmptyControllerLister{}
@@ -144,7 +143,7 @@ var _ ReplicaSetLister = &EmptyReplicaSetLister{}
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*apps.ReplicaSet, err error) {
return nil, nil
}
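
With ReplicaSetLister now expressed in terms of apps/v1, an implementation only needs to return apps.ReplicaSet values. A minimal sketch of a test fake satisfying the interface, assuming it sits in this package and reuses the apps and v1 imports shown above; the type name is hypothetical:

// fakeReplicaSetLister returns a fixed slice of ReplicaSets regardless of the pod.
type fakeReplicaSetLister struct {
	rss []*apps.ReplicaSet
}

var _ ReplicaSetLister = fakeReplicaSetLister{}

// GetPodReplicaSets implements ReplicaSetLister using the apps/v1 type.
func (f fakeReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) ([]*apps.ReplicaSet, error) {
	return f.rss, nil
}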

View File

@@ -19,7 +19,7 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/factory:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@@ -17,9 +17,9 @@ go_library(
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
@@ -36,14 +36,14 @@ go_test(
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/latest:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/util/testing:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/client-go/util/testing:go_default_library",
],
)

View File

@@ -814,6 +814,130 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
}},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.12": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodePIDPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MaxCSIVolumeCountPred"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "CheckVolumeBinding"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2},
{
"name": "RequestedToCapacityRatioPriority",
"weight": 2,
"argument": {
"requestedToCapacityRatioArguments": {
"shape": [
{"utilization": 0, "score": 0},
{"utilization": 50, "score": 7}
]
}
}}
],"extenders": [{
"urlPrefix": "/prefix",
"filterVerb": "filter",
"prioritizeVerb": "prioritize",
"weight": 1,
"bindVerb": "bind",
"enableHttps": true,
"tlsConfig": {"Insecure":true},
"httpTimeout": 1,
"nodeCacheCapable": true,
"managedResources": [{"name":"example.com/foo","ignoredByScheduler":true}],
"ignorable":true
}]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodePIDPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MaxCSIVolumeCountPred"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "CheckVolumeBinding"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
{
Name: "RequestedToCapacityRatioPriority",
Weight: 2,
Argument: &schedulerapi.PriorityArgument{
RequestedToCapacityRatioArguments: &schedulerapi.RequestedToCapacityRatioArguments{
UtilizationShape: []schedulerapi.UtilizationShapePoint{
{Utilization: 0, Score: 0},
{Utilization: 50, Score: 7},
}},
},
},
},
ExtenderConfigs: []schedulerapi.ExtenderConfig{{
URLPrefix: "/prefix",
FilterVerb: "filter",
PrioritizeVerb: "prioritize",
Weight: 1,
BindVerb: "bind", // 1.11 restored case-sensitivity, but allowed either "BindVerb" or "bindVerb"
EnableHTTPS: true,
TLSConfig: &restclient.TLSClientConfig{Insecure: true},
HTTPTimeout: 1,
NodeCacheCapable: true,
ManagedResources: []schedulerapi.ExtenderManagedResource{{Name: v1.ResourceName("example.com/foo"), IgnoredByScheduler: true}},
Ignorable: true,
}},
},
},
}
registeredPredicates := sets.NewString(factory.ListRegisteredFitPredicates()...)
@@ -849,23 +973,24 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
informerFactory := informers.NewSharedInformerFactory(client, 0)
if _, err := factory.NewConfigFactory(
"some-scheduler-name",
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
informerFactory.Extensions().V1beta1().ReplicaSets(),
informerFactory.Apps().V1beta1().StatefulSets(),
informerFactory.Core().V1().Services(),
informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
enableEquivalenceCache,
false,
).CreateFromConfig(policy); err != nil {
if _, err := factory.NewConfigFactory(&factory.ConfigFactoryArgs{
SchedulerName: "some-scheduler-name",
Client: client,
NodeInformer: informerFactory.Core().V1().Nodes(),
PodInformer: informerFactory.Core().V1().Pods(),
PvInformer: informerFactory.Core().V1().PersistentVolumes(),
PvcInformer: informerFactory.Core().V1().PersistentVolumeClaims(),
ReplicationControllerInformer: informerFactory.Core().V1().ReplicationControllers(),
ReplicaSetInformer: informerFactory.Apps().V1().ReplicaSets(),
StatefulSetInformer: informerFactory.Apps().V1().StatefulSets(),
ServiceInformer: informerFactory.Core().V1().Services(),
PdbInformer: informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
StorageClassInformer: informerFactory.Storage().V1().StorageClasses(),
HardPodAffinitySymmetricWeight: v1.DefaultHardPodAffinitySymmetricWeight,
EnableEquivalenceClassCache: enableEquivalenceCache,
DisablePreemption: false,
PercentageOfNodesToScore: schedulerapi.DefaultPercentageOfNodesToScore,
}).CreateFromConfig(policy); err != nil {
t.Errorf("%s: Error constructing: %v", v, err)
continue
}

View File

@@ -94,10 +94,6 @@ func init() {
// Register the priority function so that its available
// but do not include it as part of the default priorities
factory.RegisterPriorityFunction2("EqualPriority", core.EqualPriorityMap, nil, 1)
// ImageLocalityPriority prioritizes nodes based on the locality of images requested by a pod. Nodes with a larger
// total size of already-installed packages required by the pod are preferred over nodes with few or none of
// those packages installed.
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
factory.RegisterPriorityFunction2(
@@ -137,6 +133,12 @@ func defaultPredicates() sets.String {
return predicates.NewMaxPDVolumeCountPredicate(predicates.AzureDiskVolumeFilterType, args.PVInfo, args.PVCInfo)
},
),
factory.RegisterFitPredicateFactory(
predicates.MaxCSIVolumeCountPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewCSIMaxVolumeLimitPredicate(args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by inter-pod affinity.
factory.RegisterFitPredicateFactory(
predicates.MatchInterPodAffinityPred,
@@ -197,10 +199,13 @@ func ApplyFeatureGates() {
// Fit is determined based on whether a pod can tolerate all of the node's taints
factory.RegisterMandatoryFitPredicate(predicates.PodToleratesNodeTaintsPred, predicates.PodToleratesNodeTaints)
// Fit is determined based on whether a pod can tolerate the node being marked unschedulable
factory.RegisterMandatoryFitPredicate(predicates.CheckNodeUnschedulablePred, predicates.CheckNodeUnschedulablePredicate)
// Insert Key "PodToleratesNodeTaints" and "CheckNodeUnschedulable" To All Algorithm Provider
// The key will insert to all providers which in algorithmProviderMap[]
// if you just want insert to specific provider, call func InsertPredicateKeyToAlgoProvider()
factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.PodToleratesNodeTaintsPred)
factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.CheckNodeUnschedulablePred)
glog.Warningf("TaintNodesByCondition is enabled, PodToleratesNodeTaints predicate is mandatory")
}
@@ -260,6 +265,9 @@ func defaultPriorities() sets.String {
// Prioritizes nodes that marked with taint which pod can tolerate.
factory.RegisterPriorityFunction2("TaintTolerationPriority", priorities.ComputeTaintTolerationPriorityMap, priorities.ComputeTaintTolerationPriorityReduce, 1),
// ImageLocalityPriority prioritizes nodes that have images requested by the pod present.
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1),
)
}

View File

@@ -59,7 +59,8 @@ func TestDefaultPriorities(t *testing.T) {
"BalancedResourceAllocation",
"NodePreferAvoidPodsPriority",
"NodeAffinityPriority",
"TaintTolerationPriority")
"TaintTolerationPriority",
"ImageLocalityPriority")
if expected := defaultPriorities(); !result.Equal(expected) {
t.Errorf("expected %v got %v", expected, result)
}
@@ -67,18 +68,19 @@ func TestDefaultPriorities(t *testing.T) {
func TestDefaultPredicates(t *testing.T) {
result := sets.NewString(
"NoVolumeZoneConflict",
"MaxEBSVolumeCount",
"MaxGCEPDVolumeCount",
"MaxAzureDiskVolumeCount",
"MatchInterPodAffinity",
"NoDiskConflict",
"GeneralPredicates",
"CheckNodeMemoryPressure",
"CheckNodeDiskPressure",
"CheckNodePIDPressure",
"CheckNodeCondition",
"PodToleratesNodeTaints",
predicates.NoVolumeZoneConflictPred,
predicates.MaxEBSVolumeCountPred,
predicates.MaxGCEPDVolumeCountPred,
predicates.MaxAzureDiskVolumeCountPred,
predicates.MaxCSIVolumeCountPred,
predicates.MatchInterPodAffinityPred,
predicates.NoDiskConflictPred,
predicates.GeneralPred,
predicates.CheckNodeMemoryPressurePred,
predicates.CheckNodeDiskPressurePred,
predicates.CheckNodePIDPressurePred,
predicates.CheckNodeConditionPred,
predicates.PodToleratesNodeTaintsPred,
predicates.CheckVolumeBindingPred,
)

View File

@@ -17,6 +17,7 @@ limitations under the License.
package algorithmprovider
import (
"fmt"
"testing"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -44,44 +45,48 @@ func TestDefaultConfigExists(t *testing.T) {
func TestAlgorithmProviders(t *testing.T) {
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("error retrieving '%s' provider: %v", pn, err)
break
}
if len(p.PriorityFunctionKeys) == 0 {
t.Errorf("%s algorithm provider shouldn't have 0 priority functions", pn)
}
for _, pf := range p.PriorityFunctionKeys.List() {
if !factory.IsPriorityFunctionRegistered(pf) {
t.Errorf("priority function %s is not registered but is used in the %s algorithm provider", pf, pn)
t.Run(pn, func(t *testing.T) {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Fatalf("error retrieving provider: %v", err)
}
}
for _, fp := range p.FitPredicateKeys.List() {
if !factory.IsFitPredicateRegistered(fp) {
t.Errorf("fit predicate %s is not registered but is used in the %s algorithm provider", fp, pn)
if len(p.PriorityFunctionKeys) == 0 {
t.Errorf("algorithm provider shouldn't have 0 priority functions")
}
}
for _, pf := range p.PriorityFunctionKeys.List() {
t.Run(fmt.Sprintf("priorityfunction/%s", pf), func(t *testing.T) {
if !factory.IsPriorityFunctionRegistered(pf) {
t.Errorf("priority function is not registered but is used in the algorithm provider")
}
})
}
for _, fp := range p.FitPredicateKeys.List() {
t.Run(fmt.Sprintf("fitpredicate/%s", fp), func(t *testing.T) {
if !factory.IsFitPredicateRegistered(fp) {
t.Errorf("fit predicate is not registered but is used in the algorithm provider")
}
})
}
})
}
}
func TestApplyFeatureGates(t *testing.T) {
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("Error retrieving '%s' provider: %v", pn, err)
break
}
t.Run(pn, func(t *testing.T) {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Fatalf("Error retrieving provider: %v", err)
}
if !p.FitPredicateKeys.Has("CheckNodeCondition") {
t.Errorf("Failed to find predicate: 'CheckNodeCondition'")
break
}
if !p.FitPredicateKeys.Has("CheckNodeCondition") {
t.Fatalf("Failed to find predicate: 'CheckNodeCondition'")
}
if !p.FitPredicateKeys.Has("PodToleratesNodeTaints") {
t.Errorf("Failed to find predicate: 'PodToleratesNodeTaints'")
break
}
if !p.FitPredicateKeys.Has("PodToleratesNodeTaints") {
t.Fatalf("Failed to find predicate: 'PodToleratesNodeTaints'")
}
})
}
// Apply features for algorithm providers.
@@ -90,20 +95,19 @@ func TestApplyFeatureGates(t *testing.T) {
ApplyFeatureGates()
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("Error retrieving '%s' provider: %v", pn, err)
break
}
t.Run(pn, func(t *testing.T) {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Fatalf("Error retrieving '%s' provider: %v", pn, err)
}
if !p.FitPredicateKeys.Has("PodToleratesNodeTaints") {
t.Errorf("Failed to find predicate: 'PodToleratesNodeTaints'")
break
}
if !p.FitPredicateKeys.Has("PodToleratesNodeTaints") {
t.Fatalf("Failed to find predicate: 'PodToleratesNodeTaints'")
}
if p.FitPredicateKeys.Has("CheckNodeCondition") {
t.Errorf("Unexpected predicate: 'CheckNodeCondition'")
break
}
if p.FitPredicateKeys.Has("CheckNodeCondition") {
t.Fatalf("Unexpected predicate: 'CheckNodeCondition'")
}
})
}
}

View File

@@ -15,12 +15,12 @@ go_library(
],
importpath = "k8s.io/kubernetes/pkg/scheduler/api",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
],
)

View File

@@ -12,10 +12,10 @@ go_library(
deps = [
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/serializer/json:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/serializer/versioning:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/serializer/json:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/serializer/versioning:go_default_library",
],
)

View File

@@ -36,6 +36,9 @@ const (
MaxPriority = 10
// MaxWeight defines the max weight value.
MaxWeight = MaxInt / MaxPriority
// DefaultPercentageOfNodesToScore defines the percentage of all nodes that, once found
// feasible, causes the scheduler to stop looking for more nodes.
DefaultPercentageOfNodesToScore = 50
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
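
To make the new constant concrete: the percentage determines how many feasible nodes the scheduler collects before it stops searching. The arithmetic below is an illustrative sketch only, not the scheduler's actual implementation (which additionally enforces a minimum number of feasible nodes to find):

// numFeasibleNodesToFind sketches how a percentage-of-nodes setting could be
// turned into a stopping point for the feasibility search.
func numFeasibleNodesToFind(numAllNodes int, percentage int32) int {
	if percentage <= 0 || percentage >= 100 {
		return numAllNodes
	}
	return numAllNodes * int(percentage) / 100
}

// Example: with 500 nodes and DefaultPercentageOfNodesToScore (50), the search
// stops after roughly 250 feasible nodes have been found.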

View File

@@ -16,12 +16,12 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/scheduler/api/v1",
deps = [
"//pkg/scheduler/api:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
],
)

View File

@@ -21,7 +21,7 @@ limitations under the License.
package v1
import (
core_v1 "k8s.io/api/core/v1"
corev1 "k8s.io/api/core/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
rest "k8s.io/client-go/rest"
)
@@ -31,33 +31,21 @@ func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(core_v1.Pod)
(*in).DeepCopyInto(*out)
}
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(core_v1.NodeList)
(*in).DeepCopyInto(*out)
}
*out = new(corev1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
return
@@ -110,12 +98,8 @@ func (in *ExtenderConfig) DeepCopyInto(out *ExtenderConfig) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
if *in == nil {
*out = nil
} else {
*out = new(rest.TLSClientConfig)
(*in).DeepCopyInto(*out)
}
*out = new(rest.TLSClientConfig)
(*in).DeepCopyInto(*out)
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
@@ -140,24 +124,16 @@ func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(core_v1.NodeList)
(*in).DeepCopyInto(*out)
}
*out = new(corev1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
if in.FailedNodes != nil {
@@ -201,35 +177,37 @@ func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(core_v1.Pod)
(*in).DeepCopyInto(*out)
}
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
var outVal *Victims
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(Victims)
val.DeepCopyInto((*out)[key])
in, out := &val, &outVal
*out = new(Victims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
@@ -252,12 +230,15 @@ func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult)
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
@@ -391,11 +372,10 @@ func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(MetaPod)
(*in)[i].DeepCopyInto((*out)[i])
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(MetaPod)
**out = **in
}
}
}
@@ -463,21 +443,13 @@ func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
if *in == nil {
*out = nil
} else {
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
return
}
@@ -497,12 +469,8 @@ func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
return
}
@@ -522,30 +490,18 @@ func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAntiAffinity)
**out = **in
}
*out = new(ServiceAntiAffinity)
**out = **in
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
if *in == nil {
*out = nil
} else {
*out = new(LabelPreference)
**out = **in
}
*out = new(LabelPreference)
**out = **in
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
if *in == nil {
*out = nil
} else {
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
return
}
@@ -565,12 +521,8 @@ func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
return
}
@@ -664,13 +616,12 @@ func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*core_v1.Pod, len(*in))
*out = make([]*corev1.Pod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(core_v1.Pod)
(*in)[i].DeepCopyInto((*out)[i])
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
}
}

View File

@@ -13,10 +13,10 @@ go_library(
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/validation:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/validation:go_default_library",
],
)

View File

@@ -31,33 +31,21 @@ func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
return
@@ -110,12 +98,8 @@ func (in *ExtenderConfig) DeepCopyInto(out *ExtenderConfig) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
if *in == nil {
*out = nil
} else {
*out = new(rest.TLSClientConfig)
(*in).DeepCopyInto(*out)
}
*out = new(rest.TLSClientConfig)
(*in).DeepCopyInto(*out)
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
@@ -140,24 +124,16 @@ func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
if in.FailedNodes != nil {
@@ -201,35 +177,37 @@ func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
var outVal *Victims
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(Victims)
val.DeepCopyInto((*out)[key])
in, out := &val, &outVal
*out = new(Victims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
@@ -252,12 +230,15 @@ func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult)
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
@@ -391,11 +372,10 @@ func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(MetaPod)
(*in)[i].DeepCopyInto((*out)[i])
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(MetaPod)
**out = **in
}
}
}
@@ -463,21 +443,13 @@ func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
if *in == nil {
*out = nil
} else {
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
return
}
@@ -497,12 +469,8 @@ func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
return
}
@@ -522,30 +490,18 @@ func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAntiAffinity)
**out = **in
}
*out = new(ServiceAntiAffinity)
**out = **in
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
if *in == nil {
*out = nil
} else {
*out = new(LabelPreference)
**out = **in
}
*out = new(LabelPreference)
**out = **in
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
if *in == nil {
*out = nil
} else {
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
return
}
@@ -565,12 +521,8 @@ func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
return
}
@@ -666,11 +618,10 @@ func (in *Victims) DeepCopyInto(out *Victims) {
in, out := &in.Pods, &out.Pods
*out = make([]*v1.Pod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(v1.Pod)
(*in)[i].DeepCopyInto((*out)[i])
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
}
}

View File

@@ -0,0 +1,39 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"register.go",
"types.go",
"zz_generated.deepcopy.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/config",
visibility = ["//visibility:public"],
deps = [
"//staging/src/k8s.io/apimachinery/pkg/apis/config:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/apis/config:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/apis/config/scheme:all-srcs",
"//pkg/scheduler/apis/config/v1alpha1:all-srcs",
"//pkg/scheduler/apis/config/validation:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

11
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/OWNERS generated vendored Executable file
View File

@@ -0,0 +1,11 @@
approvers:
- api-approvers
- sig-scheduling-maintainers
- sttts
- luxas
reviewers:
- sig-scheduling
- api-reviewers
- dixudx
- luxas
- sttts

View File

@@ -0,0 +1,20 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:deepcopy-gen=package
// +groupName=kubescheduler.config.k8s.io
package config // import "k8s.io/kubernetes/pkg/scheduler/apis/config"

View File

@@ -0,0 +1,43 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// GroupName is the group name used in this package
const GroupName = "kubescheduler.config.k8s.io"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
var (
// SchemeBuilder is the scheme builder with scheme init functions to run for this API package
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme is a global function that registers this API group & version to a scheme
AddToScheme = SchemeBuilder.AddToScheme
)
// addKnownTypes registers known types to the given scheme
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&KubeSchedulerConfiguration{},
)
return nil
}

View File

@@ -0,0 +1,29 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["scheme.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/apis/config/v1alpha1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/serializer:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,44 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheme
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config"
kubeschedulerconfigv1alpha1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1alpha1"
)
var (
// Scheme is the runtime.Scheme to which all kubescheduler api types are registered.
Scheme = runtime.NewScheme()
// Codecs provides access to encoding and decoding for the scheme.
Codecs = serializer.NewCodecFactory(Scheme)
)
func init() {
AddToScheme(Scheme)
}
// AddToScheme builds the kubescheduler scheme using all known versions of the kubescheduler api.
func AddToScheme(scheme *runtime.Scheme) {
utilruntime.Must(kubeschedulerconfig.AddToScheme(Scheme))
utilruntime.Must(kubeschedulerconfigv1alpha1.AddToScheme(Scheme))
utilruntime.Must(scheme.SetVersionPriority(kubeschedulerconfigv1alpha1.SchemeGroupVersion))
}
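
A hedged sketch of how this scheme might be used to decode serialized configuration bytes into the internal type; the helper name is hypothetical, and it assumes the snippet lives in this package with "fmt" added to the imports:

// loadConfig decodes serialized KubeSchedulerConfiguration bytes into the
// internal config type, using the conversions registered on the scheme.
func loadConfig(data []byte) (*kubeschedulerconfig.KubeSchedulerConfiguration, error) {
	obj, err := runtime.Decode(Codecs.UniversalDecoder(), data)
	if err != nil {
		return nil, err
	}
	cfg, ok := obj.(*kubeschedulerconfig.KubeSchedulerConfiguration)
	if !ok {
		return nil, fmt.Errorf("unexpected object type: %T", obj)
	}
	return cfg, nil
}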

View File

@@ -0,0 +1,137 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
apimachineryconfig "k8s.io/apimachinery/pkg/apis/config"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apiserverconfig "k8s.io/apiserver/pkg/apis/config"
)
const (
// SchedulerDefaultLockObjectNamespace defines default scheduler lock object namespace ("kube-system")
SchedulerDefaultLockObjectNamespace string = metav1.NamespaceSystem
// SchedulerDefaultLockObjectName defines default scheduler lock object name ("kube-scheduler")
SchedulerDefaultLockObjectName = "kube-scheduler"
// SchedulerPolicyConfigMapKey defines the key of the element in the
// scheduler's policy ConfigMap that contains scheduler's policy config.
SchedulerPolicyConfigMapKey = "policy.cfg"
// SchedulerDefaultProviderName defines the default provider name
SchedulerDefaultProviderName = "DefaultProvider"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// KubeSchedulerConfiguration configures a scheduler
type KubeSchedulerConfiguration struct {
metav1.TypeMeta
// SchedulerName is the name of the scheduler, used to select which pods
// will be processed by this scheduler, based on pod's "spec.SchedulerName".
SchedulerName string
// AlgorithmSource specifies the scheduler algorithm source.
AlgorithmSource SchedulerAlgorithmSource
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 0-100.
HardPodAffinitySymmetricWeight int32
// LeaderElection defines the configuration of leader election client.
LeaderElection KubeSchedulerLeaderElectionConfiguration
// ClientConnection specifies the kubeconfig file and client connection
// settings for the proxy server to use when communicating with the apiserver.
ClientConnection apimachineryconfig.ClientConnectionConfiguration
// HealthzBindAddress is the IP address and port for the health check server to serve on,
// defaulting to 0.0.0.0:10251
HealthzBindAddress string
// MetricsBindAddress is the IP address and port for the metrics server to
// serve on, defaulting to 0.0.0.0:10251.
MetricsBindAddress string
// DebuggingConfiguration holds configuration for Debugging related features
// TODO: We might want to make this a substruct like Debugging apiserverconfig.DebuggingConfiguration
apiserverconfig.DebuggingConfiguration
// DisablePreemption disables the pod preemption feature.
DisablePreemption bool
// PercentageOfNodesToScore is the percentage of all nodes that, once found feasible
// for running a pod, causes the scheduler to stop searching for more feasible nodes in
// the cluster. This helps improve the scheduler's performance. The scheduler always tries to find
// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
// Example: if the cluster size is 500 nodes and the value of this flag is 30,
// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
// When the value is 0, default percentage (50%) of the nodes will be scored.
PercentageOfNodesToScore int32
// DEPRECATED.
// Indicates the "all topologies" set for an empty topologyKey when it is used for PreferredDuringScheduling pod anti-affinity.
FailureDomains string
// Duration to wait for a binding operation to complete before timing out
// Value must be a non-negative integer. The value zero indicates no waiting.
// If this value is nil, the default value will be used.
BindTimeoutSeconds *int64
}
// SchedulerAlgorithmSource is the source of a scheduler algorithm. One source
// field must be specified, and source fields are mutually exclusive.
type SchedulerAlgorithmSource struct {
// Policy is a policy based algorithm source.
Policy *SchedulerPolicySource
// Provider is the name of a scheduling algorithm provider to use.
Provider *string
}
// SchedulerPolicySource configures a means to obtain a scheduler Policy. One
// source field must be specified, and source fields are mutually exclusive.
type SchedulerPolicySource struct {
// File is a file policy source.
File *SchedulerPolicyFileSource
// ConfigMap is a config map policy source.
ConfigMap *SchedulerPolicyConfigMapSource
}
// SchedulerPolicyFileSource is a policy serialized to disk and accessed via
// path.
type SchedulerPolicyFileSource struct {
// Path is the location of a serialized policy.
Path string
}
// SchedulerPolicyConfigMapSource is a policy serialized into a config map value
// under the SchedulerPolicyConfigMapKey key.
type SchedulerPolicyConfigMapSource struct {
// Namespace is the namespace of the policy config map.
Namespace string
// Name is the name of the policy config map.
Name string
}
// KubeSchedulerLeaderElectionConfiguration expands LeaderElectionConfiguration
// to include scheduler specific configuration.
type KubeSchedulerLeaderElectionConfiguration struct {
apiserverconfig.LeaderElectionConfiguration
// LockObjectNamespace defines the namespace of the lock object
LockObjectNamespace string
// LockObjectName defines the lock object name
LockObjectName string
}
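
For orientation, a minimal sketch of building the internal configuration directly in Go and selecting the default algorithm provider, assuming it lives in this config package; the values are illustrative only:

// exampleConfig returns a small, hand-built internal configuration that uses
// the DefaultProvider algorithm source.
func exampleConfig() KubeSchedulerConfiguration {
	provider := SchedulerDefaultProviderName
	return KubeSchedulerConfiguration{
		SchedulerName:                  "default-scheduler",
		AlgorithmSource:                SchedulerAlgorithmSource{Provider: &provider},
		HardPodAffinitySymmetricWeight: 1,
		PercentageOfNodesToScore:       50,
	}
}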

View File

@@ -0,0 +1,53 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"defaults.go",
"doc.go",
"register.go",
"zz_generated.conversion.go",
"zz_generated.deepcopy.go",
"zz_generated.defaults.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/config/v1alpha1",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/master/ports:go_default_library",
"//pkg/scheduler/apis/config:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/config/v1alpha1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/conversion:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/apis/config/v1alpha1:go_default_library",
"//staging/src/k8s.io/kube-scheduler/config/v1alpha1:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["defaults_test.go"],
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/kube-scheduler/config/v1alpha1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,111 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"net"
"strconv"
"k8s.io/apimachinery/pkg/runtime"
apiserverconfigv1alpha1 "k8s.io/apiserver/pkg/apis/config/v1alpha1"
kubescedulerconfigv1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
// this package shouldn't really depend on other k8s.io/kubernetes code
api "k8s.io/kubernetes/pkg/apis/core"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/master/ports"
)
func addDefaultingFuncs(scheme *runtime.Scheme) error {
return RegisterDefaults(scheme)
}
// SetDefaults_KubeSchedulerConfiguration sets additional defaults
func SetDefaults_KubeSchedulerConfiguration(obj *kubescedulerconfigv1alpha1.KubeSchedulerConfiguration) {
if len(obj.SchedulerName) == 0 {
obj.SchedulerName = api.DefaultSchedulerName
}
if obj.HardPodAffinitySymmetricWeight == 0 {
obj.HardPodAffinitySymmetricWeight = api.DefaultHardPodAffinitySymmetricWeight
}
if obj.AlgorithmSource.Policy == nil &&
(obj.AlgorithmSource.Provider == nil || len(*obj.AlgorithmSource.Provider) == 0) {
val := kubescedulerconfigv1alpha1.SchedulerDefaultProviderName
obj.AlgorithmSource.Provider = &val
}
if policy := obj.AlgorithmSource.Policy; policy != nil {
if policy.ConfigMap != nil && len(policy.ConfigMap.Namespace) == 0 {
obj.AlgorithmSource.Policy.ConfigMap.Namespace = api.NamespaceSystem
}
}
if host, port, err := net.SplitHostPort(obj.HealthzBindAddress); err == nil {
if len(host) == 0 {
host = "0.0.0.0"
}
obj.HealthzBindAddress = net.JoinHostPort(host, port)
} else {
obj.HealthzBindAddress = net.JoinHostPort("0.0.0.0", strconv.Itoa(ports.SchedulerPort))
}
if host, port, err := net.SplitHostPort(obj.MetricsBindAddress); err == nil {
if len(host) == 0 {
host = "0.0.0.0"
}
obj.MetricsBindAddress = net.JoinHostPort(host, port)
} else {
obj.MetricsBindAddress = net.JoinHostPort("0.0.0.0", strconv.Itoa(ports.SchedulerPort))
}
if len(obj.LeaderElection.LockObjectNamespace) == 0 {
obj.LeaderElection.LockObjectNamespace = kubescedulerconfigv1alpha1.SchedulerDefaultLockObjectNamespace
}
if len(obj.LeaderElection.LockObjectName) == 0 {
obj.LeaderElection.LockObjectName = kubescedulerconfigv1alpha1.SchedulerDefaultLockObjectName
}
if obj.PercentageOfNodesToScore == 0 {
// by default, stop looking for feasible nodes once the number found reaches 50% of the cluster's nodes.
obj.PercentageOfNodesToScore = 50
}
if len(obj.FailureDomains) == 0 {
obj.FailureDomains = kubeletapis.DefaultFailureDomains
}
if len(obj.ClientConnection.ContentType) == 0 {
obj.ClientConnection.ContentType = "application/vnd.kubernetes.protobuf"
}
// Scheduler has an opinion about QPS/Burst, setting specific defaults for itself, instead of generic settings.
if obj.ClientConnection.QPS == 0.0 {
obj.ClientConnection.QPS = 50.0
}
if obj.ClientConnection.Burst == 0 {
obj.ClientConnection.Burst = 100
}
// Use the default LeaderElectionConfiguration options
apiserverconfigv1alpha1.RecommendedDefaultLeaderElectionConfiguration(&obj.LeaderElection.LeaderElectionConfiguration)
if obj.BindTimeoutSeconds == nil {
defaultBindTimeoutSeconds := int64(600)
obj.BindTimeoutSeconds = &defaultBindTimeoutSeconds
}
}
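
A small usage sketch of the defaulting above, assuming it runs in this package (with "fmt" added to the imports) against an otherwise empty external object; the printed values reflect the defaults the function sets, such as the scheduler port on all interfaces:

// exampleDefaults shows the effect of SetDefaults_KubeSchedulerConfiguration
// on an empty configuration object.
func exampleDefaults() {
	cfg := &kubescedulerconfigv1alpha1.KubeSchedulerConfiguration{}
	SetDefaults_KubeSchedulerConfiguration(cfg)
	// After defaulting, both bind addresses point at the scheduler port on all
	// interfaces (e.g. "0.0.0.0:10251") and the DefaultProvider is selected.
	fmt.Println(cfg.HealthzBindAddress, cfg.MetricsBindAddress, *cfg.AlgorithmSource.Provider)
}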

View File

@@ -0,0 +1,64 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"encoding/json"
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
kubeschedulerconfigv1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
)
func TestSchedulerDefaults(t *testing.T) {
ks1 := &kubeschedulerconfigv1alpha1.KubeSchedulerConfiguration{}
SetDefaults_KubeSchedulerConfiguration(ks1)
cm, err := convertObjToConfigMap("KubeSchedulerConfiguration", ks1)
if err != nil {
t.Errorf("unexpected ConvertObjToConfigMap error %v", err)
}
ks2 := &kubeschedulerconfigv1alpha1.KubeSchedulerConfiguration{}
if err = json.Unmarshal([]byte(cm.Data["KubeSchedulerConfiguration"]), ks2); err != nil {
t.Errorf("unexpected error unserializing scheduler config %v", err)
}
if !reflect.DeepEqual(ks2, ks1) {
t.Errorf("Expected:\n%#v\n\nGot:\n%#v", ks1, ks2)
}
}
// convertObjToConfigMap converts an object to a ConfigMap.
// This is specifically meant for ComponentConfigs.
func convertObjToConfigMap(name string, obj runtime.Object) (*v1.ConfigMap, error) {
eJSONBytes, err := json.Marshal(obj)
if err != nil {
return nil, err
}
cm := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Data: map[string]string{
name: string(eJSONBytes),
},
}
return cm, nil
}

View File

@@ -0,0 +1,24 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:deepcopy-gen=package
// +k8s:conversion-gen=k8s.io/kubernetes/pkg/scheduler/apis/config
// +k8s:conversion-gen-external-types=k8s.io/kube-scheduler/config/v1alpha1
// +k8s:defaulter-gen=TypeMeta
// +k8s:defaulter-gen-input=../../../../../vendor/k8s.io/kube-scheduler/config/v1alpha1
// +groupName=kubescheduler.config.k8s.io
package v1alpha1 // import "k8s.io/kubernetes/pkg/scheduler/apis/config/v1alpha1"

View File

@@ -0,0 +1,43 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"k8s.io/apimachinery/pkg/runtime/schema"
kubeschedulerconfigv1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
)
// GroupName is the group name used in this package
const GroupName = "kubescheduler.config.k8s.io"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"}
var (
// localSchemeBuilder extends the SchemeBuilder instance with the external types. In this package,
// defaulting and conversion init funcs are registered as well.
localSchemeBuilder = &kubeschedulerconfigv1alpha1.SchemeBuilder
// AddToScheme is a global function that registers this API group & version to a scheme
AddToScheme = localSchemeBuilder.AddToScheme
)
func init() {
// We only register manually written functions here. The registration of the
// generated functions takes place in the generated files. The separation
// makes the code compile even when the generated files are missing.
localSchemeBuilder.Register(addDefaultingFuncs)
}
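
A minimal sketch, under the assumption that the external SchemeBuilder in k8s.io/kube-scheduler/config/v1alpha1 registers the versioned types, of how AddToScheme is typically consumed: build a runtime.Scheme, add this group, and let scheme.Default run the defaulting funcs wired up in the init above.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	kubeschedulerconfigv1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
	schedulerconfigv1alpha1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1alpha1"
)

func main() {
	scheme := runtime.NewScheme()
	// Registers the versioned types plus the defaulting (and generated conversion) funcs.
	if err := schedulerconfigv1alpha1.AddToScheme(scheme); err != nil {
		panic(err)
	}
	cfg := &kubeschedulerconfigv1alpha1.KubeSchedulerConfiguration{}
	scheme.Default(cfg) // invokes SetObjectDefaults_KubeSchedulerConfiguration
	fmt.Println(cfg.SchedulerName, cfg.HealthzBindAddress)
}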

View File

@@ -0,0 +1,274 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by conversion-gen. DO NOT EDIT.
package v1alpha1
import (
unsafe "unsafe"
configv1alpha1 "k8s.io/apimachinery/pkg/apis/config/v1alpha1"
conversion "k8s.io/apimachinery/pkg/conversion"
runtime "k8s.io/apimachinery/pkg/runtime"
apisconfigv1alpha1 "k8s.io/apiserver/pkg/apis/config/v1alpha1"
v1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
config "k8s.io/kubernetes/pkg/scheduler/apis/config"
)
func init() {
localSchemeBuilder.Register(RegisterConversions)
}
// RegisterConversions adds conversion functions to the given scheme.
// Public to allow building arbitrary schemes.
func RegisterConversions(s *runtime.Scheme) error {
if err := s.AddGeneratedConversionFunc((*v1alpha1.KubeSchedulerConfiguration)(nil), (*config.KubeSchedulerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(a.(*v1alpha1.KubeSchedulerConfiguration), b.(*config.KubeSchedulerConfiguration), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*config.KubeSchedulerConfiguration)(nil), (*v1alpha1.KubeSchedulerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_config_KubeSchedulerConfiguration_To_v1alpha1_KubeSchedulerConfiguration(a.(*config.KubeSchedulerConfiguration), b.(*v1alpha1.KubeSchedulerConfiguration), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1alpha1.KubeSchedulerLeaderElectionConfiguration)(nil), (*config.KubeSchedulerLeaderElectionConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_KubeSchedulerLeaderElectionConfiguration_To_config_KubeSchedulerLeaderElectionConfiguration(a.(*v1alpha1.KubeSchedulerLeaderElectionConfiguration), b.(*config.KubeSchedulerLeaderElectionConfiguration), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*config.KubeSchedulerLeaderElectionConfiguration)(nil), (*v1alpha1.KubeSchedulerLeaderElectionConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_config_KubeSchedulerLeaderElectionConfiguration_To_v1alpha1_KubeSchedulerLeaderElectionConfiguration(a.(*config.KubeSchedulerLeaderElectionConfiguration), b.(*v1alpha1.KubeSchedulerLeaderElectionConfiguration), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1alpha1.SchedulerAlgorithmSource)(nil), (*config.SchedulerAlgorithmSource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_SchedulerAlgorithmSource_To_config_SchedulerAlgorithmSource(a.(*v1alpha1.SchedulerAlgorithmSource), b.(*config.SchedulerAlgorithmSource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*config.SchedulerAlgorithmSource)(nil), (*v1alpha1.SchedulerAlgorithmSource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_config_SchedulerAlgorithmSource_To_v1alpha1_SchedulerAlgorithmSource(a.(*config.SchedulerAlgorithmSource), b.(*v1alpha1.SchedulerAlgorithmSource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1alpha1.SchedulerPolicyConfigMapSource)(nil), (*config.SchedulerPolicyConfigMapSource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_SchedulerPolicyConfigMapSource_To_config_SchedulerPolicyConfigMapSource(a.(*v1alpha1.SchedulerPolicyConfigMapSource), b.(*config.SchedulerPolicyConfigMapSource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*config.SchedulerPolicyConfigMapSource)(nil), (*v1alpha1.SchedulerPolicyConfigMapSource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_config_SchedulerPolicyConfigMapSource_To_v1alpha1_SchedulerPolicyConfigMapSource(a.(*config.SchedulerPolicyConfigMapSource), b.(*v1alpha1.SchedulerPolicyConfigMapSource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1alpha1.SchedulerPolicyFileSource)(nil), (*config.SchedulerPolicyFileSource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_SchedulerPolicyFileSource_To_config_SchedulerPolicyFileSource(a.(*v1alpha1.SchedulerPolicyFileSource), b.(*config.SchedulerPolicyFileSource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*config.SchedulerPolicyFileSource)(nil), (*v1alpha1.SchedulerPolicyFileSource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_config_SchedulerPolicyFileSource_To_v1alpha1_SchedulerPolicyFileSource(a.(*config.SchedulerPolicyFileSource), b.(*v1alpha1.SchedulerPolicyFileSource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1alpha1.SchedulerPolicySource)(nil), (*config.SchedulerPolicySource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_SchedulerPolicySource_To_config_SchedulerPolicySource(a.(*v1alpha1.SchedulerPolicySource), b.(*config.SchedulerPolicySource), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*config.SchedulerPolicySource)(nil), (*v1alpha1.SchedulerPolicySource)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_config_SchedulerPolicySource_To_v1alpha1_SchedulerPolicySource(a.(*config.SchedulerPolicySource), b.(*v1alpha1.SchedulerPolicySource), scope)
}); err != nil {
return err
}
return nil
}
func autoConvert_v1alpha1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(in *v1alpha1.KubeSchedulerConfiguration, out *config.KubeSchedulerConfiguration, s conversion.Scope) error {
out.SchedulerName = in.SchedulerName
if err := Convert_v1alpha1_SchedulerAlgorithmSource_To_config_SchedulerAlgorithmSource(&in.AlgorithmSource, &out.AlgorithmSource, s); err != nil {
return err
}
out.HardPodAffinitySymmetricWeight = in.HardPodAffinitySymmetricWeight
if err := Convert_v1alpha1_KubeSchedulerLeaderElectionConfiguration_To_config_KubeSchedulerLeaderElectionConfiguration(&in.LeaderElection, &out.LeaderElection, s); err != nil {
return err
}
if err := configv1alpha1.Convert_v1alpha1_ClientConnectionConfiguration_To_config_ClientConnectionConfiguration(&in.ClientConnection, &out.ClientConnection, s); err != nil {
return err
}
out.HealthzBindAddress = in.HealthzBindAddress
out.MetricsBindAddress = in.MetricsBindAddress
if err := apisconfigv1alpha1.Convert_v1alpha1_DebuggingConfiguration_To_config_DebuggingConfiguration(&in.DebuggingConfiguration, &out.DebuggingConfiguration, s); err != nil {
return err
}
out.DisablePreemption = in.DisablePreemption
out.PercentageOfNodesToScore = in.PercentageOfNodesToScore
out.FailureDomains = in.FailureDomains
out.BindTimeoutSeconds = (*int64)(unsafe.Pointer(in.BindTimeoutSeconds))
return nil
}
// Convert_v1alpha1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration is an autogenerated conversion function.
func Convert_v1alpha1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(in *v1alpha1.KubeSchedulerConfiguration, out *config.KubeSchedulerConfiguration, s conversion.Scope) error {
return autoConvert_v1alpha1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(in, out, s)
}
func autoConvert_config_KubeSchedulerConfiguration_To_v1alpha1_KubeSchedulerConfiguration(in *config.KubeSchedulerConfiguration, out *v1alpha1.KubeSchedulerConfiguration, s conversion.Scope) error {
out.SchedulerName = in.SchedulerName
if err := Convert_config_SchedulerAlgorithmSource_To_v1alpha1_SchedulerAlgorithmSource(&in.AlgorithmSource, &out.AlgorithmSource, s); err != nil {
return err
}
out.HardPodAffinitySymmetricWeight = in.HardPodAffinitySymmetricWeight
if err := Convert_config_KubeSchedulerLeaderElectionConfiguration_To_v1alpha1_KubeSchedulerLeaderElectionConfiguration(&in.LeaderElection, &out.LeaderElection, s); err != nil {
return err
}
if err := configv1alpha1.Convert_config_ClientConnectionConfiguration_To_v1alpha1_ClientConnectionConfiguration(&in.ClientConnection, &out.ClientConnection, s); err != nil {
return err
}
out.HealthzBindAddress = in.HealthzBindAddress
out.MetricsBindAddress = in.MetricsBindAddress
if err := apisconfigv1alpha1.Convert_config_DebuggingConfiguration_To_v1alpha1_DebuggingConfiguration(&in.DebuggingConfiguration, &out.DebuggingConfiguration, s); err != nil {
return err
}
out.DisablePreemption = in.DisablePreemption
out.PercentageOfNodesToScore = in.PercentageOfNodesToScore
out.FailureDomains = in.FailureDomains
out.BindTimeoutSeconds = (*int64)(unsafe.Pointer(in.BindTimeoutSeconds))
return nil
}
// Convert_config_KubeSchedulerConfiguration_To_v1alpha1_KubeSchedulerConfiguration is an autogenerated conversion function.
func Convert_config_KubeSchedulerConfiguration_To_v1alpha1_KubeSchedulerConfiguration(in *config.KubeSchedulerConfiguration, out *v1alpha1.KubeSchedulerConfiguration, s conversion.Scope) error {
return autoConvert_config_KubeSchedulerConfiguration_To_v1alpha1_KubeSchedulerConfiguration(in, out, s)
}
func autoConvert_v1alpha1_KubeSchedulerLeaderElectionConfiguration_To_config_KubeSchedulerLeaderElectionConfiguration(in *v1alpha1.KubeSchedulerLeaderElectionConfiguration, out *config.KubeSchedulerLeaderElectionConfiguration, s conversion.Scope) error {
if err := apisconfigv1alpha1.Convert_v1alpha1_LeaderElectionConfiguration_To_config_LeaderElectionConfiguration(&in.LeaderElectionConfiguration, &out.LeaderElectionConfiguration, s); err != nil {
return err
}
out.LockObjectNamespace = in.LockObjectNamespace
out.LockObjectName = in.LockObjectName
return nil
}
// Convert_v1alpha1_KubeSchedulerLeaderElectionConfiguration_To_config_KubeSchedulerLeaderElectionConfiguration is an autogenerated conversion function.
func Convert_v1alpha1_KubeSchedulerLeaderElectionConfiguration_To_config_KubeSchedulerLeaderElectionConfiguration(in *v1alpha1.KubeSchedulerLeaderElectionConfiguration, out *config.KubeSchedulerLeaderElectionConfiguration, s conversion.Scope) error {
return autoConvert_v1alpha1_KubeSchedulerLeaderElectionConfiguration_To_config_KubeSchedulerLeaderElectionConfiguration(in, out, s)
}
func autoConvert_config_KubeSchedulerLeaderElectionConfiguration_To_v1alpha1_KubeSchedulerLeaderElectionConfiguration(in *config.KubeSchedulerLeaderElectionConfiguration, out *v1alpha1.KubeSchedulerLeaderElectionConfiguration, s conversion.Scope) error {
if err := apisconfigv1alpha1.Convert_config_LeaderElectionConfiguration_To_v1alpha1_LeaderElectionConfiguration(&in.LeaderElectionConfiguration, &out.LeaderElectionConfiguration, s); err != nil {
return err
}
out.LockObjectNamespace = in.LockObjectNamespace
out.LockObjectName = in.LockObjectName
return nil
}
// Convert_config_KubeSchedulerLeaderElectionConfiguration_To_v1alpha1_KubeSchedulerLeaderElectionConfiguration is an autogenerated conversion function.
func Convert_config_KubeSchedulerLeaderElectionConfiguration_To_v1alpha1_KubeSchedulerLeaderElectionConfiguration(in *config.KubeSchedulerLeaderElectionConfiguration, out *v1alpha1.KubeSchedulerLeaderElectionConfiguration, s conversion.Scope) error {
return autoConvert_config_KubeSchedulerLeaderElectionConfiguration_To_v1alpha1_KubeSchedulerLeaderElectionConfiguration(in, out, s)
}
func autoConvert_v1alpha1_SchedulerAlgorithmSource_To_config_SchedulerAlgorithmSource(in *v1alpha1.SchedulerAlgorithmSource, out *config.SchedulerAlgorithmSource, s conversion.Scope) error {
out.Policy = (*config.SchedulerPolicySource)(unsafe.Pointer(in.Policy))
out.Provider = (*string)(unsafe.Pointer(in.Provider))
return nil
}
// Convert_v1alpha1_SchedulerAlgorithmSource_To_config_SchedulerAlgorithmSource is an autogenerated conversion function.
func Convert_v1alpha1_SchedulerAlgorithmSource_To_config_SchedulerAlgorithmSource(in *v1alpha1.SchedulerAlgorithmSource, out *config.SchedulerAlgorithmSource, s conversion.Scope) error {
return autoConvert_v1alpha1_SchedulerAlgorithmSource_To_config_SchedulerAlgorithmSource(in, out, s)
}
func autoConvert_config_SchedulerAlgorithmSource_To_v1alpha1_SchedulerAlgorithmSource(in *config.SchedulerAlgorithmSource, out *v1alpha1.SchedulerAlgorithmSource, s conversion.Scope) error {
out.Policy = (*v1alpha1.SchedulerPolicySource)(unsafe.Pointer(in.Policy))
out.Provider = (*string)(unsafe.Pointer(in.Provider))
return nil
}
// Convert_config_SchedulerAlgorithmSource_To_v1alpha1_SchedulerAlgorithmSource is an autogenerated conversion function.
func Convert_config_SchedulerAlgorithmSource_To_v1alpha1_SchedulerAlgorithmSource(in *config.SchedulerAlgorithmSource, out *v1alpha1.SchedulerAlgorithmSource, s conversion.Scope) error {
return autoConvert_config_SchedulerAlgorithmSource_To_v1alpha1_SchedulerAlgorithmSource(in, out, s)
}
func autoConvert_v1alpha1_SchedulerPolicyConfigMapSource_To_config_SchedulerPolicyConfigMapSource(in *v1alpha1.SchedulerPolicyConfigMapSource, out *config.SchedulerPolicyConfigMapSource, s conversion.Scope) error {
out.Namespace = in.Namespace
out.Name = in.Name
return nil
}
// Convert_v1alpha1_SchedulerPolicyConfigMapSource_To_config_SchedulerPolicyConfigMapSource is an autogenerated conversion function.
func Convert_v1alpha1_SchedulerPolicyConfigMapSource_To_config_SchedulerPolicyConfigMapSource(in *v1alpha1.SchedulerPolicyConfigMapSource, out *config.SchedulerPolicyConfigMapSource, s conversion.Scope) error {
return autoConvert_v1alpha1_SchedulerPolicyConfigMapSource_To_config_SchedulerPolicyConfigMapSource(in, out, s)
}
func autoConvert_config_SchedulerPolicyConfigMapSource_To_v1alpha1_SchedulerPolicyConfigMapSource(in *config.SchedulerPolicyConfigMapSource, out *v1alpha1.SchedulerPolicyConfigMapSource, s conversion.Scope) error {
out.Namespace = in.Namespace
out.Name = in.Name
return nil
}
// Convert_config_SchedulerPolicyConfigMapSource_To_v1alpha1_SchedulerPolicyConfigMapSource is an autogenerated conversion function.
func Convert_config_SchedulerPolicyConfigMapSource_To_v1alpha1_SchedulerPolicyConfigMapSource(in *config.SchedulerPolicyConfigMapSource, out *v1alpha1.SchedulerPolicyConfigMapSource, s conversion.Scope) error {
return autoConvert_config_SchedulerPolicyConfigMapSource_To_v1alpha1_SchedulerPolicyConfigMapSource(in, out, s)
}
func autoConvert_v1alpha1_SchedulerPolicyFileSource_To_config_SchedulerPolicyFileSource(in *v1alpha1.SchedulerPolicyFileSource, out *config.SchedulerPolicyFileSource, s conversion.Scope) error {
out.Path = in.Path
return nil
}
// Convert_v1alpha1_SchedulerPolicyFileSource_To_config_SchedulerPolicyFileSource is an autogenerated conversion function.
func Convert_v1alpha1_SchedulerPolicyFileSource_To_config_SchedulerPolicyFileSource(in *v1alpha1.SchedulerPolicyFileSource, out *config.SchedulerPolicyFileSource, s conversion.Scope) error {
return autoConvert_v1alpha1_SchedulerPolicyFileSource_To_config_SchedulerPolicyFileSource(in, out, s)
}
func autoConvert_config_SchedulerPolicyFileSource_To_v1alpha1_SchedulerPolicyFileSource(in *config.SchedulerPolicyFileSource, out *v1alpha1.SchedulerPolicyFileSource, s conversion.Scope) error {
out.Path = in.Path
return nil
}
// Convert_config_SchedulerPolicyFileSource_To_v1alpha1_SchedulerPolicyFileSource is an autogenerated conversion function.
func Convert_config_SchedulerPolicyFileSource_To_v1alpha1_SchedulerPolicyFileSource(in *config.SchedulerPolicyFileSource, out *v1alpha1.SchedulerPolicyFileSource, s conversion.Scope) error {
return autoConvert_config_SchedulerPolicyFileSource_To_v1alpha1_SchedulerPolicyFileSource(in, out, s)
}
func autoConvert_v1alpha1_SchedulerPolicySource_To_config_SchedulerPolicySource(in *v1alpha1.SchedulerPolicySource, out *config.SchedulerPolicySource, s conversion.Scope) error {
out.File = (*config.SchedulerPolicyFileSource)(unsafe.Pointer(in.File))
out.ConfigMap = (*config.SchedulerPolicyConfigMapSource)(unsafe.Pointer(in.ConfigMap))
return nil
}
// Convert_v1alpha1_SchedulerPolicySource_To_config_SchedulerPolicySource is an autogenerated conversion function.
func Convert_v1alpha1_SchedulerPolicySource_To_config_SchedulerPolicySource(in *v1alpha1.SchedulerPolicySource, out *config.SchedulerPolicySource, s conversion.Scope) error {
return autoConvert_v1alpha1_SchedulerPolicySource_To_config_SchedulerPolicySource(in, out, s)
}
func autoConvert_config_SchedulerPolicySource_To_v1alpha1_SchedulerPolicySource(in *config.SchedulerPolicySource, out *v1alpha1.SchedulerPolicySource, s conversion.Scope) error {
out.File = (*v1alpha1.SchedulerPolicyFileSource)(unsafe.Pointer(in.File))
out.ConfigMap = (*v1alpha1.SchedulerPolicyConfigMapSource)(unsafe.Pointer(in.ConfigMap))
return nil
}
// Convert_config_SchedulerPolicySource_To_v1alpha1_SchedulerPolicySource is an autogenerated conversion function.
func Convert_config_SchedulerPolicySource_To_v1alpha1_SchedulerPolicySource(in *config.SchedulerPolicySource, out *v1alpha1.SchedulerPolicySource, s conversion.Scope) error {
return autoConvert_config_SchedulerPolicySource_To_v1alpha1_SchedulerPolicySource(in, out, s)
}
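
A hedged sketch of driving these generated conversions through scheme.Convert rather than calling them directly; it assumes the AddToScheme registration above has run and that no conversion scope context is needed for these flat fields (in-tree callers normally go through a codec instead).

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	v1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	schedulerconfigv1alpha1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1alpha1"
)

func main() {
	scheme := runtime.NewScheme()
	if err := schedulerconfigv1alpha1.AddToScheme(scheme); err != nil {
		panic(err)
	}
	versioned := &v1alpha1.KubeSchedulerConfiguration{SchedulerName: "my-scheduler"}
	internal := &config.KubeSchedulerConfiguration{}
	// Dispatches to Convert_v1alpha1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration.
	if err := scheme.Convert(versioned, internal, nil); err != nil {
		panic(err)
	}
	fmt.Println(internal.SchedulerName) // my-scheduler
}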

View File

@@ -1,5 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2016 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,23 +16,6 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package util
// Code generated by deepcopy-gen. DO NOT EDIT.
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// GetControllerRef gets pod's owner controller reference from a pod object.
func GetControllerRef(pod *v1.Pod) *metav1.OwnerReference {
if len(pod.OwnerReferences) == 0 {
return nil
}
for i := range pod.OwnerReferences {
ref := &pod.OwnerReferences[i]
if ref.Controller != nil && *ref.Controller {
return ref
}
}
return nil
}
package v1alpha1

View File

@@ -0,0 +1,40 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by defaulter-gen. DO NOT EDIT.
package v1alpha1
import (
runtime "k8s.io/apimachinery/pkg/runtime"
v1alpha1 "k8s.io/kube-scheduler/config/v1alpha1"
)
// RegisterDefaults adds defaulters functions to the given scheme.
// Public to allow building arbitrary schemes.
// All generated defaulters are covering - they call all nested defaulters.
func RegisterDefaults(scheme *runtime.Scheme) error {
scheme.AddTypeDefaultingFunc(&v1alpha1.KubeSchedulerConfiguration{}, func(obj interface{}) {
SetObjectDefaults_KubeSchedulerConfiguration(obj.(*v1alpha1.KubeSchedulerConfiguration))
})
return nil
}
func SetObjectDefaults_KubeSchedulerConfiguration(in *v1alpha1.KubeSchedulerConfiguration) {
SetDefaults_KubeSchedulerConfiguration(in)
}

View File

@@ -0,0 +1,41 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["validation.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/config/validation",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/apis/config:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/config/validation:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/validation:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/validation/field:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/apis/config/validation:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["validation_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/apis/config:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/config:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/apis/config:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,64 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
apimachinery "k8s.io/apimachinery/pkg/apis/config/validation"
"k8s.io/apimachinery/pkg/util/validation"
"k8s.io/apimachinery/pkg/util/validation/field"
apiserver "k8s.io/apiserver/pkg/apis/config/validation"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
)
// ValidateKubeSchedulerConfiguration validates the KubeSchedulerConfiguration struct
func ValidateKubeSchedulerConfiguration(cc *config.KubeSchedulerConfiguration) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, apimachinery.ValidateClientConnectionConfiguration(&cc.ClientConnection, field.NewPath("clientConnection"))...)
allErrs = append(allErrs, ValidateKubeSchedulerLeaderElectionConfiguration(&cc.LeaderElection, field.NewPath("leaderElection"))...)
if len(cc.SchedulerName) == 0 {
allErrs = append(allErrs, field.Required(field.NewPath("schedulerName"), ""))
}
for _, msg := range validation.IsValidSocketAddr(cc.HealthzBindAddress) {
allErrs = append(allErrs, field.Invalid(field.NewPath("healthzBindAddress"), cc.HealthzBindAddress, msg))
}
for _, msg := range validation.IsValidSocketAddr(cc.MetricsBindAddress) {
allErrs = append(allErrs, field.Invalid(field.NewPath("metricsBindAddress"), cc.MetricsBindAddress, msg))
}
if cc.HardPodAffinitySymmetricWeight < 0 || cc.HardPodAffinitySymmetricWeight > 100 {
allErrs = append(allErrs, field.Invalid(field.NewPath("hardPodAffinitySymmetricWeight"), cc.HardPodAffinitySymmetricWeight, "not in valid range 0-100"))
}
if cc.BindTimeoutSeconds == nil {
allErrs = append(allErrs, field.Required(field.NewPath("bindTimeoutSeconds"), ""))
}
return allErrs
}
// ValidateKubeSchedulerLeaderElectionConfiguration validates the KubeSchedulerLeaderElectionConfiguration struct
func ValidateKubeSchedulerLeaderElectionConfiguration(cc *config.KubeSchedulerLeaderElectionConfiguration, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if !cc.LeaderElectionConfiguration.LeaderElect {
return allErrs
}
allErrs = append(allErrs, apiserver.ValidateLeaderElectionConfiguration(&cc.LeaderElectionConfiguration, field.NewPath("leaderElectionConfiguration"))...)
if len(cc.LockObjectNamespace) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("lockObjectNamespace"), ""))
}
if len(cc.LockObjectName) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("lockObjectName"), ""))
}
return allErrs
}
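
For illustration, a hypothetical caller that aggregates the returned field errors (the field values are made up; the table-driven test in the next file exercises the individual scenarios):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
)

func main() {
	cfg := &config.KubeSchedulerConfiguration{
		SchedulerName:                  "default-scheduler",
		HealthzBindAddress:             "0.0.0.0:10251",
		MetricsBindAddress:             "0.0.0.0:10251",
		HardPodAffinitySymmetricWeight: 120, // outside the 0-100 range
		// BindTimeoutSeconds deliberately left nil
	}
	if errs := validation.ValidateKubeSchedulerConfiguration(cfg); len(errs) > 0 {
		// Expect errors for hardPodAffinitySymmetricWeight and bindTimeoutSeconds.
		fmt.Println(errs.ToAggregate())
	}
}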

View File

@@ -0,0 +1,150 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"testing"
"time"
apimachinery "k8s.io/apimachinery/pkg/apis/config"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apiserver "k8s.io/apiserver/pkg/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
)
func TestValidateKubeSchedulerConfiguration(t *testing.T) {
testTimeout := int64(0)
validConfig := &config.KubeSchedulerConfiguration{
SchedulerName: "me",
HealthzBindAddress: "0.0.0.0:10254",
MetricsBindAddress: "0.0.0.0:10254",
HardPodAffinitySymmetricWeight: 80,
ClientConnection: apimachinery.ClientConnectionConfiguration{
AcceptContentTypes: "application/json",
ContentType: "application/json",
QPS: 10,
Burst: 10,
},
AlgorithmSource: config.SchedulerAlgorithmSource{
Policy: &config.SchedulerPolicySource{
ConfigMap: &config.SchedulerPolicyConfigMapSource{
Namespace: "name",
Name: "name",
},
},
},
LeaderElection: config.KubeSchedulerLeaderElectionConfiguration{
LockObjectNamespace: "name",
LockObjectName: "name",
LeaderElectionConfiguration: apiserver.LeaderElectionConfiguration{
ResourceLock: "configmap",
LeaderElect: true,
LeaseDuration: metav1.Duration{Duration: 30 * time.Second},
RenewDeadline: metav1.Duration{Duration: 15 * time.Second},
RetryPeriod: metav1.Duration{Duration: 5 * time.Second},
},
},
BindTimeoutSeconds: &testTimeout,
}
HardPodAffinitySymmetricWeightGt100 := validConfig.DeepCopy()
HardPodAffinitySymmetricWeightGt100.HardPodAffinitySymmetricWeight = 120
HardPodAffinitySymmetricWeightLt0 := validConfig.DeepCopy()
HardPodAffinitySymmetricWeightLt0.HardPodAffinitySymmetricWeight = -1
lockObjectNameNotSet := validConfig.DeepCopy()
lockObjectNameNotSet.LeaderElection.LockObjectName = ""
lockObjectNamespaceNotSet := validConfig.DeepCopy()
lockObjectNamespaceNotSet.LeaderElection.LockObjectNamespace = ""
metricsBindAddrHostInvalid := validConfig.DeepCopy()
metricsBindAddrHostInvalid.MetricsBindAddress = "0.0.0.0.0:9090"
metricsBindAddrPortInvalid := validConfig.DeepCopy()
metricsBindAddrPortInvalid.MetricsBindAddress = "0.0.0.0:909090"
healthzBindAddrHostInvalid := validConfig.DeepCopy()
healthzBindAddrHostInvalid.HealthzBindAddress = "0.0.0.0.0:9090"
healthzBindAddrPortInvalid := validConfig.DeepCopy()
healthzBindAddrPortInvalid.HealthzBindAddress = "0.0.0.0:909090"
enableContentProfilingSetWithoutEnableProfiling := validConfig.DeepCopy()
enableContentProfilingSetWithoutEnableProfiling.EnableProfiling = false
enableContentProfilingSetWithoutEnableProfiling.EnableContentionProfiling = true
bindTimeoutUnset := validConfig.DeepCopy()
bindTimeoutUnset.BindTimeoutSeconds = nil
scenarios := map[string]struct {
expectedToFail bool
config *config.KubeSchedulerConfiguration
}{
"good": {
expectedToFail: false,
config: validConfig,
},
"bad-lock-object-names-not-set": {
expectedToFail: true,
config: lockObjectNameNotSet,
},
"bad-lock-object-namespace-not-set": {
expectedToFail: true,
config: lockObjectNamespaceNotSet,
},
"bad-healthz-port-invalid": {
expectedToFail: true,
config: healthzBindAddrPortInvalid,
},
"bad-healthz-host-invalid": {
expectedToFail: true,
config: healthzBindAddrHostInvalid,
},
"bad-metrics-port-invalid": {
expectedToFail: true,
config: metricsBindAddrPortInvalid,
},
"bad-metrics-host-invalid": {
expectedToFail: true,
config: metricsBindAddrHostInvalid,
},
"bad-hard-pod-affinity-symmetric-weight-lt-0": {
expectedToFail: true,
config: HardPodAffinitySymmetricWeightLt0,
},
"bad-hard-pod-affinity-symmetric-weight-gt-100": {
expectedToFail: true,
config: HardPodAffinitySymmetricWeightGt100,
},
"bind-timeout-unset": {
expectedToFail: true,
config: bindTimeoutUnset,
},
}
for name, scenario := range scenarios {
errs := ValidateKubeSchedulerConfiguration(scenario.config)
if len(errs) == 0 && scenario.expectedToFail {
t.Errorf("Unexpected success for scenario: %s", name)
}
if len(errs) > 0 && !scenario.expectedToFail {
t.Errorf("Unexpected failure for scenario: %s - %+v", name, errs)
}
}
}

View File

@@ -0,0 +1,160 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package config
import (
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerConfiguration) DeepCopyInto(out *KubeSchedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
in.AlgorithmSource.DeepCopyInto(&out.AlgorithmSource)
out.LeaderElection = in.LeaderElection
out.ClientConnection = in.ClientConnection
out.DebuggingConfiguration = in.DebuggingConfiguration
if in.BindTimeoutSeconds != nil {
in, out := &in.BindTimeoutSeconds, &out.BindTimeoutSeconds
*out = new(int64)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerConfiguration.
func (in *KubeSchedulerConfiguration) DeepCopy() *KubeSchedulerConfiguration {
if in == nil {
return nil
}
out := new(KubeSchedulerConfiguration)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *KubeSchedulerConfiguration) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerLeaderElectionConfiguration) DeepCopyInto(out *KubeSchedulerLeaderElectionConfiguration) {
*out = *in
out.LeaderElectionConfiguration = in.LeaderElectionConfiguration
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerLeaderElectionConfiguration.
func (in *KubeSchedulerLeaderElectionConfiguration) DeepCopy() *KubeSchedulerLeaderElectionConfiguration {
if in == nil {
return nil
}
out := new(KubeSchedulerLeaderElectionConfiguration)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerAlgorithmSource) DeepCopyInto(out *SchedulerAlgorithmSource) {
*out = *in
if in.Policy != nil {
in, out := &in.Policy, &out.Policy
*out = new(SchedulerPolicySource)
(*in).DeepCopyInto(*out)
}
if in.Provider != nil {
in, out := &in.Provider, &out.Provider
*out = new(string)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerAlgorithmSource.
func (in *SchedulerAlgorithmSource) DeepCopy() *SchedulerAlgorithmSource {
if in == nil {
return nil
}
out := new(SchedulerAlgorithmSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicyConfigMapSource) DeepCopyInto(out *SchedulerPolicyConfigMapSource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicyConfigMapSource.
func (in *SchedulerPolicyConfigMapSource) DeepCopy() *SchedulerPolicyConfigMapSource {
if in == nil {
return nil
}
out := new(SchedulerPolicyConfigMapSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicyFileSource) DeepCopyInto(out *SchedulerPolicyFileSource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicyFileSource.
func (in *SchedulerPolicyFileSource) DeepCopy() *SchedulerPolicyFileSource {
if in == nil {
return nil
}
out := new(SchedulerPolicyFileSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicySource) DeepCopyInto(out *SchedulerPolicySource) {
*out = *in
if in.File != nil {
in, out := &in.File, &out.File
*out = new(SchedulerPolicyFileSource)
**out = **in
}
if in.ConfigMap != nil {
in, out := &in.ConfigMap, &out.ConfigMap
*out = new(SchedulerPolicyConfigMapSource)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicySource.
func (in *SchedulerPolicySource) DeepCopy() *SchedulerPolicySource {
if in == nil {
return nil
}
out := new(SchedulerPolicySource)
in.DeepCopyInto(out)
return out
}

View File

@@ -6,6 +6,7 @@ go_library(
"cache.go",
"interface.go",
"node_info.go",
"node_tree.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/cache",
@@ -15,13 +16,15 @@ go_library(
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/util/node:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
@@ -30,22 +33,25 @@ go_test(
srcs = [
"cache_test.go",
"node_info_test.go",
"node_tree_test.go",
"util_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@@ -23,6 +23,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
@@ -51,14 +52,17 @@ type schedulerCache struct {
period time.Duration
// This mutex guards all fields within this cache struct.
mu sync.Mutex
mu sync.RWMutex
// a set of assumed pod keys.
// The key could further be used to get an entry in podStates.
assumedPods map[string]bool
// a map from pod key to podState.
podStates map[string]*podState
nodes map[string]*NodeInfo
nodeTree *NodeTree
pdbs map[string]*policy.PodDisruptionBudget
// A map from image name to its imageState.
imageStates map[string]*imageState
}
type podState struct {
@@ -69,6 +73,29 @@ type podState struct {
bindingFinished bool
}
type imageState struct {
// Size of the image
size int64
// A set of node names for nodes having this image present
nodes sets.String
}
// ImageStateSummary provides summarized information about the state of an image.
type ImageStateSummary struct {
// Size of the image
Size int64
// Used to track how many nodes have this image
NumNodes int
}
// createImageStateSummary returns a summarizing snapshot of the given image's state.
func (cache *schedulerCache) createImageStateSummary(state *imageState) *ImageStateSummary {
return &ImageStateSummary{
Size: state.size,
NumNodes: len(state.nodes),
}
}
func newSchedulerCache(ttl, period time.Duration, stop <-chan struct{}) *schedulerCache {
return &schedulerCache{
ttl: ttl,
@@ -76,17 +103,19 @@ func newSchedulerCache(ttl, period time.Duration, stop <-chan struct{}) *schedul
stop: stop,
nodes: make(map[string]*NodeInfo),
nodeTree: newNodeTree(nil),
assumedPods: make(map[string]bool),
podStates: make(map[string]*podState),
pdbs: make(map[string]*policy.PodDisruptionBudget),
imageStates: make(map[string]*imageState),
}
}
// Snapshot takes a snapshot of the current schedulerCache. The method is expensive
// and should only be used in non-critical paths.
func (cache *schedulerCache) Snapshot() *Snapshot {
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
nodes := make(map[string]*NodeInfo)
for k, v := range cache.nodes {
@@ -113,6 +142,7 @@ func (cache *schedulerCache) Snapshot() *Snapshot {
func (cache *schedulerCache) UpdateNodeNameToInfoMap(nodeNameToInfo map[string]*NodeInfo) error {
cache.mu.Lock()
defer cache.mu.Unlock()
for name, info := range cache.nodes {
if utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && info.TransientInfo != nil {
// Transient scheduler info is reset here.
@@ -136,8 +166,8 @@ func (cache *schedulerCache) List(selector labels.Selector) ([]*v1.Pod, error) {
}
func (cache *schedulerCache) FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
// podFilter is expected to return true for most or all of the pods. We
// can avoid expensive array growth without wasting too much memory by
// pre-allocating capacity.
@@ -188,8 +218,8 @@ func (cache *schedulerCache) finishBinding(pod *v1.Pod, now time.Time) error {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
glog.V(5).Infof("Finished binding for pod %v. Can be expired.", key)
currState, ok := cache.podStates[key]
@@ -317,6 +347,7 @@ func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error {
if err := cache.updatePod(oldPod, newPod); err != nil {
return err
}
currState.pod = newPod
default:
return fmt.Errorf("pod %v is not added to scheduler cache, so cannot be updated", key)
}
@@ -358,8 +389,8 @@ func (cache *schedulerCache) IsAssumedPod(pod *v1.Pod) (bool, error) {
return false, err
}
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
b, found := cache.assumedPods[key]
if !found {
@@ -374,8 +405,8 @@ func (cache *schedulerCache) GetPod(pod *v1.Pod) (*v1.Pod, error) {
return nil, err
}
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
podState, ok := cache.podStates[key]
if !ok {
@@ -393,7 +424,12 @@ func (cache *schedulerCache) AddNode(node *v1.Node) error {
if !ok {
n = NewNodeInfo()
cache.nodes[node.Name] = n
} else {
cache.removeNodeImageStates(n.node)
}
cache.nodeTree.AddNode(node)
cache.addNodeImageStates(node, n)
return n.SetNode(node)
}
@@ -405,7 +441,12 @@ func (cache *schedulerCache) UpdateNode(oldNode, newNode *v1.Node) error {
if !ok {
n = NewNodeInfo()
cache.nodes[newNode.Name] = n
} else {
cache.removeNodeImageStates(n.node)
}
cache.nodeTree.UpdateNode(oldNode, newNode)
cache.addNodeImageStates(newNode, n)
return n.SetNode(newNode)
}
@@ -424,9 +465,63 @@ func (cache *schedulerCache) RemoveNode(node *v1.Node) error {
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, node.Name)
}
cache.nodeTree.RemoveNode(node)
cache.removeNodeImageStates(node)
return nil
}
// addNodeImageStates adds the states of the images on the given node to the given nodeInfo and updates
// the imageStates map in the scheduler cache. This function assumes the scheduler cache lock is already held.
func (cache *schedulerCache) addNodeImageStates(node *v1.Node, nodeInfo *NodeInfo) {
newSum := make(map[string]*ImageStateSummary)
for _, image := range node.Status.Images {
for _, name := range image.Names {
// update the entry in imageStates
state, ok := cache.imageStates[name]
if !ok {
state = &imageState{
size: image.SizeBytes,
nodes: sets.NewString(node.Name),
}
cache.imageStates[name] = state
} else {
state.nodes.Insert(node.Name)
}
// create the imageStateSummary for this image
if _, ok := newSum[name]; !ok {
newSum[name] = cache.createImageStateSummary(state)
}
}
}
nodeInfo.imageStates = newSum
}
// removeNodeImageStates removes the given node from the image entries in the imageStates cache. If an
// image is no longer present on any node after the removal, its entry is deleted from imageStates.
func (cache *schedulerCache) removeNodeImageStates(node *v1.Node) {
if node == nil {
return
}
for _, image := range node.Status.Images {
for _, name := range image.Names {
state, ok := cache.imageStates[name]
if ok {
state.nodes.Delete(node.Name)
if len(state.nodes) == 0 {
// Remove the unused image to make sure the length of
// imageStates represents the total number of different
// images on all nodes
delete(cache.imageStates, name)
}
}
}
}
}
func (cache *schedulerCache) AddPDB(pdb *policy.PodDisruptionBudget) error {
cache.mu.Lock()
defer cache.mu.Unlock()
@@ -449,8 +544,8 @@ func (cache *schedulerCache) RemovePDB(pdb *policy.PodDisruptionBudget) error {
}
func (cache *schedulerCache) ListPDBs(selector labels.Selector) ([]*policy.PodDisruptionBudget, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
var pdbs []*policy.PodDisruptionBudget
for _, pdb := range cache.pdbs {
if selector.Matches(labels.Set(pdb.Labels)) {
@@ -461,8 +556,8 @@ func (cache *schedulerCache) ListPDBs(selector labels.Selector) ([]*policy.PodDi
}
func (cache *schedulerCache) IsUpToDate(n *NodeInfo) bool {
cache.mu.Lock()
defer cache.mu.Unlock()
cache.mu.RLock()
defer cache.mu.RUnlock()
node, ok := cache.nodes[n.Node().Name]
return ok && n.generation == node.generation
}
@@ -508,3 +603,7 @@ func (cache *schedulerCache) expirePod(key string, ps *podState) error {
delete(cache.podStates, key)
return nil
}
func (cache *schedulerCache) NodeTree() *NodeTree {
return cache.nodeTree
}
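
The addNodeImageStates/removeNodeImageStates pair above maintains a reverse index from image name to the set of nodes reporting that image, dropping an entry once its last node goes away. A stripped-down sketch of that bookkeeping, independent of the scheduler cache types (the imageIndex type and the node/image names are illustrative only):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// imageIndex mirrors the role of schedulerCache.imageStates: it tracks which nodes
// currently report an image and drops the entry once no node has it.
type imageIndex map[string]sets.String

func (idx imageIndex) addNode(node string, images ...string) {
	for _, img := range images {
		if _, ok := idx[img]; !ok {
			idx[img] = sets.NewString()
		}
		idx[img].Insert(node)
	}
}

func (idx imageIndex) removeNode(node string, images ...string) {
	for _, img := range images {
		if nodes, ok := idx[img]; ok {
			nodes.Delete(node)
			if nodes.Len() == 0 {
				delete(idx, img) // keep len(idx) equal to the number of distinct images cluster-wide
			}
		}
	}
}

func main() {
	idx := imageIndex{}
	idx.addNode("node-1", "gcr.io/pause:3.1")
	idx.addNode("node-2", "gcr.io/pause:3.1")
	idx.removeNode("node-1", "gcr.io/pause:3.1")
	fmt.Println(len(idx), idx["gcr.io/pause:3.1"].List()) // 1 [node-2]
	idx.removeNode("node-2", "gcr.io/pause:3.1")
	fmt.Println(len(idx)) // 0
}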

View File

@@ -108,7 +108,7 @@ func TestAssumePodScheduled(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}, {
pods: []*v1.Pod{testPods[1], testPods[2]},
@@ -125,7 +125,7 @@ func TestAssumePodScheduled(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1], testPods[2]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}, { // test non-zero request
pods: []*v1.Pod{testPods[3]},
@@ -142,7 +142,7 @@ func TestAssumePodScheduled(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[3]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}, {
pods: []*v1.Pod{testPods[4]},
@@ -160,7 +160,7 @@ func TestAssumePodScheduled(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[4]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}, {
pods: []*v1.Pod{testPods[4], testPods[5]},
@@ -178,7 +178,7 @@ func TestAssumePodScheduled(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[4], testPods[5]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}, {
pods: []*v1.Pod{testPods[6]},
@@ -195,7 +195,7 @@ func TestAssumePodScheduled(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[6]},
usedPorts: newHostPortInfoBuilder().build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
},
}
@@ -275,7 +275,7 @@ func TestExpirePod(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}}
@@ -328,7 +328,7 @@ func TestAddPodWillConfirm(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}}
@@ -425,7 +425,7 @@ func TestAddPodWillReplaceAssumed(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{updatedPod.DeepCopy()},
usedPorts: newHostPortInfoBuilder().add("TCP", "0.0.0.0", 90).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
},
}}
@@ -481,7 +481,7 @@ func TestAddPodAfterExpiration(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{basePod},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}}
@@ -537,7 +537,7 @@ func TestUpdatePod(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
}, {
requestedResource: &Resource{
MilliCPU: 100,
@@ -551,7 +551,7 @@ func TestUpdatePod(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
}},
}}
@@ -577,6 +577,65 @@ func TestUpdatePod(t *testing.T) {
}
}
// TestUpdatePodAndGet tests that GetPod always returns the latest pod state
func TestUpdatePodAndGet(t *testing.T) {
nodeName := "node"
ttl := 10 * time.Second
testPods := []*v1.Pod{
makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
makeBasePod(t, nodeName, "test", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
}
tests := []struct {
pod *v1.Pod
podToUpdate *v1.Pod
handler func(cache Cache, pod *v1.Pod) error
assumePod bool
}{
{
pod: testPods[0],
podToUpdate: testPods[0],
handler: func(cache Cache, pod *v1.Pod) error {
return cache.AssumePod(pod)
},
assumePod: true,
},
{
pod: testPods[0],
podToUpdate: testPods[1],
handler: func(cache Cache, pod *v1.Pod) error {
return cache.AddPod(pod)
},
assumePod: false,
},
}
for _, tt := range tests {
cache := newSchedulerCache(ttl, time.Second, nil)
if err := tt.handler(cache, tt.pod); err != nil {
t.Fatalf("unexpected err: %v", err)
}
if !tt.assumePod {
if err := cache.UpdatePod(tt.pod, tt.podToUpdate); err != nil {
t.Fatalf("UpdatePod failed: %v", err)
}
}
cachedPod, err := cache.GetPod(tt.pod)
if err != nil {
t.Fatalf("GetPod failed: %v", err)
}
if !reflect.DeepEqual(tt.podToUpdate, cachedPod) {
t.Fatalf("pod get=%s, want=%s", cachedPod, tt.podToUpdate)
}
}
}
// TestExpireAddUpdatePod tests the sequence in which a pod is expired, added, and then updated
func TestExpireAddUpdatePod(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
@@ -610,7 +669,7 @@ func TestExpireAddUpdatePod(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
}, {
requestedResource: &Resource{
MilliCPU: 100,
@@ -624,7 +683,7 @@ func TestExpireAddUpdatePod(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
}},
}}
@@ -702,7 +761,7 @@ func TestEphemeralStorageResource(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{podE},
usedPorts: schedutil.HostPortInfo{},
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
},
}
@@ -749,7 +808,7 @@ func TestRemovePod(t *testing.T) {
allocatableResource: &Resource{},
pods: []*v1.Pod{basePod},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
imageStates: make(map[string]*ImageStateSummary),
},
}}
@@ -1006,6 +1065,9 @@ func TestNodeOperators(t *testing.T) {
if !found {
t.Errorf("Failed to find node %v in schedulercache.", node.Name)
}
if cache.nodeTree.NumNodes != 1 || cache.nodeTree.Next() != node.Name {
t.Errorf("cache.nodeTree is not updated correctly after adding node: %v", node.Name)
}
// Generations are globally unique. We check in our unit tests that they are incremented correctly.
expected.generation = got.generation
@@ -1041,12 +1103,21 @@ func TestNodeOperators(t *testing.T) {
if !reflect.DeepEqual(got, expected) {
t.Errorf("Failed to update node in schedulercache:\n got: %+v \nexpected: %+v", got, expected)
}
// Check nodeTree after update
if cache.nodeTree.NumNodes != 1 || cache.nodeTree.Next() != node.Name {
t.Errorf("unexpected cache.nodeTree after updating node: %v", node.Name)
}
// Case 4: the node cannot be removed if its pod list is not empty.
cache.RemoveNode(node)
if _, found := cache.nodes[node.Name]; !found {
t.Errorf("The node %v should not be removed if pods is not empty.", node.Name)
}
// Check nodeTree after removal. The node should be removed from the nodeTree even if there are
// still pods on it.
if cache.nodeTree.NumNodes != 0 || cache.nodeTree.Next() != "" {
t.Errorf("unexpected cache.nodeTree after removing node: %v", node.Name)
}
}
}
@@ -1069,16 +1140,18 @@ func BenchmarkUpdate1kNodes30kPods(b *testing.B) {
}
}
func BenchmarkExpire100Pods(b *testing.B) {
benchmarkExpire(b, 100)
}
func BenchmarkExpire1kPods(b *testing.B) {
benchmarkExpire(b, 1000)
}
func BenchmarkExpire10kPods(b *testing.B) {
benchmarkExpire(b, 10000)
func BenchmarkExpirePods(b *testing.B) {
podNums := []int{
100,
1000,
10000,
}
for _, podNum := range podNums {
name := fmt.Sprintf("%dPods", podNum)
b.Run(name, func(b *testing.B) {
benchmarkExpire(b, podNum)
})
}
}
func benchmarkExpire(b *testing.B, podNum int) {

View File

@@ -125,6 +125,9 @@ type Cache interface {
// IsUpToDate returns true if the given NodeInfo matches the current data in the cache.
IsUpToDate(n *NodeInfo) bool
// NodeTree returns a node tree structure
NodeTree() *NodeTree
}
// Snapshot is a snapshot of cache state

View File

@@ -58,9 +58,10 @@ type NodeInfo struct {
taints []v1.Taint
taintsErr error
// This is a map from image name to image size, also for checking image existence on the node
// Cache it here to avoid rebuilding the map during scheduling, e.g., in image_locality.go
imageSizes map[string]int64
// imageStates holds the entry of an image if and only if this image is on the node. The entry can be used for
// checking an image's existence and advanced usage (e.g., image locality scheduling policy) based on the image
// state information.
imageStates map[string]*ImageStateSummary
// TransientInfo holds the information pertaining to a scheduling cycle. This will be destructed at the end of
// scheduling cycle.
@@ -261,7 +262,7 @@ func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
TransientInfo: newTransientSchedulerInfo(),
generation: nextGeneration(),
usedPorts: make(util.HostPortInfo),
imageSizes: make(map[string]int64),
imageStates: make(map[string]*ImageStateSummary),
}
for _, pod := range pods {
ni.AddPod(pod)
@@ -293,12 +294,12 @@ func (n *NodeInfo) UsedPorts() util.HostPortInfo {
return n.usedPorts
}
// ImageSizes returns the image size information on this node.
func (n *NodeInfo) ImageSizes() map[string]int64 {
// ImageStates returns the state information of all images.
func (n *NodeInfo) ImageStates() map[string]*ImageStateSummary {
if n == nil {
return nil
}
return n.imageSizes
return n.imageStates
}
// PodsWithAffinity return all pods with (anti)affinity constraints on this node.
@@ -392,15 +393,20 @@ func (n *NodeInfo) Clone() *NodeInfo {
diskPressureCondition: n.diskPressureCondition,
pidPressureCondition: n.pidPressureCondition,
usedPorts: make(util.HostPortInfo),
imageSizes: n.imageSizes,
imageStates: n.imageStates,
generation: n.generation,
}
if len(n.pods) > 0 {
clone.pods = append([]*v1.Pod(nil), n.pods...)
}
if len(n.usedPorts) > 0 {
for k, v := range n.usedPorts {
clone.usedPorts[k] = v
// util.HostPortInfo is a map-in-map struct
// make sure it's deep copied
for ip, portMap := range n.usedPorts {
clone.usedPorts[ip] = make(map[util.ProtocolPort]struct{})
for protocolPort, v := range portMap {
clone.usedPorts[ip][protocolPort] = v
}
}
}
if len(n.podsWithAffinity) > 0 {
@@ -547,17 +553,6 @@ func (n *NodeInfo) updateUsedPorts(pod *v1.Pod, add bool) {
}
}
func (n *NodeInfo) updateImageSizes() {
node := n.Node()
imageSizes := make(map[string]int64)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageSizes[name] = image.SizeBytes
}
}
n.imageSizes = imageSizes
}
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
@@ -579,7 +574,6 @@ func (n *NodeInfo) SetNode(node *v1.Node) error {
}
}
n.TransientInfo = newTransientSchedulerInfo()
n.updateImageSizes()
n.generation = nextGeneration()
return nil
}
@@ -596,6 +590,7 @@ func (n *NodeInfo) RemoveNode(node *v1.Node) error {
n.memoryPressureCondition = v1.ConditionUnknown
n.diskPressureCondition = v1.ConditionUnknown
n.pidPressureCondition = v1.ConditionUnknown
n.imageStates = make(map[string]*ImageStateSummary)
n.generation = nextGeneration()
return nil
}
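
The deep copy of usedPorts in Clone above matters because Go maps are reference values: copying only the top-level entries of a map-of-maps still shares the inner maps with the original. Below is a small standalone sketch (not part of this commit; the types and names are simplified stand-ins for util.HostPortInfo) showing how a shallow copy leaks mutations while a nested copy does not.

package main

import "fmt"

func main() {
	// A usedPorts-style structure: IP -> set of protocol/port keys.
	orig := map[string]map[string]struct{}{
		"127.0.0.1": {"TCP/80": {}},
	}

	// Shallow copy: the inner map is shared with the original.
	shallow := make(map[string]map[string]struct{})
	for ip, ports := range orig {
		shallow[ip] = ports
	}

	// Deep copy: the inner maps are duplicated, as Clone now does for usedPorts.
	deep := make(map[string]map[string]struct{})
	for ip, ports := range orig {
		deep[ip] = make(map[string]struct{})
		for p := range ports {
			deep[ip][p] = struct{}{}
		}
	}

	// Mutating the original shows up in the shallow copy but not the deep one.
	delete(orig["127.0.0.1"], "TCP/80")
	fmt.Println(len(shallow["127.0.0.1"])) // 0: the mutation leaked into the shallow copy
	fmt.Println(len(deep["127.0.0.1"]))    // 1: the deep copy is unaffected
}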

View File

@@ -26,7 +26,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/util/parsers"
)
func TestNewResource(t *testing.T) {
@@ -241,46 +240,6 @@ func TestSetMaxResource(t *testing.T) {
}
}
func TestImageSizes(t *testing.T) {
ni := fakeNodeInfo()
ni.node = &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
Status: v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/10:" + parsers.DefaultImageTag,
"gcr.io/10:v1",
},
SizeBytes: int64(10 * 1024 * 1024),
},
{
Names: []string{
"gcr.io/50:" + parsers.DefaultImageTag,
"gcr.io/50:v1",
},
SizeBytes: int64(50 * 1024 * 1024),
},
},
},
}
ni.updateImageSizes()
expected := map[string]int64{
"gcr.io/10:" + parsers.DefaultImageTag: 10 * 1024 * 1024,
"gcr.io/10:v1": 10 * 1024 * 1024,
"gcr.io/50:" + parsers.DefaultImageTag: 50 * 1024 * 1024,
"gcr.io/50:v1": 50 * 1024 * 1024,
}
imageSizes := ni.ImageSizes()
if !reflect.DeepEqual(expected, imageSizes) {
t.Errorf("expected: %#v, got: %#v", expected, imageSizes)
}
}
func TestNewNodeInfo(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
@@ -312,7 +271,7 @@ func TestNewNodeInfo(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
imageStates: map[string]*ImageStateSummary{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
@@ -401,9 +360,7 @@ func TestNodeInfoClone(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{
"gcr.io/10": 10 * 1024 * 1024,
},
imageStates: map[string]*ImageStateSummary{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
@@ -473,9 +430,7 @@ func TestNodeInfoClone(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{
"gcr.io/10": 10 * 1024 * 1024,
},
imageStates: map[string]*ImageStateSummary{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
@@ -540,6 +495,7 @@ func TestNodeInfoClone(t *testing.T) {
ni := test.nodeInfo.Clone()
// Modify the field to check if the result is a clone of the origin one.
test.nodeInfo.generation += 10
test.nodeInfo.usedPorts.Remove("127.0.0.1", "TCP", 80)
if !reflect.DeepEqual(test.expected, ni) {
t.Errorf("expected: %#v, got: %#v", test.expected, ni)
}
@@ -633,7 +589,7 @@ func TestNodeInfoAddPod(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
imageStates: map[string]*ImageStateSummary{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
@@ -752,7 +708,7 @@ func TestNodeInfoRemovePod(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
imageStates: map[string]*ImageStateSummary{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
@@ -868,7 +824,7 @@ func TestNodeInfoRemovePod(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
imageStates: map[string]*ImageStateSummary{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{

View File

@@ -0,0 +1,185 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"fmt"
"sync"
"k8s.io/api/core/v1"
utilnode "k8s.io/kubernetes/pkg/util/node"
"github.com/golang/glog"
)
// NodeTree is a tree-like data structure that holds node names in each zone. Zone names are
// keys to "NodeTree.tree" and values of "NodeTree.tree" are arrays of node names.
type NodeTree struct {
tree map[string]*nodeArray // a map from zone (region-zone) to an array of nodes in the zone.
zones []string // a list of all the zones in the tree (keys)
zoneIndex int
NumNodes int
mu sync.RWMutex
}
// nodeArray is a struct that has nodes that are in a zone.
// We use a slice (as opposed to a set/map) to store the nodes because iterating over the nodes is
// a lot more frequent than searching them by name.
type nodeArray struct {
nodes []string
lastIndex int
}
func (na *nodeArray) next() (nodeName string, exhausted bool) {
if len(na.nodes) == 0 {
glog.Error("The nodeArray is empty. It should have been deleted from NodeTree.")
return "", false
}
if na.lastIndex >= len(na.nodes) {
return "", true
}
nodeName = na.nodes[na.lastIndex]
na.lastIndex++
return nodeName, false
}
func newNodeTree(nodes []*v1.Node) *NodeTree {
nt := &NodeTree{
tree: make(map[string]*nodeArray),
}
for _, n := range nodes {
nt.AddNode(n)
}
return nt
}
// AddNode adds a node and its corresponding zone to the tree. If the zone already exists, the node
// is added to the array of nodes in that zone.
func (nt *NodeTree) AddNode(n *v1.Node) {
nt.mu.Lock()
defer nt.mu.Unlock()
nt.addNode(n)
}
func (nt *NodeTree) addNode(n *v1.Node) {
zone := utilnode.GetZoneKey(n)
if na, ok := nt.tree[zone]; ok {
for _, nodeName := range na.nodes {
if nodeName == n.Name {
glog.Warningf("node %v already exist in the NodeTree", n.Name)
return
}
}
na.nodes = append(na.nodes, n.Name)
} else {
nt.zones = append(nt.zones, zone)
nt.tree[zone] = &nodeArray{nodes: []string{n.Name}, lastIndex: 0}
}
glog.V(5).Infof("Added node %v in group %v to NodeTree", n.Name, zone)
nt.NumNodes++
}
// RemoveNode removes a node from the NodeTree.
func (nt *NodeTree) RemoveNode(n *v1.Node) error {
nt.mu.Lock()
defer nt.mu.Unlock()
return nt.removeNode(n)
}
func (nt *NodeTree) removeNode(n *v1.Node) error {
zone := utilnode.GetZoneKey(n)
if na, ok := nt.tree[zone]; ok {
for i, nodeName := range na.nodes {
if nodeName == n.Name {
na.nodes = append(na.nodes[:i], na.nodes[i+1:]...)
if len(na.nodes) == 0 {
nt.removeZone(zone)
}
glog.V(5).Infof("Removed node %v in group %v from NodeTree", n.Name, zone)
nt.NumNodes--
return nil
}
}
}
glog.Errorf("Node %v in group %v was not found", n.Name, zone)
return fmt.Errorf("node %v in group %v was not found", n.Name, zone)
}
// removeZone removes a zone from tree.
// This function must be called while the writer lock is held.
func (nt *NodeTree) removeZone(zone string) {
delete(nt.tree, zone)
for i, z := range nt.zones {
if z == zone {
nt.zones = append(nt.zones[:i], nt.zones[i+1:]...)
}
}
}
// UpdateNode updates a node in the NodeTree.
func (nt *NodeTree) UpdateNode(old, new *v1.Node) {
var oldZone string
if old != nil {
oldZone = utilnode.GetZoneKey(old)
}
newZone := utilnode.GetZoneKey(new)
// If the zone ID of the node has not changed, we don't need to do anything. The name of a node
// cannot be changed in an update.
if oldZone == newZone {
return
}
nt.mu.Lock()
defer nt.mu.Unlock()
nt.removeNode(old) // No error checking. We ignore whether the old node exists or not.
nt.addNode(new)
}
func (nt *NodeTree) resetExhausted() {
for _, na := range nt.tree {
na.lastIndex = 0
}
nt.zoneIndex = 0
}
// Next returns the name of the next node. NodeTree iterates over zones and in each zone iterates
// over nodes in a round robin fashion.
func (nt *NodeTree) Next() string {
nt.mu.Lock()
defer nt.mu.Unlock()
if len(nt.zones) == 0 {
return ""
}
numExhaustedZones := 0
for {
if nt.zoneIndex >= len(nt.zones) {
nt.zoneIndex = 0
}
zone := nt.zones[nt.zoneIndex]
nt.zoneIndex++
// We do not check the exhausted zones before calling next() on the zone. This ensures
// that if more nodes are added to a zone after it is exhausted, we iterate over the new nodes.
nodeName, exhausted := nt.tree[zone].next()
if exhausted {
numExhaustedZones++
if numExhaustedZones >= len(nt.zones) { // all zones are exhausted. we should reset.
nt.resetExhausted()
}
} else {
return nodeName
}
}
}
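
To make the iteration order concrete, here is a rough in-package sketch (not part of this commit) that builds a NodeTree with two zones and walks it with Next. The node and zone names (a1, a2, b1, zone-a, zone-b) and the makeNode helper are made up, and the example assumes it lives in the same cache package so it can use the unexported newNodeTree.

// nodetree_example_test.go (illustrative only)
package cache

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
)

func ExampleNodeTree_Next() {
	makeNode := func(name, zone string) *v1.Node {
		return &v1.Node{ObjectMeta: metav1.ObjectMeta{
			Name:   name,
			Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: zone},
		}}
	}
	// Two nodes in zone-a and one node in zone-b.
	nt := newNodeTree([]*v1.Node{
		makeNode("a1", "zone-a"),
		makeNode("a2", "zone-a"),
		makeNode("b1", "zone-b"),
	})
	// Zones are visited round robin, so the second node of zone-a only shows up
	// after zone-b has been visited; once every zone is exhausted the iteration
	// starts over from the beginning.
	for i := 0; i < 4; i++ {
		fmt.Println(nt.Next())
	}
	// Expected output: a1, b1, a2, a1
}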

View File

@@ -0,0 +1,448 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
)
var allNodes = []*v1.Node{
// Node 0: a node without any region-zone label
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
},
},
// Node 1: a node with region label only
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
},
},
},
// Node 2: a node with zone label only
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-2",
Labels: map[string]string{
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 3: a node with proper region and zone labels
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-3",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 4: a node with proper region and zone labels
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-4",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 5: a node with proper region and zone labels in a different zone, same region as above
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-5",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-3",
},
},
},
// Node 6: a node with proper region and zone labels in a new region and zone
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-6",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-2",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 7: a node with proper region and zone labels in a region and zone as node-6
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-7",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-2",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 8: a node with proper region and zone labels in a region and zone as node-6
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-8",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-2",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
}}
func verifyNodeTree(t *testing.T, nt *NodeTree, expectedTree map[string]*nodeArray) {
expectedNumNodes := int(0)
for _, na := range expectedTree {
expectedNumNodes += len(na.nodes)
}
if nt.NumNodes != expectedNumNodes {
t.Errorf("unexpected NodeTree.numNodes. Expected: %v, Got: %v", expectedNumNodes, nt.NumNodes)
}
if !reflect.DeepEqual(nt.tree, expectedTree) {
t.Errorf("The node tree is not the same as expected. Expected: %v, Got: %v", expectedTree, nt.tree)
}
if len(nt.zones) != len(expectedTree) {
t.Errorf("Number of zones in NodeTree.zones is not expected. Expected: %v, Got: %v", len(expectedTree), len(nt.zones))
}
for _, z := range nt.zones {
if _, ok := expectedTree[z]; !ok {
t.Errorf("zone %v is not expected to exist in NodeTree.zones", z)
}
}
}
func TestNodeTree_AddNode(t *testing.T) {
tests := []struct {
name string
nodesToAdd []*v1.Node
expectedTree map[string]*nodeArray
}{
{
name: "single node no labels",
nodesToAdd: allNodes[:1],
expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 0}},
},
{
name: "mix of nodes with and without proper labels",
nodesToAdd: allNodes[:4],
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3"}, 0},
},
},
{
name: "mix of nodes with and without proper labels and some zones with multiple nodes",
nodesToAdd: allNodes[:7],
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(nil)
for _, n := range test.nodesToAdd {
nt.AddNode(n)
}
verifyNodeTree(t, nt, test.expectedTree)
})
}
}
func TestNodeTree_RemoveNode(t *testing.T) {
tests := []struct {
name string
existingNodes []*v1.Node
nodesToRemove []*v1.Node
expectedTree map[string]*nodeArray
expectError bool
}{
{
name: "remove a single node with no labels",
existingNodes: allNodes[:7],
nodesToRemove: allNodes[:1],
expectedTree: map[string]*nodeArray{
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
{
name: "remove a few nodes including one from a zone with multiple nodes",
existingNodes: allNodes[:7],
nodesToRemove: allNodes[1:4],
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:zone-2": {[]string{"node-4"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
{
name: "remove all nodes",
existingNodes: allNodes[:7],
nodesToRemove: allNodes[:7],
expectedTree: map[string]*nodeArray{},
},
{
name: "remove non-existing node",
existingNodes: nil,
nodesToRemove: allNodes[:5],
expectedTree: map[string]*nodeArray{},
expectError: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(test.existingNodes)
for _, n := range test.nodesToRemove {
err := nt.RemoveNode(n)
if test.expectError == (err == nil) {
t.Errorf("unexpected returned error value: %v", err)
}
}
verifyNodeTree(t, nt, test.expectedTree)
})
}
}
func TestNodeTree_UpdateNode(t *testing.T) {
tests := []struct {
name string
existingNodes []*v1.Node
nodeToUpdate *v1.Node
expectedTree map[string]*nodeArray
}{
{
name: "update a node without label",
existingNodes: allNodes[:7],
nodeToUpdate: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
expectedTree: map[string]*nodeArray{
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
{
name: "update the only existing node",
existingNodes: allNodes[:1],
nodeToUpdate: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
expectedTree: map[string]*nodeArray{
"region-1:\x00:zone-2": {[]string{"node-0"}, 0},
},
},
{
name: "update non-existing node",
existingNodes: allNodes[:1],
nodeToUpdate: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-new",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:zone-2": {[]string{"node-new"}, 0},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(test.existingNodes)
var oldNode *v1.Node
for _, n := range allNodes {
if n.Name == test.nodeToUpdate.Name {
oldNode = n
break
}
}
if oldNode == nil {
oldNode = &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "nonexisting-node"}}
}
nt.UpdateNode(oldNode, test.nodeToUpdate)
verifyNodeTree(t, nt, test.expectedTree)
})
}
}
func TestNodeTree_Next(t *testing.T) {
tests := []struct {
name string
nodesToAdd []*v1.Node
numRuns int // number of times to run Next()
expectedOutput []string
}{
{
name: "empty tree",
nodesToAdd: nil,
numRuns: 2,
expectedOutput: []string{"", ""},
},
{
name: "should go back to the first node after finishing a round",
nodesToAdd: allNodes[:1],
numRuns: 2,
expectedOutput: []string{"node-0", "node-0"},
},
{
name: "should go back to the first node after going over all nodes",
nodesToAdd: allNodes[:4],
numRuns: 5,
expectedOutput: []string{"node-0", "node-1", "node-2", "node-3", "node-0"},
},
{
name: "should go to all zones before going to the second nodes in the same zone",
nodesToAdd: allNodes[:9],
numRuns: 11,
expectedOutput: []string{"node-0", "node-1", "node-2", "node-3", "node-5", "node-6", "node-4", "node-7", "node-8", "node-0", "node-1"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(test.nodesToAdd)
var output []string
for i := 0; i < test.numRuns; i++ {
output = append(output, nt.Next())
}
if !reflect.DeepEqual(output, test.expectedOutput) {
t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output)
}
})
}
}
func TestNodeTreeMultiOperations(t *testing.T) {
tests := []struct {
name string
nodesToAdd []*v1.Node
nodesToRemove []*v1.Node
operations []string
expectedOutput []string
}{
{
name: "add and remove all nodes between two Next operations",
nodesToAdd: allNodes[2:9],
nodesToRemove: allNodes[2:9],
operations: []string{"add", "add", "next", "add", "remove", "remove", "remove", "next"},
expectedOutput: []string{"node-2", ""},
},
{
name: "add and remove some nodes between two Next operations",
nodesToAdd: allNodes[2:9],
nodesToRemove: allNodes[2:9],
operations: []string{"add", "add", "next", "add", "remove", "remove", "next"},
expectedOutput: []string{"node-2", "node-4"},
},
{
name: "remove nodes already iterated on and add new nodes",
nodesToAdd: allNodes[2:9],
nodesToRemove: allNodes[2:9],
operations: []string{"add", "add", "next", "next", "add", "remove", "remove", "next"},
expectedOutput: []string{"node-2", "node-3", "node-4"},
},
{
name: "add more nodes to an exhausted zone",
nodesToAdd: append(allNodes[4:9], allNodes[3]),
nodesToRemove: nil,
operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "add", "next", "next", "next"},
expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-3", "node-8", "node-4"},
},
{
name: "remove zone and add new to ensure exhausted is reset correctly",
nodesToAdd: append(allNodes[3:5], allNodes[6:8]...),
nodesToRemove: allNodes[3:5],
operations: []string{"add", "add", "next", "next", "remove", "add", "add", "next", "next", "remove", "next", "next"},
expectedOutput: []string{"node-3", "node-4", "node-6", "node-7", "node-6", "node-7"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(nil)
addIndex := 0
removeIndex := 0
var output []string
for _, op := range test.operations {
switch op {
case "add":
if addIndex >= len(test.nodesToAdd) {
t.Error("more add operations than nodesToAdd")
} else {
nt.AddNode(test.nodesToAdd[addIndex])
addIndex++
}
case "remove":
if removeIndex >= len(test.nodesToRemove) {
t.Error("more remove operations than nodesToRemove")
} else {
nt.RemoveNode(test.nodesToRemove[removeIndex])
removeIndex++
}
case "next":
output = append(output, nt.Next())
default:
t.Errorf("unknow operation: %v", op)
}
}
if !reflect.DeepEqual(output, test.expectedOutput) {
t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output)
}
})
}
}

View File

@@ -16,7 +16,10 @@ limitations under the License.
package cache
import "k8s.io/api/core/v1"
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)
// CreateNodeNameToInfoMap obtains a list of pods and pivots that list into a map where the keys are node names
// and the values are the aggregated information for that node.
@@ -29,11 +32,47 @@ func CreateNodeNameToInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*NodeI
}
nodeNameToInfo[nodeName].AddPod(pod)
}
imageExistenceMap := createImageExistenceMap(nodes)
for _, node := range nodes {
if _, ok := nodeNameToInfo[node.Name]; !ok {
nodeNameToInfo[node.Name] = NewNodeInfo()
}
nodeNameToInfo[node.Name].SetNode(node)
nodeInfo := nodeNameToInfo[node.Name]
nodeInfo.SetNode(node)
nodeInfo.imageStates = getNodeImageStates(node, imageExistenceMap)
}
return nodeNameToInfo
}
// getNodeImageStates returns the given node's image states based on the given imageExistence map.
func getNodeImageStates(node *v1.Node, imageExistenceMap map[string]sets.String) map[string]*ImageStateSummary {
imageStates := make(map[string]*ImageStateSummary)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageStates[name] = &ImageStateSummary{
Size: image.SizeBytes,
NumNodes: len(imageExistenceMap[name]),
}
}
}
return imageStates
}
// createImageExistenceMap returns a map recording on which nodes the images exist, keyed by the images' names.
func createImageExistenceMap(nodes []*v1.Node) map[string]sets.String {
imageExistenceMap := make(map[string]sets.String)
for _, node := range nodes {
for _, image := range node.Status.Images {
for _, name := range image.Names {
if _, ok := imageExistenceMap[name]; !ok {
imageExistenceMap[name] = sets.NewString(node.Name)
} else {
imageExistenceMap[name].Insert(node.Name)
}
}
}
}
return imageExistenceMap
}
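
The split above is intentional: createImageExistenceMap makes a single pass over all nodes to record where each image lives, and getNodeImageStates then stamps a per-node ImageStateSummary from that shared map, so the NumNodes count does not need a second scan per node. Below is a rough usage sketch of the exported entry point (not part of this commit; the image name, sizes, and node names are made up, and the import path is taken from the BUILD files in this change).

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)

func main() {
	// Two nodes that both report the same image in their status.
	nodeWithImage := func(name string) *v1.Node {
		return &v1.Node{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Status: v1.NodeStatus{
				Images: []v1.ContainerImage{
					{Names: []string{"gcr.io/app:v1"}, SizeBytes: 10 * 1024 * 1024},
				},
			},
		}
	}
	nodes := []*v1.Node{nodeWithImage("node-0"), nodeWithImage("node-1")}

	infoMap := schedulercache.CreateNodeNameToInfoMap(nil, nodes)
	state := infoMap["node-0"].ImageStates()["gcr.io/app:v1"]
	// The image exists on both nodes, so NumNodes is 2; Size is the reported byte size.
	fmt.Println(state.Size, state.NumNodes)
}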

View File

@@ -0,0 +1,134 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
)
const mb int64 = 1024 * 1024
func TestGetNodeImageStates(t *testing.T) {
tests := []struct {
node *v1.Node
imageExistenceMap map[string]sets.String
expected map[string]*ImageStateSummary
}{
{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-0"},
Status: v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
{
Names: []string{
"gcr.io/200:v1",
},
SizeBytes: int64(200 * mb),
},
},
},
},
imageExistenceMap: map[string]sets.String{
"gcr.io/10:v1": sets.NewString("node-0", "node-1"),
"gcr.io/200:v1": sets.NewString("node-0"),
},
expected: map[string]*ImageStateSummary{
"gcr.io/10:v1": {
Size: int64(10 * mb),
NumNodes: 2,
},
"gcr.io/200:v1": {
Size: int64(200 * mb),
NumNodes: 1,
},
},
},
}
for _, test := range tests {
imageStates := getNodeImageStates(test.node, test.imageExistenceMap)
if !reflect.DeepEqual(test.expected, imageStates) {
t.Errorf("expected: %#v, got: %#v", test.expected, imageStates)
}
}
}
func TestCreateImageExistenceMap(t *testing.T) {
tests := []struct {
nodes []*v1.Node
expected map[string]sets.String
}{
{
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: "node-0"},
Status: v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
Status: v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
{
Names: []string{
"gcr.io/200:v1",
},
SizeBytes: int64(200 * mb),
},
},
},
},
},
expected: map[string]sets.String{
"gcr.io/10:v1": sets.NewString("node-0", "node-1"),
"gcr.io/200:v1": sets.NewString("node-1"),
},
},
}
for _, test := range tests {
imageMap := createImageExistenceMap(test.nodes)
if !reflect.DeepEqual(test.expected, imageMap) {
t.Errorf("expected: %#v, got: %#v", test.expected, imageMap)
}
}
}

View File

@@ -9,7 +9,6 @@ load(
go_test(
name = "go_default_test",
srcs = [
"equivalence_cache_test.go",
"extender_test.go",
"generic_scheduler_test.go",
"scheduling_queue_test.go",
@@ -22,23 +21,23 @@ go_test(
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core/equivalence:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"equivalence_cache.go",
"extender.go",
"generic_scheduler.go",
"scheduling_queue.go",
@@ -51,23 +50,23 @@ go_library(
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core/equivalence:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/util/hash:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/net:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/trace:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/net:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/trace:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
@@ -80,6 +79,9 @@ filegroup(
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
srcs = [
":package-srcs",
"//pkg/scheduler/core/equivalence:all-srcs",
],
tags = ["automanaged"],
)

View File

@@ -0,0 +1,50 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["eqivalence.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/core/equivalence",
visibility = ["//visibility:public"],
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/util/hash:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["eqivalence_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,357 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package equivalence defines Pod equivalence classes and the equivalence class
// cache.
package equivalence
import (
"fmt"
"hash/fnv"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/metrics"
hashutil "k8s.io/kubernetes/pkg/util/hash"
)
// Cache is a thread-safe map that saves and reuses the output of predicate functions. It uses
// the node name as the key to access the cached results.
//
// Internally, results are keyed by predicate name, and "equivalence
// class". (Equivalence class is defined in the `Class` type.) Saved results
// will be reused until an appropriate invalidation function is called.
type Cache struct {
// i.e. map[string]*NodeCache
sync.Map
}
// NewCache creates an empty equivalence class cache.
func NewCache() *Cache {
return new(Cache)
}
// NodeCache saves and reuses the output of predicate functions. Use RunPredicate to
// get or update the cached results. An appropriate Invalidate* function should
// be called when some predicate results are no longer valid.
//
// Internally, results are keyed by predicate name, and "equivalence
// class". (Equivalence class is defined in the `Class` type.) Saved results
// will be reused until an appropriate invalidation function is called.
//
// NodeCache objects are thread safe within the context of NodeCache.
type NodeCache struct {
mu sync.RWMutex
cache predicateMap
}
// newNodeCache returns an empty NodeCache.
func newNodeCache() *NodeCache {
return &NodeCache{
cache: make(predicateMap),
}
}
// GetNodeCache returns the existing NodeCache for the given node if present. Otherwise,
// it creates the NodeCache and returns it.
// The boolean flag is true if the value was loaded, false if created.
func (c *Cache) GetNodeCache(name string) (nodeCache *NodeCache, exists bool) {
v, exists := c.LoadOrStore(name, newNodeCache())
nodeCache = v.(*NodeCache)
return
}
// InvalidatePredicates clears all cached results for the given predicates.
func (c *Cache) InvalidatePredicates(predicateKeys sets.String) {
if len(predicateKeys) == 0 {
return
}
c.Range(func(k, v interface{}) bool {
n := v.(*NodeCache)
n.invalidatePreds(predicateKeys)
return true
})
glog.V(5).Infof("Cache invalidation: node=*,predicates=%v", predicateKeys)
}
// InvalidatePredicatesOnNode clears cached results for the given predicates on one node.
func (c *Cache) InvalidatePredicatesOnNode(nodeName string, predicateKeys sets.String) {
if len(predicateKeys) == 0 {
return
}
if v, ok := c.Load(nodeName); ok {
n := v.(*NodeCache)
n.invalidatePreds(predicateKeys)
}
glog.V(5).Infof("Cache invalidation: node=%s,predicates=%v", nodeName, predicateKeys)
}
// InvalidateAllPredicatesOnNode clears all cached results for one node.
func (c *Cache) InvalidateAllPredicatesOnNode(nodeName string) {
c.Delete(nodeName)
glog.V(5).Infof("Cache invalidation: node=%s,predicates=*", nodeName)
}
// InvalidateCachedPredicateItemForPodAdd is a wrapper of
// InvalidateCachedPredicateItem for pod add case
// TODO: This does not belong with the equivalence cache implementation.
func (c *Cache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, nodeName string) {
// MatchInterPodAffinity: we assume the scheduler can make sure a newly bound pod
// will not break the existing inter-pod affinity, so we do not need to
// invalidate MatchInterPodAffinity when a pod is added.
//
// But when a pod is deleted, existing inter-pod affinity may become invalid
// (e.g. this pod was preferred by some other pod, or vice versa).
//
// NOTE: the assumptions above will not hold once we implement features like
// RequiredDuringSchedulingRequiredDuringExecution.
// NoDiskConflict: if the newly scheduled pod fits with the existing pods on this node,
// it will also fit the equivalence classes of the existing pods.
// GeneralPredicates: will always be affected by adding a new pod.
invalidPredicates := sets.NewString(predicates.GeneralPred)
// MaxPDVolumeCountPredicate: we check the volumes of the pod to make a decision.
for _, vol := range pod.Spec.Volumes {
if vol.PersistentVolumeClaim != nil {
invalidPredicates.Insert(
predicates.MaxEBSVolumeCountPred,
predicates.MaxGCEPDVolumeCountPred,
predicates.MaxAzureDiskVolumeCountPred)
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
invalidPredicates.Insert(predicates.MaxCSIVolumeCountPred)
}
} else {
// We do not consider CSI volumes here because CSI
// volumes can not be used inline.
if vol.AWSElasticBlockStore != nil {
invalidPredicates.Insert(predicates.MaxEBSVolumeCountPred)
}
if vol.GCEPersistentDisk != nil {
invalidPredicates.Insert(predicates.MaxGCEPDVolumeCountPred)
}
if vol.AzureDisk != nil {
invalidPredicates.Insert(predicates.MaxAzureDiskVolumeCountPred)
}
}
}
c.InvalidatePredicatesOnNode(nodeName, invalidPredicates)
}
// Class represents a set of pods which are equivalent from the perspective of
// the scheduler. i.e. the scheduler would make the same decision for any pod
// from the same class.
type Class struct {
// Equivalence hash
hash uint64
}
// NewClass returns the equivalence class for a given Pod. The returned Class
// objects will be equal for two Pods in the same class. nil values should not
// be considered equal to each other.
//
// NOTE: Make sure to compare types of Class and not *Class.
// TODO(misterikkit): Return error instead of nil *Class.
func NewClass(pod *v1.Pod) *Class {
equivalencePod := getEquivalencePod(pod)
if equivalencePod != nil {
hash := fnv.New32a()
hashutil.DeepHashObject(hash, equivalencePod)
return &Class{
hash: uint64(hash.Sum32()),
}
}
return nil
}
// predicateMap stores resultMaps with predicate name as the key.
type predicateMap map[string]resultMap
// resultMap stores PredicateResult with pod equivalence hash as the key.
type resultMap map[uint64]predicateResult
// predicateResult stores the output of a FitPredicate.
type predicateResult struct {
Fit bool
FailReasons []algorithm.PredicateFailureReason
}
// RunPredicate returns a cached predicate result. In case of a cache miss, the predicate will be
// run and its results cached for the next call.
//
// NOTE: RunPredicate will not update the equivalence cache if the given NodeInfo is stale.
func (n *NodeCache) RunPredicate(
pred algorithm.FitPredicate,
predicateKey string,
pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo,
equivClass *Class,
cache schedulercache.Cache,
) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
return false, []algorithm.PredicateFailureReason{}, fmt.Errorf("nodeInfo is nil or node is invalid")
}
result, ok := n.lookupResult(pod.GetName(), nodeInfo.Node().GetName(), predicateKey, equivClass.hash)
if ok {
return result.Fit, result.FailReasons, nil
}
fit, reasons, err := pred(pod, meta, nodeInfo)
if err != nil {
return fit, reasons, err
}
if cache != nil {
n.updateResult(pod.GetName(), predicateKey, fit, reasons, equivClass.hash, cache, nodeInfo)
}
return fit, reasons, nil
}
// updateResult updates the cached result of a predicate.
func (n *NodeCache) updateResult(
podName, predicateKey string,
fit bool,
reasons []algorithm.PredicateFailureReason,
equivalenceHash uint64,
cache schedulercache.Cache,
nodeInfo *schedulercache.NodeInfo,
) {
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
metrics.EquivalenceCacheWrites.WithLabelValues("discarded_bad_node").Inc()
return
}
// Skip update if NodeInfo is stale.
if !cache.IsUpToDate(nodeInfo) {
metrics.EquivalenceCacheWrites.WithLabelValues("discarded_stale").Inc()
return
}
predicateItem := predicateResult{
Fit: fit,
FailReasons: reasons,
}
n.mu.Lock()
defer n.mu.Unlock()
// If cached predicate map already exists, just update the predicate by key
if predicates, ok := n.cache[predicateKey]; ok {
// maps in golang are references, no need to add them back
predicates[equivalenceHash] = predicateItem
} else {
n.cache[predicateKey] =
resultMap{
equivalenceHash: predicateItem,
}
}
glog.V(5).Infof("Cache update: node=%s, predicate=%s,pod=%s,value=%v",
nodeInfo.Node().Name, predicateKey, podName, predicateItem)
}
// lookupResult returns cached predicate results and a bool saying whether a
// cache entry was found.
func (n *NodeCache) lookupResult(
podName, nodeName, predicateKey string,
equivalenceHash uint64,
) (value predicateResult, ok bool) {
n.mu.RLock()
defer n.mu.RUnlock()
value, ok = n.cache[predicateKey][equivalenceHash]
if ok {
metrics.EquivalenceCacheHits.Inc()
} else {
metrics.EquivalenceCacheMisses.Inc()
}
return value, ok
}
// invalidatePreds deletes cached predicates by given keys.
func (n *NodeCache) invalidatePreds(predicateKeys sets.String) {
n.mu.Lock()
defer n.mu.Unlock()
for predicateKey := range predicateKeys {
delete(n.cache, predicateKey)
}
}
// equivalencePod is the set of pod attributes which must match for two pods to
// be considered equivalent for scheduling purposes. For correctness, this must
// include any Pod field which is used by a FitPredicate.
//
// NOTE: For equivalence hash to be formally correct, lists and maps in the
// equivalencePod should be normalized. (e.g. by sorting them) However, the vast
// majority of equivalent pod classes are expected to be created from a single
// pod template, so they will all have the same ordering.
type equivalencePod struct {
Namespace *string
Labels map[string]string
Affinity *v1.Affinity
Containers []v1.Container // See note about ordering
InitContainers []v1.Container // See note about ordering
NodeName *string
NodeSelector map[string]string
Tolerations []v1.Toleration
Volumes []v1.Volume // See note about ordering
}
// getEquivalencePod returns a normalized representation of a pod so that two
// "equivalent" pods will hash to the same value.
func getEquivalencePod(pod *v1.Pod) *equivalencePod {
ep := &equivalencePod{
Namespace: &pod.Namespace,
Labels: pod.Labels,
Affinity: pod.Spec.Affinity,
Containers: pod.Spec.Containers,
InitContainers: pod.Spec.InitContainers,
NodeName: &pod.Spec.NodeName,
NodeSelector: pod.Spec.NodeSelector,
Tolerations: pod.Spec.Tolerations,
Volumes: pod.Spec.Volumes,
}
// DeepHashObject considers nil and empty slices to be different. Normalize them.
if len(ep.Containers) == 0 {
ep.Containers = nil
}
if len(ep.InitContainers) == 0 {
ep.InitContainers = nil
}
if len(ep.Tolerations) == 0 {
ep.Tolerations = nil
}
if len(ep.Volumes) == 0 {
ep.Volumes = nil
}
// Normalize empty maps also.
if len(ep.Labels) == 0 {
ep.Labels = nil
}
if len(ep.NodeSelector) == 0 {
ep.NodeSelector = nil
}
// TODO(misterikkit): Also normalize nested maps and slices.
return ep
}
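
Because getEquivalencePod deliberately leaves out per-pod identity fields (name, UID, resourceVersion) and normalizes empty collections, pods stamped out of the same template land in the same equivalence class. Here is a minimal sketch of that property (not part of this commit; the makePod helper, pod names, labels, and image are made up, and the import path comes from the BUILD file earlier in this change).

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/pkg/scheduler/core/equivalence"
)

func main() {
	// Two pods that differ only by name, as a ReplicaSet would create them.
	makePod := func(name string) *v1.Pod {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      name,
				Namespace: "default",
				Labels:    map[string]string{"app": "web"},
			},
			Spec: v1.PodSpec{
				Containers: []v1.Container{{Name: "c", Image: "gcr.io/app:v1"}},
			},
		}
	}
	a := equivalence.NewClass(makePod("web-1"))
	b := equivalence.NewClass(makePod("web-2"))
	// The pod name is not part of equivalencePod, so both pods hash to the same
	// class. Per the NewClass doc comment, compare Class values, not pointers.
	fmt.Println(*a == *b) // true
}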

View File

@@ -14,20 +14,17 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package core
package equivalence
import (
"errors"
"reflect"
"sync"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
@@ -246,17 +243,21 @@ func TestRunPredicate(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1"}})
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1"}}
node.SetNode(testNode)
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1"}}
meta := algorithm.EmptyPredicateMetadataProducer(nil, nil)
ecache := NewEquivalenceCache()
equivClass := ecache.getEquivalenceClassInfo(pod)
// Initialize and populate equivalence class cache.
ecache := NewCache()
nodeCache, _ := ecache.GetNodeCache(testNode.Name)
equivClass := NewClass(pod)
if test.expectCacheHit {
ecache.updateResult(pod.Name, "testPredicate", test.expectFit, test.expectedReasons, equivClass.hash, test.cache, node)
nodeCache.updateResult(pod.Name, "testPredicate", test.expectFit, test.expectedReasons, equivClass.hash, test.cache, node)
}
fit, reasons, err := ecache.RunPredicate(test.pred.predicate, "testPredicate", pod, meta, node, equivClass, test.cache)
fit, reasons, err := nodeCache.RunPredicate(test.pred.predicate, "testPredicate", pod, meta, node, equivClass, test.cache)
if err != nil {
if err.Error() != test.expectedError {
@@ -287,14 +288,14 @@ func TestRunPredicate(t *testing.T) {
if !test.expectCacheHit && test.pred.callCount == 0 {
t.Errorf("Predicate should be called")
}
_, _, invalid := ecache.lookupResult(pod.Name, node.Node().Name, "testPredicate", equivClass.hash)
if invalid && test.expectCacheWrite {
_, ok := nodeCache.lookupResult(pod.Name, node.Node().Name, "testPredicate", equivClass.hash)
if !ok && test.expectCacheWrite {
t.Errorf("Cache write should happen")
}
if !test.expectCacheHit && test.expectCacheWrite && invalid {
if !test.expectCacheHit && test.expectCacheWrite && !ok {
t.Errorf("Cache write should happen")
}
if !test.expectCacheHit && !test.expectCacheWrite && !invalid {
if !test.expectCacheHit && !test.expectCacheWrite && ok {
t.Errorf("Cache write should not happen")
}
})
@@ -311,7 +312,7 @@ func TestUpdateResult(t *testing.T) {
reasons []algorithm.PredicateFailureReason
equivalenceHash uint64
expectPredicateMap bool
expectCacheItem HostPredicate
expectCacheItem predicateResult
cache schedulercache.Cache
}{
{
@@ -322,7 +323,7 @@ func TestUpdateResult(t *testing.T) {
fit: true,
equivalenceHash: 123,
expectPredicateMap: false,
expectCacheItem: HostPredicate{
expectCacheItem: predicateResult{
Fit: true,
},
cache: &upToDateCache{},
@@ -335,47 +336,52 @@ func TestUpdateResult(t *testing.T) {
fit: false,
equivalenceHash: 123,
expectPredicateMap: true,
expectCacheItem: HostPredicate{
expectCacheItem: predicateResult{
Fit: false,
},
cache: &upToDateCache{},
},
}
for _, test := range tests {
ecache := NewEquivalenceCache()
if test.expectPredicateMap {
ecache.algorithmCache[test.nodeName] = AlgorithmCache{}
predicateItem := HostPredicate{
Fit: true,
}
ecache.algorithmCache[test.nodeName][test.predicateKey] =
PredicateMap{
test.equivalenceHash: predicateItem,
t.Run(test.name, func(t *testing.T) {
node := schedulercache.NewNodeInfo()
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}}
node.SetNode(testNode)
// Initialize and populate equivalence class cache.
ecache := NewCache()
nodeCache, _ := ecache.GetNodeCache(testNode.Name)
if test.expectPredicateMap {
predicateItem := predicateResult{
Fit: true,
}
}
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
ecache.updateResult(
test.pod,
test.predicateKey,
test.fit,
test.reasons,
test.equivalenceHash,
test.cache,
node,
)
cachedMapItem, ok := ecache.algorithmCache[test.nodeName][test.predicateKey]
if !ok {
t.Errorf("Failed: %s, can't find expected cache item: %v",
test.name, test.expectCacheItem)
} else {
if !reflect.DeepEqual(cachedMapItem[test.equivalenceHash], test.expectCacheItem) {
t.Errorf("Failed: %s, expected cached item: %v, but got: %v",
test.name, test.expectCacheItem, cachedMapItem[test.equivalenceHash])
nodeCache.cache[test.predicateKey] =
resultMap{
test.equivalenceHash: predicateItem,
}
}
}
nodeCache.updateResult(
test.pod,
test.predicateKey,
test.fit,
test.reasons,
test.equivalenceHash,
test.cache,
node,
)
cachedMapItem, ok := nodeCache.cache[test.predicateKey]
if !ok {
t.Errorf("can't find expected cache item: %v", test.expectCacheItem)
} else {
if !reflect.DeepEqual(cachedMapItem[test.equivalenceHash], test.expectCacheItem) {
t.Errorf("expected cached item: %v, but got: %v",
test.expectCacheItem, cachedMapItem[test.equivalenceHash])
}
}
})
}
}
@@ -396,8 +402,8 @@ func TestLookupResult(t *testing.T) {
equivalenceHashForUpdatePredicate uint64
equivalenceHashForCalPredicate uint64
cachedItem predicateItemType
expectedInvalidPredicateKey bool
expectedInvalidEquivalenceHash bool
expectedPredicateKeyMiss bool
expectedEquivalenceHashMiss bool
expectedPredicateItem predicateItemType
cache schedulercache.Cache
}{
@@ -412,7 +418,7 @@ func TestLookupResult(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
expectedInvalidPredicateKey: true,
expectedPredicateKeyMiss: true,
expectedPredicateItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{},
@@ -429,7 +435,7 @@ func TestLookupResult(t *testing.T) {
cachedItem: predicateItemType{
fit: true,
},
expectedInvalidPredicateKey: false,
expectedPredicateKeyMiss: false,
expectedPredicateItem: predicateItemType{
fit: true,
reasons: []algorithm.PredicateFailureReason{},
@@ -447,7 +453,7 @@ func TestLookupResult(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
expectedInvalidPredicateKey: false,
expectedPredicateKeyMiss: false,
expectedPredicateItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
@@ -465,8 +471,8 @@ func TestLookupResult(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
expectedInvalidPredicateKey: false,
expectedInvalidEquivalenceHash: true,
expectedPredicateKeyMiss: false,
expectedEquivalenceHashMiss: true,
expectedPredicateItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{},
@@ -476,58 +482,67 @@ func TestLookupResult(t *testing.T) {
}
for _, test := range tests {
ecache := NewEquivalenceCache()
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
// set cached item to equivalence cache
ecache.updateResult(
test.podName,
test.predicateKey,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
test.cache,
node,
)
// if we want to do invalid, invalid the cached item
if test.expectedInvalidPredicateKey {
predicateKeys := sets.NewString()
predicateKeys.Insert(test.predicateKey)
ecache.InvalidateCachedPredicateItem(test.nodeName, predicateKeys)
}
// calculate predicate with equivalence cache
fit, reasons, invalid := ecache.lookupResult(test.podName,
test.nodeName,
test.predicateKey,
test.equivalenceHashForCalPredicate,
)
// returned invalid should match expectedInvalidPredicateKey or expectedInvalidEquivalenceHash
if test.equivalenceHashForUpdatePredicate != test.equivalenceHashForCalPredicate {
if invalid != test.expectedInvalidEquivalenceHash {
t.Errorf("Failed: %s, expected invalid: %v, but got: %v",
test.name, test.expectedInvalidEquivalenceHash, invalid)
t.Run(test.name, func(t *testing.T) {
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}}
// Initialize and populate equivalence class cache.
ecache := NewCache()
nodeCache, _ := ecache.GetNodeCache(testNode.Name)
node := schedulercache.NewNodeInfo()
node.SetNode(testNode)
// set cached item to equivalence cache
nodeCache.updateResult(
test.podName,
test.predicateKey,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
test.cache,
node,
)
// if a predicate-key miss is expected, invalidate the cached item first
if test.expectedPredicateKeyMiss {
predicateKeys := sets.NewString()
predicateKeys.Insert(test.predicateKey)
ecache.InvalidatePredicatesOnNode(test.nodeName, predicateKeys)
}
} else {
if invalid != test.expectedInvalidPredicateKey {
t.Errorf("Failed: %s, expected invalid: %v, but got: %v",
test.name, test.expectedInvalidPredicateKey, invalid)
// calculate predicate with equivalence cache
result, ok := nodeCache.lookupResult(test.podName,
test.nodeName,
test.predicateKey,
test.equivalenceHashForCalPredicate,
)
fit, reasons := result.Fit, result.FailReasons
// the returned ok value should be consistent with expectedPredicateKeyMiss or expectedEquivalenceHashMiss
if test.equivalenceHashForUpdatePredicate != test.equivalenceHashForCalPredicate {
if ok && test.expectedEquivalenceHashMiss {
t.Errorf("Failed: %s, expected (equivalence hash) cache miss", test.name)
}
if !ok && !test.expectedEquivalenceHashMiss {
t.Errorf("Failed: %s, expected (equivalence hash) cache hit", test.name)
}
} else {
if ok && test.expectedPredicateKeyMiss {
t.Errorf("Failed: %s, expected (predicate key) cache miss", test.name)
}
if !ok && !test.expectedPredicateKeyMiss {
t.Errorf("Failed: %s, expected (predicate key) cache hit", test.name)
}
}
}
// returned predicate result should match expected predicate item
if fit != test.expectedPredicateItem.fit {
t.Errorf("Failed: %s, expected fit: %v, but got: %v", test.name, test.cachedItem.fit, fit)
}
if !slicesEqual(reasons, test.expectedPredicateItem.reasons) {
t.Errorf("Failed: %s, expected reasons: %v, but got: %v",
test.name, test.expectedPredicateItem.reasons, reasons)
}
// returned predicate result should match expected predicate item
if fit != test.expectedPredicateItem.fit {
t.Errorf("Failed: %s, expected fit: %v, but got: %v", test.name, test.cachedItem.fit, fit)
}
if !slicesEqual(reasons, test.expectedPredicateItem.reasons) {
t.Errorf("Failed: %s, expected reasons: %v, but got: %v",
test.name, test.expectedPredicateItem.reasons, reasons)
}
})
}
}
func TestGetEquivalenceHash(t *testing.T) {
ecache := NewEquivalenceCache()
pod1 := makeBasicPod("pod1")
pod2 := makeBasicPod("pod2")
@@ -618,7 +633,7 @@ func TestGetEquivalenceHash(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
for i, podInfo := range test.podInfoList {
testPod := podInfo.pod
eclassInfo := ecache.getEquivalenceClassInfo(testPod)
eclassInfo := NewClass(testPod)
if eclassInfo == nil && podInfo.hashIsValid {
t.Errorf("Failed: pod %v is expected to have valid hash", testPod)
}
@@ -646,6 +661,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
testPredicate := "GeneralPredicates"
// tests is used to initialize all nodes
tests := []struct {
name string
podName string
nodeName string
equivalenceHashForUpdatePredicate uint64
@@ -653,6 +669,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
cache schedulercache.Cache
}{
{
name: "hash predicate 123 not fits host ports",
podName: "testPod",
nodeName: "node1",
equivalenceHashForUpdatePredicate: 123,
@@ -665,6 +682,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
cache: &upToDateCache{},
},
{
name: "hash predicate 456 not fits host ports",
podName: "testPod",
nodeName: "node2",
equivalenceHashForUpdatePredicate: 456,
@@ -677,6 +695,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
cache: &upToDateCache{},
},
{
name: "hash predicate 123 fits",
podName: "testPod",
nodeName: "node3",
equivalenceHashForUpdatePredicate: 123,
@@ -686,13 +705,16 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
cache: &upToDateCache{},
},
}
ecache := NewEquivalenceCache()
ecache := NewCache()
for _, test := range tests {
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}}
node.SetNode(testNode)
nodeCache, _ := ecache.GetNodeCache(testNode.Name)
// set cached item to equivalence cache
ecache.updateResult(
nodeCache.updateResult(
test.podName,
testPredicate,
test.cachedItem.fit,
@@ -704,17 +726,18 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
}
// invalidate cached predicate for all nodes
ecache.InvalidateCachedPredicateItemOfAllNodes(sets.NewString(testPredicate))
ecache.InvalidatePredicates(sets.NewString(testPredicate))
// there should be no cached predicate any more
for _, test := range tests {
if algorithmCache, exist := ecache.algorithmCache[test.nodeName]; exist {
if _, exist := algorithmCache[testPredicate]; exist {
t.Errorf("Failed: cached item for predicate key: %v on node: %v should be invalidated",
testPredicate, test.nodeName)
break
t.Run(test.name, func(t *testing.T) {
if nodeCache, exist := ecache.GetNodeCache(test.nodeName); exist {
if _, exist := nodeCache.cache[testPredicate]; exist {
t.Errorf("Failed: cached item for predicate key: %v on node: %v should be invalidated",
testPredicate, test.nodeName)
}
}
}
})
}
}
@@ -722,6 +745,7 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
testPredicate := "GeneralPredicates"
// tests is used to initialize all nodes
tests := []struct {
name string
podName string
nodeName string
equivalenceHashForUpdatePredicate uint64
@@ -729,6 +753,7 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
cache schedulercache.Cache
}{
{
name: "hash predicate 123 not fits host ports",
podName: "testPod",
nodeName: "node1",
equivalenceHashForUpdatePredicate: 123,
@@ -739,6 +764,7 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
cache: &upToDateCache{},
},
{
name: "hash predicate 456 not fits host ports",
podName: "testPod",
nodeName: "node2",
equivalenceHashForUpdatePredicate: 456,
@@ -749,6 +775,7 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
cache: &upToDateCache{},
},
{
name: "hash predicate 123 fits host ports",
podName: "testPod",
nodeName: "node3",
equivalenceHashForUpdatePredicate: 123,
@@ -758,13 +785,16 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
cache: &upToDateCache{},
},
}
ecache := NewEquivalenceCache()
ecache := NewCache()
for _, test := range tests {
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}}
node.SetNode(testNode)
nodeCache, _ := ecache.GetNodeCache(testNode.Name)
// set cached item to equivalence cache
ecache.updateResult(
nodeCache.updateResult(
test.podName,
testPredicate,
test.cachedItem.fit,
@@ -776,115 +806,19 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
}
for _, test := range tests {
// invalidate cached predicate for all nodes
ecache.InvalidateAllCachedPredicateItemOfNode(test.nodeName)
if _, exist := ecache.algorithmCache[test.nodeName]; exist {
t.Errorf("Failed: cached item for node: %v should be invalidated", test.nodeName)
break
}
t.Run(test.name, func(t *testing.T) {
// invalidate cached predicate for all nodes
ecache.InvalidateAllPredicatesOnNode(test.nodeName)
if _, ok := ecache.GetNodeCache(test.nodeName); ok {
t.Errorf("Failed: cached item for node: %v should be invalidated", test.nodeName)
}
})
}
}
func BenchmarkEquivalenceHash(b *testing.B) {
pod := makeBasicPod("test")
for i := 0; i < b.N; i++ {
getEquivalenceHash(pod)
}
}
// syncingMockCache delegates method calls to an actual Cache,
// but calls to UpdateNodeNameToInfoMap synchronize with the test.
type syncingMockCache struct {
schedulercache.Cache
cycleStart, cacheInvalidated chan struct{}
once sync.Once
}
// UpdateNodeNameToInfoMap delegates to the real implementation, but on the first call, it
// synchronizes with the test.
//
// Since UpdateNodeNameToInfoMap is one of the first steps of (*genericScheduler).Schedule, we use
// this point to signal to the test that a scheduling cycle has started.
func (c *syncingMockCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
err := c.Cache.UpdateNodeNameToInfoMap(infoMap)
c.once.Do(func() {
c.cycleStart <- struct{}{}
<-c.cacheInvalidated
})
return err
}
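For readers skimming the mock above: it is just a "pause the first call" handshake built from two channels and a sync.Once. Below is a minimal, self-contained sketch of that pattern; firstCallGate and checkpoint are illustrative names, not part of the scheduler code.
package main

import (
    "fmt"
    "sync"
)

// firstCallGate blocks the first call at a chosen point so a test can perform an
// action "mid-cycle"; later calls pass straight through.
type firstCallGate struct {
    start, resume chan struct{}
    once          sync.Once
}

func (g *firstCallGate) checkpoint() {
    g.once.Do(func() {
        g.start <- struct{}{} // signal that the cycle has started
        <-g.resume            // wait until the test has injected its event
    })
}

func main() {
    gate := &firstCallGate{start: make(chan struct{}), resume: make(chan struct{})}
    done := make(chan struct{})
    go func() {
        gate.checkpoint() // first call blocks here
        gate.checkpoint() // subsequent calls do not block
        close(done)
    }()
    <-gate.start
    fmt.Println("injecting an invalidation while the first cycle is paused")
    gate.resume <- struct{}{}
    <-done
}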
// TestEquivalenceCacheInvalidationRace tests that equivalence cache invalidation is correctly
// handled when an invalidation event happens early in a scheduling cycle. Specifically, the event
// occurs after schedulercache is snapshotted and before the equivalence cache lock is acquired.
func TestEquivalenceCacheInvalidationRace(t *testing.T) {
// Create a predicate that returns false the first time and true on subsequent calls.
podWillFit := false
var callCount int
testPredicate := func(pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
callCount++
if !podWillFit {
podWillFit = true
return false, []algorithm.PredicateFailureReason{predicates.ErrFakePredicate}, nil
}
return true, nil, nil
}
// Set up the mock cache.
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}})
mockCache := &syncingMockCache{
Cache: cache,
cycleStart: make(chan struct{}),
cacheInvalidated: make(chan struct{}),
}
eCache := NewEquivalenceCache()
// Ensure that equivalence cache invalidation happens after the scheduling cycle starts, but before
// the equivalence cache would be updated.
go func() {
<-mockCache.cycleStart
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "new-pod", UID: "new-pod"},
Spec: v1.PodSpec{NodeName: "machine1"}}
if err := cache.AddPod(pod); err != nil {
t.Errorf("Could not add pod to cache: %v", err)
}
eCache.InvalidateAllCachedPredicateItemOfNode("machine1")
mockCache.cacheInvalidated <- struct{}{}
}()
// Set up the scheduler.
ps := map[string]algorithm.FitPredicate{"testPredicate": testPredicate}
predicates.SetPredicatesOrdering([]string{"testPredicate"})
prioritizers := []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}}
pvcLister := schedulertesting.FakePersistentVolumeClaimLister([]*v1.PersistentVolumeClaim{})
scheduler := NewGenericScheduler(
mockCache,
eCache,
NewSchedulingQueue(),
ps,
algorithm.EmptyPredicateMetadataProducer,
prioritizers,
algorithm.EmptyPriorityMetadataProducer,
nil, nil, pvcLister, true, false)
// First scheduling attempt should fail.
nodeLister := schedulertesting.FakeNodeLister(makeNodeList([]string{"machine1"}))
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}}
machine, err := scheduler.Schedule(pod, nodeLister)
if machine != "" || err == nil {
t.Error("First scheduling attempt did not fail")
}
// Second scheduling attempt should succeed because cache was invalidated.
_, err = scheduler.Schedule(pod, nodeLister)
if err != nil {
t.Errorf("Second scheduling attempt failed: %v", err)
}
if callCount != 2 {
t.Errorf("Predicate should have been called twice. Was called %d times.", callCount)
getEquivalencePod(pod)
}
}

View File

@@ -1,307 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package core
import (
"fmt"
"hash/fnv"
"sync"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
hashutil "k8s.io/kubernetes/pkg/util/hash"
"github.com/golang/glog"
)
// EquivalenceCache holds:
// 1. a map of AlgorithmCache with node name as key
// 2. a function to get the equivalence pod
type EquivalenceCache struct {
mu sync.RWMutex
algorithmCache map[string]AlgorithmCache
}
// The AlgorithmCache stores PredicateMap with predicate name as key, PredicateMap as value.
type AlgorithmCache map[string]PredicateMap
// PredicateMap stores HostPredicate with the equivalence hash as the key
type PredicateMap map[uint64]HostPredicate
// HostPredicate is the cached predicate result
type HostPredicate struct {
Fit bool
FailReasons []algorithm.PredicateFailureReason
}
// NewEquivalenceCache returns EquivalenceCache to speed up predicates by caching
// result from previous scheduling.
func NewEquivalenceCache() *EquivalenceCache {
return &EquivalenceCache{
algorithmCache: make(map[string]AlgorithmCache),
}
}
// RunPredicate will return a cached predicate result. In case of a cache miss, the predicate will
// be run and its results cached for the next call.
//
// NOTE: RunPredicate will not update the equivalence cache if the given NodeInfo is stale.
func (ec *EquivalenceCache) RunPredicate(
pred algorithm.FitPredicate,
predicateKey string,
pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo,
equivClassInfo *equivalenceClassInfo,
cache schedulercache.Cache,
) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
return false, []algorithm.PredicateFailureReason{}, fmt.Errorf("nodeInfo is nil or node is invalid")
}
fit, reasons, invalid := ec.lookupResult(pod.GetName(), nodeInfo.Node().GetName(), predicateKey, equivClassInfo.hash)
if !invalid {
return fit, reasons, nil
}
fit, reasons, err := pred(pod, meta, nodeInfo)
if err != nil {
return fit, reasons, err
}
if cache != nil {
ec.updateResult(pod.GetName(), predicateKey, fit, reasons, equivClassInfo.hash, cache, nodeInfo)
}
return fit, reasons, nil
}
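RunPredicate is essentially a memoization wrapper: look up the (node, predicate, equivalence hash) key and, only on a miss, run the real predicate and store the result. The following is a minimal, self-contained sketch of that flow under assumed names (predicateFn, resultCache, and runCached are illustrative, not the scheduler's API):
package main

import "fmt"

// predicateFn is a stand-in for algorithm.FitPredicate: it reports whether a
// (pod, node) pair fits and, if not, why.
type predicateFn func(pod, node string) (bool, []string)

// resultCache memoizes predicate results keyed by node, predicate name and
// equivalence hash, mirroring the lookup-then-run-then-store flow above.
type resultCache map[string]map[string]map[uint64]bool

func runCached(c resultCache, pred predicateFn, predKey, pod, node string, hash uint64) bool {
    if fit, ok := c[node][predKey][hash]; ok {
        return fit // cache hit: skip re-running the predicate
    }
    fit, _ := pred(pod, node) // cache miss: evaluate and store
    if c[node] == nil {
        c[node] = map[string]map[uint64]bool{}
    }
    if c[node][predKey] == nil {
        c[node][predKey] = map[uint64]bool{}
    }
    c[node][predKey][hash] = fit
    return fit
}

func main() {
    calls := 0
    alwaysFits := func(pod, node string) (bool, []string) { calls++; return true, nil }
    c := resultCache{}
    runCached(c, alwaysFits, "GeneralPredicates", "pod-a", "node1", 123)
    runCached(c, alwaysFits, "GeneralPredicates", "pod-b", "node1", 123) // same class: served from cache
    fmt.Println("predicate evaluations:", calls)                         // 1
}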
// updateResult updates the cached result of a predicate.
func (ec *EquivalenceCache) updateResult(
podName, predicateKey string,
fit bool,
reasons []algorithm.PredicateFailureReason,
equivalenceHash uint64,
cache schedulercache.Cache,
nodeInfo *schedulercache.NodeInfo,
) {
ec.mu.Lock()
defer ec.mu.Unlock()
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
return
}
// Skip update if NodeInfo is stale.
if !cache.IsUpToDate(nodeInfo) {
return
}
nodeName := nodeInfo.Node().GetName()
if _, exist := ec.algorithmCache[nodeName]; !exist {
ec.algorithmCache[nodeName] = AlgorithmCache{}
}
predicateItem := HostPredicate{
Fit: fit,
FailReasons: reasons,
}
// if cached predicate map already exists, just update the predicate by key
if predicateMap, ok := ec.algorithmCache[nodeName][predicateKey]; ok {
// maps in golang are references, no need to add them back
predicateMap[equivalenceHash] = predicateItem
} else {
ec.algorithmCache[nodeName][predicateKey] =
PredicateMap{
equivalenceHash: predicateItem,
}
}
glog.V(5).Infof("Updated cached predicate: %v for pod: %v on node: %s, with item %v", predicateKey, podName, nodeName, predicateItem)
}
// lookupResult returns cached predicate results:
// 1. whether the pod fits
// 2. the reasons if the pod does not fit
// 3. whether the cache item was not found
func (ec *EquivalenceCache) lookupResult(
podName, nodeName, predicateKey string,
equivalenceHash uint64,
) (bool, []algorithm.PredicateFailureReason, bool) {
ec.mu.RLock()
defer ec.mu.RUnlock()
glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache",
predicateKey, podName, nodeName)
if hostPredicate, exist := ec.algorithmCache[nodeName][predicateKey][equivalenceHash]; exist {
if hostPredicate.Fit {
return true, []algorithm.PredicateFailureReason{}, false
}
return false, hostPredicate.FailReasons, false
}
// is invalid
return false, []algorithm.PredicateFailureReason{}, true
}
// InvalidateCachedPredicateItem marks all items of the given predicateKeys, for all pods, on the given node as invalid
func (ec *EquivalenceCache) InvalidateCachedPredicateItem(nodeName string, predicateKeys sets.String) {
if len(predicateKeys) == 0 {
return
}
ec.mu.Lock()
defer ec.mu.Unlock()
for predicateKey := range predicateKeys {
delete(ec.algorithmCache[nodeName], predicateKey)
}
glog.V(5).Infof("Done invalidating cached predicates: %v on node: %s", predicateKeys, nodeName)
}
// InvalidateCachedPredicateItemOfAllNodes marks all items of the given predicateKeys, for all pods, on all nodes as invalid
func (ec *EquivalenceCache) InvalidateCachedPredicateItemOfAllNodes(predicateKeys sets.String) {
if len(predicateKeys) == 0 {
return
}
ec.mu.Lock()
defer ec.mu.Unlock()
// algorithmCache uses nodeName as its key, so we just iterate over it and invalidate the given predicates
for _, algorithmCache := range ec.algorithmCache {
for predicateKey := range predicateKeys {
delete(algorithmCache, predicateKey)
}
}
glog.V(5).Infof("Done invalidating cached predicates: %v on all node", predicateKeys)
}
// InvalidateAllCachedPredicateItemOfNode marks all cached items on given node as invalid
func (ec *EquivalenceCache) InvalidateAllCachedPredicateItemOfNode(nodeName string) {
ec.mu.Lock()
defer ec.mu.Unlock()
delete(ec.algorithmCache, nodeName)
glog.V(5).Infof("Done invalidating all cached predicates on node: %s", nodeName)
}
// InvalidateCachedPredicateItemForPodAdd is a wrapper of InvalidateCachedPredicateItem for pod add case
func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, nodeName string) {
// MatchInterPodAffinity: we assume the scheduler can make sure a newly bound pod
// will not break the existing inter-pod affinity, so we do not need to invalidate
// MatchInterPodAffinity when a pod is added.
//
// But when a pod is deleted, existing inter-pod affinity may become invalid
// (e.g. this pod was preferred by someone else, or vice versa).
//
// NOTE: the assumptions above will not hold once we implement features like
// RequiredDuringSchedulingRequiredDuringExecution.
// NoDiskConflict: if the newly scheduled pod fits alongside the existing pods on this node,
// it will also fit the equivalence classes of the existing pods.
// GeneralPredicates: will always be affected by adding a new pod.
invalidPredicates := sets.NewString(predicates.GeneralPred)
// MaxPDVolumeCountPredicate: we check the volumes of the pod to make the decision.
for _, vol := range pod.Spec.Volumes {
if vol.PersistentVolumeClaim != nil {
invalidPredicates.Insert(predicates.MaxEBSVolumeCountPred, predicates.MaxGCEPDVolumeCountPred, predicates.MaxAzureDiskVolumeCountPred)
} else {
if vol.AWSElasticBlockStore != nil {
invalidPredicates.Insert(predicates.MaxEBSVolumeCountPred)
}
if vol.GCEPersistentDisk != nil {
invalidPredicates.Insert(predicates.MaxGCEPDVolumeCountPred)
}
if vol.AzureDisk != nil {
invalidPredicates.Insert(predicates.MaxAzureDiskVolumeCountPred)
}
}
}
ec.InvalidateCachedPredicateItem(nodeName, invalidPredicates)
}
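As a worked example of the rules above, a pod that mounts a PersistentVolumeClaim invalidates GeneralPredicates plus all three volume-count predicates, while a pod with only an emptyDir volume invalidates just GeneralPredicates. A standalone sketch follows; the predicate key strings and volumeKind type are illustrative, the real constants live in the predicates package:
package main

import "fmt"

// volumeKind is an illustrative stand-in for the volume sources inspected above.
type volumeKind int

const (
    pvcVolume volumeKind = iota
    gcePDVolume
    emptyDirVolume
)

// invalidatedPredicates mirrors the decision table in
// InvalidateCachedPredicateItemForPodAdd for a pod's volume list.
func invalidatedPredicates(volumes []volumeKind) map[string]bool {
    keys := map[string]bool{"GeneralPredicates": true}
    for _, v := range volumes {
        switch v {
        case pvcVolume:
            keys["MaxEBSVolumeCount"] = true
            keys["MaxGCEPDVolumeCount"] = true
            keys["MaxAzureDiskVolumeCount"] = true
        case gcePDVolume:
            keys["MaxGCEPDVolumeCount"] = true
        }
    }
    return keys
}

func main() {
    // A pod that mounts a PVC invalidates all three volume-count predicates,
    // while one with only an emptyDir invalidates just GeneralPredicates.
    fmt.Println(invalidatedPredicates([]volumeKind{pvcVolume}))
    fmt.Println(invalidatedPredicates([]volumeKind{emptyDirVolume}))
}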
// equivalenceClassInfo holds equivalence hash which is used for checking equivalence cache.
// We will pass this to podFitsOnNode to ensure equivalence hash is only calculated per schedule.
type equivalenceClassInfo struct {
// Equivalence hash.
hash uint64
}
// getEquivalenceClassInfo returns a hash of the given pod.
// The hashing function returns the same value for any two pods that are
// equivalent from the perspective of scheduling.
func (ec *EquivalenceCache) getEquivalenceClassInfo(pod *v1.Pod) *equivalenceClassInfo {
equivalencePod := getEquivalenceHash(pod)
if equivalencePod != nil {
hash := fnv.New32a()
hashutil.DeepHashObject(hash, equivalencePod)
return &equivalenceClassInfo{
hash: uint64(hash.Sum32()),
}
}
return nil
}
// equivalencePod is the set of pod attributes which must match for two pods to
// be considered equivalent for scheduling purposes. For correctness, this must
// include any Pod field which is used by a FitPredicate.
//
// NOTE: For equivalence hash to be formally correct, lists and maps in the
// equivalencePod should be normalized. (e.g. by sorting them) However, the
// vast majority of equivalent pod classes are expected to be created from a
// single pod template, so they will all have the same ordering.
type equivalencePod struct {
Namespace *string
Labels map[string]string
Affinity *v1.Affinity
Containers []v1.Container // See note about ordering
InitContainers []v1.Container // See note about ordering
NodeName *string
NodeSelector map[string]string
Tolerations []v1.Toleration
Volumes []v1.Volume // See note about ordering
}
// getEquivalenceHash returns the equivalencePod for a Pod.
func getEquivalenceHash(pod *v1.Pod) *equivalencePod {
ep := &equivalencePod{
Namespace: &pod.Namespace,
Labels: pod.Labels,
Affinity: pod.Spec.Affinity,
Containers: pod.Spec.Containers,
InitContainers: pod.Spec.InitContainers,
NodeName: &pod.Spec.NodeName,
NodeSelector: pod.Spec.NodeSelector,
Tolerations: pod.Spec.Tolerations,
Volumes: pod.Spec.Volumes,
}
// DeepHashObject considers nil and empty slices to be different. Normalize them.
if len(ep.Containers) == 0 {
ep.Containers = nil
}
if len(ep.InitContainers) == 0 {
ep.InitContainers = nil
}
if len(ep.Tolerations) == 0 {
ep.Tolerations = nil
}
if len(ep.Volumes) == 0 {
ep.Volumes = nil
}
// Normalize empty maps also.
if len(ep.Labels) == 0 {
ep.Labels = nil
}
if len(ep.NodeSelector) == 0 {
ep.NodeSelector = nil
}
// TODO(misterikkit): Also normalize nested maps and slices.
return ep
}
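To make the hashing idea concrete: any two pods whose normalized equivalencePod fields match produce the same FNV hash and therefore share cached predicate results. A minimal, self-contained sketch under assumed names (simplifiedClass and classHash are illustrative; the real code hashes the full equivalencePod with hashutil.DeepHashObject):
package main

import (
    "fmt"
    "hash/fnv"
)

// simplifiedClass is an illustrative subset of the equivalencePod fields above.
type simplifiedClass struct {
    Namespace    string
    NodeSelector map[string]string
    Image        string
}

// classHash hashes the printed form of the normalized struct with FNV-32a,
// loosely mirroring how DeepHashObject feeds an fnv.New32a() hasher.
func classHash(c simplifiedClass) uint64 {
    if len(c.NodeSelector) == 0 {
        c.NodeSelector = nil // normalize empty vs nil, as in getEquivalenceHash
    }
    h := fnv.New32a()
    fmt.Fprintf(h, "%#v", c)
    return uint64(h.Sum32())
}

func main() {
    // Two "pods" stamped out from the same template hash to the same class...
    a := classHash(simplifiedClass{Namespace: "default", Image: "nginx:1.15"})
    b := classHash(simplifiedClass{Namespace: "default", NodeSelector: map[string]string{}, Image: "nginx:1.15"})
    // ...while a different template lands in a different class.
    c := classHash(simplifiedClass{Namespace: "default", Image: "redis:4"})
    fmt.Println(a == b, a == c) // true false
}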

View File

@@ -454,7 +454,7 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
// because of the errors from errorPredicateExtender and/or
// errorPrioritizerExtender.
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
prioritizers: []algorithm.PriorityConfig{{Function: machine2Prioritizer, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{errorPredicateExtender},
@@ -492,43 +492,46 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
}
for _, test := range tests {
extenders := []algorithm.SchedulerExtender{}
for ii := range test.extenders {
extenders = append(extenders, &test.extenders[ii])
}
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, name := range test.nodes {
cache.AddNode(createNode(name))
}
queue := NewSchedulingQueue()
scheduler := NewGenericScheduler(
cache,
nil,
queue,
test.predicates,
algorithm.EmptyPredicateMetadataProducer,
test.prioritizers,
algorithm.EmptyPriorityMetadataProducer,
extenders,
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
podIgnored := &v1.Pod{}
machine, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
if test.expectsErr {
if err == nil {
t.Errorf("Unexpected non-error for %s, machine %s", test.name, machine)
t.Run(test.name, func(t *testing.T) {
extenders := []algorithm.SchedulerExtender{}
for ii := range test.extenders {
extenders = append(extenders, &test.extenders[ii])
}
} else {
if err != nil {
t.Errorf("Unexpected error: %v", err)
continue
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, name := range test.nodes {
cache.AddNode(createNode(name))
}
if test.expectedHost != machine {
t.Errorf("Failed : %s, Expected: %s, Saw: %s", test.name, test.expectedHost, machine)
queue := NewSchedulingQueue()
scheduler := NewGenericScheduler(
cache,
nil,
queue,
test.predicates,
algorithm.EmptyPredicateMetadataProducer,
test.prioritizers,
algorithm.EmptyPriorityMetadataProducer,
extenders,
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false,
schedulerapi.DefaultPercentageOfNodesToScore)
podIgnored := &v1.Pod{}
machine, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
if test.expectsErr {
if err == nil {
t.Errorf("Unexpected non-error, machine %s", machine)
}
} else {
if err != nil {
t.Errorf("Unexpected error: %v", err)
return
}
if test.expectedHost != machine {
t.Errorf("Expected: %s, Saw: %s", test.expectedHost, machine)
}
}
}
})
}
}

View File

@@ -17,6 +17,7 @@ limitations under the License.
package core
import (
"context"
"fmt"
"math"
"sort"
@@ -39,11 +40,20 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core/equivalence"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
)
const (
// minFeasibleNodesToFind is the minimum number of nodes that would be scored
// in each scheduling cycle. This is a semi-arbitrary value to ensure that a
// certain minimum number of nodes are checked for feasibility. This in turn helps
// ensure a minimum level of spreading.
minFeasibleNodesToFind = 100
)
// FailedPredicateMap declares a map[string][]algorithm.PredicateFailureReason type.
type FailedPredicateMap map[string][]algorithm.PredicateFailureReason
@@ -85,20 +95,20 @@ func (f *FitError) Error() string {
type genericScheduler struct {
cache schedulercache.Cache
equivalenceCache *EquivalenceCache
equivalenceCache *equivalence.Cache
schedulingQueue SchedulingQueue
predicates map[string]algorithm.FitPredicate
priorityMetaProducer algorithm.PriorityMetadataProducer
predicateMetaProducer algorithm.PredicateMetadataProducer
prioritizers []algorithm.PriorityConfig
extenders []algorithm.SchedulerExtender
lastNodeIndexLock sync.Mutex
lastNodeIndex uint64
alwaysCheckAllPredicates bool
cachedNodeInfoMap map[string]*schedulercache.NodeInfo
volumeBinder *volumebinder.VolumeBinder
pvcLister corelisters.PersistentVolumeClaimLister
disablePreemption bool
percentageOfNodesToScore int32
}
// Schedule tries to schedule the given pod to one of the nodes in the node list.
@@ -175,6 +185,22 @@ func (g *genericScheduler) Predicates() map[string]algorithm.FitPredicate {
return g.predicates
}
// findMaxScores returns the indexes of nodes in the "priorityList" that have the highest "Score".
func findMaxScores(priorityList schedulerapi.HostPriorityList) []int {
maxScoreIndexes := make([]int, 0, len(priorityList)/2)
maxScore := priorityList[0].Score
for i, hp := range priorityList {
if hp.Score > maxScore {
maxScore = hp.Score
maxScoreIndexes = maxScoreIndexes[:0]
maxScoreIndexes = append(maxScoreIndexes, i)
} else if hp.Score == maxScore {
maxScoreIndexes = append(maxScoreIndexes, i)
}
}
return maxScoreIndexes
}
// selectHost takes a prioritized list of nodes and then picks one
// in a round-robin manner from the nodes that had the highest score.
func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList) (string, error) {
@@ -182,16 +208,11 @@ func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList
return "", fmt.Errorf("empty priorityList")
}
sort.Sort(sort.Reverse(priorityList))
maxScore := priorityList[0].Score
firstAfterMaxScore := sort.Search(len(priorityList), func(i int) bool { return priorityList[i].Score < maxScore })
g.lastNodeIndexLock.Lock()
ix := int(g.lastNodeIndex % uint64(firstAfterMaxScore))
maxScores := findMaxScores(priorityList)
ix := int(g.lastNodeIndex % uint64(len(maxScores)))
g.lastNodeIndex++
g.lastNodeIndexLock.Unlock()
return priorityList[ix].Host, nil
return priorityList[maxScores[ix]].Host, nil
}
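Taken together, findMaxScores and selectHost implement a round-robin tie-break over the hosts that share the top score. A standalone sketch of the same idea, with hostScore and pickHost as illustrative stand-ins for schedulerapi.HostPriority and the scheduler's internal counter:
package main

import (
    "fmt"
    "sort"
)

type hostScore struct {
    Host  string
    Score int
}

// pickHost sorts descending by score, collects the indexes that share the top
// score (as findMaxScores does), and round-robins over them with a counter.
func pickHost(list []hostScore, lastIndex *uint64) string {
    sort.Slice(list, func(i, j int) bool { return list[i].Score > list[j].Score })
    maxIdx := []int{}
    for i, h := range list {
        if h.Score == list[0].Score {
            maxIdx = append(maxIdx, i)
        }
    }
    ix := int(*lastIndex % uint64(len(maxIdx)))
    *lastIndex++
    return list[maxIdx[ix]].Host
}

func main() {
    var last uint64
    list := []hostScore{{"machine1", 3}, {"machine2", 3}, {"machine3", 1}}
    // Successive calls alternate between the two hosts tied at the top score.
    fmt.Println(pickHost(list, &last), pickHost(list, &last), pickHost(list, &last))
}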
// preempt finds nodes with pods that can be preempted to make room for "pod" to
@@ -211,7 +232,7 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister,
return nil, nil, nil, err
}
if !podEligibleToPreemptOthers(pod, g.cachedNodeInfoMap) {
glog.V(5).Infof("Pod %v is not eligible for more preemption.", pod.Name)
glog.V(5).Infof("Pod %v/%v is not eligible for more preemption.", pod.Namespace, pod.Name)
return nil, nil, nil, nil
}
allNodes, err := nodeLister.List()
@@ -221,9 +242,9 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister,
if len(allNodes) == 0 {
return nil, nil, nil, ErrNoNodesAvailable
}
potentialNodes := nodesWherePreemptionMightHelp(pod, allNodes, fitError.FailedPredicates)
potentialNodes := nodesWherePreemptionMightHelp(allNodes, fitError.FailedPredicates)
if len(potentialNodes) == 0 {
glog.V(3).Infof("Preemption will not help schedule pod %v on any node.", pod.Name)
glog.V(3).Infof("Preemption will not help schedule pod %v/%v on any node.", pod.Namespace, pod.Name)
// In this case, we should clean-up any existing nominated node name of the pod.
return nil, nil, []*v1.Pod{pod}, nil
}
@@ -271,7 +292,7 @@ func (g *genericScheduler) processPreemptionWithExtenders(
) (map[*v1.Node]*schedulerapi.Victims, error) {
if len(nodeToVictims) > 0 {
for _, extender := range g.extenders {
if extender.SupportsPreemption() {
if extender.SupportsPreemption() && extender.IsInterested(pod) {
newNodeToVictims, err := extender.ProcessPreemption(
pod,
nodeToVictims,
@@ -325,6 +346,20 @@ func (g *genericScheduler) getLowerPriorityNominatedPods(pod *v1.Pod, nodeName s
return lowerPriorityPods
}
// numFeasibleNodesToFind returns the number of feasible nodes that, once found, causes the
// scheduler to stop searching for more feasible nodes.
func (g *genericScheduler) numFeasibleNodesToFind(numAllNodes int32) int32 {
if numAllNodes < minFeasibleNodesToFind || g.percentageOfNodesToScore <= 0 ||
g.percentageOfNodesToScore >= 100 {
return numAllNodes
}
numNodes := numAllNodes * g.percentageOfNodesToScore / 100
if numNodes < minFeasibleNodesToFind {
return minFeasibleNodesToFind
}
return numNodes
}
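For intuition about the sampling formula: with percentageOfNodesToScore set to 50, a 5000-node cluster is cut off after 2500 feasible nodes, a 150-node cluster is clamped up to the 100-node minimum, and a cluster below 100 nodes is always checked in full. A minimal sketch that mirrors the arithmetic above (the helper name numFeasible is illustrative):
package main

import "fmt"

const minFeasibleNodesToFind = 100

// numFeasible mirrors numFeasibleNodesToFind for a few sample cluster sizes.
func numFeasible(numAllNodes, percentage int32) int32 {
    if numAllNodes < minFeasibleNodesToFind || percentage <= 0 || percentage >= 100 {
        return numAllNodes
    }
    num := numAllNodes * percentage / 100
    if num < minFeasibleNodesToFind {
        return minFeasibleNodesToFind
    }
    return num
}

func main() {
    fmt.Println(numFeasible(5000, 50)) // 2500: half of a large cluster is sampled
    fmt.Println(numFeasible(150, 50))  // 100: clamped up to the minimum
    fmt.Println(numFeasible(80, 50))   // 80: small clusters are always fully checked
}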
// Filters the nodes to find the ones that fit based on the given predicate functions
// Each node is passed through the predicate functions to determine if it is a fit
func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v1.Node, FailedPredicateMap, error) {
@@ -334,34 +369,45 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v
if len(g.predicates) == 0 {
filtered = nodes
} else {
allNodes := int32(g.cache.NodeTree().NumNodes)
numNodesToFind := g.numFeasibleNodesToFind(allNodes)
// Create filtered list with enough space to avoid growing it
// and allow assigning.
filtered = make([]*v1.Node, len(nodes))
filtered = make([]*v1.Node, numNodesToFind)
errs := errors.MessageCountMap{}
var predicateResultLock sync.Mutex
var filteredLen int32
var (
predicateResultLock sync.Mutex
filteredLen int32
equivClass *equivalence.Class
)
ctx, cancel := context.WithCancel(context.Background())
// We can use the same metadata producer for all nodes.
meta := g.predicateMetaProducer(pod, g.cachedNodeInfoMap)
var equivCacheInfo *equivalenceClassInfo
if g.equivalenceCache != nil {
// getEquivalenceClassInfo will return immediately if no equivalence pod found
equivCacheInfo = g.equivalenceCache.getEquivalenceClassInfo(pod)
equivClass = equivalence.NewClass(pod)
}
checkNode := func(i int) {
nodeName := nodes[i].Name
var nodeCache *equivalence.NodeCache
nodeName := g.cache.NodeTree().Next()
if g.equivalenceCache != nil {
nodeCache, _ = g.equivalenceCache.GetNodeCache(nodeName)
}
fits, failedPredicates, err := podFitsOnNode(
pod,
meta,
g.cachedNodeInfoMap[nodeName],
g.predicates,
g.cache,
g.equivalenceCache,
nodeCache,
g.schedulingQueue,
g.alwaysCheckAllPredicates,
equivCacheInfo,
equivClass,
)
if err != nil {
predicateResultLock.Lock()
@@ -370,14 +416,24 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v
return
}
if fits {
filtered[atomic.AddInt32(&filteredLen, 1)-1] = nodes[i]
length := atomic.AddInt32(&filteredLen, 1)
if length > numNodesToFind {
cancel()
atomic.AddInt32(&filteredLen, -1)
} else {
filtered[length-1] = g.cachedNodeInfoMap[nodeName].Node()
}
} else {
predicateResultLock.Lock()
failedPredicateMap[nodeName] = failedPredicates
predicateResultLock.Unlock()
}
}
workqueue.Parallelize(16, len(nodes), checkNode)
// Stops searching for more nodes once the configured number of feasible nodes
// has been found.
workqueue.ParallelizeUntil(ctx, 16, int(allNodes), checkNode)
filtered = filtered[:filteredLen]
if len(errs) > 0 {
return []*v1.Node{}, FailedPredicateMap{}, errors.CreateAggregateFromMessageCountMap(errs)
@@ -461,10 +517,10 @@ func podFitsOnNode(
info *schedulercache.NodeInfo,
predicateFuncs map[string]algorithm.FitPredicate,
cache schedulercache.Cache,
ecache *EquivalenceCache,
nodeCache *equivalence.NodeCache,
queue SchedulingQueue,
alwaysCheckAllPredicates bool,
equivCacheInfo *equivalenceClassInfo,
equivClass *equivalence.Class,
) (bool, []algorithm.PredicateFailureReason, error) {
var (
eCacheAvailable bool
@@ -501,7 +557,7 @@ func podFitsOnNode(
// Bypass eCache if node has any nominated pods.
// TODO(bsalamat): consider using eCache and adding proper eCache invalidations
// when pods are nominated or their nominations change.
eCacheAvailable = equivCacheInfo != nil && !podsAdded
eCacheAvailable = equivClass != nil && nodeCache != nil && !podsAdded
for _, predicateKey := range predicates.Ordering() {
var (
fit bool
@@ -511,7 +567,7 @@ func podFitsOnNode(
//TODO (yastij) : compute average predicate restrictiveness to export it as Prometheus metric
if predicate, exist := predicateFuncs[predicateKey]; exist {
if eCacheAvailable {
fit, reasons, err = ecache.RunPredicate(predicate, predicateKey, pod, metaToUse, nodeInfoToUse, equivCacheInfo, cache)
fit, reasons, err = nodeCache.RunPredicate(predicate, predicateKey, pod, metaToUse, nodeInfoToUse, equivClass, cache)
} else {
fit, reasons, err = predicate(pod, metaToUse, nodeInfoToUse)
}
@@ -524,8 +580,8 @@ func podFitsOnNode(
failedPredicates = append(failedPredicates, reasons...)
// if alwaysCheckAllPredicates is false, short circuit all predicates when one predicate fails.
if !alwaysCheckAllPredicates {
glog.V(5).Infoln("since alwaysCheckAllPredicates has not been set, the predicate" +
"evaluation is short circuited and there are chances" +
glog.V(5).Infoln("since alwaysCheckAllPredicates has not been set, the predicate " +
"evaluation is short circuited and there are chances " +
"of other predicates failing as well.")
break
}
@@ -594,6 +650,7 @@ func PrioritizeNodes(
results[i] = make(schedulerapi.HostPriorityList, len(nodes))
}
}
processNode := func(index int) {
nodeInfo := nodeNameToInfo[nodes[index].Name]
var err error
@@ -604,7 +661,7 @@ func PrioritizeNodes(
results[i][index], err = priorityConfigs[i].Map(pod, meta, nodeInfo)
if err != nil {
appendError(err)
return
results[i][index].Host = nodes[index].Name
}
}
}
@@ -971,7 +1028,7 @@ func selectVictimsOnNode(
// nodesWherePreemptionMightHelp returns a list of nodes with failed predicates
// that may be satisfied by removing pods from the node.
func nodesWherePreemptionMightHelp(pod *v1.Pod, nodes []*v1.Node, failedPredicatesMap FailedPredicateMap) []*v1.Node {
func nodesWherePreemptionMightHelp(nodes []*v1.Node, failedPredicatesMap FailedPredicateMap) []*v1.Node {
potentialNodes := []*v1.Node{}
for _, node := range nodes {
unresolvableReasonExist := false
@@ -985,11 +1042,19 @@ func nodesWherePreemptionMightHelp(pod *v1.Pod, nodes []*v1.Node, failedPredicat
switch failedPredicate {
case
predicates.ErrNodeSelectorNotMatch,
predicates.ErrPodAffinityRulesNotMatch,
predicates.ErrPodNotMatchHostName,
predicates.ErrTaintsTolerationsNotMatch,
predicates.ErrNodeLabelPresenceViolated,
// Node conditions won't change when scheduler simulates removal of preemption victims.
// So, it is pointless to try nodes that have not been able to host the pod due to node
// conditions. These include ErrNodeNotReady, ErrNodeUnderPIDPressure, ErrNodeUnderMemoryPressure, ....
predicates.ErrNodeNotReady,
predicates.ErrNodeNetworkUnavailable,
predicates.ErrNodeUnderDiskPressure,
predicates.ErrNodeUnderPIDPressure,
predicates.ErrNodeUnderMemoryPressure,
predicates.ErrNodeOutOfDisk,
predicates.ErrNodeUnschedulable,
predicates.ErrNodeUnknownCondition,
predicates.ErrVolumeZoneConflict,
@@ -997,7 +1062,6 @@ func nodesWherePreemptionMightHelp(pod *v1.Pod, nodes []*v1.Node, failedPredicat
predicates.ErrVolumeBindConflict:
unresolvableReasonExist = true
break
// TODO(bsalamat): Please add affinity failure cases once we have specific affinity failure errors.
}
}
if !found || !unresolvableReasonExist {
@@ -1058,7 +1122,7 @@ func podPassesBasicChecks(pod *v1.Pod, pvcLister corelisters.PersistentVolumeCla
// NewGenericScheduler creates a genericScheduler object.
func NewGenericScheduler(
cache schedulercache.Cache,
eCache *EquivalenceCache,
eCache *equivalence.Cache,
podQueue SchedulingQueue,
predicates map[string]algorithm.FitPredicate,
predicateMetaProducer algorithm.PredicateMetadataProducer,
@@ -1069,6 +1133,7 @@ func NewGenericScheduler(
pvcLister corelisters.PersistentVolumeClaimLister,
alwaysCheckAllPredicates bool,
disablePreemption bool,
percentageOfNodesToScore int32,
) algorithm.ScheduleAlgorithm {
return &genericScheduler{
cache: cache,
@@ -1084,5 +1149,6 @@ func NewGenericScheduler(
pvcLister: pvcLister,
alwaysCheckAllPredicates: alwaysCheckAllPredicates,
disablePreemption: disablePreemption,
percentageOfNodesToScore: percentageOfNodesToScore,
}
}

View File

@@ -22,15 +22,16 @@ import (
"reflect"
"strconv"
"strings"
"sync"
"testing"
"time"
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
@@ -39,11 +40,13 @@ import (
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core/equivalence"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
var (
order = []string{"false", "true", "matches", "nopods", algorithmpredicates.MatchInterPodAffinityPred}
errPrioritize = fmt.Errorf("priority map encounters an error")
order = []string{"false", "true", "matches", "nopods", algorithmpredicates.MatchInterPodAffinityPred}
)
func falsePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
@@ -110,6 +113,26 @@ func reverseNumericPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercac
return reverseResult, nil
}
func trueMapPriority(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
return schedulerapi.HostPriority{
Host: nodeInfo.Node().Name,
Score: 1,
}, nil
}
func falseMapPriority(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
return schedulerapi.HostPriority{}, errPrioritize
}
func getNodeReducePriority(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
for _, host := range result {
if host.Host == "" {
return fmt.Errorf("unexpected empty host name")
}
}
return nil
}
func makeNodeList(nodeNames []string) []*v1.Node {
result := make([]*v1.Node, 0, len(nodeNames))
for _, nodeName := range nodeNames {
@@ -121,11 +144,13 @@ func makeNodeList(nodeNames []string) []*v1.Node {
func TestSelectHost(t *testing.T) {
scheduler := genericScheduler{}
tests := []struct {
name string
list schedulerapi.HostPriorityList
possibleHosts sets.String
expectsErr bool
}{
{
name: "unique properly ordered scores",
list: []schedulerapi.HostPriority{
{Host: "machine1.1", Score: 1},
{Host: "machine2.1", Score: 2},
@@ -133,8 +158,8 @@ func TestSelectHost(t *testing.T) {
possibleHosts: sets.NewString("machine2.1"),
expectsErr: false,
},
// equal scores
{
name: "equal scores",
list: []schedulerapi.HostPriority{
{Host: "machine1.1", Score: 1},
{Host: "machine1.2", Score: 2},
@@ -144,8 +169,8 @@ func TestSelectHost(t *testing.T) {
possibleHosts: sets.NewString("machine1.2", "machine1.3", "machine2.1"),
expectsErr: false,
},
// out of order scores
{
name: "out of order scores",
list: []schedulerapi.HostPriority{
{Host: "machine1.1", Score: 3},
{Host: "machine1.2", Score: 3},
@@ -156,8 +181,8 @@ func TestSelectHost(t *testing.T) {
possibleHosts: sets.NewString("machine1.1", "machine1.2", "machine1.3"),
expectsErr: false,
},
// empty priorityList
{
name: "empty priority list",
list: []schedulerapi.HostPriority{},
possibleHosts: sets.NewString(),
expectsErr: true,
@@ -165,22 +190,24 @@ func TestSelectHost(t *testing.T) {
}
for _, test := range tests {
// increase the randomness
for i := 0; i < 10; i++ {
got, err := scheduler.selectHost(test.list)
if test.expectsErr {
if err == nil {
t.Error("Unexpected non-error")
}
} else {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if !test.possibleHosts.Has(got) {
t.Errorf("got %s is not in the possible map %v", got, test.possibleHosts)
t.Run(test.name, func(t *testing.T) {
// increase the randomness
for i := 0; i < 10; i++ {
got, err := scheduler.selectHost(test.list)
if test.expectsErr {
if err == nil {
t.Error("Unexpected non-error")
}
} else {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if !test.possibleHosts.Has(got) {
t.Errorf("got %s is not in the possible map %v", got, test.possibleHosts)
}
}
}
}
})
}
}
@@ -394,41 +421,52 @@ func TestGenericScheduler(t *testing.T) {
},
},
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: falseMapPriority, Weight: 1}, {Map: trueMapPriority, Reduce: getNodeReducePriority, Weight: 2}},
nodes: []string{"2", "1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
name: "test error with priority map",
wErr: errors.NewAggregate([]error{errPrioritize, errPrioritize}),
},
}
for _, test := range tests {
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, pod := range test.pods {
cache.AddPod(pod)
}
for _, name := range test.nodes {
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}})
}
pvcs := []*v1.PersistentVolumeClaim{}
pvcs = append(pvcs, test.pvcs...)
t.Run(test.name, func(t *testing.T) {
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, pod := range test.pods {
cache.AddPod(pod)
}
for _, name := range test.nodes {
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}})
}
pvcs := []*v1.PersistentVolumeClaim{}
pvcs = append(pvcs, test.pvcs...)
pvcLister := schedulertesting.FakePersistentVolumeClaimLister(pvcs)
pvcLister := schedulertesting.FakePersistentVolumeClaimLister(pvcs)
scheduler := NewGenericScheduler(
cache,
nil,
NewSchedulingQueue(),
test.predicates,
algorithm.EmptyPredicateMetadataProducer,
test.prioritizers,
algorithm.EmptyPriorityMetadataProducer,
[]algorithm.SchedulerExtender{},
nil,
pvcLister,
test.alwaysCheckAllPredicates,
false)
machine, err := scheduler.Schedule(test.pod, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
scheduler := NewGenericScheduler(
cache,
nil,
NewSchedulingQueue(),
test.predicates,
algorithm.EmptyPredicateMetadataProducer,
test.prioritizers,
algorithm.EmptyPriorityMetadataProducer,
[]algorithm.SchedulerExtender{},
nil,
pvcLister,
test.alwaysCheckAllPredicates,
false,
schedulerapi.DefaultPercentageOfNodesToScore)
machine, err := scheduler.Schedule(test.pod, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
if !reflect.DeepEqual(err, test.wErr) {
t.Errorf("Failed : %s, Unexpected error: %v, expected: %v", test.name, err, test.wErr)
}
if test.expectedHosts != nil && !test.expectedHosts.Has(machine) {
t.Errorf("Failed : %s, Expected: %s, got: %s", test.name, test.expectedHosts, machine)
}
if !reflect.DeepEqual(err, test.wErr) {
t.Errorf("Unexpected error: %v, expected: %v", err, test.wErr)
}
if test.expectedHosts != nil && !test.expectedHosts.Has(machine) {
t.Errorf("Expected: %s, got: %s", test.expectedHosts, machine)
}
})
}
}
@@ -449,7 +487,8 @@ func makeScheduler(predicates map[string]algorithm.FitPredicate, nodes []*v1.Nod
algorithm.EmptyPredicateMetadataProducer,
prioritizers,
algorithm.EmptyPriorityMetadataProducer,
nil, nil, nil, false, false)
nil, nil, nil, false, false,
schedulerapi.DefaultPercentageOfNodesToScore)
cache.UpdateNodeNameToInfoMap(s.(*genericScheduler).cachedNodeInfoMap)
return s.(*genericScheduler)
@@ -471,13 +510,15 @@ func TestFindFitAllError(t *testing.T) {
}
for _, node := range nodes {
failures, found := predicateMap[node.Name]
if !found {
t.Errorf("failed to find node: %s in %v", node.Name, predicateMap)
}
if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate {
t.Errorf("unexpected failures: %v", failures)
}
t.Run(node.Name, func(t *testing.T) {
failures, found := predicateMap[node.Name]
if !found {
t.Errorf("failed to find node in %v", predicateMap)
}
if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate {
t.Errorf("unexpected failures: %v", failures)
}
})
}
}
@@ -501,13 +542,15 @@ func TestFindFitSomeError(t *testing.T) {
if node.Name == pod.Name {
continue
}
failures, found := predicateMap[node.Name]
if !found {
t.Errorf("failed to find node: %s in %v", node.Name, predicateMap)
}
if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate {
t.Errorf("unexpected failures: %v", failures)
}
t.Run(node.Name, func(t *testing.T) {
failures, found := predicateMap[node.Name]
if !found {
t.Errorf("failed to find node in %v", predicateMap)
}
if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate {
t.Errorf("unexpected failures: %v", failures)
}
})
}
}
@@ -599,10 +642,11 @@ func TestZeroRequest(t *testing.T) {
large2 := large
large2.NodeName = "machine2"
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
test string
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
name string
expectedScore int
}{
// The point of these next two tests is to show you get the same priority for a zero-request pod
// as for a pod with the defaults requests, both when the zero-request pod is already on the machine
@@ -610,76 +654,74 @@ func TestZeroRequest(t *testing.T) {
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
test: "test priority of zero-request pod with machine with zero-request pod",
name: "test priority of zero-request pod with machine with zero-request pod",
pods: []*v1.Pod{
{Spec: large1}, {Spec: noResources1},
{Spec: large2}, {Spec: small2},
},
expectedScore: 25,
},
{
pod: &v1.Pod{Spec: small},
nodes: []*v1.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
test: "test priority of nonzero-request pod with machine with zero-request pod",
name: "test priority of nonzero-request pod with machine with zero-request pod",
pods: []*v1.Pod{
{Spec: large1}, {Spec: noResources1},
{Spec: large2}, {Spec: small2},
},
expectedScore: 25,
},
// The point of this test is to verify that we're not just getting the same score no matter what we schedule.
{
pod: &v1.Pod{Spec: large},
nodes: []*v1.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
test: "test priority of larger pod with machine with zero-request pod",
name: "test priority of larger pod with machine with zero-request pod",
pods: []*v1.Pod{
{Spec: large1}, {Spec: noResources1},
{Spec: large2}, {Spec: small2},
},
expectedScore: 23,
},
}
const expectedPriority int = 25
for _, test := range tests {
// This should match the configuration in defaultPriorities() in
// pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
// to test what's actually in production.
priorityConfigs := []algorithm.PriorityConfig{
{Map: algorithmpriorities.LeastRequestedPriorityMap, Weight: 1},
{Map: algorithmpriorities.BalancedResourceAllocationMap, Weight: 1},
}
selectorSpreadPriorityMap, selectorSpreadPriorityReduce := algorithmpriorities.NewSelectorSpreadPriority(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
pc := algorithm.PriorityConfig{Map: selectorSpreadPriorityMap, Reduce: selectorSpreadPriorityReduce, Weight: 1}
priorityConfigs = append(priorityConfigs, pc)
t.Run(test.name, func(t *testing.T) {
// This should match the configuration in defaultPriorities() in
// pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
// to test what's actually in production.
priorityConfigs := []algorithm.PriorityConfig{
{Map: algorithmpriorities.LeastRequestedPriorityMap, Weight: 1},
{Map: algorithmpriorities.BalancedResourceAllocationMap, Weight: 1},
}
selectorSpreadPriorityMap, selectorSpreadPriorityReduce := algorithmpriorities.NewSelectorSpreadPriority(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*apps.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
pc := algorithm.PriorityConfig{Map: selectorSpreadPriorityMap, Reduce: selectorSpreadPriorityReduce, Weight: 1}
priorityConfigs = append(priorityConfigs, pc)
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
mataDataProducer := algorithmpriorities.NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
mataDataProducer := algorithmpriorities.NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*apps.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
list, err := PrioritizeNodes(
test.pod, nodeNameToInfo, mataData, priorityConfigs,
schedulertesting.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
if err != nil {
t.Errorf("unexpected error: %v", err)
}
for _, hp := range list {
if test.test == "test priority of larger pod with machine with zero-request pod" {
if hp.Score == expectedPriority {
t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list)
}
} else {
if hp.Score != expectedPriority {
t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
list, err := PrioritizeNodes(
test.pod, nodeNameToInfo, mataData, priorityConfigs,
schedulertesting.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
if err != nil {
t.Errorf("unexpected error: %v", err)
}
for _, hp := range list {
if hp.Score != test.expectedScore {
t.Errorf("expected %d for all priorities, got list %#v", test.expectedScore, list)
}
}
}
})
}
}
@@ -695,30 +737,30 @@ func printNodeToVictims(nodeToVictims map[*v1.Node]*schedulerapi.Victims) string
return output
}
func checkPreemptionVictims(testName string, expected map[string]map[string]bool, nodeToPods map[*v1.Node]*schedulerapi.Victims) error {
func checkPreemptionVictims(expected map[string]map[string]bool, nodeToPods map[*v1.Node]*schedulerapi.Victims) error {
if len(expected) == len(nodeToPods) {
for k, victims := range nodeToPods {
if expPods, ok := expected[k.Name]; ok {
if len(victims.Pods) != len(expPods) {
return fmt.Errorf("test [%v]: unexpected number of pods. expected: %v, got: %v", testName, expected, printNodeToVictims(nodeToPods))
return fmt.Errorf("unexpected number of pods. expected: %v, got: %v", expected, printNodeToVictims(nodeToPods))
}
prevPriority := int32(math.MaxInt32)
for _, p := range victims.Pods {
// Check that pods are sorted by their priority.
if *p.Spec.Priority > prevPriority {
return fmt.Errorf("test [%v]: pod %v of node %v was not sorted by priority", testName, p.Name, k)
return fmt.Errorf("pod %v of node %v was not sorted by priority", p.Name, k)
}
prevPriority = *p.Spec.Priority
if _, ok := expPods[p.Name]; !ok {
return fmt.Errorf("test [%v]: pod %v was not expected. Expected: %v", testName, p.Name, expPods)
return fmt.Errorf("pod %v was not expected. Expected: %v", p.Name, expPods)
}
}
} else {
return fmt.Errorf("test [%v]: unexpected machines. expected: %v, got: %v", testName, expected, printNodeToVictims(nodeToPods))
return fmt.Errorf("unexpected machines. expected: %v, got: %v", expected, printNodeToVictims(nodeToPods))
}
}
} else {
return fmt.Errorf("test [%v]: unexpected number of machines. expected: %v, got: %v", testName, expected, printNodeToVictims(nodeToPods))
return fmt.Errorf("unexpected number of machines. expected: %v, got: %v", expected, printNodeToVictims(nodeToPods))
}
return nil
}
@@ -879,7 +921,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
Name: "machine1",
Labels: map[string]string{"pod": "preemptor"}}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a", Labels: map[string]string{"service": "securityscan"}}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1", Affinity: &v1.Affinity{
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a"), Labels: map[string]string{"service": "securityscan"}}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1", Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
@@ -904,23 +946,25 @@ func TestSelectNodesForPreemption(t *testing.T) {
},
}
for _, test := range tests {
nodes := []*v1.Node{}
for _, n := range test.nodes {
node := makeNode(n, 1000*5, priorityutil.DefaultMemoryRequest*5)
node.ObjectMeta.Labels = map[string]string{"hostname": node.Name}
nodes = append(nodes, node)
}
if test.addAffinityPredicate {
test.predicates[algorithmpredicates.MatchInterPodAffinityPred] = algorithmpredicates.NewPodAffinityPredicate(FakeNodeInfo(*nodes[0]), schedulertesting.FakePodLister(test.pods))
}
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nodes)
nodeToPods, err := selectNodesForPreemption(test.pod, nodeNameToInfo, nodes, test.predicates, PredicateMetadata, nil, nil)
if err != nil {
t.Error(err)
}
if err := checkPreemptionVictims(test.name, test.expected, nodeToPods); err != nil {
t.Error(err)
}
t.Run(test.name, func(t *testing.T) {
nodes := []*v1.Node{}
for _, n := range test.nodes {
node := makeNode(n, 1000*5, priorityutil.DefaultMemoryRequest*5)
node.ObjectMeta.Labels = map[string]string{"hostname": node.Name}
nodes = append(nodes, node)
}
if test.addAffinityPredicate {
test.predicates[algorithmpredicates.MatchInterPodAffinityPred] = algorithmpredicates.NewPodAffinityPredicate(FakeNodeInfo(*nodes[0]), schedulertesting.FakePodLister(test.pods))
}
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nodes)
nodeToPods, err := selectNodesForPreemption(test.pod, nodeNameToInfo, nodes, test.predicates, PredicateMetadata, nil, nil)
if err != nil {
t.Error(err)
}
if err := checkPreemptionVictims(test.expected, nodeToPods); err != nil {
t.Error(err)
}
})
}
}
@@ -1067,23 +1111,25 @@ func TestPickOneNodeForPreemption(t *testing.T) {
},
}
for _, test := range tests {
nodes := []*v1.Node{}
for _, n := range test.nodes {
nodes = append(nodes, makeNode(n, priorityutil.DefaultMilliCPURequest*5, priorityutil.DefaultMemoryRequest*5))
}
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nodes)
candidateNodes, _ := selectNodesForPreemption(test.pod, nodeNameToInfo, nodes, test.predicates, PredicateMetadata, nil, nil)
node := pickOneNodeForPreemption(candidateNodes)
found := false
for _, nodeName := range test.expected {
if node.Name == nodeName {
found = true
break
t.Run(test.name, func(t *testing.T) {
nodes := []*v1.Node{}
for _, n := range test.nodes {
nodes = append(nodes, makeNode(n, priorityutil.DefaultMilliCPURequest*5, priorityutil.DefaultMemoryRequest*5))
}
}
if !found {
t.Errorf("test [%v]: unexpected node: %v", test.name, node)
}
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nodes)
candidateNodes, _ := selectNodesForPreemption(test.pod, nodeNameToInfo, nodes, test.predicates, PredicateMetadata, nil, nil)
node := pickOneNodeForPreemption(candidateNodes)
found := false
for _, nodeName := range test.expected {
if node.Name == nodeName {
found = true
break
}
}
if !found {
t.Errorf("unexpected node: %v", node)
}
})
}
}
@@ -1097,7 +1143,6 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
tests := []struct {
name string
failedPredMap FailedPredicateMap
pod *v1.Pod
expected map[string]bool // set of expected node names. Value is ignored.
}{
{
@@ -1108,33 +1153,15 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrTaintsTolerationsNotMatch},
"machine4": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeLabelPresenceViolated},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}},
expected: map[string]bool{},
},
{
name: "pod affinity should be tried",
name: "ErrPodAffinityNotMatch should be tried as it indicates that the pod is unschedulable due to inter-pod affinity or anti-affinity",
failedPredMap: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodAffinityNotMatch},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodNotMatchHostName},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnschedulable},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "hostname",
},
},
}}}},
expected: map[string]bool{"machine1": true, "machine4": true},
},
{
@@ -1143,42 +1170,16 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodAffinityNotMatch},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodNotMatchHostName},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "hostname",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"blah", "foo"},
},
},
},
TopologyKey: "region",
},
},
},
}}},
expected: map[string]bool{"machine1": true, "machine3": true, "machine4": true},
},
{
name: "ErrPodAffinityRulesNotMatch should not be tried as it indicates that the pod is unschedulable due to inter-pod affinity, but ErrPodAffinityNotMatch should be tried as it indicates that the pod is unschedulable due to inter-pod affinity or anti-affinity",
failedPredMap: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodAffinityRulesNotMatch},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodAffinityNotMatch},
},
expected: map[string]bool{"machine2": true, "machine3": true, "machine4": true},
},
{
name: "Mix of failed predicates works fine",
failedPredMap: FailedPredicateMap{
@@ -1187,21 +1188,50 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 600, 400)},
"machine4": []algorithm.PredicateFailureReason{},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}},
expected: map[string]bool{"machine3": true, "machine4": true},
},
{
name: "Node condition errors should be considered unresolvable",
failedPredMap: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderDiskPressure},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderPIDPressure},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderMemoryPressure},
"machine4": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeOutOfDisk},
},
expected: map[string]bool{},
},
{
name: "Node condition errors and ErrNodeUnknownCondition should be considered unresolvable",
failedPredMap: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeNotReady},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeNetworkUnavailable},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnknownCondition},
},
expected: map[string]bool{"machine4": true},
},
{
name: "ErrVolume... errors should not be tried as it indicates that the pod is unschedulable due to no matching volumes for pod on node",
failedPredMap: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrVolumeZoneConflict},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrVolumeNodeConflict},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrVolumeBindConflict},
},
expected: map[string]bool{"machine4": true},
},
}
for _, test := range tests {
nodes := nodesWherePreemptionMightHelp(test.pod, makeNodeList(nodeNames), test.failedPredMap)
if len(test.expected) != len(nodes) {
t.Errorf("test [%v]:number of nodes is not the same as expected. exptectd: %d, got: %d. Nodes: %v", test.name, len(test.expected), len(nodes), nodes)
}
for _, node := range nodes {
if _, found := test.expected[node.Name]; !found {
t.Errorf("test [%v]: node %v is not expected.", test.name, node.Name)
t.Run(test.name, func(t *testing.T) {
nodes := nodesWherePreemptionMightHelp(makeNodeList(nodeNames), test.failedPredMap)
if len(test.expected) != len(nodes) {
t.Errorf("number of nodes is not the same as expected. exptectd: %d, got: %d. Nodes: %v", len(test.expected), len(nodes), nodes)
}
}
for _, node := range nodes {
if _, found := test.expected[node.Name]; !found {
t.Errorf("node %v is not expected.", node.Name)
}
}
})
}
}
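Editor's note, not part of the commit: the change above (and the similar ones later in this diff) converts table-driven loops into named t.Run subtests, so each table entry gets its own name and can be selected individually, e.g. go test -run TestAdd/negative. A minimal, self-contained sketch of that pattern, with all names illustrative:

package example

import "testing"

func add(a, b int) int { return a + b }

// TestAdd shows the named-subtest pattern applied throughout this commit:
// each table entry carries a name, and t.Run reports failures per entry.
func TestAdd(t *testing.T) {
	tests := []struct {
		name string
		a, b int
		want int
	}{
		{name: "positive", a: 1, b: 2, want: 3},
		{name: "negative", a: -1, b: -2, want: -3},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			if got := add(test.a, test.b); got != test.want {
				t.Errorf("add(%d, %d) = %d, want %d", test.a, test.b, got, test.want)
			}
		})
	}
}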
@@ -1321,78 +1351,204 @@ func TestPreempt(t *testing.T) {
expectedNode: "machine1",
expectedPods: []string{"m1.1", "m1.2"},
},
{
name: "One scheduler extender allows only machine1, but it is not interested in given pod, otherwise machine1 would have been chosen",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
Containers: veryLargeContainers,
Priority: &highPriority},
},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
},
extenders: []*FakeExtender{
{
predicates: []fitPredicate{machine1PredicateExtender},
unInterested: true,
},
{
predicates: []fitPredicate{truePredicateExtender},
},
},
expectedNode: "machine3",
expectedPods: []string{},
},
}
for _, test := range tests {
stop := make(chan struct{})
cache := schedulercache.New(time.Duration(0), stop)
for _, pod := range test.pods {
cache.AddPod(pod)
}
cachedNodeInfoMap := map[string]*schedulercache.NodeInfo{}
for _, name := range nodeNames {
node := makeNode(name, 1000*5, priorityutil.DefaultMemoryRequest*5)
cache.AddNode(node)
t.Run(test.name, func(t *testing.T) {
stop := make(chan struct{})
cache := schedulercache.New(time.Duration(0), stop)
for _, pod := range test.pods {
cache.AddPod(pod)
}
cachedNodeInfoMap := map[string]*schedulercache.NodeInfo{}
for _, name := range nodeNames {
node := makeNode(name, 1000*5, priorityutil.DefaultMemoryRequest*5)
cache.AddNode(node)
// Set nodeInfo to extenders to mock extenders' cache for preemption.
cachedNodeInfo := schedulercache.NewNodeInfo()
cachedNodeInfo.SetNode(node)
cachedNodeInfoMap[name] = cachedNodeInfo
}
extenders := []algorithm.SchedulerExtender{}
for _, extender := range test.extenders {
// Set nodeInfoMap as extenders' cached node information.
extender.cachedNodeNameToInfo = cachedNodeInfoMap
extenders = append(extenders, extender)
}
scheduler := NewGenericScheduler(
cache,
nil,
NewSchedulingQueue(),
map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
algorithm.EmptyPredicateMetadataProducer,
[]algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
algorithm.EmptyPriorityMetadataProducer,
extenders,
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
// Call Preempt and check the expected results.
node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {
t.Errorf("test [%v]: unexpected error in preemption: %v", test.name, err)
}
if (node != nil && node.Name != test.expectedNode) || (node == nil && len(test.expectedNode) != 0) {
t.Errorf("test [%v]: expected node: %v, got: %v", test.name, test.expectedNode, node)
}
if len(victims) != len(test.expectedPods) {
t.Errorf("test [%v]: expected %v pods, got %v.", test.name, len(test.expectedPods), len(victims))
}
for _, victim := range victims {
found := false
for _, expPod := range test.expectedPods {
if expPod == victim.Name {
found = true
break
// Set nodeInfo to extenders to mock extenders' cache for preemption.
cachedNodeInfo := schedulercache.NewNodeInfo()
cachedNodeInfo.SetNode(node)
cachedNodeInfoMap[name] = cachedNodeInfo
}
extenders := []algorithm.SchedulerExtender{}
for _, extender := range test.extenders {
// Set nodeInfoMap as extenders' cached node information.
extender.cachedNodeNameToInfo = cachedNodeInfoMap
extenders = append(extenders, extender)
}
scheduler := NewGenericScheduler(
cache,
nil,
NewSchedulingQueue(),
map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
algorithm.EmptyPredicateMetadataProducer,
[]algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
algorithm.EmptyPriorityMetadataProducer,
extenders,
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false,
schedulerapi.DefaultPercentageOfNodesToScore)
// Call Preempt and check the expected results.
node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {
t.Errorf("unexpected error in preemption: %v", err)
}
if (node != nil && node.Name != test.expectedNode) || (node == nil && len(test.expectedNode) != 0) {
t.Errorf("expected node: %v, got: %v", test.expectedNode, node.GetName())
}
if len(victims) != len(test.expectedPods) {
t.Errorf("expected %v pods, got %v.", len(test.expectedPods), len(victims))
}
for _, victim := range victims {
found := false
for _, expPod := range test.expectedPods {
if expPod == victim.Name {
found = true
break
}
}
if !found {
t.Errorf("pod %v is not expected to be a victim.", victim.Name)
}
// Mark the victims for deletion and record the preemptor's nominated node name.
now := metav1.Now()
victim.DeletionTimestamp = &now
test.pod.Status.NominatedNodeName = node.Name
}
if !found {
t.Errorf("test [%v]: pod %v is not expected to be a victim.", test.name, victim.Name)
// Call preempt again and make sure it doesn't preempt any more pods.
node, victims, _, err = scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {
t.Errorf("unexpected error in preemption: %v", err)
}
// Mark the victims for deletion and record the preemptor's nominated node name.
now := metav1.Now()
victim.DeletionTimestamp = &now
test.pod.Status.NominatedNodeName = node.Name
}
// Call preempt again and make sure it doesn't preempt any more pods.
node, victims, _, err = scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {
t.Errorf("test [%v]: unexpected error in preemption: %v", test.name, err)
}
if node != nil && len(victims) > 0 {
t.Errorf("test [%v]: didn't expect any more preemption. Node %v is selected for preemption.", test.name, node)
}
close(stop)
if node != nil && len(victims) > 0 {
t.Errorf("didn't expect any more preemption. Node %v is selected for preemption.", node)
}
close(stop)
})
}
}
// syncingMockCache delegates method calls to an actual Cache,
// but calls to UpdateNodeNameToInfoMap synchronize with the test.
type syncingMockCache struct {
schedulercache.Cache
cycleStart, cacheInvalidated chan struct{}
once sync.Once
}
// UpdateNodeNameToInfoMap delegates to the real implementation, but on the first call, it
// synchronizes with the test.
//
// Since UpdateNodeNameToInfoMap is one of the first steps of (*genericScheduler).Schedule, we use
// this point to signal to the test that a scheduling cycle has started.
func (c *syncingMockCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
err := c.Cache.UpdateNodeNameToInfoMap(infoMap)
c.once.Do(func() {
c.cycleStart <- struct{}{}
<-c.cacheInvalidated
})
return err
}
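Editor's note, not part of the commit: the wrapper above combines sync.Once with two unbuffered channels so the test can pause a scheduling cycle exactly once. A minimal, runnable sketch of that handshake in isolation (all names here are illustrative):

package main

import (
	"fmt"
	"sync"
)

// gate mimics syncingMockCache: the first call to step signals that a cycle
// has started, then blocks until the test has injected its event.
type gate struct {
	cycleStart, invalidated chan struct{}
	once                    sync.Once
}

func (g *gate) step() {
	g.once.Do(func() {
		g.cycleStart <- struct{}{} // tell the test the cycle began
		<-g.invalidated            // wait until the test has invalidated the cache
	})
}

func main() {
	g := &gate{cycleStart: make(chan struct{}), invalidated: make(chan struct{})}
	go func() {
		<-g.cycleStart
		fmt.Println("invalidate cache here") // stands in for the test's invalidation step
		g.invalidated <- struct{}{}
	}()
	g.step() // first call blocks until the goroutine has run
	g.step() // later calls are no-ops
	fmt.Println("done")
}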
// TestCacheInvalidationRace tests that equivalence cache invalidation is correctly
// handled when an invalidation event happens early in a scheduling cycle. Specifically, the event
// occurs after the scheduler cache is snapshotted and before the equivalence cache lock is acquired.
func TestCacheInvalidationRace(t *testing.T) {
// Create a predicate that returns false the first time and true on subsequent calls.
podWillFit := false
var callCount int
testPredicate := func(pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
callCount++
if !podWillFit {
podWillFit = true
return false, []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate}, nil
}
return true, nil, nil
}
// Set up the mock cache.
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
testNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
cache.AddNode(testNode)
mockCache := &syncingMockCache{
Cache: cache,
cycleStart: make(chan struct{}),
cacheInvalidated: make(chan struct{}),
}
eCache := equivalence.NewCache()
// Ensure that equivalence cache invalidation happens after the scheduling cycle starts, but before
// the equivalence cache would be updated.
go func() {
<-mockCache.cycleStart
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "new-pod", UID: "new-pod"},
Spec: v1.PodSpec{NodeName: "machine1"}}
if err := cache.AddPod(pod); err != nil {
t.Errorf("Could not add pod to cache: %v", err)
}
eCache.InvalidateAllPredicatesOnNode("machine1")
mockCache.cacheInvalidated <- struct{}{}
}()
// Set up the scheduler.
ps := map[string]algorithm.FitPredicate{"testPredicate": testPredicate}
algorithmpredicates.SetPredicatesOrdering([]string{"testPredicate"})
prioritizers := []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}}
pvcLister := schedulertesting.FakePersistentVolumeClaimLister([]*v1.PersistentVolumeClaim{})
scheduler := NewGenericScheduler(
mockCache,
eCache,
NewSchedulingQueue(),
ps,
algorithm.EmptyPredicateMetadataProducer,
prioritizers,
algorithm.EmptyPriorityMetadataProducer,
nil, nil, pvcLister, true, false,
schedulerapi.DefaultPercentageOfNodesToScore)
// First scheduling attempt should fail.
nodeLister := schedulertesting.FakeNodeLister(makeNodeList([]string{"machine1"}))
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}}
machine, err := scheduler.Schedule(pod, nodeLister)
if machine != "" || err == nil {
t.Error("First scheduling attempt did not fail")
}
// Second scheduling attempt should succeed because cache was invalidated.
_, err = scheduler.Schedule(pod, nodeLister)
if err != nil {
t.Errorf("Second scheduling attempt failed: %v", err)
}
if callCount != 2 {
t.Errorf("Predicate should have been called twice. Was called %d times.", callCount)
}
}

View File

@@ -241,10 +241,10 @@ func (p *PriorityQueue) Add(pod *v1.Pod) error {
defer p.lock.Unlock()
err := p.activeQ.Add(pod)
if err != nil {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
glog.Errorf("Error adding pod %v/%v to the scheduling queue: %v", pod.Namespace, pod.Name, err)
} else {
if p.unschedulableQ.get(pod) != nil {
glog.Errorf("Error: pod %v is already in the unschedulable queue.", pod.Name)
glog.Errorf("Error: pod %v/%v is already in the unschedulable queue.", pod.Namespace, pod.Name)
p.deleteNominatedPodIfExists(pod)
p.unschedulableQ.delete(pod)
}
@@ -267,7 +267,7 @@ func (p *PriorityQueue) AddIfNotPresent(pod *v1.Pod) error {
}
err := p.activeQ.Add(pod)
if err != nil {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
glog.Errorf("Error adding pod %v/%v to the scheduling queue: %v", pod.Namespace, pod.Name, err)
} else {
p.addNominatedPodIfNeeded(pod)
p.cond.Broadcast()
@@ -410,7 +410,7 @@ func (p *PriorityQueue) MoveAllToActiveQueue() {
defer p.lock.Unlock()
for _, pod := range p.unschedulableQ.pods {
if err := p.activeQ.Add(pod); err != nil {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
glog.Errorf("Error adding pod %v/%v to the scheduling queue: %v", pod.Namespace, pod.Name, err)
}
}
p.unschedulableQ.clear()
@@ -425,7 +425,7 @@ func (p *PriorityQueue) movePodsToActiveQueue(pods []*v1.Pod) {
if err := p.activeQ.Add(pod); err == nil {
p.unschedulableQ.delete(pod)
} else {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
glog.Errorf("Error adding pod %v/%v to the scheduling queue: %v", pod.Namespace, pod.Name, err)
}
}
p.receivedMoveRequest = true

View File

@@ -375,6 +375,7 @@ func TestUnschedulablePodsMap(t *testing.T) {
updatedPods[3] = pods[3].DeepCopy()
tests := []struct {
name string
podsToAdd []*v1.Pod
expectedMapAfterAdd map[string]*v1.Pod
podsToUpdate []*v1.Pod
@@ -383,6 +384,7 @@ func TestUnschedulablePodsMap(t *testing.T) {
expectedMapAfterDelete map[string]*v1.Pod
}{
{
name: "create, update, delete subset of pods",
podsToAdd: []*v1.Pod{pods[0], pods[1], pods[2], pods[3]},
expectedMapAfterAdd: map[string]*v1.Pod{
util.GetPodFullName(pods[0]): pods[0],
@@ -404,6 +406,7 @@ func TestUnschedulablePodsMap(t *testing.T) {
},
},
{
name: "create, update, delete all",
podsToAdd: []*v1.Pod{pods[0], pods[3]},
expectedMapAfterAdd: map[string]*v1.Pod{
util.GetPodFullName(pods[0]): pods[0],
@@ -418,6 +421,7 @@ func TestUnschedulablePodsMap(t *testing.T) {
expectedMapAfterDelete: map[string]*v1.Pod{},
},
{
name: "delete non-existing and existing pods",
podsToAdd: []*v1.Pod{pods[1], pods[2]},
expectedMapAfterAdd: map[string]*v1.Pod{
util.GetPodFullName(pods[1]): pods[1],
@@ -435,35 +439,37 @@ func TestUnschedulablePodsMap(t *testing.T) {
},
}
for i, test := range tests {
upm := newUnschedulablePodsMap()
for _, p := range test.podsToAdd {
upm.addOrUpdate(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterAdd) {
t.Errorf("#%d: Unexpected map after adding pods. Expected: %v, got: %v",
i, test.expectedMapAfterAdd, upm.pods)
}
if len(test.podsToUpdate) > 0 {
for _, p := range test.podsToUpdate {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
upm := newUnschedulablePodsMap()
for _, p := range test.podsToAdd {
upm.addOrUpdate(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterUpdate) {
t.Errorf("#%d: Unexpected map after updating pods. Expected: %v, got: %v",
i, test.expectedMapAfterUpdate, upm.pods)
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterAdd) {
t.Errorf("Unexpected map after adding pods. Expected: %v, got: %v",
test.expectedMapAfterAdd, upm.pods)
}
}
for _, p := range test.podsToDelete {
upm.delete(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterDelete) {
t.Errorf("#%d: Unexpected map after deleting pods. Expected: %v, got: %v",
i, test.expectedMapAfterDelete, upm.pods)
}
upm.clear()
if len(upm.pods) != 0 {
t.Errorf("Expected the map to be empty, but has %v elements.", len(upm.pods))
}
if len(test.podsToUpdate) > 0 {
for _, p := range test.podsToUpdate {
upm.addOrUpdate(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterUpdate) {
t.Errorf("Unexpected map after updating pods. Expected: %v, got: %v",
test.expectedMapAfterUpdate, upm.pods)
}
}
for _, p := range test.podsToDelete {
upm.delete(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterDelete) {
t.Errorf("Unexpected map after deleting pods. Expected: %v, got: %v",
test.expectedMapAfterDelete, upm.pods)
}
upm.clear()
if len(upm.pods) != 0 {
t.Errorf("Expected the map to be empty, but has %v elements.", len(upm.pods))
}
})
}
}

View File

@@ -12,42 +12,9 @@ go_library(
"cache_comparer.go",
"factory.go",
"plugins.go",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"signal_windows.go",
],
"//conditions:default": [],
}),
"signal.go",
"signal_windows.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/factory",
deps = [
"//pkg/api/v1/pod:go_default_library",
@@ -62,32 +29,32 @@ go_library(
"//pkg/scheduler/api/validation:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/core/equivalence:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/informers/apps/v1:go_default_library",
"//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/informers/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/client-go/informers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/listers/apps/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/informers/apps/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/informers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/informers/policy/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/informers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/listers/apps/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/listers/policy/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/listers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
],
)
@@ -110,19 +77,19 @@ go_test(
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/util/testing:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/util/testing:go_default_library",
],
)

View File

@@ -27,27 +27,29 @@ import (
)
func TestCompareNodes(t *testing.T) {
compare := compareStrategy{}
tests := []struct {
name string
actual []string
cached []string
missing []string
redundant []string
}{
{
name: "redundant cached value",
actual: []string{"foo", "bar"},
cached: []string{"bar", "foo", "foobar"},
missing: []string{},
redundant: []string{"foobar"},
},
{
name: "missing cached value",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foo"},
missing: []string{"foobar"},
redundant: []string{},
},
{
name: "proper cache set",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foobar", "foo"},
missing: []string{},
@@ -56,34 +58,40 @@ func TestCompareNodes(t *testing.T) {
}
for _, test := range tests {
nodes := []*v1.Node{}
for _, nodeName := range test.actual {
node := &v1.Node{}
node.Name = nodeName
nodes = append(nodes, node)
}
t.Run(test.name, func(t *testing.T) {
testCompareNodes(test.actual, test.cached, test.missing, test.redundant, t)
})
}
}
nodeInfo := make(map[string]*schedulercache.NodeInfo)
for _, nodeName := range test.cached {
nodeInfo[nodeName] = &schedulercache.NodeInfo{}
}
func testCompareNodes(actual, cached, missing, redundant []string, t *testing.T) {
compare := compareStrategy{}
nodes := []*v1.Node{}
for _, nodeName := range actual {
node := &v1.Node{}
node.Name = nodeName
nodes = append(nodes, node)
}
m, r := compare.CompareNodes(nodes, nodeInfo)
nodeInfo := make(map[string]*schedulercache.NodeInfo)
for _, nodeName := range cached {
nodeInfo[nodeName] = &schedulercache.NodeInfo{}
}
if !reflect.DeepEqual(m, test.missing) {
t.Errorf("missing expected to be %s; got %s", test.missing, m)
}
m, r := compare.CompareNodes(nodes, nodeInfo)
if !reflect.DeepEqual(r, test.redundant) {
t.Errorf("redundant expected to be %s; got %s", test.redundant, r)
}
if !reflect.DeepEqual(m, missing) {
t.Errorf("missing expected to be %s; got %s", missing, m)
}
if !reflect.DeepEqual(r, redundant) {
t.Errorf("redundant expected to be %s; got %s", redundant, r)
}
}
func TestComparePods(t *testing.T) {
compare := compareStrategy{}
tests := []struct {
name string
actual []string
cached []string
queued []string
@@ -91,6 +99,7 @@ func TestComparePods(t *testing.T) {
redundant []string
}{
{
name: "redundant cached value",
actual: []string{"foo", "bar"},
cached: []string{"bar", "foo", "foobar"},
queued: []string{},
@@ -98,6 +107,7 @@ func TestComparePods(t *testing.T) {
redundant: []string{"foobar"},
},
{
name: "redundant and queued values",
actual: []string{"foo", "bar"},
cached: []string{"foo", "foobar"},
queued: []string{"bar"},
@@ -105,6 +115,7 @@ func TestComparePods(t *testing.T) {
redundant: []string{"foobar"},
},
{
name: "missing cached value",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foo"},
queued: []string{},
@@ -112,6 +123,7 @@ func TestComparePods(t *testing.T) {
redundant: []string{},
},
{
name: "missing and queued values",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"foo"},
queued: []string{"bar"},
@@ -119,6 +131,7 @@ func TestComparePods(t *testing.T) {
redundant: []string{},
},
{
name: "correct cache set",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foobar", "foo"},
queued: []string{},
@@ -126,6 +139,7 @@ func TestComparePods(t *testing.T) {
redundant: []string{},
},
{
name: "queued cache value",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"foobar", "foo"},
queued: []string{"bar"},
@@ -135,64 +149,73 @@ func TestComparePods(t *testing.T) {
}
for _, test := range tests {
pods := []*v1.Pod{}
for _, uid := range test.actual {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
pods = append(pods, pod)
}
t.Run(test.name, func(t *testing.T) {
testComparePods(test.actual, test.cached, test.queued, test.missing, test.redundant, t)
})
}
}
queuedPods := []*v1.Pod{}
for _, uid := range test.queued {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
queuedPods = append(queuedPods, pod)
}
func testComparePods(actual, cached, queued, missing, redundant []string, t *testing.T) {
compare := compareStrategy{}
pods := []*v1.Pod{}
for _, uid := range actual {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
pods = append(pods, pod)
}
nodeInfo := make(map[string]*schedulercache.NodeInfo)
for _, uid := range test.cached {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
pod.Namespace = "ns"
pod.Name = uid
queuedPods := []*v1.Pod{}
for _, uid := range queued {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
queuedPods = append(queuedPods, pod)
}
nodeInfo[uid] = schedulercache.NewNodeInfo(pod)
}
nodeInfo := make(map[string]*schedulercache.NodeInfo)
for _, uid := range cached {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
pod.Namespace = "ns"
pod.Name = uid
m, r := compare.ComparePods(pods, queuedPods, nodeInfo)
nodeInfo[uid] = schedulercache.NewNodeInfo(pod)
}
if !reflect.DeepEqual(m, test.missing) {
t.Errorf("missing expected to be %s; got %s", test.missing, m)
}
m, r := compare.ComparePods(pods, queuedPods, nodeInfo)
if !reflect.DeepEqual(r, test.redundant) {
t.Errorf("redundant expected to be %s; got %s", test.redundant, r)
}
if !reflect.DeepEqual(m, missing) {
t.Errorf("missing expected to be %s; got %s", missing, m)
}
if !reflect.DeepEqual(r, redundant) {
t.Errorf("redundant expected to be %s; got %s", redundant, r)
}
}
func TestComparePdbs(t *testing.T) {
compare := compareStrategy{}
tests := []struct {
name string
actual []string
cached []string
missing []string
redundant []string
}{
{
name: "redundant cache value",
actual: []string{"foo", "bar"},
cached: []string{"bar", "foo", "foobar"},
missing: []string{},
redundant: []string{"foobar"},
},
{
name: "missing cache value",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foo"},
missing: []string{"foobar"},
redundant: []string{},
},
{
name: "correct cache",
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foobar", "foo"},
missing: []string{},
@@ -201,28 +224,35 @@ func TestComparePdbs(t *testing.T) {
}
for _, test := range tests {
pdbs := []*policy.PodDisruptionBudget{}
for _, uid := range test.actual {
pdb := &policy.PodDisruptionBudget{}
pdb.UID = types.UID(uid)
pdbs = append(pdbs, pdb)
}
cache := make(map[string]*policy.PodDisruptionBudget)
for _, uid := range test.cached {
pdb := &policy.PodDisruptionBudget{}
pdb.UID = types.UID(uid)
cache[uid] = pdb
}
m, r := compare.ComparePdbs(pdbs, cache)
if !reflect.DeepEqual(m, test.missing) {
t.Errorf("missing expected to be %s; got %s", test.missing, m)
}
if !reflect.DeepEqual(r, test.redundant) {
t.Errorf("redundant expected to be %s; got %s", test.redundant, r)
}
t.Run(test.name, func(t *testing.T) {
testComparePdbs(test.actual, test.cached, test.missing, test.redundant, t)
})
}
}
func testComparePdbs(actual, cached, missing, redundant []string, t *testing.T) {
compare := compareStrategy{}
pdbs := []*policy.PodDisruptionBudget{}
for _, uid := range actual {
pdb := &policy.PodDisruptionBudget{}
pdb.UID = types.UID(uid)
pdbs = append(pdbs, pdb)
}
cache := make(map[string]*policy.PodDisruptionBudget)
for _, uid := range cached {
pdb := &policy.PodDisruptionBudget{}
pdb.UID = types.UID(uid)
cache[uid] = pdb
}
m, r := compare.ComparePdbs(pdbs, cache)
if !reflect.DeepEqual(m, missing) {
t.Errorf("missing expected to be %s; got %s", missing, m)
}
if !reflect.DeepEqual(r, redundant) {
t.Errorf("redundant expected to be %s; got %s", redundant, r)
}
}

View File

@@ -29,6 +29,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/api/policy/v1beta1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
@@ -38,15 +39,13 @@ import (
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
appsinformers "k8s.io/client-go/informers/apps/v1beta1"
appsinformers "k8s.io/client-go/informers/apps/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
extensionsinformers "k8s.io/client-go/informers/extensions/v1beta1"
policyinformers "k8s.io/client-go/informers/policy/v1beta1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
appslisters "k8s.io/client-go/listers/apps/v1beta1"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
extensionslisters "k8s.io/client-go/listers/extensions/v1beta1"
policylisters "k8s.io/client-go/listers/policy/v1beta1"
storagelisters "k8s.io/client-go/listers/storage/v1"
"k8s.io/client-go/tools/cache"
@@ -61,6 +60,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/api/validation"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/core/equivalence"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
)
@@ -98,7 +98,7 @@ type configFactory struct {
// a means to list all controllers
controllerLister corelisters.ReplicationControllerLister
// a means to list all replicasets
replicaSetLister extensionslisters.ReplicaSetLister
replicaSetLister appslisters.ReplicaSetLister
// a means to list all statefulsets
statefulSetLister appslisters.StatefulSetLister
// a means to list all PodDisruptionBudgets
@@ -123,7 +123,7 @@ type configFactory struct {
hardPodAffinitySymmetricWeight int32
// Equivalence class cache
equivalencePodCache *core.EquivalenceCache
equivalencePodCache *equivalence.Cache
// Enable equivalence class cache
enableEquivalenceClassCache bool
@@ -136,59 +136,67 @@ type configFactory struct {
// Disable pod preemption or not.
disablePreemption bool
// percentageOfNodesToScore specifies percentage of all nodes to score in each scheduling cycle.
percentageOfNodesToScore int32
}
// ConfigFactoryArgs is a set of arguments passed to NewConfigFactory.
type ConfigFactoryArgs struct {
SchedulerName string
Client clientset.Interface
NodeInformer coreinformers.NodeInformer
PodInformer coreinformers.PodInformer
PvInformer coreinformers.PersistentVolumeInformer
PvcInformer coreinformers.PersistentVolumeClaimInformer
ReplicationControllerInformer coreinformers.ReplicationControllerInformer
ReplicaSetInformer appsinformers.ReplicaSetInformer
StatefulSetInformer appsinformers.StatefulSetInformer
ServiceInformer coreinformers.ServiceInformer
PdbInformer policyinformers.PodDisruptionBudgetInformer
StorageClassInformer storageinformers.StorageClassInformer
HardPodAffinitySymmetricWeight int32
EnableEquivalenceClassCache bool
DisablePreemption bool
PercentageOfNodesToScore int32
BindTimeoutSeconds int64
}
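Editor's note, not part of the commit: NewConfigFactory now takes this single args struct instead of a long positional parameter list. A caller-side sketch of building it with keyed fields (the unkeyed literal in factory_test.go below also compiles, but keyed fields survive field reordering); the helper name and the literal values 1 and 600 are illustrative assumptions:

import (
	"k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"
	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
	"k8s.io/kubernetes/pkg/scheduler/factory"
)

// newFactoryArgs is a hypothetical helper showing keyed construction of
// ConfigFactoryArgs from a shared informer factory.
func newFactoryArgs(client clientset.Interface) *factory.ConfigFactoryArgs {
	informerFactory := informers.NewSharedInformerFactory(client, 0)
	return &factory.ConfigFactoryArgs{
		SchedulerName:                  v1.DefaultSchedulerName,
		Client:                         client,
		NodeInformer:                   informerFactory.Core().V1().Nodes(),
		PodInformer:                    informerFactory.Core().V1().Pods(),
		PvInformer:                     informerFactory.Core().V1().PersistentVolumes(),
		PvcInformer:                    informerFactory.Core().V1().PersistentVolumeClaims(),
		ReplicationControllerInformer:  informerFactory.Core().V1().ReplicationControllers(),
		ReplicaSetInformer:             informerFactory.Apps().V1().ReplicaSets(),
		StatefulSetInformer:            informerFactory.Apps().V1().StatefulSets(),
		ServiceInformer:                informerFactory.Core().V1().Services(),
		PdbInformer:                    informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
		StorageClassInformer:           informerFactory.Storage().V1().StorageClasses(),
		HardPodAffinitySymmetricWeight: 1, // assumed default weight
		EnableEquivalenceClassCache:    true,
		DisablePreemption:              false,
		PercentageOfNodesToScore:       schedulerapi.DefaultPercentageOfNodesToScore,
		BindTimeoutSeconds:             600, // matches bindTimeoutSeconds in factory_test.go below
	}
}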
// NewConfigFactory initializes the default implementation of a Configurator. To encourage eventual privatization of the struct type, we only
// return the interface.
func NewConfigFactory(
schedulerName string,
client clientset.Interface,
nodeInformer coreinformers.NodeInformer,
podInformer coreinformers.PodInformer,
pvInformer coreinformers.PersistentVolumeInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
replicationControllerInformer coreinformers.ReplicationControllerInformer,
replicaSetInformer extensionsinformers.ReplicaSetInformer,
statefulSetInformer appsinformers.StatefulSetInformer,
serviceInformer coreinformers.ServiceInformer,
pdbInformer policyinformers.PodDisruptionBudgetInformer,
storageClassInformer storageinformers.StorageClassInformer,
hardPodAffinitySymmetricWeight int32,
enableEquivalenceClassCache bool,
disablePreemption bool,
) scheduler.Configurator {
func NewConfigFactory(args *ConfigFactoryArgs) scheduler.Configurator {
stopEverything := make(chan struct{})
schedulerCache := schedulercache.New(30*time.Second, stopEverything)
// storageClassInformer is only enabled through VolumeScheduling feature gate
var storageClassLister storagelisters.StorageClassLister
if storageClassInformer != nil {
storageClassLister = storageClassInformer.Lister()
if args.StorageClassInformer != nil {
storageClassLister = args.StorageClassInformer.Lister()
}
c := &configFactory{
client: client,
client: args.Client,
podLister: schedulerCache,
podQueue: core.NewSchedulingQueue(),
pVLister: pvInformer.Lister(),
pVCLister: pvcInformer.Lister(),
serviceLister: serviceInformer.Lister(),
controllerLister: replicationControllerInformer.Lister(),
replicaSetLister: replicaSetInformer.Lister(),
statefulSetLister: statefulSetInformer.Lister(),
pdbLister: pdbInformer.Lister(),
pVLister: args.PvInformer.Lister(),
pVCLister: args.PvcInformer.Lister(),
serviceLister: args.ServiceInformer.Lister(),
controllerLister: args.ReplicationControllerInformer.Lister(),
replicaSetLister: args.ReplicaSetInformer.Lister(),
statefulSetLister: args.StatefulSetInformer.Lister(),
pdbLister: args.PdbInformer.Lister(),
storageClassLister: storageClassLister,
schedulerCache: schedulerCache,
StopEverything: stopEverything,
schedulerName: schedulerName,
hardPodAffinitySymmetricWeight: hardPodAffinitySymmetricWeight,
enableEquivalenceClassCache: enableEquivalenceClassCache,
disablePreemption: disablePreemption,
schedulerName: args.SchedulerName,
hardPodAffinitySymmetricWeight: args.HardPodAffinitySymmetricWeight,
enableEquivalenceClassCache: args.EnableEquivalenceClassCache,
disablePreemption: args.DisablePreemption,
percentageOfNodesToScore: args.PercentageOfNodesToScore,
}
c.scheduledPodsHasSynced = podInformer.Informer().HasSynced
c.scheduledPodsHasSynced = args.PodInformer.Informer().HasSynced
// scheduled pod cache
podInformer.Informer().AddEventHandler(
args.PodInformer.Informer().AddEventHandler(
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
@@ -213,15 +221,15 @@ func NewConfigFactory(
},
)
// unscheduled pod queue
podInformer.Informer().AddEventHandler(
args.PodInformer.Informer().AddEventHandler(
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
return unassignedNonTerminatedPod(t) && responsibleForPod(t, schedulerName)
return unassignedNonTerminatedPod(t) && responsibleForPod(t, args.SchedulerName)
case cache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok {
return unassignedNonTerminatedPod(pod) && responsibleForPod(pod, schedulerName)
return unassignedNonTerminatedPod(pod) && responsibleForPod(pod, args.SchedulerName)
}
runtime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, c))
return false
@@ -239,29 +247,29 @@ func NewConfigFactory(
)
// ScheduledPodLister is something we provide to plug-in functions that may
// need to call it.
c.scheduledPodLister = assignedPodLister{podInformer.Lister()}
c.scheduledPodLister = assignedPodLister{args.PodInformer.Lister()}
nodeInformer.Informer().AddEventHandler(
args.NodeInformer.Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: c.addNodeToCache,
UpdateFunc: c.updateNodeInCache,
DeleteFunc: c.deleteNodeFromCache,
},
)
c.nodeLister = nodeInformer.Lister()
c.nodeLister = args.NodeInformer.Lister()
pdbInformer.Informer().AddEventHandler(
args.PdbInformer.Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: c.addPDBToCache,
UpdateFunc: c.updatePDBInCache,
DeleteFunc: c.deletePDBFromCache,
},
)
c.pdbLister = pdbInformer.Lister()
c.pdbLister = args.PdbInformer.Lister()
// On add and delete of PVs, it will affect equivalence cache items
// related to persistent volume
pvInformer.Informer().AddEventHandler(
args.PvInformer.Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
// MaxPDVolumeCountPredicate: since it relies on the counts of PV.
AddFunc: c.onPvAdd,
@@ -269,43 +277,50 @@ func NewConfigFactory(
DeleteFunc: c.onPvDelete,
},
)
c.pVLister = pvInformer.Lister()
c.pVLister = args.PvInformer.Lister()
// This is for MaxPDVolumeCountPredicate: add/delete PVC will affect counts of PV when it is bound.
pvcInformer.Informer().AddEventHandler(
args.PvcInformer.Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: c.onPvcAdd,
UpdateFunc: c.onPvcUpdate,
DeleteFunc: c.onPvcDelete,
},
)
c.pVCLister = pvcInformer.Lister()
c.pVCLister = args.PvcInformer.Lister()
// This is for ServiceAffinity: it is affected when the selector of the service is updated.
// Also, if a new service is added, the equivalence cache will become invalid since
// existing pods may be "captured" by this service and change this predicate's result.
serviceInformer.Informer().AddEventHandler(
args.ServiceInformer.Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: c.onServiceAdd,
UpdateFunc: c.onServiceUpdate,
DeleteFunc: c.onServiceDelete,
},
)
c.serviceLister = serviceInformer.Lister()
c.serviceLister = args.ServiceInformer.Lister()
// The existing equivalence cache should not be affected by adding/deleting RC/Deployment etc.;
// invalidation only makes sense when a pod is scheduled or deleted.
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
// Setup volume binder
c.volumeBinder = volumebinder.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer)
c.volumeBinder = volumebinder.NewVolumeBinder(args.Client, args.PvcInformer, args.PvInformer, args.StorageClassInformer, time.Duration(args.BindTimeoutSeconds)*time.Second)
args.StorageClassInformer.Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: c.onStorageClassAdd,
DeleteFunc: c.onStorageClassDelete,
},
)
}
// Setup cache comparer
comparer := &cacheComparer{
podLister: podInformer.Lister(),
nodeLister: nodeInformer.Lister(),
pdbLister: pdbInformer.Lister(),
podLister: args.PodInformer.Lister(),
nodeLister: args.NodeInformer.Lister(),
pdbLister: args.PdbInformer.Lister(),
cache: c.schedulerCache,
podQueue: c.podQueue,
}
@@ -383,6 +398,13 @@ func (c *configFactory) onPvAdd(obj interface{}) {
}
c.invalidatePredicatesForPv(pv)
}
// Pods created when there are no PVs available will be stuck in the
// unschedulable queue. But unbound PVs created for static provisioning and
// a delay-binding storage class are skipped by the PV controller's dynamic
// provisioning and binding process, so they will not trigger events to schedule
// the pod again. We therefore need to move pods to the active queue on PV add
// for this scenario.
c.podQueue.MoveAllToActiveQueue()
}
func (c *configFactory) onPvUpdate(old, new interface{}) {
@@ -399,10 +421,22 @@ func (c *configFactory) onPvUpdate(old, new interface{}) {
}
c.invalidatePredicatesForPvUpdate(oldPV, newPV)
}
// Scheduler.bindVolumesWorker may fail to update assumed pod volume
// bindings due to conflicts if PVs are updated by the PV controller or other
// parties, in which case the scheduler adds the pod back to the unschedulable
// queue. We need to move pods to the active queue on PV update for this scenario.
c.podQueue.MoveAllToActiveQueue()
}
func (c *configFactory) invalidatePredicatesForPvUpdate(oldPV, newPV *v1.PersistentVolume) {
invalidPredicates := sets.NewString()
// The CheckVolumeBinding predicate calls SchedulerVolumeBinder.FindPodVolumes,
// which caches PVs in the PodBindingCache. When a PV is updated, we should
// invalidate the cache; otherwise PVAssumeCache.Assume will fail with an
// out-of-sync error.
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
invalidPredicates.Insert(predicates.CheckVolumeBindingPred)
}
for k, v := range newPV.Labels {
// If PV update modifies the zone/region labels.
if isZoneRegionLabel(k) && !reflect.DeepEqual(v, oldPV.Labels[k]) {
@@ -410,7 +444,7 @@ func (c *configFactory) invalidatePredicatesForPvUpdate(oldPV, newPV *v1.Persist
break
}
}
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
c.equivalencePodCache.InvalidatePredicates(invalidPredicates)
}
// isZoneRegionLabel check if given key of label is zone or region label.
@@ -455,6 +489,10 @@ func (c *configFactory) invalidatePredicatesForPv(pv *v1.PersistentVolume) {
invalidPredicates.Insert(predicates.MaxAzureDiskVolumeCountPred)
}
if pv.Spec.CSI != nil && utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
invalidPredicates.Insert(predicates.MaxCSIVolumeCountPred)
}
// If PV contains zone related label, it may impact cached NoVolumeZoneConflict
for k := range pv.Labels {
if isZoneRegionLabel(k) {
@@ -468,7 +506,7 @@ func (c *configFactory) invalidatePredicatesForPv(pv *v1.PersistentVolume) {
invalidPredicates.Insert(predicates.CheckVolumeBindingPred)
}
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
c.equivalencePodCache.InvalidatePredicates(invalidPredicates)
}
func (c *configFactory) onPvcAdd(obj interface{}) {
@@ -531,6 +569,10 @@ func (c *configFactory) invalidatePredicatesForPvc(pvc *v1.PersistentVolumeClaim
// The bound volume type may change
invalidPredicates := sets.NewString(maxPDVolumeCountPredicateKeys...)
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
invalidPredicates.Insert(predicates.MaxCSIVolumeCountPred)
}
// The bound volume's label may change
invalidPredicates.Insert(predicates.NoVolumeZoneConflictPred)
@@ -538,7 +580,7 @@ func (c *configFactory) invalidatePredicatesForPvc(pvc *v1.PersistentVolumeClaim
// Add/delete impacts the available PVs to choose from
invalidPredicates.Insert(predicates.CheckVolumeBindingPred)
}
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
c.equivalencePodCache.InvalidatePredicates(invalidPredicates)
}
func (c *configFactory) invalidatePredicatesForPvcUpdate(old, new *v1.PersistentVolumeClaim) {
@@ -551,14 +593,71 @@ func (c *configFactory) invalidatePredicatesForPvcUpdate(old, new *v1.Persistent
}
// The bound volume type may change
invalidPredicates.Insert(maxPDVolumeCountPredicateKeys...)
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
invalidPredicates.Insert(predicates.MaxCSIVolumeCountPred)
}
}
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
c.equivalencePodCache.InvalidatePredicates(invalidPredicates)
}
func (c *configFactory) onStorageClassAdd(obj interface{}) {
sc, ok := obj.(*storagev1.StorageClass)
if !ok {
glog.Errorf("cannot convert to *storagev1.StorageClass: %v", obj)
return
}
// CheckVolumeBindingPred fails if a pod has unbound immediate PVCs. If these
// PVCs specify a StorageClass name, creating StorageClass objects
// with late binding will cause the predicate to pass, so we need to move pods
// to the active queue.
// We don't need to invalidate cached results because results will not be
// cached for a pod that has unbound immediate PVCs.
if sc.VolumeBindingMode != nil && *sc.VolumeBindingMode == storagev1.VolumeBindingWaitForFirstConsumer {
c.podQueue.MoveAllToActiveQueue()
}
}
func (c *configFactory) onStorageClassDelete(obj interface{}) {
if c.enableEquivalenceClassCache {
var sc *storagev1.StorageClass
switch t := obj.(type) {
case *storagev1.StorageClass:
sc = t
case cache.DeletedFinalStateUnknown:
var ok bool
sc, ok = t.Obj.(*storagev1.StorageClass)
if !ok {
glog.Errorf("cannot convert to *storagev1.StorageClass: %v", t.Obj)
return
}
default:
glog.Errorf("cannot convert to *storagev1.StorageClass: %v", t)
return
}
c.invalidatePredicatesForStorageClass(sc)
}
}
func (c *configFactory) invalidatePredicatesForStorageClass(sc *storagev1.StorageClass) {
invalidPredicates := sets.NewString()
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
if sc.VolumeBindingMode != nil && *sc.VolumeBindingMode == storagev1.VolumeBindingWaitForFirstConsumer {
// Delete can cause predicates to fail
invalidPredicates.Insert(predicates.CheckVolumeBindingPred)
invalidPredicates.Insert(predicates.NoVolumeZoneConflictPred)
}
}
c.equivalencePodCache.InvalidatePredicates(invalidPredicates)
}
func (c *configFactory) onServiceAdd(obj interface{}) {
if c.enableEquivalenceClassCache {
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(serviceAffinitySet)
c.equivalencePodCache.InvalidatePredicates(serviceAffinitySet)
}
c.podQueue.MoveAllToActiveQueue()
}
@@ -569,7 +668,7 @@ func (c *configFactory) onServiceUpdate(oldObj interface{}, newObj interface{})
oldService := oldObj.(*v1.Service)
newService := newObj.(*v1.Service)
if !reflect.DeepEqual(oldService.Spec.Selector, newService.Spec.Selector) {
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(serviceAffinitySet)
c.equivalencePodCache.InvalidatePredicates(serviceAffinitySet)
}
}
c.podQueue.MoveAllToActiveQueue()
@@ -577,7 +676,7 @@ func (c *configFactory) onServiceUpdate(oldObj interface{}, newObj interface{})
func (c *configFactory) onServiceDelete(obj interface{}) {
if c.enableEquivalenceClassCache {
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(serviceAffinitySet)
c.equivalencePodCache.InvalidatePredicates(serviceAffinitySet)
}
c.podQueue.MoveAllToActiveQueue()
}
@@ -595,7 +694,7 @@ func (c *configFactory) GetSchedulerName() string {
return c.schedulerName
}
// GetClient provides a kubernetes client, mostly internal use, but may also be called by mock-tests.
// GetClient provides a kubernetes Client, mostly internal use, but may also be called by mock-tests.
func (c *configFactory) GetClient() clientset.Interface {
return c.client
}
@@ -694,13 +793,13 @@ func (c *configFactory) invalidateCachedPredicatesOnUpdatePod(newPod *v1.Pod, ol
if !reflect.DeepEqual(oldPod.GetLabels(), newPod.GetLabels()) {
// MatchInterPodAffinity need to be reconsidered for this node,
// as well as all nodes in its same failure domain.
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(
c.equivalencePodCache.InvalidatePredicates(
matchInterPodAffinitySet)
}
// if requested container resource changed, invalidate GeneralPredicates of this node
if !reflect.DeepEqual(predicates.GetResourceRequest(newPod),
predicates.GetResourceRequest(oldPod)) {
c.equivalencePodCache.InvalidateCachedPredicateItem(
c.equivalencePodCache.InvalidatePredicatesOnNode(
newPod.Spec.NodeName, generalPredicatesSets)
}
}
@@ -741,14 +840,14 @@ func (c *configFactory) invalidateCachedPredicatesOnDeletePod(pod *v1.Pod) {
// MatchInterPodAffinity need to be reconsidered for this node,
// as well as all nodes in its same failure domain.
// TODO(resouer) can we just do this for nodes in the same failure domain
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(
c.equivalencePodCache.InvalidatePredicates(
matchInterPodAffinitySet)
// if this pod have these PV, cached result of disk conflict will become invalid.
for _, volume := range pod.Spec.Volumes {
if volume.GCEPersistentDisk != nil || volume.AWSElasticBlockStore != nil ||
volume.RBD != nil || volume.ISCSI != nil {
c.equivalencePodCache.InvalidateCachedPredicateItem(
c.equivalencePodCache.InvalidatePredicatesOnNode(
pod.Spec.NodeName, noDiskConflictSet)
}
}
@@ -766,6 +865,11 @@ func (c *configFactory) addNodeToCache(obj interface{}) {
glog.Errorf("scheduler cache AddNode failed: %v", err)
}
if c.enableEquivalenceClassCache {
// GetNodeCache() will lazily create a NodeCache for the given node if it does not exist.
c.equivalencePodCache.GetNodeCache(node.GetName())
}
c.podQueue.MoveAllToActiveQueue()
// NOTE: add a new node does not affect existing predicates in equivalence cache
}
@@ -858,7 +962,7 @@ func (c *configFactory) invalidateCachedPredicatesOnNodeUpdate(newNode *v1.Node,
if newNode.Spec.Unschedulable != oldNode.Spec.Unschedulable {
invalidPredicates.Insert(predicates.CheckNodeConditionPred)
}
c.equivalencePodCache.InvalidateCachedPredicateItem(newNode.GetName(), invalidPredicates)
c.equivalencePodCache.InvalidatePredicatesOnNode(newNode.GetName(), invalidPredicates)
}
}
@@ -885,7 +989,7 @@ func (c *configFactory) deleteNodeFromCache(obj interface{}) {
glog.Errorf("scheduler cache RemoveNode failed: %v", err)
}
if c.enableEquivalenceClassCache {
c.equivalencePodCache.InvalidateAllCachedPredicateItemOfNode(node.GetName())
c.equivalencePodCache.InvalidateAllPredicatesOnNode(node.GetName())
}
}
@@ -994,7 +1098,7 @@ func (c *configFactory) CreateFromConfig(policy schedulerapi.Policy) (*scheduler
}
}
extenders := make([]algorithm.SchedulerExtender, 0)
var extenders []algorithm.SchedulerExtender
if len(policy.ExtenderConfigs) != 0 {
ignoredExtendedResources := sets.NewString()
for ii := range policy.ExtenderConfigs {
@@ -1074,7 +1178,7 @@ func (c *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
// Init equivalence class cache
if c.enableEquivalenceClassCache {
c.equivalencePodCache = core.NewEquivalenceCache()
c.equivalencePodCache = equivalence.NewCache()
glog.Info("Created equivalence class cache")
}
@@ -1091,6 +1195,7 @@ func (c *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
c.pVCLister,
c.alwaysCheckAllPredicates,
c.disablePreemption,
c.percentageOfNodesToScore,
)
podBackoff := util.CreateDefaultPodBackoff()
@@ -1178,7 +1283,7 @@ func (c *configFactory) getPluginArgs() (*PluginFactoryArgs, error) {
func (c *configFactory) getNextPod() *v1.Pod {
pod, err := c.podQueue.Pop()
if err == nil {
glog.V(4).Infof("About to try and schedule pod %v", pod.Name)
glog.V(4).Infof("About to try and schedule pod %v/%v", pod.Namespace, pod.Name)
return pod
}
glog.Errorf("Error while retrieving next pod from scheduling queue: %v", err)
@@ -1297,10 +1402,10 @@ func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) core
func (c *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue core.SchedulingQueue) func(pod *v1.Pod, err error) {
return func(pod *v1.Pod, err error) {
if err == core.ErrNoNodesAvailable {
glog.V(4).Infof("Unable to schedule %v %v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
glog.V(4).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
} else {
if _, ok := err.(*core.FitError); ok {
glog.V(4).Infof("Unable to schedule %v %v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
glog.V(4).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
} else if errors.IsNotFound(err) {
if errStatus, ok := err.(errors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
nodeName := errStatus.Status().Details.Name
@@ -1315,12 +1420,12 @@ func (c *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue
c.schedulerCache.RemoveNode(&node)
// invalidate cached predicate for the node
if c.enableEquivalenceClassCache {
c.equivalencePodCache.InvalidateAllCachedPredicateItemOfNode(nodeName)
c.equivalencePodCache.InvalidateAllPredicatesOnNode(nodeName)
}
}
}
} else {
glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
glog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
}
}
@@ -1413,7 +1518,7 @@ type podConditionUpdater struct {
}
func (p *podConditionUpdater) Update(pod *v1.Pod, condition *v1.PodCondition) error {
glog.V(2).Infof("Updating pod condition for %s/%s to (%s==%s)", pod.Namespace, pod.Name, condition.Type, condition.Status)
glog.V(3).Infof("Updating pod condition for %s/%s to (%s==%s)", pod.Namespace, pod.Name, condition.Type, condition.Status)
if podutil.UpdatePodCondition(&pod.Status, condition) {
_, err := p.Client.CoreV1().Pods(pod.Namespace).UpdateStatus(pod)
return err

View File

@@ -49,6 +49,7 @@ import (
const (
enableEquivalenceCache = true
disablePodPreemption = false
bindTimeoutSeconds = 600
)
func TestCreate(t *testing.T) {
@@ -331,53 +332,65 @@ func TestNodeEnumerator(t *testing.T) {
t.Fatalf("expected %v, got %v", e, a)
}
for i := range testList.Items {
gotObj := me.Get(i)
if e, a := testList.Items[i].Name, gotObj.(*v1.Node).Name; e != a {
t.Errorf("Expected %v, got %v", e, a)
}
if e, a := &testList.Items[i], gotObj; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %#v, got %v#", e, a)
}
t.Run(fmt.Sprintf("node enumerator/%v", i), func(t *testing.T) {
gotObj := me.Get(i)
if e, a := testList.Items[i].Name, gotObj.(*v1.Node).Name; e != a {
t.Errorf("Expected %v, got %v", e, a)
}
if e, a := &testList.Items[i], gotObj; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %#v, got %v#", e, a)
}
})
}
}
func TestBind(t *testing.T) {
table := []struct {
name string
binding *v1.Binding
}{
{binding: &v1.Binding{
ObjectMeta: metav1.ObjectMeta{
Namespace: metav1.NamespaceDefault,
Name: "foo",
{
name: "binding can bind and validate request",
binding: &v1.Binding{
ObjectMeta: metav1.ObjectMeta{
Namespace: metav1.NamespaceDefault,
Name: "foo",
},
Target: v1.ObjectReference{
Name: "foohost.kubernetes.mydomain.com",
},
},
Target: v1.ObjectReference{
Name: "foohost.kubernetes.mydomain.com",
},
}},
},
}
for _, item := range table {
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
b := binder{client}
if err := b.Bind(item.binding); err != nil {
t.Errorf("Unexpected error: %v", err)
continue
}
expectedBody := runtime.EncodeOrDie(schedulertesting.Test.Codec(), item.binding)
handler.ValidateRequest(t,
schedulertesting.Test.SubResourcePath(string(v1.ResourcePods), metav1.NamespaceDefault, "foo", "binding"),
"POST", &expectedBody)
for _, test := range table {
t.Run(test.name, func(t *testing.T) {
testBind(test.binding, t)
})
}
}
func testBind(binding *v1.Binding, t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
b := binder{client}
if err := b.Bind(binding); err != nil {
t.Errorf("Unexpected error: %v", err)
return
}
expectedBody := runtime.EncodeOrDie(schedulertesting.Test.Codec(), binding)
handler.ValidateRequest(t,
schedulertesting.Test.SubResourcePath(string(v1.ResourcePods), metav1.NamespaceDefault, "foo", "binding"),
"POST", &expectedBody)
}
func TestInvalidHardPodAffinitySymmetricWeight(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
@@ -406,38 +419,44 @@ func TestInvalidFactoryArgs(t *testing.T) {
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
testCases := []struct {
name string
hardPodAffinitySymmetricWeight int32
expectErr string
}{
{
name: "symmetric weight below range",
hardPodAffinitySymmetricWeight: -1,
expectErr: "invalid hardPodAffinitySymmetricWeight: -1, must be in the range 0-100",
},
{
name: "symmetric weight above range",
hardPodAffinitySymmetricWeight: 101,
expectErr: "invalid hardPodAffinitySymmetricWeight: 101, must be in the range 0-100",
},
}
for _, test := range testCases {
factory := newConfigFactory(client, test.hardPodAffinitySymmetricWeight)
_, err := factory.Create()
if err == nil {
t.Errorf("expected err: %s, got nothing", test.expectErr)
}
t.Run(test.name, func(t *testing.T) {
factory := newConfigFactory(client, test.hardPodAffinitySymmetricWeight)
_, err := factory.Create()
if err == nil {
t.Errorf("expected err: %s, got nothing", test.expectErr)
}
})
}
}
func TestSkipPodUpdate(t *testing.T) {
for _, test := range []struct {
table := []struct {
pod *v1.Pod
isAssumedPodFunc func(*v1.Pod) bool
getPodFunc func(*v1.Pod) *v1.Pod
expected bool
name string
}{
// Non-assumed pod should not be skipped.
{
name: "Non-assumed pod",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
@@ -453,9 +472,8 @@ func TestSkipPodUpdate(t *testing.T) {
},
expected: false,
},
// Pod update (with changes on ResourceVersion, Spec.NodeName and/or
// Annotations) for an already assumed pod should be skipped.
{
name: "with changes on ResourceVersion, Spec.NodeName and/or Annotations",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
@@ -483,9 +501,8 @@ func TestSkipPodUpdate(t *testing.T) {
},
expected: true,
},
// Pod update (with changes on Labels) for an already assumed pod
// should not be skipped.
{
name: "with changes on Labels",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
@@ -505,23 +522,26 @@ func TestSkipPodUpdate(t *testing.T) {
},
expected: false,
},
} {
c := &configFactory{
schedulerCache: &schedulertesting.FakeCache{
IsAssumedPodFunc: test.isAssumedPodFunc,
GetPodFunc: test.getPodFunc,
},
}
got := c.skipPodUpdate(test.pod)
if got != test.expected {
t.Errorf("skipPodUpdate() = %t, expected = %t", got, test.expected)
}
}
for _, test := range table {
t.Run(test.name, func(t *testing.T) {
c := &configFactory{
schedulerCache: &schedulertesting.FakeCache{
IsAssumedPodFunc: test.isAssumedPodFunc,
GetPodFunc: test.getPodFunc,
},
}
got := c.skipPodUpdate(test.pod)
if got != test.expected {
t.Errorf("skipPodUpdate() = %t, expected = %t", got, test.expected)
}
})
}
}
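The table above encodes the rule that an update to an already-assumed pod is skippable only when the change is confined to ResourceVersion, Spec.NodeName and Annotations. A self-contained sketch of that comparison (helper name and exact field handling are illustrative, not the factory.go body):
// updateOnlyTouchesSkippableFields reports whether two pod snapshots differ only in the
// fields the scheduler is allowed to ignore for an already-assumed pod (illustrative helper).
func updateOnlyTouchesSkippableFields(oldPod, newPod *v1.Pod) bool {
	o, n := oldPod.DeepCopy(), newPod.DeepCopy()
	for _, p := range []*v1.Pod{o, n} {
		p.ResourceVersion = ""
		p.Spec.NodeName = ""
		p.Annotations = nil
	}
	return reflect.DeepEqual(o, n)
}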
func newConfigFactory(client *clientset.Clientset, hardPodAffinitySymmetricWeight int32) scheduler.Configurator {
informerFactory := informers.NewSharedInformerFactory(client, 0)
return NewConfigFactory(
return NewConfigFactory(&ConfigFactoryArgs{
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
@@ -529,15 +549,17 @@ func newConfigFactory(client *clientset.Clientset, hardPodAffinitySymmetricWeigh
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
informerFactory.Extensions().V1beta1().ReplicaSets(),
informerFactory.Apps().V1beta1().StatefulSets(),
informerFactory.Apps().V1().ReplicaSets(),
informerFactory.Apps().V1().StatefulSets(),
informerFactory.Core().V1().Services(),
informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory.Storage().V1().StorageClasses(),
hardPodAffinitySymmetricWeight,
enableEquivalenceCache,
disablePodPreemption,
)
schedulerapi.DefaultPercentageOfNodesToScore,
bindTimeoutSeconds,
})
}
type fakeExtender struct {
@@ -593,24 +615,22 @@ func (f *fakeExtender) IsInterested(pod *v1.Pod) bool {
}
func TestGetBinderFunc(t *testing.T) {
for _, test := range []struct {
podName string
extenders []algorithm.SchedulerExtender
table := []struct {
podName string
extenders []algorithm.SchedulerExtender
expectedBinderType string
name string
}{
// Expect to return the default binder because the extender is not a
// binder, even though it's interested in the pod.
{
name: "the extender is not a binder",
podName: "pod0",
extenders: []algorithm.SchedulerExtender{
&fakeExtender{isBinder: false, interestedPodName: "pod0"},
},
expectedBinderType: "*factory.binder",
},
// Expect to return the fake binder because one of the extenders is a
// binder and it's interested in the pod.
{
name: "one of the extenders is a binder and interested in pod",
podName: "pod0",
extenders: []algorithm.SchedulerExtender{
&fakeExtender{isBinder: false, interestedPodName: "pod0"},
@@ -618,9 +638,8 @@ func TestGetBinderFunc(t *testing.T) {
},
expectedBinderType: "*factory.fakeExtender",
},
// Expect to return the default binder because one of the extenders is
// a binder but the binder is not interested in the pod.
{
name: "one of the extenders is a binder, but not interested in pod",
podName: "pod1",
extenders: []algorithm.SchedulerExtender{
&fakeExtender{isBinder: false, interestedPodName: "pod1"},
@@ -628,20 +647,28 @@ func TestGetBinderFunc(t *testing.T) {
},
expectedBinderType: "*factory.binder",
},
} {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: test.podName,
},
}
}
f := &configFactory{}
binderFunc := f.getBinderFunc(test.extenders)
binder := binderFunc(pod)
binderType := fmt.Sprintf("%s", reflect.TypeOf(binder))
if binderType != test.expectedBinderType {
t.Errorf("Expected binder %q but got %q", test.expectedBinderType, binderType)
}
for _, test := range table {
t.Run(test.name, func(t *testing.T) {
testGetBinderFunc(test.expectedBinderType, test.podName, test.extenders, t)
})
}
}
func testGetBinderFunc(expectedBinderType, podName string, extenders []algorithm.SchedulerExtender, t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
},
}
f := &configFactory{}
binderFunc := f.getBinderFunc(extenders)
binder := binderFunc(pod)
binderType := fmt.Sprintf("%s", reflect.TypeOf(binder))
if binderType != expectedBinderType {
t.Errorf("Expected binder %q but got %q", expectedBinderType, binderType)
}
}
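The three cases above pin down getBinderFunc's contract: prefer an extender that is a binder and is interested in the pod, otherwise fall back to the default API binder. A sketch of that selection, assuming the extender interface exposes IsBinder and IsInterested as fakeExtender suggests (illustrative, not the upstream body):
// Illustrative selection logic matching the expectations in TestGetBinderFunc.
// Binder here is the scheduler package's Binder interface; an extender can satisfy it
// because SchedulerExtender also carries a Bind method.
func chooseBinder(c *configFactory, extenders []algorithm.SchedulerExtender) func(pod *v1.Pod) Binder {
	return func(pod *v1.Pod) Binder {
		for _, extender := range extenders {
			if extender.IsBinder() && extender.IsInterested(pod) {
				return extender
			}
		}
		return &binder{c.client}
	}
}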

View File

@@ -36,14 +36,18 @@ func TestAlgorithmNameValidation(t *testing.T) {
"Some,Alg:orithm",
}
for _, name := range algorithmNamesShouldValidate {
if !validName.MatchString(name) {
t.Errorf("%v should be a valid algorithm name but is not valid.", name)
}
t.Run(name, func(t *testing.T) {
if !validName.MatchString(name) {
t.Errorf("should be a valid algorithm name but is not valid.")
}
})
}
for _, name := range algorithmNamesShouldNotValidate {
if validName.MatchString(name) {
t.Errorf("%v should be an invalid algorithm name but is valid.", name)
}
t.Run(name, func(t *testing.T) {
if validName.MatchString(name) {
t.Errorf("should be an invalid algorithm name but is valid.")
}
})
}
}
@@ -70,16 +74,18 @@ func TestValidatePriorityConfigOverFlow(t *testing.T) {
},
}
for _, test := range tests {
err := validateSelectedConfigs(test.configs)
if test.expected {
if err == nil {
t.Errorf("Expected Overflow for %s", test.description)
t.Run(test.description, func(t *testing.T) {
err := validateSelectedConfigs(test.configs)
if test.expected {
if err == nil {
t.Errorf("Expected Overflow")
}
} else {
if err != nil {
t.Errorf("Did not expect an overflow")
}
}
} else {
if err != nil {
t.Errorf("Did not expect an overflow for %s", test.description)
}
}
})
}
}

View File

@@ -117,6 +117,23 @@ var (
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
equivalenceCacheLookups = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "equiv_cache_lookups_total",
Help: "Total number of equivalence cache lookups, by whether or not a cache entry was found",
}, []string{"result"})
EquivalenceCacheHits = equivalenceCacheLookups.With(prometheus.Labels{"result": "hit"})
EquivalenceCacheMisses = equivalenceCacheLookups.With(prometheus.Labels{"result": "miss"})
EquivalenceCacheWrites = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "equiv_cache_writes",
Help: "Total number of equivalence cache writes, by result",
}, []string{"result"})
metricsList = []prometheus.Collector{
SchedulingLatency,
E2eSchedulingLatency,
@@ -127,6 +144,8 @@ var (
SchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
equivalenceCacheLookups,
EquivalenceCacheWrites,
}
)
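For reference, equivalenceCacheLookups is a labeled CounterVec and the two exported names above are its pre-curried "hit" and "miss" children. A self-contained sketch of how such a pair is typically bumped around a cache probe (the registration and recordLookup helper are illustrative, not the scheduler's actual call sites):
package main

import "github.com/prometheus/client_golang/prometheus"

var lookups = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Subsystem: "scheduler",
		Name:      "equiv_cache_lookups_total",
		Help:      "Total number of equivalence cache lookups, by whether or not a cache entry was found",
	},
	[]string{"result"},
)

var (
	hits   = lookups.With(prometheus.Labels{"result": "hit"})
	misses = lookups.With(prometheus.Labels{"result": "miss"})
)

// recordLookup bumps the matching child counter for one cache probe.
func recordLookup(found bool) {
	if found {
		hits.Inc()
	} else {
		misses.Inc()
	}
}

func main() {
	prometheus.MustRegister(lookups)
	recordLookup(true)
	recordLookup(false)
}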

View File

@@ -17,7 +17,6 @@ limitations under the License.
package scheduler
import (
"fmt"
"time"
"k8s.io/api/core/v1"
@@ -34,6 +33,7 @@ import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/core/equivalence"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
@@ -73,21 +73,29 @@ func (sched *Scheduler) StopEverything() {
close(sched.config.StopEverything)
}
// Cache returns the scheduler's cache so that tests can inspect its data.
func (sched *Scheduler) Cache() schedulercache.Cache {
return sched.config.SchedulerCache
}
// Configurator defines I/O, caching, and other functionality needed to
// construct a new scheduler. An implementation of this can be seen in
// factory.go.
type Configurator interface {
GetPriorityFunctionConfigs(priorityKeys sets.String) ([]algorithm.PriorityConfig, error)
GetPriorityMetadataProducer() (algorithm.PriorityMetadataProducer, error)
// Exposed for testing
GetHardPodAffinitySymmetricWeight() int32
// Exposed for testing
MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue core.SchedulingQueue) func(pod *v1.Pod, err error)
// Predicate related accessors to be exposed for use by k8s.io/autoscaler/cluster-autoscaler
GetPredicateMetadataProducer() (algorithm.PredicateMetadataProducer, error)
GetPredicates(predicateKeys sets.String) (map[string]algorithm.FitPredicate, error)
GetHardPodAffinitySymmetricWeight() int32
GetSchedulerName() string
MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue core.SchedulingQueue) func(pod *v1.Pod, err error)
// Needs to be exposed for things like integration tests where we want to make fake nodes.
GetNodeLister() corelisters.NodeLister
// Exposed for testing
GetClient() clientset.Interface
// Exposed for testing
GetScheduledPodLister() corelisters.PodLister
Create() (*Config, error)
@@ -104,7 +112,7 @@ type Config struct {
SchedulerCache schedulercache.Cache
// Ecache is used to optimistically invalidate affected cache items after
// successfully binding a pod
Ecache *core.EquivalenceCache
Ecache *equivalence.Cache
NodeLister algorithm.NodeLister
Algorithm algorithm.ScheduleAlgorithm
GetBinder func(pod *v1.Pod) Binder
@@ -175,10 +183,6 @@ func (sched *Scheduler) Run() {
return
}
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
go sched.config.VolumeBinder.Run(sched.bindVolumesWorker, sched.config.StopEverything)
}
go wait.Until(sched.scheduleOne, 0, sched.config.StopEverything)
}
@@ -191,7 +195,6 @@ func (sched *Scheduler) Config() *Config {
func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
host, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
if err != nil {
glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
pod = pod.DeepCopy()
sched.config.Error(pod, err)
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "%v", err)
@@ -232,7 +235,7 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
nodeName = node.Name
err = sched.config.PodPreemptor.SetNominatedNodeName(preemptor, nodeName)
if err != nil {
glog.Errorf("Error in preemption process. Cannot update pod %v annotations: %v", preemptor.Name, err)
glog.Errorf("Error in preemption process. Cannot update pod %v/%v annotations: %v", preemptor.Namespace, preemptor.Name, err)
return "", err
}
for _, victim := range victims {
@@ -257,17 +260,12 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
return nodeName, err
}
// assumeAndBindVolumes will update the volume cache and then asynchronously bind volumes if required.
//
// If volume binding is required, then the bind volumes routine will update the pod to send it back through
// the scheduler.
//
// Otherwise, return nil error and continue to assume the pod.
// assumeVolumes will update the volume cache with the chosen bindings
//
// This function modifies assumed if volume binding is required.
func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error {
func (sched *Scheduler) assumeVolumes(assumed *v1.Pod, host string) (allBound bool, err error) {
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
allBound, bindingRequired, err := sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
allBound, err = sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
if err != nil {
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePodVolumes failed: %v", err)
@@ -277,76 +275,38 @@ func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error
Reason: "SchedulerError",
Message: err.Error(),
})
return err
}
if !allBound {
err = fmt.Errorf("Volume binding started, waiting for completion")
if bindingRequired {
if sched.config.Ecache != nil {
invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
sched.config.Ecache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
}
// bindVolumesWorker() will update the Pod object to put it back in the scheduler queue
sched.config.VolumeBinder.BindQueue.Add(assumed)
} else {
// We are just waiting for PV controller to finish binding, put it back in the
// scheduler queue
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "FailedScheduling", "%v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "VolumeBindingWaiting",
})
}
return err
// Invalidate ecache because assumed volumes could have affected the cached
// pvs for other pods
if sched.config.Ecache != nil {
invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
sched.config.Ecache.InvalidatePredicates(invalidPredicates)
}
}
return nil
return
}
// bindVolumesWorker() processes pods queued in assumeAndBindVolumes() and tries to
// make the API update for volume binding.
// This function runs forever until the volume BindQueue is closed.
func (sched *Scheduler) bindVolumesWorker() {
workFunc := func() bool {
keyObj, quit := sched.config.VolumeBinder.BindQueue.Get()
if quit {
return true
}
defer sched.config.VolumeBinder.BindQueue.Done(keyObj)
// bindVolumes will make the API update with the assumed bindings and wait until
// the PV controller has completely finished the binding operation.
//
// If binding errors, times out or gets undone, then an error will be returned to
// retry scheduling.
func (sched *Scheduler) bindVolumes(assumed *v1.Pod) error {
var reason string
var eventType string
assumed, ok := keyObj.(*v1.Pod)
if !ok {
glog.V(4).Infof("Object is not a *v1.Pod")
return false
glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
if err != nil {
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
// Unassume the Pod and retry scheduling
if forgetErr := sched.config.SchedulerCache.ForgetPod(assumed); forgetErr != nil {
glog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
}
// TODO: add metrics
var reason string
var eventType string
glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
// The Pod is always sent back to the scheduler afterwards.
err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
if err != nil {
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
reason = "VolumeBindingFailed"
eventType = v1.EventTypeWarning
} else {
glog.V(4).Infof("Successfully bound volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
reason = "VolumeBindingWaiting"
eventType = v1.EventTypeNormal
err = fmt.Errorf("Volume binding started, waiting for completion")
}
// Always fail scheduling regardless of binding success.
// The Pod needs to be sent back through the scheduler to:
// * Retry volume binding if it fails.
// * Retry volume binding if dynamic provisioning fails.
// * Bind the Pod to the Node once all volumes are bound.
reason = "VolumeBindingFailed"
eventType = v1.EventTypeWarning
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, eventType, "FailedScheduling", "%v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
@@ -354,15 +314,11 @@ func (sched *Scheduler) bindVolumesWorker() {
Status: v1.ConditionFalse,
Reason: reason,
})
return false
return err
}
for {
if quit := workFunc(); quit {
glog.V(4).Infof("bindVolumesWorker shutting down")
break
}
}
glog.V(5).Infof("Success binding volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
return nil
}
// assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
@@ -470,16 +426,12 @@ func (sched *Scheduler) scheduleOne() {
// Assume volumes first before assuming the pod.
//
// If no volumes need binding, then nil is returned, and continue to assume the pod.
// If all volumes are completely bound, then allBound is true and binding will be skipped.
//
// Otherwise, error is returned and volume binding is started asynchronously for all of the pod's volumes.
// scheduleOne() returns immediately on error, so that it doesn't continue to assume the pod.
//
// After the asynchronous volume binding updates are made, it will send the pod back through the scheduler for
// subsequent passes until all volumes are fully bound.
// Otherwise, binding of volumes is started after the pod is assumed, but before pod binding.
//
// This function modifies 'assumedPod' if volume binding is required.
err = sched.assumeAndBindVolumes(assumedPod, suggestedHost)
allBound, err := sched.assumeVolumes(assumedPod, suggestedHost)
if err != nil {
return
}
@@ -491,6 +443,14 @@ func (sched *Scheduler) scheduleOne() {
}
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
go func() {
// Bind volumes first before Pod
if !allBound {
err = sched.bindVolumes(assumedPod)
if err != nil {
return
}
}
err := sched.bind(assumedPod, &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
Target: v1.ObjectReference{

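Taken together, the hunks above replace the old queue-driven bindVolumesWorker with an inline flow: volumes are only assumed synchronously, and any required API binding happens in the same goroutine that later binds the pod. A simplified sketch of the resulting order of operations in scheduleOne, condensed from the diff (not the literal body):
// Condensed from the diff above.
allBound, err := sched.assumeVolumes(assumedPod, suggestedHost) // cache-only update
if err != nil {
	return // assumeVolumes already reported the error and re-queued the pod
}
// ... assume the pod in the scheduler cache ...
go func() {
	if !allBound {
		if err := sched.bindVolumes(assumedPod); err != nil {
			return // bindVolumes already sent the pod back through the error handler
		}
	}
	// ... then issue the pod -> node Binding as before ...
}()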
View File

@@ -37,6 +37,7 @@ import (
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
@@ -145,6 +146,7 @@ func TestScheduler(t *testing.T) {
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
table := []struct {
name string
injectBindError error
sendPod *v1.Pod
algo algorithm.ScheduleAlgorithm
@@ -156,18 +158,23 @@ func TestScheduler(t *testing.T) {
eventReason string
}{
{
name: "bind assumed pod scheduled",
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
eventReason: "Scheduled",
}, {
},
{
name: "error pod failed scheduling",
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, errS},
expectError: errS,
expectErrorPod: podWithID("foo", ""),
eventReason: "FailedScheduling",
}, {
},
{
name: "error bind forget pod failed scheduling",
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
@@ -184,71 +191,73 @@ func TestScheduler(t *testing.T) {
},
}
for i, item := range table {
var gotError error
var gotPod *v1.Pod
var gotForgetPod *v1.Pod
var gotAssumedPod *v1.Pod
var gotBinding *v1.Binding
configurator := &FakeConfigurator{
Config: &Config{
SchedulerCache: &schedulertesting.FakeCache{
ForgetFunc: func(pod *v1.Pod) {
gotForgetPod = pod
for _, item := range table {
t.Run(item.name, func(t *testing.T) {
var gotError error
var gotPod *v1.Pod
var gotForgetPod *v1.Pod
var gotAssumedPod *v1.Pod
var gotBinding *v1.Binding
configurator := &FakeConfigurator{
Config: &Config{
SchedulerCache: &schedulertesting.FakeCache{
ForgetFunc: func(pod *v1.Pod) {
gotForgetPod = pod
},
AssumeFunc: func(pod *v1.Pod) {
gotAssumedPod = pod
},
},
AssumeFunc: func(pod *v1.Pod) {
gotAssumedPod = pod
NodeLister: schedulertesting.FakeNodeLister(
[]*v1.Node{&testNode},
),
Algorithm: item.algo,
GetBinder: func(pod *v1.Pod) Binder {
return fakeBinder{func(b *v1.Binding) error {
gotBinding = b
return item.injectBindError
}}
},
PodConditionUpdater: fakePodConditionUpdater{},
Error: func(p *v1.Pod, err error) {
gotPod = p
gotError = err
},
NextPod: func() *v1.Pod {
return item.sendPod
},
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: "scheduler"}),
VolumeBinder: volumebinder.NewFakeVolumeBinder(&persistentvolume.FakeVolumeBinderConfig{AllBound: true}),
},
NodeLister: schedulertesting.FakeNodeLister(
[]*v1.Node{&testNode},
),
Algorithm: item.algo,
GetBinder: func(pod *v1.Pod) Binder {
return fakeBinder{func(b *v1.Binding) error {
gotBinding = b
return item.injectBindError
}}
},
PodConditionUpdater: fakePodConditionUpdater{},
Error: func(p *v1.Pod, err error) {
gotPod = p
gotError = err
},
NextPod: func() *v1.Pod {
return item.sendPod
},
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: "scheduler"}),
VolumeBinder: volumebinder.NewFakeVolumeBinder(&persistentvolume.FakeVolumeBinderConfig{AllBound: true}),
},
}
s, _ := NewFromConfigurator(configurator, nil...)
called := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("%v: expected %v, got %v", i, e, a)
}
close(called)
s, _ := NewFromConfigurator(configurator, nil...)
called := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("expected %v, got %v", e, a)
}
close(called)
})
s.scheduleOne()
<-called
if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
t.Errorf("assumed pod: wanted %v, got %v", e, a)
}
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
t.Errorf("error pod: wanted %v, got %v", e, a)
}
if e, a := item.expectForgetPod, gotForgetPod; !reflect.DeepEqual(e, a) {
t.Errorf("forget pod: wanted %v, got %v", e, a)
}
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
t.Errorf("error: wanted %v, got %v", e, a)
}
if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) {
t.Errorf("error: %s", diff.ObjectDiff(e, a))
}
events.Stop()
})
s.scheduleOne()
<-called
if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectForgetPod, gotForgetPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: forget pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
}
if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: %s", i, diff.ObjectDiff(e, a))
}
events.Stop()
}
}
@@ -381,52 +390,57 @@ func TestSchedulerErrorWithLongBinding(t *testing.T) {
conflictPod := podWithPort("bar", "", 8080)
pods := map[string]*v1.Pod{firstPod.Name: firstPod, conflictPod.Name: conflictPod}
for _, test := range []struct {
name string
Expected map[string]bool
CacheTTL time.Duration
BindingDuration time.Duration
}{
{
name: "long cache ttl",
Expected: map[string]bool{firstPod.Name: true},
CacheTTL: 100 * time.Millisecond,
BindingDuration: 300 * time.Millisecond,
},
{
name: "short cache ttl",
Expected: map[string]bool{firstPod.Name: true},
CacheTTL: 10 * time.Second,
BindingDuration: 300 * time.Millisecond,
},
} {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(test.CacheTTL, stop)
t.Run(test.name, func(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(test.CacheTTL, stop)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
scheduler, bindingChan := setupTestSchedulerLongBindingWithRetry(
queuedPodStore, scache, nodeLister, predicateMap, stop, test.BindingDuration)
scheduler.Run()
queuedPodStore.Add(firstPod)
queuedPodStore.Add(conflictPod)
scheduler, bindingChan := setupTestSchedulerLongBindingWithRetry(
queuedPodStore, scache, nodeLister, predicateMap, stop, test.BindingDuration)
scheduler.Run()
queuedPodStore.Add(firstPod)
queuedPodStore.Add(conflictPod)
resultBindings := map[string]bool{}
waitChan := time.After(5 * time.Second)
for finished := false; !finished; {
select {
case b := <-bindingChan:
resultBindings[b.Name] = true
p := pods[b.Name]
p.Spec.NodeName = b.Target.Name
scache.AddPod(p)
case <-waitChan:
finished = true
resultBindings := map[string]bool{}
waitChan := time.After(5 * time.Second)
for finished := false; !finished; {
select {
case b := <-bindingChan:
resultBindings[b.Name] = true
p := pods[b.Name]
p.Spec.NodeName = b.Target.Name
scache.AddPod(p)
case <-waitChan:
finished = true
}
}
}
if !reflect.DeepEqual(resultBindings, test.Expected) {
t.Errorf("Result binding are not equal to expected. %v != %v", resultBindings, test.Expected)
}
if !reflect.DeepEqual(resultBindings, test.Expected) {
t.Errorf("Result binding are not equal to expected. %v != %v", resultBindings, test.Expected)
}
})
}
}
@@ -548,7 +562,8 @@ func setupTestScheduler(queuedPodStore *clientcache.FIFO, scache schedulercache.
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
false,
api.DefaultPercentageOfNodesToScore)
bindingChan := make(chan *v1.Binding, 1)
errChan := make(chan error, 1)
configurator := &FakeConfigurator{
@@ -597,7 +612,8 @@ func setupTestSchedulerLongBindingWithRetry(queuedPodStore *clientcache.FIFO, sc
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
false,
api.DefaultPercentageOfNodesToScore)
bindingChan := make(chan *v1.Binding, 2)
configurator := &FakeConfigurator{
Config: &Config{
@@ -673,7 +689,8 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true")
defer utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false")
table := map[string]struct {
table := []struct {
name string
expectError error
expectPodBind *v1.Binding
expectAssumeCalled bool
@@ -681,7 +698,8 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
eventReason string
volumeBinderConfig *persistentvolume.FakeVolumeBinderConfig
}{
"all-bound": {
{
name: "all bound",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
AllBound: true,
FindUnboundSatsified: true,
@@ -689,10 +707,10 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
},
expectAssumeCalled: true,
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
eventReason: "Scheduled",
},
"bound,invalid-pv-affinity": {
{
name: "bound/invalid pv affinity",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
AllBound: true,
FindUnboundSatsified: true,
@@ -701,7 +719,8 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
eventReason: "FailedScheduling",
expectError: makePredicateError("1 node(s) had volume node affinity conflict"),
},
"unbound,no-matches": {
{
name: "unbound/no matches",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: false,
FindBoundSatsified: true,
@@ -709,7 +728,8 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
eventReason: "FailedScheduling",
expectError: makePredicateError("1 node(s) didn't find available persistent volumes to bind"),
},
"bound-and-unbound-unsatisfied": {
{
name: "bound and unbound unsatisfied",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: false,
FindBoundSatsified: false,
@@ -717,36 +737,27 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
eventReason: "FailedScheduling",
expectError: makePredicateError("1 node(s) didn't find available persistent volumes to bind, 1 node(s) had volume node affinity conflict"),
},
"unbound,found-matches": {
{
name: "unbound/found matches/bind succeeds",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeBindingRequired: true,
FindUnboundSatsified: true,
FindBoundSatsified: true,
},
expectAssumeCalled: true,
expectBindCalled: true,
eventReason: "FailedScheduling",
expectError: fmt.Errorf("Volume binding started, waiting for completion"),
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
},
"unbound,found-matches,already-bound": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeBindingRequired: false,
},
expectAssumeCalled: true,
expectBindCalled: false,
eventReason: "FailedScheduling",
expectError: fmt.Errorf("Volume binding started, waiting for completion"),
},
"predicate-error": {
{
name: "predicate error",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindErr: findErr,
},
eventReason: "FailedScheduling",
expectError: findErr,
},
"assume-error": {
{
name: "assume error",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
@@ -756,12 +767,12 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
eventReason: "FailedScheduling",
expectError: assumeErr,
},
"bind-error": {
{
name: "bind error",
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeBindingRequired: true,
BindErr: bindErr,
FindUnboundSatsified: true,
FindBoundSatsified: true,
BindErr: bindErr,
},
expectAssumeCalled: true,
expectBindCalled: true,
@@ -770,68 +781,68 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
},
}
for name, item := range table {
stop := make(chan struct{})
fakeVolumeBinder := volumebinder.NewFakeVolumeBinder(item.volumeBinderConfig)
internalBinder, ok := fakeVolumeBinder.Binder.(*persistentvolume.FakeVolumeBinder)
if !ok {
t.Fatalf("Failed to get fake volume binder")
}
s, bindingChan, errChan := setupTestSchedulerWithVolumeBinding(fakeVolumeBinder, stop, eventBroadcaster)
eventChan := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("%v: expected %v, got %v", name, e, a)
for _, item := range table {
t.Run(item.name, func(t *testing.T) {
stop := make(chan struct{})
fakeVolumeBinder := volumebinder.NewFakeVolumeBinder(item.volumeBinderConfig)
internalBinder, ok := fakeVolumeBinder.Binder.(*persistentvolume.FakeVolumeBinder)
if !ok {
t.Fatalf("Failed to get fake volume binder")
}
close(eventChan)
s, bindingChan, errChan := setupTestSchedulerWithVolumeBinding(fakeVolumeBinder, stop, eventBroadcaster)
eventChan := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("expected %v, got %v", e, a)
}
close(eventChan)
})
s.scheduleOne()
// Wait for pod to succeed or fail scheduling
select {
case <-eventChan:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("scheduling timeout after %v", wait.ForeverTestTimeout)
}
events.Stop()
// Wait for scheduling to return an error
select {
case err := <-errChan:
if item.expectError == nil || !reflect.DeepEqual(item.expectError.Error(), err.Error()) {
t.Errorf("err \nWANT=%+v,\nGOT=%+v", item.expectError, err)
}
case <-time.After(chanTimeout):
if item.expectError != nil {
t.Errorf("did not receive error after %v", chanTimeout)
}
}
// Wait for pod to succeed binding
select {
case b := <-bindingChan:
if !reflect.DeepEqual(item.expectPodBind, b) {
t.Errorf("err \nWANT=%+v,\nGOT=%+v", item.expectPodBind, b)
}
case <-time.After(chanTimeout):
if item.expectPodBind != nil {
t.Errorf("did not receive pod binding after %v", chanTimeout)
}
}
if item.expectAssumeCalled != internalBinder.AssumeCalled {
t.Errorf("expectedAssumeCall %v", item.expectAssumeCalled)
}
if item.expectBindCalled != internalBinder.BindCalled {
t.Errorf("expectedBindCall %v", item.expectBindCalled)
}
close(stop)
})
go fakeVolumeBinder.Run(s.bindVolumesWorker, stop)
s.scheduleOne()
// Wait for pod to succeed or fail scheduling
select {
case <-eventChan:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("%v: scheduling timeout after %v", name, wait.ForeverTestTimeout)
}
events.Stop()
// Wait for scheduling to return an error
select {
case err := <-errChan:
if item.expectError == nil || !reflect.DeepEqual(item.expectError.Error(), err.Error()) {
t.Errorf("%v: \n err \nWANT=%+v,\nGOT=%+v", name, item.expectError, err)
}
case <-time.After(chanTimeout):
if item.expectError != nil {
t.Errorf("%v: did not receive error after %v", name, chanTimeout)
}
}
// Wait for pod to succeed binding
select {
case b := <-bindingChan:
if !reflect.DeepEqual(item.expectPodBind, b) {
t.Errorf("%v: \n err \nWANT=%+v,\nGOT=%+v", name, item.expectPodBind, b)
}
case <-time.After(chanTimeout):
if item.expectPodBind != nil {
t.Errorf("%v: did not receive pod binding after %v", name, chanTimeout)
}
}
if item.expectAssumeCalled != internalBinder.AssumeCalled {
t.Errorf("%v: expectedAssumeCall %v", name, item.expectAssumeCalled)
}
if item.expectBindCalled != internalBinder.BindCalled {
t.Errorf("%v: expectedBindCall %v", name, item.expectBindCalled)
}
close(stop)
}
}

View File

@@ -11,7 +11,6 @@ go_library(
srcs = [
"fake_cache.go",
"fake_lister.go",
"pods_to_cache.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/testing",
@@ -21,15 +20,14 @@ go_library(
"//pkg/apis/core/install:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
],
)
@@ -51,7 +49,7 @@ go_test(
srcs = ["util_test.go"],
embed = [":go_default_library"],
deps = [
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)

View File

@@ -106,5 +106,8 @@ func (f *FakeCache) Snapshot() *schedulercache.Snapshot {
return &schedulercache.Snapshot{}
}
// IsUpToDate is a fake mthod for testing
// IsUpToDate is a fake method for testing
func (f *FakeCache) IsUpToDate(*schedulercache.NodeInfo) bool { return true }
// NodeTree is a fake method for testing.
func (f *FakeCache) NodeTree() *schedulercache.NodeTree { return nil }

View File

@@ -19,9 +19,8 @@ package testing
import (
"fmt"
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
corelisters "k8s.io/client-go/listers/core/v1"
@@ -126,10 +125,10 @@ func (f FakeControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.
var _ algorithm.ReplicaSetLister = &FakeReplicaSetLister{}
// FakeReplicaSetLister implements ControllerLister on []extensions.ReplicaSet for test purposes.
type FakeReplicaSetLister []*extensions.ReplicaSet
type FakeReplicaSetLister []*apps.ReplicaSet
// GetPodReplicaSets gets the ReplicaSets that have the selector that match the labels on the given pod
func (f FakeReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
func (f FakeReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*apps.ReplicaSet, err error) {
var selector labels.Selector
for _, rs := range f {

View File

@@ -1,65 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testing
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// PodsToCache is used for testing
type PodsToCache []*v1.Pod
// AssumePod returns nil.
func (p PodsToCache) AssumePod(pod *v1.Pod) error { return nil }
// ForgetPod returns nil.
func (p PodsToCache) ForgetPod(pod *v1.Pod) error { return nil }
// AddPod returns nil.
func (p PodsToCache) AddPod(pod *v1.Pod) error { return nil }
// UpdatePod returns nil.
func (p PodsToCache) UpdatePod(oldPod, newPod *v1.Pod) error { return nil }
// RemovePod returns nil.
func (p PodsToCache) RemovePod(pod *v1.Pod) error { return nil }
// AddNode returns nil.
func (p PodsToCache) AddNode(node *v1.Node) error { return nil }
// UpdateNode returns nil.
func (p PodsToCache) UpdateNode(oldNode, newNode *v1.Node) error { return nil }
// RemoveNode returns nil.
func (p PodsToCache) RemoveNode(node *v1.Node) error { return nil }
// UpdateNodeNameToInfoMap returns nil.
func (p PodsToCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
return nil
}
// List returns pods matching the label selector.
func (p PodsToCache) List(s labels.Selector) (selected []*v1.Pod, err error) {
for _, pod := range p {
if s.Matches(labels.Set(pod.Labels)) {
selected = append(selected, pod)
}
}
return selected, nil
}

View File

@@ -34,16 +34,6 @@ type FakeConfigurator struct {
Config *Config
}
// GetPriorityFunctionConfigs is not implemented yet.
func (fc *FakeConfigurator) GetPriorityFunctionConfigs(priorityKeys sets.String) ([]algorithm.PriorityConfig, error) {
return nil, fmt.Errorf("not implemented")
}
// GetPriorityMetadataProducer is not implemented yet.
func (fc *FakeConfigurator) GetPriorityMetadataProducer() (algorithm.PriorityMetadataProducer, error) {
return nil, fmt.Errorf("not implemented")
}
// GetPredicateMetadataProducer is not implemented yet.
func (fc *FakeConfigurator) GetPredicateMetadataProducer() (algorithm.PredicateMetadataProducer, error) {
return nil, fmt.Errorf("not implemented")
@@ -59,11 +49,6 @@ func (fc *FakeConfigurator) GetHardPodAffinitySymmetricWeight() int32 {
panic("not implemented")
}
// GetSchedulerName is not implemented yet.
func (fc *FakeConfigurator) GetSchedulerName() string {
panic("not implemented")
}
// MakeDefaultErrorFunc is not implemented yet.
func (fc *FakeConfigurator) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue core.SchedulingQueue) func(pod *v1.Pod, err error) {
return nil

View File

@@ -15,8 +15,9 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/apis/scheduling:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
],
)
@@ -30,10 +31,10 @@ go_library(
deps = [
"//pkg/apis/scheduling:go_default_library",
"//pkg/features:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@@ -17,9 +17,11 @@ limitations under the License.
package util
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/kubernetes/pkg/apis/scheduling"
)
@@ -303,3 +305,115 @@ func TestHostPortInfo_Check(t *testing.T) {
}
}
}
func TestGetContainerPorts(t *testing.T) {
tests := []struct {
pod1 *v1.Pod
pod2 *v1.Pod
expected []*v1.ContainerPort
}{
{
pod1: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Ports: []v1.ContainerPort{
{
ContainerPort: 8001,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8002,
Protocol: v1.ProtocolTCP,
},
},
},
{
Ports: []v1.ContainerPort{
{
ContainerPort: 8003,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8004,
Protocol: v1.ProtocolTCP,
},
},
},
},
},
},
pod2: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Ports: []v1.ContainerPort{
{
ContainerPort: 8011,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8012,
Protocol: v1.ProtocolTCP,
},
},
},
{
Ports: []v1.ContainerPort{
{
ContainerPort: 8013,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8014,
Protocol: v1.ProtocolTCP,
},
},
},
},
},
},
expected: []*v1.ContainerPort{
{
ContainerPort: 8001,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8002,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8003,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8004,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8011,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8012,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8013,
Protocol: v1.ProtocolTCP,
},
{
ContainerPort: 8014,
Protocol: v1.ProtocolTCP,
},
},
},
}
for _, test := range tests {
result := GetContainerPorts(test.pod1, test.pod2)
if !reflect.DeepEqual(test.expected, result) {
t.Errorf("Got different result than expected.\nDifference detected on:\n%s", diff.ObjectGoPrintSideBySide(test.expected, result))
}
}
}
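The expected slice above is simply the concatenation of every container port from both pods, in order. A sketch consistent with that behavior (presumably close to the util helper under test, but written here only as an illustration):
// Illustrative: collect pointers to every container port of every given pod, in order.
func getContainerPortsSketch(pods ...*v1.Pod) []*v1.ContainerPort {
	var ports []*v1.ContainerPort
	for _, pod := range pods {
		for i := range pod.Spec.Containers {
			container := &pod.Spec.Containers[i]
			for j := range container.Ports {
				ports = append(ports, &container.Ports[j])
			}
		}
	}
	return ports
}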

View File

@@ -7,12 +7,10 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/controller/volume/persistentvolume:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/informers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/informers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
],
)

View File

@@ -20,19 +20,15 @@ import (
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
coreinformers "k8s.io/client-go/informers/core/v1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
)
// VolumeBinder sets up the volume binding library and manages
// the volume binding operations with a queue.
// VolumeBinder sets up the volume binding library
type VolumeBinder struct {
Binder persistentvolume.SchedulerVolumeBinder
BindQueue *workqueue.Type
Binder persistentvolume.SchedulerVolumeBinder
}
// NewVolumeBinder sets up the volume binding library
@@ -40,30 +36,21 @@ func NewVolumeBinder(
client clientset.Interface,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
pvInformer coreinformers.PersistentVolumeInformer,
storageClassInformer storageinformers.StorageClassInformer) *VolumeBinder {
storageClassInformer storageinformers.StorageClassInformer,
bindTimeout time.Duration) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer),
BindQueue: workqueue.NewNamed("podsToBind"),
Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer, bindTimeout),
}
}
// NewFakeVolumeBinder sets up a fake volume binder and binding queue
func NewFakeVolumeBinder(config *persistentvolume.FakeVolumeBinderConfig) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewFakeVolumeBinder(config),
BindQueue: workqueue.NewNamed("podsToBind"),
Binder: persistentvolume.NewFakeVolumeBinder(config),
}
}
// Run starts a goroutine to handle the binding queue with the given function.
func (b *VolumeBinder) Run(bindWorkFunc func(), stopCh <-chan struct{}) {
go wait.Until(bindWorkFunc, time.Second, stopCh)
<-stopCh
b.BindQueue.ShutDown()
}
// DeletePodBindings will delete the cached volume bindings for the given pod.
func (b *VolumeBinder) DeletePodBindings(pod *v1.Pod) {
cache := b.Binder.GetBindingsCache()