Merge pull request #585 from nearora-msft/implement-distributed-snapshotting

Implement distributed snapshotting
This commit is contained in:
Kubernetes Prow Robot
2021-12-25 06:05:15 -08:00
committed by GitHub
16 changed files with 870 additions and 14 deletions

View File

@@ -109,6 +109,20 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh
* `--port`: Secure port that the webhook listens on (default 443)
### Distributed Snapshotting
The distributed snapshotting feature is provided to handle snapshot operations for local volumes. To use this functionality, the snapshotter sidecar should be deployed along with the csi driver on each node so that every node manages the snapshot operations only for the volumes local to that node. This feature can be enabled by setting the following command line options to true:
#### Snapshot controller option
* `--enable-distributed-snapshotting`: This option lets the snapshot controller know that distributed snapshotting is enabled and the snapshotter sidecar will be running on each node. Off by default.
#### CSI external snapshotter sidecar option
* `--node-deployment`: Enables the snapshotter sidecar to handle snapshot operations for the volumes local to the node on which it is deployed. Off by default.
Other than this, the NODE_NAME environment variable must be set where the CSI snapshotter sidecar is deployed. The value of NODE_NAME should be the name of the node where the sidecar is running.
### Snapshot controller command line options
#### Important optional arguments that are highly recommended to be used
@@ -134,7 +148,9 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh
* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second.
*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
* `--enable-distributed-snapshotting` : Enables each node to handle snapshots for the volumes local to that node. Off by default. It should be set to true only if `--node-deployment` parameter for the csi external snapshotter sidecar is set to true. See https://github.com/kubernetes-csi/external-snapshotter/blob/master/README.md#distributed-snapshotting for details.
#### Other recognized arguments
* `--kubeconfig <path>`: Path to Kubernetes client configuration that the snapshot controller uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the snapshot controller does not run as a Kubernetes pod, e.g. for debugging.
@@ -172,9 +188,11 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh
* `--worker-threads`: Number of worker threads for running create snapshot and delete snapshot operations. Default value is 10.
* `--node-deployment`: Enables deploying the sidecar controller together with a CSI driver on nodes to manage node-local volumes. Off by default. This should be set to true along with the `--enable-distributed-snapshotting` in the snapshot controller parameters to make use of distributed snapshotting. See https://github.com/kubernetes-csi/external-snapshotter/blob/master/README.md#distributed-snapshotting for details.
* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second.
*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
#### Other recognized arguments
* `--kubeconfig <path>`: Path to Kubernetes client configuration that the CSI external-snapshotter uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the external-snapshotter does not run as a Kubernetes pod, e.g. for debugging.

View File

@@ -26,8 +26,12 @@ import (
"strings"
"time"
utils "github.com/kubernetes-csi/external-snapshotter/v4/pkg/utils"
"google.golang.org/grpc"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
@@ -75,11 +79,12 @@ var (
kubeAPIQPS = flag.Float64("kube-api-qps", 5, "QPS to use while communicating with the kubernetes apiserver. Defaults to 5.0.")
kubeAPIBurst = flag.Int("kube-api-burst", 10, "Burst to use while communicating with the kubernetes apiserver. Defaults to 10.")
metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
enableNodeDeployment = flag.Bool("node-deployment", false, "Enables deploying the sidecar controller together with a CSI driver on nodes to manage snapshots for node-local volumes.")
)
var (
@@ -98,6 +103,12 @@ func main() {
}
klog.Infof("Version: %s", version)
// If distributed snapshotting is enabled and leaderElection is also set to true, return
if *enableNodeDeployment && *leaderElection {
klog.Error("Leader election cannot happen when node-deployment is set to true")
os.Exit(1)
}
// Create the client config. Use kubeconfig if given, otherwise assume in-cluster.
config, err := buildConfig(*kubeconfig)
if err != nil {
@@ -122,6 +133,19 @@ func main() {
factory := informers.NewSharedInformerFactory(snapClient, *resyncPeriod)
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, *resyncPeriod)
var snapshotContentfactory informers.SharedInformerFactory
if *enableNodeDeployment {
node := os.Getenv("NODE_NAME")
if node == "" {
klog.Fatal("The NODE_NAME environment variable must be set when using --enable-node-deployment.")
}
snapshotContentfactory = informers.NewSharedInformerFactoryWithOptions(snapClient, *resyncPeriod, informers.WithTweakListOptions(func(lo *v1.ListOptions) {
lo.LabelSelector = labels.Set{utils.VolumeSnapshotContentManagedByLabel: node}.AsSelector().String()
}),
)
} else {
snapshotContentfactory = factory
}
// Add Snapshot types to the default Kubernetes so events can be logged for them
snapshotscheme.AddToScheme(scheme.Scheme)
@@ -202,7 +226,7 @@ func main() {
snapClient,
kubeClient,
driverName,
factory.Snapshot().V1().VolumeSnapshotContents(),
snapshotContentfactory.Snapshot().V1().VolumeSnapshotContents(),
factory.Snapshot().V1().VolumeSnapshotClasses(),
snapShotter,
*csiTimeout,
@@ -216,6 +240,7 @@ func main() {
run := func(context.Context) {
// run...
stopCh := make(chan struct{})
snapshotContentfactory.Start(stopCh)
factory.Start(stopCh)
coreFactory.Start(stopCh)
go ctrl.Run(*threads, stopCh)

View File

@@ -27,6 +27,7 @@ import (
"sync"
"time"
v1 "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
@@ -64,10 +65,11 @@ var (
kubeAPIQPS = flag.Float64("kube-api-qps", 5, "QPS to use while communicating with the kubernetes apiserver. Defaults to 5.0.")
kubeAPIBurst = flag.Int("kube-api-burst", 10, "Burst to use while communicating with the kubernetes apiserver. Defaults to 10.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
enableDistributedSnapshotting = flag.Bool("enable-distributed-snapshotting", false, "Enables each node to handle snapshotting for the local volumes created on that node")
)
var (
@@ -147,6 +149,11 @@ func main() {
factory := informers.NewSharedInformerFactory(snapClient, *resyncPeriod)
coreFactory := coreinformers.NewSharedInformerFactory(kubeClient, *resyncPeriod)
var nodeInformer v1.NodeInformer
if *enableDistributedSnapshotting {
nodeInformer = coreFactory.Core().V1().Nodes()
}
// Create and register metrics manager
metricsManager := metrics.NewMetricsManager()
@@ -174,10 +181,12 @@ func main() {
factory.Snapshot().V1().VolumeSnapshotContents(),
factory.Snapshot().V1().VolumeSnapshotClasses(),
coreFactory.Core().V1().PersistentVolumeClaims(),
nodeInformer,
metricsManager,
*resyncPeriod,
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
*enableDistributedSnapshotting,
)
if err := ensureCustomResourceDefinitionsExist(snapClient); err != nil {

View File

@@ -44,7 +44,10 @@ rules:
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshots/status"]
verbs: ["update", "patch"]
# Enable this RBAC rule only when using distributed snapshotting, i.e. when the enable-distributed-snapshotting flag is set to true
# - apiGroups: [""]
# resources: ["nodes"]
# verbs: ["get", "list", "watch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1

1
go.mod
View File

@@ -24,6 +24,7 @@ require (
k8s.io/apimachinery v0.23.0
k8s.io/client-go v0.23.0
k8s.io/component-base v0.23.0
k8s.io/component-helpers v0.23.0
k8s.io/klog/v2 v2.30.0
k8s.io/kubernetes v1.23.0
)

1
go.sum
View File

@@ -1100,6 +1100,7 @@ k8s.io/cluster-bootstrap v0.23.0/go.mod h1:VltEnKWfrRTiKgOXp3ts3vh7yqNlH6KFKFflo
k8s.io/code-generator v0.23.0/go.mod h1:vQvOhDXhuzqiVfM/YHp+dmg10WDZCchJVObc9MvowsE=
k8s.io/component-base v0.23.0 h1:UAnyzjvVZ2ZR1lF35YwtNY6VMN94WtOnArcXBu34es8=
k8s.io/component-base v0.23.0/go.mod h1:DHH5uiFvLC1edCpvcTDV++NKULdYYU6pR9Tt3HIKMKI=
k8s.io/component-helpers v0.23.0 h1:qNbqN10QTefiWcCOPkHL/0nn81sdKVv6ZgEXcSyot/U=
k8s.io/component-helpers v0.23.0/go.mod h1:liXMh6FZS4qamKtMJQ7uLHnFe3tlC86RX5mJEk/aerg=
k8s.io/controller-manager v0.23.0/go.mod h1:6/IKItSv6p9FY3mSbHgsOYmt4y+HDxiC5hEFg9rJVc8=
k8s.io/cri-api v0.23.0/go.mod h1:2edENu3/mkyW3c6fVPPPaVGEFbLRacJizBbSp7ZOLOo=

View File

@@ -838,10 +838,12 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
informerFactory.Snapshot().V1().VolumeSnapshotContents(),
informerFactory.Snapshot().V1().VolumeSnapshotClasses(),
coreFactory.Core().V1().PersistentVolumeClaims(),
nil,
metricsManager,
60*time.Second,
workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute),
workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute),
false,
)
ctrl.eventRecorder = record.NewFakeRecorder(1000)

View File

@@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes/scheme"
ref "k8s.io/client-go/tools/reference"
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
klog "k8s.io/klog/v2"
crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
@@ -671,6 +672,18 @@ func (ctrl *csiSnapshotCommonController) createSnapshotContent(snapshot *crdv1.V
},
}
if ctrl.enableDistributedSnapshotting {
nodeName, err := ctrl.getManagedByNode(volume)
if err != nil {
return nil, err
}
if nodeName != "" {
snapshotContent.Labels = map[string]string{
utils.VolumeSnapshotContentManagedByLabel: nodeName,
}
}
}
// Set AnnDeletionSecretRefName and AnnDeletionSecretRefNamespace
if snapshotterSecretRef != nil {
klog.V(5).Infof("createSnapshotContent: set annotation [%s] on content [%s].", utils.AnnDeletionSecretRefName, snapshotContent.Name)
@@ -1655,3 +1668,27 @@ func (ctrl *csiSnapshotCommonController) checkAndSetInvalidSnapshotLabel(snapsho
return updatedSnapshot, nil
}
func (ctrl *csiSnapshotCommonController) getManagedByNode(pv *v1.PersistentVolume) (string, error) {
if pv.Spec.NodeAffinity == nil {
klog.V(5).Infof("NodeAffinity not set for pv %s", pv.Name)
return "", nil
}
nodeSelectorTerms := pv.Spec.NodeAffinity.Required
nodes, err := ctrl.nodeLister.List(labels.Everything())
if err != nil {
klog.Errorf("failed to get the list of nodes: %q", err)
return "", err
}
for _, node := range nodes {
match, _ := corev1helpers.MatchNodeSelectorTerms(node, nodeSelectorTerms)
if match {
return node.Name, nil
}
}
klog.Errorf("failed to find nodes that match the node affinity requirements for pv[%s]", pv.Name)
return "", nil
}

View File

@@ -57,6 +57,8 @@ type csiSnapshotCommonController struct {
classListerSynced cache.InformerSynced
pvcLister corelisters.PersistentVolumeClaimLister
pvcListerSynced cache.InformerSynced
nodeLister corelisters.NodeLister
nodeListerSynced cache.InformerSynced
snapshotStore cache.Store
contentStore cache.Store
@@ -64,6 +66,8 @@ type csiSnapshotCommonController struct {
metricsManager metrics.MetricsManager
resyncPeriod time.Duration
enableDistributedSnapshotting bool
}
// NewCSISnapshotController returns a new *csiSnapshotCommonController
@@ -74,10 +78,12 @@ func NewCSISnapshotCommonController(
volumeSnapshotContentInformer storageinformers.VolumeSnapshotContentInformer,
volumeSnapshotClassInformer storageinformers.VolumeSnapshotClassInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
nodeInformer coreinformers.NodeInformer,
metricsManager metrics.MetricsManager,
resyncPeriod time.Duration,
snapshotRateLimiter workqueue.RateLimiter,
contentRateLimiter workqueue.RateLimiter,
enableDistributedSnapshotting bool,
) *csiSnapshotCommonController {
broadcaster := record.NewBroadcaster()
broadcaster.StartLogging(klog.Infof)
@@ -125,6 +131,13 @@ func NewCSISnapshotCommonController(
ctrl.classLister = volumeSnapshotClassInformer.Lister()
ctrl.classListerSynced = volumeSnapshotClassInformer.Informer().HasSynced
ctrl.enableDistributedSnapshotting = enableDistributedSnapshotting
if enableDistributedSnapshotting {
ctrl.nodeLister = nodeInformer.Lister()
ctrl.nodeListerSynced = nodeInformer.Informer().HasSynced
}
return ctrl
}
@@ -135,7 +148,12 @@ func (ctrl *csiSnapshotCommonController) Run(workers int, stopCh <-chan struct{}
klog.Infof("Starting snapshot controller")
defer klog.Infof("Shutting snapshot controller")
if !cache.WaitForCacheSync(stopCh, ctrl.snapshotListerSynced, ctrl.contentListerSynced, ctrl.classListerSynced, ctrl.pvcListerSynced) {
informersSynced := []cache.InformerSynced{ctrl.snapshotListerSynced, ctrl.contentListerSynced, ctrl.classListerSynced, ctrl.pvcListerSynced}
if ctrl.enableDistributedSnapshotting {
informersSynced = append(informersSynced, ctrl.nodeListerSynced)
}
if !cache.WaitForCacheSync(stopCh, informersSynced...) {
klog.Errorf("Cannot sync caches")
return
}

View File

@@ -21,11 +21,28 @@ import (
crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
"github.com/kubernetes-csi/external-snapshotter/v4/pkg/utils"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/tools/cache"
)
var deletionPolicy = crdv1.VolumeSnapshotContentDelete
type FakeNodeLister struct {
NodeList []*v1.Node
}
// List lists all Nodes in the indexer.
// Objects returned here must be treated as read-only.
func (l FakeNodeLister) List(selector labels.Selector) (ret []*v1.Node, err error) {
return l.NodeList, nil
}
func (l FakeNodeLister) Get(name string) (*v1.Node, error) {
return nil, nil
}
func storeVersion(t *testing.T, prefix string, c cache.Store, version string, expectedReturn bool) {
content := newContent("contentName", "snapuid1-1", "snap1-1", "sid1-1", classGold, "", "pv-handle-1-1", deletionPolicy, nil, nil, false, true)
content.ResourceVersion = version
@@ -92,3 +109,71 @@ func TestControllerCacheParsingError(t *testing.T) {
t.Errorf("Expected parsing error, got nil instead")
}
}
func TestGetManagedByNode(t *testing.T) {
// Test that a matching node is found
node1 := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Labels: map[string]string{"key1": "value1"},
},
}
node2 := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
Labels: map[string]string{"key2": "value2"},
},
}
ctrl := &csiSnapshotCommonController{
nodeLister: FakeNodeLister{NodeList: []*v1.Node{node1, node2}},
}
pv := &v1.PersistentVolume{
Spec: v1.PersistentVolumeSpec{
NodeAffinity: &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "key1",
Operator: v1.NodeSelectorOpIn,
Values: []string{"value1"},
},
},
},
},
},
},
},
}
nodeName, err := ctrl.getManagedByNode(pv)
if err != nil {
t.Errorf("Unexpected error occurred: %v", err)
}
if nodeName != "node1" {
t.Errorf("Expected node:%s , Found node: %s instead", "node1", nodeName)
}
// Test that no matching node is found
node1 = &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
}
ctrl = &csiSnapshotCommonController{
nodeLister: FakeNodeLister{NodeList: []*v1.Node{node1}},
}
nodeName, _ = ctrl.getManagedByNode(pv)
if nodeName != "" {
t.Errorf("Expected no node, Found node(%s)", nodeName)
}
}

View File

@@ -107,6 +107,9 @@ const (
// VolumeSnapshotInvalidLabel is applied to invalid snapshot as a label key. The value does not matter.
// See https://github.com/kubernetes/enhancements/blob/master/keps/sig-storage/177-volume-snapshot/tighten-validation-webhook-crd.md#automatic-labelling-of-invalid-objects
VolumeSnapshotInvalidLabel = "snapshot.storage.kubernetes.io/invalid-snapshot-resource"
// VolumeSnapshotContentManagedByLabel is applied by the snapshot controller to the VolumeSnapshotContent object in case distributed snapshotting is enabled.
// The value contains the name of the node that handles the snapshot for the volume local to that node.
VolumeSnapshotContentManagedByLabel = "snapshot.storage.kubernetes.io/managed-by"
)
var SnapshotterSecretParams = secretParamsMap{

202
vendor/k8s.io/component-helpers/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,23 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package corev1 defines functions which should satisfy one of the following:
//
// - Be used by more than one core component (kube-scheduler, kubelet, kube-apiserver, etc.)
// - Be used by a core component and another kubernetes project (cluster-autoscaler, descheduler)
//
// And be a scheduling feature.
package corev1 // import "k8s.io/component-helpers/scheduling/corev1"

View File

@@ -0,0 +1,101 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package corev1
import (
"encoding/json"
v1 "k8s.io/api/core/v1"
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
)
// PodPriority returns priority of the given pod.
func PodPriority(pod *v1.Pod) int32 {
if pod.Spec.Priority != nil {
return *pod.Spec.Priority
}
// When priority of a running pod is nil, it means it was created at a time
// that there was no global default priority class and the priority class
// name of the pod was empty. So, we resolve to the static default priority.
return 0
}
// MatchNodeSelectorTerms checks whether the node labels and fields match node selector terms in ORed;
// nil or empty term matches no objects.
func MatchNodeSelectorTerms(
node *v1.Node,
nodeSelector *v1.NodeSelector,
) (bool, error) {
if node == nil {
return false, nil
}
return nodeaffinity.NewLazyErrorNodeSelector(nodeSelector).Match(node)
}
// GetAvoidPodsFromNodeAnnotations scans the list of annotations and
// returns the pods that needs to be avoided for this node from scheduling
func GetAvoidPodsFromNodeAnnotations(annotations map[string]string) (v1.AvoidPods, error) {
var avoidPods v1.AvoidPods
if len(annotations) > 0 && annotations[v1.PreferAvoidPodsAnnotationKey] != "" {
err := json.Unmarshal([]byte(annotations[v1.PreferAvoidPodsAnnotationKey]), &avoidPods)
if err != nil {
return avoidPods, err
}
}
return avoidPods, nil
}
// TolerationsTolerateTaint checks if taint is tolerated by any of the tolerations.
func TolerationsTolerateTaint(tolerations []v1.Toleration, taint *v1.Taint) bool {
for i := range tolerations {
if tolerations[i].ToleratesTaint(taint) {
return true
}
}
return false
}
type taintsFilterFunc func(*v1.Taint) bool
// FindMatchingUntoleratedTaint checks if the given tolerations tolerates
// all the filtered taints, and returns the first taint without a toleration
// Returns true if there is an untolerated taint
// Returns false if all taints are tolerated
func FindMatchingUntoleratedTaint(taints []v1.Taint, tolerations []v1.Toleration, inclusionFilter taintsFilterFunc) (v1.Taint, bool) {
filteredTaints := getFilteredTaints(taints, inclusionFilter)
for _, taint := range filteredTaints {
if !TolerationsTolerateTaint(tolerations, &taint) {
return taint, true
}
}
return v1.Taint{}, false
}
// getFilteredTaints returns a list of taints satisfying the filter predicate
func getFilteredTaints(taints []v1.Taint, inclusionFilter taintsFilterFunc) []v1.Taint {
if inclusionFilter == nil {
return taints
}
filteredTaints := []v1.Taint{}
for _, taint := range taints {
if !inclusionFilter(&taint) {
continue
}
filteredTaints = append(filteredTaints, taint)
}
return filteredTaints
}

View File

@@ -0,0 +1,324 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeaffinity
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/validation/field"
)
// NodeSelector is a runtime representation of v1.NodeSelector.
type NodeSelector struct {
lazy LazyErrorNodeSelector
}
// LazyErrorNodeSelector is a runtime representation of v1.NodeSelector that
// only reports parse errors when no terms match.
type LazyErrorNodeSelector struct {
terms []nodeSelectorTerm
}
// NewNodeSelector returns a NodeSelector or aggregate parsing errors found.
func NewNodeSelector(ns *v1.NodeSelector, opts ...field.PathOption) (*NodeSelector, error) {
lazy := NewLazyErrorNodeSelector(ns, opts...)
var errs []error
for _, term := range lazy.terms {
if len(term.parseErrs) > 0 {
errs = append(errs, term.parseErrs...)
}
}
if len(errs) != 0 {
return nil, errors.Flatten(errors.NewAggregate(errs))
}
return &NodeSelector{lazy: *lazy}, nil
}
// NewLazyErrorNodeSelector creates a NodeSelector that only reports parse
// errors when no terms match.
func NewLazyErrorNodeSelector(ns *v1.NodeSelector, opts ...field.PathOption) *LazyErrorNodeSelector {
p := field.ToPath(opts...)
parsedTerms := make([]nodeSelectorTerm, 0, len(ns.NodeSelectorTerms))
path := p.Child("nodeSelectorTerms")
for i, term := range ns.NodeSelectorTerms {
// nil or empty term selects no objects
if isEmptyNodeSelectorTerm(&term) {
continue
}
p := path.Index(i)
parsedTerms = append(parsedTerms, newNodeSelectorTerm(&term, p))
}
return &LazyErrorNodeSelector{
terms: parsedTerms,
}
}
// Match checks whether the node labels and fields match the selector terms, ORed;
// nil or empty term matches no objects.
func (ns *NodeSelector) Match(node *v1.Node) bool {
// parse errors are reported in NewNodeSelector.
match, _ := ns.lazy.Match(node)
return match
}
// Match checks whether the node labels and fields match the selector terms, ORed;
// nil or empty term matches no objects.
// Parse errors are only returned if no terms matched.
func (ns *LazyErrorNodeSelector) Match(node *v1.Node) (bool, error) {
if node == nil {
return false, nil
}
nodeLabels := labels.Set(node.Labels)
nodeFields := extractNodeFields(node)
var errs []error
for _, term := range ns.terms {
match, tErrs := term.match(nodeLabels, nodeFields)
if len(tErrs) > 0 {
errs = append(errs, tErrs...)
continue
}
if match {
return true, nil
}
}
return false, errors.Flatten(errors.NewAggregate(errs))
}
// PreferredSchedulingTerms is a runtime representation of []v1.PreferredSchedulingTerms.
type PreferredSchedulingTerms struct {
terms []preferredSchedulingTerm
}
// NewPreferredSchedulingTerms returns a PreferredSchedulingTerms or all the parsing errors found.
// If a v1.PreferredSchedulingTerm has a 0 weight, its parsing is skipped.
func NewPreferredSchedulingTerms(terms []v1.PreferredSchedulingTerm, opts ...field.PathOption) (*PreferredSchedulingTerms, error) {
p := field.ToPath(opts...)
var errs []error
parsedTerms := make([]preferredSchedulingTerm, 0, len(terms))
for i, term := range terms {
path := p.Index(i)
if term.Weight == 0 || isEmptyNodeSelectorTerm(&term.Preference) {
continue
}
parsedTerm := preferredSchedulingTerm{
nodeSelectorTerm: newNodeSelectorTerm(&term.Preference, path),
weight: int(term.Weight),
}
if len(parsedTerm.parseErrs) > 0 {
errs = append(errs, parsedTerm.parseErrs...)
} else {
parsedTerms = append(parsedTerms, parsedTerm)
}
}
if len(errs) != 0 {
return nil, errors.Flatten(errors.NewAggregate(errs))
}
return &PreferredSchedulingTerms{terms: parsedTerms}, nil
}
// Score returns a score for a Node: the sum of the weights of the terms that
// match the Node.
func (t *PreferredSchedulingTerms) Score(node *v1.Node) int64 {
var score int64
nodeLabels := labels.Set(node.Labels)
nodeFields := extractNodeFields(node)
for _, term := range t.terms {
// parse errors are reported in NewPreferredSchedulingTerms.
if ok, _ := term.match(nodeLabels, nodeFields); ok {
score += int64(term.weight)
}
}
return score
}
func isEmptyNodeSelectorTerm(term *v1.NodeSelectorTerm) bool {
return len(term.MatchExpressions) == 0 && len(term.MatchFields) == 0
}
func extractNodeFields(n *v1.Node) fields.Set {
f := make(fields.Set)
if len(n.Name) > 0 {
f["metadata.name"] = n.Name
}
return f
}
type nodeSelectorTerm struct {
matchLabels labels.Selector
matchFields fields.Selector
parseErrs []error
}
func newNodeSelectorTerm(term *v1.NodeSelectorTerm, path *field.Path) nodeSelectorTerm {
var parsedTerm nodeSelectorTerm
var errs []error
if len(term.MatchExpressions) != 0 {
p := path.Child("matchExpressions")
parsedTerm.matchLabels, errs = nodeSelectorRequirementsAsSelector(term.MatchExpressions, p)
if errs != nil {
parsedTerm.parseErrs = append(parsedTerm.parseErrs, errs...)
}
}
if len(term.MatchFields) != 0 {
p := path.Child("matchFields")
parsedTerm.matchFields, errs = nodeSelectorRequirementsAsFieldSelector(term.MatchFields, p)
if errs != nil {
parsedTerm.parseErrs = append(parsedTerm.parseErrs, errs...)
}
}
return parsedTerm
}
func (t *nodeSelectorTerm) match(nodeLabels labels.Set, nodeFields fields.Set) (bool, []error) {
if t.parseErrs != nil {
return false, t.parseErrs
}
if t.matchLabels != nil && !t.matchLabels.Matches(nodeLabels) {
return false, nil
}
if t.matchFields != nil && len(nodeFields) > 0 && !t.matchFields.Matches(nodeFields) {
return false, nil
}
return true, nil
}
// nodeSelectorRequirementsAsSelector converts the []NodeSelectorRequirement api type into a struct that implements
// labels.Selector.
func nodeSelectorRequirementsAsSelector(nsm []v1.NodeSelectorRequirement, path *field.Path) (labels.Selector, []error) {
if len(nsm) == 0 {
return labels.Nothing(), nil
}
var errs []error
selector := labels.NewSelector()
for i, expr := range nsm {
p := path.Index(i)
var op selection.Operator
switch expr.Operator {
case v1.NodeSelectorOpIn:
op = selection.In
case v1.NodeSelectorOpNotIn:
op = selection.NotIn
case v1.NodeSelectorOpExists:
op = selection.Exists
case v1.NodeSelectorOpDoesNotExist:
op = selection.DoesNotExist
case v1.NodeSelectorOpGt:
op = selection.GreaterThan
case v1.NodeSelectorOpLt:
op = selection.LessThan
default:
errs = append(errs, field.NotSupported(p.Child("operator"), expr.Operator, nil))
continue
}
r, err := labels.NewRequirement(expr.Key, op, expr.Values, field.WithPath(p))
if err != nil {
errs = append(errs, err)
} else {
selector = selector.Add(*r)
}
}
if len(errs) != 0 {
return nil, errs
}
return selector, nil
}
var validFieldSelectorOperators = []string{
string(v1.NodeSelectorOpIn),
string(v1.NodeSelectorOpNotIn),
}
// nodeSelectorRequirementsAsFieldSelector converts the []NodeSelectorRequirement core type into a struct that implements
// fields.Selector.
func nodeSelectorRequirementsAsFieldSelector(nsr []v1.NodeSelectorRequirement, path *field.Path) (fields.Selector, []error) {
if len(nsr) == 0 {
return fields.Nothing(), nil
}
var errs []error
var selectors []fields.Selector
for i, expr := range nsr {
p := path.Index(i)
switch expr.Operator {
case v1.NodeSelectorOpIn:
if len(expr.Values) != 1 {
errs = append(errs, field.Invalid(p.Child("values"), expr.Values, "must have one element"))
} else {
selectors = append(selectors, fields.OneTermEqualSelector(expr.Key, expr.Values[0]))
}
case v1.NodeSelectorOpNotIn:
if len(expr.Values) != 1 {
errs = append(errs, field.Invalid(p.Child("values"), expr.Values, "must have one element"))
} else {
selectors = append(selectors, fields.OneTermNotEqualSelector(expr.Key, expr.Values[0]))
}
default:
errs = append(errs, field.NotSupported(p.Child("operator"), expr.Operator, validFieldSelectorOperators))
}
}
if len(errs) != 0 {
return nil, errs
}
return fields.AndSelectors(selectors...), nil
}
type preferredSchedulingTerm struct {
nodeSelectorTerm
weight int
}
type RequiredNodeAffinity struct {
labelSelector labels.Selector
nodeSelector *LazyErrorNodeSelector
}
// GetRequiredNodeAffinity returns the parsing result of pod's nodeSelector and nodeAffinity.
func GetRequiredNodeAffinity(pod *v1.Pod) RequiredNodeAffinity {
var selector labels.Selector
if len(pod.Spec.NodeSelector) > 0 {
selector = labels.SelectorFromSet(pod.Spec.NodeSelector)
}
// Use LazyErrorNodeSelector for backwards compatibility of parsing errors.
var affinity *LazyErrorNodeSelector
if pod.Spec.Affinity != nil &&
pod.Spec.Affinity.NodeAffinity != nil &&
pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
affinity = NewLazyErrorNodeSelector(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution)
}
return RequiredNodeAffinity{labelSelector: selector, nodeSelector: affinity}
}
// Match checks whether the pod is schedulable onto nodes according to
// the requirements in both nodeSelector and nodeAffinity.
func (s RequiredNodeAffinity) Match(node *v1.Node) (bool, error) {
if s.labelSelector != nil {
if !s.labelSelector.Matches(labels.Set(node.Labels)) {
return false, nil
}
}
if s.nodeSelector != nil {
return s.nodeSelector.Match(node)
}
return true, nil
}

4
vendor/modules.txt vendored
View File

@@ -638,6 +638,10 @@ k8s.io/client-go/util/workqueue
## explicit; go 1.16
k8s.io/component-base/metrics
k8s.io/component-base/version
# k8s.io/component-helpers v0.23.0 => k8s.io/component-helpers v0.23.0
## explicit; go 1.16
k8s.io/component-helpers/scheduling/corev1
k8s.io/component-helpers/scheduling/corev1/nodeaffinity
# k8s.io/klog v1.0.0
## explicit; go 1.12
k8s.io/klog