diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index c0601b9f0..2ce11773a 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object allocatorTopologyBalancing: description: |- diff --git a/config/crd/bases/config.nri_templatepolicies.yaml b/config/crd/bases/config.nri_templatepolicies.yaml index 125e5ccb7..938c4cd73 100644 --- a/config/crd/bases/config.nri_templatepolicies.yaml +++ b/config/crd/bases/config.nri_templatepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/config/crd/bases/config.nri_topologyawarepolicies.yaml b/config/crd/bases/config.nri_topologyawarepolicies.yaml index 5d2e6e100..1ac7af5f9 100644 --- a/config/crd/bases/config.nri_topologyawarepolicies.yaml +++ b/config/crd/bases/config.nri_topologyawarepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index c0601b9f0..2ce11773a 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object allocatorTopologyBalancing: description: |- diff --git a/deployment/helm/balloons/templates/daemonset.yaml b/deployment/helm/balloons/templates/daemonset.yaml index 7a60a59bc..50f3a0d2d 100644 --- a/deployment/helm/balloons/templates/daemonset.yaml +++ b/deployment/helm/balloons/templates/daemonset.yaml @@ -117,6 +117,9 @@ spec: mountPath: /var/run/nri-resource-policy - name: nrisockets mountPath: /var/run/nri + - name: pod-resources-socket + mountPath: /var/lib/kubelet/pod-resources + readOnly: true {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -136,6 +139,10 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate + - name: pod-resources-socket + hostPath: + path: /var/lib/kubelet/pod-resources + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: diff --git a/deployment/helm/template/crds/config.nri_templatepolicies.yaml b/deployment/helm/template/crds/config.nri_templatepolicies.yaml index 125e5ccb7..938c4cd73 100644 --- a/deployment/helm/template/crds/config.nri_templatepolicies.yaml +++ b/deployment/helm/template/crds/config.nri_templatepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/deployment/helm/template/templates/daemonset.yaml b/deployment/helm/template/templates/daemonset.yaml index 626ba2837..67b9d5687 100644 --- a/deployment/helm/template/templates/daemonset.yaml +++ b/deployment/helm/template/templates/daemonset.yaml @@ -110,6 +110,9 @@ spec: mountPath: /var/run/nri-resource-policy - name: nrisockets mountPath: /var/run/nri + - name: pod-resources-socket + mountPath: /var/lib/kubelet/pod-resources + readOnly: true {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -129,6 +132,10 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate + - name: pod-resources-socket + hostPath: + path: /var/lib/kubelet/pod-resources + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: diff --git a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml index 5d2e6e100..1ac7af5f9 100644 --- a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml +++ b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml @@ -51,6 +51,10 @@ spec: NodeResourceTopology enables support for exporting resource usage using NodeResourceTopology Custom Resources. type: boolean + podResourceAPI: + description: PodResourceAPI enables support for querying kubelet + Pod Resource API. + type: boolean type: object availableResources: additionalProperties: diff --git a/deployment/helm/topology-aware/templates/daemonset.yaml b/deployment/helm/topology-aware/templates/daemonset.yaml index d9cffa393..5e956518b 100644 --- a/deployment/helm/topology-aware/templates/daemonset.yaml +++ b/deployment/helm/topology-aware/templates/daemonset.yaml @@ -117,6 +117,9 @@ spec: mountPath: /var/run/nri-resource-policy - name: nrisockets mountPath: /var/run/nri + - name: pod-resources-socket + mountPath: /var/lib/kubelet/pod-resources + readOnly: true {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -136,6 +139,10 @@ spec: hostPath: path: /var/run/nri type: DirectoryOrCreate + - name: pod-resources-socket + hostPath: + path: /var/lib/kubelet/pod-resources + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: diff --git a/go.mod b/go.mod index cdef4cb07..3b60e206d 100644 --- a/go.mod +++ b/go.mod @@ -16,8 +16,8 @@ require ( github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 github.com/pelletier/go-toml/v2 v2.1.0 - github.com/prometheus/client_golang v1.17.0 - github.com/prometheus/client_model v0.5.0 + github.com/prometheus/client_golang v1.19.1 + github.com/prometheus/client_model v0.6.1 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 go.opentelemetry.io/otel v1.19.0 @@ -27,11 +27,13 @@ require ( go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/sys v0.21.0 golang.org/x/time v0.3.0 + google.golang.org/grpc v1.65.0 k8s.io/api v0.31.2 k8s.io/apimachinery v0.31.2 k8s.io/client-go v0.31.2 k8s.io/code-generator v0.31.2 k8s.io/klog/v2 v2.130.1 + k8s.io/kubelet v0.31.2 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/controller-runtime v0.16.2 sigs.k8s.io/yaml v1.4.0 @@ -40,7 +42,7 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect @@ -63,15 +65,14 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/x448/float16 v0.8.4 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect @@ -84,15 +85,14 @@ require ( golang.org/x/term v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c // indirect - google.golang.org/grpc v1.58.3 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/cri-api v0.25.4 // indirect + k8s.io/cri-api v0.31.2 // indirect k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/go.sum b/go.sum index 8a1b7ea33..3900efd13 100644 --- a/go.sum +++ b/go.sum @@ -619,8 +619,9 @@ github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -713,8 +714,9 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= +github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= +github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -867,8 +869,6 @@ github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= -github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -899,18 +899,18 @@ github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qR github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= -github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= -github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -1561,16 +1561,14 @@ google.golang.org/genproto v0.0.0-20230330154414-c0448cd141ea/go.mod h1:UUQDJDOl google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= google.golang.org/genproto v0.0.0-20230525234025-438c736192d0/go.mod h1:9ExIQyXL5hZrHzQceCwuSYwZZ5QZBazOcprJ5rgs3lY= google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54/go.mod h1:zqTuNwFlFRsw5zIts5VnzLQxSRqh+CGOTVMlYbY0Eyk= -google.golang.org/genproto v0.0.0-20231002182017-d307bd883b97 h1:SeZZZx0cP0fqUyA+oRzP9k7cSwJlvDFiROO72uwD6i0= -google.golang.org/genproto v0.0.0-20231002182017-d307bd883b97/go.mod h1:t1VqOqqvce95G3hIDCT5FeO3YUc6Q4Oe24L/+rNMxRk= google.golang.org/genproto/googleapis/api v0.0.0-20230525234020-1aefcd67740a/go.mod h1:ts19tUU+Z0ZShN1y3aPyq2+O3d5FUNNgT6FtOzmrNn8= google.golang.org/genproto/googleapis/api v0.0.0-20230525234035-dd9d682886f9/go.mod h1:vHYtlOoi6TsQ3Uk2yxR7NI5z8uoV+3pZtR4jmHIkRig= -google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c h1:0RtEmmHjemvUXloH7+RuBSIw7n+GEHMOMY1CkGYnWq4= -google.golang.org/genproto/googleapis/api v0.0.0-20231009173412-8bfb1ae86b6c/go.mod h1:Wth13BrWMRN/G+guBLupKa6fslcWZv14R0ZKDRkNfY8= +google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 h1:7whR9kGa5LUwFtpLm2ArCEejtnxlGeLbAyjFY8sGNFw= +google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157/go.mod h1:99sLkeliLXfdj2J75X3Ho+rrVCaJze0uwN7zDDkjPVU= google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234015-3fc162c6f38a/go.mod h1:xURIpW9ES5+/GZhnV6beoEtxQrnkRGIfP5VQG2tCBLc= google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c h1:jHkCUWkseRf+W+edG5hMzr/Uh1xkDREY4caybAq4dpY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231009173412-8bfb1ae86b6c/go.mod h1:4cYg8o5yUbm77w8ZX00LhMVNl/YVBFJRYWDc0uYWMs0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1612,8 +1610,8 @@ google.golang.org/grpc v1.52.3/go.mod h1:pu6fVzoFb+NBYNAvQL08ic+lvB2IojljRYuun5v google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= -google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= -google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= +google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -1667,14 +1665,16 @@ k8s.io/client-go v0.31.2 h1:Y2F4dxU5d3AQj+ybwSMqQnpZH9F30//1ObxOKlTI9yc= k8s.io/client-go v0.31.2/go.mod h1:NPa74jSVR/+eez2dFsEIHNa+3o09vtNaWwWwb1qSxSs= k8s.io/code-generator v0.31.2 h1:xLWxG0HEpMSHfcM//3u3Ro2Hmc6AyyLINQS//Z2GEOI= k8s.io/code-generator v0.31.2/go.mod h1:eEQHXgBU/m7LDaToDoiz3t97dUUVyOblQdwOr8rivqc= -k8s.io/cri-api v0.25.4 h1:NgyuGXa4YDJsCV5UIQpaQPrv/pc9Jg9BcnR+xqGSf40= -k8s.io/cri-api v0.25.4/go.mod h1:riC/P0yOGUf2K1735wW+CXs1aY2ctBgePtnnoFLd0dU= +k8s.io/cri-api v0.31.2 h1:O/weUnSHvM59nTio0unxIUFyRHMRKkYn96YDILSQKmo= +k8s.io/cri-api v0.31.2/go.mod h1:Po3TMAYH/+KrZabi7QiwQI4a692oZcUOUThd/rqwxrI= k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo= k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/kubelet v0.31.2 h1:6Hytyw4LqWqhgzoi7sPfpDGClu2UfxmPmaiXPC4FRgI= +k8s.io/kubelet v0.31.2/go.mod h1:0E4++3cMWi2cJxOwuaQP3eMBa7PSOvAFgkTPlVc/2FA= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index f0f6b9cd5..660e083f0 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -30,6 +30,7 @@ import ( "k8s.io/client-go/tools/clientcmd" nrtapi "github.com/containers/nri-plugins/pkg/agent/nrtapi" + "github.com/containers/nri-plugins/pkg/agent/podresapi" "github.com/containers/nri-plugins/pkg/agent/watch" cfgapi "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1" k8sclient "k8s.io/client-go/kubernetes" @@ -126,11 +127,12 @@ type Agent struct { kubeConfig string // kubeconfig path configFile string // configuration file to use instead of custom resource - cfgIf ConfigInterface // custom resource access interface - httpCli *http.Client // shared HTTP client - k8sCli *k8sclient.Clientset // kubernetes client - nrtCli *nrtapi.Client // NRT custom resources client - nrtLock sync.Mutex // serialize NRT custom resource updates + cfgIf ConfigInterface // custom resource access interface + httpCli *http.Client // shared HTTP client + k8sCli *k8sclient.Clientset // kubernetes client + nrtCli *nrtapi.Client // NRT custom resources client + nrtLock sync.Mutex // serialize NRT custom resource updates + podResCli *podresapi.Client // pod resources API client notifyFn NotifyFn // config resource change notification callback nodeWatch watch.Interface // kubernetes node watch @@ -262,6 +264,68 @@ func (a *Agent) Stop() { } } +var ( + defaultConfig = &cfgapi.AgentConfig{ + NodeResourceTopology: true, + } +) + +func getAgentConfig(newConfig metav1.Object) *cfgapi.AgentConfig { + cfg := cfgapi.GetAgentConfig(newConfig) + if cfg == nil { + return defaultConfig + } + return cfg +} + +func (a *Agent) configure(newConfig metav1.Object) { + if a.hasLocalConfig() { + log.Warn("running with local configuration, skipping client setup...") + return + } + + cfg := getAgentConfig(newConfig) + + // Failure to create a client is not a fatal error. + switch { + case cfg.NodeResourceTopology && a.nrtCli == nil: + log.Info("enabling NRT client") + cfg, err := a.getRESTConfig() + if err != nil { + log.Error("failed to setup NRT client: %w", err) + break + } + cli, err := nrtapi.NewForConfigAndClient(cfg, a.httpCli) + if err != nil { + log.Error("failed to setup NRT client: %w", err) + break + } + a.nrtCli = cli + + case !cfg.NodeResourceTopology && a.nrtCli != nil: + log.Info("disabling NRT client") + a.nrtCli = nil + } + + // Reconfigure pod resource client, both on initial startup and reconfiguration. + // Failure to create a client is not a fatal error. + switch { + case cfg.PodResourceAPI && a.podResCli == nil: + log.Info("enabling PodResourceAPI client") + cli, err := podresapi.NewClient() + if err != nil { + log.Error("failed to setup PodResourceAPI client: %v", err) + break + } + a.podResCli = cli + + case !cfg.PodResourceAPI && a.podResCli != nil: + log.Info("disabling PodResourceAPI client") + a.podResCli.Close() + a.podResCli = nil + } +} + func (a *Agent) hasLocalConfig() bool { return a.configFile != "" } @@ -314,51 +378,6 @@ func (a *Agent) cleanupClients() { a.nrtCli = nil } -var ( - defaultConfig = &cfgapi.AgentConfig{ - NodeResourceTopology: true, - } -) - -func getAgentConfig(newConfig metav1.Object) *cfgapi.AgentConfig { - cfg := cfgapi.GetAgentConfig(newConfig) - if cfg == nil { - return defaultConfig - } - return cfg -} - -func (a *Agent) configure(newConfig metav1.Object) { - if a.hasLocalConfig() { - log.Warn("running with local configuration, skipping client setup...") - return - } - - cfg := getAgentConfig(newConfig) - - // Reconfigure NRT client, both on initial startup and reconfiguration. - // Failure to create a client is not a fatal error. - switch { - case cfg.NodeResourceTopology && a.nrtCli == nil: - log.Info("enabling NRT client") - cfg, err := a.getRESTConfig() - if err != nil { - log.Error("failed to setup NRT client: %w", err) - break - } - cli, err := nrtapi.NewForConfigAndClient(cfg, a.httpCli) - if err != nil { - log.Error("failed to setup NRT client: %w", err) - break - } - a.nrtCli = cli - - case !cfg.NodeResourceTopology && a.nrtCli != nil: - log.Info("disabling NRT client") - a.nrtCli = nil - } -} - func (a *Agent) getRESTConfig() (*rest.Config, error) { var ( cfg *rest.Config diff --git a/pkg/agent/pod-resource-api.go b/pkg/agent/pod-resource-api.go new file mode 100644 index 000000000..83a7dc4ec --- /dev/null +++ b/pkg/agent/pod-resource-api.go @@ -0,0 +1,80 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package agent + +import ( + "context" + "time" + + "github.com/containers/nri-plugins/pkg/agent/podresapi" +) + +// GetPodResources queries the given pod's resources. +func (a *Agent) GetPodResources(ns, pod string, timeout time.Duration) (*podresapi.PodResources, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + return a.podResCli.Get(ctx, ns, pod) +} + +// GoGetPodResources queries the given pod's resources asynchronously. +func (a *Agent) GoGetPodResources(ns, pod string, timeout time.Duration) <-chan *podresapi.PodResources { + if !a.podResCli.HasClient() { + return nil + } + + ch := make(chan *podresapi.PodResources, 1) + + go func() { + defer close(ch) + p, err := a.GetPodResources(ns, pod, timeout) + if err != nil { + log.Error("failed to get pod resources for %s/%s: %v", ns, pod, err) + return + } + ch <- p + }() + + return ch +} + +// ListPodResources lists all pods' resources. +func (a *Agent) ListPodResources(timeout time.Duration) (podresapi.PodResourcesList, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + return a.podResCli.List(ctx) +} + +// GoListPodResources lists all pods' resources asynchronously. +func (a *Agent) GoListPodResources(timeout time.Duration) <-chan podresapi.PodResourcesList { + if !a.podResCli.HasClient() { + return nil + } + + ch := make(chan podresapi.PodResourcesList, 1) + + go func() { + defer close(ch) + l, err := a.ListPodResources(timeout) + if err != nil { + log.Error("failed to list pod resources: %v", err) + return + } + ch <- l + }() + + return ch +} diff --git a/pkg/agent/podresapi/client.go b/pkg/agent/podresapi/client.go new file mode 100644 index 000000000..e9a2e4909 --- /dev/null +++ b/pkg/agent/podresapi/client.go @@ -0,0 +1,167 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package podresapi + +import ( + "context" + "fmt" + "net" + "strings" + "time" + + logger "github.com/containers/nri-plugins/pkg/log" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + api "k8s.io/kubelet/pkg/apis/podresources/v1" +) + +// ClientOption is an option for the client. +type ClientOption func(*Client) + +// Client is Pod Resources API client. +type Client struct { + socketPath string + conn *grpc.ClientConn + cli api.PodResourcesListerClient + noGet bool +} + +const ( + // these constants were obtained from NFD sources, cross-checked against + // https://github.com/kubernetes/kubernetes/blob/release-1.31/test/e2e_node/util.go#L83 + defaultSocketPath = "/var/lib/kubelet/pod-resources/kubelet.sock" + timeout = 10 * time.Second + maxSize = 1024 * 1024 * 16 +) + +var ( + errGetDisabled = fmt.Errorf("PodResources API Get method disabled") + log = logger.Get("podresapi") +) + +// WithSocketPath sets the kubelet socket path to connect to. +func WithSocketPath(path string) ClientOption { + return func(c *Client) { + c.socketPath = path + } +} + +// WithClientConn sets a pre-created gRPC connection for the client. +func WithClientConn(conn *grpc.ClientConn) ClientOption { + return func(c *Client) { + c.conn = conn + } +} + +// NewClient creates a new Pod Resources API client with the given options. +func NewClient(options ...ClientOption) (*Client, error) { + c := &Client{ + socketPath: defaultSocketPath, + } + + for _, o := range options { + o(c) + } + + if c.conn == nil { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + dialer := func(ctx context.Context, addr string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, "unix", addr) + } + + conn, err := grpc.DialContext( + ctx, + c.socketPath, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithContextDialer(dialer), + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxSize)), + grpc.WithBlock(), + ) + + if err != nil { + return nil, fmt.Errorf("failed to connect podresource client: %w", err) + } + + c.conn = conn + } + + c.cli = api.NewPodResourcesListerClient(c.conn) + return c, nil +} + +// Close closes the client. +func (c *Client) Close() { + if c != nil && c.conn != nil { + c.conn.Close() + c.conn = nil + } + c.cli = nil +} + +// HasClient returns true if the client has a usable client. +func (c *Client) HasClient() bool { + return c != nil && c.cli != nil +} + +// List lists all pods' resources. +func (c *Client) List(ctx context.Context) (PodResourcesList, error) { + if !c.HasClient() { + return nil, nil + } + + reply, err := c.cli.List(ctx, &api.ListPodResourcesRequest{}) + if err != nil { + return nil, fmt.Errorf("failed to get pod resources by list: %w", err) + } + + return PodResourcesList(reply.GetPodResources()), nil +} + +// Get queries the given pod's resources. +func (c *Client) Get(ctx context.Context, namespace, pod string) (*PodResources, error) { + if !c.HasClient() { + return nil, nil + } + + if !c.noGet { + reply, err := c.cli.Get(ctx, &api.GetPodResourcesRequest{ + PodNamespace: namespace, + PodName: pod, + }) + if err == nil { + return &PodResources{reply.GetPodResources()}, nil + } + + if !strings.Contains(fmt.Sprintf("%v", err), fmt.Sprintf("%v", errGetDisabled)) { + return nil, fmt.Errorf("failed to get pod resources: %w", err) + } + + log.Warnf("PodResources API Get() disabled, falling back to List()...") + log.Warnf("You can enable Get() by passing this feature gate setting to kubelet:") + log.Warnf(" --feature-gates=KubeletPodResourcesGet=true") + + c.noGet = true + } + + l, err := c.List(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get pod resources: %w", err) + } + + return l.GetPodResources(namespace, pod), nil +} diff --git a/pkg/agent/podresapi/resources.go b/pkg/agent/podresapi/resources.go new file mode 100644 index 000000000..03c6b47fd --- /dev/null +++ b/pkg/agent/podresapi/resources.go @@ -0,0 +1,134 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package podresapi + +import ( + "strconv" + "strings" + + "github.com/containers/nri-plugins/pkg/topology" + api "k8s.io/kubelet/pkg/apis/podresources/v1" +) + +// PodResources contains resources for a pod. +type PodResources struct { + *api.PodResources +} + +// ContainerResources contains resources for a single container. +type ContainerResources struct { + *api.ContainerResources +} + +// PodResourcesList is a list of PodResources. +type PodResourcesList []*api.PodResources + +// PodResourceMap is a map representation of PodResourcesList. +type PodResourcesMap map[string]map[string]*PodResources + +// GetContainer returns resources for the given container. +func (p *PodResources) GetContainer(ctr string) *ContainerResources { + if p == nil { + return nil + } + + for _, c := range p.GetContainers() { + if c.GetName() == ctr { + return &ContainerResources{c} + } + } + + return nil +} + +// GetPodResources returns resources for the given pod. +func (l PodResourcesList) GetPodResources(ns, pod string) *PodResources { + for _, p := range l { + if p.GetNamespace() == ns && p.GetName() == pod { + return &PodResources{p} + } + } + + return nil +} + +// Map returns a PodResourcesMap for the pod resources list. +func (l PodResourcesList) Map() PodResourcesMap { + m := make(PodResourcesMap) + + for _, p := range l { + podMap, ok := m[p.GetNamespace()] + if !ok { + podMap = make(map[string]*PodResources) + m[p.GetNamespace()] = podMap + } + podMap[p.GetName()] = &PodResources{p} + } + + return m +} + +// GetPod returns resources for the given pod. +func (m PodResourcesMap) GetPod(ns, pod string) *PodResources { + return m[ns][pod] +} + +// GetContainer returns resources for the given container. +func (m PodResourcesMap) GetContainer(ns, pod, ctr string) *ContainerResources { + return m.GetPod(ns, pod).GetContainer(ctr) +} + +// GetDeviceTopologyHints returns topology hints for the given container. checkDenied +// is used to filter out hints that are disallowed. +func (r *ContainerResources) GetDeviceTopologyHints(checkDenied func(string) bool) topology.Hints { + if r == nil { + return nil + } + + hints := make(topology.Hints) + + for _, dev := range r.GetDevices() { + name := "podresourceapi:" + dev.GetResourceName() + + if checkDenied(name) { + log.Info("filtering hints for disallowed device %s", name) + continue + } + + var ( + nodes = dev.GetTopology().GetNodes() + numas = &strings.Builder{} + sep = "" + ) + + if len(nodes) == 0 { + continue + } + + for i, n := range nodes { + numas.WriteString(sep) + numas.WriteString(strconv.FormatInt(n.GetID(), 10)) + if i == 0 { + sep = "," + } + } + + hints[name] = topology.Hint{ + NUMAs: numas.String(), + } + } + + return hints +} diff --git a/pkg/apis/config/v1alpha1/agent.go b/pkg/apis/config/v1alpha1/agent.go index c01b9305a..2fe3cd70d 100644 --- a/pkg/apis/config/v1alpha1/agent.go +++ b/pkg/apis/config/v1alpha1/agent.go @@ -20,6 +20,9 @@ type AgentConfig struct { // NodeResourceTopology Custom Resources. // +optional NodeResourceTopology bool `json:"nodeResourceTopology,omitempty"` + // PodResourceAPI enables support for querying kubelet Pod Resource API. + // +optional + PodResourceAPI bool `json:"podResourceAPI,omitempty"` } // GetAgentConfig returns the agent-specific configuration if we have one. diff --git a/pkg/resmgr/cache/cache.go b/pkg/resmgr/cache/cache.go index ee11ce88a..dc0982617 100644 --- a/pkg/resmgr/cache/cache.go +++ b/pkg/resmgr/cache/cache.go @@ -27,6 +27,7 @@ import ( nri "github.com/containerd/nri/pkg/api" v1 "k8s.io/api/core/v1" + "github.com/containers/nri-plugins/pkg/agent/podresapi" "github.com/containers/nri-plugins/pkg/utils/cpuset" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" @@ -114,6 +115,10 @@ type Pod interface { // and return the value of the first key found. GetEffectiveAnnotation(key, container string) (string, bool) + // GetPodResources returns the pod resources for this pod, waiting for any + // pending fetch to complete or a timeout. + GetPodResources() *podresapi.PodResources + // GetContainerAffinity returns the affinity expressions for the named container. GetContainerAffinity(string) ([]*Affinity, error) // ScopeExpression returns an affinity expression for defining this pod as the scope. @@ -138,12 +143,16 @@ type Pod interface { // A cached pod. type pod struct { - cache *cache // our cache of object - Pod *nri.PodSandbox // pod data from NRI - QOSClass v1.PodQOSClass // pod QOS class - Affinity *podContainerAffinity // annotated container affinity - prettyName string // cached PrettyName() - ctime time.Time // time of pod creation + cache *cache // our cache of object + Pod *nri.PodSandbox // pod data from NRI + QOSClass v1.PodQOSClass // pod QOS class + Affinity *podContainerAffinity // annotated container affinity + PodResources *podresapi.PodResources // pod resources acquired from podresourceapi + podResCh <-chan *podresapi.PodResources // channel for pod resource fetch + waitResCh chan struct{} // channel for waiting for pod resource fetch + prettyName string // cached PrettyName() + ctime time.Time // time of pod creation + } // ContainerState is the container state in the runtime. @@ -222,6 +231,9 @@ type Container interface { // The requirements are calculated from the containers cgroup parameters. GetResourceRequirements() v1.ResourceRequirements + // GetPodResources gets container-specific resources acquired from podresourceapi. + GetPodResources() *podresapi.ContainerResources + // SetResourceUpdates sets updated resources for a container. Returns true if the // resources were really updated. SetResourceUpdates(*nri.LinuxResources) bool @@ -322,6 +334,7 @@ type container struct { State ContainerState // current state of the container Requirements v1.ResourceRequirements + PodResources *podresapi.ContainerResources ResourceUpdates *v1.ResourceRequirements request interface{} @@ -359,7 +372,7 @@ type Cacheable interface { // itself upon startup. type Cache interface { // InsertPod inserts a pod into the cache, using a runtime request or reply. - InsertPod(pod *nri.PodSandbox) (Pod, error) + InsertPod(pod *nri.PodSandbox, ch <-chan *podresapi.PodResources) Pod // DeletePod deletes a pod from the cache. DeletePod(id string) Pod // LookupPod looks up a pod in the cache. @@ -410,7 +423,7 @@ type Cache interface { Save() error // RefreshPods purges/inserts stale/new pods/containers using a pod sandbox list response. - RefreshPods([]*nri.PodSandbox) ([]Pod, []Pod, []Container) + RefreshPods([]*nri.PodSandbox, <-chan podresapi.PodResourcesList) ([]Pod, []Pod, []Container) // RefreshContainers purges/inserts stale/new containers using a container list response. RefreshContainers([]*nri.Container) ([]Container, []Container) @@ -523,12 +536,12 @@ func (cch *cache) ResetActivePolicy() error { } // Insert a pod into the cache. -func (cch *cache) InsertPod(nriPod *nri.PodSandbox) (Pod, error) { - p := cch.createPod(nriPod) +func (cch *cache) InsertPod(nriPod *nri.PodSandbox, ch <-chan *podresapi.PodResources) Pod { + p := cch.createPod(nriPod, ch) cch.Pods[nriPod.GetId()] = p cch.Save() - return p, nil + return p } // Delete a pod from the cache. @@ -620,7 +633,7 @@ func (cch *cache) LookupContainerByCgroup(path string) (Container, bool) { } // RefreshPods purges/inserts stale/new pods/containers into the cache. -func (cch *cache) RefreshPods(pods []*nri.PodSandbox) ([]Pod, []Pod, []Container) { +func (cch *cache) RefreshPods(pods []*nri.PodSandbox, resCh <-chan podresapi.PodResourcesList) ([]Pod, []Pod, []Container) { valid := make(map[string]struct{}) add := []Pod{} @@ -631,13 +644,8 @@ func (cch *cache) RefreshPods(pods []*nri.PodSandbox) ([]Pod, []Pod, []Container valid[item.Id] = struct{}{} if _, ok := cch.Pods[item.Id]; !ok { log.Debug("inserting discovered pod %s...", item.Id) - pod, err := cch.InsertPod(item) - if err != nil { - log.Error("failed to insert discovered pod %s to cache: %v", - item.Id, err) - } else { - add = append(add, pod) - } + pod := cch.InsertPod(item, nil) + add = append(add, pod) } } for _, pod := range cch.Pods { @@ -655,6 +663,16 @@ func (cch *cache) RefreshPods(pods []*nri.PodSandbox) ([]Pod, []Pod, []Container } } + if resCh != nil { + podResList := <-resCh + if len(podResList) > 0 { + podResMap := podResList.Map() + for _, pod := range cch.Pods { + pod.setPodResources(podResMap.GetPod(pod.GetNamespace(), pod.GetName())) + } + } + } + return add, del, containers } @@ -686,6 +704,11 @@ func (cch *cache) RefreshContainers(containers []*nri.Container) ([]Container, [ c.State = ContainerStateStale del = append(del, c) } + + pod, ok := cch.Pods[c.GetPodID()] + if ok { + c.PodResources = pod.GetPodResources().GetContainer(c.GetName()) + } } return add, del diff --git a/pkg/resmgr/cache/container.go b/pkg/resmgr/cache/container.go index 306492883..3031e1f5b 100644 --- a/pkg/resmgr/cache/container.go +++ b/pkg/resmgr/cache/container.go @@ -24,6 +24,7 @@ import ( "strings" "time" + "github.com/containers/nri-plugins/pkg/agent/podresapi" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" "github.com/containers/nri-plugins/pkg/cgroups" "github.com/containers/nri-plugins/pkg/kubernetes" @@ -89,18 +90,25 @@ func (c *container) getDenyPathList() (*PathList, bool, error) { // Create and initialize a cached container. func (cch *cache) createContainer(nriCtr *nri.Container) (*container, error) { podID := nriCtr.GetPodSandboxId() - _, ok := cch.Pods[podID] + pod, ok := cch.Pods[podID] if !ok { return nil, cacheError("can't find cached pod %s for container %s (%s)", podID, nriCtr.GetId(), nriCtr.GetName()) } c := &container{ - cache: cch, - Ctr: nriCtr, - State: nriCtr.GetState(), - Tags: make(map[string]string), - ctime: time.Now(), + cache: cch, + Ctr: nriCtr, + State: nriCtr.GetState(), + Tags: make(map[string]string), + ctime: time.Now(), + PodResources: pod.GetPodResources().GetContainer(nriCtr.GetName()), + } + + if c.PodResources == nil { + log.Info("no pod resources for container %s", c.PrettyName()) + } else { + log.Info("got pod resources %+v", c.PodResources) } c.generateTopologyHints() @@ -233,9 +241,19 @@ func (c *container) generateTopologyHints() { } } } + + checkDenied := func(path string) bool { + return checkAllowedAndDeniedPaths(path, allowPathList, denyPathList) + } + + if podRes := c.GetPodResources(); podRes != nil { + hints := podRes.GetDeviceTopologyHints(checkDenied) + c.TopologyHints = topology.MergeTopologyHints(c.TopologyHints, hints) + } } else { log.Info("automatic topology hint generation disabled for devices") } + } func isReadOnlyMount(m *nri.Mount) bool { @@ -452,6 +470,14 @@ func (c *container) GetResourceRequirements() v1.ResourceRequirements { return c.Requirements } +func (c *container) GetPodResources() *podresapi.ContainerResources { + pod, ok := c.GetPod() + if !ok { + return nil + } + return pod.GetPodResources().GetContainer(c.GetName()) +} + func (c *container) SetResourceUpdates(r *nri.LinuxResources) bool { r = mergeNRIResources(r, c.Ctr.GetLinux().GetResources()) diff --git a/pkg/resmgr/cache/pod.go b/pkg/resmgr/cache/pod.go index c69bcbe56..2c82083b5 100644 --- a/pkg/resmgr/cache/pod.go +++ b/pkg/resmgr/cache/pod.go @@ -21,19 +21,22 @@ import ( nri "github.com/containerd/nri/pkg/api" v1 "k8s.io/api/core/v1" + "github.com/containers/nri-plugins/pkg/agent/podresapi" resmgr "github.com/containers/nri-plugins/pkg/apis/resmgr/v1alpha1" "github.com/containers/nri-plugins/pkg/cgroups" "github.com/containers/nri-plugins/pkg/kubernetes" ) // Create and initialize a cached pod. -func (cch *cache) createPod(nriPod *nri.PodSandbox) *pod { +func (cch *cache) createPod(nriPod *nri.PodSandbox, ch <-chan *podresapi.PodResources) *pod { p := &pod{ cache: cch, Pod: nriPod, ctime: time.Now(), } + p.goFetchPodResources(ch) + if err := p.parseCgroupForQOSClass(); err != nil { log.Error("pod %s: %v", p.PrettyName(), err) } @@ -133,6 +136,32 @@ func (p *pod) GetQOSClass() v1.PodQOSClass { return p.QOSClass } +func (p *pod) goFetchPodResources(ch <-chan *podresapi.PodResources) { + go func() { + p.podResCh = ch + p.waitResCh = make(chan struct{}) + defer close(p.waitResCh) + + if p.podResCh != nil { + p.PodResources = <-p.podResCh + log.Debug("fetched pod resources %+v for %s", p.PodResources, p.GetName()) + } + }() +} + +func (p *pod) setPodResources(podRes *podresapi.PodResources) { + p.PodResources = podRes + log.Debug("set pod resources %+v for %s", p.PodResources, p.GetName()) +} + +func (p *pod) GetPodResources() *podresapi.PodResources { + if p.waitResCh != nil { + log.Debug("waiting for pod resources fetch to complete...") + _ = <-p.waitResCh + } + return p.PodResources +} + func (p *pod) GetContainerAffinity(name string) ([]*Affinity, error) { if p.Affinity != nil { return (*p.Affinity)[name], nil diff --git a/pkg/resmgr/nri.go b/pkg/resmgr/nri.go index 379b992db..ba7c58eba 100644 --- a/pkg/resmgr/nri.go +++ b/pkg/resmgr/nri.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "os" + "time" "github.com/containers/nri-plugins/pkg/instrumentation/metrics" "github.com/containers/nri-plugins/pkg/instrumentation/tracing" @@ -42,6 +43,11 @@ var ( nri = logger.NewLogger("nri-plugin") ) +const ( + podResListTimeout = 2 * time.Second + podResGetTimeout = 1 * time.Second +) + func newNRIPlugin(resmgr *resmgr) (*nriPlugin, error) { p := &nriPlugin{ resmgr: resmgr, @@ -198,7 +204,7 @@ func (p *nriPlugin) syncWithNRI(pods []*api.PodSandbox, containers []*api.Contai nri.Info("synchronizing cache state with NRI runtime...") - _, _, deleted := m.cache.RefreshPods(pods) + _, _, deleted := m.cache.RefreshPods(pods, m.agent.GoListPodResources(podResListTimeout)) for _, c := range deleted { nri.Info("discovered stale container %s (%s)...", c.PrettyName(), c.GetID()) released = append(released, c) @@ -290,18 +296,21 @@ func (p *nriPlugin) RunPodSandbox(ctx context.Context, pod *api.PodSandbox) (ret span.End(tracing.WithStatus(retErr)) }() + m := p.resmgr + podResCh := m.agent.GoGetPodResources(pod.GetNamespace(), pod.GetName(), podResGetTimeout) + p.dump(in, event, pod) defer func() { p.dump(out, event, retErr) }() - m := p.resmgr m.Lock() defer m.Unlock() b := metrics.Block() defer b.Done() - m.cache.InsertPod(pod) + m.cache.InsertPod(pod, podResCh) + return nil } @@ -383,19 +392,19 @@ func (p *nriPlugin) RemovePodSandbox(ctx context.Context, podSandbox *api.PodSan return nil } -func (p *nriPlugin) CreateContainer(ctx context.Context, podSandbox *api.PodSandbox, container *api.Container) (adjust *api.ContainerAdjustment, updates []*api.ContainerUpdate, retErr error) { +func (p *nriPlugin) CreateContainer(ctx context.Context, pod *api.PodSandbox, container *api.Container) (adjust *api.ContainerAdjustment, updates []*api.ContainerUpdate, retErr error) { event := CreateContainer _, span := tracing.StartSpan( ctx, event, - tracing.WithAttributes(containerSpanTags(podSandbox, container)...), + tracing.WithAttributes(containerSpanTags(pod, container)...), ) defer func() { span.End(tracing.WithStatus(retErr)) }() - p.dump(in, event, podSandbox, container) + p.dump(in, event, pod, container) defer func() { p.dump(out, event, adjust, updates, retErr) }()