From eea08e4ab2625133102af4bb477c7d7c8b656931 Mon Sep 17 00:00:00 2001 From: amaslennikov Date: Fri, 25 Mar 2022 17:30:47 +0300 Subject: [PATCH] Extend resource pool configuration with resource prefix option Resource configuration now might optionally contain a resource prefix. The previous behavior was to always add "rdma" prefix to all resources. Now it's a default option and might be changed if "resourcePrefix" is provided in the config Signed-off-by: amaslennikov --- README.md | 65 +++++++++++++++---------- go.sum | 1 - pkg/resources/resources_manager.go | 26 ++++++---- pkg/resources/resources_manager_test.go | 7 +-- pkg/resources/server.go | 7 ++- pkg/resources/server_test.go | 48 +++++++++--------- pkg/types/types.go | 9 ++-- 7 files changed, 92 insertions(+), 71 deletions(-) diff --git a/README.md b/README.md index 2d260ac..38e9a3f 100644 --- a/README.md +++ b/README.md @@ -4,22 +4,22 @@ [![Coverage Status](https://coveralls.io/repos/github/Mellanox/k8s-rdma-shared-dev-plugin/badge.svg)](https://coveralls.io/github/Mellanox/k8s-rdma-shared-dev-plugin) # k8s-rdma-shared-dev-plugin + (https://hub.docker.com/r/mellanox/k8s-rdma-shared-dev-plugin) -This is simple rdma device plugin that support IB and RoCE HCA. -This plugin runs as daemonset. -Its container image is available at mellanox/k8s-rdma-shared-dev-plugin. +This is simple rdma device plugin that support IB and RoCE HCA. This plugin runs as daemonset. Its container image is +available at mellanox/k8s-rdma-shared-dev-plugin. # How to use device plugin **1.** Use CNI plugin such as Contiv, Calico, Cluster -Make sure to configure ib0 or appropriate IPoIB netdevice as the parent netdevice for creating overlay/virtual netdevices. +Make sure to configure ib0 or appropriate IPoIB netdevice as the parent netdevice for creating overlay/virtual +netdevices. **2.** Create ConfigMap -Create config map to describe mode as "hca" mode. -This is per node configuration. +Create config map to describe mode as "hca" mode. 
This is per node configuration. ``` kubectl create -f images/k8s-rdma-shared-dev-plugin-config-map.yaml @@ -34,6 +34,7 @@ kubectl create -f images/k8s-rdma-shared-dev-plugin-ds.yaml **4.** Create Test pod Create test pod which requests 1 vhca resource. + ``` kubectl create -f example/test-hca-pod.yaml ``` @@ -75,16 +76,18 @@ kubectl create -f ``` # RDMA Shared Device Plugin Configurations + The plugin has several configuration fields, this section explains each field usage ```json { "periodicUpdateInterval": 300, "configList": [{ - "resourceName": "hca_shared_devices_a", - "rdmaHcaMax": 1000, - "devices": ["ib0", "ib1"] - }, + "resourceName": "hca_shared_devices_a", + "resourcePrefix": "example_prefix", + "rdmaHcaMax": 1000, + "devices": ["ib0", "ib1"] + }, { "resourceName": "hca_shared_devices_b", "rdmaHcaMax": 500, @@ -98,26 +101,27 @@ The plugin has several configuration fields, this section explains each field us } ``` -`periodicUpdateInterval` is the time interval in seconds to update the resources according to host devices in case of changes. -Notes: - - if `periodicUpdateInterval` is 0 then periodic update for host devices will be disabled. - - if `periodicUpdateInterval` is not set then default periodic update interval of 60 seconds will be used. +`periodicUpdateInterval` is the time interval in seconds to update the resources according to host devices in case of +changes. Notes: -`"configList"` should contain a list of config objects. Each config object may consist of following fields: +- if `periodicUpdateInterval` is 0 then periodic update for host devices will be disabled. +- if `periodicUpdateInterval` is not set then default periodic update interval of 60 seconds will be used. +`"configList"` should contain a list of config objects. 
Each config object may consist of following fields: -| Field | Required | Description | Type | Example | -|----------------|----------|------------------------------------------------------------------------------------------------------------------------------------|------------------|---------------------------------------------------------| -| "resourceName" | Y | Endpoint resource name. Should not contain special characters, must be unique in the scope of the resource prefix | string | "hca_shared_devices_a" | -| "rdmaHcaMax" | Y | Maximum number of RDMA resources that can be provided by the device plugin resource | Integer | 1000 | -| "selectors" | N | A map of device selectors for filtering the devices. refer to [Device Selectors](#devices-selectors) section for more information | json object | selectors": {"vendors": ["15b3"],"deviceIDs": ["1017"]} | -| "devices" | N | A list of devices names to be selected, same as "ifNames" selector | `string` list | ["ib0", "ib1"] | +| Field | Required | Description | Type | Default value | Example | +|------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------|------------------|---------------|---------------------------------------------------------| +| "resourceName" | Y | Endpoint resource name. Should not contain special characters, must be unique in the scope of the resource prefix | string | - | "hca_shared_devices_a" | +| "resourcePrefix" | N | Endpoint resource prefix. Should not contain special characters | string | "rdma" | "example_prefix" | +| "rdmaHcaMax" | Y | Maximum number of RDMA resources that can be provided by the device plugin resource | Integer | - | 1000 | +| "selectors" | N | A map of device selectors for filtering the devices. 
refer to [Device Selectors](#devices-selectors) section for more information | json object | - | "selectors": {"vendors": ["15b3"],"deviceIDs": ["1017"]} | +| "devices" | N | A list of devices names to be selected, same as "ifNames" selector | `string` list | - | ["ib0", "ib1"] | Note: Either `selectors` or `devices` must be specified for a given resource, "selectors" is recommended. ## Devices Selectors -The following selectors are used for filtering the desired devices. +The following selectors are used for filtering the desired devices. | Field | Description | Type | Example | |-------------|----------------------------------------------------------------|---------------|--------------------------| @@ -130,7 +134,10 @@ The following selectors are used for filtering the desired devices. [//]: # (The tables above generated using: https://ozh.github.io/ascii-tables/) ## Selectors Matching Process -The device plugin filters the host devices based on the provided selectors, if there are any missing selectors, the device plugin ignores them. Device plugin performs logical OR between elements of a specific selector and logical AND is performed between selectors. + +The device plugin filters the host devices based on the provided selectors, if there are any missing selectors, the +device plugin ignores them. Device plugin performs logical OR between elements of a specific selector and logical AND is +performed between selectors. # RDMA shared device plugin deployment with node labels @@ -139,7 +146,8 @@ RDMA shared device plugin should be deployed on nodes that: 1. Have RDMA capable hardware 2. RDMA kernel stack is loaded -To allow proper node selection [Node Feature Discovery (NFD)](https://github.com/kubernetes-sigs/node-feature-discovery) can be used to discover the node capabilities, and expose them as node labels. 
+To allow proper node selection [Node Feature Discovery (NFD)](https://github.com/kubernetes-sigs/node-feature-discovery) +can be used to discover the node capabilities, and expose them as node labels. 1. Deploy NFD, release `v0.6.0` or new newer @@ -150,15 +158,18 @@ To allow proper node selection [Node Feature Discovery (NFD)](https://github.com ``` 2. Check the new labels added to the node + ``` # kubectl get nodes --show-labels ``` -RDMA device plugin can then be deployed on nodes with `feature.node.kubernetes.io/custom-rdma.available=true`, which indicates that the node is RDMA capable and RDMA modules are loaded. +RDMA device plugin can then be deployed on nodes with `feature.node.kubernetes.io/custom-rdma.available=true`, which +indicates that the node is RDMA capable and RDMA modules are loaded. # Docker image -RDMA shared device plugin uses `alpine` base image by default. To build RDMA shared device plugin with -another base image you need to pass `BASE_IMAGE` argument: + +RDMA shared device plugin uses `alpine` base image by default. 
To build RDMA shared device plugin with another base +image you need to pass `BASE_IMAGE` argument: ``` docker build -t k8s-rdma-shared-dev-plugin \ diff --git a/go.sum b/go.sum index 16c0aa9..c818682 100644 --- a/go.sum +++ b/go.sum @@ -137,7 +137,6 @@ golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/pkg/resources/resources_manager.go b/pkg/resources/resources_manager.go index 421f330..f60b405 100644 --- a/pkg/resources/resources_manager.go +++ b/pkg/resources/resources_manager.go @@ -43,7 +43,7 @@ var ( // resourceManager for plugin type resourceManager struct { configFile string - resourcePrefix string + defaultResourcePrefix string socketSuffix string watchMode bool configList []*types.UserConfig @@ -62,12 +62,12 @@ func NewResourceManager() types.ResourceManager { fmt.Println("Using Deprecated Devie Plugin Registry Path") } return &resourceManager{ - configFile: configFilePath, - resourcePrefix: rdmaHcaResourcePrefix, - socketSuffix: socketSuffix, - watchMode: watcherMode, - netlinkManager: &netlinkManager{}, - rds: NewRdmaDeviceSpec(requiredRdmaDevices), + configFile: configFilePath, + defaultResourcePrefix: rdmaHcaResourcePrefix, + socketSuffix: socketSuffix, + watchMode: watcherMode, + netlinkManager: &netlinkManager{}, + rds: NewRdmaDeviceSpec(requiredRdmaDevices), } } @@ -120,7 +120,7 @@ func (rm *resourceManager) 
ValidateConfigs() error { for _, conf := range rm.configList { // check if name contains acceptable characters - if !validResourceName(conf.ResourceName) { + if !validResourceNameOrPrefix(conf.ResourceName) { return fmt.Errorf("error: resource name \"%s\" contains invalid characters", conf.ResourceName) } // check resource names are unique @@ -129,6 +129,12 @@ func (rm *resourceManager) ValidateConfigs() error { // resource name already exist return fmt.Errorf("error: resource name \"%s\" already exists", conf.ResourceName) } + // If prefix is not configured - use the default one. Otherwise validate if it contains acceptable characters + if conf.ResourcePrefix == "" { + conf.ResourcePrefix = rm.defaultResourcePrefix + } else if !validResourceNameOrPrefix(conf.ResourcePrefix) { + return fmt.Errorf("error: resource prefix \"%s\" contains invalid characters", conf.ResourcePrefix) + } if conf.RdmaHcaMax < 0 { return fmt.Errorf("error: Invalid value for rdmaHcaMax < 0: %d", conf.RdmaHcaMax) @@ -197,7 +203,7 @@ func (rm *resourceManager) InitServers() error { log.Printf("Warning: no devices in device pool, creating empty resource server for %s", config.ResourceName) } - rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.resourcePrefix, rm.socketSuffix) + rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.socketSuffix) if err != nil { return err } @@ -241,7 +247,7 @@ func (rm *resourceManager) RestartAllServers() error { return nil } -func validResourceName(name string) bool { +func validResourceNameOrPrefix(name string) bool { // name regex var validString = regexp.MustCompile(`^[a-zA-Z0-9_]+$`) return validString.MatchString(name) diff --git a/pkg/resources/resources_manager_test.go b/pkg/resources/resources_manager_test.go index fa60c6c..1077d22 100644 --- a/pkg/resources/resources_manager_test.go +++ b/pkg/resources/resources_manager_test.go @@ -414,9 +414,10 @@ var _ = Describe("ResourcesManger", func() { rm := 
&resourceManager{} configlist = append(configlist, &types.UserConfig{ - ResourceName: "test_config", - RdmaHcaMax: 100, - Devices: []string{"ib0"}}) + ResourceName: "test_config", + ResourcePrefix: "test_prefix", + RdmaHcaMax: 100, + Devices: []string{"ib0"}}) rm.configList = configlist err := rm.InitServers() diff --git a/pkg/resources/server.go b/pkg/resources/server.go index b2f784e..476bd1b 100644 --- a/pkg/resources/server.go +++ b/pkg/resources/server.go @@ -106,7 +106,7 @@ func (rsc *resourcesServerPort) Dial(unixSocketPath string, timeout time.Duratio } // newResourceServer returns an initialized server -func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, watcherMode bool, resourcePrefix, +func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, watcherMode bool, socketSuffix string) (types.ResourceServer, error) { var devs []*pluginapi.Device @@ -115,6 +115,9 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w if config.RdmaHcaMax < 0 { return nil, fmt.Errorf("error: Invalid value for rdmaHcaMax < 0: %d", config.RdmaHcaMax) } + if config.ResourcePrefix == "" { + return nil, fmt.Errorf("error: Empty resourcePrefix") + } deviceSpec := getDevicesSpec(devices) @@ -138,7 +141,7 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w socketName := fmt.Sprintf("%s.%s", config.ResourceName, socketSuffix) return &resourceServer{ - resourceName: fmt.Sprintf("%s/%s", resourcePrefix, config.ResourceName), + resourceName: fmt.Sprintf("%s/%s", config.ResourcePrefix, config.ResourceName), socketName: socketName, socketPath: filepath.Join(sockDir, socketName), watchMode: watcherMode, diff --git a/pkg/resources/server_test.go b/pkg/resources/server_test.go index 1da6691..a995cdb 100644 --- a/pkg/resources/server_test.go +++ b/pkg/resources/server_test.go @@ -55,8 +55,8 @@ var _ = Describe("resourceServer tests", func() { Symlinks: 
map[string]string{path.Join(fakeNetDevicePath, "device"): "../../../0000:02:00.0"}, } defer fs.Use()() - conf := &types.UserConfig{ResourceName: "test_server", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, true, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "test_server", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, true, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) Expect(rs.resourceName).To(Equal("rdma/test_server")) @@ -70,8 +70,8 @@ var _ = Describe("resourceServer tests", func() { Symlinks: map[string]string{path.Join(fakeNetDevicePath, "device"): "../../../0000:02:00.0"}, } defer fs.Use()() - conf := &types.UserConfig{ResourceName: "test_server", RdmaHcaMax: 0} - obj, err := newResourceServer(conf, fakeDeviceList, true, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "test_server", ResourcePrefix: "rdma", RdmaHcaMax: 0} + obj, err := newResourceServer(conf, fakeDeviceList, true, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) Expect(rs.resourceName).To(Equal("rdma/test_server")) @@ -85,12 +85,12 @@ var _ = Describe("resourceServer tests", func() { Symlinks: map[string]string{path.Join(fakeNetDevicePath, "device"): "../../../0000:02:00.0"}, } defer fs.Use()() - conf := &types.UserConfig{ResourceName: "test_server", RdmaHcaMax: 100} + conf := &types.UserConfig{ResourceName: "test_server", ResourcePrefix: "rdma", RdmaHcaMax: 100} fakePciDevice := &mocks.PciNetDevice{} fakePciDevice.On("GetRdmaSpec").Return([]*pluginapi.DeviceSpec{}) fakePciDevice.On("GetPciAddr").Return("0000:02:00.0") deviceList := []types.PciNetDevice{fakePciDevice} - obj, err := newResourceServer(conf, deviceList, true, "rdma", "socket") + obj, err := newResourceServer(conf, deviceList, true, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) Expect(rs.resourceName).To(Equal("rdma/test_server")) @@ -104,8 +104,8 @@ var _ = 
Describe("resourceServer tests", func() { Symlinks: map[string]string{path.Join(fakeNetDevicePath, "device"): "../../../0000:02:00.0"}, } defer fs.Use()() - conf := &types.UserConfig{ResourceName: "test_server", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, false, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "test_server", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, false, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) Expect(rs.resourceName).To(Equal("rdma/test_server")) @@ -119,8 +119,8 @@ var _ = Describe("resourceServer tests", func() { Symlinks: map[string]string{path.Join(fakeNetDevicePath, "device"): "../../../0000:02:00.0"}, } defer fs.Use()() - conf := &types.UserConfig{ResourceName: "test_server", RdmaHcaMax: 0} - obj, err := newResourceServer(conf, fakeDeviceList, false, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "test_server", ResourcePrefix: "rdma", RdmaHcaMax: 0} + obj, err := newResourceServer(conf, fakeDeviceList, false, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) Expect(rs.resourceName).To(Equal("rdma/test_server")) @@ -129,8 +129,8 @@ var _ = Describe("resourceServer tests", func() { Expect(len(rs.devs)).To(Equal(0)) }) It("server with plugin with invalid max number of resources", func() { - conf := &types.UserConfig{ResourceName: "test_server", RdmaHcaMax: -100} - obj, err := newResourceServer(conf, fakeDeviceList, true, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "test_server", ResourcePrefix: "rdma", RdmaHcaMax: -100} + obj, err := newResourceServer(conf, fakeDeviceList, true, "socket") Expect(err).To(HaveOccurred()) Expect(obj).To(BeNil()) }) @@ -387,8 +387,8 @@ var _ = Describe("resourceServer tests", func() { Symlinks: map[string]string{path.Join(fakeNetDevicePath, "device"): "../../../0000:02:00.0"}, } defer fs.Use()() - conf := &types.UserConfig{RdmaHcaMax: 100, 
ResourceName: "fake"} - obj, err := newResourceServer(conf, fakeDeviceList, true, "fake", "fake") + conf := &types.UserConfig{RdmaHcaMax: 100, ResourcePrefix: "rdma", ResourceName: "fake"} + obj, err := newResourceServer(conf, fakeDeviceList, true, "fake") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) @@ -522,8 +522,8 @@ var _ = Describe("resourceServer tests", func() { activeSockDir = activeSockDirBackup }() - conf := &types.UserConfig{ResourceName: "fake_test", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, true, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "fake_test", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, true, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) @@ -586,8 +586,8 @@ var _ = Describe("resourceServer tests", func() { // Use faked dir as socket dir deprecatedSockDir = fs.RootDir - conf := &types.UserConfig{ResourceName: "fakename", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, false, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "fakename", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, false, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) @@ -621,8 +621,8 @@ var _ = Describe("resourceServer tests", func() { // Use faked dir as socket dir activeSockDir = fs.RootDir - conf := &types.UserConfig{ResourceName: "fakename", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, true, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "fakename", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, true, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) @@ -660,8 +660,8 @@ var _ = Describe("resourceServer tests", func() { // Use faked dir as socket dir deprecatedSockDir = fs.RootDir - conf := &types.UserConfig{ResourceName: 
"fakename", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, false, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "fakename", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, false, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) @@ -686,8 +686,8 @@ var _ = Describe("resourceServer tests", func() { DescribeTable("allocating", func(req *pluginapi.AllocateRequest, expectedRespLength int, shouldFail bool) { - conf := &types.UserConfig{ResourceName: "fakename", RdmaHcaMax: 100} - obj, err := newResourceServer(conf, fakeDeviceList, true, "rdma", "socket") + conf := &types.UserConfig{ResourceName: "fakename", ResourcePrefix: "rdma", RdmaHcaMax: 100} + obj, err := newResourceServer(conf, fakeDeviceList, true, "socket") Expect(err).ToNot(HaveOccurred()) rs := obj.(*resourceServer) diff --git a/pkg/types/types.go b/pkg/types/types.go index 8218926..3155833 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -21,10 +21,11 @@ type Selectors struct { // UserConfig configuration for device plugin type UserConfig struct { - ResourceName string `json:"resourceName"` - RdmaHcaMax int `json:"rdmaHcaMax"` - Devices []string `json:"devices"` - Selectors Selectors `json:"selectors"` + ResourceName string `json:"resourceName"` + ResourcePrefix string `json:"resourcePrefix"` + RdmaHcaMax int `json:"rdmaHcaMax"` + Devices []string `json:"devices"` + Selectors Selectors `json:"selectors"` } // UserConfigList config list for servers