Skip to content

Commit

Permalink
Merge pull request #1460 from tkatila/release-0.27.1-prep
Browse files Browse the repository at this point in the history
Release 0.27.1 prep
  • Loading branch information
mythi authored Jun 20, 2023
2 parents f333145 + 8427a14 commit 34c8ada
Show file tree
Hide file tree
Showing 69 changed files with 200 additions and 161 deletions.
6 changes: 3 additions & 3 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pipeline {
REG="cloud-native-image-registry.westus.cloudapp.azure.com/"
K8S_VERSION="1.27.1"
GOLANGCI_LINT_VERSION="v1.52.2"
GO_VERSION="1.20"
GO_VERSION="1.20.5"
GO_TAR="go${GO_VERSION}.linux-amd64.tar.gz"
GOROOT="/usr/local/go"
GOPATH="/tmp/go"
Expand Down Expand Up @@ -46,7 +46,7 @@ pipeline {
echo -e 'unqualified-search-registries = ["docker.io"]' | sudo tee -a /etc/containers/registries.conf
'''
sh "sudo curl -L https://dl.k8s.io/release/v${K8S_VERSION}/bin/linux/amd64/kubectl -o /usr/bin/kubectl"
sh "sudo chmod +x /usr/bin/kubectl"
sh "sudo chmod +x /usr/bin/kubectl"
}
}
stage("make go-mod-tidy") {
Expand Down Expand Up @@ -142,7 +142,7 @@ pipeline {
stage('make test-with-kind') {
steps {
dir(path: "$REPO_DIR") {
sh "make test-with-kind REG=intel/ TAG=0.27.0"
sh "make test-with-kind REG=intel/ TAG=0.27.1"
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ clean:

ORG?=intel
REG?=$(ORG)/
TAG?=0.27.0
TAG?=0.27.1
export TAG

e2e-fpga:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ This repository contains a framework for developing plugins for the Kubernetes
[device plugins framework](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/),
along with a number of device plugin implementations utilizing that framework.

The [v0.27 release](https://github.com/intel/intel-device-plugins-for-kubernetes/releases/latest)
The [v0.27.1 release](https://github.com/intel/intel-device-plugins-for-kubernetes/releases/latest)
is the latest feature release with its documentation available [here](https://intel.github.io/intel-device-plugins-for-kubernetes/0.27/).

Table of Contents
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-deviceplugin-operator.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_deviceplugin_operator"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-deviceplugin-operator'
LABEL summary='Intel® device plugin operator for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-dlb-initcontainer.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RUN curl -SL https://github.com/landley/toybox/archive/refs/tags/$TOYBOX_VERSION
###
FROM ${FINAL_BASE}
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
COPY --from=builder /install_root /
COPY demo/dlb-init.sh /usr/local/bin/
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-dlb-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_dlb_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-dlb-plugin'
LABEL summary='Intel® DLB device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-dsa-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_dsa_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-dsa-plugin'
LABEL summary='Intel® DSA device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-fpga-admissionwebhook.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_fpga_admissionwebhook"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-fpga-admissionwebhook'
LABEL summary='Intel® FPGA admission controller webhook for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-fpga-initcontainer.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ RUN curl -SL https://github.com/landley/toybox/archive/refs/tags/$TOYBOX_VERSION
###
FROM ${FINAL_BASE}
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-fpga-initcontainer'
LABEL summary='Intel® FPGA programming CRI hook for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-fpga-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_fpga_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-fpga-plugin'
LABEL summary='Intel® FPGA device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-gpu-fakedev.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_gpu_fakedev"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-gpu-fakedev'
LABEL summary='Fake device file generator for Intel® GPU plugin'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-gpu-initcontainer.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ RUN curl -SL https://github.com/landley/toybox/archive/refs/tags/$TOYBOX_VERSION
###
FROM ${FINAL_BASE}
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-gpu-initcontainer'
LABEL summary='Intel® GPU NFD hook for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-gpu-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_gpu_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-gpu-plugin'
LABEL summary='Intel® GPU device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-iaa-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_iaa_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-iaa-plugin'
LABEL summary='Intel® IAA device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-qat-initcontainer.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RUN curl -SL https://github.com/landley/toybox/archive/refs/tags/$TOYBOX_VERSION
###
FROM ${FINAL_BASE}
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-qat-initcontainer'
LABEL summary='Intel® QAT initcontainer for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-qat-plugin-kerneldrv.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ RUN install -D ${DIR}/LICENSE /install_root/licenses/intel-device-plugins-for-ku
else mkdir -p /install_root/licenses/$CMD/go-licenses/ && cd licenses/$CMD && cp -r * /install_root/licenses/$CMD/go-licenses/ ; fi
FROM debian:unstable-slim
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-qat-plugin-kerneldrv'
LABEL summary='Intel® QAT device plugin kerneldrv for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-qat-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_qat_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-qat-plugin'
LABEL summary='Intel® QAT device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-sgx-admissionwebhook.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_sgx_admissionwebhook"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-sgx-admissionwebhook'
LABEL summary='Intel® SGX admission controller webhook for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-sgx-initcontainer.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ RUN curl -SL https://github.com/landley/toybox/archive/refs/tags/$TOYBOX_VERSION
###
FROM ${FINAL_BASE}
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-sgx-initcontainer'
LABEL summary='Intel® SGX NFD hook for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-sgx-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_sgx_device_plugin"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-sgx-plugin'
LABEL summary='Intel® SGX device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-vpu-plugin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RUN install -D ${DIR}/LICENSE /install_root/licenses/intel-device-plugins-for-ku
else mkdir -p /install_root/licenses/$CMD/go-licenses/ && cd licenses/$CMD && cp -r * /install_root/licenses/$CMD/go-licenses/ ; fi
FROM debian:unstable-slim
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-vpu-plugin'
LABEL summary='Intel® VPU device plugin for Kubernetes'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/intel-xpumanager-sidecar.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FROM ${FINAL_BASE}
COPY --from=builder /install_root /
ENTRYPOINT ["/usr/local/bin/intel_xpumanager_sidecar"]
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
LABEL name='intel-xpumanager-sidecar'
LABEL summary='Intel® xpumanager sidecar'
Expand Down
2 changes: 1 addition & 1 deletion build/docker/lib/default_labels.docker
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
LABEL vendor='Intel®'
LABEL version='0.27.0'
LABEL version='0.27.1'
LABEL release='1'
2 changes: 2 additions & 0 deletions cmd/xpumanager_sidecar/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ Intel GPUs can be interconnected via an XeLink. In some workloads it is benefici
| -interval | int | 10 | Interval for XeLink topology fetching and label writing (seconds, >= 1) |
| -startup-delay | int | 10 | Startup delay before the first topology fetching (seconds, >= 0) |
| -label-namespace | string | gpu.intel.com | Namespace or prefix for the labels, e.g. **gpu.intel.com**/xe-links |
| -allow-subdeviceless-links | bool | false | Include xelinks that are not on subdevices |
| -use-https | bool | false | Use HTTPS protocol when connecting to XPU-Manager (the server certificate is not verified) |

The sidecar also accepts a number of other arguments. Please use the -h option to see the complete list of options.

Expand Down
44 changes: 31 additions & 13 deletions cmd/xpumanager_sidecar/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"bufio"
"bytes"
"context"
"crypto/tls"
"flag"
"fmt"
"io"
Expand Down Expand Up @@ -55,15 +56,17 @@ type xpuManagerTopologyMatrixCell struct {
}

type xpuManagerSidecar struct {
getMetricsData func() []byte
tmpDirPrefix string
dstFilePath string
labelNamespace string
url string
interval uint64
startDelay uint64
xpumPort uint64
laneCount uint64
getMetricsData func() []byte
tmpDirPrefix string
dstFilePath string
labelNamespace string
url string
interval uint64
startDelay uint64
xpumPort uint64
laneCount uint64
allowSubdevicelessLinks bool
useHTTPS bool
}

func (e *invalidEntryErr) Error() string {
Expand All @@ -75,6 +78,14 @@ func (xms *xpuManagerSidecar) getMetricsDataFromXPUM() []byte {
Timeout: 5 * time.Second,
}

if xms.useHTTPS {
customTransport := http.DefaultTransport.(*http.Transport).Clone()
//#nosec
customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

client.Transport = customTransport
}

ctx := context.Background()

req, err := http.NewRequestWithContext(ctx, http.MethodGet, xms.url, http.NoBody)
Expand Down Expand Up @@ -108,7 +119,7 @@ func (xms *xpuManagerSidecar) getMetricsDataFromXPUM() []byte {
return resBody
}

func processMetricsLabels(labels []*io_prometheus_client.LabelPair) (xpuManagerTopologyMatrixCell, error) {
func processMetricsLabels(labels []*io_prometheus_client.LabelPair, allowNonSubdeviceLinks bool) (xpuManagerTopologyMatrixCell, error) {
cell := createInvalidTopologyCell()

for _, label := range labels {
Expand All @@ -118,7 +129,7 @@ func processMetricsLabels(labels []*io_prometheus_client.LabelPair) (xpuManagerT
klog.V(5).Info(name, " ", strVal)

// xelinks are expected to be on subdevices unless subdeviceless links are explicitly allowed
if name == "local_on_subdevice" && strVal != "true" {
if !allowNonSubdeviceLinks && name == "local_on_subdevice" && strVal != "true" {
return cell, &invalidEntryErr{}
}

Expand Down Expand Up @@ -193,7 +204,7 @@ func (xms *xpuManagerSidecar) GetTopologyFromXPUMMetrics(data []byte) (topologyI
continue
}

cell, err := processMetricsLabels(metric.Label)
cell, err := processMetricsLabels(metric.Label, xms.allowSubdevicelessLinks)
if err == nil {
klog.V(5).Info("topology entry: ", cell)
topologyInfos = append(topologyInfos, cell)
Expand Down Expand Up @@ -367,6 +378,8 @@ func main() {
flag.StringVar(&xms.dstFilePath, "dst-file-path", "/etc/kubernetes/node-feature-discovery/features.d/xpum-sidecar-labels.txt", "label file destination")
flag.Uint64Var(&xms.laneCount, "lane-count", 4, "minimum lane count for xelink")
flag.StringVar(&xms.labelNamespace, "label-namespace", "gpu.intel.com", "namespace for the labels")
flag.BoolVar(&xms.allowSubdevicelessLinks, "allow-subdeviceless-links", false, "allow xelinks that are not tied to subdevices (=1 tile GPUs)")
flag.BoolVar(&xms.useHTTPS, "use-https", false, "Use HTTPS protocol to connect to xpumanager")
klog.InitFlags(nil)

flag.Parse()
Expand All @@ -375,7 +388,12 @@ func main() {
klog.Fatal("zero interval won't work, set it to at least 1")
}

xms.url = fmt.Sprintf("http://127.0.0.1:%d/metrics", xms.xpumPort)
protocol := "http"
if xms.useHTTPS {
protocol = "https"
}

xms.url = fmt.Sprintf("%s://127.0.0.1:%d/metrics", protocol, xms.xpumPort)

keepIterating := true

Expand Down
31 changes: 25 additions & 6 deletions cmd/xpumanager_sidecar/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ import (
)

type testCase struct {
name string
metricsData []string
expectedLabels []string
minLaneCount int
name string
metricsData []string
expectedLabels []string
minLaneCount int
allowSubdeviceless bool
}

func createTestCases() []testCase {
Expand Down Expand Up @@ -59,12 +60,25 @@ func createTestCases() []testCase {
metricsData: []string{
`# HELP xpum_topology_link Connection type fo two GPU tiles`,
`# TYPE xpum_topology_link gauge`,
`xpum_topology_link{dev_file="card1",dev_name="Intel(R) Graphics [0x0bdb]",pci_bdf="0000:51:00.0",pci_dev="0xbdb",src="direct",uuid="01000000-0000-0000-0000-000000510000",vendor="Intel(R) Corporation",local_cpu_affinity="0-23,48-71",local_device_id="0",local_numa_index="0",local_on_subdevice="false",local_subdevice_id="0",remote_device_id="0",remote_subdevice_id="0"} 1`,
`xpum_topology_link{dev_file="card1",dev_name="Intel(R) Graphics [0x0bdb]",pci_bdf="0000:51:00.0",pci_dev="0xbdb",src="direct",uuid="01000000-0000-0000-0000-000000510000",vendor="Intel(R) Corporation",local_cpu_affinity="0-23,48-71",local_device_id="0",local_numa_index="0",local_on_subdevice="false",local_subdevice_id="0",remote_device_id="1",remote_subdevice_id="0"} 1`,
`xpum_topology_link{dev_file="card1",dev_name="Intel(R) Graphics [0x0bdb]",pci_bdf="0000:51:00.0",pci_dev="0xbdb",src="direct",uuid="01000000-0000-0000-0000-000000510000",vendor="Intel(R) Corporation",local_cpu_affinity="0-23,48-71",local_device_id="0",local_numa_index="0",local_on_subdevice="false",local_subdevice_id="0",remote_device_id="1",remote_subdevice_id="0",lane_count="4"} 1`,
`xpum_topology_link{dev_file="card1",dev_name="Intel(R) Graphics [0x0bdb]",pci_bdf="0000:51:00.0",pci_dev="0xbdb",src="direct",uuid="01000000-0000-0000-0000-000000510000",vendor="Intel(R) Corporation",local_cpu_affinity="0-23,48-71",local_device_id="0",local_numa_index="0",local_on_subdevice="false",local_subdevice_id="0",remote_device_id="1",remote_subdevice_id="1",lane_count="4"} 1`,
"",
},
expectedLabels: []string{"xpumanager.intel.com/xe-links="},
},
{
name: "Xelinks not on sub devices when it's allowed",
minLaneCount: 4,
metricsData: []string{
`# HELP xpum_topology_link Connection type fo two GPU tiles`,
`# TYPE xpum_topology_link gauge`,
`xpum_topology_link{dev_file="card1",dev_name="Intel(R) Graphics [0x0bdb]",pci_bdf="0000:51:00.0",pci_dev="0xbdb",src="direct",uuid="01000000-0000-0000-0000-000000510000",vendor="Intel(R) Corporation",local_cpu_affinity="0-23,48-71",local_device_id="0",local_numa_index="0",local_on_subdevice="false",local_subdevice_id="0",remote_device_id="1",remote_subdevice_id="0",lane_count="4"} 1`,
`xpum_topology_link{dev_file="card1",dev_name="Intel(R) Graphics [0x0bdb]",pci_bdf="0000:51:00.0",pci_dev="0xbdb",src="direct",uuid="01000000-0000-0000-0000-000000510000",vendor="Intel(R) Corporation",local_cpu_affinity="0-23,48-71",local_device_id="0",local_numa_index="0",local_on_subdevice="false",local_subdevice_id="0",remote_device_id="1",remote_subdevice_id="1",lane_count="4"} 1`,
"",
},
expectedLabels: []string{"xpumanager.intel.com/xe-links=0.0-1.0_0.0-1.1"},
allowSubdeviceless: true,
},
{
name: "Xelinks without lan counts",
minLaneCount: 4,
Expand Down Expand Up @@ -208,6 +222,9 @@ func TestLabeling(t *testing.T) {
for _, tc := range tcs {
print("Testcase (labeling): ", tc.name, "\n")
xms := tc.createFakeXMS(tc.metricsData, tc.minLaneCount)

xms.allowSubdevicelessLinks = tc.allowSubdeviceless

topologyInfos := xms.GetTopologyFromXPUMMetrics([]byte(strings.Join(tc.metricsData, "\n")))

labels := xms.createLabels(topologyInfos)
Expand All @@ -224,6 +241,8 @@ func TestIterate(t *testing.T) {
print("Testcase (iterate): ", tc.name, "\n")
xms := tc.createFakeXMS(tc.metricsData, tc.minLaneCount)

xms.allowSubdevicelessLinks = tc.allowSubdeviceless

root, err := os.MkdirTemp("", "test_new_xms")
if err != nil {
t.Fatalf("can't create temporary directory: %+v", err)
Expand Down
Loading

0 comments on commit 34c8ada

Please sign in to comment.