Skip to content

Commit 34d405c

Browse files
authored
Merge pull request #1917 from karthikvetrivel/fix/driver-spec-shared-state-multi-nodepool
Fix shared state bug causing incorrect driver images in multi-node-pool clusters
2 parents 5c5f325 + 5853e28 commit 34d405c

File tree

2 files changed

+64
-1
lines changed

2 files changed

+64
-1
lines changed

internal/state/driver.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ func getDriverSpec(cr *nvidiav1alpha1.NVIDIADriver, nodePool nodePool) (*driverS
545545
nvidiaDriverName := getDriverName(cr, nodePool.getOS())
546546
nvidiaDriverAppName := getDriverAppName(cr, nodePool)
547547

548-
spec := &cr.Spec
548+
spec := cr.Spec.DeepCopy()
549549
imagePath, err := getDriverImagePath(spec, nodePool)
550550
if err != nil {
551551
return nil, fmt.Errorf("failed to get driver image path: %v", err)

internal/state/driver_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,3 +911,66 @@ func TestGetSanitizedKernelVersion(t *testing.T) {
911911
require.Equal(t, test.expected, result)
912912
}
913913
}
914+
915+
func TestGetDriverSpecMultipleNodePools(t *testing.T) {
916+
cr := &nvidiav1alpha1.NVIDIADriver{
917+
ObjectMeta: metav1.ObjectMeta{
918+
UID: apitypes.UID("test-uid-multipools"),
919+
},
920+
Spec: nvidiav1alpha1.NVIDIADriverSpec{
921+
DriverType: nvidiav1alpha1.GPU,
922+
UsePrecompiled: ptr.To(true),
923+
Repository: "nvcr.io/nvidia",
924+
Image: "driver",
925+
Version: "535.104.05",
926+
Manager: nvidiav1alpha1.DriverManagerSpec{
927+
Repository: "nvcr.io/nvidia/cloud-native",
928+
Image: "k8s-driver-manager",
929+
Version: "v0.6.2",
930+
},
931+
},
932+
}
933+
934+
pool1 := nodePool{
935+
osRelease: "ubuntu",
936+
osVersion: "22.04",
937+
kernel: "5.15.0-generic",
938+
nodeSelector: map[string]string{
939+
"feature.node.kubernetes.io/kernel-version.full": "5.15.0-generic",
940+
"feature.node.kubernetes.io/system-os_release.VERSION_ID": "22.04",
941+
},
942+
}
943+
944+
pool2 := nodePool{
945+
osRelease: "ubuntu",
946+
osVersion: "20.04",
947+
kernel: "5.4.0-generic",
948+
nodeSelector: map[string]string{
949+
"feature.node.kubernetes.io/kernel-version.full": "5.4.0-generic",
950+
"feature.node.kubernetes.io/system-os_release.VERSION_ID": "20.04",
951+
},
952+
}
953+
954+
spec1, err := getDriverSpec(cr, pool1)
955+
require.NoError(t, err)
956+
spec2, err := getDriverSpec(cr, pool2)
957+
require.NoError(t, err)
958+
959+
// Verify each spec has correct values
960+
assert.Equal(t, "nvcr.io/nvidia/driver:535.104.05-5.15.0-generic-ubuntu22.04", spec1.ImagePath)
961+
assert.Equal(t, "nvcr.io/nvidia/driver:535.104.05-5.4.0-generic-ubuntu20.04", spec2.ImagePath)
962+
assert.Equal(t, "ubuntu22.04", spec1.OSVersion)
963+
assert.Equal(t, "ubuntu20.04", spec2.OSVersion)
964+
965+
// Verify NodeSelectors are independent
966+
assert.Equal(t, "5.15.0-generic", spec1.Spec.NodeSelector["feature.node.kubernetes.io/kernel-version.full"])
967+
assert.Equal(t, "5.4.0-generic", spec2.Spec.NodeSelector["feature.node.kubernetes.io/kernel-version.full"])
968+
969+
// Verify specs have independent pointers
970+
assert.NotEqual(t, spec1.Spec, spec2.Spec)
971+
972+
// Verify modifying one doesn't affect the other
973+
spec1.Spec.NodeSelector["test-key"] = "test-value"
974+
_, exists := spec2.Spec.NodeSelector["test-key"]
975+
assert.False(t, exists)
976+
}

0 commit comments

Comments
 (0)