Commit 6c61a9b

marnixbouhuis authored and c3y1huang committed
test: add test for scheduling bug triggered by setting auto-balance to best-effort
Signed-off-by: Marnix Bouhuis <[email protected]>
1 parent b17cc5f commit 6c61a9b

File tree

1 file changed: +120 -0 lines changed


scheduler/replica_scheduler_test.go

Lines changed: 120 additions & 0 deletions
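Before the diff itself, here is a compact, standalone Go sketch (not part of the commit) of the fixture that the new generateBestEffortAutoBalanceScheduleTestCase helper builds below: two nodes in one zone, two disks per node, and one pre-existing replica on node1's first disk. The disk IDs and the capacity constant are illustrative stand-ins for the test's getDiskID values and TestDiskAvailableSize.

package main

import "fmt"

// testDiskAvailableSize stands in for the fixture constant TestDiskAvailableSize;
// the exact value does not matter for the scenario, only the ratios do.
const testDiskAvailableSize int64 = 1 << 30

// disk summarizes what addDisks sets up for each disk in the test case.
type disk struct {
	node       string
	available  int64
	hasReplica bool
}

func main() {
	// The four disks created in generateBestEffortAutoBalanceScheduleTestCase
	// (IDs are illustrative; the test derives them via getDiskID).
	fixture := map[string]disk{
		"node1-disk1": {node: "node1", available: testDiskAvailableSize, hasReplica: true},  // existing replica lives here
		"node1-disk2": {node: "node1", available: testDiskAvailableSize, hasReplica: false}, // the buggy best-effort path picks this disk
		"node2-disk1": {node: "node2", available: testDiskAvailableSize/2 - 100, hasReplica: false},
		"node2-disk2": {node: "node2", available: testDiskAvailableSize / 2, hasReplica: false}, // expected pick
	}
	for id, d := range fixture {
		fmt.Printf("%s on %s: %d bytes free, hasReplica=%v\n", id, d.node, d.available, d.hasReplica)
	}
}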
@@ -3,6 +3,7 @@ package scheduler
 import (
 	"context"
 	"fmt"
+	"strconv"
 	"testing"
 	"time"
 
@@ -269,6 +270,7 @@ type ReplicaSchedulerTestCase struct {
 	replicaNodeSoftAntiAffinity string
 	replicaZoneSoftAntiAffinity string
 	replicaDiskSoftAntiAffinity string
+	replicaAutoBalance string
 	ReplicaReplenishmentWaitInterval string
 
 	// some test cases only try to schedule a subset of a volume's replicas
@@ -1156,6 +1158,11 @@ func (s *TestSuite) TestReplicaScheduler(c *C) {
 	tc.firstNilReplica = -1
 	testCases["non-reusable replica after interval expires"] = tc
 
+	// Test scheduling on the right node when "best-effort" auto balancing is enabled and an incorrect node has a
+	// disk with less load.
+	tc = generateBestEffortAutoBalanceScheduleTestCase()
+	testCases["scheduling on the right node with \"best-effort\" auto balancing"] = tc
+
 	for name, tc := range testCases {
 		fmt.Printf("testing %v\n", name)
 
@@ -1323,6 +1330,108 @@ func generateFailedReplicaTestCase(
 	return
 }
 
+// Test scheduling on the right node when "best-effort" auto balancing is enabled and an incorrect node has a
+// disk with less load.
+func generateBestEffortAutoBalanceScheduleTestCase() *ReplicaSchedulerTestCase {
+	tc := &ReplicaSchedulerTestCase{
+		engineImage:        newEngineImage(TestEngineImage, longhorn.EngineImageStateDeployed),
+		allReplicas:        make(map[string]*longhorn.Replica),
+		replicasToSchedule: map[string]struct{}{},
+		firstNilReplica:    -1,
+	}
+
+	daemon1 := newDaemonPod(corev1.PodRunning, TestDaemon1, TestNamespace, TestNode1, TestIP1)
+	daemon2 := newDaemonPod(corev1.PodRunning, TestDaemon2, TestNamespace, TestNode2, TestIP2)
+	tc.daemons = []*corev1.Pod{
+		daemon1,
+		daemon2,
+	}
+
+	// Create 2 nodes in the same zone
+	node1 := newNode(TestNode1, TestNamespace, TestZone1, true, longhorn.ConditionStatusTrue)
+	node2 := newNode(TestNode2, TestNamespace, TestZone1, true, longhorn.ConditionStatusTrue)
+	tc.engineImage.Status.NodeDeploymentMap[node1.Name] = true
+	tc.engineImage.Status.NodeDeploymentMap[node2.Name] = true
+
+	// For the test create a volume that requires 2 replicas
+	tc.volume = newVolume(TestVolumeName, 2)
+
+	// Give each node 2 disks
+	addDisks := func(node *longhorn.Node, index int64, disk longhorn.DiskSpec, storageAvailable int64, hasReplica bool) (diskID string) {
+		if node.Spec.Disks == nil {
+			node.Spec.Disks = make(map[string]longhorn.DiskSpec)
+		}
+		if node.Status.DiskStatus == nil {
+			node.Status.DiskStatus = make(map[string]*longhorn.DiskStatus)
+		}
+
+		var scheduledReplica map[string]int64
+		if hasReplica {
+			replica := newReplicaForVolume(tc.volume)
+			replica.Spec.NodeID = node.ObjectMeta.Name
+			tc.allReplicas[replica.Name] = replica
+			scheduledReplica = map[string]int64{replica.Name: TestVolumeSize}
+		}
+
+		id := getDiskID(node.ObjectMeta.Name, strconv.FormatInt(index, 10))
+		node.Spec.Disks[id] = disk
+		node.Status.DiskStatus[id] = &longhorn.DiskStatus{
+			StorageAvailable: storageAvailable,
+			StorageScheduled: 0,
+			StorageMaximum:   TestDiskSize,
+			Conditions: []longhorn.Condition{
+				newCondition(longhorn.DiskConditionTypeSchedulable, longhorn.ConditionStatusTrue),
+			},
+			DiskUUID:         id,
+			Type:             longhorn.DiskTypeFilesystem,
+			ScheduledReplica: scheduledReplica,
+		}
+		return id
+	}
+
+	node1disk1 := newDisk(TestDefaultDataPath, true, 0)
+	node1disk2 := newDisk(TestDefaultDataPath, true, 0)
+	addDisks(node1, 1, node1disk1, TestDiskAvailableSize, true)  // Later we schedule a replica on this disk
+	addDisks(node1, 2, node1disk2, TestDiskAvailableSize, false) // No replica, but current bug causes scheduler to pick this disk
+
+	node2disk1 := newDisk(TestDefaultDataPath, true, 0)
+	node2disk2 := newDisk(TestDefaultDataPath, true, 0)
+	addDisks(node2, 1, node2disk1, TestDiskAvailableSize/2-100, false)               // No replica
+	expectedDiskID := addDisks(node2, 2, node2disk2, TestDiskAvailableSize/2, false) // No replica, scheduler should choose this since it has the most storage available from the valid options.
+
+	// TODO: setting the next line to "best-effort" causes the test to fail. Removing this line bypasses the bug and makes the test pass.
+	tc.replicaAutoBalance = "best-effort"    // The current scheduling bug happens with best-effort only.
+	tc.replicaDiskSoftAntiAffinity = "false" // Do not allow scheduling of replicas on the same disk.
+	tc.replicaNodeSoftAntiAffinity = "false" // Do not allow scheduling of replicas on the same node.
+	tc.replicaZoneSoftAntiAffinity = "true"  // Allow scheduling in the same zone; both nodes are in the same one. Otherwise the scheduler takes a shortcut that hides the bug.
+
+	tc.nodes = map[string]*longhorn.Node{
+		TestNode1: node1,
+		TestNode2: node2,
+	}
+
+	// Add replica that still needs to be scheduled
+	replicaToSchedule := newReplicaForVolume(tc.volume)
+	tc.allReplicas[replicaToSchedule.Name] = replicaToSchedule
+
+	// Only test scheduling for the replicaToSchedule, we don't want to schedule the replica that is already on node1disk1 again
+	tc.replicasToSchedule[replicaToSchedule.Name] = struct{}{}
+
+	// Expect replica to be scheduled on node2disk2.
+	// - node1disk1 should not be possible, there is already a replica on the node and on the disk
+	// - node1disk2 should not be possible, there is already a replica on the node
+	// - node2disk1 is possible, but does not have the most available storage space left
+	// - node2disk2 is possible, and has the most storage left of all valid options. This should be the candidate.
+	tc.expectedNodes = map[string]*longhorn.Node{
+		TestNode2: node2,
+	}
+	tc.expectedDisks = map[string]struct{}{
+		expectedDiskID: {},
+	}
+
+	return tc
+}
+
 func setSettings(tc *ReplicaSchedulerTestCase, lhClient *lhfake.Clientset, sIndexer cache.Indexer, c *C) {
 	// Set default-instance-manager-image setting
 	s := initSettings(string(types.SettingNameDefaultInstanceManagerImage), TestInstanceManagerImage)
@@ -1378,6 +1487,17 @@ func setSettings(tc *ReplicaSchedulerTestCase, lhClient *lhfake.Clientset, sIndexer cache.Indexer, c *C) {
 		err = sIndexer.Add(setting)
 		c.Assert(err, IsNil)
 	}
+	// Set replica auto-balance setting
+	if tc.replicaAutoBalance != "" {
+		s := initSettings(
+			string(types.SettingNameReplicaAutoBalance),
+			tc.replicaAutoBalance)
+		setting, err :=
+			lhClient.LonghornV1beta2().Settings(TestNamespace).Create(context.TODO(), s, metav1.CreateOptions{})
+		c.Assert(err, IsNil)
+		err = sIndexer.Add(setting)
+		c.Assert(err, IsNil)
+	}
 	// Set replica replenishment wait interval setting
 	if tc.ReplicaReplenishmentWaitInterval != "" {
 		s := initSettings(
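The expectation comments in generateBestEffortAutoBalanceScheduleTestCase reduce to a filter-then-pick rule: disks on a node that already hosts a replica of the volume are excluded (node soft anti-affinity is "false", so it acts as a hard constraint here), and among the remaining disks the one with the most available storage should win. A minimal standalone sketch of that rule follows, using hypothetical stand-in types rather than the real scheduler API.

package main

import "fmt"

// candidate is a hypothetical stand-in for the scheduler's per-disk view; the
// real code works with longhorn.Node and longhorn.DiskStatus objects.
type candidate struct {
	diskID           string
	nodeID           string
	storageAvailable int64
}

// pickDisk applies the rule the expectation comments describe: skip any disk on
// a node that already holds a replica of the volume, then choose the remaining
// disk with the most available storage.
func pickDisk(candidates []candidate, nodesWithReplica map[string]bool) (string, bool) {
	bestID, found := "", false
	bestAvail := int64(-1)
	for _, c := range candidates {
		if nodesWithReplica[c.nodeID] {
			continue // node anti-affinity: this node already hosts a replica
		}
		if c.storageAvailable > bestAvail {
			bestID, bestAvail, found = c.diskID, c.storageAvailable, true
		}
	}
	return bestID, found
}

func main() {
	const avail = int64(1 << 30) // stands in for TestDiskAvailableSize
	cands := []candidate{
		{"node1-disk1", "node1", avail},
		{"node1-disk2", "node1", avail},
		{"node2-disk1", "node2", avail/2 - 100},
		{"node2-disk2", "node2", avail / 2},
	}
	disk, ok := pickDisk(cands, map[string]bool{"node1": true})
	fmt.Println(disk, ok) // prints: node2-disk2 true
}

With the capacities set up in the diff, the only pick consistent with these settings is node2's second disk, which is exactly what tc.expectedDisks asserts; the reported bug is that enabling "best-effort" auto-balance steers the scheduler to node1's empty disk instead.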

0 commit comments
