Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,18 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRollingUpdate(ctx context.Contex
}
}

// On top of considering maxUnavailable, before scaling down it is also important to consider how unavailability is distributed
// across MachineSets: if there are unavailable machines on the new MachineSet, it is necessary to slow down or stop the rollout
// to prevent transitioning all the Machines to a broken state.
// In order to do so, compute the number of unavailable machines on the new MachineSet and use it to reduce totalScaleDownCount.
newMSUnavailableMachineCount := max(ptr.Deref(p.newMS.Spec.Replicas, 0)-ptr.Deref(p.newMS.Status.AvailableReplicas, 0), 0)

// Compute the total number of replicas that can be scaled down.
// Exit immediately if there is no room for scaling down.
// NOTE: this is a quick preliminary check to verify if there is room for scaling down any of the oldMSs; further down the code
// will make additional checks to ensure scale down actually happens without breaching MaxUnavailable, and
// if necessary, it will reduce the extent of the scale down accordingly.
totalScaleDownCount := max(totalSpecReplicas-totalPendingScaleDown-minAvailable, 0)
totalScaleDownCount := max(totalSpecReplicas-totalPendingScaleDown-minAvailable-newMSUnavailableMachineCount, 0)
if totalScaleDownCount <= 0 {
return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,34 @@ func Test_reconcileOldMachineSetsRollingUpdate(t *testing.T) {
},
expectedNotes: map[string][]string{},
},
{
name: "do not scale down if there are unavailable replicas on the new MachineSet (maxSurge 1, maxUnavailable 0), all machines unavailable",
scaleIntent: map[string]int32{},
md: createMD("v2", 3, withRollingUpdateStrategy(1, 0)),
newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // one machine created on the new MS due to maxSurge, but it is not reaching the available state
oldMSs: []*clusterv1.MachineSet{
createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(0)),
},
expectScaleIntent: map[string]int32{
// no new scale down intent for oldMSs (ms1): unavailability on new MS
},
expectedNotes: map[string][]string{},
skipMaxUnavailabilityCheck: true, // The test case simulates all machines being unavailable, so this check would fail
},
{
name: "do not scale down if there are unavailable replicas on the new MachineSet (maxSurge 0, maxUnavailable 1), all machines unavailable",
scaleIntent: map[string]int32{},
md: createMD("v2", 3, withRollingUpdateStrategy(0, 1)),
newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // one machine created on the new MS after scale down of the old MS, but it is not reaching the available state
oldMSs: []*clusterv1.MachineSet{
createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(0)), // OldMS scaled down due to maxUnavailable 1
},
expectScaleIntent: map[string]int32{
// no new scale down intent for oldMSs (ms1): unavailability on new MS
},
expectedNotes: map[string][]string{},
skipMaxUnavailabilityCheck: true, // The test case simulates all machines being unavailable, so this check would fail
},
{
name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer (newMS is scaling down)",
scaleIntent: map[string]int32{
Expand Down
Loading