@@ -21,6 +21,7 @@ import (
2121 "fmt"
2222 "math"
2323 "os"
24+ "strings"
2425 "sync"
2526 "time"
2627
@@ -242,6 +243,18 @@ func (p PodDiff) String(ignorePhases sets.String) string {
242243 return ret
243244}
244245
246+ // DeletedPods returns a slice of pods that were present at the beginning
247+ // and then disappeared.
248+ func (p PodDiff ) DeletedPods () []string {
249+ var deletedPods []string
250+ for podName , podInfo := range p {
251+ if podInfo .hostname == nonExist {
252+ deletedPods = append (deletedPods , podName )
253+ }
254+ }
255+ return deletedPods
256+ }
257+
245258// Diff computes a PodDiff given 2 lists of pods.
246259func Diff (oldPods []* v1.Pod , curPods []* v1.Pod ) PodDiff {
247260 podInfoMap := PodDiff {}
@@ -765,9 +778,8 @@ func (config *RCConfig) start() error {
765778 pods := ps .List ()
766779 startupStatus := ComputeRCStartupStatus (pods , config .Replicas )
767780
768- pods = startupStatus .Created
769781 if config .CreatedPods != nil {
770- * config .CreatedPods = pods
782+ * config .CreatedPods = startupStatus . Created
771783 }
772784 if ! config .Silent {
773785 config .RCConfigLog (startupStatus .String (config .Name ))
@@ -787,16 +799,15 @@ func (config *RCConfig) start() error {
787799 }
788800 return fmt .Errorf ("%d containers failed which is more than allowed %d" , startupStatus .FailedContainers , maxContainerFailures )
789801 }
790- if len (pods ) < len (oldPods ) || len (pods ) > config .Replicas {
791- // This failure mode includes:
792- // kubelet is dead, so node controller deleted pods and rc creates more
793- // - diagnose by noting the pod diff below.
794- // pod is unhealthy, so replication controller creates another to take its place
795- // - diagnose by comparing the previous "2 Pod states" lines for inactive pods
796- errorStr := fmt .Sprintf ("Number of reported pods for %s changed: %d vs %d" , config .Name , len (pods ), len (oldPods ))
797- config .RCConfigLog ("%v, pods that changed since the last iteration:" , errorStr )
798- config .RCConfigLog (Diff (oldPods , pods ).String (sets .NewString ()))
799- return fmt .Errorf (errorStr )
802+
803+ diff := Diff (oldPods , pods )
804+ deletedPods := diff .DeletedPods ()
805+ if len (deletedPods ) != 0 {
806+ // There are some pods that have disappeared.
807+ err := fmt .Errorf ("%d pods disappeared for %s: %v" , len (deletedPods ), config .Name , strings .Join (deletedPods , ", " ))
808+ config .RCConfigLog (err .Error ())
809+ config .RCConfigLog (diff .String (sets .NewString ()))
810+ return err
800811 }
801812
802813 if len (pods ) > len (oldPods ) || startupStatus .Running > oldRunning {
0 commit comments