Skip to content

Commit bc55425

Browse files
authored
fix/service delete deadline (#227)
* fix: service container wait only apply if the destroy grace period is larger than 0s. Interpret also the wait result chan and errors * chore(service): explains why we ignore certain errors * chore: readds terraform-plugin-docs * docs(service): explains the behavior if the grace period is 0s * fix(service): ignore certain errors on wait error * refactor: introduce more speaking error contains ignorable message method because it's more precise on what's happening in the code
1 parent 1ebe551 commit bc55425

File tree

10 files changed

+83
-27
lines changed

10 files changed

+83
-27
lines changed

docs/resources/service.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ Optional:
354354
- **privileges** (Block List, Max: 1) Security options for the container (see [below for nested schema](#nestedblock--task_spec--container_spec--privileges))
355355
- **read_only** (Boolean) Mount the container's root filesystem as read only
356356
- **secrets** (Block Set) References to zero or more secrets that will be exposed to the service (see [below for nested schema](#nestedblock--task_spec--container_spec--secrets))
357-
- **stop_grace_period** (String) Amount of time to wait for the container to terminate before forcefully removing it (ms|s|m|h)
357+
- **stop_grace_period** (String) Amount of time to wait for the container to terminate before forcefully removing it (ms|s|m|h). If not specified or '0s' the destroy will not check if all tasks/containers of the service terminate.
358358
- **stop_signal** (String) Signal to stop the container
359359
- **user** (String) The user inside the container
360360

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ require (
99
github.com/docker/go-connections v0.4.0
1010
github.com/docker/go-units v0.4.0
1111
github.com/hashicorp/go-cty v1.4.1-0.20200414143053-d3edf31b6320
12+
github.com/hashicorp/terraform-plugin-docs v0.4.0 // indirect
1213
github.com/hashicorp/terraform-plugin-sdk/v2 v2.6.1
1314
github.com/mattn/go-colorable v0.1.8 // indirect
1415
github.com/mitchellh/go-homedir v1.1.0

go.sum

Lines changed: 27 additions & 0 deletions
Large diffs are not rendered by default.

internal/provider/helpers.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,15 @@ func testCheckLabelMap(name string, partialKey string, expectedLabels map[string
122122
return nil
123123
}
124124
}
125+
126+
// containsIgnorableErrorMessage checks if the error message contains one of the
127+
// message to ignore. Returns true if so, false otherwise (also if no ignorable message is given)
128+
func containsIgnorableErrorMessage(errorMsg string, ignorableErrorMessages ...string) bool {
129+
for _, ignorableErrorMessage := range ignorableErrorMessages {
130+
if strings.Contains(errorMsg, ignorableErrorMessage) {
131+
return true
132+
}
133+
}
134+
135+
return false
136+
}

internal/provider/resource_docker_container_funcs.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ func resourceDockerContainerCreate(ctx context.Context, d *schema.ResourceData,
355355
// Still support the deprecated properties
356356
if v, ok := d.GetOk("networks"); ok {
357357
if err := client.NetworkDisconnect(ctx, "bridge", retContainer.ID, false); err != nil {
358-
if !strings.Contains(err.Error(), "is not connected to the network bridge") {
358+
if !containsIgnorableErrorMessage(err.Error(), "is not connected to the network bridge") {
359359
return diag.Errorf("Unable to disconnect the default network: %s", err)
360360
}
361361
}
@@ -375,7 +375,7 @@ func resourceDockerContainerCreate(ctx context.Context, d *schema.ResourceData,
375375
// But overwrite them with the future ones, if set
376376
if v, ok := d.GetOk("networks_advanced"); ok {
377377
if err := client.NetworkDisconnect(ctx, "bridge", retContainer.ID, false); err != nil {
378-
if !strings.Contains(err.Error(), "is not connected to the network bridge") {
378+
if !containsIgnorableErrorMessage(err.Error(), "is not connected to the network bridge") {
379379
return diag.Errorf("Unable to disconnect the default network: %s", err)
380380
}
381381
}
@@ -848,7 +848,7 @@ func resourceDockerContainerDelete(ctx context.Context, d *schema.ResourceData,
848848
case waitOk := <-waitOkC:
849849
log.Printf("[INFO] Container exited with code [%v]: '%s'", waitOk.StatusCode, d.Id())
850850
case err := <-errorC:
851-
if !(strings.Contains(err.Error(), "No such container") || strings.Contains(err.Error(), "is already in progress")) {
851+
if !containsIgnorableErrorMessage(err.Error(), "No such container", "is already in progress") {
852852
return diag.Errorf("Error waiting for container removal '%s': %s", d.Id(), err)
853853
}
854854
}

internal/provider/resource_docker_network_funcs.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"encoding/json"
66
"log"
7-
"strings"
87
"time"
98

109
"github.com/docker/docker/api/types"
@@ -201,7 +200,7 @@ func resourceDockerNetworkRemoveRefreshFunc(ctx context.Context,
201200
}
202201

203202
if err := client.NetworkRemove(ctx, networkID); err != nil {
204-
if strings.Contains(err.Error(), "has active endpoints") {
203+
if containsIgnorableErrorMessage(err.Error(), "has active endpoints") {
205204
return networkID, "pending", nil
206205
}
207206
return networkID, "other", err

internal/provider/resource_docker_service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ func resourceDockerService() *schema.Resource {
303303
},
304304
"stop_grace_period": {
305305
Type: schema.TypeString,
306-
Description: "Amount of time to wait for the container to terminate before forcefully removing it (ms|s|m|h)",
306+
Description: "Amount of time to wait for the container to terminate before forcefully removing it (ms|s|m|h). If not specified or '0s' the destroy will not check if all tasks/containers of the service terminate.",
307307
Optional: true,
308308
Computed: true,
309309
ValidateDiagFunc: validateDurationGeq0(),

internal/provider/resource_docker_service_funcs.go

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ func resourceDockerServiceCreate(ctx context.Context, d *schema.ResourceData, me
6767
if deleteErr := deleteService(ctx, service.ID, d, client); deleteErr != nil {
6868
return diag.FromErr(deleteErr)
6969
}
70-
if strings.Contains(err.Error(), "timeout while waiting for state") {
70+
if containsIgnorableErrorMessage(err.Error(), "timeout while waiting for state") {
7171
return diag.FromErr(&DidNotConvergeError{ServiceID: service.ID, Timeout: convergeConfig.timeout})
7272
}
7373
return diag.FromErr(err)
@@ -205,7 +205,7 @@ func resourceDockerServiceUpdate(ctx context.Context, d *schema.ResourceData, me
205205
state, err := stateConf.WaitForStateContext(ctx)
206206
log.Printf("[INFO] State awaited: %v with error: %v", state, err)
207207
if err != nil {
208-
if strings.Contains(err.Error(), "timeout while waiting for state") {
208+
if containsIgnorableErrorMessage(err.Error(), "timeout while waiting for state") {
209209
return diag.FromErr(&DidNotConvergeError{ServiceID: service.ID, Timeout: convergeConfig.timeout})
210210
}
211211
return diag.FromErr(err)
@@ -249,7 +249,7 @@ func fetchDockerService(ctx context.Context, ID string, name string, client *cli
249249
func deleteService(ctx context.Context, serviceID string, d *schema.ResourceData, client *client.Client) error {
250250
// get containerIDs of the running service because they do not exist after the service is deleted
251251
serviceContainerIds := make([]string, 0)
252-
if _, ok := d.GetOk("task_spec.0.container_spec.0.stop_grace_period"); ok {
252+
if v, ok := d.GetOk("task_spec.0.container_spec.0.stop_grace_period"); ok && v.(string) != "0s" {
253253
filters := filters.NewArgs()
254254
filters.Add("service", d.Get("name").(string))
255255
tasks, err := client.TaskList(ctx, types.TaskListOptions{
@@ -264,39 +264,58 @@ func deleteService(ctx context.Context, serviceID string, d *schema.ResourceData
264264
if task.Status.ContainerStatus != nil {
265265
containerID = task.Status.ContainerStatus.ContainerID
266266
}
267-
log.Printf("[INFO] Found container ['%s'] for destroying: '%s'", task.Status.State, containerID)
267+
log.Printf("[INFO] Found container with ID ['%s'] in state '%s' for destroying", containerID, task.Status.State)
268268
if strings.TrimSpace(containerID) != "" && task.Status.State != swarm.TaskStateShutdown {
269269
serviceContainerIds = append(serviceContainerIds, containerID)
270270
}
271271
}
272272
}
273273

274274
// delete the service
275-
log.Printf("[INFO] Deleting service: '%s'", serviceID)
275+
log.Printf("[INFO] Deleting service with ID: '%s'", serviceID)
276276
if err := client.ServiceRemove(ctx, serviceID); err != nil {
277-
return fmt.Errorf("Error deleting service %s: %s", serviceID, err)
277+
return fmt.Errorf("Error deleting service with ID '%s': %s", serviceID, err)
278278
}
279279

280280
// destroy each container after a grace period if specified
281-
if v, ok := d.GetOk("task_spec.0.container_spec.0.stop_grace_period"); ok {
281+
if v, ok := d.GetOk("task_spec.0.container_spec.0.stop_grace_period"); ok && v.(string) != "0s" {
282282
for _, containerID := range serviceContainerIds {
283-
destroyGraceSeconds, _ := time.ParseDuration(v.(string))
284-
log.Printf("[INFO] Waiting for container: '%s' to exit: max %v", containerID, destroyGraceSeconds)
285-
ctx, cancel := context.WithTimeout(ctx, destroyGraceSeconds)
286-
// TODO why defer? see container_resource with handling return channels! why not remove then wait?
283+
destroyGraceTime, _ := time.ParseDuration(v.(string))
284+
log.Printf("[INFO] Waiting for container with ID: '%s' to exit: max %v", containerID, destroyGraceTime)
285+
ctx, cancel := context.WithTimeout(ctx, destroyGraceTime)
286+
// We defer explicitly to avoid context leaks
287287
defer cancel()
288-
exitCode, _ := client.ContainerWait(ctx, containerID, container.WaitConditionRemoved)
289-
log.Printf("[INFO] Container exited with code [%v]: '%s'", exitCode, containerID)
288+
289+
containerWaitChan, containerWaitErrChan := client.ContainerWait(ctx, containerID, container.WaitConditionRemoved)
290+
select {
291+
case containerWaitResult := <-containerWaitChan:
292+
if containerWaitResult.Error != nil {
293+
// We ignore those types of errors because the container might be already removed before
294+
// the containerWait returns
295+
if !(containsIgnorableErrorMessage(containerWaitResult.Error.Message, "No such container")) {
296+
return fmt.Errorf("failed to wait for container with ID '%s': '%v'", containerID, containerWaitResult.Error.Message)
297+
}
298+
}
299+
log.Printf("[INFO] Container with ID '%s' exited with code '%v'", containerID, containerWaitResult.StatusCode)
300+
case containerWaitErrResult := <-containerWaitErrChan:
301+
// We ignore those types of errors because the container might be already removed before
302+
// the containerWait returns
303+
if !(containsIgnorableErrorMessage(containerWaitErrResult.Error(), "No such container")) {
304+
return fmt.Errorf("error on wait for container with ID '%s': %v", containerID, containerWaitErrResult)
305+
}
306+
}
290307

291308
removeOpts := types.ContainerRemoveOptions{
292309
RemoveVolumes: true,
293310
Force: true,
294311
}
295312

296-
log.Printf("[INFO] Removing container: '%s'", containerID)
313+
log.Printf("[INFO] Removing container with ID: '%s'", containerID)
297314
if err := client.ContainerRemove(ctx, containerID, removeOpts); err != nil {
298-
if !(strings.Contains(err.Error(), "No such container") || strings.Contains(err.Error(), "is already in progress")) {
299-
return fmt.Errorf("Error deleting container %s: %s", containerID, err)
315+
// We ignore those types of errors because the container might be already removed of the removal is in progress
316+
// before the containerRemove call happens
317+
if !containsIgnorableErrorMessage(err.Error(), "No such container", "is already in progress") {
318+
return fmt.Errorf("Error deleting container with ID '%s': %s", containerID, err)
300319
}
301320
}
302321
}

internal/provider/resource_docker_service_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"os"
77
"regexp"
88
"strconv"
9-
"strings"
109
"testing"
1110
"time"
1211

@@ -1328,7 +1327,7 @@ func checkAndRemoveImages(ctx context.Context, s *terraform.State) error {
13281327
Force: true,
13291328
})
13301329
if err != nil {
1331-
if strings.Contains(err.Error(), "image is being used by running container") {
1330+
if containsIgnorableErrorMessage(err.Error(), "image is being used by running container") {
13321331
if retryDeleteCount == maxRetryDeleteCount {
13331332
return fmt.Errorf("could not delete image '%s' after %d retries", image.ID, maxRetryDeleteCount)
13341333
}

internal/provider/resource_docker_volume.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"encoding/json"
66
"log"
7-
"strings"
87
"time"
98

109
"github.com/docker/docker/api/types"
@@ -158,7 +157,7 @@ func resourceDockerVolumeRemoveRefreshFunc(
158157
forceDelete := true
159158

160159
if err := client.VolumeRemove(context.Background(), volumeID, forceDelete); err != nil {
161-
if strings.Contains(err.Error(), "volume is in use") { // store.IsInUse(err)
160+
if containsIgnorableErrorMessage(err.Error(), "volume is in use") {
162161
log.Printf("[INFO] Volume with id '%v' is still in use", volumeID)
163162
return volumeID, "in_use", nil
164163
}

0 commit comments

Comments
 (0)