diff options
author | Sebastiaan van Stijn <github@gone.nl> | 2021-10-22 15:33:22 +0200 |
---|---|---|
committer | Sebastiaan van Stijn <github@gone.nl> | 2021-10-27 12:30:35 +0200 |
commit | 4b9a3dac46fd8e36f9f4f7b5835f108eb69e164a (patch) | |
tree | 2b416538b0bfe0d860639e91c3e3de43bb6b8fd4 /integration | |
parent | e2f740de442bac52b280bc485a3ca5b31567d938 (diff) | |
download | docker-4b9a3dac46fd8e36f9f4f7b5835f108eb69e164a.tar.gz |
Fix race in TestCreateServiceSecretFileMode, TestCreateServiceConfigFileMode
Looks like this test was broken from the start, and fully relied on a race
condition. (Test was added in 65ee7fff02111bf696bc2fec442d07c2957f4151)
The problem is in the service's command: `ls -l /etc/config || /bin/top`, which
will either:
- exit immediately if the secret is mounted correctly at `/etc/config` (which it should)
- keep running with `/bin/top` if the above failed
After the service is created, the test enters a race-condition, checking for 1
task to be running (which it ocassionally is), after which it proceeds, and looks
up the list of tasks of the service, to get the log output of `ls -l /etc/config`.
This is another race: first of all, the original filter for that task lookup did
not filter by `running`, so it would pick "any" task of the service (either failed,
running, or "completed" (successfully exited) tasks).
In the meantime though, SwarmKit kept reconciling the service, and creating new
tasks, so even if the test was able to get the ID of the correct task, that task
may already have been exited, and removed (task-limit is 5 by default), so only
if the test was "lucky", it would be able to get the logs, but of course, chances
were likely that it would be "too late", and the task already gone.
The problem can be easily reproduced when running the steps manually:
echo 'CONFIG' | docker config create myconfig -
docker service create --config source=myconfig,target=/etc/config,mode=0777 --name myservice busybox sh -c 'ls -l /etc/config || /bin/top'
The above creates the service, but it keeps retrying, because each task exits
immediately (followed by SwarmKit reconciling and starting a new task);
mjntpfkkyuuc1dpay4h00c4oo
overall progress: 0 out of 1 tasks
1/1: ready [======================================> ]
verify: Detected task failure
^COperation continuing in background.
Use `docker service ps mjntpfkkyuuc1dpay4h00c4oo` to check progress.
And checking the tasks for the service reveals that tasks exit cleanly (no error),
but _do exit_, so swarm just keeps up reconciling, and spinning up new tasks;
docker service ps myservice --no-trunc
ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS
2wmcuv4vffnet8nybg3he4v9n myservice.1 busybox:latest@sha256:f7ca5a32c10d51aeda3b4d01c61c6061f497893d7f6628b92f822f7117182a57 docker-desktop Ready Ready less than a second ago
5p8b006uec125iq2892lxay64 \_ myservice.1 busybox:latest@sha256:f7ca5a32c10d51aeda3b4d01c61c6061f497893d7f6628b92f822f7117182a57 docker-desktop Shutdown Complete less than a second ago
k8lpsvlak4b3nil0zfkexw61p \_ myservice.1 busybox:latest@sha256:f7ca5a32c10d51aeda3b4d01c61c6061f497893d7f6628b92f822f7117182a57 docker-desktop Shutdown Complete 6 seconds ago
vsunl5pi7e2n9ol3p89kvj6pn \_ myservice.1 busybox:latest@sha256:f7ca5a32c10d51aeda3b4d01c61c6061f497893d7f6628b92f822f7117182a57 docker-desktop Shutdown Complete 11 seconds ago
orxl8b6kt2l6dfznzzd4lij4s \_ myservice.1 busybox:latest@sha256:f7ca5a32c10d51aeda3b4d01c61c6061f497893d7f6628b92f822f7117182a57 docker-desktop Shutdown Complete 17 seconds ago
This patch changes the service's command to `sleep`, so that a successful task
(after successfully performing `ls -l /etc/config`) continues to be running until
the service is deleted. With that change, the service should (usually) reconcile
immediately, which removes the race condition, and should also make it faster :)
This patch changes the tests to use client.ServiceLogs() instead of using the
service's tasklist to directly access container logs. This should also fix some
failures that happened if some tasks failed to start before reconciling, in which
case client.TaskList() (with the current filters), could return more tasks than
anticipated (as it also contained the exited tasks);
=== RUN TestCreateServiceSecretFileMode
create_test.go:291: assertion failed: 2 (int) != 1 (int)
--- FAIL: TestCreateServiceSecretFileMode (7.88s)
=== RUN TestCreateServiceConfigFileMode
create_test.go:355: assertion failed: 2 (int) != 1 (int)
--- FAIL: TestCreateServiceConfigFileMode (7.87s)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
(cherry picked from commit 13cff6d5839cca995d889b47d656888e8e2345ca)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
Diffstat (limited to 'integration')
-rw-r--r-- | integration/service/create_test.go | 26 |
1 files changed, 6 insertions, 20 deletions
diff --git a/integration/service/create_test.go b/integration/service/create_test.go index 9a7d5c0c14..1992a6e4ca 100644 --- a/integration/service/create_test.go +++ b/integration/service/create_test.go @@ -267,7 +267,7 @@ func TestCreateServiceSecretFileMode(t *testing.T) { serviceID := swarm.CreateService(t, d, swarm.ServiceWithReplicas(instances), swarm.ServiceWithName(serviceName), - swarm.ServiceWithCommand([]string{"/bin/sh", "-c", "ls -l /etc/secret || /bin/top"}), + swarm.ServiceWithCommand([]string{"/bin/sh", "-c", "ls -l /etc/secret && sleep inf"}), swarm.ServiceWithSecret(&swarmtypes.SecretReference{ File: &swarmtypes.SecretReferenceFileTarget{ Name: "/etc/secret", @@ -282,15 +282,8 @@ func TestCreateServiceSecretFileMode(t *testing.T) { poll.WaitOn(t, swarm.RunningTasksCount(client, serviceID, instances), swarm.ServicePoll) - filter := filters.NewArgs() - filter.Add("service", serviceID) - tasks, err := client.TaskList(ctx, types.TaskListOptions{ - Filters: filter, - }) - assert.NilError(t, err) - assert.Check(t, is.Equal(len(tasks), 1)) - - body, err := client.ContainerLogs(ctx, tasks[0].Status.ContainerStatus.ContainerID, types.ContainerLogsOptions{ + body, err := client.ServiceLogs(ctx, serviceID, types.ContainerLogsOptions{ + Tail: "1", ShowStdout: true, }) assert.NilError(t, err) @@ -330,7 +323,7 @@ func TestCreateServiceConfigFileMode(t *testing.T) { serviceName := "TestService_" + t.Name() serviceID := swarm.CreateService(t, d, swarm.ServiceWithName(serviceName), - swarm.ServiceWithCommand([]string{"/bin/sh", "-c", "ls -l /etc/config || /bin/top"}), + swarm.ServiceWithCommand([]string{"/bin/sh", "-c", "ls -l /etc/config && sleep inf"}), swarm.ServiceWithReplicas(instances), swarm.ServiceWithConfig(&swarmtypes.ConfigReference{ File: &swarmtypes.ConfigReferenceFileTarget{ @@ -346,15 +339,8 @@ func TestCreateServiceConfigFileMode(t *testing.T) { poll.WaitOn(t, swarm.RunningTasksCount(client, serviceID, instances)) - filter := filters.NewArgs() - filter.Add("service", serviceID) - tasks, err := client.TaskList(ctx, types.TaskListOptions{ - Filters: filter, - }) - assert.NilError(t, err) - assert.Check(t, is.Equal(len(tasks), 1)) - - body, err := client.ContainerLogs(ctx, tasks[0].Status.ContainerStatus.ContainerID, types.ContainerLogsOptions{ + body, err := client.ServiceLogs(ctx, serviceID, types.ContainerLogsOptions{ + Tail: "1", ShowStdout: true, }) assert.NilError(t, err) |