summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--api/server/router/container/backend.go2
-rw-r--r--api/server/router/container/exec.go3
-rw-r--r--builder/dockerfile/builder.go19
-rw-r--r--builder/dockerfile/command/command.go66
-rw-r--r--builder/dockerfile/dispatchers.go107
-rw-r--r--builder/dockerfile/evaluator.go33
-rw-r--r--builder/dockerfile/parser/line_parsers.go29
-rw-r--r--builder/dockerfile/parser/parser.go33
-rw-r--r--builder/dockerfile/parser/testfiles/health/Dockerfile10
-rw-r--r--builder/dockerfile/parser/testfiles/health/result9
-rw-r--r--container/health.go49
-rw-r--r--container/state.go4
-rw-r--r--daemon/commit.go19
-rw-r--r--daemon/exec.go28
-rw-r--r--daemon/health.go314
-rw-r--r--daemon/health_test.go112
-rw-r--r--daemon/inspect.go10
-rw-r--r--daemon/monitor.go9
-rw-r--r--daemon/stop.go2
-rw-r--r--docs/reference/builder.md67
-rw-r--r--docs/reference/run.md60
-rw-r--r--integration-cli/docker_cli_health_test.go154
-rw-r--r--libcontainerd/client_linux.go11
-rw-r--r--libcontainerd/client_windows.go19
-rw-r--r--libcontainerd/types.go1
-rw-r--r--runconfig/opts/parse.go40
-rw-r--r--runconfig/opts/parse_test.go40
27 files changed, 1170 insertions, 80 deletions
diff --git a/api/server/router/container/backend.go b/api/server/router/container/backend.go
index c7eafa770f..b3cc625ff2 100644
--- a/api/server/router/container/backend.go
+++ b/api/server/router/container/backend.go
@@ -17,7 +17,7 @@ type execBackend interface {
ContainerExecCreate(name string, config *types.ExecConfig) (string, error)
ContainerExecInspect(id string) (*backend.ExecInspect, error)
ContainerExecResize(name string, height, width int) error
- ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error
+ ContainerExecStart(ctx context.Context, name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error
ExecExists(name string) (bool, error)
}
diff --git a/api/server/router/container/exec.go b/api/server/router/container/exec.go
index fb88ac824a..21f5dc8300 100644
--- a/api/server/router/container/exec.go
+++ b/api/server/router/container/exec.go
@@ -106,7 +106,8 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
}
// Now run the user process in container.
- if err := s.backend.ContainerExecStart(execName, stdin, stdout, stderr); err != nil {
+ // Maybe we should pass ctx here if we're not detaching?
+ if err := s.backend.ContainerExecStart(context.Background(), execName, stdin, stdout, stderr); err != nil {
if execStartCheck.Detach {
return err
}
diff --git a/builder/dockerfile/builder.go b/builder/dockerfile/builder.go
index 6ac76e877e..a7f96c6f13 100644
--- a/builder/dockerfile/builder.go
+++ b/builder/dockerfile/builder.go
@@ -22,15 +22,16 @@ import (
)
var validCommitCommands = map[string]bool{
- "cmd": true,
- "entrypoint": true,
- "env": true,
- "expose": true,
- "label": true,
- "onbuild": true,
- "user": true,
- "volume": true,
- "workdir": true,
+ "cmd": true,
+ "entrypoint": true,
+ "healthcheck": true,
+ "env": true,
+ "expose": true,
+ "label": true,
+ "onbuild": true,
+ "user": true,
+ "volume": true,
+ "workdir": true,
}
// BuiltinAllowedBuildArgs is list of built-in allowed build args
diff --git a/builder/dockerfile/command/command.go b/builder/dockerfile/command/command.go
index 9e1b799dcf..3e087e422e 100644
--- a/builder/dockerfile/command/command.go
+++ b/builder/dockerfile/command/command.go
@@ -3,40 +3,42 @@ package command
// Define constants for the command strings
const (
- Env = "env"
- Label = "label"
- Maintainer = "maintainer"
- Add = "add"
- Copy = "copy"
- From = "from"
- Onbuild = "onbuild"
- Workdir = "workdir"
- Run = "run"
- Cmd = "cmd"
- Entrypoint = "entrypoint"
- Expose = "expose"
- Volume = "volume"
- User = "user"
- StopSignal = "stopsignal"
- Arg = "arg"
+ Env = "env"
+ Label = "label"
+ Maintainer = "maintainer"
+ Add = "add"
+ Copy = "copy"
+ From = "from"
+ Onbuild = "onbuild"
+ Workdir = "workdir"
+ Run = "run"
+ Cmd = "cmd"
+ Entrypoint = "entrypoint"
+ Expose = "expose"
+ Volume = "volume"
+ User = "user"
+ StopSignal = "stopsignal"
+ Arg = "arg"
+ Healthcheck = "healthcheck"
)
// Commands is list of all Dockerfile commands
var Commands = map[string]struct{}{
- Env: {},
- Label: {},
- Maintainer: {},
- Add: {},
- Copy: {},
- From: {},
- Onbuild: {},
- Workdir: {},
- Run: {},
- Cmd: {},
- Entrypoint: {},
- Expose: {},
- Volume: {},
- User: {},
- StopSignal: {},
- Arg: {},
+ Env: {},
+ Label: {},
+ Maintainer: {},
+ Add: {},
+ Copy: {},
+ From: {},
+ Onbuild: {},
+ Workdir: {},
+ Run: {},
+ Cmd: {},
+ Entrypoint: {},
+ Expose: {},
+ Volume: {},
+ User: {},
+ StopSignal: {},
+ Arg: {},
+ Healthcheck: {},
}
diff --git a/builder/dockerfile/dispatchers.go b/builder/dockerfile/dispatchers.go
index 1de7dc2465..2f3b56cfd2 100644
--- a/builder/dockerfile/dispatchers.go
+++ b/builder/dockerfile/dispatchers.go
@@ -12,7 +12,9 @@ import (
"regexp"
"runtime"
"sort"
+ "strconv"
"strings"
+ "time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/api"
@@ -426,6 +428,111 @@ func cmd(b *Builder, args []string, attributes map[string]bool, original string)
return nil
}
+// parseOptInterval(flag) is the duration of flag.Value, or 0 if
+// empty. An error is reported if the value is given and is not positive.
+func parseOptInterval(f *Flag) (time.Duration, error) {
+ s := f.Value
+ if s == "" {
+ return 0, nil
+ }
+ d, err := time.ParseDuration(s)
+ if err != nil {
+ return 0, err
+ }
+ if d <= 0 {
+ return 0, fmt.Errorf("Interval %#v must be positive", f.name)
+ }
+ return d, nil
+}
+
+// HEALTHCHECK foo
+//
+// Set the default healthcheck command to run in the container (which may be empty).
+// Argument handling is the same as RUN.
+//
+func healthcheck(b *Builder, args []string, attributes map[string]bool, original string) error {
+ if len(args) == 0 {
+ return fmt.Errorf("HEALTHCHECK requires an argument")
+ }
+ typ := strings.ToUpper(args[0])
+ args = args[1:]
+ if typ == "NONE" {
+ if len(args) != 0 {
+ return fmt.Errorf("HEALTHCHECK NONE takes no arguments")
+ }
+ test := strslice.StrSlice{typ}
+ b.runConfig.Healthcheck = &container.HealthConfig{
+ Test: test,
+ }
+ } else {
+ if b.runConfig.Healthcheck != nil {
+ oldCmd := b.runConfig.Healthcheck.Test
+ if len(oldCmd) > 0 && oldCmd[0] != "NONE" {
+ fmt.Fprintf(b.Stdout, "Note: overriding previous HEALTHCHECK: %v\n", oldCmd)
+ }
+ }
+
+ healthcheck := container.HealthConfig{}
+
+ flInterval := b.flags.AddString("interval", "")
+ flTimeout := b.flags.AddString("timeout", "")
+ flRetries := b.flags.AddString("retries", "")
+
+ if err := b.flags.Parse(); err != nil {
+ return err
+ }
+
+ switch typ {
+ case "CMD":
+ cmdSlice := handleJSONArgs(args, attributes)
+ if len(cmdSlice) == 0 {
+ return fmt.Errorf("Missing command after HEALTHCHECK CMD")
+ }
+
+ if !attributes["json"] {
+ typ = "CMD-SHELL"
+ }
+
+ healthcheck.Test = strslice.StrSlice(append([]string{typ}, cmdSlice...))
+ default:
+ return fmt.Errorf("Unknown type %#v in HEALTHCHECK (try CMD)", typ)
+ }
+
+ interval, err := parseOptInterval(flInterval)
+ if err != nil {
+ return err
+ }
+ healthcheck.Interval = interval
+
+ timeout, err := parseOptInterval(flTimeout)
+ if err != nil {
+ return err
+ }
+ healthcheck.Timeout = timeout
+
+ if flRetries.Value != "" {
+ retries, err := strconv.ParseInt(flRetries.Value, 10, 32)
+ if err != nil {
+ return err
+ }
+ if retries < 1 {
+ return fmt.Errorf("--retries must be at least 1 (not %d)", retries)
+ }
+ healthcheck.Retries = int(retries)
+ } else {
+ healthcheck.Retries = 0
+ }
+
+ b.runConfig.Healthcheck = &healthcheck
+ }
+
+ if err := b.commit("", b.runConfig.Cmd, fmt.Sprintf("HEALTHCHECK %q", b.runConfig.Healthcheck)); err != nil {
+ return err
+ }
+
+ return nil
+}
+
// ENTRYPOINT /usr/sbin/nginx
//
// Set the entrypoint (which defaults to sh -c on linux, or cmd /S /C on Windows) to
diff --git a/builder/dockerfile/evaluator.go b/builder/dockerfile/evaluator.go
index 905675d0e0..52786371df 100644
--- a/builder/dockerfile/evaluator.go
+++ b/builder/dockerfile/evaluator.go
@@ -58,22 +58,23 @@ var evaluateTable map[string]func(*Builder, []string, map[string]bool, string) e
func init() {
evaluateTable = map[string]func(*Builder, []string, map[string]bool, string) error{
- command.Env: env,
- command.Label: label,
- command.Maintainer: maintainer,
- command.Add: add,
- command.Copy: dispatchCopy, // copy() is a go builtin
- command.From: from,
- command.Onbuild: onbuild,
- command.Workdir: workdir,
- command.Run: run,
- command.Cmd: cmd,
- command.Entrypoint: entrypoint,
- command.Expose: expose,
- command.Volume: volume,
- command.User: user,
- command.StopSignal: stopSignal,
- command.Arg: arg,
+ command.Env: env,
+ command.Label: label,
+ command.Maintainer: maintainer,
+ command.Add: add,
+ command.Copy: dispatchCopy, // copy() is a go builtin
+ command.From: from,
+ command.Onbuild: onbuild,
+ command.Workdir: workdir,
+ command.Run: run,
+ command.Cmd: cmd,
+ command.Entrypoint: entrypoint,
+ command.Expose: expose,
+ command.Volume: volume,
+ command.User: user,
+ command.StopSignal: stopSignal,
+ command.Arg: arg,
+ command.Healthcheck: healthcheck,
}
}
diff --git a/builder/dockerfile/parser/line_parsers.go b/builder/dockerfile/parser/line_parsers.go
index adf15ed5a5..ddd92dd416 100644
--- a/builder/dockerfile/parser/line_parsers.go
+++ b/builder/dockerfile/parser/line_parsers.go
@@ -329,3 +329,32 @@ func parseMaybeJSONToList(rest string) (*Node, map[string]bool, error) {
return parseStringsWhitespaceDelimited(rest)
}
+
+// The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
+func parseHealthConfig(rest string) (*Node, map[string]bool, error) {
+ // Find end of first argument
+ var sep int
+ for ; sep < len(rest); sep++ {
+ if unicode.IsSpace(rune(rest[sep])) {
+ break
+ }
+ }
+ next := sep
+ for ; next < len(rest); next++ {
+ if !unicode.IsSpace(rune(rest[next])) {
+ break
+ }
+ }
+
+ if sep == 0 {
+ return nil, nil, nil
+ }
+
+ typ := rest[:sep]
+ cmd, attrs, err := parseMaybeJSON(rest[next:])
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return &Node{Value: typ, Next: cmd, Attributes: attrs}, nil, err
+}
diff --git a/builder/dockerfile/parser/parser.go b/builder/dockerfile/parser/parser.go
index e42904fef8..683f30f68a 100644
--- a/builder/dockerfile/parser/parser.go
+++ b/builder/dockerfile/parser/parser.go
@@ -66,22 +66,23 @@ func init() {
// functions. Errors are propagated up by Parse() and the resulting AST can
// be incorporated directly into the existing AST as a next.
dispatch = map[string]func(string) (*Node, map[string]bool, error){
- command.User: parseString,
- command.Onbuild: parseSubCommand,
- command.Workdir: parseString,
- command.Env: parseEnv,
- command.Label: parseLabel,
- command.Maintainer: parseString,
- command.From: parseString,
- command.Add: parseMaybeJSONToList,
- command.Copy: parseMaybeJSONToList,
- command.Run: parseMaybeJSON,
- command.Cmd: parseMaybeJSON,
- command.Entrypoint: parseMaybeJSON,
- command.Expose: parseStringsWhitespaceDelimited,
- command.Volume: parseMaybeJSONToList,
- command.StopSignal: parseString,
- command.Arg: parseNameOrNameVal,
+ command.User: parseString,
+ command.Onbuild: parseSubCommand,
+ command.Workdir: parseString,
+ command.Env: parseEnv,
+ command.Label: parseLabel,
+ command.Maintainer: parseString,
+ command.From: parseString,
+ command.Add: parseMaybeJSONToList,
+ command.Copy: parseMaybeJSONToList,
+ command.Run: parseMaybeJSON,
+ command.Cmd: parseMaybeJSON,
+ command.Entrypoint: parseMaybeJSON,
+ command.Expose: parseStringsWhitespaceDelimited,
+ command.Volume: parseMaybeJSONToList,
+ command.StopSignal: parseString,
+ command.Arg: parseNameOrNameVal,
+ command.Healthcheck: parseHealthConfig,
}
}
diff --git a/builder/dockerfile/parser/testfiles/health/Dockerfile b/builder/dockerfile/parser/testfiles/health/Dockerfile
new file mode 100644
index 0000000000..6534ce17d4
--- /dev/null
+++ b/builder/dockerfile/parser/testfiles/health/Dockerfile
@@ -0,0 +1,10 @@
+FROM debian
+ADD check.sh main.sh /app/
+CMD /app/main.sh
+HEALTHCHECK
+HEALTHCHECK --interval=5s --timeout=3s --retries=1 \
+ CMD /app/check.sh --quiet
+HEALTHCHECK CMD
+HEALTHCHECK CMD a b
+HEALTHCHECK --timeout=3s CMD ["foo"]
+HEALTHCHECK CONNECT TCP 7000
diff --git a/builder/dockerfile/parser/testfiles/health/result b/builder/dockerfile/parser/testfiles/health/result
new file mode 100644
index 0000000000..bfa846c917
--- /dev/null
+++ b/builder/dockerfile/parser/testfiles/health/result
@@ -0,0 +1,9 @@
+(from "debian")
+(add "check.sh" "main.sh" "/app/")
+(cmd "/app/main.sh")
+(healthcheck)
+(healthcheck ["--interval=5s" "--timeout=3s" "--retries=1"] "CMD" "/app/check.sh --quiet")
+(healthcheck "CMD")
+(healthcheck "CMD" "a b")
+(healthcheck ["--timeout=3s"] "CMD" "foo")
+(healthcheck "CONNECT" "TCP 7000")
diff --git a/container/health.go b/container/health.go
new file mode 100644
index 0000000000..36f01debc6
--- /dev/null
+++ b/container/health.go
@@ -0,0 +1,49 @@
+package container
+
+import (
+ "github.com/Sirupsen/logrus"
+ "github.com/docker/engine-api/types"
+)
+
+// Health holds the current container health-check state
+type Health struct {
+ types.Health
+ stop chan struct{} // Write struct{} to stop the monitor
+}
+
+// String returns a human-readable description of the health-check state
+func (s *Health) String() string {
+ if s.stop == nil {
+ return "no healthcheck"
+ }
+ switch s.Status {
+ case types.Starting:
+ return "health: starting"
+ default: // Healthy and Unhealthy are clear on their own
+ return s.Status
+ }
+}
+
+// OpenMonitorChannel creates and returns a new monitor channel. If there already is one,
+// it returns nil.
+func (s *Health) OpenMonitorChannel() chan struct{} {
+ if s.stop == nil {
+ logrus.Debugf("OpenMonitorChannel")
+ s.stop = make(chan struct{})
+ return s.stop
+ }
+ return nil
+}
+
+// CloseMonitorChannel closes any existing monitor channel.
+func (s *Health) CloseMonitorChannel() {
+ if s.stop != nil {
+ logrus.Debugf("CloseMonitorChannel: waiting for probe to stop")
+ // This channel does not buffer. Once the write succeeds, the monitor
+ // has read the stop request and will not make any further updates
+ // to c.State.Health.
+ s.stop <- struct{}{}
+ s.stop = nil
+ logrus.Debugf("CloseMonitorChannel done")
+ }
+}
diff --git a/container/state.go b/container/state.go
index e0ede8a33a..852ca1d0e5 100644
--- a/container/state.go
+++ b/container/state.go
@@ -27,6 +27,7 @@ type State struct {
StartedAt time.Time
FinishedAt time.Time
waitChan chan struct{}
+ Health *Health
}
// NewState creates a default state object with a fresh channel for state changes.
@@ -46,6 +47,9 @@ func (s *State) String() string {
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
}
+ if h := s.Health; h != nil {
+ return fmt.Sprintf("Up %s (%s)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)), h.String())
+ }
return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
}
diff --git a/daemon/commit.go b/daemon/commit.go
index bb82c85e54..24c7a46701 100644
--- a/daemon/commit.go
+++ b/daemon/commit.go
@@ -80,6 +80,25 @@ func merge(userConf, imageConf *containertypes.Config) error {
userConf.Entrypoint = imageConf.Entrypoint
}
}
+ if imageConf.Healthcheck != nil {
+ if userConf.Healthcheck == nil {
+ userConf.Healthcheck = imageConf.Healthcheck
+ } else {
+ if len(userConf.Healthcheck.Test) == 0 {
+ userConf.Healthcheck.Test = imageConf.Healthcheck.Test
+ }
+ if userConf.Healthcheck.Interval == 0 {
+ userConf.Healthcheck.Interval = imageConf.Healthcheck.Interval
+ }
+ if userConf.Healthcheck.Timeout == 0 {
+ userConf.Healthcheck.Timeout = imageConf.Healthcheck.Timeout
+ }
+ if userConf.Healthcheck.Retries == 0 {
+ userConf.Healthcheck.Retries = imageConf.Healthcheck.Retries
+ }
+ }
+ }
+
if userConf.WorkingDir == "" {
userConf.WorkingDir = imageConf.WorkingDir
}
diff --git a/daemon/exec.go b/daemon/exec.go
index e58205361e..fd09fd784d 100644
--- a/daemon/exec.go
+++ b/daemon/exec.go
@@ -14,11 +14,15 @@ import (
"github.com/docker/docker/errors"
"github.com/docker/docker/libcontainerd"
"github.com/docker/docker/pkg/pools"
+ "github.com/docker/docker/pkg/signal"
"github.com/docker/docker/pkg/term"
"github.com/docker/engine-api/types"
"github.com/docker/engine-api/types/strslice"
)
+// Seconds to wait after sending TERM before trying KILL
+const termProcessTimeout = 10
+
func (d *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
container.ExecCommands.Add(config.ID, config)
@@ -130,7 +134,8 @@ func (d *Daemon) ContainerExecCreate(name string, config *types.ExecConfig) (str
// ContainerExecStart starts a previously set up exec instance. The
// std streams are set up.
-func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
+// If ctx is cancelled, the process is terminated.
+func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
var (
cStdin io.ReadCloser
cStdout, cStderr io.Writer
@@ -197,15 +202,28 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
return nil
}
- attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
+ attachErr := container.AttachStreams(ctx, ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
if err := d.containerd.AddProcess(c.ID, name, p); err != nil {
return err
}
- err = <-attachErr
- if err != nil {
- return fmt.Errorf("attach failed with error: %v", err)
+ select {
+ case <-ctx.Done():
+ logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
+ d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"]))
+ select {
+ case <-time.After(termProcessTimeout * time.Second):
+ logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
+ d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"]))
+ case <-attachErr:
+ // TERM signal worked
+ }
+ return fmt.Errorf("context cancelled")
+ case err := <-attachErr:
+ if err != nil {
+ return fmt.Errorf("attach failed with error: %v", err)
+ }
}
return nil
}
diff --git a/daemon/health.go b/daemon/health.go
new file mode 100644
index 0000000000..ec9843561a
--- /dev/null
+++ b/daemon/health.go
@@ -0,0 +1,314 @@
+package daemon
+
+import (
+ "bytes"
+ "fmt"
+ "runtime"
+ "strings"
+ "time"
+
+ "golang.org/x/net/context"
+
+ "github.com/Sirupsen/logrus"
+ "github.com/docker/docker/container"
+ "github.com/docker/docker/daemon/exec"
+ "github.com/docker/engine-api/types"
+ "github.com/docker/engine-api/types/strslice"
+)
+
+const (
+ // Longest healthcheck probe output message to store. Longer messages will be truncated.
+ maxOutputLen = 4096
+
+ // Default interval between probe runs (from the end of the first to the start of the second).
+ // Also the time before the first probe.
+ defaultProbeInterval = 30 * time.Second
+
+ // The maximum length of time a single probe run should take. If the probe takes longer
+ // than this, the check is considered to have failed.
+ defaultProbeTimeout = 30 * time.Second
+
+ // Shut down a container if it becomes Unhealthy.
+ defaultExitOnUnhealthy = true
+
+ // Maximum number of entries to record
+ maxLogEntries = 5
+)
+
+const (
+ // Exit status codes that can be returned by the probe command.
+
+ exitStatusHealthy = 0 // Container is healthy
+ exitStatusUnhealthy = 1 // Container is unhealthy
+ exitStatusStarting = 2 // Container needs more time to start
+)
+
+// probe implementations know how to run a particular type of probe.
+type probe interface {
+ // Perform one run of the check. Returns the exit code and an optional
+ // short diagnostic string.
+ run(context.Context, *Daemon, *container.Container) (*types.HealthcheckResult, error)
+}
+
+// cmdProbe implements the "CMD" probe type.
+type cmdProbe struct {
+ // Run the command with the system's default shell instead of execing it directly.
+ shell bool
+}
+
+// exec the healthcheck command in the container.
+// Returns the exit code and probe output (if any)
+func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Container) (*types.HealthcheckResult, error) {
+ cmdSlice := strslice.StrSlice(container.Config.Healthcheck.Test)[1:]
+ if p.shell {
+ if runtime.GOOS != "windows" {
+ cmdSlice = append([]string{"/bin/sh", "-c"}, cmdSlice...)
+ } else {
+ cmdSlice = append([]string{"cmd", "/S", "/C"}, cmdSlice...)
+ }
+ }
+ entrypoint, args := d.getEntrypointAndArgs(strslice.StrSlice{}, cmdSlice)
+ execConfig := exec.NewConfig()
+ execConfig.OpenStdin = false
+ execConfig.OpenStdout = true
+ execConfig.OpenStderr = true
+ execConfig.ContainerID = container.ID
+ execConfig.DetachKeys = []byte{}
+ execConfig.Entrypoint = entrypoint
+ execConfig.Args = args
+ execConfig.Tty = false
+ execConfig.Privileged = false
+ execConfig.User = container.Config.User
+
+ d.registerExecCommand(container, execConfig)
+ d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "))
+
+ output := &limitedBuffer{}
+ err := d.ContainerExecStart(ctx, execConfig.ID, nil, output, output)
+ if err != nil {
+ return nil, err
+ }
+ info, err := d.getExecConfig(execConfig.ID)
+ if err != nil {
+ return nil, err
+ }
+ if info.ExitCode == nil {
+ return nil, fmt.Errorf("Healthcheck has no exit code!")
+ }
+ // Note: Go's json package will handle invalid UTF-8 for us
+ out := output.String()
+ return &types.HealthcheckResult{
+ End: time.Now(),
+ ExitCode: *info.ExitCode,
+ Output: out,
+ }, nil
+}
+
+// Update the container's Status.Health struct based on the latest probe's result.
+func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) {
+ c.Lock()
+ defer c.Unlock()
+
+ retries := c.Config.Healthcheck.Retries
+ if retries <= 0 {
+ retries = 1 // Default if unset or set to an invalid value
+ }
+
+ h := c.State.Health
+ oldStatus := h.Status
+
+ if len(h.Log) >= maxLogEntries {
+ h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
+ } else {
+ h.Log = append(h.Log, result)
+ }
+
+ if result.ExitCode == exitStatusHealthy {
+ h.FailingStreak = 0
+ h.Status = types.Healthy
+ } else if result.ExitCode == exitStatusStarting && c.State.Health.Status == types.Starting {
+ // The container is not ready yet. Remain in the starting state.
+ } else {
+ // Failure (including invalid exit code)
+ h.FailingStreak++
+ if c.State.Health.FailingStreak >= retries {
+ h.Status = types.Unhealthy
+ }
+ // Else we're starting or healthy. Stay in that state.
+ }
+
+ if oldStatus != h.Status {
+ d.LogContainerEvent(c, "health_status: "+h.Status)
+ }
+}
+
+// Run the container's monitoring thread until notified via "stop".
+// There is never more than one monitor thread running per container at a time.
+func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) {
+ probeTimeout := timeoutWithDefault(c.Config.Healthcheck.Timeout, defaultProbeTimeout)
+ probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval)
+ for {
+ select {
+ case <-stop:
+ logrus.Debugf("Stop healthcheck monitoring (received while idle)")
+ return
+ case <-time.After(probeInterval):
+ logrus.Debugf("Running health check...")
+ startTime := time.Now()
+ ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout)
+ results := make(chan *types.HealthcheckResult)
+ go func() {
+ result, err := probe.run(ctx, d, c)
+ if err != nil {
+ logrus.Warnf("Health check error: %v", err)
+ results <- &types.HealthcheckResult{
+ ExitCode: -1,
+ Output: err.Error(),
+ Start: startTime,
+ End: time.Now(),
+ }
+ } else {
+ result.Start = startTime
+ logrus.Debugf("Health check done (exitCode=%d)", result.ExitCode)
+ results <- result
+ }
+ close(results)
+ }()
+ select {
+ case <-stop:
+ logrus.Debugf("Stop healthcheck monitoring (received while probing)")
+ // Stop timeout and kill probe, but don't wait for probe to exit.
+ cancelProbe()
+ return
+ case result := <-results:
+ handleProbeResult(d, c, result)
+ // Stop timeout
+ cancelProbe()
+ case <-ctx.Done():
+ logrus.Debugf("Health check taking too long")
+ handleProbeResult(d, c, &types.HealthcheckResult{
+ ExitCode: -1,
+ Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
+ Start: startTime,
+ End: time.Now(),
+ })
+ cancelProbe()
+ // Wait for probe to exit (it might take a while to respond to the TERM
+ // signal and we don't want dying probes to pile up).
+ <-results
+ }
+ }
+ }
+}
+
+// Get a suitable probe implementation for the container's healthcheck configuration.
+func getProbe(c *container.Container) probe {
+ config := c.Config.Healthcheck
+ if config == nil || len(config.Test) == 0 {
+ return nil
+ }
+ switch config.Test[0] {
+ case "CMD":
+ return &cmdProbe{shell: false}
+ case "CMD-SHELL":
+ return &cmdProbe{shell: true}
+ default:
+ logrus.Warnf("Unknown healthcheck type '%s' (expected 'CMD')", config.Test[0])
+ return nil
+ }
+}
+
+// Ensure the health-check monitor is running or not, depending on the current
+// state of the container.
+// Called from monitor.go, with c locked.
+func (d *Daemon) updateHealthMonitor(c *container.Container) {
+ h := c.State.Health
+ if h == nil {
+ return // No healthcheck configured
+ }
+
+ probe := getProbe(c)
+ wantRunning := c.Running && !c.Paused && probe != nil
+ if wantRunning {
+ if stop := h.OpenMonitorChannel(); stop != nil {
+ go monitor(d, c, stop, probe)
+ }
+ } else {
+ h.CloseMonitorChannel()
+ }
+}
+
+// Reset the health state for a newly-started, restarted or restored container.
+// initHealthMonitor is called from monitor.go and we should never be running
+// two instances at once.
+// Called with c locked.
+func (d *Daemon) initHealthMonitor(c *container.Container) {
+ if c.Config.Healthcheck == nil {
+ return
+ }
+
+ // This is needed in case we're auto-restarting
+ d.stopHealthchecks(c)
+
+ if c.State.Health == nil {
+ h := &container.Health{}
+ h.Status = types.Starting
+ h.FailingStreak = 0
+ c.State.Health = h
+ }
+
+ d.updateHealthMonitor(c)
+}
+
+// Called when the container is being stopped (whether because the health check is
+// failing or for any other reason).
+func (d *Daemon) stopHealthchecks(c *container.Container) {
+ h := c.State.Health
+ if h != nil {
+ h.CloseMonitorChannel()
+ }
+}
+
+// Buffer up to maxOutputLen bytes. Further data is discarded.
+type limitedBuffer struct {
+ buf bytes.Buffer
+ truncated bool // indicates that data has been lost
+}
+
+// Append to limitedBuffer while there is room.
+func (b *limitedBuffer) Write(data []byte) (int, error) {
+ bufLen := b.buf.Len()
+ dataLen := len(data)
+ keep := min(maxOutputLen-bufLen, dataLen)
+ if keep > 0 {
+ b.buf.Write(data[:keep])
+ }
+ if keep < dataLen {
+ b.truncated = true
+ }
+ return dataLen, nil
+}
+
+// The contents of the buffer, with "..." appended if it overflowed.
+func (b *limitedBuffer) String() string {
+ out := b.buf.String()
+ if b.truncated {
+ out = out + "..."
+ }
+ return out
+}
+
+// If configuredValue is zero, use defaultValue instead.
+func timeoutWithDefault(configuredValue time.Duration, defaultValue time.Duration) time.Duration {
+ if configuredValue == 0 {
+ return defaultValue
+ }
+ return configuredValue
+}
+
+func min(x, y int) int {
+ if x < y {
+ return x
+ }
+ return y
+}
diff --git a/daemon/health_test.go b/daemon/health_test.go
new file mode 100644
index 0000000000..f53c32f4f2
--- /dev/null
+++ b/daemon/health_test.go
@@ -0,0 +1,112 @@
+package daemon
+
+import (
+ "testing"
+ "time"
+
+ "github.com/docker/docker/container"
+ "github.com/docker/docker/daemon/events"
+ "github.com/docker/engine-api/types"
+ containertypes "github.com/docker/engine-api/types/container"
+ eventtypes "github.com/docker/engine-api/types/events"
+)
+
+func reset(c *container.Container) {
+ c.State = &container.State{}
+ c.State.Health = &container.Health{}
+ c.State.Health.Status = types.Starting
+}
+
+func TestHealthStates(t *testing.T) {
+ e := events.New()
+ _, l, _ := e.Subscribe()
+ defer e.Evict(l)
+
+ expect := func(expected string) {
+ select {
+ case event := <-l:
+ ev := event.(eventtypes.Message)
+ if ev.Status != expected {
+ t.Errorf("Expecting event %#v, but got %#v\n", expected, ev.Status)
+ }
+ case <-time.After(1 * time.Second):
+ t.Errorf("Expecting event %#v, but got nothing\n", expected)
+ }
+ }
+
+ c := &container.Container{
+ CommonContainer: container.CommonContainer{
+ ID: "container_id",
+ Name: "container_name",
+ Config: &containertypes.Config{
+ Image: "image_name",
+ },
+ },
+ }
+ daemon := &Daemon{
+ EventsService: e,
+ }
+
+ c.Config.Healthcheck = &containertypes.HealthConfig{
+ Retries: 1,
+ }
+
+ reset(c)
+
+ handleResult := func(startTime time.Time, exitCode int) {
+ handleProbeResult(daemon, c, &types.HealthcheckResult{
+ Start: startTime,
+ End: startTime,
+ ExitCode: exitCode,
+ })
+ }
+
+ // starting -> failed -> success -> failed
+
+ handleResult(c.State.StartedAt.Add(1*time.Second), 1)
+ expect("health_status: unhealthy")
+
+ handleResult(c.State.StartedAt.Add(2*time.Second), 0)
+ expect("health_status: healthy")
+
+ handleResult(c.State.StartedAt.Add(3*time.Second), 1)
+ expect("health_status: unhealthy")
+
+ // starting -> starting -> starting ->
+ // healthy -> starting (invalid transition)
+
+ reset(c)
+
+ handleResult(c.State.StartedAt.Add(20*time.Second), 2)
+ handleResult(c.State.StartedAt.Add(40*time.Second), 2)
+ if c.State.Health.Status != types.Starting {
+ t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+ }
+
+ handleResult(c.State.StartedAt.Add(50*time.Second), 0)
+ expect("health_status: healthy")
+ handleResult(c.State.StartedAt.Add(60*time.Second), 2)
+ expect("health_status: unhealthy")
+
+ // Test retries
+
+ reset(c)
+ c.Config.Healthcheck.Retries = 3
+
+ handleResult(c.State.StartedAt.Add(20*time.Second), 1)
+ handleResult(c.State.StartedAt.Add(40*time.Second), 1)
+ if c.State.Health.Status != types.Starting {
+ t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+ }
+ if c.State.Health.FailingStreak != 2 {
+ t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
+ }
+ handleResult(c.State.StartedAt.Add(60*time.Second), 1)
+ expect("health_status: unhealthy")
+
+ handleResult(c.State.StartedAt.Add(80*time.Second), 0)
+ expect("health_status: healthy")
+ if c.State.Health.FailingStreak != 0 {
+ t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
+ }
+}
diff --git a/daemon/inspect.go b/daemon/inspect.go
index db475537f1..e10402203f 100644
--- a/daemon/inspect.go
+++ b/daemon/inspect.go
@@ -108,6 +108,15 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool)
hostConfig.Links = append(hostConfig.Links, fmt.Sprintf("%s:%s", child.Name, linkAlias))
}
+ var containerHealth *types.Health
+ if container.State.Health != nil {
+ containerHealth = &types.Health{
+ Status: container.State.Health.Status,
+ FailingStreak: container.State.Health.FailingStreak,
+ Log: append([]*types.HealthcheckResult{}, container.State.Health.Log...),
+ }
+ }
+
containerState := &types.ContainerState{
Status: container.State.StateString(),
Running: container.State.Running,
@@ -120,6 +129,7 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool)
Error: container.State.Error,
StartedAt: container.State.StartedAt.Format(time.RFC3339Nano),
FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano),
+ Health: containerHealth,
}
contJSONBase := &types.ContainerJSONBase{
diff --git a/daemon/monitor.go b/daemon/monitor.go
index cb334cf29d..30d36836f8 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -25,6 +25,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
if runtime.GOOS == "windows" {
return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
}
+ daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "oom")
case libcontainerd.StateExit:
c.Lock()
@@ -35,6 +36,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
attributes := map[string]string{
"exitCode": strconv.Itoa(int(e.ExitCode)),
}
+ daemon.updateHealthMonitor(c)
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
// FIXME: here is race condition between two RUN instructions in Dockerfile
@@ -54,6 +56,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
"exitCode": strconv.Itoa(int(e.ExitCode)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
+ daemon.updateHealthMonitor(c)
return c.ToDisk()
case libcontainerd.StateExitProcess:
c.Lock()
@@ -74,18 +77,24 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
}
case libcontainerd.StateStart, libcontainerd.StateRestore:
+ // Container is already locked in this case
c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
c.HasBeenManuallyStopped = false
if err := c.ToDisk(); err != nil {
c.Reset(false)
return err
}
+ daemon.initHealthMonitor(c)
daemon.LogContainerEvent(c, "start")
case libcontainerd.StatePause:
+ // Container is already locked in this case
c.Paused = true
+ daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "pause")
case libcontainerd.StateResume:
+ // Container is already locked in this case
c.Paused = false
+ daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "unpause")
}
diff --git a/daemon/stop.go b/daemon/stop.go
index 701743008a..4bbdbbd74c 100644
--- a/daemon/stop.go
+++ b/daemon/stop.go
@@ -41,6 +41,8 @@ func (daemon *Daemon) containerStop(container *container.Container, seconds int)
return nil
}
+ daemon.stopHealthchecks(container)
+
stopSignal := container.StopSignal()
// 1. Send a stop signal
if err := daemon.killPossiblyDeadProcess(container, stopSignal); err != nil {
diff --git a/docs/reference/builder.md b/docs/reference/builder.md
index 863dcd36c7..f460338b22 100644
--- a/docs/reference/builder.md
+++ b/docs/reference/builder.md
@@ -1470,6 +1470,73 @@ The `STOPSIGNAL` instruction sets the system call signal that will be sent to th
This signal can be a valid unsigned number that matches a position in the kernel's syscall table, for instance 9,
or a signal name in the format SIGNAME, for instance SIGKILL.
+## HEALTHCHECK
+
+The `HEALTHCHECK` instruction has two forms:
+
+* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container)
+* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image)
+
+The `HEALTHCHECK` instruction tells Docker how to test a container to check that
+it is still working. This can detect cases such as a web server that is stuck in
+an infinite loop and unable to handle new connections, even though the server
+process is still running.
+
+When a container has a healthcheck specified, it has a _health status_ in
+addition to its normal status. This status is initially `starting`. Whenever a
+health check passes, it becomes `healthy` (whatever state it was previously in).
+After a certain number of consecutive failures, it becomes `unhealthy`.
+
+The options that can appear before `CMD` are:
+
+* `--interval=DURATION` (default: `30s`)
+* `--timeout=DURATION` (default: `30s`)
+* `--retries=N` (default: `1`)
+
+The health check will first run **interval** seconds after the container is
+started, and then again **interval** seconds after each previous check completes.
+
+If a single run of the check takes longer than **timeout** seconds then the check
+is considered to have failed.
+
+It takes **retries** consecutive failures of the health check for the container
+to be considered `unhealthy`.
+
+There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list
+more than one then only the last `HEALTHCHECK` will take effect.
+
+The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK
+CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands;
+see e.g. `ENTRYPOINT` for details).
+
+The command's exit status indicates the health status of the container.
+The possible values are:
+
+- 0: success - the container is healthy and ready for use
+- 1: unhealthy - the container is not working correctly
+- 2: starting - the container is not ready for use yet, but is working correctly
+
+If the probe returns 2 ("starting") when the container has already moved out of the
+"starting" state then it is treated as "unhealthy" instead.
+
+For example, to check every five minutes or so that a web-server is able to
+serve the site's main page within three seconds:
+
+ HEALTHCHECK --interval=5m --timeout=3s \
+ CMD curl -f http://localhost/ || exit 1
+
+To help debug failing probes, any output text (UTF-8 encoded) that the command writes
+to stdout or stderr will be stored in the health status and can be queried with
+`docker inspect`. Such output should be kept short (only the first 4096 bytes
+are stored currently).
+
+When the health status of a container changes, a `health_status` event is
+generated with the new status.
+
+The `HEALTHCHECK` feature was added in Docker 1.12.
+
+
+
## Dockerfile examples
Below you can see some examples of Dockerfile syntax. If you're interested in
diff --git a/docs/reference/run.md b/docs/reference/run.md
index 567d42b207..a7c5267f45 100644
--- a/docs/reference/run.md
+++ b/docs/reference/run.md
@@ -1250,6 +1250,7 @@ Dockerfile instruction and how the operator can override that setting.
#entrypoint-default-command-to-execute-at-runtime)
- [EXPOSE (Incoming Ports)](#expose-incoming-ports)
- [ENV (Environment Variables)](#env-environment-variables)
+ - [HEALTHCHECK](#healthcheck)
- [VOLUME (Shared Filesystems)](#volume-shared-filesystems)
- [USER](#user)
- [WORKDIR](#workdir)
@@ -1398,6 +1399,65 @@ above, or already defined by the developer with a Dockerfile `ENV`:
Similarly the operator can set the **hostname** with `-h`.
+### HEALTHCHECK
+
+```
+ --health-cmd Command to run to check health
+ --health-interval Time between running the check
+ --health-retries Consecutive failures needed to report unhealthy
+ --health-timeout Maximum time to allow one check to run
+ --no-healthcheck Disable any container-specified HEALTHCHECK
+```
+
+Example:
+
+ $ docker run --name=test -d \
+ --health-cmd='stat /etc/passwd || exit 1' \
+ --health-interval=2s \
+ busybox sleep 1d
+ $ sleep 2; docker inspect --format='{{.State.Health.Status}}' test
+ healthy
+ $ docker exec test rm /etc/passwd
+ $ sleep 2; docker inspect --format='{{json .State.Health}}' test
+ {
+ "Status": "unhealthy",
+ "FailingStreak": 3,
+ "Log": [
+ {
+ "Start": "2016-05-25T17:22:04.635478668Z",
+ "End": "2016-05-25T17:22:04.7272552Z",
+ "ExitCode": 0,
+ "Output": " File: /etc/passwd\n Size: 334 \tBlocks: 8 IO Block: 4096 regular file\nDevice: 32h/50d\tInode: 12 Links: 1\nAccess: (0664/-rw-rw-r--) Uid: ( 0/ root) Gid: ( 0/ root)\nAccess: 2015-12-05 22:05:32.000000000\nModify: 2015..."
+ },
+ {
+ "Start": "2016-05-25T17:22:06.732900633Z",
+ "End": "2016-05-25T17:22:06.822168935Z",
+ "ExitCode": 0,
+ "Output": " File: /etc/passwd\n Size: 334 \tBlocks: 8 IO Block: 4096 regular file\nDevice: 32h/50d\tInode: 12 Links: 1\nAccess: (0664/-rw-rw-r--) Uid: ( 0/ root) Gid: ( 0/ root)\nAccess: 2015-12-05 22:05:32.000000000\nModify: 2015..."
+ },
+ {
+ "Start": "2016-05-25T17:22:08.823956535Z",
+ "End": "2016-05-25T17:22:08.897359124Z",
+ "ExitCode": 1,
+ "Output": "stat: can't stat '/etc/passwd': No such file or directory\n"
+ },
+ {
+ "Start": "2016-05-25T17:22:10.898802931Z",
+ "End": "2016-05-25T17:22:10.969631866Z",
+ "ExitCode": 1,
+ "Output": "stat: can't stat '/etc/passwd': No such file or directory\n"
+ },
+ {
+ "Start": "2016-05-25T17:22:12.971033523Z",
+ "End": "2016-05-25T17:22:13.082015516Z",
+ "ExitCode": 1,
+ "Output": "stat: can't stat '/etc/passwd': No such file or directory\n"
+ }
+ ]
+ }
+
+The health status is also displayed in the `docker ps` output.
+
### TMPFS (mount tmpfs filesystems)
```bash
diff --git a/integration-cli/docker_cli_health_test.go b/integration-cli/docker_cli_health_test.go
new file mode 100644
index 0000000000..b374dba357
--- /dev/null
+++ b/integration-cli/docker_cli_health_test.go
@@ -0,0 +1,154 @@
+package main
+
+import (
+ "encoding/json"
+ "github.com/docker/docker/pkg/integration/checker"
+ "github.com/docker/engine-api/types"
+ "github.com/go-check/check"
+ "strconv"
+ "strings"
+ "time"
+)
+
+func waitForStatus(c *check.C, name string, prev string, expected string) {
+ prev = prev + "\n"
+ expected = expected + "\n"
+ for {
+ out, _ := dockerCmd(c, "inspect", "--format={{.State.Status}}", name)
+ if out == expected {
+ return
+ }
+ c.Check(out, checker.Equals, prev)
+ if out != prev {
+ return
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+}
+
+func waitForHealthStatus(c *check.C, name string, prev string, expected string) {
+ prev = prev + "\n"
+ expected = expected + "\n"
+ for {
+ out, _ := dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
+ if out == expected {
+ return
+ }
+ c.Check(out, checker.Equals, prev)
+ if out != prev {
+ return
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+}
+
+func getHealth(c *check.C, name string) *types.Health {
+ out, _ := dockerCmd(c, "inspect", "--format={{json .State.Health}}", name)
+ var health types.Health
+ err := json.Unmarshal([]byte(out), &health)
+ c.Check(err, checker.Equals, nil)
+ return &health
+}
+
+func (s *DockerSuite) TestHealth(c *check.C) {
+ testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
+
+ imageName := "testhealth"
+ _, err := buildImage(imageName,
+ `FROM busybox
+ RUN echo OK > /status
+ CMD ["/bin/sleep", "120"]
+ STOPSIGNAL SIGKILL
+ HEALTHCHECK --interval=1s --timeout=30s \
+ CMD cat /status`,
+ true)
+
+ c.Check(err, check.IsNil)
+
+ // No health status before starting
+ name := "test_health"
+ dockerCmd(c, "create", "--name", name, imageName)
+ out, _ := dockerCmd(c, "ps", "-a", "--format={{.Status}}")
+ c.Check(out, checker.Equals, "Created\n")
+
+ // Inspect the options
+ out, _ = dockerCmd(c, "inspect",
+ "--format='timeout={{.Config.Healthcheck.Timeout}} "+
+ "interval={{.Config.Healthcheck.Interval}} "+
+ "retries={{.Config.Healthcheck.Retries}} "+
+ "test={{.Config.Healthcheck.Test}}'", name)
+ c.Check(out, checker.Equals, "timeout=30s interval=1s retries=0 test=[CMD-SHELL cat /status]\n")
+
+ // Start
+ dockerCmd(c, "start", name)
+ waitForHealthStatus(c, name, "starting", "healthy")
+
+ // Make it fail
+ dockerCmd(c, "exec", name, "rm", "/status")
+ waitForHealthStatus(c, name, "healthy", "unhealthy")
+
+ // Inspect the status
+ out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
+ c.Check(out, checker.Equals, "unhealthy\n")
+
+ // Make it healthy again
+ dockerCmd(c, "exec", name, "touch", "/status")
+ waitForHealthStatus(c, name, "unhealthy", "healthy")
+
+ // Remove container
+ dockerCmd(c, "rm", "-f", name)
+
+ // Disable the check from the CLI
+ out, _ = dockerCmd(c, "create", "--name=noh", "--no-healthcheck", imageName)
+ out, _ = dockerCmd(c, "inspect", "--format={{.Config.Healthcheck.Test}}", "noh")
+ c.Check(out, checker.Equals, "[NONE]\n")
+ dockerCmd(c, "rm", "noh")
+
+ // Disable the check with a new build
+ _, err = buildImage("no_healthcheck",
+ `FROM testhealth
+ HEALTHCHECK NONE`, true)
+ c.Check(err, check.IsNil)
+
+ out, _ = dockerCmd(c, "inspect", "--format={{.ContainerConfig.Healthcheck.Test}}", "no_healthcheck")
+ c.Check(out, checker.Equals, "[NONE]\n")
+
+ // Enable the checks from the CLI
+ _, _ = dockerCmd(c, "run", "-d", "--name=fatal_healthcheck",
+ "--health-interval=0.5s",
+ "--health-retries=3",
+ "--health-cmd=cat /status",
+ "no_healthcheck")
+ waitForHealthStatus(c, "fatal_healthcheck", "starting", "healthy")
+ health := getHealth(c, "fatal_healthcheck")
+ c.Check(health.Status, checker.Equals, "healthy")
+ c.Check(health.FailingStreak, checker.Equals, 0)
+ last := health.Log[len(health.Log)-1]
+ c.Check(last.ExitCode, checker.Equals, 0)
+ c.Check(last.Output, checker.Equals, "OK\n")
+
+ // Fail the check, which should now make it exit
+ dockerCmd(c, "exec", "fatal_healthcheck", "rm", "/status")
+ waitForStatus(c, "fatal_healthcheck", "running", "exited")
+
+ out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", "fatal_healthcheck")
+ c.Check(out, checker.Equals, "unhealthy\n")
+ failsStr, _ := dockerCmd(c, "inspect", "--format={{.State.Health.FailingStreak}}", "fatal_healthcheck")
+ fails, err := strconv.Atoi(strings.TrimSpace(failsStr))
+ c.Check(err, check.IsNil)
+ c.Check(fails >= 3, checker.Equals, true)
+ dockerCmd(c, "rm", "-f", "fatal_healthcheck")
+
+ // Check timeout
+	// Note: if the interval is too small, it seems that Docker spends all its time running health
+	// checks and never gets around to killing the timed-out probe process.
+ _, _ = dockerCmd(c, "run", "-d", "--name=test",
+ "--health-interval=1s", "--health-cmd=sleep 5m", "--health-timeout=1ms", imageName)
+ waitForHealthStatus(c, "test", "starting", "unhealthy")
+ health = getHealth(c, "test")
+ last = health.Log[len(health.Log)-1]
+ c.Check(health.Status, checker.Equals, "unhealthy")
+ c.Check(last.ExitCode, checker.Equals, -1)
+ c.Check(last.Output, checker.Equals, "Health check exceeded timeout (1ms)")
+ dockerCmd(c, "rm", "-f", "test")
+}
diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go
index 165597b9a6..10c377154e 100644
--- a/libcontainerd/client_linux.go
+++ b/libcontainerd/client_linux.go
@@ -190,6 +190,17 @@ func (clnt *client) Signal(containerID string, sig int) error {
return err
}
+func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
+ clnt.lock(containerID)
+ defer clnt.unlock(containerID)
+ _, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
+ Id: containerID,
+ Pid: pid,
+ Signal: uint32(sig),
+ })
+ return err
+}
+
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
clnt.lock(containerID)
defer clnt.unlock(containerID)
diff --git a/libcontainerd/client_windows.go b/libcontainerd/client_windows.go
index 1f4046507a..78b3b992f1 100644
--- a/libcontainerd/client_windows.go
+++ b/libcontainerd/client_windows.go
@@ -304,6 +304,25 @@ func (clnt *client) Signal(containerID string, sig int) error {
return nil
}
+// While Linux has support for the full range of signals, signals aren't really implemented on Windows.
+// We try to terminate the specified process whatever signal is requested.
+func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error {
+ clnt.lock(containerID)
+ defer clnt.unlock(containerID)
+ cont, err := clnt.getContainer(containerID)
+ if err != nil {
+ return err
+ }
+
+ for _, p := range cont.processes {
+ if p.friendlyName == processFriendlyName {
+ return hcsshim.TerminateProcessInComputeSystem(containerID, p.systemPid)
+ }
+ }
+
+ return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID)
+}
+
// Resize handles a CLI event to resize an interactive docker run or docker exec
// window.
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
diff --git a/libcontainerd/types.go b/libcontainerd/types.go
index 15d0fc33f8..357ca1bd4d 100644
--- a/libcontainerd/types.go
+++ b/libcontainerd/types.go
@@ -34,6 +34,7 @@ type Backend interface {
type Client interface {
Create(containerID string, spec Spec, options ...CreateOption) error
Signal(containerID string, sig int) error
+ SignalProcess(containerID string, processFriendlyName string, sig int) error
AddProcess(containerID, processFriendlyName string, process Process) error
Resize(containerID, processFriendlyName string, width, height int) error
Pause(containerID string) error
diff --git a/runconfig/opts/parse.go b/runconfig/opts/parse.go
index 8f9371fd39..c2e009eaa6 100644
--- a/runconfig/opts/parse.go
+++ b/runconfig/opts/parse.go
@@ -100,6 +100,12 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
flStopSignal = cmd.String([]string{"-stop-signal"}, signal.DefaultStopSignal, fmt.Sprintf("Signal to stop a container, %v by default", signal.DefaultStopSignal))
flIsolation = cmd.String([]string{"-isolation"}, "", "Container isolation technology")
flShmSize = cmd.String([]string{"-shm-size"}, "", "Size of /dev/shm, default value is 64MB")
+ // Healthcheck
+ flNoHealthcheck = cmd.Bool([]string{"-no-healthcheck"}, false, "Disable any container-specified HEALTHCHECK")
+ flHealthCmd = cmd.String([]string{"-health-cmd"}, "", "Command to run to check health")
+ flHealthInterval = cmd.Duration([]string{"-health-interval"}, 0, "Time between running the check")
+ flHealthTimeout = cmd.Duration([]string{"-health-timeout"}, 0, "Maximum time to allow one check to run")
+ flHealthRetries = cmd.Int([]string{"-health-retries"}, 0, "Consecutive failures needed to report unhealthy")
)
cmd.Var(&flAttach, []string{"a", "-attach"}, "Attach to STDIN, STDOUT or STDERR")
@@ -351,6 +357,39 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
return nil, nil, nil, cmd, err
}
+ // Healthcheck
+ var healthConfig *container.HealthConfig
+ haveHealthSettings := *flHealthCmd != "" ||
+ *flHealthInterval != 0 ||
+ *flHealthTimeout != 0 ||
+ *flHealthRetries != 0
+ if *flNoHealthcheck {
+ if haveHealthSettings {
+ return nil, nil, nil, cmd, fmt.Errorf("--no-healthcheck conflicts with --health-* options")
+ }
+ test := strslice.StrSlice{"NONE"}
+ healthConfig = &container.HealthConfig{Test: test}
+ } else if haveHealthSettings {
+ var probe strslice.StrSlice
+ if *flHealthCmd != "" {
+ args := []string{"CMD-SHELL", *flHealthCmd}
+ probe = strslice.StrSlice(args)
+ }
+ if *flHealthInterval < 0 {
+ return nil, nil, nil, cmd, fmt.Errorf("--health-interval cannot be negative")
+ }
+ if *flHealthTimeout < 0 {
+ return nil, nil, nil, cmd, fmt.Errorf("--health-timeout cannot be negative")
+ }
+
+ healthConfig = &container.HealthConfig{
+ Test: probe,
+ Interval: *flHealthInterval,
+ Timeout: *flHealthTimeout,
+ Retries: *flHealthRetries,
+ }
+ }
+
resources := container.Resources{
CgroupParent: *flCgroupParent,
Memory: flMemory,
@@ -399,6 +438,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
Entrypoint: entrypoint,
WorkingDir: *flWorkingDir,
Labels: ConvertKVStringsToMap(labels),
+ Healthcheck: healthConfig,
}
if cmd.IsSet("-stop-signal") {
config.StopSignal = *flStopSignal
diff --git a/runconfig/opts/parse_test.go b/runconfig/opts/parse_test.go
index 30f755c792..e3a00a93b4 100644
--- a/runconfig/opts/parse_test.go
+++ b/runconfig/opts/parse_test.go
@@ -9,6 +9,7 @@ import (
"runtime"
"strings"
"testing"
+ "time"
flag "github.com/docker/docker/pkg/mflag"
"github.com/docker/docker/runconfig"
@@ -584,6 +585,45 @@ func TestParseRestartPolicy(t *testing.T) {
}
}
+func TestParseHealth(t *testing.T) {
+ checkOk := func(args ...string) *container.HealthConfig {
+ config, _, _, _, err := parseRun(args)
+ if err != nil {
+ t.Fatalf("%#v: %v", args, err)
+ }
+ return config.Healthcheck
+ }
+ checkError := func(expected string, args ...string) {
+ config, _, _, _, err := parseRun(args)
+ if err == nil {
+ t.Fatalf("Expected error, but got %#v", config)
+ }
+ if err.Error() != expected {
+ t.Fatalf("Expected %#v, got %#v", expected, err)
+ }
+ }
+ health := checkOk("--no-healthcheck", "img", "cmd")
+ if health == nil || len(health.Test) != 1 || health.Test[0] != "NONE" {
+ t.Fatalf("--no-healthcheck failed: %#v", health)
+ }
+
+ health = checkOk("--health-cmd=/check.sh -q", "img", "cmd")
+ if len(health.Test) != 2 || health.Test[0] != "CMD-SHELL" || health.Test[1] != "/check.sh -q" {
+ t.Fatalf("--health-cmd: got %#v", health.Test)
+ }
+ if health.Timeout != 0 {
+		t.Fatalf("--health-cmd: timeout = %v", health.Timeout)
+ }
+
+ checkError("--no-healthcheck conflicts with --health-* options",
+ "--no-healthcheck", "--health-cmd=/check.sh -q", "img", "cmd")
+
+ health = checkOk("--health-timeout=2s", "--health-retries=3", "--health-interval=4.5s", "img", "cmd")
+ if health.Timeout != 2*time.Second || health.Retries != 3 || health.Interval != 4500*time.Millisecond {
+ t.Fatalf("--health-*: got %#v", health)
+ }
+}
+
func TestParseLoggingOpts(t *testing.T) {
// logging opts ko
if _, _, _, _, err := parseRun([]string{"--log-driver=none", "--log-opt=anything", "img", "cmd"}); err == nil || err.Error() != "invalid logging opts for driver none" {