summaryrefslogtreecommitdiff
path: root/runtime/execdriver
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/execdriver')
-rw-r--r--runtime/execdriver/MAINTAINERS2
-rw-r--r--runtime/execdriver/driver.go144
-rw-r--r--runtime/execdriver/execdrivers/execdrivers.go23
-rw-r--r--runtime/execdriver/lxc/driver.go418
-rw-r--r--runtime/execdriver/lxc/info.go50
-rw-r--r--runtime/execdriver/lxc/info_test.go36
-rw-r--r--runtime/execdriver/lxc/init.go175
-rw-r--r--runtime/execdriver/lxc/lxc_init_linux.go11
-rw-r--r--runtime/execdriver/lxc/lxc_init_unsupported.go7
-rw-r--r--runtime/execdriver/lxc/lxc_template.go176
-rw-r--r--runtime/execdriver/lxc/lxc_template_unit_test.go135
-rw-r--r--runtime/execdriver/native/configuration/parse.go186
-rw-r--r--runtime/execdriver/native/configuration/parse_test.go166
-rw-r--r--runtime/execdriver/native/create.go114
-rw-r--r--runtime/execdriver/native/driver.go292
-rw-r--r--runtime/execdriver/native/info.go21
-rw-r--r--runtime/execdriver/native/template/default_template.go45
-rw-r--r--runtime/execdriver/native/term.go42
-rw-r--r--runtime/execdriver/pipes.go23
-rw-r--r--runtime/execdriver/termconsole.go126
20 files changed, 2192 insertions, 0 deletions
diff --git a/runtime/execdriver/MAINTAINERS b/runtime/execdriver/MAINTAINERS
new file mode 100644
index 0000000000..1cb551364d
--- /dev/null
+++ b/runtime/execdriver/MAINTAINERS
@@ -0,0 +1,2 @@
+Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
+Guillaume J. Charmes <guillaume@docker.com> (@creack)
diff --git a/runtime/execdriver/driver.go b/runtime/execdriver/driver.go
new file mode 100644
index 0000000000..27a575cb3a
--- /dev/null
+++ b/runtime/execdriver/driver.go
@@ -0,0 +1,144 @@
+package execdriver
+
+import (
+ "errors"
+ "io"
+ "os"
+ "os/exec"
+)
+
+// Context is a generic set of key/value pairs that allows
+// arbitrary data to be sent
+type Context map[string]string
+
+// Sentinel errors shared by the execdriver package and its drivers.
+var (
+ ErrNotRunning = errors.New("Process could not be started")
+ ErrWaitTimeoutReached = errors.New("Wait timeout reached")
+ ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
+ ErrDriverNotFound = errors.New("The requested docker init has not been found")
+)
+
+// dockerInitFcts maps a driver name to its registered init function.
+// It is allocated lazily by RegisterInitFunc.
+var dockerInitFcts map[string]InitFunc
+
+type (
+ // StartCallback is the callback type accepted by Driver.Run.
+ StartCallback func(*Command)
+ // InitFunc is the per-driver init entry point stored by RegisterInitFunc.
+ InitFunc func(i *InitArgs) error
+)
+
+// RegisterInitFunc records fct as the init function for the driver called
+// name. It returns ErrDriverAlreadyRegistered when name already has an
+// entry; the registry map is allocated on first successful registration.
+func RegisterInitFunc(name string, fct InitFunc) error {
+	// Looking up a key in a nil map is safe and simply reports "absent".
+	if _, exists := dockerInitFcts[name]; exists {
+		return ErrDriverAlreadyRegistered
+	}
+	if dockerInitFcts == nil {
+		dockerInitFcts = map[string]InitFunc{}
+	}
+	dockerInitFcts[name] = fct
+	return nil
+}
+
+// GetInitFunc looks up the init function registered under name and
+// returns ErrDriverNotFound when there is none.
+func GetInitFunc(name string) (InitFunc, error) {
+	if fct, found := dockerInitFcts[name]; found {
+		return fct, nil
+	}
+	return nil, ErrDriverNotFound
+}
+
+// InitArgs holds the arguments provided to the init function for a driver.
+type InitArgs struct {
+ User string
+ Gateway string
+ Ip string
+ WorkDir string
+ Privileged bool
+ Env []string
+ Args []string
+ Mtu int
+ Driver string
+ Console string
+ Pipe int
+ Root string
+}
+
+// Info exposes driver specific information based on
+// processes registered with the driver
+type Info interface {
+ IsRunning() bool
+}
+
+// Terminal is an interface for drivers to implement
+// if they want to support Close and Resize calls from
+// the core
+type Terminal interface {
+ io.Closer
+ Resize(height, width int) error
+}
+
+// TtyTerminal is implemented by terminals that can hand out their
+// master file.
+type TtyTerminal interface {
+ Master() *os.File
+}
+
+// Driver is the interface every exec driver implements.
+type Driver interface {
+ Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
+ Kill(c *Command, sig int) error
+ Name() string // Driver name
+ Info(id string) Info // "temporary" hack (until we move state from core to plugins)
+ GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
+ Terminate(c *Command) error // kill it with fire
+}
+
+// Network holds the network settings of the container
+type Network struct {
+ Interface *NetworkInterface `json:"interface"` // if interface is nil then networking is disabled
+ Mtu int `json:"mtu"`
+}
+
+// NetworkInterface describes the container's network interface:
+// gateway, IP address with prefix length, and the bridge it links to.
+type NetworkInterface struct {
+ Gateway string `json:"gateway"`
+ IPAddress string `json:"ip"`
+ Bridge string `json:"bridge"`
+ IPPrefixLen int `json:"ip_prefix_len"`
+}
+
+// Resources holds the memory, memory swap and cpu share settings
+// of a container.
+type Resources struct {
+ Memory int64 `json:"memory"`
+ MemorySwap int64 `json:"memory_swap"`
+ CpuShares int64 `json:"cpu_shares"`
+}
+
+// Mount describes a single mount: its source, its destination,
+// and the writable/private flags.
+type Mount struct {
+ Source string `json:"source"`
+ Destination string `json:"destination"`
+ Writable bool `json:"writable"`
+ Private bool `json:"private"`
+}
+
+// Command wraps an os/exec.Cmd to add more metadata
+type Command struct {
+ exec.Cmd `json:"-"`
+
+ ID string `json:"id"`
+ Privileged bool `json:"privileged"`
+ User string `json:"user"`
+ Rootfs string `json:"rootfs"` // root fs of the container
+ InitPath string `json:"initpath"` // dockerinit
+ Entrypoint string `json:"entrypoint"`
+ Arguments []string `json:"arguments"`
+ WorkingDir string `json:"working_dir"`
+ ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
+ Tty bool `json:"tty"`
+ Network *Network `json:"network"`
+ Config map[string][]string `json:"config"` // generic values that specific drivers can consume
+ Resources *Resources `json:"resources"`
+ Mounts []Mount `json:"mounts"`
+
+ Terminal Terminal `json:"-"` // standard or tty terminal
+ Console string `json:"-"` // dev/console path
+ ContainerPid int `json:"container_pid"` // the pid for the process inside a container
+}
+
+// Pid returns ContainerPid, the pid recorded for the process inside the
+// container. It performs no checks of its own: the result is whatever
+// value was stored in the field.
+func (c *Command) Pid() int {
+ return c.ContainerPid
+}
diff --git a/runtime/execdriver/execdrivers/execdrivers.go b/runtime/execdriver/execdrivers/execdrivers.go
new file mode 100644
index 0000000000..9e277c86df
--- /dev/null
+++ b/runtime/execdriver/execdrivers/execdrivers.go
@@ -0,0 +1,23 @@
+package execdrivers
+
+import (
+ "fmt"
+ "github.com/dotcloud/docker/pkg/sysinfo"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "github.com/dotcloud/docker/runtime/execdriver/lxc"
+ "github.com/dotcloud/docker/runtime/execdriver/native"
+ "path"
+)
+
+// NewDriver returns the exec driver selected by name ("lxc" or "native").
+// Any other name yields an "unknown exec driver" error.
+func NewDriver(name, root, initPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
+ switch name {
+ case "lxc":
+ // we want to give the lxc driver the full docker root because it needs
+ // to access and write config and template files in /var/lib/docker/containers/*
+ // to be backwards compatible
+ return lxc.NewDriver(root, sysInfo.AppArmor)
+ case "native":
+ return native.NewDriver(path.Join(root, "execdriver", "native"), initPath)
+ }
+ return nil, fmt.Errorf("unknown exec driver %s", name)
+}
diff --git a/runtime/execdriver/lxc/driver.go b/runtime/execdriver/lxc/driver.go
new file mode 100644
index 0000000000..ef16dcc380
--- /dev/null
+++ b/runtime/execdriver/lxc/driver.go
@@ -0,0 +1,418 @@
+package lxc
+
+import (
+ "fmt"
+ "github.com/dotcloud/docker/pkg/cgroups"
+ "github.com/dotcloud/docker/pkg/label"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "github.com/dotcloud/docker/utils"
+ "io/ioutil"
+ "log"
+ "os"
+ "os/exec"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+)
+
+const DriverName = "lxc"
+
+// init registers the lxc init function with execdriver under DriverName.
+// The registered function prepares the process step by step (environment,
+// hostname, networking, capabilities, working directory, user) and then
+// replaces the current process image with the requested command.
+// The error returned by RegisterInitFunc is not checked here.
+func init() {
+ execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+ if err := setupEnv(args); err != nil {
+ return err
+ }
+
+ if err := setupHostname(args); err != nil {
+ return err
+ }
+
+ if err := setupNetworking(args); err != nil {
+ return err
+ }
+
+ if err := setupCapabilities(args); err != nil {
+ return err
+ }
+
+ if err := setupWorkingDirectory(args); err != nil {
+ return err
+ }
+
+ if err := changeUser(args); err != nil {
+ return err
+ }
+
+ // Resolve the command to exec; a missing binary exits with status 127.
+ path, err := exec.LookPath(args.Args[0])
+ if err != nil {
+ log.Printf("Unable to locate %v", args.Args[0])
+ os.Exit(127)
+ }
+ // syscall.Exec only returns on failure.
+ if err := syscall.Exec(path, args.Args, os.Environ()); err != nil {
+ return fmt.Errorf("dockerinit unable to execute %s - %s", path, err)
+ }
+ panic("Unreachable")
+ })
+}
+
+// driver is the lxc implementation of the exec driver.
+type driver struct {
+ root string // root path for the driver to use
+ apparmor bool // value passed to NewDriver; selects lxc-start-unconfined for privileged runs
+ sharedRoot bool // result of rootIsShared() captured at construction time
+}
+
+// NewDriver builds an lxc driver rooted at root. It first (re)creates the
+// lxc-start-unconfined symlink under root and then records whether the
+// root mount is shared.
+func NewDriver(root string, apparmor bool) (*driver, error) {
+	// setup unconfined symlink
+	err := linkLxcStart(root)
+	if err != nil {
+		return nil, err
+	}
+	d := &driver{root: root, apparmor: apparmor}
+	d.sharedRoot = rootIsShared()
+	return d, nil
+}
+
+// Name returns the driver name followed by the detected lxc version,
+// formatted as "<DriverName>-<version>".
+func (d *driver) Name() string {
+	return fmt.Sprintf("%s-%s", DriverName, d.version())
+}
+
+// Run starts the container through lxc-start and blocks until the started
+// process exits. It writes the lxc config, assembles the lxc-start command
+// line (wrapped in `unshare -m` when the root mount is shared), waits for
+// lxc to report the container as running, invokes startCallback if set and
+// finally returns the process exit code. A non-zero exit status alone is not
+// reported as an error.
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+ if err := execdriver.SetTerminal(c, pipes); err != nil {
+ return -1, err
+ }
+ configPath, err := d.generateLXCConfig(c)
+ if err != nil {
+ return -1, err
+ }
+ // Everything after the first "--" is passed to the init binary (c.InitPath).
+ params := []string{
+ "lxc-start",
+ "-n", c.ID,
+ "-f", configPath,
+ "--",
+ c.InitPath,
+ "-driver",
+ DriverName,
+ }
+
+ if c.Network.Interface != nil {
+ params = append(params,
+ "-g", c.Network.Interface.Gateway,
+ "-i", fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
+ )
+ }
+ params = append(params,
+ "-mtu", strconv.Itoa(c.Network.Mtu),
+ )
+
+ if c.User != "" {
+ params = append(params, "-u", c.User)
+ }
+
+ if c.Privileged {
+ if d.apparmor {
+ params[0] = path.Join(d.root, "lxc-start-unconfined")
+
+ }
+ params = append(params, "-privileged")
+ }
+
+ if c.WorkingDir != "" {
+ params = append(params, "-w", c.WorkingDir)
+ }
+
+ params = append(params, "--", c.Entrypoint)
+ params = append(params, c.Arguments...)
+
+ if d.sharedRoot {
+ // lxc-start really needs / to be non-shared, or all kinds of stuff break
+ // when lxc-start unmount things and those unmounts propagate to the main
+ // mount namespace.
+ // What we really want is to clone into a new namespace and then
+ // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork
+ // without exec in go we have to do this horrible shell hack...
+ shellString :=
+ "mount --make-rslave /; exec " +
+ utils.ShellQuoteArguments(params)
+
+ params = []string{
+ "unshare", "-m", "--", "/bin/sh", "-c", shellString,
+ }
+ }
+
+ var (
+ name = params[0]
+ arg = params[1:]
+ )
+ // Fall back to the bare name when it cannot be resolved on PATH.
+ aname, err := exec.LookPath(name)
+ if err != nil {
+ aname = name
+ }
+ c.Path = aname
+ c.Args = append([]string{name}, arg...)
+
+ if err := c.Start(); err != nil {
+ return -1, err
+ }
+
+ var (
+ waitErr error
+ waitLock = make(chan struct{})
+ )
+ // waitLock is closed once c.Wait returns; waitErr is written before the close.
+ go func() {
+ if err := c.Wait(); err != nil {
+ if _, ok := err.(*exec.ExitError); !ok { // Do not propagate the error if it's simply a status code != 0
+ waitErr = err
+ }
+ }
+ close(waitLock)
+ }()
+
+ // Poll lxc for RUNNING status
+ pid, err := d.waitForStart(c, waitLock)
+ if err != nil {
+ if c.Process != nil {
+ c.Process.Kill()
+ }
+ return -1, err
+ }
+ c.ContainerPid = pid
+
+ if startCallback != nil {
+ startCallback(c)
+ }
+
+ <-waitLock
+
+ return getExitCode(c), waitErr
+}
+
+// getExitCode returns the exit code of the process;
+// if the process has not exited (c.ProcessState is nil) -1 will be returned
+func getExitCode(c *execdriver.Command) int {
+ if c.ProcessState == nil {
+ return -1
+ }
+ return c.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+}
+
+// Kill sends signal sig to the lxc container identified by c.ID.
+func (d *driver) Kill(c *execdriver.Command, sig int) error {
+ return KillLxc(c.ID, sig)
+}
+
+// Terminate sends signal 9 to the lxc container identified by c.ID.
+func (d *driver) Terminate(c *execdriver.Command) error {
+ return KillLxc(c.ID, 9)
+}
+
+func (d *driver) version() string {
+ var (
+ version string
+ output []byte
+ err error
+ )
+ if _, errPath := exec.LookPath("lxc-version"); errPath == nil {
+ output, err = exec.Command("lxc-version").CombinedOutput()
+ } else {
+ output, err = exec.Command("lxc-start", "--version").CombinedOutput()
+ }
+ if err == nil {
+ version = strings.TrimSpace(string(output))
+ if parts := strings.SplitN(version, ":", 2); len(parts) == 2 {
+ version = strings.TrimSpace(parts[1])
+ }
+ }
+ return version
+}
+
+func KillLxc(id string, sig int) error {
+ var (
+ err error
+ output []byte
+ )
+ _, err = exec.LookPath("lxc-kill")
+ if err == nil {
+ output, err = exec.Command("lxc-kill", "-n", id, strconv.Itoa(sig)).CombinedOutput()
+ } else {
+ output, err = exec.Command("lxc-stop", "-k", "-n", id, strconv.Itoa(sig)).CombinedOutput()
+ }
+ if err != nil {
+ return fmt.Errorf("Err: %s Output: %s", err, output)
+ }
+ return nil
+}
+
+// waitForStart waits for the process to start and returns the pid for the process.
+// It polls `lxc-info` about every 50ms for up to 5 seconds. It returns
+// (-1, nil) if waitLock is closed while polling, and
+// (-1, execdriver.ErrNotRunning) when the timeout elapses.
+func (d *driver) waitForStart(c *execdriver.Command, waitLock chan struct{}) (int, error) {
+ var (
+ err error
+ output []byte
+ )
+ // We wait for the container to be fully running.
+ // Timeout after 5 seconds. In case of broken pipe, just retry.
+ // Note: The container can run and finish correctly before
+ // the end of this loop
+ for now := time.Now(); time.Since(now) < 5*time.Second; {
+ select {
+ case <-waitLock:
+ // If the process dies while waiting for it, just return
+ return -1, nil
+ default:
+ }
+
+ // A failed lxc-info call is retried once before giving up.
+ output, err = d.getInfo(c.ID)
+ if err != nil {
+ output, err = d.getInfo(c.ID)
+ if err != nil {
+ return -1, err
+ }
+ }
+ info, err := parseLxcInfo(string(output))
+ if err != nil {
+ return -1, err
+ }
+ if info.Running {
+ return info.Pid, nil
+ }
+ time.Sleep(50 * time.Millisecond)
+ }
+ return -1, execdriver.ErrNotRunning
+}
+
+// getInfo runs `lxc-info -n id` and returns its combined stdout/stderr.
+func (d *driver) getInfo(id string) ([]byte, error) {
+ return exec.Command("lxc-info", "-n", id).CombinedOutput()
+}
+
+// info is the value returned by driver.Info: a container id
+// paired with the driver used to query it.
+type info struct {
+ ID string
+ driver *driver
+}
+
+func (i *info) IsRunning() bool {
+ var running bool
+
+ output, err := i.driver.getInfo(i.ID)
+ if err != nil {
+ utils.Errorf("Error getting info for lxc container %s: %s (%s)", i.ID, err, output)
+ return false
+ }
+ if strings.Contains(string(output), "RUNNING") {
+ running = true
+ }
+ return running
+}
+
+// Info returns an execdriver.Info for the container id, backed by this driver.
+func (d *driver) Info(id string) execdriver.Info {
+ return &info{
+ ID: id,
+ driver: d,
+ }
+}
+
+// GetPidsForContainer returns the pids listed in the container's cgroup
+// "tasks" file for the cpu subsystem. On failure it returns the pids
+// collected so far (a non-nil, possibly empty slice) together with the error.
+func (d *driver) GetPidsForContainer(id string) ([]int, error) {
+ pids := []int{}
+
+ // cpu is chosen because it is the only non optional subsystem in cgroups
+ subsystem := "cpu"
+ cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem)
+ if err != nil {
+ return pids, err
+ }
+
+ cgroupDir, err := cgroups.GetThisCgroupDir(subsystem)
+ if err != nil {
+ return pids, err
+ }
+
+ filename := filepath.Join(cgroupRoot, cgroupDir, id, "tasks")
+ if _, err := os.Stat(filename); os.IsNotExist(err) {
+ // With more recent lxc versions use, cgroup will be in lxc/
+ filename = filepath.Join(cgroupRoot, cgroupDir, "lxc", id, "tasks")
+ }
+
+ output, err := ioutil.ReadFile(filename)
+ if err != nil {
+ return pids, err
+ }
+ // One pid per line; empty lines are skipped.
+ for _, p := range strings.Split(string(output), "\n") {
+ if len(p) == 0 {
+ continue
+ }
+ pid, err := strconv.Atoi(p)
+ if err != nil {
+ return pids, fmt.Errorf("Invalid pid '%s': %s", p, err)
+ }
+ pids = append(pids, pid)
+ }
+ return pids, nil
+}
+
+// linkLxcStart makes <root>/lxc-start-unconfined a symlink to the
+// lxc-start binary found on PATH, removing any existing entry at that
+// path first.
+func linkLxcStart(root string) error {
+	sourcePath, err := exec.LookPath("lxc-start")
+	if err != nil {
+		return err
+	}
+	targetPath := path.Join(root, "lxc-start-unconfined")
+
+	_, statErr := os.Lstat(targetPath)
+	switch {
+	case statErr == nil:
+		// Something is already there: replace it.
+		if rmErr := os.Remove(targetPath); rmErr != nil {
+			return rmErr
+		}
+	case !os.IsNotExist(statErr):
+		return statErr
+	}
+	return os.Symlink(sourcePath, targetPath)
+}
+
+// rootIsShared reports whether the entry for the "/" mount point in
+// /proc/self/mountinfo has a seventh column starting with "shared".
+// When the file cannot be read, or no usable "/" entry is found, it
+// returns true.
+//
+// TODO: This can be moved to the mountinfo reader in the mount pkg
+func rootIsShared() bool {
+	data, err := ioutil.ReadFile("/proc/self/mountinfo")
+	if err == nil {
+		for _, line := range strings.Split(string(data), "\n") {
+			cols := strings.Split(line, " ")
+			// cols[4] is compared with "/" and cols[6] is inspected, so at
+			// least seven columns are required; a shorter line used to
+			// panic with an index out of range.
+			if len(cols) >= 7 && cols[4] == "/" {
+				return strings.HasPrefix(cols[6], "shared")
+			}
+		}
+	}
+
+	// No idea, probably safe to assume so
+	return true
+}
+
+// generateLXCConfig renders LxcTemplateCompiled for c into
+// <root>/containers/<c.ID>/config.lxc and returns that path.
+// The first entry of c.Config["label"], if any, is turned into the process
+// and mount labels made available to the template.
+func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
+ var (
+ process, mount string
+ root = path.Join(d.root, "containers", c.ID, "config.lxc")
+ labels = c.Config["label"]
+ )
+ // NOTE(review): the file is created before the labels are generated, so a
+ // label error leaves an empty config file behind — confirm this is acceptable.
+ fo, err := os.Create(root)
+ if err != nil {
+ return "", err
+ }
+ defer fo.Close()
+
+ if len(labels) > 0 {
+ process, mount, err = label.GenLabels(labels[0])
+ if err != nil {
+ return "", err
+ }
+ }
+
+ if err := LxcTemplateCompiled.Execute(fo, struct {
+ *execdriver.Command
+ AppArmor bool
+ ProcessLabel string
+ MountLabel string
+ }{
+ Command: c,
+ AppArmor: d.apparmor,
+ ProcessLabel: process,
+ MountLabel: mount,
+ }); err != nil {
+ return "", err
+ }
+ return root, nil
+}
diff --git a/runtime/execdriver/lxc/info.go b/runtime/execdriver/lxc/info.go
new file mode 100644
index 0000000000..27b4c58604
--- /dev/null
+++ b/runtime/execdriver/lxc/info.go
@@ -0,0 +1,50 @@
+package lxc
+
+import (
+ "bufio"
+ "errors"
+ "strconv"
+ "strings"
+)
+
+var (
+	// ErrCannotParse is returned by parseLxcInfo when given empty input.
+	ErrCannotParse = errors.New("cannot parse raw input")
+)
+
+// lxcInfo holds the fields parseLxcInfo extracts from `lxc-info` output.
+type lxcInfo struct {
+	Running bool
+	Pid     int
+}
+
+// parseLxcInfo parses "key: value" lines from raw.
+//
+// The "state" key sets Running (true only for the exact value "RUNNING")
+// and the "pid" key sets Pid. Keys are matched case-insensitively and
+// lines without a ':' are ignored. It returns ErrCannotParse for empty
+// input, the strconv error for a non-numeric pid, and the scanner error
+// if reading raw fails (for example on an over-long line).
+func parseLxcInfo(raw string) (*lxcInfo, error) {
+	if raw == "" {
+		return nil, ErrCannotParse
+	}
+	var (
+		s    = bufio.NewScanner(strings.NewReader(raw))
+		info = &lxcInfo{}
+	)
+	for s.Scan() {
+		parts := strings.Split(s.Text(), ":")
+		if len(parts) < 2 {
+			continue
+		}
+		value := strings.TrimSpace(parts[1])
+		switch strings.ToLower(strings.TrimSpace(parts[0])) {
+		case "state":
+			info.Running = value == "RUNNING"
+		case "pid":
+			pid, err := strconv.Atoi(value)
+			if err != nil {
+				return nil, err
+			}
+			info.Pid = pid
+		}
+	}
+	// Scanner errors only become visible once Scan has returned false, so
+	// the check belongs after the loop rather than inside it.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return info, nil
+}
diff --git a/runtime/execdriver/lxc/info_test.go b/runtime/execdriver/lxc/info_test.go
new file mode 100644
index 0000000000..edafc02511
--- /dev/null
+++ b/runtime/execdriver/lxc/info_test.go
@@ -0,0 +1,36 @@
+package lxc
+
+import (
+ "testing"
+)
+
+// TestParseRunningInfo checks that a RUNNING state and a pid are both
+// extracted from indented "key: value" lines.
+func TestParseRunningInfo(t *testing.T) {
+ raw := `
+ state: RUNNING
+ pid: 50`
+
+ info, err := parseLxcInfo(raw)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !info.Running {
+ t.Fatal("info should return a running state")
+ }
+ if info.Pid != 50 {
+ t.Fatalf("info should have pid 50 got %d", info.Pid)
+ }
+}
+
+// TestEmptyInfo checks that empty input is rejected with an error.
+func TestEmptyInfo(t *testing.T) {
+ _, err := parseLxcInfo("")
+ if err == nil {
+ t.Fatal("error should not be nil")
+ }
+}
+
+// TestBadInfo checks that a line without a ':' separator is skipped
+// rather than reported as an error.
+func TestBadInfo(t *testing.T) {
+ _, err := parseLxcInfo("state")
+ if err != nil {
+ t.Fatal(err)
+ }
+}
diff --git a/runtime/execdriver/lxc/init.go b/runtime/execdriver/lxc/init.go
new file mode 100644
index 0000000000..c1933a5e43
--- /dev/null
+++ b/runtime/execdriver/lxc/init.go
@@ -0,0 +1,175 @@
+package lxc
+
+import (
+ "encoding/json"
+ "fmt"
+ "github.com/dotcloud/docker/pkg/netlink"
+ "github.com/dotcloud/docker/pkg/user"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "github.com/syndtr/gocapability/capability"
+ "io/ioutil"
+ "net"
+ "os"
+ "strings"
+ "syscall"
+)
+
+// setupEnv clears environment pollution introduced by lxc-start.
+// It loads a JSON array of "KEY=value" strings from the file ".dockerenv"
+// in the current directory, appends the current value of the "container"
+// variable, stores the result in args.Env and then replaces the process
+// environment with exactly those entries.
+func setupEnv(args *execdriver.InitArgs) error {
+ // Get env
+ var env []string
+ content, err := ioutil.ReadFile(".dockerenv")
+ if err != nil {
+ return fmt.Errorf("Unable to load environment variables: %v", err)
+ }
+ if err := json.Unmarshal(content, &env); err != nil {
+ return fmt.Errorf("Unable to unmarshal environment variables: %v", err)
+ }
+ // Propagate the plugin-specific container env variable
+ env = append(env, "container="+os.Getenv("container"))
+
+ args.Env = env
+
+ os.Clearenv()
+ for _, kv := range args.Env {
+ parts := strings.SplitN(kv, "=", 2)
+ // An entry without "=" is set with an empty value.
+ if len(parts) == 1 {
+ parts = append(parts, "")
+ }
+ os.Setenv(parts[0], parts[1])
+ }
+
+ return nil
+}
+
+// setupHostname applies the HOSTNAME entry of args.Env as the hostname.
+// It is a no-op when the entry is missing or empty.
+func setupHostname(args *execdriver.InitArgs) error {
+	if hostname := getEnv(args, "HOSTNAME"); hostname != "" {
+		return setHostname(hostname)
+	}
+	return nil
+}
+
+// setupNetworking configures networking from args.
+// When args.Ip is set (CIDR notation) it assigns the address to eth0, sets
+// the MTU, brings eth0 up and then brings lo up. When args.Gateway is set
+// it adds that address as the default gateway.
+func setupNetworking(args *execdriver.InitArgs) error {
+ if args.Ip != "" {
+ // eth0
+ iface, err := net.InterfaceByName("eth0")
+ if err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+ ip, ipNet, err := net.ParseCIDR(args.Ip)
+ if err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+ if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+ if err := netlink.NetworkSetMTU(iface, args.Mtu); err != nil {
+ return fmt.Errorf("Unable to set MTU: %v", err)
+ }
+ if err := netlink.NetworkLinkUp(iface); err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+
+ // loopback
+ iface, err = net.InterfaceByName("lo")
+ if err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+ if err := netlink.NetworkLinkUp(iface); err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+ }
+ if args.Gateway != "" {
+ gw := net.ParseIP(args.Gateway)
+ if gw == nil {
+ return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.Gateway)
+ }
+
+ if err := netlink.AddDefaultGw(gw); err != nil {
+ return fmt.Errorf("Unable to set up networking: %v", err)
+ }
+ }
+
+ return nil
+}
+
+// setupWorkingDirectory changes into args.WorkDir when one is given and
+// does nothing otherwise.
+func setupWorkingDirectory(args *execdriver.InitArgs) error {
+	dir := args.WorkDir
+	if dir != "" {
+		if err := syscall.Chdir(dir); err != nil {
+			return fmt.Errorf("Unable to change dir to %v: %v", dir, err)
+		}
+	}
+	return nil
+}
+
+// changeUser takes care of dropping privileges to the desired user.
+// It resolves args.User (with the current uid/gid passed as defaults) and
+// then applies, in this order, the supplementary groups, the gid and the uid.
+func changeUser(args *execdriver.InitArgs) error {
+ uid, gid, suppGids, err := user.GetUserGroupSupplementary(
+ args.User,
+ syscall.Getuid(), syscall.Getgid(),
+ )
+ if err != nil {
+ return err
+ }
+
+ if err := syscall.Setgroups(suppGids); err != nil {
+ return fmt.Errorf("Setgroups failed: %v", err)
+ }
+ if err := syscall.Setgid(gid); err != nil {
+ return fmt.Errorf("Setgid failed: %v", err)
+ }
+ if err := syscall.Setuid(uid); err != nil {
+ return fmt.Errorf("Setuid failed: %v", err)
+ }
+
+ return nil
+}
+
+// setupCapabilities removes a fixed list of capabilities from the current
+// process (both the capability sets and the bounding set). It does nothing
+// when args.Privileged is set.
+func setupCapabilities(args *execdriver.InitArgs) error {
+ if args.Privileged {
+ return nil
+ }
+
+ drop := []capability.Cap{
+ capability.CAP_SETPCAP,
+ capability.CAP_SYS_MODULE,
+ capability.CAP_SYS_RAWIO,
+ capability.CAP_SYS_PACCT,
+ capability.CAP_SYS_ADMIN,
+ capability.CAP_SYS_NICE,
+ capability.CAP_SYS_RESOURCE,
+ capability.CAP_SYS_TIME,
+ capability.CAP_SYS_TTY_CONFIG,
+ capability.CAP_AUDIT_WRITE,
+ capability.CAP_AUDIT_CONTROL,
+ capability.CAP_MAC_OVERRIDE,
+ capability.CAP_MAC_ADMIN,
+ capability.CAP_NET_ADMIN,
+ }
+
+ c, err := capability.NewPid(os.Getpid())
+ if err != nil {
+ return err
+ }
+
+ c.Unset(capability.CAPS|capability.BOUNDS, drop...)
+
+ if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil {
+ return err
+ }
+ return nil
+}
+
+// getEnv returns the value of the first "key=value" entry in args.Env
+// whose name equals key. Entries without "=" never match; "" is returned
+// when nothing matches.
+func getEnv(args *execdriver.InitArgs, key string) string {
+	for _, entry := range args.Env {
+		sep := strings.Index(entry, "=")
+		if sep < 0 {
+			continue
+		}
+		if entry[:sep] == key {
+			return entry[sep+1:]
+		}
+	}
+	return ""
+}
diff --git a/runtime/execdriver/lxc/lxc_init_linux.go b/runtime/execdriver/lxc/lxc_init_linux.go
new file mode 100644
index 0000000000..7288f5877b
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_init_linux.go
@@ -0,0 +1,11 @@
+// +build amd64
+
+package lxc
+
+import (
+ "syscall"
+)
+
+// setHostname sets the hostname through the sethostname syscall.
+func setHostname(hostname string) error {
+ return syscall.Sethostname([]byte(hostname))
+}
diff --git a/runtime/execdriver/lxc/lxc_init_unsupported.go b/runtime/execdriver/lxc/lxc_init_unsupported.go
new file mode 100644
index 0000000000..d68cb91a1e
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_init_unsupported.go
@@ -0,0 +1,7 @@
+// +build !linux !amd64
+
+package lxc
+
+// setHostname is the stub used when the build constraint above matches;
+// it always panics.
+// NOTE(review): the message names darwin, but the constraint
+// (`!linux !amd64`) selects this file for other targets too.
+func setHostname(hostname string) error {
+ panic("Not supported on darwin")
+}
diff --git a/runtime/execdriver/lxc/lxc_template.go b/runtime/execdriver/lxc/lxc_template.go
new file mode 100644
index 0000000000..c49753c6aa
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_template.go
@@ -0,0 +1,176 @@
+package lxc
+
+import (
+ "github.com/dotcloud/docker/pkg/label"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "strings"
+ "text/template"
+)
+
+const LxcTemplate = `
+{{if .Network.Interface}}
+# network configuration
+lxc.network.type = veth
+lxc.network.link = {{.Network.Interface.Bridge}}
+lxc.network.name = eth0
+{{else}}
+# network is disabled (-n=false)
+lxc.network.type = empty
+lxc.network.flags = up
+{{end}}
+lxc.network.mtu = {{.Network.Mtu}}
+
+# root filesystem
+{{$ROOTFS := .Rootfs}}
+lxc.rootfs = {{$ROOTFS}}
+
+# use a dedicated pts for the container (and limit the number of pseudo terminal
+# available)
+lxc.pts = 1024
+
+# disable the main console
+lxc.console = none
+{{if .ProcessLabel}}
+lxc.se_context = {{ .ProcessLabel}}
+{{end}}
+{{$MOUNTLABEL := .MountLabel}}
+
+# no controlling tty at all
+lxc.tty = 1
+
+{{if .Privileged}}
+lxc.cgroup.devices.allow = a
+{{else}}
+# no implicit access to devices
+lxc.cgroup.devices.deny = a
+
+# but allow mknod for any device
+lxc.cgroup.devices.allow = c *:* m
+lxc.cgroup.devices.allow = b *:* m
+
+# /dev/null and zero
+lxc.cgroup.devices.allow = c 1:3 rwm
+lxc.cgroup.devices.allow = c 1:5 rwm
+
+# consoles
+lxc.cgroup.devices.allow = c 5:1 rwm
+lxc.cgroup.devices.allow = c 5:0 rwm
+lxc.cgroup.devices.allow = c 4:0 rwm
+lxc.cgroup.devices.allow = c 4:1 rwm
+
+# /dev/urandom,/dev/random
+lxc.cgroup.devices.allow = c 1:9 rwm
+lxc.cgroup.devices.allow = c 1:8 rwm
+
+# /dev/pts/ - pts namespaces are "coming soon"
+lxc.cgroup.devices.allow = c 136:* rwm
+lxc.cgroup.devices.allow = c 5:2 rwm
+
+# tuntap
+lxc.cgroup.devices.allow = c 10:200 rwm
+
+# fuse
+#lxc.cgroup.devices.allow = c 10:229 rwm
+
+# rtc
+#lxc.cgroup.devices.allow = c 254:0 rwm
+{{end}}
+
+# standard mount point
+# Use mnt.putold as per https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/986385
+lxc.pivotdir = lxc_putold
+
+# NOTICE: These mounts must be applied within the namespace
+
+# WARNING: procfs is a known attack vector and should probably be disabled
+# if your userspace allows it. eg. see http://blog.zx2c4.com/749
+lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0
+
+# WARNING: sysfs is a known attack vector and should probably be disabled
+# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
+lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
+
+{{if .Tty}}
+lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
+{{end}}
+
+lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts {{formatMountLabel "newinstance,ptmxmode=0666,nosuid,noexec" $MOUNTLABEL}} 0 0
+lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs {{formatMountLabel "size=65536k,nosuid,nodev,noexec" $MOUNTLABEL}} 0 0
+
+{{range $value := .Mounts}}
+{{if $value.Writable}}
+lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,rw 0 0
+{{else}}
+lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,ro 0 0
+{{end}}
+{{end}}
+
+{{if .Privileged}}
+{{if .AppArmor}}
+lxc.aa_profile = unconfined
+{{else}}
+#lxc.aa_profile = unconfined
+{{end}}
+{{end}}
+
+# limits
+{{if .Resources}}
+{{if .Resources.Memory}}
+lxc.cgroup.memory.limit_in_bytes = {{.Resources.Memory}}
+lxc.cgroup.memory.soft_limit_in_bytes = {{.Resources.Memory}}
+{{with $memSwap := getMemorySwap .Resources}}
+lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
+{{end}}
+{{end}}
+{{if .Resources.CpuShares}}
+lxc.cgroup.cpu.shares = {{.Resources.CpuShares}}
+{{end}}
+{{end}}
+
+{{if .Config.lxc}}
+{{range $value := .Config.lxc}}
+lxc.{{$value}}
+{{end}}
+{{end}}
+`
+
+var LxcTemplateCompiled *template.Template
+
+// escapeFstabSpaces rewrites every space in field as the octal escape
+// \040, the encoding "man 5 fstab" prescribes and therefore the one
+// "lxc.mount.entry" lines in lxc.conf expect.
+func escapeFstabSpaces(field string) string {
+	pieces := strings.Split(field, " ")
+	return strings.Join(pieces, "\\040")
+}
+
+// getMemorySwap computes the value the LXC template renders for
+// lxc.cgroup.memory.memsw.limit_in_bytes.
+func getMemorySwap(v *execdriver.Resources) int64 {
+ // By default, MemorySwap is set to twice the size of RAM.
+ // If you want to omit MemorySwap, set it to `-1'.
+ // A zero result makes the template's {{with}} block skip the setting.
+ // NOTE(review): only the sign of v.MemorySwap is inspected; a positive
+ // value is never used as the limit — confirm this is intended.
+ if v.MemorySwap < 0 {
+ return 0
+ }
+ return v.Memory * 2
+}
+
+// getLabel returns the value of the first "name=value" entry stored under
+// the "label" key of c, with surrounding whitespace trimmed from both name
+// and value. Entries without "=" carry no value and are skipped (they used
+// to cause an index-out-of-range panic when their key matched). It returns
+// "" when no entry matches.
+func getLabel(c map[string][]string, name string) string {
+	for _, l := range c["label"] {
+		parts := strings.SplitN(l, "=", 2)
+		if len(parts) != 2 {
+			continue
+		}
+		if strings.TrimSpace(parts[0]) == name {
+			return strings.TrimSpace(parts[1])
+		}
+	}
+	return ""
+}
+
+// init compiles LxcTemplate, with its helper functions registered, into
+// LxcTemplateCompiled and panics if the template does not parse.
+func init() {
+ var err error
+ funcMap := template.FuncMap{
+ "getMemorySwap": getMemorySwap,
+ "escapeFstabSpaces": escapeFstabSpaces,
+ "formatMountLabel": label.FormatMountLabel,
+ }
+ LxcTemplateCompiled, err = template.New("lxc").Funcs(funcMap).Parse(LxcTemplate)
+ if err != nil {
+ panic(err)
+ }
+}
diff --git a/runtime/execdriver/lxc/lxc_template_unit_test.go b/runtime/execdriver/lxc/lxc_template_unit_test.go
new file mode 100644
index 0000000000..7f473a0502
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_template_unit_test.go
@@ -0,0 +1,135 @@
+package lxc
+
+import (
+ "bufio"
+ "fmt"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "io/ioutil"
+ "math/rand"
+ "os"
+ "path"
+ "strings"
+ "testing"
+ "time"
+)
+
+// TestLXCConfig generates a config for a container with random memory and
+// cpu share values and checks that the memory limit and the swap limit
+// (twice the memory) appear in the generated file.
+func TestLXCConfig(t *testing.T) {
+ root, err := ioutil.TempDir("", "TestLXCConfig")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(root)
+
+ // The error from MkdirAll is not checked here.
+ os.MkdirAll(path.Join(root, "containers", "1"), 0777)
+
+ // Memory is allocated randomly for testing
+ rand.Seed(time.Now().UTC().UnixNano())
+ var (
+ memMin = 33554432
+ memMax = 536870912
+ mem = memMin + rand.Intn(memMax-memMin)
+ cpuMin = 100
+ cpuMax = 10000
+ cpu = cpuMin + rand.Intn(cpuMax-cpuMin)
+ )
+
+ driver, err := NewDriver(root, false)
+ if err != nil {
+ t.Fatal(err)
+ }
+ command := &execdriver.Command{
+ ID: "1",
+ Resources: &execdriver.Resources{
+ Memory: int64(mem),
+ CpuShares: int64(cpu),
+ },
+ Network: &execdriver.Network{
+ Mtu: 1500,
+ Interface: nil,
+ },
+ }
+ p, err := driver.generateLXCConfig(command)
+ if err != nil {
+ t.Fatal(err)
+ }
+ grepFile(t, p,
+ fmt.Sprintf("lxc.cgroup.memory.limit_in_bytes = %d", mem))
+
+ grepFile(t, p,
+ fmt.Sprintf("lxc.cgroup.memory.memsw.limit_in_bytes = %d", mem*2))
+}
+
+// TestCustomLxcConfig checks that entries supplied under the "lxc" key of
+// Command.Config show up in the generated config file.
+func TestCustomLxcConfig(t *testing.T) {
+ root, err := ioutil.TempDir("", "TestCustomLxcConfig")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(root)
+
+ // The error from MkdirAll is not checked here.
+ os.MkdirAll(path.Join(root, "containers", "1"), 0777)
+
+ driver, err := NewDriver(root, false)
+ if err != nil {
+ t.Fatal(err)
+ }
+ command := &execdriver.Command{
+ ID: "1",
+ Privileged: false,
+ Config: map[string][]string{
+ "lxc": {
+ "lxc.utsname = docker",
+ "lxc.cgroup.cpuset.cpus = 0,1",
+ },
+ },
+ Network: &execdriver.Network{
+ Mtu: 1500,
+ Interface: nil,
+ },
+ }
+
+ p, err := driver.generateLXCConfig(command)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ grepFile(t, p, "lxc.utsname = docker")
+ grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
+}
+
+func grepFile(t *testing.T, path string, pattern string) {
+ f, err := os.Open(path)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer f.Close()
+ r := bufio.NewReader(f)
+ var (
+ line string
+ )
+ err = nil
+ for err == nil {
+ line, err = r.ReadString('\n')
+ if strings.Contains(line, pattern) == true {
+ return
+ }
+ }
+ t.Fatalf("grepFile: pattern \"%s\" not found in \"%s\"", pattern, path)
+}
+
+// TestEscapeFstabSpaces checks escapeFstabSpaces against a table of inputs
+// covering empty, leading, trailing and repeated spaces.
+func TestEscapeFstabSpaces(t *testing.T) {
+ var testInputs = map[string]string{
+ " ": "\\040",
+ "": "",
+ "/double space": "/double\\040\\040space",
+ "/some long test string": "/some\\040long\\040test\\040string",
+ "/var/lib/docker": "/var/lib/docker",
+ " leading": "\\040leading",
+ "trailing ": "trailing\\040",
+ }
+ for in, exp := range testInputs {
+ if out := escapeFstabSpaces(in); exp != out {
+ t.Logf("Expected %s got %s", exp, out)
+ t.Fail()
+ }
+ }
+}
diff --git a/runtime/execdriver/native/configuration/parse.go b/runtime/execdriver/native/configuration/parse.go
new file mode 100644
index 0000000000..6d6c643919
--- /dev/null
+++ b/runtime/execdriver/native/configuration/parse.go
@@ -0,0 +1,186 @@
+package configuration
+
+import (
+ "fmt"
+ "github.com/dotcloud/docker/pkg/libcontainer"
+ "github.com/dotcloud/docker/utils"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+)
+
+// Action applies one user-supplied option to the container configuration.
+// The second argument is caller-provided context (ParseConfiguration passes
+// its map of running commands) and the third is the raw option value.
+type Action func(*libcontainer.Container, interface{}, string) error
+
+// actions maps the option key (the text before "=" in a user option) to the
+// Action that handles it. ParseConfiguration rejects keys that are not listed
+// here. Note that vethMacAddress is defined in this file but has no entry.
+var actions = map[string]Action{
+ "cap.add": addCap, // add a cap
+ "cap.drop": dropCap, // drop a cap
+
+ "ns.add": addNamespace, // add a namespace
+ "ns.drop": dropNamespace, // drop a namespace when cloning
+
+ "net.join": joinNetNamespace, // join another containers net namespace
+
+ "cgroups.cpu_shares": cpuShares, // set the cpu shares
+ "cgroups.memory": memory, // set the memory limit
+ "cgroups.memory_swap": memorySwap, // set the memory swap limit
+ "cgroups.cpuset.cpus": cpusetCpus, // set the cpus used
+
+ "apparmor_profile": apparmorProfile, // set the apparmor profile to apply
+
+ "fs.readonly": readonlyFs, // make the rootfs of the container read only
+}
+
+// cpusetCpus stores value verbatim as the container's cpuset.cpus setting.
+// It returns an error when the container has no cgroup configuration.
+func cpusetCpus(container *libcontainer.Container, context interface{}, value string) error {
+	if cg := container.Cgroups; cg != nil {
+		cg.CpusetCpus = value
+		return nil
+	}
+	return fmt.Errorf("cannot set cgroups when they are disabled")
+}
+
+// apparmorProfile records value under the "apparmor_profile" key of the
+// container's Context. It always returns nil.
+func apparmorProfile(container *libcontainer.Container, context interface{}, value string) error {
+ container.Context["apparmor_profile"] = value
+ return nil
+}
+
+// cpuShares parses value as a base-10 integer and stores it as the
+// container's cpu shares. It returns an error when cgroups are disabled or
+// the value is not a valid integer.
+func cpuShares(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+	shares, err := strconv.ParseInt(value, 10, 0)
+	if err == nil {
+		container.Cgroups.CpuShares = shares
+	}
+	return err
+}
+
+// memory sets the container's memory limit. The value is converted by
+// utils.RAMInBytes (the package tests pass "500m" and expect 500*1024*1024).
+// It returns an error when cgroups are disabled or the conversion fails.
+func memory(container *libcontainer.Container, context interface{}, value string) error {
+	cg := container.Cgroups
+	if cg == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+
+	limit, err := utils.RAMInBytes(value)
+	if err != nil {
+		return err
+	}
+
+	cg.Memory = limit
+	return nil
+}
+
+// memorySwap sets the container's memory swap limit. Unlike memory, the value
+// is parsed as a plain integer (strconv.ParseInt with base 0, so prefixes
+// such as 0x are accepted); size suffixes are not handled here.
+func memorySwap(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+	swap, parseErr := strconv.ParseInt(value, 0, 64)
+	if parseErr == nil {
+		container.Cgroups.MemorySwap = swap
+	}
+	return parseErr
+}
+
+// addCap marks the capability named by value as enabled in the container's
+// capabilities mask. It returns an error when the mask has no such entry.
+func addCap(container *libcontainer.Container, context interface{}, value string) error {
+	if capability := container.CapabilitiesMask.Get(value); capability != nil {
+		capability.Enabled = true
+		return nil
+	}
+	return fmt.Errorf("%s is not a valid capability", value)
+}
+
+// dropCap marks the capability named by value as disabled in the container's
+// capabilities mask. It returns an error when the mask has no such entry.
+func dropCap(container *libcontainer.Container, context interface{}, value string) error {
+	if capability := container.CapabilitiesMask.Get(value); capability != nil {
+		capability.Enabled = false
+		return nil
+	}
+	return fmt.Errorf("%s is not a valid capability", value)
+}
+
+// addNamespace enables the namespace named by value (for example "NEWNET")
+// in the container's namespace list. It returns an error when the list has
+// no such entry.
+func addNamespace(container *libcontainer.Container, context interface{}, value string) error {
+	ns := container.Namespaces.Get(value)
+	if ns == nil {
+		// Report the name exactly as given. Slicing off the first byte
+		// misreports the name and panics when value is empty ("ns.add=").
+		return fmt.Errorf("%s is not a valid namespace", value)
+	}
+	ns.Enabled = true
+	return nil
+}
+
+// dropNamespace disables the namespace named by value (for example "NEWNET")
+// in the container's namespace list. It returns an error when the list has
+// no such entry.
+func dropNamespace(container *libcontainer.Container, context interface{}, value string) error {
+	ns := container.Namespaces.Get(value)
+	if ns == nil {
+		// Report the name exactly as given. Slicing off the first byte
+		// misreports the name and panics when value is empty ("ns.drop=").
+		return fmt.Errorf("%s is not a valid namespace", value)
+	}
+	ns.Enabled = false
+	return nil
+}
+
+// readonlyFs makes the container's root filesystem read-only when value is
+// exactly "1" or "true"; any other value makes it writable. It never fails.
+func readonlyFs(container *libcontainer.Container, context interface{}, value string) error {
+	container.ReadonlyFs = value == "1" || value == "true"
+	return nil
+}
+
+// joinNetNamespace appends a "netns" network whose "nspath" points at
+// /proc/<pid>/ns/net of the running command registered under value.
+// context must be a map[string]*exec.Cmd (the type assertion is unchecked).
+// It returns an error when no command with a started process is found.
+func joinNetNamespace(container *libcontainer.Container, context interface{}, value string) error {
+	target := context.(map[string]*exec.Cmd)[value]
+	if target == nil || target.Process == nil {
+		return fmt.Errorf("%s is not a valid running container to join", value)
+	}
+
+	netns := &libcontainer.Network{
+		Type: "netns",
+		Context: libcontainer.Context{
+			"nspath": filepath.Join("/proc", fmt.Sprint(target.Process.Pid), "ns", "net"),
+		},
+	}
+	container.Networks = append(container.Networks, netns)
+	return nil
+}
+
+// vethMacAddress stores value under the "mac" key of the Context of the
+// first network of type "veth". It returns an error when the container has
+// no veth network.
+func vethMacAddress(container *libcontainer.Container, context interface{}, value string) error {
+	var veth *libcontainer.Network
+	for _, network := range container.Networks {
+		if network.Type == "veth" {
+			veth = network
+			break
+		}
+	}
+	if veth == nil {
+		return fmt.Errorf("no veth configured for container")
+	}
+	// Writing to a nil map panics; allocate the Context on demand.
+	if veth.Context == nil {
+		veth.Context = libcontainer.Context{}
+	}
+	veth.Context["mac"] = value
+	return nil
+}
+
+// ParseConfiguration applies user-supplied "key=value" options to the
+// container's default configuration. Each key is looked up in the actions
+// table and the matching Action is invoked with the running commands as its
+// context. Processing stops at the first malformed option, unknown key or
+// failing Action, and that error is returned.
+//
+// TODO: this can be moved to a general utils or parser in pkg
+func ParseConfiguration(container *libcontainer.Container, running map[string]*exec.Cmd, opts []string) error {
+	for _, opt := range opts {
+		// Split on the first "=" only so values may themselves contain "=".
+		parts := strings.SplitN(opt, "=", 2)
+		if len(parts) < 2 {
+			return fmt.Errorf("invalid format for %s", opt)
+		}
+		key, val := parts[0], parts[1]
+
+		handler, known := actions[key]
+		if !known {
+			return fmt.Errorf("%s is not a valid option for the native driver", key)
+		}
+
+		if err := handler(container, running, val); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/runtime/execdriver/native/configuration/parse_test.go b/runtime/execdriver/native/configuration/parse_test.go
new file mode 100644
index 0000000000..8001358766
--- /dev/null
+++ b/runtime/execdriver/native/configuration/parse_test.go
@@ -0,0 +1,166 @@
+package configuration
+
+import (
+ "github.com/dotcloud/docker/runtime/execdriver/native/template"
+ "testing"
+)
+
+// TestSetReadonlyRootFs checks that the default template has a writable
+// rootfs and that "fs.readonly=true" switches it to read-only.
+func TestSetReadonlyRootFs(t *testing.T) {
+	container := template.New()
+	if container.ReadonlyFs {
+		t.Fatal("container should not have a readonly rootfs by default")
+	}
+
+	err := ParseConfiguration(container, nil, []string{"fs.readonly=true"})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !container.ReadonlyFs {
+		t.Fatal("container should have a readonly rootfs")
+	}
+}
+
+// TestConfigurationsDoNotConflict checks that options applied to one
+// container created from the template do not leak into another.
+func TestConfigurationsDoNotConflict(t *testing.T) {
+	first, second := template.New(), template.New()
+
+	if err := ParseConfiguration(first, nil, []string{"cap.add=NET_ADMIN"}); err != nil {
+		t.Fatal(err)
+	}
+
+	if !first.CapabilitiesMask.Get("NET_ADMIN").Enabled {
+		t.Fatal("container one should have NET_ADMIN enabled")
+	}
+	if second.CapabilitiesMask.Get("NET_ADMIN").Enabled {
+		t.Fatal("container two should not have NET_ADMIN enabled")
+	}
+}
+
+// TestCpusetCpus checks that "cgroups.cpuset.cpus" is stored verbatim.
+func TestCpusetCpus(t *testing.T) {
+	container := template.New()
+	if err := ParseConfiguration(container, nil, []string{"cgroups.cpuset.cpus=1,2"}); err != nil {
+		t.Fatal(err)
+	}
+
+	expected := "1,2"
+	if got := container.Cgroups.CpusetCpus; got != expected {
+		t.Fatalf("expected %s got %s for cpuset.cpus", expected, got)
+	}
+}
+
+// TestAppArmorProfile checks that "apparmor_profile" overrides the profile
+// stored in the container's Context.
+func TestAppArmorProfile(t *testing.T) {
+	container := template.New()
+	if err := ParseConfiguration(container, nil, []string{"apparmor_profile=koye-the-protector"}); err != nil {
+		t.Fatal(err)
+	}
+
+	expected := "koye-the-protector"
+	if got := container.Context["apparmor_profile"]; got != expected {
+		t.Fatalf("expected profile %s got %s", expected, got)
+	}
+}
+
+// TestCpuShares checks that "cgroups.cpu_shares" is parsed into CpuShares.
+func TestCpuShares(t *testing.T) {
+	container := template.New()
+	if err := ParseConfiguration(container, nil, []string{"cgroups.cpu_shares=1048"}); err != nil {
+		t.Fatal(err)
+	}
+
+	expected := int64(1048)
+	if got := container.Cgroups.CpuShares; got != expected {
+		t.Fatalf("expected cpu shares %d got %d", expected, got)
+	}
+}
+
+// TestCgroupMemory checks that "cgroups.memory=500m" yields 500 MiB in bytes.
+func TestCgroupMemory(t *testing.T) {
+	container := template.New()
+	if err := ParseConfiguration(container, nil, []string{"cgroups.memory=500m"}); err != nil {
+		t.Fatal(err)
+	}
+
+	expected := int64(500 * 1024 * 1024)
+	if got := container.Cgroups.Memory; got != expected {
+		t.Fatalf("expected memory %d got %d", expected, got)
+	}
+}
+
+// TestAddCap checks that several "cap.add" options each enable their
+// capability.
+func TestAddCap(t *testing.T) {
+	container := template.New()
+	options := []string{
+		"cap.add=MKNOD",
+		"cap.add=SYS_ADMIN",
+	}
+	if err := ParseConfiguration(container, nil, options); err != nil {
+		t.Fatal(err)
+	}
+
+	mask := container.CapabilitiesMask
+	if !mask.Get("MKNOD").Enabled {
+		t.Fatal("container should have MKNOD enabled")
+	}
+	if !mask.Get("SYS_ADMIN").Enabled {
+		t.Fatal("container should have SYS_ADMIN enabled")
+	}
+}
+
+// TestDropCap checks that "cap.drop" disables a capability that was enabled
+// beforehand.
+func TestDropCap(t *testing.T) {
+	container := template.New()
+
+	// enable all caps like in privileged mode
+	for _, capability := range container.CapabilitiesMask {
+		capability.Enabled = true
+	}
+
+	if err := ParseConfiguration(container, nil, []string{"cap.drop=MKNOD"}); err != nil {
+		t.Fatal(err)
+	}
+
+	if container.CapabilitiesMask.Get("MKNOD").Enabled {
+		t.Fatal("container should not have MKNOD enabled")
+	}
+}
+
+// TestDropNamespace checks that "ns.drop" disables the named namespace.
+func TestDropNamespace(t *testing.T) {
+	container := template.New()
+	if err := ParseConfiguration(container, nil, []string{"ns.drop=NEWNET"}); err != nil {
+		t.Fatal(err)
+	}
+
+	if container.Namespaces.Get("NEWNET").Enabled {
+		t.Fatal("container should not have NEWNET enabled")
+	}
+}
diff --git a/runtime/execdriver/native/create.go b/runtime/execdriver/native/create.go
new file mode 100644
index 0000000000..71fab3e064
--- /dev/null
+++ b/runtime/execdriver/native/create.go
@@ -0,0 +1,114 @@
+package native
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/dotcloud/docker/pkg/label"
+ "github.com/dotcloud/docker/pkg/libcontainer"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "github.com/dotcloud/docker/runtime/execdriver/native/configuration"
+ "github.com/dotcloud/docker/runtime/execdriver/native/template"
+)
+
+// createContainer populates and configures the container type with the
+// data provided by the execdriver.Command.
+//
+// It starts from template.New(), copies the basic process settings from c,
+// then applies network, privileged, cgroup, mount and label settings in that
+// order. The user's "native" options are parsed last, so they are applied on
+// top of everything set before them. The first failing step aborts creation
+// and its error is returned with a nil container.
+func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container, error) {
+ container := template.New()
+
+ container.Hostname = getEnv("HOSTNAME", c.Env)
+ container.Tty = c.Tty
+ container.User = c.User
+ container.WorkingDir = c.WorkingDir
+ container.Env = c.Env
+ container.Cgroups.Name = c.ID
+ // check to see if we are running in ramdisk to disable pivot root
+ container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
+
+ if err := d.createNetwork(container, c); err != nil {
+ return nil, err
+ }
+ if c.Privileged {
+ if err := d.setPrivileged(container); err != nil {
+ return nil, err
+ }
+ }
+ if err := d.setupCgroups(container, c); err != nil {
+ return nil, err
+ }
+ if err := d.setupMounts(container, c); err != nil {
+ return nil, err
+ }
+ if err := d.setupLabels(container, c); err != nil {
+ return nil, err
+ }
+ // d.activeContainers is handed over as the context for options such as
+ // net.join that need to look up another running command.
+ if err := configuration.ParseConfiguration(container, d.activeContainers, c.Config["native"]); err != nil {
+ return nil, err
+ }
+ return container, nil
+}
+
+// createNetwork replaces the container's network list with a loopback
+// network and, when the command describes an interface, appends a veth
+// network built from that interface's address, gateway and bridge.
+// It always returns nil.
+func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver.Command) error {
+	loopback := &libcontainer.Network{
+		Mtu:     c.Network.Mtu,
+		Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0),
+		Gateway: "localhost",
+		Type:    "loopback",
+		Context: libcontainer.Context{},
+	}
+	container.Networks = []*libcontainer.Network{loopback}
+
+	iface := c.Network.Interface
+	if iface == nil {
+		// No interface requested: loopback only.
+		return nil
+	}
+
+	container.Networks = append(container.Networks, &libcontainer.Network{
+		Mtu:     c.Network.Mtu,
+		Address: fmt.Sprintf("%s/%d", iface.IPAddress, iface.IPPrefixLen),
+		Gateway: iface.Gateway,
+		Type:    "veth",
+		Context: libcontainer.Context{
+			"prefix": "veth",
+			"bridge": iface.Bridge,
+		},
+	})
+	return nil
+}
+
+// setPrivileged enables every capability in the container's mask, turns on
+// cgroup device access and sets the apparmor profile to "unconfined".
+// It always returns nil.
+func (d *driver) setPrivileged(container *libcontainer.Container) error {
+ for _, c := range container.CapabilitiesMask {
+ c.Enabled = true
+ }
+ container.Cgroups.DeviceAccess = true
+ container.Context["apparmor_profile"] = "unconfined"
+ return nil
+}
+
+// setupCgroups copies the cpu shares, memory and memory swap limits from the
+// command's Resources onto the container's cgroup settings. Nothing is
+// changed when the command carries no Resources. It always returns nil.
+func (d *driver) setupCgroups(container *libcontainer.Container, c *execdriver.Command) error {
+	res := c.Resources
+	if res == nil {
+		return nil
+	}
+
+	cg := container.Cgroups
+	cg.CpuShares = res.CpuShares
+	cg.Memory = res.Memory
+	cg.MemorySwap = res.MemorySwap
+	return nil
+}
+
+// setupMounts appends one libcontainer.Mount per entry in c.Mounts, built
+// from the entry's Source, Destination, Writable and Private values.
+// It always returns nil.
+func (d *driver) setupMounts(container *libcontainer.Container, c *execdriver.Command) error {
+ for _, m := range c.Mounts {
+ // Unkeyed literal: the values must stay in libcontainer.Mount's field order.
+ container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private})
+ }
+ return nil
+}
+
+// setupLabels derives process and mount labels from the first entry of the
+// command's "label" config via label.GenLabels and stores them in the
+// container's Context as "process_label" and "mount_label". Without a
+// "label" entry it does nothing. Errors from GenLabels are returned.
+func (d *driver) setupLabels(container *libcontainer.Container, c *execdriver.Command) error {
+	labels := c.Config["label"]
+	if len(labels) == 0 {
+		return nil
+	}
+
+	process, mount, err := label.GenLabels(labels[0])
+	if err != nil {
+		return err
+	}
+
+	container.Context["mount_label"] = mount
+	container.Context["process_label"] = process
+	return nil
+}
diff --git a/runtime/execdriver/native/driver.go b/runtime/execdriver/native/driver.go
new file mode 100644
index 0000000000..d18865e508
--- /dev/null
+++ b/runtime/execdriver/native/driver.go
@@ -0,0 +1,292 @@
+package native
+
+import (
+ "encoding/json"
+ "fmt"
+ "github.com/dotcloud/docker/pkg/cgroups"
+ "github.com/dotcloud/docker/pkg/libcontainer"
+ "github.com/dotcloud/docker/pkg/libcontainer/apparmor"
+ "github.com/dotcloud/docker/pkg/libcontainer/nsinit"
+ "github.com/dotcloud/docker/pkg/system"
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "io"
+ "io/ioutil"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+)
+
+// Identification of the native driver and the location of the backup
+// apparmor profile. Name() reports DriverName and Version joined by "-".
+const (
+ DriverName = "native"
+ Version = "0.1"
+ BackupApparmorProfilePath = "apparmor/docker.back" // relative to docker root
+)
+
+// init registers the init function for the native driver. The function
+// loads the container description from container.json under args.Root,
+// builds the sync pipe from args.Pipe and hands control to nsinit's Init
+// with the current working directory, the console and the user's arguments.
+func init() {
+ execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+ var (
+ container *libcontainer.Container
+ ns = nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{args.Root}, createLogger(""))
+ )
+ f, err := os.Open(filepath.Join(args.Root, "container.json"))
+ if err != nil {
+ return err
+ }
+ // The file is closed explicitly on both paths so it is not held open
+ // while ns.Init runs.
+ if err := json.NewDecoder(f).Decode(&container); err != nil {
+ f.Close()
+ return err
+ }
+ f.Close()
+
+ cwd, err := os.Getwd()
+ if err != nil {
+ return err
+ }
+ syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(args.Pipe))
+ if err != nil {
+ return err
+ }
+ if err := ns.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil {
+ return err
+ }
+ return nil
+ })
+}
+
+// driver is the native execution driver.
+type driver struct {
+ root string // directory holding one sub-directory of state per container ID
+ initPath string // path of the binary executed as the container command
+ activeContainers map[string]*exec.Cmd // commands passed to Run, keyed by container ID
+}
+
+// NewDriver creates the driver's root directory (mode 0700), installs the
+// default apparmor profile and returns a driver with an empty set of active
+// containers. Any failure is returned with a nil driver.
+func NewDriver(root, initPath string) (*driver, error) {
+	if err := os.MkdirAll(root, 0700); err != nil {
+		return nil, err
+	}
+
+	// native driver root is at docker_root/execdriver/native. Put apparmor at docker_root
+	backupProfile := filepath.Join(root, "../..", BackupApparmorProfilePath)
+	if err := apparmor.InstallDefaultProfile(backupProfile); err != nil {
+		return nil, err
+	}
+
+	d := &driver{
+		root:             root,
+		initPath:         initPath,
+		activeContainers: make(map[string]*exec.Cmd),
+	}
+	return d, nil
+}
+
+// Run builds the libcontainer configuration for c, records the command as
+// active, creates the per-container state directory, writes container.json
+// into it and executes the container through nsinit. The state directory is
+// removed when Run returns. It returns the result of ns.Exec, or -1 and the
+// error when a preparation step fails.
+//
+// NOTE(review): the activeContainers entry is never deleted in the code
+// visible here, and the map is accessed without synchronization — confirm
+// that callers serialize Run or guard the map.
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+ // take the Command and populate the libcontainer.Container from it
+ container, err := d.createContainer(c)
+ if err != nil {
+ return -1, err
+ }
+ d.activeContainers[c.ID] = &c.Cmd
+
+ var (
+ term nsinit.Terminal
+ factory = &dockerCommandFactory{c: c, driver: d}
+ stateWriter = &dockerStateWriter{
+ callback: startCallback,
+ c: c,
+ dsw: &nsinit.DefaultStateWriter{filepath.Join(d.root, c.ID)},
+ }
+ ns = nsinit.NewNsInit(factory, stateWriter, createLogger(os.Getenv("DEBUG")))
+ args = append([]string{c.Entrypoint}, c.Arguments...)
+ )
+ if err := d.createContainerRoot(c.ID); err != nil {
+ return -1, err
+ }
+ defer d.removeContainerRoot(c.ID)
+
+ // Pick the terminal wrapper matching the command's tty setting.
+ if c.Tty {
+ term = &dockerTtyTerm{
+ pipes: pipes,
+ }
+ } else {
+ term = &dockerStdTerm{
+ pipes: pipes,
+ }
+ }
+ c.Terminal = term
+ if err := d.writeContainerFile(container, c.ID); err != nil {
+ return -1, err
+ }
+ return ns.Exec(container, term, args)
+}
+
+// Kill sends signal sig to the process of command p.
+// NOTE(review): p.Process is dereferenced without a nil check — confirm
+// callers only invoke Kill on started commands.
+func (d *driver) Kill(p *execdriver.Command, sig int) error {
+ return syscall.Kill(p.Process.Pid, syscall.Signal(sig))
+}
+
+// Terminate force-kills the process of command p and removes its state
+// directory. The kill (signal 9) is only sent when the start time stored on
+// disk equals the current start time of the pid — presumably to avoid
+// killing an unrelated process that reused the pid. A missing start file is
+// treated as "already stopped" and returns nil.
+func (d *driver) Terminate(p *execdriver.Command) error {
+ // lets check the start time for the process
+ started, err := d.readStartTime(p)
+ if err != nil {
+ // if we don't have the data on disk then we can assume the process is gone
+ // because this is only removed after we know the process has stopped
+ if os.IsNotExist(err) {
+ return nil
+ }
+ return err
+ }
+
+ currentStartTime, err := system.GetProcessStartTime(p.Process.Pid)
+ if err != nil {
+ // The state directory is left in place on this path.
+ return err
+ }
+ if started == currentStartTime {
+ err = syscall.Kill(p.Process.Pid, 9)
+ }
+ d.removeContainerRoot(p.ID)
+ return err
+
+}
+
+// readStartTime returns the contents of the "start" file in the state
+// directory of command p, or the read error.
+func (d *driver) readStartTime(p *execdriver.Command) (string, error) {
+ data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
+ if err != nil {
+ return "", err
+ }
+ return string(data), nil
+}
+
+// Info returns an execdriver.Info for the container with the given id,
+// backed by this driver.
+func (d *driver) Info(id string) execdriver.Info {
+ return &info{
+ ID: id,
+ driver: d,
+ }
+}
+
+// Name returns the driver name and version joined by a dash ("native-0.1").
+func (d *driver) Name() string {
+ return fmt.Sprintf("%s-%s", DriverName, Version)
+}
+
+// GetPidsForContainer returns the pids listed in the container's "tasks"
+// file in the "devices" cgroup hierarchy. The file is looked up at
+// <mountpoint>/<this cgroup dir>/<id>/tasks and, when that does not exist,
+// at <mountpoint>/<this cgroup dir>/docker/<id>/tasks. On error the pids
+// collected so far are returned together with the error.
+//
+// TODO: this can be improved with our driver
+// there has to be a better way to do this
+func (d *driver) GetPidsForContainer(id string) ([]int, error) {
+ pids := []int{}
+
+ subsystem := "devices"
+ cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem)
+ if err != nil {
+ return pids, err
+ }
+ cgroupDir, err := cgroups.GetThisCgroupDir(subsystem)
+ if err != nil {
+ return pids, err
+ }
+
+ filename := filepath.Join(cgroupRoot, cgroupDir, id, "tasks")
+ if _, err := os.Stat(filename); os.IsNotExist(err) {
+ filename = filepath.Join(cgroupRoot, cgroupDir, "docker", id, "tasks")
+ }
+
+ output, err := ioutil.ReadFile(filename)
+ if err != nil {
+ return pids, err
+ }
+ // One pid per line; empty lines (such as the one after the trailing
+ // newline) are skipped.
+ for _, p := range strings.Split(string(output), "\n") {
+ if len(p) == 0 {
+ continue
+ }
+ pid, err := strconv.Atoi(p)
+ if err != nil {
+ return pids, fmt.Errorf("Invalid pid '%s': %s", p, err)
+ }
+ pids = append(pids, pid)
+ }
+ return pids, nil
+}
+
+// writeContainerFile serializes container as JSON into container.json inside
+// the state directory of the container with the given id. It returns the
+// marshalling or write error, if any.
+func (d *driver) writeContainerFile(container *libcontainer.Container, id string) error {
+	data, err := json.Marshal(container)
+	if err != nil {
+		return err
+	}
+	// Plain data file: owner read/write, others read-only. No execute bits.
+	return ioutil.WriteFile(filepath.Join(d.root, id, "container.json"), data, 0644)
+}
+
+// createContainerRoot creates the state directory for the container with the
+// given id below the driver root, including any missing parents.
+func (d *driver) createContainerRoot(id string) error {
+	// Directories need the search (x) bit for the owner as well; 0655 would
+	// leave a non-root owner unable to enter its own directory.
+	return os.MkdirAll(filepath.Join(d.root, id), 0755)
+}
+
+// removeContainerRoot deletes the state directory of the container with the
+// given id together with everything inside it.
+func (d *driver) removeContainerRoot(id string) error {
+ return os.RemoveAll(filepath.Join(d.root, id))
+}
+
+// getEnv returns the value of the first "key=value" entry in env whose key
+// equals key, or "" when there is none. Only the first "=" separates key and
+// value, so values may themselves contain "=". An entry that consists of the
+// bare key without "=" yields "".
+func getEnv(key string, env []string) string {
+	for _, pair := range env {
+		parts := strings.SplitN(pair, "=", 2)
+		if parts[0] != key {
+			continue
+		}
+		if len(parts) == 2 {
+			return parts[1]
+		}
+		// Bare key without "=": treat as set but empty instead of indexing
+		// past the end of parts.
+		return ""
+	}
+	return ""
+}
+
+// dockerCommandFactory builds the exec.Cmd used to start a container by
+// reusing the exec.Cmd embedded in the execdriver.Command.
+type dockerCommandFactory struct {
+ c *execdriver.Command
+ driver *driver
+}
+
+// Create configures and returns the command's own exec.Cmd: the path is set
+// to the driver's init binary, the arguments select the native driver and
+// pass the console, sync pipe fd and state directory followed by the user's
+// args, and the clone flags are derived from the container's namespaces.
+func (d *dockerCommandFactory) Create(container *libcontainer.Container, console string, syncFile *os.File, args []string) *exec.Cmd {
+ // we need to join the rootfs because nsinit will setup the rootfs and chroot
+ initPath := filepath.Join(d.c.Rootfs, d.c.InitPath)
+
+ d.c.Path = d.driver.initPath
+ d.c.Args = append([]string{
+ initPath,
+ "-driver", DriverName,
+ "-console", console,
+ "-pipe", "3",
+ "-root", filepath.Join(d.driver.root, d.c.ID),
+ "--",
+ }, args...)
+
+ // set this to nil so that when we set the clone flags anything else is reset
+ d.c.SysProcAttr = nil
+ system.SetCloneFlags(&d.c.Cmd, uintptr(nsinit.GetNamespaceFlags(container.Namespaces)))
+ // syncFile is the only extra file, which makes it fd 3 in the child and
+ // matches the "-pipe 3" argument above.
+ d.c.ExtraFiles = []*os.File{syncFile}
+
+ d.c.Env = container.Env
+ d.c.Dir = d.c.Rootfs
+
+ return &d.c.Cmd
+}
+
+// dockerStateWriter wraps another nsinit.StateWriter so that the container
+// pid is recorded on the command and the start callback is fired when the
+// pid is written.
+type dockerStateWriter struct {
+ dsw nsinit.StateWriter
+ c *execdriver.Command
+ callback execdriver.StartCallback
+}
+
+// WritePid stores pid on the command, delegates to the wrapped writer and
+// then invokes the start callback if one is set. The callback runs even when
+// the wrapped writer fails; that error is returned afterwards.
+func (d *dockerStateWriter) WritePid(pid int, started string) error {
+ d.c.ContainerPid = pid
+ err := d.dsw.WritePid(pid, started)
+ if d.callback != nil {
+ d.callback(d.c)
+ }
+ return err
+}
+
+// DeletePid delegates to the wrapped writer.
+func (d *dockerStateWriter) DeletePid() error {
+ return d.dsw.DeletePid()
+}
+
+// createLogger returns a logger prefixed with "[libcontainer] " that uses the
+// standard date/time flags. Output goes to stderr when debug is non-empty and
+// is discarded otherwise.
+func createLogger(debug string) *log.Logger {
+	var sink io.Writer = ioutil.Discard
+	if debug != "" {
+		// if we are in debug mode set the logger to stderr
+		sink = os.Stderr
+	}
+	return log.New(sink, "[libcontainer] ", log.LstdFlags)
+}
diff --git a/runtime/execdriver/native/info.go b/runtime/execdriver/native/info.go
new file mode 100644
index 0000000000..aef2f85c6b
--- /dev/null
+++ b/runtime/execdriver/native/info.go
@@ -0,0 +1,21 @@
+package native
+
+import (
+ "os"
+ "path/filepath"
+)
+
+// info answers state queries for a single container of the native driver.
+type info struct {
+	ID     string
+	driver *driver
+}
+
+// IsRunning reports whether the container's "pid" file can be stat'ed inside
+// its state directory. Any stat error counts as "not running".
+func (i *info) IsRunning() bool {
+	pidFile := filepath.Join(i.driver.root, i.ID, "pid")
+	_, err := os.Stat(pidFile)
+	return err == nil
+}
diff --git a/runtime/execdriver/native/template/default_template.go b/runtime/execdriver/native/template/default_template.go
new file mode 100644
index 0000000000..a1ecb04d76
--- /dev/null
+++ b/runtime/execdriver/native/template/default_template.go
@@ -0,0 +1,45 @@
+package template
+
+import (
+ "github.com/dotcloud/docker/pkg/cgroups"
+ "github.com/dotcloud/docker/pkg/libcontainer"
+)
+
+// New returns the docker default configuration for libcontainer.
+//
+// Every call builds a fresh Container with the capability and namespace
+// entries listed below, a cgroup whose parent is "docker" with device access
+// off, and the "docker-default" apparmor profile. MKNOD is the only
+// capability this function explicitly enables.
+func New() *libcontainer.Container {
+ container := &libcontainer.Container{
+ CapabilitiesMask: libcontainer.Capabilities{
+ libcontainer.GetCapability("SETPCAP"),
+ libcontainer.GetCapability("SYS_MODULE"),
+ libcontainer.GetCapability("SYS_RAWIO"),
+ libcontainer.GetCapability("SYS_PACCT"),
+ libcontainer.GetCapability("SYS_ADMIN"),
+ libcontainer.GetCapability("SYS_NICE"),
+ libcontainer.GetCapability("SYS_RESOURCE"),
+ libcontainer.GetCapability("SYS_TIME"),
+ libcontainer.GetCapability("SYS_TTY_CONFIG"),
+ libcontainer.GetCapability("AUDIT_WRITE"),
+ libcontainer.GetCapability("AUDIT_CONTROL"),
+ libcontainer.GetCapability("MAC_OVERRIDE"),
+ libcontainer.GetCapability("MAC_ADMIN"),
+ libcontainer.GetCapability("NET_ADMIN"),
+ libcontainer.GetCapability("MKNOD"),
+ },
+ Namespaces: libcontainer.Namespaces{
+ libcontainer.GetNamespace("NEWNS"),
+ libcontainer.GetNamespace("NEWUTS"),
+ libcontainer.GetNamespace("NEWIPC"),
+ libcontainer.GetNamespace("NEWPID"),
+ libcontainer.GetNamespace("NEWNET"),
+ },
+ Cgroups: &cgroups.Cgroup{
+ Parent: "docker",
+ DeviceAccess: false,
+ },
+ Context: libcontainer.Context{
+ "apparmor_profile": "docker-default",
+ },
+ }
+ container.CapabilitiesMask.Get("MKNOD").Enabled = true
+ return container
+}
diff --git a/runtime/execdriver/native/term.go b/runtime/execdriver/native/term.go
new file mode 100644
index 0000000000..0d5298d388
--- /dev/null
+++ b/runtime/execdriver/native/term.go
@@ -0,0 +1,42 @@
+/*
+ These types are wrappers around the libcontainer Terminal interface so that
+ we can reuse the docker implementations where possible.
+*/
+package native
+
+import (
+ "github.com/dotcloud/docker/runtime/execdriver"
+ "io"
+ "os"
+ "os/exec"
+)
+
+// dockerStdTerm is the terminal used for containers without a tty. It embeds
+// execdriver.StdConsole and remembers the pipes to attach.
+type dockerStdTerm struct {
+ execdriver.StdConsole
+ pipes *execdriver.Pipes
+}
+
+// Attach wires the stored pipes to cmd through the embedded StdConsole.
+func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error {
+ return d.AttachPipes(cmd, d.pipes)
+}
+
+// SetMaster is a no-op; there is no pty master without a tty.
+func (d *dockerStdTerm) SetMaster(master *os.File) {
+ // do nothing
+}
+
+// dockerTtyTerm is the terminal used for containers with a tty. It embeds
+// execdriver.TtyConsole and remembers the pipes to attach.
+type dockerTtyTerm struct {
+ execdriver.TtyConsole
+ pipes *execdriver.Pipes
+}
+
+// Attach starts background copies from the pty master to the stdout pipe
+// and, when a stdin pipe is present, from the stdin pipe to the pty master.
+// cmd itself is not modified and the copy results are not checked. It always
+// returns nil.
+func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error {
+ go io.Copy(t.pipes.Stdout, t.MasterPty)
+ if t.pipes.Stdin != nil {
+ go io.Copy(t.MasterPty, t.pipes.Stdin)
+ }
+ return nil
+}
+
+// SetMaster stores master as the pty master used by Attach.
+func (t *dockerTtyTerm) SetMaster(master *os.File) {
+ t.MasterPty = master
+}
diff --git a/runtime/execdriver/pipes.go b/runtime/execdriver/pipes.go
new file mode 100644
index 0000000000..158219f0c5
--- /dev/null
+++ b/runtime/execdriver/pipes.go
@@ -0,0 +1,23 @@
+package execdriver
+
+import (
+ "io"
+)
+
+// Pipes bundles the streams a container's standard input, output and error
+// are connected to.
+type Pipes struct {
+	Stdin          io.ReadCloser
+	Stdout, Stderr io.Writer
+}
+
+// NewPipes returns a Pipes holding stdout and stderr. stdin is only kept when
+// useStdin is true; otherwise the Stdin field is left nil.
+func NewPipes(stdin io.ReadCloser, stdout, stderr io.Writer, useStdin bool) *Pipes {
+	if !useStdin {
+		stdin = nil
+	}
+	return &Pipes{
+		Stdin:  stdin,
+		Stdout: stdout,
+		Stderr: stderr,
+	}
+}
diff --git a/runtime/execdriver/termconsole.go b/runtime/execdriver/termconsole.go
new file mode 100644
index 0000000000..af6b88d3d1
--- /dev/null
+++ b/runtime/execdriver/termconsole.go
@@ -0,0 +1,126 @@
+package execdriver
+
+import (
+ "github.com/dotcloud/docker/pkg/term"
+ "github.com/kr/pty"
+ "io"
+ "os"
+ "os/exec"
+)
+
+// SetTerminal creates the console matching command.Tty (a TtyConsole when
+// set, a StdConsole otherwise), attaches the pipes and stores the result in
+// command.Terminal. On failure the error is returned and command.Terminal is
+// left untouched.
+func SetTerminal(command *Command, pipes *Pipes) error {
+	if command.Tty {
+		tty, err := NewTtyConsole(command, pipes)
+		if err != nil {
+			return err
+		}
+		command.Terminal = tty
+		return nil
+	}
+
+	std, err := NewStdConsole(command, pipes)
+	if err != nil {
+		return err
+	}
+	command.Terminal = std
+	return nil
+}
+
+// TtyConsole holds both ends of a pseudo terminal used as a container's
+// console.
+type TtyConsole struct {
+ MasterPty *os.File
+ SlavePty *os.File
+}
+
+// NewTtyConsole opens a new pty pair, attaches the pipes to command through
+// it and records the slave's name in command.Console. Both pty ends are
+// closed again if attaching fails.
+func NewTtyConsole(command *Command, pipes *Pipes) (*TtyConsole, error) {
+ ptyMaster, ptySlave, err := pty.Open()
+ if err != nil {
+ return nil, err
+ }
+ tty := &TtyConsole{
+ MasterPty: ptyMaster,
+ SlavePty: ptySlave,
+ }
+ if err := tty.AttachPipes(&command.Cmd, pipes); err != nil {
+ tty.Close()
+ return nil, err
+ }
+ command.Console = tty.SlavePty.Name()
+ return tty, nil
+}
+
+// Master returns the master end of the pty.
+func (t *TtyConsole) Master() *os.File {
+ return t.MasterPty
+}
+
+// Resize sets the window size of the pty master to h rows by w columns.
+// Both values are converted to uint16.
+func (t *TtyConsole) Resize(h, w int) error {
+ return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
+}
+
+// AttachPipes connects command's stdout and stderr to the pty slave and
+// copies everything read from the pty master to pipes.Stdout in the
+// background. When pipes.Stdin is set, the slave also becomes the command's
+// stdin and controlling terminal, and pipes.Stdin is copied to the master in
+// the background. It always returns nil.
+func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *Pipes) error {
+ command.Stdout = t.SlavePty
+ command.Stderr = t.SlavePty
+
+ go func() {
+ // If the stdout pipe offers CloseWriters, call it once copying ends.
+ if wb, ok := pipes.Stdout.(interface {
+ CloseWriters() error
+ }); ok {
+ defer wb.CloseWriters()
+ }
+ io.Copy(pipes.Stdout, t.MasterPty)
+ }()
+
+ if pipes.Stdin != nil {
+ command.Stdin = t.SlavePty
+ // NOTE(review): this dereferences command.SysProcAttr and panics if it
+ // is nil — confirm callers always set it before attaching.
+ command.SysProcAttr.Setctty = true
+
+ go func() {
+ defer pipes.Stdin.Close()
+ io.Copy(t.MasterPty, pipes.Stdin)
+ }()
+ }
+ return nil
+}
+
+// Close closes both ends of the pty. Only the error from closing the master
+// is returned; an error from closing the slave is dropped.
+func (t *TtyConsole) Close() error {
+ t.SlavePty.Close()
+ return t.MasterPty.Close()
+}
+
+// StdConsole is the console used when no tty is requested. It carries no
+// state of its own.
+type StdConsole struct {
+}
+
+// NewStdConsole returns a StdConsole after attaching the pipes to command.
+// An attach failure is returned with a nil console.
+func NewStdConsole(command *Command, pipes *Pipes) (*StdConsole, error) {
+ std := &StdConsole{}
+
+ if err := std.AttachPipes(&command.Cmd, pipes); err != nil {
+ return nil, err
+ }
+ return std, nil
+}
+
+// AttachPipes points command's stdout and stderr at the given pipes. When
+// pipes.Stdin is set, it opens the command's stdin pipe and copies
+// pipes.Stdin into it in the background, closing the command's stdin once
+// the copy ends. It returns the error from StdinPipe, if any.
+func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *Pipes) error {
+ command.Stdout = pipes.Stdout
+ command.Stderr = pipes.Stderr
+
+ if pipes.Stdin != nil {
+ stdin, err := command.StdinPipe()
+ if err != nil {
+ return err
+ }
+
+ go func() {
+ defer stdin.Close()
+ io.Copy(stdin, pipes.Stdin)
+ }()
+ }
+ return nil
+}
+
+// Resize is a no-op and always returns nil.
+func (s *StdConsole) Resize(h, w int) error {
+ // we do not need to resize a non tty
+ return nil
+}
+
+// Close is a no-op and always returns nil.
+func (s *StdConsole) Close() error {
+ // nothing to close here
+ return nil
+}