Merge pull request #5079 from unclejack/bump_v0.10.0release-0.10 0.10.1-hotfixes

Bump version to v0.10.0
author: unclejack <unclejack@users.noreply.github.com> 2014-04-09 01:56:01 +0300
committer: unclejack <unclejack@users.noreply.github.com> 2014-04-09 01:56:01 +0300
commit: e128a606e39fa63c6b4fd6e53a1d88cf00aad868 (patch)
tree: 199ee7eb6678ffecd2ddad95fce794c795ad5183 /runtime
parent: 143c9707a9fafc39e1d9747f528db97b2564f01e (diff)
parent: dc9c28f51d669d6b09e81c2381f800f1a33bb659 (diff)
download: docker-release-0.10.tar.gz
67 files changed, 12405 insertions, 0 deletions
diff --git a/runtime/container.go b/runtime/container.go
new file mode 100644
index 0000000000..c8053b146c
--- /dev/null
+++ b/runtime/container.go
@@ -0,0 +1,1229 @@
+package runtime
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/engine"
+	"github.com/dotcloud/docker/image"
+	"github.com/dotcloud/docker/links"
+	"github.com/dotcloud/docker/nat"
+	"github.com/dotcloud/docker/runconfig"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"github.com/dotcloud/docker/utils"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+const DefaultPathEnv = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+var (
+	ErrNotATTY               = errors.New("The PTY is not a file")
+	ErrNoTTY                 = errors.New("No PTY found")
+	ErrContainerStart        = errors.New("The container failed to start. Unknown error")
+	ErrContainerStartTimeout = errors.New("The container failed to start due to timed out.")
+)
+
+type Container struct {
+	sync.Mutex
+	root   string // Path to the "home" of the container, including metadata.
+	basefs string // Path to the graphdriver mountpoint
+
+	ID string
+
+	Created time.Time
+
+	Path string
+	Args []string
+
+	Config *runconfig.Config
+	State  State
+	Image  string
+
+	NetworkSettings *NetworkSettings
+
+	ResolvConfPath string
+	HostnamePath   string
+	HostsPath      string
+	Name           string
+	Driver         string
+	ExecDriver     string
+
+	command   *execdriver.Command
+	stdout    *utils.WriteBroadcaster
+	stderr    *utils.WriteBroadcaster
+	stdin     io.ReadCloser
+	stdinPipe io.WriteCloser
+
+	runtime *Runtime
+
+	waitLock chan struct{}
+	Volumes  map[string]string
+	// Store rw/ro in a separate structure to preserve reverse-compatibility on-disk.
+	// Easier than migrating older container configs :)
+	VolumesRW  map[string]bool
+	hostConfig *runconfig.HostConfig
+
+	activeLinks map[string]*links.Link
+}
+
+// FIXME: move deprecated port stuff to nat to clean up the core.
+type PortMapping map[string]string // Deprecated
+
+type NetworkSettings struct {
+	IPAddress   string
+	IPPrefixLen int
+	Gateway     string
+	Bridge      string
+	PortMapping map[string]PortMapping // Deprecated
+	Ports       nat.PortMap
+}
+
+func (settings *NetworkSettings) PortMappingAPI() *engine.Table {
+	var outs = engine.NewTable("", 0)
+	for port, bindings := range settings.Ports {
+		p, _ := nat.ParsePort(port.Port())
+		if len(bindings) == 0 {
+			out := &engine.Env{}
+			out.SetInt("PublicPort", p)
+			out.Set("Type", port.Proto())
+			outs.Add(out)
+			continue
+		}
+		for _, binding := range bindings {
+			out := &engine.Env{}
+			h, _ := nat.ParsePort(binding.HostPort)
+			out.SetInt("PrivatePort", p)
+			out.SetInt("PublicPort", h)
+			out.Set("Type", port.Proto())
+			out.Set("IP", binding.HostIp)
+			outs.Add(out)
+		}
+	}
+	return outs
+}
+
+// Inject the io.Reader at the given path. Note: do not close the reader
+func (container *Container) Inject(file io.Reader, pth string) error {
+	if err := container.Mount(); err != nil {
+		return fmt.Errorf("inject: error mounting container %s: %s", container.ID, err)
+	}
+	defer container.Unmount()
+
+	// Return error if path exists
+	destPath := path.Join(container.basefs, pth)
+	if _, err := os.Stat(destPath); err == nil {
+		// Since err is nil, the path could be stat'd and it exists
+		return fmt.Errorf("%s exists", pth)
+	} else if !os.IsNotExist(err) {
+		// Expect err might be that the file doesn't exist, so
+		// if it's some other error, return that.
+
+		return err
+	}
+
+	// Make sure the directory exists
+	if err := os.MkdirAll(path.Join(container.basefs, path.Dir(pth)), 0755); err != nil {
+		return err
+	}
+
+	dest, err := os.Create(destPath)
+	if err != nil {
+		return err
+	}
+	defer dest.Close()
+
+	if _, err := io.Copy(dest, file); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (container *Container) When() time.Time {
+	return container.Created
+}
+
+func (container *Container) FromDisk() error {
+	data, err := ioutil.ReadFile(container.jsonPath())
+	if err != nil {
+		return err
+	}
+	// Load container settings
+	// udp broke compat of docker.PortMapping, but it's not used when loading a container, we can skip it
+	if err := json.Unmarshal(data, container); err != nil && !strings.Contains(err.Error(), "docker.PortMapping") {
+		return err
+	}
+	return container.readHostConfig()
+}
+
+func (container *Container) ToDisk() (err error) {
+	data, err := json.Marshal(container)
+	if err != nil {
+		return
+	}
+	err = ioutil.WriteFile(container.jsonPath(), data, 0666)
+	if err != nil {
+		return
+	}
+	return container.WriteHostConfig()
+}
+
+func (container *Container) readHostConfig() error {
+	container.hostConfig = &runconfig.HostConfig{}
+	// If the hostconfig file does not exist, do not read it.
+	// (We still have to initialize container.hostConfig,
+	// but that's OK, since we just did that above.)
+	_, err := os.Stat(container.hostConfigPath())
+	if os.IsNotExist(err) {
+		return nil
+	}
+	data, err := ioutil.ReadFile(container.hostConfigPath())
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(data, container.hostConfig)
+}
+
+func (container *Container) WriteHostConfig() (err error) {
+	data, err := json.Marshal(container.hostConfig)
+	if err != nil {
+		return
+	}
+	return ioutil.WriteFile(container.hostConfigPath(), data, 0666)
+}
+
+func (container *Container) generateEnvConfig(env []string) error {
+	data, err := json.Marshal(env)
+	if err != nil {
+		return err
+	}
+	p, err := container.EnvConfigPath()
+	if err != nil {
+		return err
+	}
+	ioutil.WriteFile(p, data, 0600)
+	return nil
+}
+
+func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, stdout io.Writer, stderr io.Writer) chan error {
+	var cStdout, cStderr io.ReadCloser
+
+	var nJobs int
+	errors := make(chan error, 3)
+	if stdin != nil && container.Config.OpenStdin {
+		nJobs += 1
+		if cStdin, err := container.StdinPipe(); err != nil {
+			errors <- err
+		} else {
+			go func() {
+				utils.Debugf("attach: stdin: begin")
+				defer utils.Debugf("attach: stdin: end")
+				// No matter what, when stdin is closed (io.Copy unblock), close stdout and stderr
+				if container.Config.StdinOnce && !container.Config.Tty {
+					defer cStdin.Close()
+				} else {
+					defer func() {
+						if cStdout != nil {
+							cStdout.Close()
+						}
+						if cStderr != nil {
+							cStderr.Close()
+						}
+					}()
+				}
+				if container.Config.Tty {
+					_, err = utils.CopyEscapable(cStdin, stdin)
+				} else {
+					_, err = io.Copy(cStdin, stdin)
+				}
+				if err == io.ErrClosedPipe {
+					err = nil
+				}
+				if err != nil {
+					utils.Errorf("attach: stdin: %s", err)
+				}
+				errors <- err
+			}()
+		}
+	}
+	if stdout != nil {
+		nJobs += 1
+		if p, err := container.StdoutPipe(); err != nil {
+			errors <- err
+		} else {
+			cStdout = p
+			go func() {
+				utils.Debugf("attach: stdout: begin")
+				defer utils.Debugf("attach: stdout: end")
+				// If we are in StdinOnce mode, then close stdin
+				if container.Config.StdinOnce && stdin != nil {
+					defer stdin.Close()
+				}
+				if stdinCloser != nil {
+					defer stdinCloser.Close()
+				}
+				_, err := io.Copy(stdout, cStdout)
+				if err == io.ErrClosedPipe {
+					err = nil
+				}
+				if err != nil {
+					utils.Errorf("attach: stdout: %s", err)
+				}
+				errors <- err
+			}()
+		}
+	} else {
+		go func() {
+			if stdinCloser != nil {
+				defer stdinCloser.Close()
+			}
+			if cStdout, err := container.StdoutPipe(); err != nil {
+				utils.Errorf("attach: stdout pipe: %s", err)
+			} else {
+				io.Copy(&utils.NopWriter{}, cStdout)
+			}
+		}()
+	}
+	if stderr != nil {
+		nJobs += 1
+		if p, err := container.StderrPipe(); err != nil {
+			errors <- err
+		} else {
+			cStderr = p
+			go func() {
+				utils.Debugf("attach: stderr: begin")
+				defer utils.Debugf("attach: stderr: end")
+				// If we are in StdinOnce mode, then close stdin
+				if container.Config.StdinOnce && stdin != nil {
+					defer stdin.Close()
+				}
+				if stdinCloser != nil {
+					defer stdinCloser.Close()
+				}
+				_, err := io.Copy(stderr, cStderr)
+				if err == io.ErrClosedPipe {
+					err = nil
+				}
+				if err != nil {
+					utils.Errorf("attach: stderr: %s", err)
+				}
+				errors <- err
+			}()
+		}
+	} else {
+		go func() {
+			if stdinCloser != nil {
+				defer stdinCloser.Close()
+			}
+
+			if cStderr, err := container.StderrPipe(); err != nil {
+				utils.Errorf("attach: stdout pipe: %s", err)
+			} else {
+				io.Copy(&utils.NopWriter{}, cStderr)
+			}
+		}()
+	}
+
+	return utils.Go(func() error {
+		defer func() {
+			if cStdout != nil {
+				cStdout.Close()
+			}
+			if cStderr != nil {
+				cStderr.Close()
+			}
+		}()
+
+		// FIXME: how to clean up the stdin goroutine without the unwanted side effect
+		// of closing the passed stdin? Add an intermediary io.Pipe?
+		for i := 0; i < nJobs; i += 1 {
+			utils.Debugf("attach: waiting for job %d/%d", i+1, nJobs)
+			if err := <-errors; err != nil {
+				utils.Errorf("attach: job %d returned error %s, aborting all jobs", i+1, err)
+				return err
+			}
+			utils.Debugf("attach: job %d completed successfully", i+1)
+		}
+		utils.Debugf("attach: all jobs completed successfully")
+		return nil
+	})
+}
+
+func populateCommand(c *Container) {
+	var (
+		en           *execdriver.Network
+		driverConfig = make(map[string][]string)
+	)
+
+	en = &execdriver.Network{
+		Mtu:       c.runtime.config.Mtu,
+		Interface: nil,
+	}
+
+	if !c.Config.NetworkDisabled {
+		network := c.NetworkSettings
+		en.Interface = &execdriver.NetworkInterface{
+			Gateway:     network.Gateway,
+			Bridge:      network.Bridge,
+			IPAddress:   network.IPAddress,
+			IPPrefixLen: network.IPPrefixLen,
+		}
+	}
+
+	// TODO: this can be removed after lxc-conf is fully deprecated
+	mergeLxcConfIntoOptions(c.hostConfig, driverConfig)
+
+	resources := &execdriver.Resources{
+		Memory:     c.Config.Memory,
+		MemorySwap: c.Config.MemorySwap,
+		CpuShares:  c.Config.CpuShares,
+	}
+	c.command = &execdriver.Command{
+		ID:         c.ID,
+		Privileged: c.hostConfig.Privileged,
+		Rootfs:     c.RootfsPath(),
+		InitPath:   "/.dockerinit",
+		Entrypoint: c.Path,
+		Arguments:  c.Args,
+		WorkingDir: c.Config.WorkingDir,
+		Network:    en,
+		Tty:        c.Config.Tty,
+		User:       c.Config.User,
+		Config:     driverConfig,
+		Resources:  resources,
+	}
+	c.command.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
+}
+
+func (container *Container) ArgsAsString() string {
+	var args []string
+	for _, arg := range container.Args {
+		if strings.Contains(arg, " ") {
+			args = append(args, fmt.Sprintf("'%s'", arg))
+		} else {
+			args = append(args, arg)
+		}
+	}
+	return strings.Join(args, " ")
+}
+
+func (container *Container) Start() (err error) {
+	container.Lock()
+	defer container.Unlock()
+
+	if container.State.IsRunning() {
+		return nil
+	}
+
+	defer func() {
+		if err != nil {
+			container.cleanup()
+		}
+	}()
+
+	if container.ResolvConfPath == "" {
+		if err := container.setupContainerDns(); err != nil {
+			return err
+		}
+	}
+
+	if err := container.Mount(); err != nil {
+		return err
+	}
+
+	if container.runtime.config.DisableNetwork {
+		container.Config.NetworkDisabled = true
+		container.buildHostnameAndHostsFiles("127.0.1.1")
+	} else {
+		if err := container.allocateNetwork(); err != nil {
+			return err
+		}
+		container.buildHostnameAndHostsFiles(container.NetworkSettings.IPAddress)
+	}
+
+	// Make sure the config is compatible with the current kernel
+	if container.Config.Memory > 0 && !container.runtime.sysInfo.MemoryLimit {
+		log.Printf("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n")
+		container.Config.Memory = 0
+	}
+	if container.Config.Memory > 0 && !container.runtime.sysInfo.SwapLimit {
+		log.Printf("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n")
+		container.Config.MemorySwap = -1
+	}
+
+	if container.runtime.sysInfo.IPv4ForwardingDisabled {
+		log.Printf("WARNING: IPv4 forwarding is disabled. Networking will not work")
+	}
+
+	if err := prepareVolumesForContainer(container); err != nil {
+		return err
+	}
+
+	// Setup environment
+	env := []string{
+		"HOME=/",
+		"PATH=" + DefaultPathEnv,
+		"HOSTNAME=" + container.Config.Hostname,
+	}
+
+	if container.Config.Tty {
+		env = append(env, "TERM=xterm")
+	}
+
+	// Init any links between the parent and children
+	runtime := container.runtime
+
+	children, err := runtime.Children(container.Name)
+	if err != nil {
+		return err
+	}
+
+	if len(children) > 0 {
+		container.activeLinks = make(map[string]*links.Link, len(children))
+
+		// If we encounter an error make sure that we rollback any network
+		// config and ip table changes
+		rollback := func() {
+			for _, link := range container.activeLinks {
+				link.Disable()
+			}
+			container.activeLinks = nil
+		}
+
+		for linkAlias, child := range children {
+			if !child.State.IsRunning() {
+				return fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
+			}
+
+			link, err := links.NewLink(
+				container.NetworkSettings.IPAddress,
+				child.NetworkSettings.IPAddress,
+				linkAlias,
+				child.Config.Env,
+				child.Config.ExposedPorts,
+				runtime.eng)
+
+			if err != nil {
+				rollback()
+				return err
+			}
+
+			container.activeLinks[link.Alias()] = link
+			if err := link.Enable(); err != nil {
+				rollback()
+				return err
+			}
+
+			for _, envVar := range link.ToEnv() {
+				env = append(env, envVar)
+			}
+		}
+	}
+
+	// because the env on the container can override certain default values
+	// we need to replace the 'env' keys where they match and append anything
+	// else.
+	env = utils.ReplaceOrAppendEnvValues(env, container.Config.Env)
+	if err := container.generateEnvConfig(env); err != nil {
+		return err
+	}
+
+	if container.Config.WorkingDir != "" {
+		container.Config.WorkingDir = path.Clean(container.Config.WorkingDir)
+
+		pthInfo, err := os.Stat(path.Join(container.basefs, container.Config.WorkingDir))
+		if err != nil {
+			if !os.IsNotExist(err) {
+				return err
+			}
+			if err := os.MkdirAll(path.Join(container.basefs, container.Config.WorkingDir), 0755); err != nil {
+				return err
+			}
+		}
+		if pthInfo != nil && !pthInfo.IsDir() {
+			return fmt.Errorf("Cannot mkdir: %s is not a directory", container.Config.WorkingDir)
+		}
+	}
+
+	envPath, err := container.EnvConfigPath()
+	if err != nil {
+		return err
+	}
+
+	populateCommand(container)
+	container.command.Env = env
+
+	if err := setupMountsForContainer(container, envPath); err != nil {
+		return err
+	}
+
+	// Setup logging of stdout and stderr to disk
+	if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
+		return err
+	}
+	if err := container.runtime.LogToDisk(container.stderr, container.logPath("json"), "stderr"); err != nil {
+		return err
+	}
+	container.waitLock = make(chan struct{})
+
+	callbackLock := make(chan struct{})
+	callback := func(command *execdriver.Command) {
+		container.State.SetRunning(command.Pid())
+		if command.Tty {
+			// The callback is called after the process Start()
+			// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace
+			// which we close here.
+			if c, ok := command.Stdout.(io.Closer); ok {
+				c.Close()
+			}
+		}
+		if err := container.ToDisk(); err != nil {
+			utils.Debugf("%s", err)
+		}
+		close(callbackLock)
+	}
+
+	// We use a callback here instead of a goroutine and an chan for
+	// syncronization purposes
+	cErr := utils.Go(func() error { return container.monitor(callback) })
+
+	// Start should not return until the process is actually running
+	select {
+	case <-callbackLock:
+	case err := <-cErr:
+		return err
+	}
+	return nil
+}
+
+func (container *Container) Run() error {
+	if err := container.Start(); err != nil {
+		return err
+	}
+	container.Wait()
+	return nil
+}
+
+func (container *Container) Output() (output []byte, err error) {
+	pipe, err := container.StdoutPipe()
+	if err != nil {
+		return nil, err
+	}
+	defer pipe.Close()
+	if err := container.Start(); err != nil {
+		return nil, err
+	}
+	output, err = ioutil.ReadAll(pipe)
+	container.Wait()
+	return output, err
+}
+
+// Container.StdinPipe returns a WriteCloser which can be used to feed data
+// to the standard input of the container's active process.
+// Container.StdoutPipe and Container.StderrPipe each return a ReadCloser
+// which can be used to retrieve the standard output (and error) generated
+// by the container's active process. The output (and error) are actually
+// copied and delivered to all StdoutPipe and StderrPipe consumers, using
+// a kind of "broadcaster".
+
+func (container *Container) StdinPipe() (io.WriteCloser, error) {
+	return container.stdinPipe, nil
+}
+
+func (container *Container) StdoutPipe() (io.ReadCloser, error) {
+	reader, writer := io.Pipe()
+	container.stdout.AddWriter(writer, "")
+	return utils.NewBufReader(reader), nil
+}
+
+func (container *Container) StderrPipe() (io.ReadCloser, error) {
+	reader, writer := io.Pipe()
+	container.stderr.AddWriter(writer, "")
+	return utils.NewBufReader(reader), nil
+}
+
+func (container *Container) buildHostnameAndHostsFiles(IP string) {
+	container.HostnamePath = path.Join(container.root, "hostname")
+	ioutil.WriteFile(container.HostnamePath, []byte(container.Config.Hostname+"\n"), 0644)
+
+	hostsContent := []byte(`
+127.0.0.1	localhost
+::1		localhost ip6-localhost ip6-loopback
+fe00::0		ip6-localnet
+ff00::0		ip6-mcastprefix
+ff02::1		ip6-allnodes
+ff02::2		ip6-allrouters
+`)
+
+	container.HostsPath = path.Join(container.root, "hosts")
+
+	if container.Config.Domainname != "" {
+		hostsContent = append([]byte(fmt.Sprintf("%s\t%s.%s %s\n", IP, container.Config.Hostname, container.Config.Domainname, container.Config.Hostname)), hostsContent...)
+	} else if !container.Config.NetworkDisabled {
+		hostsContent = append([]byte(fmt.Sprintf("%s\t%s\n", IP, container.Config.Hostname)), hostsContent...)
+	}
+
+	ioutil.WriteFile(container.HostsPath, hostsContent, 0644)
+}
+
+func (container *Container) allocateNetwork() error {
+	if container.Config.NetworkDisabled {
+		return nil
+	}
+
+	var (
+		env *engine.Env
+		err error
+		eng = container.runtime.eng
+	)
+
+	if container.State.IsGhost() {
+		if container.runtime.config.DisableNetwork {
+			env = &engine.Env{}
+		} else {
+			currentIP := container.NetworkSettings.IPAddress
+
+			job := eng.Job("allocate_interface", container.ID)
+			if currentIP != "" {
+				job.Setenv("RequestIP", currentIP)
+			}
+
+			env, err = job.Stdout.AddEnv()
+			if err != nil {
+				return err
+			}
+
+			if err := job.Run(); err != nil {
+				return err
+			}
+		}
+	} else {
+		job := eng.Job("allocate_interface", container.ID)
+		env, err = job.Stdout.AddEnv()
+		if err != nil {
+			return err
+		}
+		if err := job.Run(); err != nil {
+			return err
+		}
+	}
+
+	if container.Config.PortSpecs != nil {
+		utils.Debugf("Migrating port mappings for container: %s", strings.Join(container.Config.PortSpecs, ", "))
+		if err := migratePortMappings(container.Config, container.hostConfig); err != nil {
+			return err
+		}
+		container.Config.PortSpecs = nil
+		if err := container.WriteHostConfig(); err != nil {
+			return err
+		}
+	}
+
+	var (
+		portSpecs = make(nat.PortSet)
+		bindings  = make(nat.PortMap)
+	)
+
+	if !container.State.IsGhost() {
+		if container.Config.ExposedPorts != nil {
+			portSpecs = container.Config.ExposedPorts
+		}
+		if container.hostConfig.PortBindings != nil {
+			bindings = container.hostConfig.PortBindings
+		}
+	} else {
+		if container.NetworkSettings.Ports != nil {
+			for port, binding := range container.NetworkSettings.Ports {
+				portSpecs[port] = struct{}{}
+				bindings[port] = binding
+			}
+		}
+	}
+
+	container.NetworkSettings.PortMapping = nil
+
+	for port := range portSpecs {
+		binding := bindings[port]
+		if container.hostConfig.PublishAllPorts && len(binding) == 0 {
+			binding = append(binding, nat.PortBinding{})
+		}
+
+		for i := 0; i < len(binding); i++ {
+			b := binding[i]
+
+			portJob := eng.Job("allocate_port", container.ID)
+			portJob.Setenv("HostIP", b.HostIp)
+			portJob.Setenv("HostPort", b.HostPort)
+			portJob.Setenv("Proto", port.Proto())
+			portJob.Setenv("ContainerPort", port.Port())
+
+			portEnv, err := portJob.Stdout.AddEnv()
+			if err != nil {
+				return err
+			}
+			if err := portJob.Run(); err != nil {
+				eng.Job("release_interface", container.ID).Run()
+				return err
+			}
+			b.HostIp = portEnv.Get("HostIP")
+			b.HostPort = portEnv.Get("HostPort")
+
+			binding[i] = b
+		}
+		bindings[port] = binding
+	}
+	container.WriteHostConfig()
+
+	container.NetworkSettings.Ports = bindings
+
+	container.NetworkSettings.Bridge = env.Get("Bridge")
+	container.NetworkSettings.IPAddress = env.Get("IP")
+	container.NetworkSettings.IPPrefixLen = env.GetInt("IPPrefixLen")
+	container.NetworkSettings.Gateway = env.Get("Gateway")
+
+	return nil
+}
+
+func (container *Container) releaseNetwork() {
+	if container.Config.NetworkDisabled {
+		return
+	}
+	eng := container.runtime.eng
+
+	eng.Job("release_interface", container.ID).Run()
+	container.NetworkSettings = &NetworkSettings{}
+}
+
+func (container *Container) monitor(callback execdriver.StartCallback) error {
+	var (
+		err      error
+		exitCode int
+	)
+
+	pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin)
+	exitCode, err = container.runtime.Run(container, pipes, callback)
+	if err != nil {
+		utils.Errorf("Error running container: %s", err)
+	}
+
+	if container.runtime != nil && container.runtime.srv != nil && container.runtime.srv.IsRunning() {
+		container.State.SetStopped(exitCode)
+
+		// FIXME: there is a race condition here which causes this to fail during the unit tests.
+		// If another goroutine was waiting for Wait() to return before removing the container's root
+		// from the filesystem... At this point it may already have done so.
+		// This is because State.setStopped() has already been called, and has caused Wait()
+		// to return.
+		// FIXME: why are we serializing running state to disk in the first place?
+		//log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err)
+		if err := container.ToDisk(); err != nil {
+			utils.Errorf("Error dumping container state to disk: %s\n", err)
+		}
+	}
+
+	// Cleanup
+	container.cleanup()
+
+	// Re-create a brand new stdin pipe once the container exited
+	if container.Config.OpenStdin {
+		container.stdin, container.stdinPipe = io.Pipe()
+	}
+
+	if container.runtime != nil && container.runtime.srv != nil {
+		container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image))
+	}
+
+	close(container.waitLock)
+
+	return err
+}
+
+func (container *Container) cleanup() {
+	container.releaseNetwork()
+
+	// Disable all active links
+	if container.activeLinks != nil {
+		for _, link := range container.activeLinks {
+			link.Disable()
+		}
+	}
+	if container.Config.OpenStdin {
+		if err := container.stdin.Close(); err != nil {
+			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
+		}
+	}
+	if err := container.stdout.CloseWriters(); err != nil {
+		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
+	}
+	if err := container.stderr.CloseWriters(); err != nil {
+		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
+	}
+	if container.command != nil && container.command.Terminal != nil {
+		if err := container.command.Terminal.Close(); err != nil {
+			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
+		}
+	}
+
+	if err := container.Unmount(); err != nil {
+		log.Printf("%v: Failed to umount filesystem: %v", container.ID, err)
+	}
+}
+
+func (container *Container) KillSig(sig int) error {
+	container.Lock()
+	defer container.Unlock()
+
+	if !container.State.IsRunning() {
+		return nil
+	}
+	return container.runtime.Kill(container, sig)
+}
+
+func (container *Container) Kill() error {
+	if !container.State.IsRunning() {
+		return nil
+	}
+
+	// 1. Send SIGKILL
+	if err := container.KillSig(9); err != nil {
+		return err
+	}
+
+	// 2. Wait for the process to die, in last resort, try to kill the process directly
+	if err := container.WaitTimeout(10 * time.Second); err != nil {
+		log.Printf("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", utils.TruncateID(container.ID))
+		if err := syscall.Kill(container.State.Pid, 9); err != nil {
+			return err
+		}
+	}
+
+	container.Wait()
+	return nil
+}
+
+func (container *Container) Stop(seconds int) error {
+	if !container.State.IsRunning() {
+		return nil
+	}
+
+	// 1. Send a SIGTERM
+	if err := container.KillSig(15); err != nil {
+		log.Print("Failed to send SIGTERM to the process, force killing")
+		if err := container.KillSig(9); err != nil {
+			return err
+		}
+	}
+
+	// 2. Wait for the process to exit on its own
+	if err := container.WaitTimeout(time.Duration(seconds) * time.Second); err != nil {
+		log.Printf("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
+		// 3. If it doesn't, then send SIGKILL
+		if err := container.Kill(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (container *Container) Restart(seconds int) error {
+	// Avoid unnecessarily unmounting and then directly mounting
+	// the container when the container stops and then starts
+	// again
+	if err := container.Mount(); err == nil {
+		defer container.Unmount()
+	}
+
+	if err := container.Stop(seconds); err != nil {
+		return err
+	}
+	return container.Start()
+}
+
+// Wait blocks until the container stops running, then returns its exit code.
+func (container *Container) Wait() int {
+	<-container.waitLock
+	return container.State.GetExitCode()
+}
+
+func (container *Container) Resize(h, w int) error {
+	return container.command.Terminal.Resize(h, w)
+}
+
+func (container *Container) ExportRw() (archive.Archive, error) {
+	if err := container.Mount(); err != nil {
+		return nil, err
+	}
+	if container.runtime == nil {
+		return nil, fmt.Errorf("Can't load storage driver for unregistered container %s", container.ID)
+	}
+	archive, err := container.runtime.Diff(container)
+	if err != nil {
+		container.Unmount()
+		return nil, err
+	}
+	return utils.NewReadCloserWrapper(archive, func() error {
+			err := archive.Close()
+			container.Unmount()
+			return err
+		}),
+		nil
+}
+
+func (container *Container) Export() (archive.Archive, error) {
+	if err := container.Mount(); err != nil {
+		return nil, err
+	}
+
+	archive, err := archive.Tar(container.basefs, archive.Uncompressed)
+	if err != nil {
+		container.Unmount()
+		return nil, err
+	}
+	return utils.NewReadCloserWrapper(archive, func() error {
+			err := archive.Close()
+			container.Unmount()
+			return err
+		}),
+		nil
+}
+
+func (container *Container) WaitTimeout(timeout time.Duration) error {
+	done := make(chan bool)
+	go func() {
+		container.Wait()
+		done <- true
+	}()
+
+	select {
+	case <-time.After(timeout):
+		return fmt.Errorf("Timed Out")
+	case <-done:
+		return nil
+	}
+}
+
+func (container *Container) Mount() error {
+	return container.runtime.Mount(container)
+}
+
+func (container *Container) Changes() ([]archive.Change, error) {
+	return container.runtime.Changes(container)
+}
+
+func (container *Container) GetImage() (*image.Image, error) {
+	if container.runtime == nil {
+		return nil, fmt.Errorf("Can't get image of unregistered container")
+	}
+	return container.runtime.graph.Get(container.Image)
+}
+
+func (container *Container) Unmount() error {
+	return container.runtime.Unmount(container)
+}
+
+func (container *Container) logPath(name string) string {
+	return path.Join(container.root, fmt.Sprintf("%s-%s.log", container.ID, name))
+}
+
+func (container *Container) ReadLog(name string) (io.Reader, error) {
+	return os.Open(container.logPath(name))
+}
+
+func (container *Container) hostConfigPath() string {
+	return path.Join(container.root, "hostconfig.json")
+}
+
+func (container *Container) jsonPath() string {
+	return path.Join(container.root, "config.json")
+}
+
+func (container *Container) EnvConfigPath() (string, error) {
+	p := path.Join(container.root, "config.env")
+	if _, err := os.Stat(p); err != nil {
+		if os.IsNotExist(err) {
+			f, err := os.Create(p)
+			if err != nil {
+				return "", err
+			}
+			f.Close()
+		} else {
+			return "", err
+		}
+	}
+	return p, nil
+}
+
+// This method must be exported to be used from the lxc template
+// This directory is only usable when the container is running
+func (container *Container) RootfsPath() string {
+	return container.basefs
+}
+
+func validateID(id string) error {
+	if id == "" {
+		return fmt.Errorf("Invalid empty id")
+	}
+	return nil
+}
+
+// GetSize, return real size, virtual size
+func (container *Container) GetSize() (int64, int64) {
+	var (
+		sizeRw, sizeRootfs int64
+		err                error
+		driver             = container.runtime.driver
+	)
+
+	if err := container.Mount(); err != nil {
+		utils.Errorf("Warning: failed to compute size of container rootfs %s: %s", container.ID, err)
+		return sizeRw, sizeRootfs
+	}
+	defer container.Unmount()
+
+	if differ, ok := container.runtime.driver.(graphdriver.Differ); ok {
+		sizeRw, err = differ.DiffSize(container.ID)
+		if err != nil {
+			utils.Errorf("Warning: driver %s couldn't return diff size of container %s: %s", driver, container.ID, err)
+			// FIXME: GetSize should return an error. Not changing it now in case
+			// there is a side-effect.
+			sizeRw = -1
+		}
+	} else {
+		changes, _ := container.Changes()
+		if changes != nil {
+			sizeRw = archive.ChangesSize(container.basefs, changes)
+		} else {
+			sizeRw = -1
+		}
+	}
+
+	if _, err = os.Stat(container.basefs); err != nil {
+		if sizeRootfs, err = utils.TreeSize(container.basefs); err != nil {
+			sizeRootfs = -1
+		}
+	}
+	return sizeRw, sizeRootfs
+}
+
+func (container *Container) Copy(resource string) (io.ReadCloser, error) {
+	if err := container.Mount(); err != nil {
+		return nil, err
+	}
+	var filter []string
+	basePath := path.Join(container.basefs, resource)
+	stat, err := os.Stat(basePath)
+	if err != nil {
+		container.Unmount()
+		return nil, err
+	}
+	if !stat.IsDir() {
+		d, f := path.Split(basePath)
+		basePath = d
+		filter = []string{f}
+	} else {
+		filter = []string{path.Base(basePath)}
+		basePath = path.Dir(basePath)
+	}
+
+	archive, err := archive.TarFilter(basePath, &archive.TarOptions{
+		Compression: archive.Uncompressed,
+		Includes:    filter,
+	})
+	if err != nil {
+		return nil, err
+	}
+	return utils.NewReadCloserWrapper(archive, func() error {
+			err := archive.Close()
+			container.Unmount()
+			return err
+		}),
+		nil
+}
+
+// Returns true if the container exposes a certain port
+func (container *Container) Exposes(p nat.Port) bool {
+	_, exists := container.Config.ExposedPorts[p]
+	return exists
+}
+
+func (container *Container) GetPtyMaster() (*os.File, error) {
+	ttyConsole, ok := container.command.Terminal.(execdriver.TtyTerminal)
+	if !ok {
+		return nil, ErrNoTTY
+	}
+	return ttyConsole.Master(), nil
+}
+
+func (container *Container) HostConfig() *runconfig.HostConfig {
+	return container.hostConfig
+}
+
+func (container *Container) SetHostConfig(hostConfig *runconfig.HostConfig) {
+	container.hostConfig = hostConfig
+}
+
+func (container *Container) DisableLink(name string) {
+	if container.activeLinks != nil {
+		if link, exists := container.activeLinks[name]; exists {
+			link.Disable()
+		} else {
+			utils.Debugf("Could not find active link for %s", name)
+		}
+	}
+}
+
+func (container *Container) setupContainerDns() error {
+	var (
+		config  = container.hostConfig
+		runtime = container.runtime
+	)
+	resolvConf, err := utils.GetResolvConf()
+	if err != nil {
+		return err
+	}
+	// If custom dns exists, then create a resolv.conf for the container
+	if len(config.Dns) > 0 || len(runtime.config.Dns) > 0 || len(config.DnsSearch) > 0 || len(runtime.config.DnsSearch) > 0 {
+		var (
+			dns       = utils.GetNameservers(resolvConf)
+			dnsSearch = utils.GetSearchDomains(resolvConf)
+		)
+		if len(config.Dns) > 0 {
+			dns = config.Dns
+		} else if len(runtime.config.Dns) > 0 {
+			dns = runtime.config.Dns
+		}
+		if len(config.DnsSearch) > 0 {
+			dnsSearch = config.DnsSearch
+		} else if len(runtime.config.DnsSearch) > 0 {
+			dnsSearch = runtime.config.DnsSearch
+		}
+		container.ResolvConfPath = path.Join(container.root, "resolv.conf")
+		f, err := os.Create(container.ResolvConfPath)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+		for _, dns := range dns {
+			if _, err := f.Write([]byte("nameserver " + dns + "\n")); err != nil {
+				return err
+			}
+		}
+		if len(dnsSearch) > 0 {
+			if _, err := f.Write([]byte("search " + strings.Join(dnsSearch, " ") + "\n")); err != nil {
+				return err
+			}
+		}
+	} else {
+		container.ResolvConfPath = "/etc/resolv.conf"
+	}
+	return nil
+}
diff --git a/runtime/container_unit_test.go b/runtime/container_unit_test.go
new file mode 100644
index 0000000000..fba036ca50
--- /dev/null
+++ b/runtime/container_unit_test.go
@@ -0,0 +1,145 @@
+package runtime
+
+import (
+	"github.com/dotcloud/docker/nat"
+	"testing"
+)
+
+func TestParseNetworkOptsPrivateOnly(t *testing.T) {
+	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::80"})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(ports) != 1 {
+		t.Logf("Expected 1 got %d", len(ports))
+		t.FailNow()
+	}
+	if len(bindings) != 1 {
+		t.Logf("Expected 1 got %d", len(bindings))
+		t.FailNow()
+	}
+	for k := range ports {
+		if k.Proto() != "tcp" {
+			t.Logf("Expected tcp got %s", k.Proto())
+			t.Fail()
+		}
+		if k.Port() != "80" {
+			t.Logf("Expected 80 got %s", k.Port())
+			t.Fail()
+		}
+		b, exists := bindings[k]
+		if !exists {
+			t.Log("Binding does not exist")
+			t.FailNow()
+		}
+		if len(b) != 1 {
+			t.Logf("Expected 1 got %d", len(b))
+			t.FailNow()
+		}
+		s := b[0]
+		if s.HostPort != "" {
+			t.Logf("Expected \"\" got %s", s.HostPort)
+			t.Fail()
+		}
+		if s.HostIp != "192.168.1.100" {
+			t.Fail()
+		}
+	}
+}
+
+func TestParseNetworkOptsPublic(t *testing.T) {
+	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100:8080:80"})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(ports) != 1 {
+		t.Logf("Expected 1 got %d", len(ports))
+		t.FailNow()
+	}
+	if len(bindings) != 1 {
+		t.Logf("Expected 1 got %d", len(bindings))
+		t.FailNow()
+	}
+	for k := range ports {
+		if k.Proto() != "tcp" {
+			t.Logf("Expected tcp got %s", k.Proto())
+			t.Fail()
+		}
+		if k.Port() != "80" {
+			t.Logf("Expected 80 got %s", k.Port())
+			t.Fail()
+		}
+		b, exists := bindings[k]
+		if !exists {
+			t.Log("Binding does not exist")
+			t.FailNow()
+		}
+		if len(b) != 1 {
+			t.Logf("Expected 1 got %d", len(b))
+			t.FailNow()
+		}
+		s := b[0]
+		if s.HostPort != "8080" {
+			t.Logf("Expected 8080 got %s", s.HostPort)
+			t.Fail()
+		}
+		if s.HostIp != "192.168.1.100" {
+			t.Fail()
+		}
+	}
+}
+
+func TestParseNetworkOptsUdp(t *testing.T) {
+	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::6000/udp"})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(ports) != 1 {
+		t.Logf("Expected 1 got %d", len(ports))
+		t.FailNow()
+	}
+	if len(bindings) != 1 {
+		t.Logf("Expected 1 got %d", len(bindings))
+		t.FailNow()
+	}
+	for k := range ports {
+		if k.Proto() != "udp" {
+			t.Logf("Expected udp got %s", k.Proto())
+			t.Fail()
+		}
+		if k.Port() != "6000" {
+			t.Logf("Expected 6000 got %s", k.Port())
+			t.Fail()
+		}
+		b, exists := bindings[k]
+		if !exists {
+			t.Log("Binding does not exist")
+			t.FailNow()
+		}
+		if len(b) != 1 {
+			t.Logf("Expected 1 got %d", len(b))
+			t.FailNow()
+		}
+		s := b[0]
+		if s.HostPort != "" {
+			t.Logf("Expected \"\" got %s", s.HostPort)
+			t.Fail()
+		}
+		if s.HostIp != "192.168.1.100" {
+			t.Fail()
+		}
+	}
+}
+
+func TestGetFullName(t *testing.T) {
+	name, err := GetFullContainerName("testing")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if name != "/testing" {
+		t.Fatalf("Expected /testing got %s", name)
+	}
+	if _, err := GetFullContainerName(""); err == nil {
+		t.Fatal("Error should not be nil")
+	}
+}
diff --git a/runtime/execdriver/MAINTAINERS b/runtime/execdriver/MAINTAINERS
new file mode 100644
index 0000000000..1cb551364d
--- /dev/null
+++ b/runtime/execdriver/MAINTAINERS
@@ -0,0 +1,2 @@
+Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
+Guillaume J. Charmes <guillaume@docker.com> (@creack)
diff --git a/runtime/execdriver/driver.go b/runtime/execdriver/driver.go
new file mode 100644
index 0000000000..27a575cb3a
--- /dev/null
+++ b/runtime/execdriver/driver.go
@@ -0,0 +1,144 @@
+package execdriver
+
+import (
+	"errors"
+	"io"
+	"os"
+	"os/exec"
+)
+
+// Context is a generic key value pair that allows
+// arbatrary data to be sent
+type Context map[string]string
+
+var (
+	ErrNotRunning              = errors.New("Process could not be started")
+	ErrWaitTimeoutReached      = errors.New("Wait timeout reached")
+	ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
+	ErrDriverNotFound          = errors.New("The requested docker init has not been found")
+)
+
+var dockerInitFcts map[string]InitFunc
+
+type (
+	StartCallback func(*Command)
+	InitFunc      func(i *InitArgs) error
+)
+
+func RegisterInitFunc(name string, fct InitFunc) error {
+	if dockerInitFcts == nil {
+		dockerInitFcts = make(map[string]InitFunc)
+	}
+	if _, ok := dockerInitFcts[name]; ok {
+		return ErrDriverAlreadyRegistered
+	}
+	dockerInitFcts[name] = fct
+	return nil
+}
+
+func GetInitFunc(name string) (InitFunc, error) {
+	fct, ok := dockerInitFcts[name]
+	if !ok {
+		return nil, ErrDriverNotFound
+	}
+	return fct, nil
+}
+
+// Args provided to the init function for a driver
+type InitArgs struct {
+	User       string
+	Gateway    string
+	Ip         string
+	WorkDir    string
+	Privileged bool
+	Env        []string
+	Args       []string
+	Mtu        int
+	Driver     string
+	Console    string
+	Pipe       int
+	Root       string
+}
+
+// Driver specific information based on
+// processes registered with the driver
+type Info interface {
+	IsRunning() bool
+}
+
+// Terminal in an interface for drivers to implement
+// if they want to support Close and Resize calls from
+// the core
+type Terminal interface {
+	io.Closer
+	Resize(height, width int) error
+}
+
+type TtyTerminal interface {
+	Master() *os.File
+}
+
+type Driver interface {
+	Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
+	Kill(c *Command, sig int) error
+	Name() string                                 // Driver name
+	Info(id string) Info                          // "temporary" hack (until we move state from core to plugins)
+	GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
+	Terminate(c *Command) error                   // kill it with fire
+}
+
+// Network settings of the container
+type Network struct {
+	Interface *NetworkInterface `json:"interface"` // if interface is nil then networking is disabled
+	Mtu       int               `json:"mtu"`
+}
+
+type NetworkInterface struct {
+	Gateway     string `json:"gateway"`
+	IPAddress   string `json:"ip"`
+	Bridge      string `json:"bridge"`
+	IPPrefixLen int    `json:"ip_prefix_len"`
+}
+
+type Resources struct {
+	Memory     int64 `json:"memory"`
+	MemorySwap int64 `json:"memory_swap"`
+	CpuShares  int64 `json:"cpu_shares"`
+}
+
+type Mount struct {
+	Source      string `json:"source"`
+	Destination string `json:"destination"`
+	Writable    bool   `json:"writable"`
+	Private     bool   `json:"private"`
+}
+
+// Process wrapps an os/exec.Cmd to add more metadata
+type Command struct {
+	exec.Cmd `json:"-"`
+
+	ID         string              `json:"id"`
+	Privileged bool                `json:"privileged"`
+	User       string              `json:"user"`
+	Rootfs     string              `json:"rootfs"`   // root fs of the container
+	InitPath   string              `json:"initpath"` // dockerinit
+	Entrypoint string              `json:"entrypoint"`
+	Arguments  []string            `json:"arguments"`
+	WorkingDir string              `json:"working_dir"`
+	ConfigPath string              `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
+	Tty        bool                `json:"tty"`
+	Network    *Network            `json:"network"`
+	Config     map[string][]string `json:"config"` //  generic values that specific drivers can consume
+	Resources  *Resources          `json:"resources"`
+	Mounts     []Mount             `json:"mounts"`
+
+	Terminal     Terminal `json:"-"`             // standard or tty terminal
+	Console      string   `json:"-"`             // dev/console path
+	ContainerPid int      `json:"container_pid"` // the pid for the process inside a container
+}
+
+// Return the pid of the process
+// If the process is nil -1 will be returned
+func (c *Command) Pid() int {
+	return c.ContainerPid
+}
diff --git a/runtime/execdriver/execdrivers/execdrivers.go b/runtime/execdriver/execdrivers/execdrivers.go
new file mode 100644
index 0000000000..9e277c86df
--- /dev/null
+++ b/runtime/execdriver/execdrivers/execdrivers.go
@@ -0,0 +1,23 @@
+package execdrivers
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/sysinfo"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/dotcloud/docker/runtime/execdriver/lxc"
+	"github.com/dotcloud/docker/runtime/execdriver/native"
+	"path"
+)
+
+func NewDriver(name, root, initPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
+	switch name {
+	case "lxc":
+		// we want to five the lxc driver the full docker root because it needs
+		// to access and write config and template files in /var/lib/docker/containers/*
+		// to be backwards compatible
+		return lxc.NewDriver(root, sysInfo.AppArmor)
+	case "native":
+		return native.NewDriver(path.Join(root, "execdriver", "native"), initPath)
+	}
+	return nil, fmt.Errorf("unknown exec driver %s", name)
+}
diff --git a/runtime/execdriver/lxc/driver.go b/runtime/execdriver/lxc/driver.go
new file mode 100644
index 0000000000..ef16dcc380
--- /dev/null
+++ b/runtime/execdriver/lxc/driver.go
@@ -0,0 +1,418 @@
+package lxc
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/cgroups"
+	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/dotcloud/docker/utils"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+	"time"
+)
+
+const DriverName = "lxc"
+
+func init() {
+	execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+		if err := setupEnv(args); err != nil {
+			return err
+		}
+
+		if err := setupHostname(args); err != nil {
+			return err
+		}
+
+		if err := setupNetworking(args); err != nil {
+			return err
+		}
+
+		if err := setupCapabilities(args); err != nil {
+			return err
+		}
+
+		if err := setupWorkingDirectory(args); err != nil {
+			return err
+		}
+
+		if err := changeUser(args); err != nil {
+			return err
+		}
+
+		path, err := exec.LookPath(args.Args[0])
+		if err != nil {
+			log.Printf("Unable to locate %v", args.Args[0])
+			os.Exit(127)
+		}
+		if err := syscall.Exec(path, args.Args, os.Environ()); err != nil {
+			return fmt.Errorf("dockerinit unable to execute %s - %s", path, err)
+		}
+		panic("Unreachable")
+	})
+}
+
+type driver struct {
+	root       string // root path for the driver to use
+	apparmor   bool
+	sharedRoot bool
+}
+
+func NewDriver(root string, apparmor bool) (*driver, error) {
+	// setup unconfined symlink
+	if err := linkLxcStart(root); err != nil {
+		return nil, err
+	}
+	return &driver{
+		apparmor:   apparmor,
+		root:       root,
+		sharedRoot: rootIsShared(),
+	}, nil
+}
+
+func (d *driver) Name() string {
+	version := d.version()
+	return fmt.Sprintf("%s-%s", DriverName, version)
+}
+
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+	if err := execdriver.SetTerminal(c, pipes); err != nil {
+		return -1, err
+	}
+	configPath, err := d.generateLXCConfig(c)
+	if err != nil {
+		return -1, err
+	}
+	params := []string{
+		"lxc-start",
+		"-n", c.ID,
+		"-f", configPath,
+		"--",
+		c.InitPath,
+		"-driver",
+		DriverName,
+	}
+
+	if c.Network.Interface != nil {
+		params = append(params,
+			"-g", c.Network.Interface.Gateway,
+			"-i", fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
+		)
+	}
+	params = append(params,
+		"-mtu", strconv.Itoa(c.Network.Mtu),
+	)
+
+	if c.User != "" {
+		params = append(params, "-u", c.User)
+	}
+
+	if c.Privileged {
+		if d.apparmor {
+			params[0] = path.Join(d.root, "lxc-start-unconfined")
+
+		}
+		params = append(params, "-privileged")
+	}
+
+	if c.WorkingDir != "" {
+		params = append(params, "-w", c.WorkingDir)
+	}
+
+	params = append(params, "--", c.Entrypoint)
+	params = append(params, c.Arguments...)
+
+	if d.sharedRoot {
+		// lxc-start really needs / to be non-shared, or all kinds of stuff break
+		// when lxc-start unmount things and those unmounts propagate to the main
+		// mount namespace.
+		// What we really want is to clone into a new namespace and then
+		// mount / MS_REC|MS_SLAVE, but since we can't really clone or fork
+		// without exec in go we have to do this horrible shell hack...
+		shellString :=
+			"mount --make-rslave /; exec " +
+				utils.ShellQuoteArguments(params)
+
+		params = []string{
+			"unshare", "-m", "--", "/bin/sh", "-c", shellString,
+		}
+	}
+
+	var (
+		name = params[0]
+		arg  = params[1:]
+	)
+	aname, err := exec.LookPath(name)
+	if err != nil {
+		aname = name
+	}
+	c.Path = aname
+	c.Args = append([]string{name}, arg...)
+
+	if err := c.Start(); err != nil {
+		return -1, err
+	}
+
+	var (
+		waitErr  error
+		waitLock = make(chan struct{})
+	)
+	go func() {
+		if err := c.Wait(); err != nil {
+			if _, ok := err.(*exec.ExitError); !ok { // Do not propagate the error if it's simply a status code != 0
+				waitErr = err
+			}
+		}
+		close(waitLock)
+	}()
+
+	// Poll lxc for RUNNING status
+	pid, err := d.waitForStart(c, waitLock)
+	if err != nil {
+		if c.Process != nil {
+			c.Process.Kill()
+		}
+		return -1, err
+	}
+	c.ContainerPid = pid
+
+	if startCallback != nil {
+		startCallback(c)
+	}
+
+	<-waitLock
+
+	return getExitCode(c), waitErr
+}
+
+/// Return the exit code of the process
+// if the process has not exited -1 will be returned
+func getExitCode(c *execdriver.Command) int {
+	if c.ProcessState == nil {
+		return -1
+	}
+	return c.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+}
+
+func (d *driver) Kill(c *execdriver.Command, sig int) error {
+	return KillLxc(c.ID, sig)
+}
+
+func (d *driver) Terminate(c *execdriver.Command) error {
+	return KillLxc(c.ID, 9)
+}
+
+func (d *driver) version() string {
+	var (
+		version string
+		output  []byte
+		err     error
+	)
+	if _, errPath := exec.LookPath("lxc-version"); errPath == nil {
+		output, err = exec.Command("lxc-version").CombinedOutput()
+	} else {
+		output, err = exec.Command("lxc-start", "--version").CombinedOutput()
+	}
+	if err == nil {
+		version = strings.TrimSpace(string(output))
+		if parts := strings.SplitN(version, ":", 2); len(parts) == 2 {
+			version = strings.TrimSpace(parts[1])
+		}
+	}
+	return version
+}
+
+func KillLxc(id string, sig int) error {
+	var (
+		err    error
+		output []byte
+	)
+	_, err = exec.LookPath("lxc-kill")
+	if err == nil {
+		output, err = exec.Command("lxc-kill", "-n", id, strconv.Itoa(sig)).CombinedOutput()
+	} else {
+		output, err = exec.Command("lxc-stop", "-k", "-n", id, strconv.Itoa(sig)).CombinedOutput()
+	}
+	if err != nil {
+		return fmt.Errorf("Err: %s Output: %s", err, output)
+	}
+	return nil
+}
+
+// wait for the process to start and return the pid for the process
+func (d *driver) waitForStart(c *execdriver.Command, waitLock chan struct{}) (int, error) {
+	var (
+		err    error
+		output []byte
+	)
+	// We wait for the container to be fully running.
+	// Timeout after 5 seconds. In case of broken pipe, just retry.
+	// Note: The container can run and finish correctly before
+	// the end of this loop
+	for now := time.Now(); time.Since(now) < 5*time.Second; {
+		select {
+		case <-waitLock:
+			// If the process dies while waiting for it, just return
+			return -1, nil
+		default:
+		}
+
+		output, err = d.getInfo(c.ID)
+		if err != nil {
+			output, err = d.getInfo(c.ID)
+			if err != nil {
+				return -1, err
+			}
+		}
+		info, err := parseLxcInfo(string(output))
+		if err != nil {
+			return -1, err
+		}
+		if info.Running {
+			return info.Pid, nil
+		}
+		time.Sleep(50 * time.Millisecond)
+	}
+	return -1, execdriver.ErrNotRunning
+}
+
+func (d *driver) getInfo(id string) ([]byte, error) {
+	return exec.Command("lxc-info", "-n", id).CombinedOutput()
+}
+
+type info struct {
+	ID     string
+	driver *driver
+}
+
+func (i *info) IsRunning() bool {
+	var running bool
+
+	output, err := i.driver.getInfo(i.ID)
+	if err != nil {
+		utils.Errorf("Error getting info for lxc container %s: %s (%s)", i.ID, err, output)
+		return false
+	}
+	if strings.Contains(string(output), "RUNNING") {
+		running = true
+	}
+	return running
+}
+
+func (d *driver) Info(id string) execdriver.Info {
+	return &info{
+		ID:     id,
+		driver: d,
+	}
+}
+
+func (d *driver) GetPidsForContainer(id string) ([]int, error) {
+	pids := []int{}
+
+	// cpu is chosen because it is the only non optional subsystem in cgroups
+	subsystem := "cpu"
+	cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem)
+	if err != nil {
+		return pids, err
+	}
+
+	cgroupDir, err := cgroups.GetThisCgroupDir(subsystem)
+	if err != nil {
+		return pids, err
+	}
+
+	filename := filepath.Join(cgroupRoot, cgroupDir, id, "tasks")
+	if _, err := os.Stat(filename); os.IsNotExist(err) {
+		// With more recent lxc versions use, cgroup will be in lxc/
+		filename = filepath.Join(cgroupRoot, cgroupDir, "lxc", id, "tasks")
+	}
+
+	output, err := ioutil.ReadFile(filename)
+	if err != nil {
+		return pids, err
+	}
+	for _, p := range strings.Split(string(output), "\n") {
+		if len(p) == 0 {
+			continue
+		}
+		pid, err := strconv.Atoi(p)
+		if err != nil {
+			return pids, fmt.Errorf("Invalid pid '%s': %s", p, err)
+		}
+		pids = append(pids, pid)
+	}
+	return pids, nil
+}
+
+func linkLxcStart(root string) error {
+	sourcePath, err := exec.LookPath("lxc-start")
+	if err != nil {
+		return err
+	}
+	targetPath := path.Join(root, "lxc-start-unconfined")
+
+	if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) {
+		return err
+	} else if err == nil {
+		if err := os.Remove(targetPath); err != nil {
+			return err
+		}
+	}
+	return os.Symlink(sourcePath, targetPath)
+}
+
+// TODO: This can be moved to the mountinfo reader in the mount pkg
+func rootIsShared() bool {
+	if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil {
+		for _, line := range strings.Split(string(data), "\n") {
+			cols := strings.Split(line, " ")
+			if len(cols) >= 6 && cols[4] == "/" {
+				return strings.HasPrefix(cols[6], "shared")
+			}
+		}
+	}
+
+	// No idea, probably safe to assume so
+	return true
+}
+
+func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
+	var (
+		process, mount string
+		root           = path.Join(d.root, "containers", c.ID, "config.lxc")
+		labels         = c.Config["label"]
+	)
+	fo, err := os.Create(root)
+	if err != nil {
+		return "", err
+	}
+	defer fo.Close()
+
+	if len(labels) > 0 {
+		process, mount, err = label.GenLabels(labels[0])
+		if err != nil {
+			return "", err
+		}
+	}
+
+	if err := LxcTemplateCompiled.Execute(fo, struct {
+		*execdriver.Command
+		AppArmor     bool
+		ProcessLabel string
+		MountLabel   string
+	}{
+		Command:      c,
+		AppArmor:     d.apparmor,
+		ProcessLabel: process,
+		MountLabel:   mount,
+	}); err != nil {
+		return "", err
+	}
+	return root, nil
+}
diff --git a/runtime/execdriver/lxc/info.go b/runtime/execdriver/lxc/info.go
new file mode 100644
index 0000000000..27b4c58604
--- /dev/null
+++ b/runtime/execdriver/lxc/info.go
@@ -0,0 +1,50 @@
+package lxc
+
+import (
+	"bufio"
+	"errors"
+	"strconv"
+	"strings"
+)
+
+var (
+	ErrCannotParse = errors.New("cannot parse raw input")
+)
+
+type lxcInfo struct {
+	Running bool
+	Pid     int
+}
+
+func parseLxcInfo(raw string) (*lxcInfo, error) {
+	if raw == "" {
+		return nil, ErrCannotParse
+	}
+	var (
+		err  error
+		s    = bufio.NewScanner(strings.NewReader(raw))
+		info = &lxcInfo{}
+	)
+	for s.Scan() {
+		text := s.Text()
+
+		if s.Err() != nil {
+			return nil, s.Err()
+		}
+
+		parts := strings.Split(text, ":")
+		if len(parts) < 2 {
+			continue
+		}
+		switch strings.ToLower(strings.TrimSpace(parts[0])) {
+		case "state":
+			info.Running = strings.TrimSpace(parts[1]) == "RUNNING"
+		case "pid":
+			info.Pid, err = strconv.Atoi(strings.TrimSpace(parts[1]))
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+	return info, nil
+}
diff --git a/runtime/execdriver/lxc/info_test.go b/runtime/execdriver/lxc/info_test.go
new file mode 100644
index 0000000000..edafc02511
--- /dev/null
+++ b/runtime/execdriver/lxc/info_test.go
@@ -0,0 +1,36 @@
+package lxc
+
+import (
+	"testing"
+)
+
+func TestParseRunningInfo(t *testing.T) {
+	raw := `
+    state: RUNNING
+    pid:    50`
+
+	info, err := parseLxcInfo(raw)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !info.Running {
+		t.Fatal("info should return a running state")
+	}
+	if info.Pid != 50 {
+		t.Fatalf("info should have pid 50 got %d", info.Pid)
+	}
+}
+
+func TestEmptyInfo(t *testing.T) {
+	_, err := parseLxcInfo("")
+	if err == nil {
+		t.Fatal("error should not be nil")
+	}
+}
+
+func TestBadInfo(t *testing.T) {
+	_, err := parseLxcInfo("state")
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/runtime/execdriver/lxc/init.go b/runtime/execdriver/lxc/init.go
new file mode 100644
index 0000000000..c1933a5e43
--- /dev/null
+++ b/runtime/execdriver/lxc/init.go
@@ -0,0 +1,175 @@
+package lxc
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/dotcloud/docker/pkg/netlink"
+	"github.com/dotcloud/docker/pkg/user"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/syndtr/gocapability/capability"
+	"io/ioutil"
+	"net"
+	"os"
+	"strings"
+	"syscall"
+)
+
+// Clear environment pollution introduced by lxc-start
+func setupEnv(args *execdriver.InitArgs) error {
+	// Get env
+	var env []string
+	content, err := ioutil.ReadFile(".dockerenv")
+	if err != nil {
+		return fmt.Errorf("Unable to load environment variables: %v", err)
+	}
+	if err := json.Unmarshal(content, &env); err != nil {
+		return fmt.Errorf("Unable to unmarshal environment variables: %v", err)
+	}
+	// Propagate the plugin-specific container env variable
+	env = append(env, "container="+os.Getenv("container"))
+
+	args.Env = env
+
+	os.Clearenv()
+	for _, kv := range args.Env {
+		parts := strings.SplitN(kv, "=", 2)
+		if len(parts) == 1 {
+			parts = append(parts, "")
+		}
+		os.Setenv(parts[0], parts[1])
+	}
+
+	return nil
+}
+
+func setupHostname(args *execdriver.InitArgs) error {
+	hostname := getEnv(args, "HOSTNAME")
+	if hostname == "" {
+		return nil
+	}
+	return setHostname(hostname)
+}
+
+// Setup networking
+func setupNetworking(args *execdriver.InitArgs) error {
+	if args.Ip != "" {
+		// eth0
+		iface, err := net.InterfaceByName("eth0")
+		if err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		ip, ipNet, err := net.ParseCIDR(args.Ip)
+		if err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		if err := netlink.NetworkSetMTU(iface, args.Mtu); err != nil {
+			return fmt.Errorf("Unable to set MTU: %v", err)
+		}
+		if err := netlink.NetworkLinkUp(iface); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+
+		// loopback
+		iface, err = net.InterfaceByName("lo")
+		if err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+		if err := netlink.NetworkLinkUp(iface); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+	}
+	if args.Gateway != "" {
+		gw := net.ParseIP(args.Gateway)
+		if gw == nil {
+			return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.Gateway)
+		}
+
+		if err := netlink.AddDefaultGw(gw); err != nil {
+			return fmt.Errorf("Unable to set up networking: %v", err)
+		}
+	}
+
+	return nil
+}
+
+// Setup working directory
+func setupWorkingDirectory(args *execdriver.InitArgs) error {
+	if args.WorkDir == "" {
+		return nil
+	}
+	if err := syscall.Chdir(args.WorkDir); err != nil {
+		return fmt.Errorf("Unable to change dir to %v: %v", args.WorkDir, err)
+	}
+	return nil
+}
+
+// Takes care of dropping privileges to the desired user
+func changeUser(args *execdriver.InitArgs) error {
+	uid, gid, suppGids, err := user.GetUserGroupSupplementary(
+		args.User,
+		syscall.Getuid(), syscall.Getgid(),
+	)
+	if err != nil {
+		return err
+	}
+
+	if err := syscall.Setgroups(suppGids); err != nil {
+		return fmt.Errorf("Setgroups failed: %v", err)
+	}
+	if err := syscall.Setgid(gid); err != nil {
+		return fmt.Errorf("Setgid failed: %v", err)
+	}
+	if err := syscall.Setuid(uid); err != nil {
+		return fmt.Errorf("Setuid failed: %v", err)
+	}
+
+	return nil
+}
+
+func setupCapabilities(args *execdriver.InitArgs) error {
+	if args.Privileged {
+		return nil
+	}
+
+	drop := []capability.Cap{
+		capability.CAP_SETPCAP,
+		capability.CAP_SYS_MODULE,
+		capability.CAP_SYS_RAWIO,
+		capability.CAP_SYS_PACCT,
+		capability.CAP_SYS_ADMIN,
+		capability.CAP_SYS_NICE,
+		capability.CAP_SYS_RESOURCE,
+		capability.CAP_SYS_TIME,
+		capability.CAP_SYS_TTY_CONFIG,
+		capability.CAP_AUDIT_WRITE,
+		capability.CAP_AUDIT_CONTROL,
+		capability.CAP_MAC_OVERRIDE,
+		capability.CAP_MAC_ADMIN,
+		capability.CAP_NET_ADMIN,
+	}
+
+	c, err := capability.NewPid(os.Getpid())
+	if err != nil {
+		return err
+	}
+
+	c.Unset(capability.CAPS|capability.BOUNDS, drop...)
+
+	if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil {
+		return err
+	}
+	return nil
+}
+
+func getEnv(args *execdriver.InitArgs, key string) string {
+	for _, kv := range args.Env {
+		parts := strings.SplitN(kv, "=", 2)
+		if parts[0] == key && len(parts) == 2 {
+			return parts[1]
+		}
+	}
+	return ""
+}
diff --git a/runtime/execdriver/lxc/lxc_init_linux.go b/runtime/execdriver/lxc/lxc_init_linux.go
new file mode 100644
index 0000000000..7288f5877b
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_init_linux.go
@@ -0,0 +1,11 @@
+// +build amd64
+
+package lxc
+
+import (
+	"syscall"
+)
+
+func setHostname(hostname string) error {
+	return syscall.Sethostname([]byte(hostname))
+}
diff --git a/runtime/execdriver/lxc/lxc_init_unsupported.go b/runtime/execdriver/lxc/lxc_init_unsupported.go
new file mode 100644
index 0000000000..d68cb91a1e
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_init_unsupported.go
@@ -0,0 +1,7 @@
+// +build !linux !amd64
+
+package lxc
+
+func setHostname(hostname string) error {
+	panic("Not supported on darwin")
+}
diff --git a/runtime/execdriver/lxc/lxc_template.go b/runtime/execdriver/lxc/lxc_template.go
new file mode 100644
index 0000000000..c49753c6aa
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_template.go
@@ -0,0 +1,176 @@
+package lxc
+
+import (
+	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"strings"
+	"text/template"
+)
+
+const LxcTemplate = `
+{{if .Network.Interface}}
+# network configuration
+lxc.network.type = veth
+lxc.network.link = {{.Network.Interface.Bridge}}
+lxc.network.name = eth0
+{{else}}
+# network is disabled (-n=false)
+lxc.network.type = empty
+lxc.network.flags = up
+{{end}}
+lxc.network.mtu = {{.Network.Mtu}}
+
+# root filesystem
+{{$ROOTFS := .Rootfs}}
+lxc.rootfs = {{$ROOTFS}}
+
+# use a dedicated pts for the container (and limit the number of pseudo terminal
+# available)
+lxc.pts = 1024
+
+# disable the main console
+lxc.console = none
+{{if .ProcessLabel}}
+lxc.se_context = {{ .ProcessLabel}}
+{{end}}
+{{$MOUNTLABEL := .MountLabel}}
+
+# no controlling tty at all
+lxc.tty = 1
+
+{{if .Privileged}}
+lxc.cgroup.devices.allow = a
+{{else}}
+# no implicit access to devices
+lxc.cgroup.devices.deny = a
+
+# but allow mknod for any device
+lxc.cgroup.devices.allow = c *:* m
+lxc.cgroup.devices.allow = b *:* m
+
+# /dev/null and zero
+lxc.cgroup.devices.allow = c 1:3 rwm
+lxc.cgroup.devices.allow = c 1:5 rwm
+
+# consoles
+lxc.cgroup.devices.allow = c 5:1 rwm
+lxc.cgroup.devices.allow = c 5:0 rwm
+lxc.cgroup.devices.allow = c 4:0 rwm
+lxc.cgroup.devices.allow = c 4:1 rwm
+
+# /dev/urandom,/dev/random
+lxc.cgroup.devices.allow = c 1:9 rwm
+lxc.cgroup.devices.allow = c 1:8 rwm
+
+# /dev/pts/ - pts namespaces are "coming soon"
+lxc.cgroup.devices.allow = c 136:* rwm
+lxc.cgroup.devices.allow = c 5:2 rwm
+
+# tuntap
+lxc.cgroup.devices.allow = c 10:200 rwm
+
+# fuse
+#lxc.cgroup.devices.allow = c 10:229 rwm
+
+# rtc
+#lxc.cgroup.devices.allow = c 254:0 rwm
+{{end}}
+
+# standard mount point
+# Use mnt.putold as per https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/986385
+lxc.pivotdir = lxc_putold
+
+# NOTICE: These mounts must be applied within the namespace
+
+#  WARNING: procfs is a known attack vector and should probably be disabled
+#           if your userspace allows it. eg. see http://blog.zx2c4.com/749
+lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0
+
+# WARNING: sysfs is a known attack vector and should probably be disabled
+# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
+lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
+
+{{if .Tty}}
+lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
+{{end}}
+
+lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts {{formatMountLabel "newinstance,ptmxmode=0666,nosuid,noexec" $MOUNTLABEL}} 0 0
+lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs {{formatMountLabel "size=65536k,nosuid,nodev,noexec" $MOUNTLABEL}} 0 0
+
+{{range $value := .Mounts}}
+{{if $value.Writable}}
+lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,rw 0 0
+{{else}}
+lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,ro 0 0
+{{end}}
+{{end}}
+
+{{if .Privileged}}
+{{if .AppArmor}}
+lxc.aa_profile = unconfined
+{{else}}
+#lxc.aa_profile = unconfined
+{{end}}
+{{end}}
+
+# limits
+{{if .Resources}}
+{{if .Resources.Memory}}
+lxc.cgroup.memory.limit_in_bytes = {{.Resources.Memory}}
+lxc.cgroup.memory.soft_limit_in_bytes = {{.Resources.Memory}}
+{{with $memSwap := getMemorySwap .Resources}}
+lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
+{{end}}
+{{end}}
+{{if .Resources.CpuShares}}
+lxc.cgroup.cpu.shares = {{.Resources.CpuShares}}
+{{end}}
+{{end}}
+
+{{if .Config.lxc}}
+{{range $value := .Config.lxc}}
+lxc.{{$value}}
+{{end}}
+{{end}}
+`
+
+var LxcTemplateCompiled *template.Template
+
+// Escape spaces in strings according to the fstab documentation, which is the
+// format for "lxc.mount.entry" lines in lxc.conf. See also "man 5 fstab".
+func escapeFstabSpaces(field string) string {
+	return strings.Replace(field, " ", "\\040", -1)
+}
+
+func getMemorySwap(v *execdriver.Resources) int64 {
+	// By default, MemorySwap is set to twice the size of RAM.
+	// If you want to omit MemorySwap, set it to `-1'.
+	if v.MemorySwap < 0 {
+		return 0
+	}
+	return v.Memory * 2
+}
+
+func getLabel(c map[string][]string, name string) string {
+	label := c["label"]
+	for _, l := range label {
+		parts := strings.SplitN(l, "=", 2)
+		if strings.TrimSpace(parts[0]) == name {
+			return strings.TrimSpace(parts[1])
+		}
+	}
+	return ""
+}
+
+func init() {
+	var err error
+	funcMap := template.FuncMap{
+		"getMemorySwap":     getMemorySwap,
+		"escapeFstabSpaces": escapeFstabSpaces,
+		"formatMountLabel":  label.FormatMountLabel,
+	}
+	LxcTemplateCompiled, err = template.New("lxc").Funcs(funcMap).Parse(LxcTemplate)
+	if err != nil {
+		panic(err)
+	}
+}
diff --git a/runtime/execdriver/lxc/lxc_template_unit_test.go b/runtime/execdriver/lxc/lxc_template_unit_test.go
new file mode 100644
index 0000000000..7f473a0502
--- /dev/null
+++ b/runtime/execdriver/lxc/lxc_template_unit_test.go
@@ -0,0 +1,135 @@
+package lxc
+
+import (
+	"bufio"
+	"fmt"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"path"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestLXCConfig(t *testing.T) {
+	root, err := ioutil.TempDir("", "TestLXCConfig")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(root)
+
+	os.MkdirAll(path.Join(root, "containers", "1"), 0777)
+
+	// Memory is allocated randomly for testing
+	rand.Seed(time.Now().UTC().UnixNano())
+	var (
+		memMin = 33554432
+		memMax = 536870912
+		mem    = memMin + rand.Intn(memMax-memMin)
+		cpuMin = 100
+		cpuMax = 10000
+		cpu    = cpuMin + rand.Intn(cpuMax-cpuMin)
+	)
+
+	driver, err := NewDriver(root, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	command := &execdriver.Command{
+		ID: "1",
+		Resources: &execdriver.Resources{
+			Memory:    int64(mem),
+			CpuShares: int64(cpu),
+		},
+		Network: &execdriver.Network{
+			Mtu:       1500,
+			Interface: nil,
+		},
+	}
+	p, err := driver.generateLXCConfig(command)
+	if err != nil {
+		t.Fatal(err)
+	}
+	grepFile(t, p,
+		fmt.Sprintf("lxc.cgroup.memory.limit_in_bytes = %d", mem))
+
+	grepFile(t, p,
+		fmt.Sprintf("lxc.cgroup.memory.memsw.limit_in_bytes = %d", mem*2))
+}
+
+func TestCustomLxcConfig(t *testing.T) {
+	root, err := ioutil.TempDir("", "TestCustomLxcConfig")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(root)
+
+	os.MkdirAll(path.Join(root, "containers", "1"), 0777)
+
+	driver, err := NewDriver(root, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	command := &execdriver.Command{
+		ID:         "1",
+		Privileged: false,
+		Config: map[string][]string{
+			"lxc": {
+				"lxc.utsname = docker",
+				"lxc.cgroup.cpuset.cpus = 0,1",
+			},
+		},
+		Network: &execdriver.Network{
+			Mtu:       1500,
+			Interface: nil,
+		},
+	}
+
+	p, err := driver.generateLXCConfig(command)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	grepFile(t, p, "lxc.utsname = docker")
+	grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
+}
+
+func grepFile(t *testing.T, path string, pattern string) {
+	f, err := os.Open(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+	r := bufio.NewReader(f)
+	var (
+		line string
+	)
+	err = nil
+	for err == nil {
+		line, err = r.ReadString('\n')
+		if strings.Contains(line, pattern) == true {
+			return
+		}
+	}
+	t.Fatalf("grepFile: pattern \"%s\" not found in \"%s\"", pattern, path)
+}
+
+func TestEscapeFstabSpaces(t *testing.T) {
+	var testInputs = map[string]string{
+		" ":                      "\\040",
+		"":                       "",
+		"/double  space":         "/double\\040\\040space",
+		"/some long test string": "/some\\040long\\040test\\040string",
+		"/var/lib/docker":        "/var/lib/docker",
+		" leading":               "\\040leading",
+		"trailing ":              "trailing\\040",
+	}
+	for in, exp := range testInputs {
+		if out := escapeFstabSpaces(in); exp != out {
+			t.Logf("Expected %s got %s", exp, out)
+			t.Fail()
+		}
+	}
+}
diff --git a/runtime/execdriver/native/configuration/parse.go b/runtime/execdriver/native/configuration/parse.go
new file mode 100644
index 0000000000..6d6c643919
--- /dev/null
+++ b/runtime/execdriver/native/configuration/parse.go
@@ -0,0 +1,186 @@
+package configuration
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/utils"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+type Action func(*libcontainer.Container, interface{}, string) error
+
+var actions = map[string]Action{
+	"cap.add":  addCap,  // add a cap
+	"cap.drop": dropCap, // drop a cap
+
+	"ns.add":  addNamespace,  // add a namespace
+	"ns.drop": dropNamespace, // drop a namespace when cloning
+
+	"net.join": joinNetNamespace, // join another containers net namespace
+
+	"cgroups.cpu_shares":  cpuShares,  // set the cpu shares
+	"cgroups.memory":      memory,     // set the memory limit
+	"cgroups.memory_swap": memorySwap, // set the memory swap limit
+	"cgroups.cpuset.cpus": cpusetCpus, // set the cpus used
+
+	"apparmor_profile": apparmorProfile, // set the apparmor profile to apply
+
+	"fs.readonly": readonlyFs, // make the rootfs of the container read only
+}
+
+func cpusetCpus(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+	container.Cgroups.CpusetCpus = value
+
+	return nil
+}
+
+func apparmorProfile(container *libcontainer.Container, context interface{}, value string) error {
+	container.Context["apparmor_profile"] = value
+	return nil
+}
+
+func cpuShares(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+	v, err := strconv.ParseInt(value, 10, 0)
+	if err != nil {
+		return err
+	}
+	container.Cgroups.CpuShares = v
+	return nil
+}
+
+func memory(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+
+	v, err := utils.RAMInBytes(value)
+	if err != nil {
+		return err
+	}
+	container.Cgroups.Memory = v
+	return nil
+}
+
+func memorySwap(container *libcontainer.Container, context interface{}, value string) error {
+	if container.Cgroups == nil {
+		return fmt.Errorf("cannot set cgroups when they are disabled")
+	}
+	v, err := strconv.ParseInt(value, 0, 64)
+	if err != nil {
+		return err
+	}
+	container.Cgroups.MemorySwap = v
+	return nil
+}
+
+func addCap(container *libcontainer.Container, context interface{}, value string) error {
+	c := container.CapabilitiesMask.Get(value)
+	if c == nil {
+		return fmt.Errorf("%s is not a valid capability", value)
+	}
+	c.Enabled = true
+	return nil
+}
+
+func dropCap(container *libcontainer.Container, context interface{}, value string) error {
+	c := container.CapabilitiesMask.Get(value)
+	if c == nil {
+		return fmt.Errorf("%s is not a valid capability", value)
+	}
+	c.Enabled = false
+	return nil
+}
+
+func addNamespace(container *libcontainer.Container, context interface{}, value string) error {
+	ns := container.Namespaces.Get(value)
+	if ns == nil {
+		return fmt.Errorf("%s is not a valid namespace", value[1:])
+	}
+	ns.Enabled = true
+	return nil
+}
+
+func dropNamespace(container *libcontainer.Container, context interface{}, value string) error {
+	ns := container.Namespaces.Get(value)
+	if ns == nil {
+		return fmt.Errorf("%s is not a valid namespace", value[1:])
+	}
+	ns.Enabled = false
+	return nil
+}
+
+func readonlyFs(container *libcontainer.Container, context interface{}, value string) error {
+	switch value {
+	case "1", "true":
+		container.ReadonlyFs = true
+	default:
+		container.ReadonlyFs = false
+	}
+	return nil
+}
+
+func joinNetNamespace(container *libcontainer.Container, context interface{}, value string) error {
+	var (
+		running = context.(map[string]*exec.Cmd)
+		cmd     = running[value]
+	)
+
+	if cmd == nil || cmd.Process == nil {
+		return fmt.Errorf("%s is not a valid running container to join", value)
+	}
+	nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net")
+	container.Networks = append(container.Networks, &libcontainer.Network{
+		Type: "netns",
+		Context: libcontainer.Context{
+			"nspath": nspath,
+		},
+	})
+	return nil
+}
+
+func vethMacAddress(container *libcontainer.Container, context interface{}, value string) error {
+	var veth *libcontainer.Network
+	for _, network := range container.Networks {
+		if network.Type == "veth" {
+			veth = network
+			break
+		}
+	}
+	if veth == nil {
+		return fmt.Errorf("not veth configured for container")
+	}
+	veth.Context["mac"] = value
+	return nil
+}
+
+// configureCustomOptions takes string commands from the user and allows modification of the
+// container's default configuration.
+//
+// TODO: this can be moved to a general utils or parser in pkg
+func ParseConfiguration(container *libcontainer.Container, running map[string]*exec.Cmd, opts []string) error {
+	for _, opt := range opts {
+		kv := strings.SplitN(opt, "=", 2)
+		if len(kv) < 2 {
+			return fmt.Errorf("invalid format for %s", opt)
+		}
+
+		action, exists := actions[kv[0]]
+		if !exists {
+			return fmt.Errorf("%s is not a valid option for the native driver", kv[0])
+		}
+
+		if err := action(container, running, kv[1]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/runtime/execdriver/native/configuration/parse_test.go b/runtime/execdriver/native/configuration/parse_test.go
new file mode 100644
index 0000000000..8001358766
--- /dev/null
+++ b/runtime/execdriver/native/configuration/parse_test.go
@@ -0,0 +1,166 @@
+package configuration
+
+import (
+	"github.com/dotcloud/docker/runtime/execdriver/native/template"
+	"testing"
+)
+
+func TestSetReadonlyRootFs(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"fs.readonly=true",
+		}
+	)
+
+	if container.ReadonlyFs {
+		t.Fatal("container should not have a readonly rootfs by default")
+	}
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if !container.ReadonlyFs {
+		t.Fatal("container should have a readonly rootfs")
+	}
+}
+
+func TestConfigurationsDoNotConflict(t *testing.T) {
+	var (
+		container1 = template.New()
+		container2 = template.New()
+		opts       = []string{
+			"cap.add=NET_ADMIN",
+		}
+	)
+
+	if err := ParseConfiguration(container1, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if !container1.CapabilitiesMask.Get("NET_ADMIN").Enabled {
+		t.Fatal("container one should have NET_ADMIN enabled")
+	}
+	if container2.CapabilitiesMask.Get("NET_ADMIN").Enabled {
+		t.Fatal("container two should not have NET_ADMIN enabled")
+	}
+}
+
+func TestCpusetCpus(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"cgroups.cpuset.cpus=1,2",
+		}
+	)
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if expected := "1,2"; container.Cgroups.CpusetCpus != expected {
+		t.Fatalf("expected %s got %s for cpuset.cpus", expected, container.Cgroups.CpusetCpus)
+	}
+}
+
+func TestAppArmorProfile(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"apparmor_profile=koye-the-protector",
+		}
+	)
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+	if expected := "koye-the-protector"; container.Context["apparmor_profile"] != expected {
+		t.Fatalf("expected profile %s got %s", expected, container.Context["apparmor_profile"])
+	}
+}
+
+func TestCpuShares(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"cgroups.cpu_shares=1048",
+		}
+	)
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if expected := int64(1048); container.Cgroups.CpuShares != expected {
+		t.Fatalf("expected cpu shares %d got %d", expected, container.Cgroups.CpuShares)
+	}
+}
+
+func TestCgroupMemory(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"cgroups.memory=500m",
+		}
+	)
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if expected := int64(500 * 1024 * 1024); container.Cgroups.Memory != expected {
+		t.Fatalf("expected memory %d got %d", expected, container.Cgroups.Memory)
+	}
+}
+
+func TestAddCap(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"cap.add=MKNOD",
+			"cap.add=SYS_ADMIN",
+		}
+	)
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if !container.CapabilitiesMask.Get("MKNOD").Enabled {
+		t.Fatal("container should have MKNOD enabled")
+	}
+	if !container.CapabilitiesMask.Get("SYS_ADMIN").Enabled {
+		t.Fatal("container should have SYS_ADMIN enabled")
+	}
+}
+
+func TestDropCap(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"cap.drop=MKNOD",
+		}
+	)
+	// enabled all caps like in privileged mode
+	for _, c := range container.CapabilitiesMask {
+		c.Enabled = true
+	}
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if container.CapabilitiesMask.Get("MKNOD").Enabled {
+		t.Fatal("container should not have MKNOD enabled")
+	}
+}
+
+func TestDropNamespace(t *testing.T) {
+	var (
+		container = template.New()
+		opts      = []string{
+			"ns.drop=NEWNET",
+		}
+	)
+	if err := ParseConfiguration(container, nil, opts); err != nil {
+		t.Fatal(err)
+	}
+
+	if container.Namespaces.Get("NEWNET").Enabled {
+		t.Fatal("container should not have NEWNET enabled")
+	}
+}
diff --git a/runtime/execdriver/native/create.go b/runtime/execdriver/native/create.go
new file mode 100644
index 0000000000..71fab3e064
--- /dev/null
+++ b/runtime/execdriver/native/create.go
@@ -0,0 +1,114 @@
+package native
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/dotcloud/docker/runtime/execdriver/native/configuration"
+	"github.com/dotcloud/docker/runtime/execdriver/native/template"
+)
+
+// createContainer populates and configures the container type with the
+// data provided by the execdriver.Command
+func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container, error) {
+	container := template.New()
+
+	container.Hostname = getEnv("HOSTNAME", c.Env)
+	container.Tty = c.Tty
+	container.User = c.User
+	container.WorkingDir = c.WorkingDir
+	container.Env = c.Env
+	container.Cgroups.Name = c.ID
+	// check to see if we are running in ramdisk to disable pivot root
+	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
+
+	if err := d.createNetwork(container, c); err != nil {
+		return nil, err
+	}
+	if c.Privileged {
+		if err := d.setPrivileged(container); err != nil {
+			return nil, err
+		}
+	}
+	if err := d.setupCgroups(container, c); err != nil {
+		return nil, err
+	}
+	if err := d.setupMounts(container, c); err != nil {
+		return nil, err
+	}
+	if err := d.setupLabels(container, c); err != nil {
+		return nil, err
+	}
+	if err := configuration.ParseConfiguration(container, d.activeContainers, c.Config["native"]); err != nil {
+		return nil, err
+	}
+	return container, nil
+}
+
+func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver.Command) error {
+	container.Networks = []*libcontainer.Network{
+		{
+			Mtu:     c.Network.Mtu,
+			Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0),
+			Gateway: "localhost",
+			Type:    "loopback",
+			Context: libcontainer.Context{},
+		},
+	}
+
+	if c.Network.Interface != nil {
+		vethNetwork := libcontainer.Network{
+			Mtu:     c.Network.Mtu,
+			Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
+			Gateway: c.Network.Interface.Gateway,
+			Type:    "veth",
+			Context: libcontainer.Context{
+				"prefix": "veth",
+				"bridge": c.Network.Interface.Bridge,
+			},
+		}
+		container.Networks = append(container.Networks, &vethNetwork)
+	}
+	return nil
+}
+
+func (d *driver) setPrivileged(container *libcontainer.Container) error {
+	for _, c := range container.CapabilitiesMask {
+		c.Enabled = true
+	}
+	container.Cgroups.DeviceAccess = true
+	container.Context["apparmor_profile"] = "unconfined"
+	return nil
+}
+
+func (d *driver) setupCgroups(container *libcontainer.Container, c *execdriver.Command) error {
+	if c.Resources != nil {
+		container.Cgroups.CpuShares = c.Resources.CpuShares
+		container.Cgroups.Memory = c.Resources.Memory
+		container.Cgroups.MemorySwap = c.Resources.MemorySwap
+	}
+	return nil
+}
+
+func (d *driver) setupMounts(container *libcontainer.Container, c *execdriver.Command) error {
+	for _, m := range c.Mounts {
+		container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private})
+	}
+	return nil
+}
+
+func (d *driver) setupLabels(container *libcontainer.Container, c *execdriver.Command) error {
+	labels := c.Config["label"]
+	if len(labels) > 0 {
+		process, mount, err := label.GenLabels(labels[0])
+		if err != nil {
+			return err
+		}
+		container.Context["mount_label"] = mount
+		container.Context["process_label"] = process
+	}
+	return nil
+}
diff --git a/runtime/execdriver/native/driver.go b/runtime/execdriver/native/driver.go
new file mode 100644
index 0000000000..d18865e508
--- /dev/null
+++ b/runtime/execdriver/native/driver.go
@@ -0,0 +1,292 @@
+package native
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/dotcloud/docker/pkg/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/apparmor"
+	"github.com/dotcloud/docker/pkg/libcontainer/nsinit"
+	"github.com/dotcloud/docker/pkg/system"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+)
+
+const (
+	DriverName                = "native"
+	Version                   = "0.1"
+	BackupApparmorProfilePath = "apparmor/docker.back" // relative to docker root
+)
+
+func init() {
+	execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
+		var (
+			container *libcontainer.Container
+			ns        = nsinit.NewNsInit(&nsinit.DefaultCommandFactory{}, &nsinit.DefaultStateWriter{args.Root}, createLogger(""))
+		)
+		f, err := os.Open(filepath.Join(args.Root, "container.json"))
+		if err != nil {
+			return err
+		}
+		if err := json.NewDecoder(f).Decode(&container); err != nil {
+			f.Close()
+			return err
+		}
+		f.Close()
+
+		cwd, err := os.Getwd()
+		if err != nil {
+			return err
+		}
+		syncPipe, err := nsinit.NewSyncPipeFromFd(0, uintptr(args.Pipe))
+		if err != nil {
+			return err
+		}
+		if err := ns.Init(container, cwd, args.Console, syncPipe, args.Args); err != nil {
+			return err
+		}
+		return nil
+	})
+}
+
+type driver struct {
+	root             string
+	initPath         string
+	activeContainers map[string]*exec.Cmd
+}
+
+func NewDriver(root, initPath string) (*driver, error) {
+	if err := os.MkdirAll(root, 0700); err != nil {
+		return nil, err
+	}
+	// native driver root is at docker_root/execdriver/native. Put apparmor at docker_root
+	if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
+		return nil, err
+	}
+	return &driver{
+		root:             root,
+		initPath:         initPath,
+		activeContainers: make(map[string]*exec.Cmd),
+	}, nil
+}
+
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+	// take the Command and populate the libcontainer.Container from it
+	container, err := d.createContainer(c)
+	if err != nil {
+		return -1, err
+	}
+	d.activeContainers[c.ID] = &c.Cmd
+
+	var (
+		term        nsinit.Terminal
+		factory     = &dockerCommandFactory{c: c, driver: d}
+		stateWriter = &dockerStateWriter{
+			callback: startCallback,
+			c:        c,
+			dsw:      &nsinit.DefaultStateWriter{filepath.Join(d.root, c.ID)},
+		}
+		ns   = nsinit.NewNsInit(factory, stateWriter, createLogger(os.Getenv("DEBUG")))
+		args = append([]string{c.Entrypoint}, c.Arguments...)
+	)
+	if err := d.createContainerRoot(c.ID); err != nil {
+		return -1, err
+	}
+	defer d.removeContainerRoot(c.ID)
+
+	if c.Tty {
+		term = &dockerTtyTerm{
+			pipes: pipes,
+		}
+	} else {
+		term = &dockerStdTerm{
+			pipes: pipes,
+		}
+	}
+	c.Terminal = term
+	if err := d.writeContainerFile(container, c.ID); err != nil {
+		return -1, err
+	}
+	return ns.Exec(container, term, args)
+}
+
+func (d *driver) Kill(p *execdriver.Command, sig int) error {
+	return syscall.Kill(p.Process.Pid, syscall.Signal(sig))
+}
+
+func (d *driver) Terminate(p *execdriver.Command) error {
+	// lets check the start time for the process
+	started, err := d.readStartTime(p)
+	if err != nil {
+		// if we don't have the data on disk then we can assume the process is gone
+		// because this is only removed after we know the process has stopped
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+
+	currentStartTime, err := system.GetProcessStartTime(p.Process.Pid)
+	if err != nil {
+		return err
+	}
+	if started == currentStartTime {
+		err = syscall.Kill(p.Process.Pid, 9)
+	}
+	d.removeContainerRoot(p.ID)
+	return err
+
+}
+
+func (d *driver) readStartTime(p *execdriver.Command) (string, error) {
+	data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
+	if err != nil {
+		return "", err
+	}
+	return string(data), nil
+}
+
+func (d *driver) Info(id string) execdriver.Info {
+	return &info{
+		ID:     id,
+		driver: d,
+	}
+}
+
+func (d *driver) Name() string {
+	return fmt.Sprintf("%s-%s", DriverName, Version)
+}
+
+// TODO: this can be improved with our driver
+// there has to be a better way to do this
+func (d *driver) GetPidsForContainer(id string) ([]int, error) {
+	pids := []int{}
+
+	subsystem := "devices"
+	cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem)
+	if err != nil {
+		return pids, err
+	}
+	cgroupDir, err := cgroups.GetThisCgroupDir(subsystem)
+	if err != nil {
+		return pids, err
+	}
+
+	filename := filepath.Join(cgroupRoot, cgroupDir, id, "tasks")
+	if _, err := os.Stat(filename); os.IsNotExist(err) {
+		filename = filepath.Join(cgroupRoot, cgroupDir, "docker", id, "tasks")
+	}
+
+	output, err := ioutil.ReadFile(filename)
+	if err != nil {
+		return pids, err
+	}
+	for _, p := range strings.Split(string(output), "\n") {
+		if len(p) == 0 {
+			continue
+		}
+		pid, err := strconv.Atoi(p)
+		if err != nil {
+			return pids, fmt.Errorf("Invalid pid '%s': %s", p, err)
+		}
+		pids = append(pids, pid)
+	}
+	return pids, nil
+}
+
+func (d *driver) writeContainerFile(container *libcontainer.Container, id string) error {
+	data, err := json.Marshal(container)
+	if err != nil {
+		return err
+	}
+	return ioutil.WriteFile(filepath.Join(d.root, id, "container.json"), data, 0655)
+}
+
+func (d *driver) createContainerRoot(id string) error {
+	return os.MkdirAll(filepath.Join(d.root, id), 0655)
+}
+
+func (d *driver) removeContainerRoot(id string) error {
+	return os.RemoveAll(filepath.Join(d.root, id))
+}
+
+func getEnv(key string, env []string) string {
+	for _, pair := range env {
+		parts := strings.Split(pair, "=")
+		if parts[0] == key {
+			return parts[1]
+		}
+	}
+	return ""
+}
+
+type dockerCommandFactory struct {
+	c      *execdriver.Command
+	driver *driver
+}
+
+// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
+// defined on the container's configuration and use the current binary as the init with the
+// args provided
+func (d *dockerCommandFactory) Create(container *libcontainer.Container, console string, syncFile *os.File, args []string) *exec.Cmd {
+	// we need to join the rootfs because nsinit will setup the rootfs and chroot
+	initPath := filepath.Join(d.c.Rootfs, d.c.InitPath)
+
+	d.c.Path = d.driver.initPath
+	d.c.Args = append([]string{
+		initPath,
+		"-driver", DriverName,
+		"-console", console,
+		"-pipe", "3",
+		"-root", filepath.Join(d.driver.root, d.c.ID),
+		"--",
+	}, args...)
+
+	// set this to nil so that when we set the clone flags anything else is reset
+	d.c.SysProcAttr = nil
+	system.SetCloneFlags(&d.c.Cmd, uintptr(nsinit.GetNamespaceFlags(container.Namespaces)))
+	d.c.ExtraFiles = []*os.File{syncFile}
+
+	d.c.Env = container.Env
+	d.c.Dir = d.c.Rootfs
+
+	return &d.c.Cmd
+}
+
+type dockerStateWriter struct {
+	dsw      nsinit.StateWriter
+	c        *execdriver.Command
+	callback execdriver.StartCallback
+}
+
+func (d *dockerStateWriter) WritePid(pid int, started string) error {
+	d.c.ContainerPid = pid
+	err := d.dsw.WritePid(pid, started)
+	if d.callback != nil {
+		d.callback(d.c)
+	}
+	return err
+}
+
+func (d *dockerStateWriter) DeletePid() error {
+	return d.dsw.DeletePid()
+}
+
+func createLogger(debug string) *log.Logger {
+	var w io.Writer
+	// if we are in debug mode set the logger to stderr
+	if debug != "" {
+		w = os.Stderr
+	} else {
+		w = ioutil.Discard
+	}
+	return log.New(w, "[libcontainer] ", log.LstdFlags)
+}
diff --git a/runtime/execdriver/native/info.go b/runtime/execdriver/native/info.go
new file mode 100644
index 0000000000..aef2f85c6b
--- /dev/null
+++ b/runtime/execdriver/native/info.go
@@ -0,0 +1,21 @@
+package native
+
+import (
+	"os"
+	"path/filepath"
+)
+
+type info struct {
+	ID     string
+	driver *driver
+}
+
+// IsRunning is determined by looking for the
+// pid file for a container.  If the file exists then the
+// container is currently running
+func (i *info) IsRunning() bool {
+	if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil {
+		return true
+	}
+	return false
+}
diff --git a/runtime/execdriver/native/template/default_template.go b/runtime/execdriver/native/template/default_template.go
new file mode 100644
index 0000000000..a1ecb04d76
--- /dev/null
+++ b/runtime/execdriver/native/template/default_template.go
@@ -0,0 +1,45 @@
+package template
+
+import (
+	"github.com/dotcloud/docker/pkg/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+)
+
+// New returns the docker default configuration for libcontainer
+func New() *libcontainer.Container {
+	container := &libcontainer.Container{
+		CapabilitiesMask: libcontainer.Capabilities{
+			libcontainer.GetCapability("SETPCAP"),
+			libcontainer.GetCapability("SYS_MODULE"),
+			libcontainer.GetCapability("SYS_RAWIO"),
+			libcontainer.GetCapability("SYS_PACCT"),
+			libcontainer.GetCapability("SYS_ADMIN"),
+			libcontainer.GetCapability("SYS_NICE"),
+			libcontainer.GetCapability("SYS_RESOURCE"),
+			libcontainer.GetCapability("SYS_TIME"),
+			libcontainer.GetCapability("SYS_TTY_CONFIG"),
+			libcontainer.GetCapability("AUDIT_WRITE"),
+			libcontainer.GetCapability("AUDIT_CONTROL"),
+			libcontainer.GetCapability("MAC_OVERRIDE"),
+			libcontainer.GetCapability("MAC_ADMIN"),
+			libcontainer.GetCapability("NET_ADMIN"),
+			libcontainer.GetCapability("MKNOD"),
+		},
+		Namespaces: libcontainer.Namespaces{
+			libcontainer.GetNamespace("NEWNS"),
+			libcontainer.GetNamespace("NEWUTS"),
+			libcontainer.GetNamespace("NEWIPC"),
+			libcontainer.GetNamespace("NEWPID"),
+			libcontainer.GetNamespace("NEWNET"),
+		},
+		Cgroups: &cgroups.Cgroup{
+			Parent:       "docker",
+			DeviceAccess: false,
+		},
+		Context: libcontainer.Context{
+			"apparmor_profile": "docker-default",
+		},
+	}
+	container.CapabilitiesMask.Get("MKNOD").Enabled = true
+	return container
+}
diff --git a/runtime/execdriver/native/term.go b/runtime/execdriver/native/term.go
new file mode 100644
index 0000000000..0d5298d388
--- /dev/null
+++ b/runtime/execdriver/native/term.go
@@ -0,0 +1,42 @@
+/*
+   These types are wrappers around the libcontainer Terminal interface so that
+   we can resuse the docker implementations where possible.
+*/
+package native
+
+import (
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"io"
+	"os"
+	"os/exec"
+)
+
+type dockerStdTerm struct {
+	execdriver.StdConsole
+	pipes *execdriver.Pipes
+}
+
+func (d *dockerStdTerm) Attach(cmd *exec.Cmd) error {
+	return d.AttachPipes(cmd, d.pipes)
+}
+
+func (d *dockerStdTerm) SetMaster(master *os.File) {
+	// do nothing
+}
+
+type dockerTtyTerm struct {
+	execdriver.TtyConsole
+	pipes *execdriver.Pipes
+}
+
+func (t *dockerTtyTerm) Attach(cmd *exec.Cmd) error {
+	go io.Copy(t.pipes.Stdout, t.MasterPty)
+	if t.pipes.Stdin != nil {
+		go io.Copy(t.MasterPty, t.pipes.Stdin)
+	}
+	return nil
+}
+
+func (t *dockerTtyTerm) SetMaster(master *os.File) {
+	t.MasterPty = master
+}
diff --git a/runtime/execdriver/pipes.go b/runtime/execdriver/pipes.go
new file mode 100644
index 0000000000..158219f0c5
--- /dev/null
+++ b/runtime/execdriver/pipes.go
@@ -0,0 +1,23 @@
+package execdriver
+
+import (
+	"io"
+)
+
+// Pipes is a wrapper around a containers output for
+// stdin, stdout, stderr
+type Pipes struct {
+	Stdin          io.ReadCloser
+	Stdout, Stderr io.Writer
+}
+
+func NewPipes(stdin io.ReadCloser, stdout, stderr io.Writer, useStdin bool) *Pipes {
+	p := &Pipes{
+		Stdout: stdout,
+		Stderr: stderr,
+	}
+	if useStdin {
+		p.Stdin = stdin
+	}
+	return p
+}
diff --git a/runtime/execdriver/termconsole.go b/runtime/execdriver/termconsole.go
new file mode 100644
index 0000000000..af6b88d3d1
--- /dev/null
+++ b/runtime/execdriver/termconsole.go
@@ -0,0 +1,126 @@
+package execdriver
+
+import (
+	"github.com/dotcloud/docker/pkg/term"
+	"github.com/kr/pty"
+	"io"
+	"os"
+	"os/exec"
+)
+
+func SetTerminal(command *Command, pipes *Pipes) error {
+	var (
+		term Terminal
+		err  error
+	)
+	if command.Tty {
+		term, err = NewTtyConsole(command, pipes)
+	} else {
+		term, err = NewStdConsole(command, pipes)
+	}
+	if err != nil {
+		return err
+	}
+	command.Terminal = term
+	return nil
+}
+
+type TtyConsole struct {
+	MasterPty *os.File
+	SlavePty  *os.File
+}
+
+func NewTtyConsole(command *Command, pipes *Pipes) (*TtyConsole, error) {
+	ptyMaster, ptySlave, err := pty.Open()
+	if err != nil {
+		return nil, err
+	}
+	tty := &TtyConsole{
+		MasterPty: ptyMaster,
+		SlavePty:  ptySlave,
+	}
+	if err := tty.AttachPipes(&command.Cmd, pipes); err != nil {
+		tty.Close()
+		return nil, err
+	}
+	command.Console = tty.SlavePty.Name()
+	return tty, nil
+}
+
+func (t *TtyConsole) Master() *os.File {
+	return t.MasterPty
+}
+
+func (t *TtyConsole) Resize(h, w int) error {
+	return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
+}
+
+func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *Pipes) error {
+	command.Stdout = t.SlavePty
+	command.Stderr = t.SlavePty
+
+	go func() {
+		if wb, ok := pipes.Stdout.(interface {
+			CloseWriters() error
+		}); ok {
+			defer wb.CloseWriters()
+		}
+		io.Copy(pipes.Stdout, t.MasterPty)
+	}()
+
+	if pipes.Stdin != nil {
+		command.Stdin = t.SlavePty
+		command.SysProcAttr.Setctty = true
+
+		go func() {
+			defer pipes.Stdin.Close()
+			io.Copy(t.MasterPty, pipes.Stdin)
+		}()
+	}
+	return nil
+}
+
+func (t *TtyConsole) Close() error {
+	t.SlavePty.Close()
+	return t.MasterPty.Close()
+}
+
+type StdConsole struct {
+}
+
+func NewStdConsole(command *Command, pipes *Pipes) (*StdConsole, error) {
+	std := &StdConsole{}
+
+	if err := std.AttachPipes(&command.Cmd, pipes); err != nil {
+		return nil, err
+	}
+	return std, nil
+}
+
+func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *Pipes) error {
+	command.Stdout = pipes.Stdout
+	command.Stderr = pipes.Stderr
+
+	if pipes.Stdin != nil {
+		stdin, err := command.StdinPipe()
+		if err != nil {
+			return err
+		}
+
+		go func() {
+			defer stdin.Close()
+			io.Copy(stdin, pipes.Stdin)
+		}()
+	}
+	return nil
+}
+
+func (s *StdConsole) Resize(h, w int) error {
+	// we do not need to reside a non tty
+	return nil
+}
+
+func (s *StdConsole) Close() error {
+	// nothing to close here
+	return nil
+}
diff --git a/runtime/graphdriver/aufs/aufs.go b/runtime/graphdriver/aufs/aufs.go
new file mode 100644
index 0000000000..401bbd8c86
--- /dev/null
+++ b/runtime/graphdriver/aufs/aufs.go
@@ -0,0 +1,401 @@
+/*
+
+aufs driver directory structure
+
+.
+├── layers // Metadata of layers
+│   ├── 1
+│   ├── 2
+│   └── 3
+├── diffs  // Content of the layer
+│   ├── 1  // Contains layers that need to be mounted for the id
+│   ├── 2
+│   └── 3
+└── mnt    // Mount points for the rw layers to be mounted
+    ├── 1
+    ├── 2
+    └── 3
+
+*/
+
+package aufs
+
+import (
+	"bufio"
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	mountpk "github.com/dotcloud/docker/pkg/mount"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"github.com/dotcloud/docker/utils"
+	"os"
+	"os/exec"
+	"path"
+	"strings"
+	"sync"
+)
+
+var (
+	ErrAufsNotSupported = fmt.Errorf("AUFS was not found in /proc/filesystems")
+)
+
+func init() {
+	graphdriver.Register("aufs", Init)
+}
+
+type Driver struct {
+	root       string
+	sync.Mutex // Protects concurrent modification to active
+	active     map[string]int
+}
+
+// New returns a new AUFS driver.
+// An error is returned if AUFS is not supported.
+func Init(root string) (graphdriver.Driver, error) {
+	// Try to load the aufs kernel module
+	if err := supportsAufs(); err != nil {
+		return nil, err
+	}
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	a := &Driver{
+		root:   root,
+		active: make(map[string]int),
+	}
+
+	// Create the root aufs driver dir and return
+	// if it already exists
+	// If not populate the dir structure
+	if err := os.MkdirAll(root, 0755); err != nil {
+		if os.IsExist(err) {
+			return a, nil
+		}
+		return nil, err
+	}
+
+	for _, p := range paths {
+		if err := os.MkdirAll(path.Join(root, p), 0755); err != nil {
+			return nil, err
+		}
+	}
+	return a, nil
+}
+
+// Return a nil error if the kernel supports aufs
+// We cannot modprobe because inside dind modprobe fails
+// to run
+func supportsAufs() error {
+	// We can try to modprobe aufs first before looking at
+	// proc/filesystems for when aufs is supported
+	exec.Command("modprobe", "aufs").Run()
+
+	f, err := os.Open("/proc/filesystems")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		if strings.Contains(s.Text(), "aufs") {
+			return nil
+		}
+	}
+	return ErrAufsNotSupported
+}
+
+func (a Driver) rootPath() string {
+	return a.root
+}
+
+func (Driver) String() string {
+	return "aufs"
+}
+
+func (a Driver) Status() [][2]string {
+	ids, _ := loadIds(path.Join(a.rootPath(), "layers"))
+	return [][2]string{
+		{"Root Dir", a.rootPath()},
+		{"Dirs", fmt.Sprintf("%d", len(ids))},
+	}
+}
+
+// Exists returns true if the given id is registered with
+// this driver
+func (a Driver) Exists(id string) bool {
+	if _, err := os.Lstat(path.Join(a.rootPath(), "layers", id)); err != nil {
+		return false
+	}
+	return true
+}
+
+// Three folders are created for each id
+// mnt, layers, and diff
+func (a *Driver) Create(id, parent string, mountLabel string) error {
+	if err := a.createDirsFor(id); err != nil {
+		return err
+	}
+	// Write the layers metadata
+	f, err := os.Create(path.Join(a.rootPath(), "layers", id))
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	if parent != "" {
+		ids, err := getParentIds(a.rootPath(), parent)
+		if err != nil {
+			return err
+		}
+
+		if _, err := fmt.Fprintln(f, parent); err != nil {
+			return err
+		}
+		for _, i := range ids {
+			if _, err := fmt.Fprintln(f, i); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (a *Driver) createDirsFor(id string) error {
+	paths := []string{
+		"mnt",
+		"diff",
+	}
+
+	for _, p := range paths {
+		if err := os.MkdirAll(path.Join(a.rootPath(), p, id), 0755); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Unmount and remove the dir information
+func (a *Driver) Remove(id string) error {
+	// Protect the a.active from concurrent access
+	a.Lock()
+	defer a.Unlock()
+
+	if a.active[id] != 0 {
+		utils.Errorf("Warning: removing active id %s\n", id)
+	}
+
+	// Make sure the dir is umounted first
+	if err := a.unmount(id); err != nil {
+		return err
+	}
+	tmpDirs := []string{
+		"mnt",
+		"diff",
+	}
+
+	// Atomically remove each directory in turn by first moving it out of the
+	// way (so that docker doesn't find it anymore) before doing removal of
+	// the whole tree.
+	for _, p := range tmpDirs {
+
+		realPath := path.Join(a.rootPath(), p, id)
+		tmpPath := path.Join(a.rootPath(), p, fmt.Sprintf("%s-removing", id))
+		if err := os.Rename(realPath, tmpPath); err != nil && !os.IsNotExist(err) {
+			return err
+		}
+		defer os.RemoveAll(tmpPath)
+	}
+
+	// Remove the layers file for the id
+	if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+// Return the rootfs path for the id
+// This will mount the dir at it's given path
+func (a *Driver) Get(id string) (string, error) {
+	ids, err := getParentIds(a.rootPath(), id)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return "", err
+		}
+		ids = []string{}
+	}
+
+	// Protect the a.active from concurrent access
+	a.Lock()
+	defer a.Unlock()
+
+	count := a.active[id]
+
+	// If a dir does not have a parent ( no layers )do not try to mount
+	// just return the diff path to the data
+	out := path.Join(a.rootPath(), "diff", id)
+	if len(ids) > 0 {
+		out = path.Join(a.rootPath(), "mnt", id)
+
+		if count == 0 {
+			if err := a.mount(id); err != nil {
+				return "", err
+			}
+		}
+	}
+
+	a.active[id] = count + 1
+
+	return out, nil
+}
+
+func (a *Driver) Put(id string) {
+	// Protect the a.active from concurrent access
+	a.Lock()
+	defer a.Unlock()
+
+	if count := a.active[id]; count > 1 {
+		a.active[id] = count - 1
+	} else {
+		ids, _ := getParentIds(a.rootPath(), id)
+		// We only mounted if there are any parents
+		if ids != nil && len(ids) > 0 {
+			a.unmount(id)
+		}
+		delete(a.active, id)
+	}
+}
+
+// Returns an archive of the contents for the id
+func (a *Driver) Diff(id string) (archive.Archive, error) {
+	return archive.TarFilter(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
+		Compression: archive.Uncompressed,
+	})
+}
+
+func (a *Driver) ApplyDiff(id string, diff archive.ArchiveReader) error {
+	return archive.Untar(diff, path.Join(a.rootPath(), "diff", id), nil)
+}
+
+// Returns the size of the contents for the id
+func (a *Driver) DiffSize(id string) (int64, error) {
+	return utils.TreeSize(path.Join(a.rootPath(), "diff", id))
+}
+
+func (a *Driver) Changes(id string) ([]archive.Change, error) {
+	layers, err := a.getParentLayerPaths(id)
+	if err != nil {
+		return nil, err
+	}
+	return archive.Changes(layers, path.Join(a.rootPath(), "diff", id))
+}
+
+func (a *Driver) getParentLayerPaths(id string) ([]string, error) {
+	parentIds, err := getParentIds(a.rootPath(), id)
+	if err != nil {
+		return nil, err
+	}
+	if len(parentIds) == 0 {
+		return nil, fmt.Errorf("Dir %s does not have any parent layers", id)
+	}
+	layers := make([]string, len(parentIds))
+
+	// Get the diff paths for all the parent ids
+	for i, p := range parentIds {
+		layers[i] = path.Join(a.rootPath(), "diff", p)
+	}
+	return layers, nil
+}
+
+func (a *Driver) mount(id string) error {
+	// If the id is mounted or we get an error return
+	if mounted, err := a.mounted(id); err != nil || mounted {
+		return err
+	}
+
+	var (
+		target = path.Join(a.rootPath(), "mnt", id)
+		rw     = path.Join(a.rootPath(), "diff", id)
+	)
+
+	layers, err := a.getParentLayerPaths(id)
+	if err != nil {
+		return err
+	}
+
+	if err := a.aufsMount(layers, rw, target); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (a *Driver) unmount(id string) error {
+	if mounted, err := a.mounted(id); err != nil || !mounted {
+		return err
+	}
+	target := path.Join(a.rootPath(), "mnt", id)
+	return Unmount(target)
+}
+
+func (a *Driver) mounted(id string) (bool, error) {
+	target := path.Join(a.rootPath(), "mnt", id)
+	return mountpk.Mounted(target)
+}
+
+// During cleanup aufs needs to unmount all mountpoints
+func (a *Driver) Cleanup() error {
+	ids, err := loadIds(path.Join(a.rootPath(), "layers"))
+	if err != nil {
+		return err
+	}
+	for _, id := range ids {
+		if err := a.unmount(id); err != nil {
+			utils.Errorf("Unmounting %s: %s", utils.TruncateID(id), err)
+		}
+	}
+	return nil
+}
+
+func (a *Driver) aufsMount(ro []string, rw, target string) (err error) {
+	defer func() {
+		if err != nil {
+			Unmount(target)
+		}
+	}()
+
+	if err = a.tryMount(ro, rw, target); err != nil {
+		if err = a.mountRw(rw, target); err != nil {
+			return
+		}
+
+		for _, layer := range ro {
+			branch := fmt.Sprintf("append:%s=ro+wh", layer)
+			if err = mount("none", target, "aufs", MsRemount, branch); err != nil {
+				return
+			}
+		}
+	}
+	return
+}
+
+// Try to mount using the aufs fast path, if this fails then
+// append ro layers.
+func (a *Driver) tryMount(ro []string, rw, target string) (err error) {
+	var (
+		rwBranch   = fmt.Sprintf("%s=rw", rw)
+		roBranches = fmt.Sprintf("%s=ro+wh:", strings.Join(ro, "=ro+wh:"))
+	)
+	return mount("none", target, "aufs", 0, fmt.Sprintf("br:%v:%v,xino=/dev/shm/aufs.xino", rwBranch, roBranches))
+}
+
+func (a *Driver) mountRw(rw, target string) error {
+	return mount("none", target, "aufs", 0, fmt.Sprintf("br:%s,xino=/dev/shm/aufs.xino", rw))
+}
+
+func rollbackMount(target string, err error) {
+	if err != nil {
+		Unmount(target)
+	}
+}
diff --git a/runtime/graphdriver/aufs/aufs_test.go b/runtime/graphdriver/aufs/aufs_test.go
new file mode 100644
index 0000000000..9cfdebd160
--- /dev/null
+++ b/runtime/graphdriver/aufs/aufs_test.go
@@ -0,0 +1,697 @@
+package aufs
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"io/ioutil"
+	"os"
+	"path"
+	"testing"
+)
+
+var (
+	tmp = path.Join(os.TempDir(), "aufs-tests", "aufs")
+)
+
+func testInit(dir string, t *testing.T) graphdriver.Driver {
+	d, err := Init(dir)
+	if err != nil {
+		if err == ErrAufsNotSupported {
+			t.Skip(err)
+		} else {
+			t.Fatal(err)
+		}
+	}
+	return d
+}
+
+func newDriver(t *testing.T) *Driver {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	d := testInit(tmp, t)
+	return d.(*Driver)
+}
+
+func TestNewDriver(t *testing.T) {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	d := testInit(tmp, t)
+	defer os.RemoveAll(tmp)
+	if d == nil {
+		t.Fatalf("Driver should not be nil")
+	}
+}
+
+func TestAufsString(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if d.String() != "aufs" {
+		t.Fatalf("Expected aufs got %s", d.String())
+	}
+}
+
+func TestCreateDirStructure(t *testing.T) {
+	newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	paths := []string{
+		"mnt",
+		"layers",
+		"diff",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p)); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// We should be able to create two drivers with the same dir structure
+func TestNewDriverFromExistingDir(t *testing.T) {
+	if err := os.MkdirAll(tmp, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	testInit(tmp, t)
+	testInit(tmp, t)
+	os.RemoveAll(tmp)
+}
+
+func TestCreateNewDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCreateNewDirStructure(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p, "1")); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func TestRemoveImage(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Remove("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	paths := []string{
+		"mnt",
+		"diff",
+		"layers",
+	}
+
+	for _, p := range paths {
+		if _, err := os.Stat(path.Join(tmp, p, "1")); err == nil {
+			t.Fatalf("Error should not be nil because dirs with id 1 should be delted: %s", p)
+		}
+	}
+}
+
+func TestGetWithoutParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	expected := path.Join(tmp, "diff", "1")
+	if diffPath != expected {
+		t.Fatalf("Expected path %s got %s", expected, diffPath)
+	}
+}
+
+func TestCleanupWithNoDirs(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCleanupWithDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMountedFalseResponse(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	response, err := d.mounted("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if response != false {
+		t.Fatalf("Response if dir id 1 is mounted should be false")
+	}
+}
+
+func TestMountedTrueReponse(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	_, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	response, err := d.mounted("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if response != true {
+		t.Fatalf("Response if dir id 2 is mounted should be true")
+	}
+}
+
+func TestMountWithParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPath, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mntPath == "" {
+		t.Fatal("mntPath should not be empty string")
+	}
+
+	expected := path.Join(tmp, "mnt", "2")
+	if mntPath != expected {
+		t.Fatalf("Expected %s got %s", expected, mntPath)
+	}
+}
+
+func TestRemoveMountedDir(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPath, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mntPath == "" {
+		t.Fatal("mntPath should not be empty string")
+	}
+
+	mounted, err := d.mounted("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !mounted {
+		t.Fatalf("Dir id 2 should be mounted")
+	}
+
+	if err := d.Remove("2"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCreateWithInvalidParent(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "docker", ""); err == nil {
+		t.Fatalf("Error should not be nil with parent does not exist")
+	}
+}
+
+func TestGetDiff(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	a, err := d.Diff("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if a == nil {
+		t.Fatalf("Archive should not be nil")
+	}
+}
+
+func TestChanges(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("2", "1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if err := d.Cleanup(); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	mntPoint, err := d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file to save in the mountpoint
+	f, err := os.Create(path.Join(mntPoint, "test.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := f.WriteString("testline"); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	changes, err := d.Changes("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(changes) != 1 {
+		t.Fatalf("Dir 2 should have one change from parent got %d", len(changes))
+	}
+	change := changes[0]
+
+	expectedPath := "/test.txt"
+	if change.Path != expectedPath {
+		t.Fatalf("Expected path %s got %s", expectedPath, change.Path)
+	}
+
+	if change.Kind != archive.ChangeAdd {
+		t.Fatalf("Change kind should be ChangeAdd got %s", change.Kind)
+	}
+
+	if err := d.Create("3", "2", ""); err != nil {
+		t.Fatal(err)
+	}
+	mntPoint, err = d.Get("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a file to save in the mountpoint
+	f, err = os.Create(path.Join(mntPoint, "test2.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := f.WriteString("testline"); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	changes, err = d.Changes("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(changes) != 1 {
+		t.Fatalf("Dir 2 should have one change from parent got %d", len(changes))
+	}
+	change = changes[0]
+
+	expectedPath = "/test2.txt"
+	if change.Path != expectedPath {
+		t.Fatalf("Expected path %s got %s", expectedPath, change.Path)
+	}
+
+	if change.Kind != archive.ChangeAdd {
+		t.Fatalf("Change kind should be ChangeAdd got %s", change.Kind)
+	}
+}
+
+func TestDiffSize(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	s, err := f.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	size = s.Size()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err := d.DiffSize("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diffSize != size {
+		t.Fatalf("Expected size to be %d got %d", size, diffSize)
+	}
+}
+
+func TestChildDiffSize(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	s, err := f.Stat()
+	if err != nil {
+		t.Fatal(err)
+	}
+	size = s.Size()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err := d.DiffSize("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if diffSize != size {
+		t.Fatalf("Expected size to be %d got %d", size, diffSize)
+	}
+
+	if err := d.Create("2", "1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffSize, err = d.DiffSize("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The diff size for the child should be zero
+	if diffSize != 0 {
+		t.Fatalf("Expected size to be %d got %d", 0, diffSize)
+	}
+}
+
+func TestExists(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if d.Exists("none") {
+		t.Fatal("id name should not exist in the driver")
+	}
+
+	if !d.Exists("1") {
+		t.Fatal("id 1 should exist in the driver")
+	}
+}
+
+func TestStatus(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	status := d.Status()
+	if status == nil || len(status) == 0 {
+		t.Fatal("Status should not be nil or empty")
+	}
+	rootDir := status[0]
+	dirs := status[1]
+	if rootDir[0] != "Root Dir" {
+		t.Fatalf("Expected Root Dir got %s", rootDir[0])
+	}
+	if rootDir[1] != d.rootPath() {
+		t.Fatalf("Expected %s got %s", d.rootPath(), rootDir[1])
+	}
+	if dirs[0] != "Dirs" {
+		t.Fatalf("Expected Dirs got %s", dirs[0])
+	}
+	if dirs[1] != "1" {
+		t.Fatalf("Expected 1 got %s", dirs[1])
+	}
+}
+
+func TestApplyDiff(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	diffPath, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a file to the diff path with a fixed size
+	size := int64(1024)
+
+	f, err := os.Create(path.Join(diffPath, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	diff, err := d.Diff("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Create("2", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if err := d.Create("3", "2", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.ApplyDiff("3", diff); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that the file is in the mount point for id 3
+
+	mountPoint, err := d.Get("3")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := os.Stat(path.Join(mountPoint, "test_file")); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func hash(c string) string {
+	h := sha256.New()
+	fmt.Fprint(h, c)
+	return hex.EncodeToString(h.Sum(nil))
+}
+
+func TestMountMoreThan42Layers(t *testing.T) {
+	d := newDriver(t)
+	defer os.RemoveAll(tmp)
+	defer d.Cleanup()
+	var last string
+	var expected int
+
+	for i := 1; i < 127; i++ {
+		expected++
+		var (
+			parent  = fmt.Sprintf("%d", i-1)
+			current = fmt.Sprintf("%d", i)
+		)
+
+		if parent == "0" {
+			parent = ""
+		} else {
+			parent = hash(parent)
+		}
+		current = hash(current)
+
+		if err := d.Create(current, parent, ""); err != nil {
+			t.Logf("Current layer %d", i)
+			t.Fatal(err)
+		}
+		point, err := d.Get(current)
+		if err != nil {
+			t.Logf("Current layer %d", i)
+			t.Fatal(err)
+		}
+		f, err := os.Create(path.Join(point, current))
+		if err != nil {
+			t.Logf("Current layer %d", i)
+			t.Fatal(err)
+		}
+		f.Close()
+
+		if i%10 == 0 {
+			if err := os.Remove(path.Join(point, parent)); err != nil {
+				t.Logf("Current layer %d", i)
+				t.Fatal(err)
+			}
+			expected--
+		}
+		last = current
+	}
+
+	// Perform the actual mount for the top most image
+	point, err := d.Get(last)
+	if err != nil {
+		t.Fatal(err)
+	}
+	files, err := ioutil.ReadDir(point)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(files) != expected {
+		t.Fatalf("Expected %d got %d", expected, len(files))
+	}
+}
diff --git a/runtime/graphdriver/aufs/dirs.go b/runtime/graphdriver/aufs/dirs.go
new file mode 100644
index 0000000000..fb9b81edd2
--- /dev/null
+++ b/runtime/graphdriver/aufs/dirs.go
@@ -0,0 +1,46 @@
+package aufs
+
+import (
+	"bufio"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+// Return all the directories
+func loadIds(root string) ([]string, error) {
+	dirs, err := ioutil.ReadDir(root)
+	if err != nil {
+		return nil, err
+	}
+	out := []string{}
+	for _, d := range dirs {
+		if !d.IsDir() {
+			out = append(out, d.Name())
+		}
+	}
+	return out, nil
+}
+
+// Read the layers file for the current id and return all the
+// layers represented by new lines in the file
+//
+// If there are no lines in the file then the id has no parent
+// and an empty slice is returned.
+func getParentIds(root, id string) ([]string, error) {
+	f, err := os.Open(path.Join(root, "layers", id))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	out := []string{}
+	s := bufio.NewScanner(f)
+
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			out = append(out, s.Text())
+		}
+	}
+	return out, s.Err()
+}
diff --git a/runtime/graphdriver/aufs/migrate.go b/runtime/graphdriver/aufs/migrate.go
new file mode 100644
index 0000000000..400e260797
--- /dev/null
+++ b/runtime/graphdriver/aufs/migrate.go
@@ -0,0 +1,194 @@
+package aufs
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+type metadata struct {
+	ID       string `json:"id"`
+	ParentID string `json:"parent,omitempty"`
+	Image    string `json:"Image,omitempty"`
+
+	parent *metadata
+}
+
+func pathExists(pth string) bool {
+	if _, err := os.Stat(pth); err != nil {
+		return false
+	}
+	return true
+}
+
+// Migrate existing images and containers from docker < 0.7.x
+//
+// The format pre 0.7 is for docker to store the metadata and filesystem
+// content in the same directory.  For the migration to work we need to move Image layer
+// data from /var/lib/docker/graph/<id>/layers to the diff of the registered id.
+//
+// Next we need to migrate the container's rw layer to diff of the driver.  After the
+// contents are migrated we need to register the image and container ids with the
+// driver.
+//
+// For the migration we try to move the folder containing the layer files, if that
+// fails because the data is currently mounted we will fallback to creating a
+// symlink.
+func (a *Driver) Migrate(pth string, setupInit func(p string) error) error {
+	if pathExists(path.Join(pth, "graph")) {
+		if err := a.migrateRepositories(pth); err != nil {
+			return err
+		}
+		if err := a.migrateImages(path.Join(pth, "graph")); err != nil {
+			return err
+		}
+		return a.migrateContainers(path.Join(pth, "containers"), setupInit)
+	}
+	return nil
+}
+
+func (a *Driver) migrateRepositories(pth string) error {
+	name := path.Join(pth, "repositories")
+	if err := os.Rename(name, name+"-aufs"); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) error {
+	fis, err := ioutil.ReadDir(pth)
+	if err != nil {
+		return err
+	}
+
+	for _, fi := range fis {
+		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "rw")) {
+			if err := tryRelocate(path.Join(pth, id, "rw"), path.Join(a.rootPath(), "diff", id)); err != nil {
+				return err
+			}
+
+			if !a.Exists(id) {
+
+				metadata, err := loadMetadata(path.Join(pth, id, "config.json"))
+				if err != nil {
+					return err
+				}
+
+				initID := fmt.Sprintf("%s-init", id)
+				if err := a.Create(initID, metadata.Image, ""); err != nil {
+					return err
+				}
+
+				initPath, err := a.Get(initID)
+				if err != nil {
+					return err
+				}
+				// setup init layer
+				if err := setupInit(initPath); err != nil {
+					return err
+				}
+
+				if err := a.Create(id, initID, ""); err != nil {
+					return err
+				}
+			}
+		}
+	}
+	return nil
+}
+
+func (a *Driver) migrateImages(pth string) error {
+	fis, err := ioutil.ReadDir(pth)
+	if err != nil {
+		return err
+	}
+	var (
+		m       = make(map[string]*metadata)
+		current *metadata
+		exists  bool
+	)
+
+	for _, fi := range fis {
+		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "layer")) {
+			if current, exists = m[id]; !exists {
+				current, err = loadMetadata(path.Join(pth, id, "json"))
+				if err != nil {
+					return err
+				}
+				m[id] = current
+			}
+		}
+	}
+
+	for _, v := range m {
+		v.parent = m[v.ParentID]
+	}
+
+	migrated := make(map[string]bool)
+	for _, v := range m {
+		if err := a.migrateImage(v, pth, migrated); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (a *Driver) migrateImage(m *metadata, pth string, migrated map[string]bool) error {
+	if !migrated[m.ID] {
+		if m.parent != nil {
+			a.migrateImage(m.parent, pth, migrated)
+		}
+		if err := tryRelocate(path.Join(pth, m.ID, "layer"), path.Join(a.rootPath(), "diff", m.ID)); err != nil {
+			return err
+		}
+		if !a.Exists(m.ID) {
+			if err := a.Create(m.ID, m.ParentID, ""); err != nil {
+				return err
+			}
+		}
+		migrated[m.ID] = true
+	}
+	return nil
+}
+
+// tryRelocate will try to rename the old path to the new pack and if
+// the operation fails, it will fallback to a symlink
+func tryRelocate(oldPath, newPath string) error {
+	s, err := os.Lstat(newPath)
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	// If the destination is a symlink then we already tried to relocate once before
+	// and it failed so we delete it and try to remove
+	if s != nil && s.Mode()&os.ModeSymlink == os.ModeSymlink {
+		if err := os.RemoveAll(newPath); err != nil {
+			return err
+		}
+	}
+	if err := os.Rename(oldPath, newPath); err != nil {
+		if sErr := os.Symlink(oldPath, newPath); sErr != nil {
+			return fmt.Errorf("Unable to relocate %s to %s: Rename err %s Symlink err %s", oldPath, newPath, err, sErr)
+		}
+	}
+	return nil
+}
+
+func loadMetadata(pth string) (*metadata, error) {
+	f, err := os.Open(pth)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var (
+		out = &metadata{}
+		dec = json.NewDecoder(f)
+	)
+
+	if err := dec.Decode(out); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
diff --git a/runtime/graphdriver/aufs/mount.go b/runtime/graphdriver/aufs/mount.go
new file mode 100644
index 0000000000..1f1d98f809
--- /dev/null
+++ b/runtime/graphdriver/aufs/mount.go
@@ -0,0 +1,17 @@
+package aufs
+
+import (
+	"github.com/dotcloud/docker/utils"
+	"os/exec"
+	"syscall"
+)
+
+func Unmount(target string) error {
+	if err := exec.Command("auplink", target, "flush").Run(); err != nil {
+		utils.Errorf("[warning]: couldn't run auplink before unmount: %s", err)
+	}
+	if err := syscall.Unmount(target, 0); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/runtime/graphdriver/aufs/mount_linux.go b/runtime/graphdriver/aufs/mount_linux.go
new file mode 100644
index 0000000000..6082d9f240
--- /dev/null
+++ b/runtime/graphdriver/aufs/mount_linux.go
@@ -0,0 +1,11 @@
+// +build amd64
+
+package aufs
+
+import "syscall"
+
+const MsRemount = syscall.MS_REMOUNT
+
+func mount(source string, target string, fstype string, flags uintptr, data string) error {
+	return syscall.Mount(source, target, fstype, flags, data)
+}
diff --git a/runtime/graphdriver/aufs/mount_unsupported.go b/runtime/graphdriver/aufs/mount_unsupported.go
new file mode 100644
index 0000000000..2735624112
--- /dev/null
+++ b/runtime/graphdriver/aufs/mount_unsupported.go
@@ -0,0 +1,11 @@
+// +build !linux !amd64
+
+package aufs
+
+import "errors"
+
+const MsRemount = 0
+
+func mount(source string, target string, fstype string, flags uintptr, data string) (err error) {
+	return errors.New("mount is not implemented on darwin")
+}
diff --git a/runtime/graphdriver/btrfs/btrfs.go b/runtime/graphdriver/btrfs/btrfs.go
new file mode 100644
index 0000000000..2a94a4089f
--- /dev/null
+++ b/runtime/graphdriver/btrfs/btrfs.go
@@ -0,0 +1,213 @@
+// +build linux,amd64
+
+package btrfs
+
+/*
+#include <stdlib.h>
+#include <dirent.h>
+#include <btrfs/ioctl.h>
+*/
+import "C"
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"os"
+	"path"
+	"syscall"
+	"unsafe"
+)
+
+func init() {
+	graphdriver.Register("btrfs", Init)
+}
+
+func Init(home string) (graphdriver.Driver, error) {
+	rootdir := path.Dir(home)
+
+	var buf syscall.Statfs_t
+	if err := syscall.Statfs(rootdir, &buf); err != nil {
+		return nil, err
+	}
+
+	if buf.Type != 0x9123683E {
+		return nil, fmt.Errorf("%s is not a btrfs filesystem", rootdir)
+	}
+
+	return &Driver{
+		home: home,
+	}, nil
+}
+
+type Driver struct {
+	home string
+}
+
+func (d *Driver) String() string {
+	return "btrfs"
+}
+
+func (d *Driver) Status() [][2]string {
+	return nil
+}
+
+func (d *Driver) Cleanup() error {
+	return nil
+}
+
+func free(p *C.char) {
+	C.free(unsafe.Pointer(p))
+}
+
+func openDir(path string) (*C.DIR, error) {
+	Cpath := C.CString(path)
+	defer free(Cpath)
+
+	dir := C.opendir(Cpath)
+	if dir == nil {
+		return nil, fmt.Errorf("Can't open dir")
+	}
+	return dir, nil
+}
+
+func closeDir(dir *C.DIR) {
+	if dir != nil {
+		C.closedir(dir)
+	}
+}
+
+func getDirFd(dir *C.DIR) uintptr {
+	return uintptr(C.dirfd(dir))
+}
+
+func subvolCreate(path, name string, mountLabel string) error {
+	dir, err := openDir(path)
+	if err != nil {
+		return err
+	}
+	defer closeDir(dir)
+
+	var args C.struct_btrfs_ioctl_vol_args
+	for i, c := range []byte(name) {
+		args.name[i] = C.char(c)
+	}
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
+		uintptr(unsafe.Pointer(&args)))
+	if errno != 0 {
+		return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error())
+	}
+	return nil
+}
+
+func subvolSnapshot(src, dest, name string) error {
+	srcDir, err := openDir(src)
+	if err != nil {
+		return err
+	}
+	defer closeDir(srcDir)
+
+	destDir, err := openDir(dest)
+	if err != nil {
+		return err
+	}
+	defer closeDir(destDir)
+
+	var args C.struct_btrfs_ioctl_vol_args_v2
+	args.fd = C.__s64(getDirFd(srcDir))
+	for i, c := range []byte(name) {
+		args.name[i] = C.char(c)
+	}
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
+		uintptr(unsafe.Pointer(&args)))
+	if errno != 0 {
+		return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error())
+	}
+	return nil
+}
+
+func subvolDelete(path, name string) error {
+	dir, err := openDir(path)
+	if err != nil {
+		return err
+	}
+	defer closeDir(dir)
+
+	var args C.struct_btrfs_ioctl_vol_args
+	for i, c := range []byte(name) {
+		args.name[i] = C.char(c)
+	}
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
+		uintptr(unsafe.Pointer(&args)))
+	if errno != 0 {
+		return fmt.Errorf("Failed to destroy btrfs snapshot: %v", errno.Error())
+	}
+	return nil
+}
+
+func (d *Driver) subvolumesDir() string {
+	return path.Join(d.home, "subvolumes")
+}
+
+func (d *Driver) subvolumesDirId(id string) string {
+	return path.Join(d.subvolumesDir(), id)
+}
+
+func (d *Driver) Create(id string, parent string, mountLabel string) error {
+	subvolumes := path.Join(d.home, "subvolumes")
+	if err := os.MkdirAll(subvolumes, 0700); err != nil {
+		return err
+	}
+	if parent == "" {
+		if err := subvolCreate(subvolumes, id, mountLabel); err != nil {
+			return err
+		}
+	} else {
+		parentDir, err := d.Get(parent)
+		if err != nil {
+			return err
+		}
+		if err := subvolSnapshot(parentDir, subvolumes, id); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *Driver) Remove(id string) error {
+	dir := d.subvolumesDirId(id)
+	if _, err := os.Stat(dir); err != nil {
+		return err
+	}
+	if err := subvolDelete(d.subvolumesDir(), id); err != nil {
+		return err
+	}
+	return os.RemoveAll(dir)
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	dir := d.subvolumesDirId(id)
+	st, err := os.Stat(dir)
+	if err != nil {
+		return "", err
+	}
+
+	if !st.IsDir() {
+		return "", fmt.Errorf("%s: not a directory", dir)
+	}
+
+	return dir, nil
+}
+
+func (d *Driver) Put(id string) {
+	// Get() creates no runtime resources (like e.g. mounts)
+	// so this doesn't need to do anything.
+}
+
+func (d *Driver) Exists(id string) bool {
+	dir := d.subvolumesDirId(id)
+	_, err := os.Stat(dir)
+	return err == nil
+}
diff --git a/runtime/graphdriver/btrfs/dummy_unsupported.go b/runtime/graphdriver/btrfs/dummy_unsupported.go
new file mode 100644
index 0000000000..6c44615763
--- /dev/null
+++ b/runtime/graphdriver/btrfs/dummy_unsupported.go
@@ -0,0 +1,3 @@
+// +build !linux !amd64
+
+package btrfs
diff --git a/runtime/graphdriver/devmapper/attach_loopback.go b/runtime/graphdriver/devmapper/attach_loopback.go
new file mode 100644
index 0000000000..23339076e8
--- /dev/null
+++ b/runtime/graphdriver/devmapper/attach_loopback.go
@@ -0,0 +1,126 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+)
+
+func stringToLoopName(src string) [LoNameSize]uint8 {
+	var dst [LoNameSize]uint8
+	copy(dst[:], src[:])
+	return dst
+}
+
+func getNextFreeLoopbackIndex() (int, error) {
+	f, err := osOpenFile("/dev/loop-control", osORdOnly, 0644)
+	if err != nil {
+		return 0, err
+	}
+	defer f.Close()
+
+	index, err := ioctlLoopCtlGetFree(f.Fd())
+	if index < 0 {
+		index = 0
+	}
+	return index, err
+}
+
+func openNextAvailableLoopback(index int, sparseFile *osFile) (loopFile *osFile, err error) {
+	// Start looking for a free /dev/loop
+	for {
+		target := fmt.Sprintf("/dev/loop%d", index)
+		index++
+
+		fi, err := osStat(target)
+		if err != nil {
+			if osIsNotExist(err) {
+				utils.Errorf("There are no more loopback device available.")
+			}
+			return nil, ErrAttachLoopbackDevice
+		}
+
+		if fi.Mode()&osModeDevice != osModeDevice {
+			utils.Errorf("Loopback device %s is not a block device.", target)
+			continue
+		}
+
+		// OpenFile adds O_CLOEXEC
+		loopFile, err = osOpenFile(target, osORdWr, 0644)
+		if err != nil {
+			utils.Errorf("Error openning loopback device: %s", err)
+			return nil, ErrAttachLoopbackDevice
+		}
+
+		// Try to attach to the loop file
+		if err := ioctlLoopSetFd(loopFile.Fd(), sparseFile.Fd()); err != nil {
+			loopFile.Close()
+
+			// If the error is EBUSY, then try the next loopback
+			if err != sysEBusy {
+				utils.Errorf("Cannot set up loopback device %s: %s", target, err)
+				return nil, ErrAttachLoopbackDevice
+			}
+
+			// Otherwise, we keep going with the loop
+			continue
+		}
+		// In case of success, we finished. Break the loop.
+		break
+	}
+
+	// This can't happen, but let's be sure
+	if loopFile == nil {
+		utils.Errorf("Unreachable code reached! Error attaching %s to a loopback device.", sparseFile.Name())
+		return nil, ErrAttachLoopbackDevice
+	}
+
+	return loopFile, nil
+}
+
+// attachLoopDevice attaches the given sparse file to the next
+// available loopback device. It returns an opened *osFile.
+func attachLoopDevice(sparseName string) (loop *osFile, err error) {
+
+	// Try to retrieve the next available loopback device via syscall.
+	// If it fails, we discard error and start loopking for a
+	// loopback from index 0.
+	startIndex, err := getNextFreeLoopbackIndex()
+	if err != nil {
+		utils.Debugf("Error retrieving the next available loopback: %s", err)
+	}
+
+	// OpenFile adds O_CLOEXEC
+	sparseFile, err := osOpenFile(sparseName, osORdWr, 0644)
+	if err != nil {
+		utils.Errorf("Error openning sparse file %s: %s", sparseName, err)
+		return nil, ErrAttachLoopbackDevice
+	}
+	defer sparseFile.Close()
+
+	loopFile, err := openNextAvailableLoopback(startIndex, sparseFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// Set the status of the loopback device
+	loopInfo := &LoopInfo64{
+		loFileName: stringToLoopName(loopFile.Name()),
+		loOffset:   0,
+		loFlags:    LoFlagsAutoClear,
+	}
+
+	if err := ioctlLoopSetStatus64(loopFile.Fd(), loopInfo); err != nil {
+		utils.Errorf("Cannot set up loopback device info: %s", err)
+
+		// If the call failed, then free the loopback device
+		if err := ioctlLoopClrFd(loopFile.Fd()); err != nil {
+			utils.Errorf("Error while cleaning up the loopback device")
+		}
+		loopFile.Close()
+		return nil, ErrAttachLoopbackDevice
+	}
+
+	return loopFile, nil
+}
diff --git a/runtime/graphdriver/devmapper/deviceset.go b/runtime/graphdriver/devmapper/deviceset.go
new file mode 100644
index 0000000000..97d670a3d9
--- /dev/null
+++ b/runtime/graphdriver/devmapper/deviceset.go
@@ -0,0 +1,1122 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/utils"
+	"io"
+	"io/ioutil"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+var (
+	DefaultDataLoopbackSize     int64  = 100 * 1024 * 1024 * 1024
+	DefaultMetaDataLoopbackSize int64  = 2 * 1024 * 1024 * 1024
+	DefaultBaseFsSize           uint64 = 10 * 1024 * 1024 * 1024
+)
+
+type DevInfo struct {
+	Hash          string     `json:"-"`
+	DeviceId      int        `json:"device_id"`
+	Size          uint64     `json:"size"`
+	TransactionId uint64     `json:"transaction_id"`
+	Initialized   bool       `json:"initialized"`
+	devices       *DeviceSet `json:"-"`
+
+	mountCount int    `json:"-"`
+	mountPath  string `json:"-"`
+	// A floating mount means one reference is not owned and
+	// will be stolen by the next mount. This allows us to
+	// avoid unmounting directly after creation before the
+	// first get (since we need to mount to set up the device
+	// a bit first).
+	floating bool `json:"-"`
+
+	// The global DeviceSet lock guarantees that we serialize all
+	// the calls to libdevmapper (which is not threadsafe), but we
+	// sometimes release that lock while sleeping. In that case
+	// this per-device lock is still held, protecting against
+	// other accesses to the device that we're doing the wait on.
+	//
+	// WARNING: In order to avoid AB-BA deadlocks when releasing
+	// the global lock while holding the per-device locks all
+	// device locks must be aquired *before* the device lock, and
+	// multiple device locks should be aquired parent before child.
+	lock sync.Mutex `json:"-"`
+}
+
+type MetaData struct {
+	Devices     map[string]*DevInfo `json:devices`
+	devicesLock sync.Mutex          `json:"-"` // Protects all read/writes to Devices map
+}
+
+type DeviceSet struct {
+	MetaData
+	sync.Mutex       // Protects Devices map and serializes calls into libdevmapper
+	root             string
+	devicePrefix     string
+	TransactionId    uint64
+	NewTransactionId uint64
+	nextFreeDevice   int
+	sawBusy          bool
+}
+
+type DiskUsage struct {
+	Used  uint64
+	Total uint64
+}
+
+type Status struct {
+	PoolName         string
+	DataLoopback     string
+	MetadataLoopback string
+	Data             DiskUsage
+	Metadata         DiskUsage
+	SectorSize       uint64
+}
+
+type DevStatus struct {
+	DeviceId            int
+	Size                uint64
+	TransactionId       uint64
+	SizeInSectors       uint64
+	MappedSectors       uint64
+	HighestMappedSector uint64
+}
+
+type UnmountMode int
+
+const (
+	UnmountRegular UnmountMode = iota
+	UnmountFloat
+	UnmountSink
+)
+
+func getDevName(name string) string {
+	return "/dev/mapper/" + name
+}
+
+func (info *DevInfo) Name() string {
+	hash := info.Hash
+	if hash == "" {
+		hash = "base"
+	}
+	return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash)
+}
+
+func (info *DevInfo) DevName() string {
+	return getDevName(info.Name())
+}
+
+func (devices *DeviceSet) loopbackDir() string {
+	return path.Join(devices.root, "devicemapper")
+}
+
+func (devices *DeviceSet) jsonFile() string {
+	return path.Join(devices.loopbackDir(), "json")
+}
+
+func (devices *DeviceSet) getPoolName() string {
+	return devices.devicePrefix + "-pool"
+}
+
+func (devices *DeviceSet) getPoolDevName() string {
+	return getDevName(devices.getPoolName())
+}
+
+func (devices *DeviceSet) hasImage(name string) bool {
+	dirname := devices.loopbackDir()
+	filename := path.Join(dirname, name)
+
+	_, err := osStat(filename)
+	return err == nil
+}
+
+// ensureImage creates a sparse file of <size> bytes at the path
+// <root>/devicemapper/<name>.
+// If the file already exists, it does nothing.
+// Either way it returns the full path.
+func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
+	dirname := devices.loopbackDir()
+	filename := path.Join(dirname, name)
+
+	if err := osMkdirAll(dirname, 0700); err != nil && !osIsExist(err) {
+		return "", err
+	}
+
+	if _, err := osStat(filename); err != nil {
+		if !osIsNotExist(err) {
+			return "", err
+		}
+		utils.Debugf("Creating loopback file %s for device-manage use", filename)
+		file, err := osOpenFile(filename, osORdWr|osOCreate, 0600)
+		if err != nil {
+			return "", err
+		}
+		defer file.Close()
+
+		if err = file.Truncate(size); err != nil {
+			return "", err
+		}
+	}
+	return filename, nil
+}
+
+func (devices *DeviceSet) allocateDeviceId() int {
+	// TODO: Add smarter reuse of deleted devices
+	id := devices.nextFreeDevice
+	devices.nextFreeDevice = devices.nextFreeDevice + 1
+	return id
+}
+
+func (devices *DeviceSet) allocateTransactionId() uint64 {
+	devices.NewTransactionId = devices.NewTransactionId + 1
+	return devices.NewTransactionId
+}
+
+func (devices *DeviceSet) saveMetadata() error {
+	devices.devicesLock.Lock()
+	jsonData, err := json.Marshal(devices.MetaData)
+	devices.devicesLock.Unlock()
+	if err != nil {
+		return fmt.Errorf("Error encoding metadata to json: %s", err)
+	}
+	tmpFile, err := ioutil.TempFile(filepath.Dir(devices.jsonFile()), ".json")
+	if err != nil {
+		return fmt.Errorf("Error creating metadata file: %s", err)
+	}
+
+	n, err := tmpFile.Write(jsonData)
+	if err != nil {
+		return fmt.Errorf("Error writing metadata to %s: %s", tmpFile.Name(), err)
+	}
+	if n < len(jsonData) {
+		return io.ErrShortWrite
+	}
+	if err := tmpFile.Sync(); err != nil {
+		return fmt.Errorf("Error syncing metadata file %s: %s", tmpFile.Name(), err)
+	}
+	if err := tmpFile.Close(); err != nil {
+		return fmt.Errorf("Error closing metadata file %s: %s", tmpFile.Name(), err)
+	}
+	if err := osRename(tmpFile.Name(), devices.jsonFile()); err != nil {
+		return fmt.Errorf("Error committing metadata file %s: %s", tmpFile.Name(), err)
+	}
+
+	if devices.NewTransactionId != devices.TransactionId {
+		if err = setTransactionId(devices.getPoolDevName(), devices.TransactionId, devices.NewTransactionId); err != nil {
+			return fmt.Errorf("Error setting devmapper transition ID: %s", err)
+		}
+		devices.TransactionId = devices.NewTransactionId
+	}
+	return nil
+}
+
+func (devices *DeviceSet) lookupDevice(hash string) (*DevInfo, error) {
+	devices.devicesLock.Lock()
+	defer devices.devicesLock.Unlock()
+	info := devices.Devices[hash]
+	if info == nil {
+		return nil, fmt.Errorf("Unknown device %s", hash)
+	}
+	return info, nil
+}
+
+func (devices *DeviceSet) registerDevice(id int, hash string, size uint64) (*DevInfo, error) {
+	utils.Debugf("registerDevice(%v, %v)", id, hash)
+	info := &DevInfo{
+		Hash:          hash,
+		DeviceId:      id,
+		Size:          size,
+		TransactionId: devices.allocateTransactionId(),
+		Initialized:   false,
+		devices:       devices,
+	}
+
+	devices.devicesLock.Lock()
+	devices.Devices[hash] = info
+	devices.devicesLock.Unlock()
+
+	if err := devices.saveMetadata(); err != nil {
+		// Try to remove unused device
+		devices.devicesLock.Lock()
+		delete(devices.Devices, hash)
+		devices.devicesLock.Unlock()
+		return nil, err
+	}
+
+	return info, nil
+}
+
+func (devices *DeviceSet) activateDeviceIfNeeded(info *DevInfo) error {
+	utils.Debugf("activateDeviceIfNeeded(%v)", info.Hash)
+
+	if devinfo, _ := getInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 {
+		return nil
+	}
+
+	return activateDevice(devices.getPoolDevName(), info.Name(), info.DeviceId, info.Size)
+}
+
+func (devices *DeviceSet) createFilesystem(info *DevInfo) error {
+	devname := info.DevName()
+
+	err := execRun("mkfs.ext4", "-E", "discard,lazy_itable_init=0,lazy_journal_init=0", devname)
+	if err != nil {
+		err = execRun("mkfs.ext4", "-E", "discard,lazy_itable_init=0", devname)
+	}
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	return nil
+}
+
+func (devices *DeviceSet) loadMetaData() error {
+	utils.Debugf("loadMetadata()")
+	defer utils.Debugf("loadMetadata END")
+	_, _, _, params, err := getStatus(devices.getPoolName())
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	if _, err := fmt.Sscanf(params, "%d", &devices.TransactionId); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	devices.NewTransactionId = devices.TransactionId
+
+	jsonData, err := ioutil.ReadFile(devices.jsonFile())
+	if err != nil && !osIsNotExist(err) {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	devices.MetaData.Devices = make(map[string]*DevInfo)
+	if jsonData != nil {
+		if err := json.Unmarshal(jsonData, &devices.MetaData); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	for hash, d := range devices.Devices {
+		d.Hash = hash
+		d.devices = devices
+
+		if d.DeviceId >= devices.nextFreeDevice {
+			devices.nextFreeDevice = d.DeviceId + 1
+		}
+
+		// If the transaction id is larger than the actual one we lost the device due to some crash
+		if d.TransactionId > devices.TransactionId {
+			utils.Debugf("Removing lost device %s with id %d", hash, d.TransactionId)
+			delete(devices.Devices, hash)
+		}
+	}
+	return nil
+}
+
+func (devices *DeviceSet) setupBaseImage() error {
+	oldInfo, _ := devices.lookupDevice("")
+	if oldInfo != nil && oldInfo.Initialized {
+		return nil
+	}
+
+	if oldInfo != nil && !oldInfo.Initialized {
+		utils.Debugf("Removing uninitialized base image")
+		if err := devices.deleteDevice(oldInfo); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	utils.Debugf("Initializing base device-manager snapshot")
+
+	id := devices.allocateDeviceId()
+
+	// Create initial device
+	if err := createDevice(devices.getPoolDevName(), id); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	utils.Debugf("Registering base device (id %v) with FS size %v", id, DefaultBaseFsSize)
+	info, err := devices.registerDevice(id, "", DefaultBaseFsSize)
+	if err != nil {
+		_ = deleteDevice(devices.getPoolDevName(), id)
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	utils.Debugf("Creating filesystem on base device-manager snapshot")
+
+	if err = devices.activateDeviceIfNeeded(info); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	if err := devices.createFilesystem(info); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	info.Initialized = true
+	if err = devices.saveMetadata(); err != nil {
+		info.Initialized = false
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func setCloseOnExec(name string) {
+	if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil {
+		for _, i := range fileInfos {
+			link, _ := osReadlink(filepath.Join("/proc/self/fd", i.Name()))
+			if link == name {
+				fd, err := strconv.Atoi(i.Name())
+				if err == nil {
+					sysCloseOnExec(fd)
+				}
+			}
+		}
+	}
+}
+
+func (devices *DeviceSet) log(level int, file string, line int, dmError int, message string) {
+	if level >= 7 {
+		return // Ignore _LOG_DEBUG
+	}
+
+	if strings.Contains(message, "busy") {
+		devices.sawBusy = true
+	}
+
+	utils.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
+}
+
+func major(device uint64) uint64 {
+	return (device >> 8) & 0xfff
+}
+
+func minor(device uint64) uint64 {
+	return (device & 0xff) | ((device >> 12) & 0xfff00)
+}
+
+func (devices *DeviceSet) ResizePool(size int64) error {
+	dirname := devices.loopbackDir()
+	datafilename := path.Join(dirname, "data")
+	metadatafilename := path.Join(dirname, "metadata")
+
+	datafile, err := osOpenFile(datafilename, osORdWr, 0)
+	if datafile == nil {
+		return err
+	}
+	defer datafile.Close()
+
+	fi, err := datafile.Stat()
+	if fi == nil {
+		return err
+	}
+
+	if fi.Size() > size {
+		return fmt.Errorf("Can't shrink file")
+	}
+
+	dataloopback := FindLoopDeviceFor(datafile)
+	if dataloopback == nil {
+		return fmt.Errorf("Unable to find loopback mount for: %s", datafilename)
+	}
+	defer dataloopback.Close()
+
+	metadatafile, err := osOpenFile(metadatafilename, osORdWr, 0)
+	if metadatafile == nil {
+		return err
+	}
+	defer metadatafile.Close()
+
+	metadataloopback := FindLoopDeviceFor(metadatafile)
+	if metadataloopback == nil {
+		return fmt.Errorf("Unable to find loopback mount for: %s", metadatafilename)
+	}
+	defer metadataloopback.Close()
+
+	// Grow loopback file
+	if err := datafile.Truncate(size); err != nil {
+		return fmt.Errorf("Unable to grow loopback file: %s", err)
+	}
+
+	// Reload size for loopback device
+	if err := LoopbackSetCapacity(dataloopback); err != nil {
+		return fmt.Errorf("Unable to update loopback capacity: %s", err)
+	}
+
+	// Suspend the pool
+	if err := suspendDevice(devices.getPoolName()); err != nil {
+		return fmt.Errorf("Unable to suspend pool: %s", err)
+	}
+
+	// Reload with the new block sizes
+	if err := reloadPool(devices.getPoolName(), dataloopback, metadataloopback); err != nil {
+		return fmt.Errorf("Unable to reload pool: %s", err)
+	}
+
+	// Resume the pool
+	if err := resumeDevice(devices.getPoolName()); err != nil {
+		return fmt.Errorf("Unable to resume pool: %s", err)
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) initDevmapper(doInit bool) error {
+	logInit(devices)
+
+	// Make sure the sparse images exist in <root>/devicemapper/data and
+	// <root>/devicemapper/metadata
+
+	hasData := devices.hasImage("data")
+	hasMetadata := devices.hasImage("metadata")
+
+	if !doInit && !hasData {
+		return errors.New("Loopback data file not found")
+	}
+
+	if !doInit && !hasMetadata {
+		return errors.New("Loopback metadata file not found")
+	}
+
+	createdLoopback := !hasData || !hasMetadata
+	data, err := devices.ensureImage("data", DefaultDataLoopbackSize)
+	if err != nil {
+		utils.Debugf("Error device ensureImage (data): %s\n", err)
+		return err
+	}
+	metadata, err := devices.ensureImage("metadata", DefaultMetaDataLoopbackSize)
+	if err != nil {
+		utils.Debugf("Error device ensureImage (metadata): %s\n", err)
+		return err
+	}
+
+	// Set the device prefix from the device id and inode of the docker root dir
+
+	st, err := osStat(devices.root)
+	if err != nil {
+		return fmt.Errorf("Error looking up dir %s: %s", devices.root, err)
+	}
+	sysSt := toSysStatT(st.Sys())
+	// "reg-" stands for "regular file".
+	// In the future we might use "dev-" for "device file", etc.
+	// docker-maj,min[-inode] stands for:
+	//	- Managed by docker
+	//	- The target of this device is at major <maj> and minor <min>
+	//	- If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself.
+	devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino)
+	utils.Debugf("Generated prefix: %s", devices.devicePrefix)
+
+	// Check for the existence of the device <prefix>-pool
+	utils.Debugf("Checking for existence of the pool '%s'", devices.getPoolName())
+	info, err := getInfo(devices.getPoolName())
+	if info == nil {
+		utils.Debugf("Error device getInfo: %s", err)
+		return err
+	}
+
+	// It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files
+	// that are not Close-on-exec, and lxc-start will die if it inherits any unexpected files,
+	// so we add this badhack to make sure it closes itself
+	setCloseOnExec("/dev/mapper/control")
+
+	// If the pool doesn't exist, create it
+	if info.Exists == 0 {
+		utils.Debugf("Pool doesn't exist. Creating it.")
+
+		dataFile, err := attachLoopDevice(data)
+		if err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		defer dataFile.Close()
+
+		metadataFile, err := attachLoopDevice(metadata)
+		if err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+		defer metadataFile.Close()
+
+		if err := createPool(devices.getPoolName(), dataFile, metadataFile); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	// If we didn't just create the data or metadata image, we need to
+	// load the metadata from the existing file.
+	if !createdLoopback {
+		if err = devices.loadMetaData(); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	// Setup the base image
+	if doInit {
+		if err := devices.setupBaseImage(); err != nil {
+			utils.Debugf("Error device setupBaseImage: %s\n", err)
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
+	baseInfo, err := devices.lookupDevice(baseHash)
+	if err != nil {
+		return err
+	}
+
+	baseInfo.lock.Lock()
+	defer baseInfo.lock.Unlock()
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	if info, _ := devices.lookupDevice(hash); info != nil {
+		return fmt.Errorf("device %s already exists", hash)
+	}
+
+	deviceId := devices.allocateDeviceId()
+
+	if err := devices.createSnapDevice(devices.getPoolDevName(), deviceId, baseInfo.Name(), baseInfo.DeviceId); err != nil {
+		utils.Debugf("Error creating snap device: %s\n", err)
+		return err
+	}
+
+	if _, err := devices.registerDevice(deviceId, hash, baseInfo.Size); err != nil {
+		deleteDevice(devices.getPoolDevName(), deviceId)
+		utils.Debugf("Error registering device: %s\n", err)
+		return err
+	}
+	return nil
+}
+
+func (devices *DeviceSet) deleteDevice(info *DevInfo) error {
+	// This is a workaround for the kernel not discarding block so
+	// on the thin pool when we remove a thinp device, so we do it
+	// manually
+	if err := devices.activateDeviceIfNeeded(info); err == nil {
+		if err := BlockDeviceDiscard(info.DevName()); err != nil {
+			utils.Debugf("Error discarding block on device: %s (ignoring)\n", err)
+		}
+	}
+
+	devinfo, _ := getInfo(info.Name())
+	if devinfo != nil && devinfo.Exists != 0 {
+		if err := devices.removeDeviceAndWait(info.Name()); err != nil {
+			utils.Debugf("Error removing device: %s\n", err)
+			return err
+		}
+	}
+
+	if info.Initialized {
+		info.Initialized = false
+		if err := devices.saveMetadata(); err != nil {
+			utils.Debugf("Error saving meta data: %s\n", err)
+			return err
+		}
+	}
+
+	if err := deleteDevice(devices.getPoolDevName(), info.DeviceId); err != nil {
+		utils.Debugf("Error deleting device: %s\n", err)
+		return err
+	}
+
+	devices.allocateTransactionId()
+	devices.devicesLock.Lock()
+	delete(devices.Devices, info.Hash)
+	devices.devicesLock.Unlock()
+
+	if err := devices.saveMetadata(); err != nil {
+		devices.devicesLock.Lock()
+		devices.Devices[info.Hash] = info
+		devices.devicesLock.Unlock()
+		utils.Debugf("Error saving meta data: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) DeleteDevice(hash string) error {
+	info, err := devices.lookupDevice(hash)
+	if err != nil {
+		return err
+	}
+
+	info.lock.Lock()
+	defer info.lock.Unlock()
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	return devices.deleteDevice(info)
+}
+
+func (devices *DeviceSet) deactivatePool() error {
+	utils.Debugf("[devmapper] deactivatePool()")
+	defer utils.Debugf("[devmapper] deactivatePool END")
+	devname := devices.getPoolDevName()
+	devinfo, err := getInfo(devname)
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	if devinfo.Exists != 0 {
+		return removeDevice(devname)
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) deactivateDevice(info *DevInfo) error {
+	utils.Debugf("[devmapper] deactivateDevice(%s)", info.Hash)
+	defer utils.Debugf("[devmapper] deactivateDevice END")
+
+	// Wait for the unmount to be effective,
+	// by watching the value of Info.OpenCount for the device
+	if err := devices.waitClose(info); err != nil {
+		utils.Errorf("Warning: error waiting for device %s to close: %s\n", info.Hash, err)
+	}
+
+	devinfo, err := getInfo(info.Name())
+	if err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	if devinfo.Exists != 0 {
+		if err := devices.removeDeviceAndWait(info.Name()); err != nil {
+			utils.Debugf("\n--->Err: %s\n", err)
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Issues the underlying dm remove operation and then waits
+// for it to finish.
+func (devices *DeviceSet) removeDeviceAndWait(devname string) error {
+	var err error
+
+	for i := 0; i < 1000; i++ {
+		devices.sawBusy = false
+		err = removeDevice(devname)
+		if err == nil {
+			break
+		}
+		if !devices.sawBusy {
+			return err
+		}
+
+		// If we see EBUSY it may be a transient error,
+		// sleep a bit a retry a few times.
+		devices.Unlock()
+		time.Sleep(10 * time.Millisecond)
+		devices.Lock()
+	}
+	if err != nil {
+		return err
+	}
+
+	if err := devices.waitRemove(devname); err != nil {
+		return err
+	}
+	return nil
+}
+
+// waitRemove blocks until either:
+// a) the device registered at <device_set_prefix>-<hash> is removed,
+// or b) the 10 second timeout expires.
+func (devices *DeviceSet) waitRemove(devname string) error {
+	utils.Debugf("[deviceset %s] waitRemove(%s)", devices.devicePrefix, devname)
+	defer utils.Debugf("[deviceset %s] waitRemove(%s) END", devices.devicePrefix, devname)
+	i := 0
+	for ; i < 1000; i += 1 {
+		devinfo, err := getInfo(devname)
+		if err != nil {
+			// If there is an error we assume the device doesn't exist.
+			// The error might actually be something else, but we can't differentiate.
+			return nil
+		}
+		if i%100 == 0 {
+			utils.Debugf("Waiting for removal of %s: exists=%d", devname, devinfo.Exists)
+		}
+		if devinfo.Exists == 0 {
+			break
+		}
+
+		devices.Unlock()
+		time.Sleep(10 * time.Millisecond)
+		devices.Lock()
+	}
+	if i == 1000 {
+		return fmt.Errorf("Timeout while waiting for device %s to be removed", devname)
+	}
+	return nil
+}
+
+// waitClose blocks until either:
+// a) the device registered at <device_set_prefix>-<hash> is closed,
+// or b) the 10 second timeout expires.
+func (devices *DeviceSet) waitClose(info *DevInfo) error {
+	i := 0
+	for ; i < 1000; i += 1 {
+		devinfo, err := getInfo(info.Name())
+		if err != nil {
+			return err
+		}
+		if i%100 == 0 {
+			utils.Debugf("Waiting for unmount of %s: opencount=%d", info.Hash, devinfo.OpenCount)
+		}
+		if devinfo.OpenCount == 0 {
+			break
+		}
+		devices.Unlock()
+		time.Sleep(10 * time.Millisecond)
+		devices.Lock()
+	}
+	if i == 1000 {
+		return fmt.Errorf("Timeout while waiting for device %s to close", info.Hash)
+	}
+	return nil
+}
+
+func (devices *DeviceSet) Shutdown() error {
+
+	utils.Debugf("[deviceset %s] shutdown()", devices.devicePrefix)
+	utils.Debugf("[devmapper] Shutting down DeviceSet: %s", devices.root)
+	defer utils.Debugf("[deviceset %s] shutdown END", devices.devicePrefix)
+
+	var devs []*DevInfo
+
+	devices.devicesLock.Lock()
+	for _, info := range devices.Devices {
+		devs = append(devs, info)
+	}
+	devices.devicesLock.Unlock()
+
+	for _, info := range devs {
+		info.lock.Lock()
+		if info.mountCount > 0 {
+			// We use MNT_DETACH here in case it is still busy in some running
+			// container. This means it'll go away from the global scope directly,
+			// and the device will be released when that container dies.
+			if err := sysUnmount(info.mountPath, syscall.MNT_DETACH); err != nil {
+				utils.Debugf("Shutdown unmounting %s, error: %s\n", info.mountPath, err)
+			}
+
+			devices.Lock()
+			if err := devices.deactivateDevice(info); err != nil {
+				utils.Debugf("Shutdown deactivate %s , error: %s\n", info.Hash, err)
+			}
+			devices.Unlock()
+		}
+		info.lock.Unlock()
+	}
+
+	info, _ := devices.lookupDevice("")
+	if info != nil {
+		info.lock.Lock()
+		devices.Lock()
+		if err := devices.deactivateDevice(info); err != nil {
+			utils.Debugf("Shutdown deactivate base , error: %s\n", err)
+		}
+		devices.Unlock()
+		info.lock.Unlock()
+	}
+
+	devices.Lock()
+	if err := devices.deactivatePool(); err != nil {
+		utils.Debugf("Shutdown deactivate pool , error: %s\n", err)
+	}
+	devices.Unlock()
+
+	return nil
+}
+
+func (devices *DeviceSet) MountDevice(hash, path string, mountLabel string) error {
+	info, err := devices.lookupDevice(hash)
+	if err != nil {
+		return err
+	}
+
+	info.lock.Lock()
+	defer info.lock.Unlock()
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	if info.mountCount > 0 {
+		if path != info.mountPath {
+			return fmt.Errorf("Trying to mount devmapper device in multple places (%s, %s)", info.mountPath, path)
+		}
+
+		if info.floating {
+			// Steal floating ref
+			info.floating = false
+		} else {
+			info.mountCount++
+		}
+		return nil
+	}
+
+	if err := devices.activateDeviceIfNeeded(info); err != nil {
+		return fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
+	}
+
+	var flags uintptr = sysMsMgcVal
+
+	mountOptions := label.FormatMountLabel("discard", mountLabel)
+	err = sysMount(info.DevName(), path, "ext4", flags, mountOptions)
+	if err != nil && err == sysEInval {
+		mountOptions = label.FormatMountLabel(mountLabel, "")
+		err = sysMount(info.DevName(), path, "ext4", flags, mountOptions)
+	}
+	if err != nil {
+		return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), path, err)
+	}
+
+	info.mountCount = 1
+	info.mountPath = path
+	info.floating = false
+
+	return devices.setInitialized(info)
+}
+
+func (devices *DeviceSet) UnmountDevice(hash string, mode UnmountMode) error {
+	utils.Debugf("[devmapper] UnmountDevice(hash=%s, mode=%d)", hash, mode)
+	defer utils.Debugf("[devmapper] UnmountDevice END")
+
+	info, err := devices.lookupDevice(hash)
+	if err != nil {
+		return err
+	}
+
+	info.lock.Lock()
+	defer info.lock.Unlock()
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	if mode == UnmountFloat {
+		if info.floating {
+			return fmt.Errorf("UnmountDevice: can't float floating reference %s\n", hash)
+		}
+
+		// Leave this reference floating
+		info.floating = true
+		return nil
+	}
+
+	if mode == UnmountSink {
+		if !info.floating {
+			// Someone already sunk this
+			return nil
+		}
+		// Otherwise, treat this as a regular unmount
+	}
+
+	if info.mountCount == 0 {
+		return fmt.Errorf("UnmountDevice: device not-mounted id %s\n", hash)
+	}
+
+	info.mountCount--
+	if info.mountCount > 0 {
+		return nil
+	}
+
+	utils.Debugf("[devmapper] Unmount(%s)", info.mountPath)
+	if err := sysUnmount(info.mountPath, 0); err != nil {
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+	utils.Debugf("[devmapper] Unmount done")
+
+	if err := devices.deactivateDevice(info); err != nil {
+		return err
+	}
+
+	info.mountPath = ""
+
+	return nil
+}
+
+func (devices *DeviceSet) HasDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info, _ := devices.lookupDevice(hash)
+	return info != nil
+}
+
+func (devices *DeviceSet) HasInitializedDevice(hash string) bool {
+	devices.Lock()
+	defer devices.Unlock()
+
+	info, _ := devices.lookupDevice(hash)
+	return info != nil && info.Initialized
+}
+
+func (devices *DeviceSet) HasActivatedDevice(hash string) bool {
+	info, _ := devices.lookupDevice(hash)
+	if info == nil {
+		return false
+	}
+
+	info.lock.Lock()
+	defer info.lock.Unlock()
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	devinfo, _ := getInfo(info.Name())
+	return devinfo != nil && devinfo.Exists != 0
+}
+
+func (devices *DeviceSet) setInitialized(info *DevInfo) error {
+	info.Initialized = true
+	if err := devices.saveMetadata(); err != nil {
+		info.Initialized = false
+		utils.Debugf("\n--->Err: %s\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (devices *DeviceSet) List() []string {
+	devices.Lock()
+	defer devices.Unlock()
+
+	devices.devicesLock.Lock()
+	ids := make([]string, len(devices.Devices))
+	i := 0
+	for k := range devices.Devices {
+		ids[i] = k
+		i++
+	}
+	devices.devicesLock.Unlock()
+
+	return ids
+}
+
+func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) {
+	var params string
+	_, sizeInSectors, _, params, err = getStatus(devName)
+	if err != nil {
+		return
+	}
+	if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil {
+		return
+	}
+	return
+}
+
+func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
+	info, err := devices.lookupDevice(hash)
+	if err != nil {
+		return nil, err
+	}
+
+	info.lock.Lock()
+	defer info.lock.Unlock()
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	status := &DevStatus{
+		DeviceId:      info.DeviceId,
+		Size:          info.Size,
+		TransactionId: info.TransactionId,
+	}
+
+	if err := devices.activateDeviceIfNeeded(info); err != nil {
+		return nil, fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
+	}
+
+	if sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName()); err != nil {
+		return nil, err
+	} else {
+		status.SizeInSectors = sizeInSectors
+		status.MappedSectors = mappedSectors
+		status.HighestMappedSector = highestMappedSector
+	}
+
+	return status, nil
+}
+
+func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionId, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) {
+	var params string
+	if _, totalSizeInSectors, _, params, err = getStatus(devices.getPoolName()); err == nil {
+		_, err = fmt.Sscanf(params, "%d %d/%d %d/%d", &transactionId, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal)
+	}
+	return
+}
+
+func (devices *DeviceSet) Status() *Status {
+	devices.Lock()
+	defer devices.Unlock()
+
+	status := &Status{}
+
+	status.PoolName = devices.getPoolName()
+	status.DataLoopback = path.Join(devices.loopbackDir(), "data")
+	status.MetadataLoopback = path.Join(devices.loopbackDir(), "metadata")
+
+	totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus()
+	if err == nil {
+		// Convert from blocks to bytes
+		blockSizeInSectors := totalSizeInSectors / dataTotal
+
+		status.Data.Used = dataUsed * blockSizeInSectors * 512
+		status.Data.Total = dataTotal * blockSizeInSectors * 512
+
+		// metadata blocks are always 4k
+		status.Metadata.Used = metadataUsed * 4096
+		status.Metadata.Total = metadataTotal * 4096
+
+		status.SectorSize = blockSizeInSectors * 512
+	}
+
+	return status
+}
+
+func NewDeviceSet(root string, doInit bool) (*DeviceSet, error) {
+	SetDevDir("/dev")
+
+	devices := &DeviceSet{
+		root:     root,
+		MetaData: MetaData{Devices: make(map[string]*DevInfo)},
+	}
+
+	if err := devices.initDevmapper(doInit); err != nil {
+		return nil, err
+	}
+
+	return devices, nil
+}
diff --git a/runtime/graphdriver/devmapper/devmapper.go b/runtime/graphdriver/devmapper/devmapper.go
new file mode 100644
index 0000000000..7317118dcf
--- /dev/null
+++ b/runtime/graphdriver/devmapper/devmapper.go
@@ -0,0 +1,595 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+	"runtime"
+	"syscall"
+)
+
+type DevmapperLogger interface {
+	log(level int, file string, line int, dmError int, message string)
+}
+
+const (
+	DeviceCreate TaskType = iota
+	DeviceReload
+	DeviceRemove
+	DeviceRemoveAll
+	DeviceSuspend
+	DeviceResume
+	DeviceInfo
+	DeviceDeps
+	DeviceRename
+	DeviceVersion
+	DeviceStatus
+	DeviceTable
+	DeviceWaitevent
+	DeviceList
+	DeviceClear
+	DeviceMknodes
+	DeviceListVersions
+	DeviceTargetMsg
+	DeviceSetGeometry
+)
+
+const (
+	AddNodeOnResume AddNodeType = iota
+	AddNodeOnCreate
+)
+
+var (
+	ErrTaskRun                = errors.New("dm_task_run failed")
+	ErrTaskSetName            = errors.New("dm_task_set_name failed")
+	ErrTaskSetMessage         = errors.New("dm_task_set_message failed")
+	ErrTaskSetAddNode         = errors.New("dm_task_set_add_node failed")
+	ErrTaskSetRo              = errors.New("dm_task_set_ro failed")
+	ErrTaskAddTarget          = errors.New("dm_task_add_target failed")
+	ErrTaskSetSector          = errors.New("dm_task_set_sector failed")
+	ErrTaskGetInfo            = errors.New("dm_task_get_info failed")
+	ErrTaskSetCookie          = errors.New("dm_task_set_cookie failed")
+	ErrNilCookie              = errors.New("cookie ptr can't be nil")
+	ErrAttachLoopbackDevice   = errors.New("loopback mounting failed")
+	ErrGetBlockSize           = errors.New("Can't get block size")
+	ErrUdevWait               = errors.New("wait on udev cookie failed")
+	ErrSetDevDir              = errors.New("dm_set_dev_dir failed")
+	ErrGetLibraryVersion      = errors.New("dm_get_library_version failed")
+	ErrCreateRemoveTask       = errors.New("Can't create task of type DeviceRemove")
+	ErrRunRemoveDevice        = errors.New("running removeDevice failed")
+	ErrInvalidAddNode         = errors.New("Invalide AddNoce type")
+	ErrGetLoopbackBackingFile = errors.New("Unable to get loopback backing file")
+	ErrLoopbackSetCapacity    = errors.New("Unable set loopback capacity")
+)
+
+type (
+	Task struct {
+		unmanaged *CDmTask
+	}
+	Info struct {
+		Exists        int
+		Suspended     int
+		LiveTable     int
+		InactiveTable int
+		OpenCount     int32
+		EventNr       uint32
+		Major         uint32
+		Minor         uint32
+		ReadOnly      int
+		TargetCount   int32
+	}
+	TaskType    int
+	AddNodeType int
+)
+
+func (t *Task) destroy() {
+	if t != nil {
+		DmTaskDestroy(t.unmanaged)
+		runtime.SetFinalizer(t, nil)
+	}
+}
+
+func TaskCreate(tasktype TaskType) *Task {
+	Ctask := DmTaskCreate(int(tasktype))
+	if Ctask == nil {
+		return nil
+	}
+	task := &Task{unmanaged: Ctask}
+	runtime.SetFinalizer(task, (*Task).destroy)
+	return task
+}
+
+func (t *Task) Run() error {
+	if res := DmTaskRun(t.unmanaged); res != 1 {
+		return ErrTaskRun
+	}
+	return nil
+}
+
+func (t *Task) SetName(name string) error {
+	if res := DmTaskSetName(t.unmanaged, name); res != 1 {
+		return ErrTaskSetName
+	}
+	return nil
+}
+
+func (t *Task) SetMessage(message string) error {
+	if res := DmTaskSetMessage(t.unmanaged, message); res != 1 {
+		return ErrTaskSetMessage
+	}
+	return nil
+}
+
+func (t *Task) SetSector(sector uint64) error {
+	if res := DmTaskSetSector(t.unmanaged, sector); res != 1 {
+		return ErrTaskSetSector
+	}
+	return nil
+}
+
+func (t *Task) SetCookie(cookie *uint, flags uint16) error {
+	if cookie == nil {
+		return ErrNilCookie
+	}
+	if res := DmTaskSetCookie(t.unmanaged, cookie, flags); res != 1 {
+		return ErrTaskSetCookie
+	}
+	return nil
+}
+
+func (t *Task) SetAddNode(addNode AddNodeType) error {
+	if addNode != AddNodeOnResume && addNode != AddNodeOnCreate {
+		return ErrInvalidAddNode
+	}
+	if res := DmTaskSetAddNode(t.unmanaged, addNode); res != 1 {
+		return ErrTaskSetAddNode
+	}
+	return nil
+}
+
+func (t *Task) SetRo() error {
+	if res := DmTaskSetRo(t.unmanaged); res != 1 {
+		return ErrTaskSetRo
+	}
+	return nil
+}
+
+func (t *Task) AddTarget(start, size uint64, ttype, params string) error {
+	if res := DmTaskAddTarget(t.unmanaged, start, size,
+		ttype, params); res != 1 {
+		return ErrTaskAddTarget
+	}
+	return nil
+}
+
+func (t *Task) GetInfo() (*Info, error) {
+	info := &Info{}
+	if res := DmTaskGetInfo(t.unmanaged, info); res != 1 {
+		return nil, ErrTaskGetInfo
+	}
+	return info, nil
+}
+
+func (t *Task) GetNextTarget(next uintptr) (nextPtr uintptr, start uint64,
+	length uint64, targetType string, params string) {
+
+	return DmGetNextTarget(t.unmanaged, next, &start, &length,
+			&targetType, &params),
+		start, length, targetType, params
+}
+
+func getLoopbackBackingFile(file *osFile) (uint64, uint64, error) {
+	loopInfo, err := ioctlLoopGetStatus64(file.Fd())
+	if err != nil {
+		utils.Errorf("Error get loopback backing file: %s\n", err)
+		return 0, 0, ErrGetLoopbackBackingFile
+	}
+	return loopInfo.loDevice, loopInfo.loInode, nil
+}
+
+func LoopbackSetCapacity(file *osFile) error {
+	if err := ioctlLoopSetCapacity(file.Fd(), 0); err != nil {
+		utils.Errorf("Error loopbackSetCapacity: %s", err)
+		return ErrLoopbackSetCapacity
+	}
+	return nil
+}
+
+func FindLoopDeviceFor(file *osFile) *osFile {
+	stat, err := file.Stat()
+	if err != nil {
+		return nil
+	}
+	targetInode := stat.Sys().(*sysStatT).Ino
+	targetDevice := stat.Sys().(*sysStatT).Dev
+
+	for i := 0; true; i++ {
+		path := fmt.Sprintf("/dev/loop%d", i)
+
+		file, err := osOpenFile(path, osORdWr, 0)
+		if err != nil {
+			if osIsNotExist(err) {
+				return nil
+			}
+
+			// Ignore all errors until the first not-exist
+			// we want to continue looking for the file
+			continue
+		}
+
+		dev, inode, err := getLoopbackBackingFile(file)
+		if err == nil && dev == targetDevice && inode == targetInode {
+			return file
+		}
+		file.Close()
+	}
+
+	return nil
+}
+
+func UdevWait(cookie uint) error {
+	if res := DmUdevWait(cookie); res != 1 {
+		utils.Debugf("Failed to wait on udev cookie %d", cookie)
+		return ErrUdevWait
+	}
+	return nil
+}
+
+func LogInitVerbose(level int) {
+	DmLogInitVerbose(level)
+}
+
+var dmLogger DevmapperLogger = nil
+
+func logInit(logger DevmapperLogger) {
+	dmLogger = logger
+	LogWithErrnoInit()
+}
+
+func SetDevDir(dir string) error {
+	if res := DmSetDevDir(dir); res != 1 {
+		utils.Debugf("Error dm_set_dev_dir")
+		return ErrSetDevDir
+	}
+	return nil
+}
+
+func GetLibraryVersion() (string, error) {
+	var version string
+	if res := DmGetLibraryVersion(&version); res != 1 {
+		return "", ErrGetLibraryVersion
+	}
+	return version, nil
+}
+
+// Useful helper for cleanup
+func RemoveDevice(name string) error {
+	task := TaskCreate(DeviceRemove)
+	if task == nil {
+		return ErrCreateRemoveTask
+	}
+	if err := task.SetName(name); err != nil {
+		utils.Debugf("Can't set task name %s", name)
+		return err
+	}
+	if err := task.Run(); err != nil {
+		return ErrRunRemoveDevice
+	}
+	return nil
+}
+
+func GetBlockDeviceSize(file *osFile) (uint64, error) {
+	size, err := ioctlBlkGetSize64(file.Fd())
+	if err != nil {
+		utils.Errorf("Error getblockdevicesize: %s", err)
+		return 0, ErrGetBlockSize
+	}
+	return uint64(size), nil
+}
+
+func BlockDeviceDiscard(path string) error {
+	file, err := osOpenFile(path, osORdWr, 0)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	size, err := GetBlockDeviceSize(file)
+	if err != nil {
+		return err
+	}
+
+	if err := ioctlBlkDiscard(file.Fd(), 0, size); err != nil {
+		return err
+	}
+
+	// Without this sometimes the remove of the device that happens after
+	// discard fails with EBUSY.
+	syscall.Sync()
+
+	return nil
+}
+
+// This is the programmatic example of "dmsetup create"
+func createPool(poolName string, dataFile, metadataFile *osFile) error {
+	task, err := createTask(DeviceCreate, poolName)
+	if task == nil {
+		return err
+	}
+
+	size, err := GetBlockDeviceSize(dataFile)
+	if err != nil {
+		return fmt.Errorf("Can't get data size")
+	}
+
+	params := metadataFile.Name() + " " + dataFile.Name() + " 128 32768 1 skip_block_zeroing"
+	if err := task.AddTarget(0, size/512, "thin-pool", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate (createPool)")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+func reloadPool(poolName string, dataFile, metadataFile *osFile) error {
+	task, err := createTask(DeviceReload, poolName)
+	if task == nil {
+		return err
+	}
+
+	size, err := GetBlockDeviceSize(dataFile)
+	if err != nil {
+		return fmt.Errorf("Can't get data size")
+	}
+
+	params := metadataFile.Name() + " " + dataFile.Name() + " 128 32768"
+	if err := task.AddTarget(0, size/512, "thin-pool", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate")
+	}
+
+	return nil
+}
+
+func createTask(t TaskType, name string) (*Task, error) {
+	task := TaskCreate(t)
+	if task == nil {
+		return nil, fmt.Errorf("Can't create task of type %d", int(t))
+	}
+	if err := task.SetName(name); err != nil {
+		return nil, fmt.Errorf("Can't set task name %s", name)
+	}
+	return task, nil
+}
+
+func getInfo(name string) (*Info, error) {
+	task, err := createTask(DeviceInfo, name)
+	if task == nil {
+		return nil, err
+	}
+	if err := task.Run(); err != nil {
+		return nil, err
+	}
+	return task.GetInfo()
+}
+
+func getStatus(name string) (uint64, uint64, string, string, error) {
+	task, err := createTask(DeviceStatus, name)
+	if task == nil {
+		utils.Debugf("getStatus: Error createTask: %s", err)
+		return 0, 0, "", "", err
+	}
+	if err := task.Run(); err != nil {
+		utils.Debugf("getStatus: Error Run: %s", err)
+		return 0, 0, "", "", err
+	}
+
+	devinfo, err := task.GetInfo()
+	if err != nil {
+		utils.Debugf("getStatus: Error GetInfo: %s", err)
+		return 0, 0, "", "", err
+	}
+	if devinfo.Exists == 0 {
+		utils.Debugf("getStatus: Non existing device %s", name)
+		return 0, 0, "", "", fmt.Errorf("Non existing device %s", name)
+	}
+
+	_, start, length, targetType, params := task.GetNextTarget(0)
+	return start, length, targetType, params, nil
+}
+
+func setTransactionId(poolName string, oldId uint64, newId uint64) error {
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("set_transaction_id %d %d", oldId, newId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running setTransactionId")
+	}
+	return nil
+}
+
+func suspendDevice(name string) error {
+	task, err := createTask(DeviceSuspend, name)
+	if task == nil {
+		return err
+	}
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceSuspend: %s", err)
+	}
+	return nil
+}
+
+func resumeDevice(name string) error {
+	task, err := createTask(DeviceResume, name)
+	if task == nil {
+		return err
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceResume")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+func createDevice(poolName string, deviceId int) error {
+	utils.Debugf("[devmapper] createDevice(poolName=%v, deviceId=%v)", poolName, deviceId)
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("create_thin %d", deviceId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running createDevice")
+	}
+	return nil
+}
+
+func deleteDevice(poolName string, deviceId int) error {
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("delete %d", deviceId)); err != nil {
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running deleteDevice")
+	}
+	return nil
+}
+
+func removeDevice(name string) error {
+	utils.Debugf("[devmapper] removeDevice START")
+	defer utils.Debugf("[devmapper] removeDevice END")
+	task, err := createTask(DeviceRemove, name)
+	if task == nil {
+		return err
+	}
+	if err = task.Run(); err != nil {
+		return fmt.Errorf("Error running removeDevice")
+	}
+	return nil
+}
+
+func activateDevice(poolName string, name string, deviceId int, size uint64) error {
+	task, err := createTask(DeviceCreate, name)
+	if task == nil {
+		return err
+	}
+
+	params := fmt.Sprintf("%s %d", poolName, deviceId)
+	if err := task.AddTarget(0, size/512, "thin", params); err != nil {
+		return fmt.Errorf("Can't add target")
+	}
+	if err := task.SetAddNode(AddNodeOnCreate); err != nil {
+		return fmt.Errorf("Can't add node")
+	}
+
+	var cookie uint = 0
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		return fmt.Errorf("Can't set cookie")
+	}
+
+	if err := task.Run(); err != nil {
+		return fmt.Errorf("Error running DeviceCreate (activateDevice)")
+	}
+
+	UdevWait(cookie)
+
+	return nil
+}
+
+func (devices *DeviceSet) createSnapDevice(poolName string, deviceId int, baseName string, baseDeviceId int) error {
+	devinfo, _ := getInfo(baseName)
+	doSuspend := devinfo != nil && devinfo.Exists != 0
+
+	if doSuspend {
+		if err := suspendDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	task, err := createTask(DeviceTargetMsg, poolName)
+	if task == nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return err
+	}
+
+	if err := task.SetSector(0); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Can't set sector")
+	}
+
+	if err := task.SetMessage(fmt.Sprintf("create_snap %d %d", deviceId, baseDeviceId)); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Can't set message")
+	}
+
+	if err := task.Run(); err != nil {
+		if doSuspend {
+			resumeDevice(baseName)
+		}
+		return fmt.Errorf("Error running DeviceCreate (createSnapDevice)")
+	}
+
+	if doSuspend {
+		if err := resumeDevice(baseName); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/runtime/graphdriver/devmapper/devmapper_doc.go b/runtime/graphdriver/devmapper/devmapper_doc.go
new file mode 100644
index 0000000000..c1c3e3891b
--- /dev/null
+++ b/runtime/graphdriver/devmapper/devmapper_doc.go
@@ -0,0 +1,106 @@
+package devmapper
+
+// Definition of struct dm_task and sub structures (from lvm2)
+//
+// struct dm_ioctl {
+// 	/*
+// 	 * The version number is made up of three parts:
+// 	 * major - no backward or forward compatibility,
+// 	 * minor - only backwards compatible,
+// 	 * patch - both backwards and forwards compatible.
+// 	 *
+// 	 * All clients of the ioctl interface should fill in the
+// 	 * version number of the interface that they were
+// 	 * compiled with.
+// 	 *
+// 	 * All recognised ioctl commands (ie. those that don't
+// 	 * return -ENOTTY) fill out this field, even if the
+// 	 * command failed.
+// 	 */
+// 	uint32_t version[3];	/* in/out */
+// 	uint32_t data_size;	/* total size of data passed in
+// 				 * including this struct */
+
+// 	uint32_t data_start;	/* offset to start of data
+// 				 * relative to start of this struct */
+
+// 	uint32_t target_count;	/* in/out */
+// 	int32_t open_count;	/* out */
+// 	uint32_t flags;		/* in/out */
+
+// 	/*
+// 	 * event_nr holds either the event number (input and output) or the
+// 	 * udev cookie value (input only).
+// 	 * The DM_DEV_WAIT ioctl takes an event number as input.
+// 	 * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+// 	 * use the field as a cookie to return in the DM_COOKIE
+// 	 * variable with the uevents they issue.
+// 	 * For output, the ioctls return the event number, not the cookie.
+// 	 */
+// 	uint32_t event_nr;      	/* in/out */
+// 	uint32_t padding;
+
+// 	uint64_t dev;		/* in/out */
+
+// 	char name[DM_NAME_LEN];	/* device name */
+// 	char uuid[DM_UUID_LEN];	/* unique identifier for
+// 				 * the block device */
+// 	char data[7];		/* padding or data */
+// };
+
+// struct target {
+// 	uint64_t start;
+// 	uint64_t length;
+// 	char *type;
+// 	char *params;
+
+// 	struct target *next;
+// };
+
+// typedef enum {
+// 	DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */
+// 	DM_ADD_NODE_ON_CREATE  /* add /dev/mapper node with dmsetup create */
+// } dm_add_node_t;
+
+// struct dm_task {
+// 	int type;
+// 	char *dev_name;
+// 	char *mangled_dev_name;
+
+// 	struct target *head, *tail;
+
+// 	int read_only;
+// 	uint32_t event_nr;
+// 	int major;
+// 	int minor;
+// 	int allow_default_major_fallback;
+// 	uid_t uid;
+// 	gid_t gid;
+// 	mode_t mode;
+// 	uint32_t read_ahead;
+// 	uint32_t read_ahead_flags;
+// 	union {
+// 		struct dm_ioctl *v4;
+// 	} dmi;
+// 	char *newname;
+// 	char *message;
+// 	char *geometry;
+// 	uint64_t sector;
+// 	int no_flush;
+// 	int no_open_count;
+// 	int skip_lockfs;
+// 	int query_inactive_table;
+// 	int suppress_identical_reload;
+// 	dm_add_node_t add_node;
+// 	uint64_t existing_table_size;
+// 	int cookie_set;
+// 	int new_uuid;
+// 	int secure_data;
+// 	int retry_remove;
+// 	int enable_checks;
+// 	int expected_errno;
+
+// 	char *uuid;
+// 	char *mangled_uuid;
+// };
+//
diff --git a/runtime/graphdriver/devmapper/devmapper_log.go b/runtime/graphdriver/devmapper/devmapper_log.go
new file mode 100644
index 0000000000..18dde7cca5
--- /dev/null
+++ b/runtime/graphdriver/devmapper/devmapper_log.go
@@ -0,0 +1,15 @@
+// +build linux,amd64
+
+package devmapper
+
+import "C"
+
+// Due to the way cgo works this has to be in a separate file, as devmapper.go has
+// definitions in the cgo block, which is incompatible with using "//export"
+
+//export DevmapperLogCallback
+func DevmapperLogCallback(level C.int, file *C.char, line C.int, dm_errno_or_class C.int, message *C.char) {
+	if dmLogger != nil {
+		dmLogger.log(int(level), C.GoString(file), int(line), int(dm_errno_or_class), C.GoString(message))
+	}
+}
diff --git a/runtime/graphdriver/devmapper/devmapper_test.go b/runtime/graphdriver/devmapper/devmapper_test.go
new file mode 100644
index 0000000000..3ffa163ceb
--- /dev/null
+++ b/runtime/graphdriver/devmapper/devmapper_test.go
@@ -0,0 +1,287 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"testing"
+)
+
+func TestTaskCreate(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	// Test success
+	taskCreate(t, DeviceInfo)
+
+	// Test Failure
+	DmTaskCreate = dmTaskCreateFail
+	defer func() { DmTaskCreate = dmTaskCreateFct }()
+	if task := TaskCreate(-1); task != nil {
+		t.Fatalf("An error should have occured while creating an invalid task.")
+	}
+}
+
+func TestTaskRun(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	// Perform the RUN
+	if err := task.Run(); err != nil {
+		t.Fatal(err)
+	}
+	// Make sure we don't have error with GetInfo
+	if _, err := task.GetInfo(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskRun = dmTaskRunFail
+	defer func() { DmTaskRun = dmTaskRunFct }()
+
+	task = taskCreate(t, DeviceInfo)
+	// Perform the RUN
+	if err := task.Run(); err != ErrTaskRun {
+		t.Fatalf("An error should have occured while running task.")
+	}
+	// Make sure GetInfo also fails
+	if _, err := task.GetInfo(); err != ErrTaskGetInfo {
+		t.Fatalf("GetInfo should fail if task.Run() failed.")
+	}
+}
+
+func TestTaskSetName(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetName("test"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetName = dmTaskSetNameFail
+	defer func() { DmTaskSetName = dmTaskSetNameFct }()
+
+	if err := task.SetName("test"); err != ErrTaskSetName {
+		t.Fatalf("An error should have occured while runnign SetName.")
+	}
+}
+
+func TestTaskSetMessage(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetMessage("test"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetMessage = dmTaskSetMessageFail
+	defer func() { DmTaskSetMessage = dmTaskSetMessageFct }()
+
+	if err := task.SetMessage("test"); err != ErrTaskSetMessage {
+		t.Fatalf("An error should have occured while runnign SetMessage.")
+	}
+}
+
+func TestTaskSetSector(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetSector(128); err != nil {
+		t.Fatal(err)
+	}
+
+	DmTaskSetSector = dmTaskSetSectorFail
+	defer func() { DmTaskSetSector = dmTaskSetSectorFct }()
+
+	// Test failure
+	if err := task.SetSector(0); err != ErrTaskSetSector {
+		t.Fatalf("An error should have occured while running SetSector.")
+	}
+}
+
+func TestTaskSetCookie(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	var (
+		cookie uint = 0
+		task        = taskCreate(t, DeviceInfo)
+	)
+
+	// Test success
+	if err := task.SetCookie(&cookie, 0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	if err := task.SetCookie(nil, 0); err != ErrNilCookie {
+		t.Fatalf("An error should have occured while running SetCookie with nil cookie.")
+	}
+
+	DmTaskSetCookie = dmTaskSetCookieFail
+	defer func() { DmTaskSetCookie = dmTaskSetCookieFct }()
+
+	if err := task.SetCookie(&cookie, 0); err != ErrTaskSetCookie {
+		t.Fatalf("An error should have occured while running SetCookie.")
+	}
+}
+
+func TestTaskSetAddNode(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetAddNode(0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	if err := task.SetAddNode(-1); err != ErrInvalidAddNode {
+		t.Fatalf("An error should have occured running SetAddNode with wrong node.")
+	}
+
+	DmTaskSetAddNode = dmTaskSetAddNodeFail
+	defer func() { DmTaskSetAddNode = dmTaskSetAddNodeFct }()
+
+	if err := task.SetAddNode(0); err != ErrTaskSetAddNode {
+		t.Fatalf("An error should have occured running SetAddNode.")
+	}
+}
+
+func TestTaskSetRo(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.SetRo(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskSetRo = dmTaskSetRoFail
+	defer func() { DmTaskSetRo = dmTaskSetRoFct }()
+
+	if err := task.SetRo(); err != ErrTaskSetRo {
+		t.Fatalf("An error should have occured running SetRo.")
+	}
+}
+
+func TestTaskAddTarget(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	task := taskCreate(t, DeviceInfo)
+
+	// Test success
+	if err := task.AddTarget(0, 128, "thinp", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Test failure
+	DmTaskAddTarget = dmTaskAddTargetFail
+	defer func() { DmTaskAddTarget = dmTaskAddTargetFct }()
+
+	if err := task.AddTarget(0, 128, "thinp", ""); err != ErrTaskAddTarget {
+		t.Fatalf("An error should have occured running AddTarget.")
+	}
+}
+
+// func TestTaskGetInfo(t *testing.T) {
+// 	task := taskCreate(t, DeviceInfo)
+
+// 	// Test success
+// 	if _, err := task.GetInfo(); err != nil {
+// 		t.Fatal(err)
+// 	}
+
+// 	// Test failure
+// 	DmTaskGetInfo = dmTaskGetInfoFail
+// 	defer func() { DmTaskGetInfo = dmTaskGetInfoFct }()
+
+// 	if _, err := task.GetInfo(); err != ErrTaskGetInfo {
+// 		t.Fatalf("An error should have occured running GetInfo.")
+// 	}
+// }
+
+// func TestTaskGetNextTarget(t *testing.T) {
+// 	task := taskCreate(t, DeviceInfo)
+
+// 	if next, _, _, _, _ := task.GetNextTarget(0); next == 0 {
+// 		t.Fatalf("The next target should not be 0.")
+// 	}
+// }
+
+/// Utils
+func taskCreate(t *testing.T, taskType TaskType) *Task {
+	task := TaskCreate(taskType)
+	if task == nil {
+		t.Fatalf("Error creating task")
+	}
+	return task
+}
+
+/// Failure function replacement
+func dmTaskCreateFail(t int) *CDmTask {
+	return nil
+}
+
+func dmTaskRunFail(task *CDmTask) int {
+	return -1
+}
+
+func dmTaskSetNameFail(task *CDmTask, name string) int {
+	return -1
+}
+
+func dmTaskSetMessageFail(task *CDmTask, message string) int {
+	return -1
+}
+
+func dmTaskSetSectorFail(task *CDmTask, sector uint64) int {
+	return -1
+}
+
+func dmTaskSetCookieFail(task *CDmTask, cookie *uint, flags uint16) int {
+	return -1
+}
+
+func dmTaskSetAddNodeFail(task *CDmTask, addNode AddNodeType) int {
+	return -1
+}
+
+func dmTaskSetRoFail(task *CDmTask) int {
+	return -1
+}
+
+func dmTaskAddTargetFail(task *CDmTask,
+	start, size uint64, ttype, params string) int {
+	return -1
+}
+
+func dmTaskGetInfoFail(task *CDmTask, info *Info) int {
+	return -1
+}
+
+func dmGetNextTargetFail(task *CDmTask, next uintptr, start, length *uint64,
+	target, params *string) uintptr {
+	return 0
+}
+
+func dmAttachLoopDeviceFail(filename string, fd *int) string {
+	return ""
+}
+
+func sysGetBlockSizeFail(fd uintptr, size *uint64) sysErrno {
+	return 1
+}
+
+func dmUdevWaitFail(cookie uint) int {
+	return -1
+}
+
+func dmSetDevDirFail(dir string) int {
+	return -1
+}
+
+func dmGetLibraryVersionFail(version *string) int {
+	return -1
+}
diff --git a/runtime/graphdriver/devmapper/devmapper_wrapper.go b/runtime/graphdriver/devmapper/devmapper_wrapper.go
new file mode 100644
index 0000000000..bf558affc8
--- /dev/null
+++ b/runtime/graphdriver/devmapper/devmapper_wrapper.go
@@ -0,0 +1,229 @@
+// +build linux,amd64
+
+package devmapper
+
+/*
+#cgo LDFLAGS: -L. -ldevmapper
+#include <libdevmapper.h>
+#include <linux/loop.h> // FIXME: present only for defines, maybe we can remove it?
+#include <linux/fs.h>   // FIXME: present only for BLKGETSIZE64, maybe we can remove it?
+
+#ifndef LOOP_CTL_GET_FREE
+  #define LOOP_CTL_GET_FREE 0x4C82
+#endif
+
+#ifndef LO_FLAGS_PARTSCAN
+  #define LO_FLAGS_PARTSCAN 8
+#endif
+
+// FIXME: Can't we find a way to do the logging in pure Go?
+extern void DevmapperLogCallback(int level, char *file, int line, int dm_errno_or_class, char *str);
+
+static void	log_cb(int level, const char *file, int line, int dm_errno_or_class, const char *f, ...)
+{
+  char buffer[256];
+  va_list ap;
+
+  va_start(ap, f);
+  vsnprintf(buffer, 256, f, ap);
+  va_end(ap);
+
+  DevmapperLogCallback(level, (char *)file, line, dm_errno_or_class, buffer);
+}
+
+static void	log_with_errno_init()
+{
+  dm_log_with_errno_init(log_cb);
+}
+*/
+import "C"
+
+import (
+	"unsafe"
+)
+
+type (
+	CDmTask C.struct_dm_task
+
+	CLoopInfo64 C.struct_loop_info64
+	LoopInfo64  struct {
+		loDevice           uint64 /* ioctl r/o */
+		loInode            uint64 /* ioctl r/o */
+		loRdevice          uint64 /* ioctl r/o */
+		loOffset           uint64
+		loSizelimit        uint64 /* bytes, 0 == max available */
+		loNumber           uint32 /* ioctl r/o */
+		loEncrypt_type     uint32
+		loEncrypt_key_size uint32 /* ioctl w/o */
+		loFlags            uint32 /* ioctl r/o */
+		loFileName         [LoNameSize]uint8
+		loCryptName        [LoNameSize]uint8
+		loEncryptKey       [LoKeySize]uint8 /* ioctl w/o */
+		loInit             [2]uint64
+	}
+)
+
+// IOCTL consts
+const (
+	BlkGetSize64 = C.BLKGETSIZE64
+	BlkDiscard   = C.BLKDISCARD
+
+	LoopSetFd       = C.LOOP_SET_FD
+	LoopCtlGetFree  = C.LOOP_CTL_GET_FREE
+	LoopGetStatus64 = C.LOOP_GET_STATUS64
+	LoopSetStatus64 = C.LOOP_SET_STATUS64
+	LoopClrFd       = C.LOOP_CLR_FD
+	LoopSetCapacity = C.LOOP_SET_CAPACITY
+)
+
+const (
+	LoFlagsAutoClear = C.LO_FLAGS_AUTOCLEAR
+	LoFlagsReadOnly  = C.LO_FLAGS_READ_ONLY
+	LoFlagsPartScan  = C.LO_FLAGS_PARTSCAN
+	LoKeySize        = C.LO_KEY_SIZE
+	LoNameSize       = C.LO_NAME_SIZE
+)
+
+var (
+	DmGetLibraryVersion = dmGetLibraryVersionFct
+	DmGetNextTarget     = dmGetNextTargetFct
+	DmLogInitVerbose    = dmLogInitVerboseFct
+	DmSetDevDir         = dmSetDevDirFct
+	DmTaskAddTarget     = dmTaskAddTargetFct
+	DmTaskCreate        = dmTaskCreateFct
+	DmTaskDestroy       = dmTaskDestroyFct
+	DmTaskGetInfo       = dmTaskGetInfoFct
+	DmTaskRun           = dmTaskRunFct
+	DmTaskSetAddNode    = dmTaskSetAddNodeFct
+	DmTaskSetCookie     = dmTaskSetCookieFct
+	DmTaskSetMessage    = dmTaskSetMessageFct
+	DmTaskSetName       = dmTaskSetNameFct
+	DmTaskSetRo         = dmTaskSetRoFct
+	DmTaskSetSector     = dmTaskSetSectorFct
+	DmUdevWait          = dmUdevWaitFct
+	LogWithErrnoInit    = logWithErrnoInitFct
+)
+
+func free(p *C.char) {
+	C.free(unsafe.Pointer(p))
+}
+
+func dmTaskDestroyFct(task *CDmTask) {
+	C.dm_task_destroy((*C.struct_dm_task)(task))
+}
+
+func dmTaskCreateFct(taskType int) *CDmTask {
+	return (*CDmTask)(C.dm_task_create(C.int(taskType)))
+}
+
+func dmTaskRunFct(task *CDmTask) int {
+	ret, _ := C.dm_task_run((*C.struct_dm_task)(task))
+	return int(ret)
+}
+
+func dmTaskSetNameFct(task *CDmTask, name string) int {
+	Cname := C.CString(name)
+	defer free(Cname)
+
+	return int(C.dm_task_set_name((*C.struct_dm_task)(task), Cname))
+}
+
+func dmTaskSetMessageFct(task *CDmTask, message string) int {
+	Cmessage := C.CString(message)
+	defer free(Cmessage)
+
+	return int(C.dm_task_set_message((*C.struct_dm_task)(task), Cmessage))
+}
+
+func dmTaskSetSectorFct(task *CDmTask, sector uint64) int {
+	return int(C.dm_task_set_sector((*C.struct_dm_task)(task), C.uint64_t(sector)))
+}
+
+func dmTaskSetCookieFct(task *CDmTask, cookie *uint, flags uint16) int {
+	cCookie := C.uint32_t(*cookie)
+	defer func() {
+		*cookie = uint(cCookie)
+	}()
+	return int(C.dm_task_set_cookie((*C.struct_dm_task)(task), &cCookie, C.uint16_t(flags)))
+}
+
+func dmTaskSetAddNodeFct(task *CDmTask, addNode AddNodeType) int {
+	return int(C.dm_task_set_add_node((*C.struct_dm_task)(task), C.dm_add_node_t(addNode)))
+}
+
+func dmTaskSetRoFct(task *CDmTask) int {
+	return int(C.dm_task_set_ro((*C.struct_dm_task)(task)))
+}
+
+func dmTaskAddTargetFct(task *CDmTask,
+	start, size uint64, ttype, params string) int {
+
+	Cttype := C.CString(ttype)
+	defer free(Cttype)
+
+	Cparams := C.CString(params)
+	defer free(Cparams)
+
+	return int(C.dm_task_add_target((*C.struct_dm_task)(task), C.uint64_t(start), C.uint64_t(size), Cttype, Cparams))
+}
+
+func dmTaskGetInfoFct(task *CDmTask, info *Info) int {
+	Cinfo := C.struct_dm_info{}
+	defer func() {
+		info.Exists = int(Cinfo.exists)
+		info.Suspended = int(Cinfo.suspended)
+		info.LiveTable = int(Cinfo.live_table)
+		info.InactiveTable = int(Cinfo.inactive_table)
+		info.OpenCount = int32(Cinfo.open_count)
+		info.EventNr = uint32(Cinfo.event_nr)
+		info.Major = uint32(Cinfo.major)
+		info.Minor = uint32(Cinfo.minor)
+		info.ReadOnly = int(Cinfo.read_only)
+		info.TargetCount = int32(Cinfo.target_count)
+	}()
+	return int(C.dm_task_get_info((*C.struct_dm_task)(task), &Cinfo))
+}
+
+func dmGetNextTargetFct(task *CDmTask, next uintptr, start, length *uint64, target, params *string) uintptr {
+	var (
+		Cstart, Clength      C.uint64_t
+		CtargetType, Cparams *C.char
+	)
+	defer func() {
+		*start = uint64(Cstart)
+		*length = uint64(Clength)
+		*target = C.GoString(CtargetType)
+		*params = C.GoString(Cparams)
+	}()
+
+	nextp := C.dm_get_next_target((*C.struct_dm_task)(task), unsafe.Pointer(next), &Cstart, &Clength, &CtargetType, &Cparams)
+	return uintptr(nextp)
+}
+
+func dmUdevWaitFct(cookie uint) int {
+	return int(C.dm_udev_wait(C.uint32_t(cookie)))
+}
+
+func dmLogInitVerboseFct(level int) {
+	C.dm_log_init_verbose(C.int(level))
+}
+
+func logWithErrnoInitFct() {
+	C.log_with_errno_init()
+}
+
+func dmSetDevDirFct(dir string) int {
+	Cdir := C.CString(dir)
+	defer free(Cdir)
+
+	return int(C.dm_set_dev_dir(Cdir))
+}
+
+func dmGetLibraryVersionFct(version *string) int {
+	buffer := C.CString(string(make([]byte, 128)))
+	defer free(buffer)
+	defer func() {
+		*version = C.GoString(buffer)
+	}()
+	return int(C.dm_get_library_version(buffer, 128))
+}
diff --git a/runtime/graphdriver/devmapper/driver.go b/runtime/graphdriver/devmapper/driver.go
new file mode 100644
index 0000000000..35fe883f26
--- /dev/null
+++ b/runtime/graphdriver/devmapper/driver.go
@@ -0,0 +1,142 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"github.com/dotcloud/docker/utils"
+	"io/ioutil"
+	"os"
+	"path"
+)
+
+func init() {
+	graphdriver.Register("devicemapper", Init)
+}
+
+// Placeholder interfaces, to be replaced
+// at integration.
+
+// End of placeholder interfaces.
+
+type Driver struct {
+	*DeviceSet
+	home string
+}
+
+var Init = func(home string) (graphdriver.Driver, error) {
+	deviceSet, err := NewDeviceSet(home, true)
+	if err != nil {
+		return nil, err
+	}
+	d := &Driver{
+		DeviceSet: deviceSet,
+		home:      home,
+	}
+	return d, nil
+}
+
+func (d *Driver) String() string {
+	return "devicemapper"
+}
+
+func (d *Driver) Status() [][2]string {
+	s := d.DeviceSet.Status()
+
+	status := [][2]string{
+		{"Pool Name", s.PoolName},
+		{"Data file", s.DataLoopback},
+		{"Metadata file", s.MetadataLoopback},
+		{"Data Space Used", fmt.Sprintf("%.1f Mb", float64(s.Data.Used)/(1024*1024))},
+		{"Data Space Total", fmt.Sprintf("%.1f Mb", float64(s.Data.Total)/(1024*1024))},
+		{"Metadata Space Used", fmt.Sprintf("%.1f Mb", float64(s.Metadata.Used)/(1024*1024))},
+		{"Metadata Space Total", fmt.Sprintf("%.1f Mb", float64(s.Metadata.Total)/(1024*1024))},
+	}
+	return status
+}
+
+func (d *Driver) Cleanup() error {
+	return d.DeviceSet.Shutdown()
+}
+
+func (d *Driver) Create(id, parent string, mountLabel string) error {
+	if err := d.DeviceSet.AddDevice(id, parent); err != nil {
+		return err
+	}
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.mount(id, mp); err != nil {
+		return err
+	}
+
+	if err := osMkdirAll(path.Join(mp, "rootfs"), 0755); err != nil && !osIsExist(err) {
+		return err
+	}
+
+	// Create an "id" file with the container/image id in it to help reconscruct this in case
+	// of later problems
+	if err := ioutil.WriteFile(path.Join(mp, "id"), []byte(id), 0600); err != nil {
+		return err
+	}
+
+	// We float this reference so that the next Get call can
+	// steal it, so we don't have to unmount
+	if err := d.DeviceSet.UnmountDevice(id, UnmountFloat); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (d *Driver) Remove(id string) error {
+	if !d.DeviceSet.HasDevice(id) {
+		// Consider removing a non-existing device a no-op
+		// This is useful to be able to progress on container removal
+		// if the underlying device has gone away due to earlier errors
+		return nil
+	}
+
+	// Sink the float from create in case no Get() call was made
+	if err := d.DeviceSet.UnmountDevice(id, UnmountSink); err != nil {
+		return err
+	}
+	// This assumes the device has been properly Get/Put:ed and thus is unmounted
+	if err := d.DeviceSet.DeleteDevice(id); err != nil {
+		return err
+	}
+
+	mp := path.Join(d.home, "mnt", id)
+	if err := os.RemoveAll(mp); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+
+	return nil
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	mp := path.Join(d.home, "mnt", id)
+	if err := d.mount(id, mp); err != nil {
+		return "", err
+	}
+
+	return path.Join(mp, "rootfs"), nil
+}
+
+func (d *Driver) Put(id string) {
+	if err := d.DeviceSet.UnmountDevice(id, UnmountRegular); err != nil {
+		utils.Errorf("Warning: error unmounting device %s: %s\n", id, err)
+	}
+}
+
+func (d *Driver) mount(id, mountPoint string) error {
+	// Create the target directories if they don't exist
+	if err := osMkdirAll(mountPoint, 0755); err != nil && !osIsExist(err) {
+		return err
+	}
+	// Mount the device
+	return d.DeviceSet.MountDevice(id, mountPoint, "")
+}
+
+func (d *Driver) Exists(id string) bool {
+	return d.Devices[id] != nil
+}
diff --git a/runtime/graphdriver/devmapper/driver_test.go b/runtime/graphdriver/devmapper/driver_test.go
new file mode 100644
index 0000000000..4ca72db0ca
--- /dev/null
+++ b/runtime/graphdriver/devmapper/driver_test.go
@@ -0,0 +1,886 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"io/ioutil"
+	"path"
+	"runtime"
+	"strings"
+	"syscall"
+	"testing"
+)
+
+func init() {
+	// Reduce the size the the base fs and loopback for the tests
+	DefaultDataLoopbackSize = 300 * 1024 * 1024
+	DefaultMetaDataLoopbackSize = 200 * 1024 * 1024
+	DefaultBaseFsSize = 300 * 1024 * 1024
+}
+
+// denyAllDevmapper mocks all calls to libdevmapper in the unit tests, and denies them by default
+func denyAllDevmapper() {
+	// Hijack all calls to libdevmapper with default panics.
+	// Authorized calls are selectively hijacked in each tests.
+	DmTaskCreate = func(t int) *CDmTask {
+		panic("DmTaskCreate: this method should not be called here")
+	}
+	DmTaskRun = func(task *CDmTask) int {
+		panic("DmTaskRun: this method should not be called here")
+	}
+	DmTaskSetName = func(task *CDmTask, name string) int {
+		panic("DmTaskSetName: this method should not be called here")
+	}
+	DmTaskSetMessage = func(task *CDmTask, message string) int {
+		panic("DmTaskSetMessage: this method should not be called here")
+	}
+	DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+		panic("DmTaskSetSector: this method should not be called here")
+	}
+	DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+		panic("DmTaskSetCookie: this method should not be called here")
+	}
+	DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+		panic("DmTaskSetAddNode: this method should not be called here")
+	}
+	DmTaskSetRo = func(task *CDmTask) int {
+		panic("DmTaskSetRo: this method should not be called here")
+	}
+	DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+		panic("DmTaskAddTarget: this method should not be called here")
+	}
+	DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+		panic("DmTaskGetInfo: this method should not be called here")
+	}
+	DmGetNextTarget = func(task *CDmTask, next uintptr, start, length *uint64, target, params *string) uintptr {
+		panic("DmGetNextTarget: this method should not be called here")
+	}
+	DmUdevWait = func(cookie uint) int {
+		panic("DmUdevWait: this method should not be called here")
+	}
+	DmSetDevDir = func(dir string) int {
+		panic("DmSetDevDir: this method should not be called here")
+	}
+	DmGetLibraryVersion = func(version *string) int {
+		panic("DmGetLibraryVersion: this method should not be called here")
+	}
+	DmLogInitVerbose = func(level int) {
+		panic("DmLogInitVerbose: this method should not be called here")
+	}
+	DmTaskDestroy = func(task *CDmTask) {
+		panic("DmTaskDestroy: this method should not be called here")
+	}
+	LogWithErrnoInit = func() {
+		panic("LogWithErrnoInit: this method should not be called here")
+	}
+}
+
+func denyAllSyscall() {
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		panic("sysMount: this method should not be called here")
+	}
+	sysUnmount = func(target string, flags int) (err error) {
+		panic("sysUnmount: this method should not be called here")
+	}
+	sysCloseOnExec = func(fd int) {
+		panic("sysCloseOnExec: this method should not be called here")
+	}
+	sysSyscall = func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+		panic("sysSyscall: this method should not be called here")
+	}
+	// Not a syscall, but forbidding it here anyway
+	Mounted = func(mnt string) (bool, error) {
+		panic("devmapper.Mounted: this method should not be called here")
+	}
+	// osOpenFile = os.OpenFile
+	// osNewFile = os.NewFile
+	// osCreate = os.Create
+	// osStat = os.Stat
+	// osIsNotExist = os.IsNotExist
+	// osIsExist = os.IsExist
+	// osMkdirAll = os.MkdirAll
+	// osRemoveAll = os.RemoveAll
+	// osRename = os.Rename
+	// osReadlink = os.Readlink
+
+	// execRun = func(name string, args ...string) error {
+	// 	return exec.Command(name, args...).Run()
+	// }
+}
+
+func mkTestDirectory(t *testing.T) string {
+	dir, err := ioutil.TempDir("", "docker-test-devmapper-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	return dir
+}
+
+func newDriver(t *testing.T) *Driver {
+	home := mkTestDirectory(t)
+	d, err := Init(home)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return d.(*Driver)
+}
+
+func cleanup(d *Driver) {
+	d.Cleanup()
+	osRemoveAll(d.home)
+}
+
+type Set map[string]bool
+
+func (r Set) Assert(t *testing.T, names ...string) {
+	for _, key := range names {
+		required := true
+		if strings.HasPrefix(key, "?") {
+			key = key[1:]
+			required = false
+		}
+		if _, exists := r[key]; !exists && required {
+			t.Fatalf("Key not set: %s", key)
+		}
+		delete(r, key)
+	}
+	if len(r) != 0 {
+		t.Fatalf("Unexpected keys: %v", r)
+	}
+}
+
+func TestInit(t *testing.T) {
+	var (
+		calls        = make(Set)
+		taskMessages = make(Set)
+		taskTypes    = make(Set)
+		home         = mkTestDirectory(t)
+	)
+	defer osRemoveAll(home)
+
+	func() {
+		denyAllDevmapper()
+		DmSetDevDir = func(dir string) int {
+			calls["DmSetDevDir"] = true
+			expectedDir := "/dev"
+			if dir != expectedDir {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmSetDevDir(%v)\nReceived: DmSetDevDir(%v)\n", expectedDir, dir)
+			}
+			return 0
+		}
+		LogWithErrnoInit = func() {
+			calls["DmLogWithErrnoInit"] = true
+		}
+		var task1 CDmTask
+		DmTaskCreate = func(taskType int) *CDmTask {
+			calls["DmTaskCreate"] = true
+			taskTypes[fmt.Sprintf("%d", taskType)] = true
+			return &task1
+		}
+		DmTaskSetName = func(task *CDmTask, name string) int {
+			calls["DmTaskSetName"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetName(%v)\nReceived: DmTaskSetName(%v)\n", expectedTask, task)
+			}
+			// FIXME: use Set.AssertRegexp()
+			if !strings.HasPrefix(name, "docker-") && !strings.HasPrefix(name, "/dev/mapper/docker-") ||
+				!strings.HasSuffix(name, "-pool") && !strings.HasSuffix(name, "-base") {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetName(%v)\nReceived: DmTaskSetName(%v)\n", "docker-...-pool", name)
+			}
+			return 1
+		}
+		DmTaskRun = func(task *CDmTask) int {
+			calls["DmTaskRun"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskRun(%v)\nReceived: DmTaskRun(%v)\n", expectedTask, task)
+			}
+			return 1
+		}
+		DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+			calls["DmTaskGetInfo"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskGetInfo(%v)\nReceived: DmTaskGetInfo(%v)\n", expectedTask, task)
+			}
+			// This will crash if info is not dereferenceable
+			info.Exists = 0
+			return 1
+		}
+		DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+			calls["DmTaskSetSector"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetSector(%v)\nReceived: DmTaskSetSector(%v)\n", expectedTask, task)
+			}
+			if expectedSector := uint64(0); sector != expectedSector {
+				t.Fatalf("Wrong libdevmapper call to DmTaskSetSector\nExpected: %v\nReceived: %v\n", expectedSector, sector)
+			}
+			return 1
+		}
+		DmTaskSetMessage = func(task *CDmTask, message string) int {
+			calls["DmTaskSetMessage"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskSetSector(%v)\nReceived: DmTaskSetSector(%v)\n", expectedTask, task)
+			}
+			taskMessages[message] = true
+			return 1
+		}
+		DmTaskDestroy = func(task *CDmTask) {
+			calls["DmTaskDestroy"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+		}
+		DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+			calls["DmTaskSetTarget"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+			if start != 0 {
+				t.Fatalf("Wrong start: %d != %d", start, 0)
+			}
+			if ttype != "thin" && ttype != "thin-pool" {
+				t.Fatalf("Wrong ttype: %s", ttype)
+			}
+			// Quick smoke test
+			if params == "" {
+				t.Fatalf("Params should not be empty")
+			}
+			return 1
+		}
+		fakeCookie := uint(4321)
+		DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+			calls["DmTaskSetCookie"] = true
+			expectedTask := &task1
+			if task != expectedTask {
+				t.Fatalf("Wrong libdevmapper call\nExpected: DmTaskDestroy(%v)\nReceived: DmTaskDestroy(%v)\n", expectedTask, task)
+			}
+			if flags != 0 {
+				t.Fatalf("Cookie flags should be 0 (not %x)", flags)
+			}
+			*cookie = fakeCookie
+			return 1
+		}
+		DmUdevWait = func(cookie uint) int {
+			calls["DmUdevWait"] = true
+			if cookie != fakeCookie {
+				t.Fatalf("Wrong cookie: %d != %d", cookie, fakeCookie)
+			}
+			return 1
+		}
+		DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+			if addNode != AddNodeOnCreate {
+				t.Fatalf("Wrong AddNoteType: %v (expected %v)", addNode, AddNodeOnCreate)
+			}
+			calls["DmTaskSetAddNode"] = true
+			return 1
+		}
+		execRun = func(name string, args ...string) error {
+			calls["execRun"] = true
+			if name != "mkfs.ext4" {
+				t.Fatalf("Expected %s to be executed, not %s", "mkfs.ext4", name)
+			}
+			return nil
+		}
+		driver, err := Init(home)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer func() {
+			if err := driver.Cleanup(); err != nil {
+				t.Fatal(err)
+			}
+		}()
+	}()
+	// Put all tests in a function to make sure the garbage collection will
+	// occur.
+
+	// Call GC to cleanup runtime.Finalizers
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmSetDevDir",
+		"DmLogWithErrnoInit",
+		"DmTaskSetName",
+		"DmTaskRun",
+		"DmTaskGetInfo",
+		"DmTaskDestroy",
+		"execRun",
+		"DmTaskCreate",
+		"DmTaskSetTarget",
+		"DmTaskSetCookie",
+		"DmUdevWait",
+		"DmTaskSetSector",
+		"DmTaskSetMessage",
+		"DmTaskSetAddNode",
+	)
+	taskTypes.Assert(t, "0", "6", "17")
+	taskMessages.Assert(t, "create_thin 0", "set_transaction_id 0 1")
+}
+
+func fakeInit() func(home string) (graphdriver.Driver, error) {
+	oldInit := Init
+	Init = func(home string) (graphdriver.Driver, error) {
+		return &Driver{
+			home: home,
+		}, nil
+	}
+	return oldInit
+}
+
+func restoreInit(init func(home string) (graphdriver.Driver, error)) {
+	Init = init
+}
+
+func mockAllDevmapper(calls Set) {
+	DmSetDevDir = func(dir string) int {
+		calls["DmSetDevDir"] = true
+		return 0
+	}
+	LogWithErrnoInit = func() {
+		calls["DmLogWithErrnoInit"] = true
+	}
+	DmTaskCreate = func(taskType int) *CDmTask {
+		calls["DmTaskCreate"] = true
+		return &CDmTask{}
+	}
+	DmTaskSetName = func(task *CDmTask, name string) int {
+		calls["DmTaskSetName"] = true
+		return 1
+	}
+	DmTaskRun = func(task *CDmTask) int {
+		calls["DmTaskRun"] = true
+		return 1
+	}
+	DmTaskGetInfo = func(task *CDmTask, info *Info) int {
+		calls["DmTaskGetInfo"] = true
+		return 1
+	}
+	DmTaskSetSector = func(task *CDmTask, sector uint64) int {
+		calls["DmTaskSetSector"] = true
+		return 1
+	}
+	DmTaskSetMessage = func(task *CDmTask, message string) int {
+		calls["DmTaskSetMessage"] = true
+		return 1
+	}
+	DmTaskDestroy = func(task *CDmTask) {
+		calls["DmTaskDestroy"] = true
+	}
+	DmTaskAddTarget = func(task *CDmTask, start, size uint64, ttype, params string) int {
+		calls["DmTaskSetTarget"] = true
+		return 1
+	}
+	DmTaskSetCookie = func(task *CDmTask, cookie *uint, flags uint16) int {
+		calls["DmTaskSetCookie"] = true
+		return 1
+	}
+	DmUdevWait = func(cookie uint) int {
+		calls["DmUdevWait"] = true
+		return 1
+	}
+	DmTaskSetAddNode = func(task *CDmTask, addNode AddNodeType) int {
+		calls["DmTaskSetAddNode"] = true
+		return 1
+	}
+	execRun = func(name string, args ...string) error {
+		calls["execRun"] = true
+		return nil
+	}
+}
+
+func TestDriverName(t *testing.T) {
+	denyAllDevmapper()
+	defer denyAllDevmapper()
+
+	oldInit := fakeInit()
+	defer restoreInit(oldInit)
+
+	d := newDriver(t)
+	if d.String() != "devicemapper" {
+		t.Fatalf("Expected driver name to be devicemapper got %s", d.String())
+	}
+}
+
+func TestDriverCreate(t *testing.T) {
+	denyAllDevmapper()
+	denyAllSyscall()
+	defer denyAllSyscall()
+	defer denyAllDevmapper()
+
+	calls := make(Set)
+	mockAllDevmapper(calls)
+
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		calls["sysMount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedSource := "/dev/mapper/docker-"; !strings.HasPrefix(source, expectedSource) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedSource, source)
+		}
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFstype := "ext4"; fstype != expectedFstype {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFstype, fstype)
+		}
+		if expectedFlags := uintptr(3236757504); flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+
+	Mounted = func(mnt string) (bool, error) {
+		calls["Mounted"] = true
+		if !strings.HasPrefix(mnt, "/tmp/docker-test-devmapper-") || !strings.HasSuffix(mnt, "/mnt/1") {
+			t.Fatalf("Wrong mounted call\nExpected: Mounted(%v)\nReceived: Mounted(%v)\n", "/tmp/docker-test-devmapper-.../mnt/1", mnt)
+		}
+		return false, nil
+	}
+
+	sysSyscall = func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+		calls["sysSyscall"] = true
+		if trap != sysSysIoctl {
+			t.Fatalf("Unexpected syscall. Expecting SYS_IOCTL, received: %d", trap)
+		}
+		switch a2 {
+		case LoopSetFd:
+			calls["ioctl.loopsetfd"] = true
+		case LoopCtlGetFree:
+			calls["ioctl.loopctlgetfree"] = true
+		case LoopGetStatus64:
+			calls["ioctl.loopgetstatus"] = true
+		case LoopSetStatus64:
+			calls["ioctl.loopsetstatus"] = true
+		case LoopClrFd:
+			calls["ioctl.loopclrfd"] = true
+		case LoopSetCapacity:
+			calls["ioctl.loopsetcapacity"] = true
+		case BlkGetSize64:
+			calls["ioctl.blkgetsize"] = true
+		default:
+			t.Fatalf("Unexpected IOCTL. Received %d", a2)
+		}
+		return 0, 0, 0
+	}
+
+	func() {
+		d := newDriver(t)
+
+		calls.Assert(t,
+			"DmSetDevDir",
+			"DmLogWithErrnoInit",
+			"DmTaskSetName",
+			"DmTaskRun",
+			"DmTaskGetInfo",
+			"execRun",
+			"DmTaskCreate",
+			"DmTaskSetTarget",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetSector",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+			"sysSyscall",
+			"ioctl.blkgetsize",
+			"ioctl.loopsetfd",
+			"ioctl.loopsetstatus",
+			"?ioctl.loopctlgetfree",
+		)
+
+		if err := d.Create("1", "", ""); err != nil {
+			t.Fatal(err)
+		}
+		calls.Assert(t,
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"sysMount",
+			"DmTaskRun",
+			"DmTaskSetTarget",
+			"DmTaskSetSector",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+	}()
+
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmTaskDestroy",
+	)
+}
+
+func TestDriverRemove(t *testing.T) {
+	denyAllDevmapper()
+	denyAllSyscall()
+	defer denyAllSyscall()
+	defer denyAllDevmapper()
+
+	calls := make(Set)
+	mockAllDevmapper(calls)
+
+	sysMount = func(source, target, fstype string, flags uintptr, data string) (err error) {
+		calls["sysMount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedSource := "/dev/mapper/docker-"; !strings.HasPrefix(source, expectedSource) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedSource, source)
+		}
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFstype := "ext4"; fstype != expectedFstype {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFstype, fstype)
+		}
+		if expectedFlags := uintptr(3236757504); flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+	sysUnmount = func(target string, flags int) (err error) {
+		calls["sysUnmount"] = true
+		// FIXME: compare the exact source and target strings (inodes + devname)
+		if expectedTarget := "/tmp/docker-test-devmapper-"; !strings.HasPrefix(target, expectedTarget) {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedTarget, target)
+		}
+		if expectedFlags := 0; flags != expectedFlags {
+			t.Fatalf("Wrong syscall call\nExpected: Mount(%v)\nReceived: Mount(%v)\n", expectedFlags, flags)
+		}
+		return nil
+	}
+	Mounted = func(mnt string) (bool, error) {
+		calls["Mounted"] = true
+		return false, nil
+	}
+
+	sysSyscall = func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+		calls["sysSyscall"] = true
+		if trap != sysSysIoctl {
+			t.Fatalf("Unexpected syscall. Expecting SYS_IOCTL, received: %d", trap)
+		}
+		switch a2 {
+		case LoopSetFd:
+			calls["ioctl.loopsetfd"] = true
+		case LoopCtlGetFree:
+			calls["ioctl.loopctlgetfree"] = true
+		case LoopGetStatus64:
+			calls["ioctl.loopgetstatus"] = true
+		case LoopSetStatus64:
+			calls["ioctl.loopsetstatus"] = true
+		case LoopClrFd:
+			calls["ioctl.loopclrfd"] = true
+		case LoopSetCapacity:
+			calls["ioctl.loopsetcapacity"] = true
+		case BlkGetSize64:
+			calls["ioctl.blkgetsize"] = true
+		default:
+			t.Fatalf("Unexpected IOCTL. Received %d", a2)
+		}
+		return 0, 0, 0
+	}
+
+	func() {
+		d := newDriver(t)
+
+		calls.Assert(t,
+			"DmSetDevDir",
+			"DmLogWithErrnoInit",
+			"DmTaskSetName",
+			"DmTaskRun",
+			"DmTaskGetInfo",
+			"execRun",
+			"DmTaskCreate",
+			"DmTaskSetTarget",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetSector",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+			"sysSyscall",
+			"ioctl.blkgetsize",
+			"ioctl.loopsetfd",
+			"ioctl.loopsetstatus",
+			"?ioctl.loopctlgetfree",
+		)
+
+		if err := d.Create("1", "", ""); err != nil {
+			t.Fatal(err)
+		}
+
+		calls.Assert(t,
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"sysMount",
+			"DmTaskRun",
+			"DmTaskSetTarget",
+			"DmTaskSetSector",
+			"DmTaskSetCookie",
+			"DmUdevWait",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskSetAddNode",
+		)
+
+		Mounted = func(mnt string) (bool, error) {
+			calls["Mounted"] = true
+			return true, nil
+		}
+
+		if err := d.Remove("1"); err != nil {
+			t.Fatal(err)
+		}
+
+		calls.Assert(t,
+			"DmTaskRun",
+			"DmTaskSetSector",
+			"DmTaskSetName",
+			"DmTaskSetMessage",
+			"DmTaskCreate",
+			"DmTaskGetInfo",
+			"DmTaskSetCookie",
+			"DmTaskSetTarget",
+			"DmTaskSetAddNode",
+			"DmUdevWait",
+			"sysUnmount",
+		)
+	}()
+	runtime.GC()
+
+	calls.Assert(t,
+		"DmTaskDestroy",
+	)
+}
+
+func TestCleanup(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skip("Unimplemented")
+	d := newDriver(t)
+	defer osRemoveAll(d.home)
+
+	mountPoints := make([]string, 2)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	// Mount the id
+	p, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	mountPoints[0] = p
+
+	if err := d.Create("2", "1", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	p, err = d.Get("2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	mountPoints[1] = p
+
+	// Ensure that all the mount points are currently mounted
+	for _, p := range mountPoints {
+		if mounted, err := Mounted(p); err != nil {
+			t.Fatal(err)
+		} else if !mounted {
+			t.Fatalf("Expected %s to be mounted", p)
+		}
+	}
+
+	// Ensure that devices are active
+	for _, p := range []string{"1", "2"} {
+		if !d.HasActivatedDevice(p) {
+			t.Fatalf("Expected %s to have an active device", p)
+		}
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that all the mount points are no longer mounted
+	for _, p := range mountPoints {
+		if mounted, err := Mounted(p); err != nil {
+			t.Fatal(err)
+		} else if mounted {
+			t.Fatalf("Expected %s to not be mounted", p)
+		}
+	}
+
+	// Ensure that devices are no longer activated
+	for _, p := range []string{"1", "2"} {
+		if d.HasActivatedDevice(p) {
+			t.Fatalf("Expected %s not be an active device", p)
+		}
+	}
+}
+
+func TestNotMounted(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skip("Not implemented")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	mounted, err := Mounted(path.Join(d.home, "mnt", "1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mounted {
+		t.Fatal("Id 1 should not be mounted")
+	}
+}
+
+func TestMounted(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	mounted, err := Mounted(path.Join(d.home, "mnt", "1"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !mounted {
+		t.Fatal("Id 1 should be mounted")
+	}
+}
+
+func TestInitCleanedDriver(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Cleanup(); err != nil {
+		t.Fatal(err)
+	}
+
+	driver, err := Init(d.home)
+	if err != nil {
+		t.Fatal(err)
+	}
+	d = driver.(*Driver)
+	defer cleanup(d)
+
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestMountMountedDriver(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Perform get on same id to ensure that it will
+	// not be mounted twice
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestGetReturnsValidDevice(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	if !d.HasDevice("1") {
+		t.Fatalf("Expected id 1 to be in device set")
+	}
+
+	if _, err := d.Get("1"); err != nil {
+		t.Fatal(err)
+	}
+
+	if !d.HasActivatedDevice("1") {
+		t.Fatalf("Expected id 1 to be activated")
+	}
+
+	if !d.HasInitializedDevice("1") {
+		t.Fatalf("Expected id 1 to be initialized")
+	}
+}
+
+func TestDriverGetSize(t *testing.T) {
+	t.Skip("FIXME: not a unit test")
+	t.Skipf("Size is currently not implemented")
+
+	d := newDriver(t)
+	defer cleanup(d)
+
+	if err := d.Create("1", "", ""); err != nil {
+		t.Fatal(err)
+	}
+
+	mountPoint, err := d.Get("1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	size := int64(1024)
+
+	f, err := osCreate(path.Join(mountPoint, "test_file"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Truncate(size); err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	// diffSize, err := d.DiffSize("1")
+	// if err != nil {
+	// 	t.Fatal(err)
+	// }
+	// if diffSize != size {
+	// 	t.Fatalf("Expected size %d got %d", size, diffSize)
+	// }
+}
+
+func assertMap(t *testing.T, m map[string]bool, keys ...string) {
+	for _, key := range keys {
+		if _, exists := m[key]; !exists {
+			t.Fatalf("Key not set: %s", key)
+		}
+		delete(m, key)
+	}
+	if len(m) != 0 {
+		t.Fatalf("Unexpected keys: %v", m)
+	}
+}
diff --git a/runtime/graphdriver/devmapper/ioctl.go b/runtime/graphdriver/devmapper/ioctl.go
new file mode 100644
index 0000000000..30bafff943
--- /dev/null
+++ b/runtime/graphdriver/devmapper/ioctl.go
@@ -0,0 +1,71 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"unsafe"
+)
+
+func ioctlLoopCtlGetFree(fd uintptr) (int, error) {
+	index, _, err := sysSyscall(sysSysIoctl, fd, LoopCtlGetFree, 0)
+	if err != 0 {
+		return 0, err
+	}
+	return int(index), nil
+}
+
+func ioctlLoopSetFd(loopFd, sparseFd uintptr) error {
+	if _, _, err := sysSyscall(sysSysIoctl, loopFd, LoopSetFd, sparseFd); err != 0 {
+		return err
+	}
+	return nil
+}
+
+func ioctlLoopSetStatus64(loopFd uintptr, loopInfo *LoopInfo64) error {
+	if _, _, err := sysSyscall(sysSysIoctl, loopFd, LoopSetStatus64, uintptr(unsafe.Pointer(loopInfo))); err != 0 {
+		return err
+	}
+	return nil
+}
+
+func ioctlLoopClrFd(loopFd uintptr) error {
+	if _, _, err := sysSyscall(sysSysIoctl, loopFd, LoopClrFd, 0); err != 0 {
+		return err
+	}
+	return nil
+}
+
+func ioctlLoopGetStatus64(loopFd uintptr) (*LoopInfo64, error) {
+	loopInfo := &LoopInfo64{}
+
+	if _, _, err := sysSyscall(sysSysIoctl, loopFd, LoopGetStatus64, uintptr(unsafe.Pointer(loopInfo))); err != 0 {
+		return nil, err
+	}
+	return loopInfo, nil
+}
+
+func ioctlLoopSetCapacity(loopFd uintptr, value int) error {
+	if _, _, err := sysSyscall(sysSysIoctl, loopFd, LoopSetCapacity, uintptr(value)); err != 0 {
+		return err
+	}
+	return nil
+}
+
+func ioctlBlkGetSize64(fd uintptr) (int64, error) {
+	var size int64
+	if _, _, err := sysSyscall(sysSysIoctl, fd, BlkGetSize64, uintptr(unsafe.Pointer(&size))); err != 0 {
+		return 0, err
+	}
+	return size, nil
+}
+
+func ioctlBlkDiscard(fd uintptr, offset, length uint64) error {
+	var r [2]uint64
+	r[0] = offset
+	r[1] = length
+
+	if _, _, err := sysSyscall(sysSysIoctl, fd, BlkDiscard, uintptr(unsafe.Pointer(&r[0]))); err != 0 {
+		return err
+	}
+	return nil
+}
diff --git a/runtime/graphdriver/devmapper/mount.go b/runtime/graphdriver/devmapper/mount.go
new file mode 100644
index 0000000000..4f19109bf8
--- /dev/null
+++ b/runtime/graphdriver/devmapper/mount.go
@@ -0,0 +1,27 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"path/filepath"
+)
+
+// FIXME: this is copy-pasted from the aufs driver.
+// It should be moved into the core.
+
+var Mounted = func(mountpoint string) (bool, error) {
+	mntpoint, err := osStat(mountpoint)
+	if err != nil {
+		if osIsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	parent, err := osStat(filepath.Join(mountpoint, ".."))
+	if err != nil {
+		return false, err
+	}
+	mntpointSt := toSysStatT(mntpoint.Sys())
+	parentSt := toSysStatT(parent.Sys())
+	return mntpointSt.Dev != parentSt.Dev, nil
+}
diff --git a/runtime/graphdriver/devmapper/sys.go b/runtime/graphdriver/devmapper/sys.go
new file mode 100644
index 0000000000..5a9ab4d74b
--- /dev/null
+++ b/runtime/graphdriver/devmapper/sys.go
@@ -0,0 +1,57 @@
+// +build linux,amd64
+
+package devmapper
+
+import (
+	"os"
+	"os/exec"
+	"syscall"
+)
+
+type (
+	sysStatT syscall.Stat_t
+	sysErrno syscall.Errno
+
+	osFile struct{ *os.File }
+)
+
+var (
+	sysMount       = syscall.Mount
+	sysUnmount     = syscall.Unmount
+	sysCloseOnExec = syscall.CloseOnExec
+	sysSyscall     = syscall.Syscall
+
+	osOpenFile = func(name string, flag int, perm os.FileMode) (*osFile, error) {
+		f, err := os.OpenFile(name, flag, perm)
+		return &osFile{File: f}, err
+	}
+	osOpen       = func(name string) (*osFile, error) { f, err := os.Open(name); return &osFile{File: f}, err }
+	osNewFile    = os.NewFile
+	osCreate     = os.Create
+	osStat       = os.Stat
+	osIsNotExist = os.IsNotExist
+	osIsExist    = os.IsExist
+	osMkdirAll   = os.MkdirAll
+	osRemoveAll  = os.RemoveAll
+	osRename     = os.Rename
+	osReadlink   = os.Readlink
+
+	execRun = func(name string, args ...string) error { return exec.Command(name, args...).Run() }
+)
+
+const (
+	sysMsMgcVal = syscall.MS_MGC_VAL
+	sysMsRdOnly = syscall.MS_RDONLY
+	sysEInval   = syscall.EINVAL
+	sysSysIoctl = syscall.SYS_IOCTL
+	sysEBusy    = syscall.EBUSY
+
+	osORdOnly    = os.O_RDONLY
+	osORdWr      = os.O_RDWR
+	osOCreate    = os.O_CREATE
+	osModeDevice = os.ModeDevice
+)
+
+func toSysStatT(i interface{}) *sysStatT {
+	return (*sysStatT)(i.(*syscall.Stat_t))
+}
diff --git a/runtime/graphdriver/driver.go b/runtime/graphdriver/driver.go
new file mode 100644
index 0000000000..bd4c2faaca
--- /dev/null
+++ b/runtime/graphdriver/driver.go
@@ -0,0 +1,92 @@
+package graphdriver
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/utils"
+	"os"
+	"path"
+)
+
+type InitFunc func(root string) (Driver, error)
+
+type Driver interface {
+	String() string
+
+	Create(id, parent string, mountLabel string) error
+	Remove(id string) error
+
+	Get(id string) (dir string, err error)
+	Put(id string)
+	Exists(id string) bool
+
+	Status() [][2]string
+
+	Cleanup() error
+}
+
+type Differ interface {
+	Diff(id string) (archive.Archive, error)
+	Changes(id string) ([]archive.Change, error)
+	ApplyDiff(id string, diff archive.ArchiveReader) error
+	DiffSize(id string) (bytes int64, err error)
+}
+
+var (
+	DefaultDriver string
+	// All registred drivers
+	drivers map[string]InitFunc
+	// Slice of drivers that should be used in an order
+	priority = []string{
+		"aufs",
+		"btrfs",
+		"devicemapper",
+		"vfs",
+	}
+)
+
+func init() {
+	drivers = make(map[string]InitFunc)
+}
+
+func Register(name string, initFunc InitFunc) error {
+	if _, exists := drivers[name]; exists {
+		return fmt.Errorf("Name already registered %s", name)
+	}
+	drivers[name] = initFunc
+
+	return nil
+}
+
+func GetDriver(name, home string) (Driver, error) {
+	if initFunc, exists := drivers[name]; exists {
+		return initFunc(path.Join(home, name))
+	}
+	return nil, fmt.Errorf("No such driver: %s", name)
+}
+
+func New(root string) (driver Driver, err error) {
+	for _, name := range []string{os.Getenv("DOCKER_DRIVER"), DefaultDriver} {
+		if name != "" {
+			return GetDriver(name, root)
+		}
+	}
+
+	// Check for priority drivers first
+	for _, name := range priority {
+		if driver, err = GetDriver(name, root); err != nil {
+			utils.Debugf("Error loading driver %s: %s", name, err)
+			continue
+		}
+		return driver, nil
+	}
+
+	// Check all registered drivers if no priority driver is found
+	for _, initFunc := range drivers {
+		if driver, err = initFunc(root); err != nil {
+			continue
+		}
+		return driver, nil
+	}
+	return nil, err
+}
diff --git a/runtime/graphdriver/vfs/driver.go b/runtime/graphdriver/vfs/driver.go
new file mode 100644
index 0000000000..fe09560f24
--- /dev/null
+++ b/runtime/graphdriver/vfs/driver.go
@@ -0,0 +1,95 @@
+package vfs
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"os"
+	"os/exec"
+	"path"
+)
+
+func init() {
+	graphdriver.Register("vfs", Init)
+}
+
+func Init(home string) (graphdriver.Driver, error) {
+	d := &Driver{
+		home: home,
+	}
+	return d, nil
+}
+
+type Driver struct {
+	home string
+}
+
+func (d *Driver) String() string {
+	return "vfs"
+}
+
+func (d *Driver) Status() [][2]string {
+	return nil
+}
+
+func (d *Driver) Cleanup() error {
+	return nil
+}
+
+func copyDir(src, dst string) error {
+	if output, err := exec.Command("cp", "-aT", "--reflink=auto", src, dst).CombinedOutput(); err != nil {
+		return fmt.Errorf("Error VFS copying directory: %s (%s)", err, output)
+	}
+	return nil
+}
+
+func (d *Driver) Create(id string, parent string, mountLabel string) error {
+	dir := d.dir(id)
+	if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
+		return err
+	}
+	if err := os.Mkdir(dir, 0700); err != nil {
+		return err
+	}
+	if parent == "" {
+		return nil
+	}
+	parentDir, err := d.Get(parent)
+	if err != nil {
+		return fmt.Errorf("%s: %s", parent, err)
+	}
+	if err := copyDir(parentDir, dir); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (d *Driver) dir(id string) string {
+	return path.Join(d.home, "dir", path.Base(id))
+}
+
+func (d *Driver) Remove(id string) error {
+	if _, err := os.Stat(d.dir(id)); err != nil {
+		return err
+	}
+	return os.RemoveAll(d.dir(id))
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	dir := d.dir(id)
+	if st, err := os.Stat(dir); err != nil {
+		return "", err
+	} else if !st.IsDir() {
+		return "", fmt.Errorf("%s: not a directory", dir)
+	}
+	return dir, nil
+}
+
+func (d *Driver) Put(id string) {
+	// The vfs driver has no runtime resources (e.g. mounts)
+	// to clean up, so we don't need anything here
+}
+
+func (d *Driver) Exists(id string) bool {
+	_, err := os.Stat(d.dir(id))
+	return err == nil
+}
diff --git a/runtime/history.go b/runtime/history.go
new file mode 100644
index 0000000000..835ac9c11e
--- /dev/null
+++ b/runtime/history.go
@@ -0,0 +1,30 @@
+package runtime
+
+import (
+	"sort"
+)
+
+// History is a convenience type for storing a list of containers,
+// ordered by creation date.
+type History []*Container
+
+func (history *History) Len() int {
+	return len(*history)
+}
+
+func (history *History) Less(i, j int) bool {
+	containers := *history
+	return containers[j].When().Before(containers[i].When())
+}
+
+func (history *History) Swap(i, j int) {
+	containers := *history
+	tmp := containers[i]
+	containers[i] = containers[j]
+	containers[j] = tmp
+}
+
+func (history *History) Add(container *Container) {
+	*history = append(*history, container)
+	sort.Sort(history)
+}
diff --git a/runtime/networkdriver/bridge/driver.go b/runtime/networkdriver/bridge/driver.go
new file mode 100644
index 0000000000..f7c3bc6b01
--- /dev/null
+++ b/runtime/networkdriver/bridge/driver.go
@@ -0,0 +1,470 @@
+package bridge
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/engine"
+	"github.com/dotcloud/docker/pkg/iptables"
+	"github.com/dotcloud/docker/pkg/netlink"
+	"github.com/dotcloud/docker/runtime/networkdriver"
+	"github.com/dotcloud/docker/runtime/networkdriver/ipallocator"
+	"github.com/dotcloud/docker/runtime/networkdriver/portallocator"
+	"github.com/dotcloud/docker/runtime/networkdriver/portmapper"
+	"github.com/dotcloud/docker/utils"
+	"io/ioutil"
+	"log"
+	"net"
+	"strings"
+)
+
+const (
+	DefaultNetworkBridge = "docker0"
+)
+
+// Network interface represents the networking stack of a container
+type networkInterface struct {
+	IP           net.IP
+	PortMappings []net.Addr // there are mappings to the host interfaces
+}
+
+var (
+	addrs = []string{
+		// Here we don't follow the convention of using the 1st IP of the range for the gateway.
+		// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
+		// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
+		// on the internal addressing or other stupid things like that.
+		// The shouldn't, but hey, let's not break them unless we really have to.
+		"172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
+		"10.0.42.1/16",   // Don't even try using the entire /8, that's too intrusive
+		"10.1.42.1/16",
+		"10.42.42.1/16",
+		"172.16.42.1/24",
+		"172.16.43.1/24",
+		"172.16.44.1/24",
+		"10.0.42.1/24",
+		"10.0.43.1/24",
+		"192.168.42.1/24",
+		"192.168.43.1/24",
+		"192.168.44.1/24",
+	}
+
+	bridgeIface   string
+	bridgeNetwork *net.IPNet
+
+	defaultBindingIP  = net.ParseIP("0.0.0.0")
+	currentInterfaces = make(map[string]*networkInterface)
+)
+
+func InitDriver(job *engine.Job) engine.Status {
+	var (
+		network        *net.IPNet
+		enableIPTables = job.GetenvBool("EnableIptables")
+		icc            = job.GetenvBool("InterContainerCommunication")
+		ipForward      = job.GetenvBool("EnableIpForward")
+		bridgeIP       = job.Getenv("BridgeIP")
+	)
+
+	if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
+		defaultBindingIP = net.ParseIP(defaultIP)
+	}
+
+	bridgeIface = job.Getenv("BridgeIface")
+	if bridgeIface == "" {
+		bridgeIface = DefaultNetworkBridge
+	}
+
+	addr, err := networkdriver.GetIfaceAddr(bridgeIface)
+	if err != nil {
+		// If the iface is not found, try to create it
+		job.Logf("creating new bridge for %s", bridgeIface)
+		if err := createBridge(bridgeIP); err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		}
+
+		job.Logf("getting iface addr")
+		addr, err = networkdriver.GetIfaceAddr(bridgeIface)
+		if err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		}
+		network = addr.(*net.IPNet)
+	} else {
+		network = addr.(*net.IPNet)
+		// validate that the bridge ip matches the ip specified by BridgeIP
+		if bridgeIP != "" {
+			if !network.IP.Equal(net.ParseIP(bridgeIP)) {
+				return job.Errorf("bridge ip (%s) does not match existing bridge configuration %s", network.IP, bridgeIP)
+			}
+		}
+	}
+
+	// Configure iptables for link support
+	if enableIPTables {
+		if err := setupIPTables(addr, icc); err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		}
+	}
+
+	if ipForward {
+		// Enable IPv4 forwarding
+		if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
+			job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
+		}
+	}
+
+	// We can always try removing the iptables
+	if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
+		job.Error(err)
+		return engine.StatusErr
+	}
+
+	if enableIPTables {
+		chain, err := iptables.NewChain("DOCKER", bridgeIface)
+		if err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		}
+		portmapper.SetIptablesChain(chain)
+	}
+
+	bridgeNetwork = network
+
+	// https://github.com/dotcloud/docker/issues/2768
+	job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP)
+
+	for name, f := range map[string]engine.Handler{
+		"allocate_interface": Allocate,
+		"release_interface":  Release,
+		"allocate_port":      AllocatePort,
+		"link":               LinkContainers,
+	} {
+		if err := job.Eng.Register(name, f); err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		}
+	}
+	return engine.StatusOK
+}
+
+func setupIPTables(addr net.Addr, icc bool) error {
+	// Enable NAT
+	natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-d", addr.String(), "-j", "MASQUERADE"}
+
+	if !iptables.Exists(natArgs...) {
+		if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
+			return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
+		} else if len(output) != 0 {
+			return fmt.Errorf("Error iptables postrouting: %s", output)
+		}
+	}
+
+	var (
+		args       = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"}
+		acceptArgs = append(args, "ACCEPT")
+		dropArgs   = append(args, "DROP")
+	)
+
+	if !icc {
+		iptables.Raw(append([]string{"-D"}, acceptArgs...)...)
+
+		if !iptables.Exists(dropArgs...) {
+			utils.Debugf("Disable inter-container communication")
+			if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil {
+				return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
+			} else if len(output) != 0 {
+				return fmt.Errorf("Error disabling intercontainer communication: %s", output)
+			}
+		}
+	} else {
+		iptables.Raw(append([]string{"-D"}, dropArgs...)...)
+
+		if !iptables.Exists(acceptArgs...) {
+			utils.Debugf("Enable inter-container communication")
+			if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil {
+				return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
+			} else if len(output) != 0 {
+				return fmt.Errorf("Error enabling intercontainer communication: %s", output)
+			}
+		}
+	}
+
+	// Accept all non-intercontainer outgoing packets
+	outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
+	if !iptables.Exists(outgoingArgs...) {
+		if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil {
+			return fmt.Errorf("Unable to allow outgoing packets: %s", err)
+		} else if len(output) != 0 {
+			return fmt.Errorf("Error iptables allow outgoing: %s", output)
+		}
+	}
+
+	// Accept incoming packets for existing connections
+	existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
+
+	if !iptables.Exists(existingArgs...) {
+		if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil {
+			return fmt.Errorf("Unable to allow incoming packets: %s", err)
+		} else if len(output) != 0 {
+			return fmt.Errorf("Error iptables allow incoming: %s", output)
+		}
+	}
+	return nil
+}
+
+// CreateBridgeIface creates a network bridge interface on the host system with the name `ifaceName`,
+// and attempts to configure it with an address which doesn't conflict with any other interface on the host.
+// If it can't find an address which doesn't conflict, it will return an error.
+func createBridge(bridgeIP string) error {
+	nameservers := []string{}
+	resolvConf, _ := utils.GetResolvConf()
+	// we don't check for an error here, because we don't really care
+	// if we can't read /etc/resolv.conf. So instead we skip the append
+	// if resolvConf is nil. It either doesn't exist, or we can't read it
+	// for some reason.
+	if resolvConf != nil {
+		nameservers = append(nameservers, utils.GetNameserversAsCIDR(resolvConf)...)
+	}
+
+	var ifaceAddr string
+	if len(bridgeIP) != 0 {
+		_, _, err := net.ParseCIDR(bridgeIP)
+		if err != nil {
+			return err
+		}
+		ifaceAddr = bridgeIP
+	} else {
+		for _, addr := range addrs {
+			_, dockerNetwork, err := net.ParseCIDR(addr)
+			if err != nil {
+				return err
+			}
+			if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
+				if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
+					ifaceAddr = addr
+					break
+				} else {
+					utils.Debugf("%s %s", addr, err)
+				}
+			}
+		}
+	}
+
+	if ifaceAddr == "" {
+		return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
+	}
+	utils.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
+
+	if err := createBridgeIface(bridgeIface); err != nil {
+		return err
+	}
+
+	iface, err := net.InterfaceByName(bridgeIface)
+	if err != nil {
+		return err
+	}
+
+	ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
+	if err != nil {
+		return err
+	}
+
+	if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
+		return fmt.Errorf("Unable to add private network: %s", err)
+	}
+	if err := netlink.NetworkLinkUp(iface); err != nil {
+		return fmt.Errorf("Unable to start network bridge: %s", err)
+	}
+	return nil
+}
+
+func createBridgeIface(name string) error {
+	kv, err := utils.GetKernelVersion()
+	// only set the bridge's mac address if the kernel version is > 3.3
+	// before that it was not supported
+	setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3)
+	utils.Debugf("setting bridge mac address = %v", setBridgeMacAddr)
+	return netlink.CreateBridge(name, setBridgeMacAddr)
+}
+
+// Allocate a network interface
+func Allocate(job *engine.Job) engine.Status {
+	var (
+		ip          *net.IP
+		err         error
+		id          = job.Args[0]
+		requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
+	)
+
+	if requestedIP != nil {
+		ip, err = ipallocator.RequestIP(bridgeNetwork, &requestedIP)
+	} else {
+		ip, err = ipallocator.RequestIP(bridgeNetwork, nil)
+	}
+	if err != nil {
+		job.Error(err)
+		return engine.StatusErr
+	}
+
+	out := engine.Env{}
+	out.Set("IP", ip.String())
+	out.Set("Mask", bridgeNetwork.Mask.String())
+	out.Set("Gateway", bridgeNetwork.IP.String())
+	out.Set("Bridge", bridgeIface)
+
+	size, _ := bridgeNetwork.Mask.Size()
+	out.SetInt("IPPrefixLen", size)
+
+	currentInterfaces[id] = &networkInterface{
+		IP: *ip,
+	}
+
+	out.WriteTo(job.Stdout)
+
+	return engine.StatusOK
+}
+
+// release an interface for a select ip
+func Release(job *engine.Job) engine.Status {
+	var (
+		id                 = job.Args[0]
+		containerInterface = currentInterfaces[id]
+		ip                 net.IP
+		port               int
+		proto              string
+	)
+
+	if containerInterface == nil {
+		return job.Errorf("No network information to release for %s", id)
+	}
+
+	for _, nat := range containerInterface.PortMappings {
+		if err := portmapper.Unmap(nat); err != nil {
+			log.Printf("Unable to unmap port %s: %s", nat, err)
+		}
+
+		// this is host mappings
+		switch a := nat.(type) {
+		case *net.TCPAddr:
+			proto = "tcp"
+			ip = a.IP
+			port = a.Port
+		case *net.UDPAddr:
+			proto = "udp"
+			ip = a.IP
+			port = a.Port
+		}
+
+		if err := portallocator.ReleasePort(ip, proto, port); err != nil {
+			log.Printf("Unable to release port %s", nat)
+		}
+	}
+
+	if err := ipallocator.ReleaseIP(bridgeNetwork, &containerInterface.IP); err != nil {
+		log.Printf("Unable to release ip %s\n", err)
+	}
+	return engine.StatusOK
+}
+
+// Allocate an external port and map it to the interface
+func AllocatePort(job *engine.Job) engine.Status {
+	var (
+		err error
+
+		ip            = defaultBindingIP
+		id            = job.Args[0]
+		hostIP        = job.Getenv("HostIP")
+		hostPort      = job.GetenvInt("HostPort")
+		containerPort = job.GetenvInt("ContainerPort")
+		proto         = job.Getenv("Proto")
+		network       = currentInterfaces[id]
+	)
+
+	if hostIP != "" {
+		ip = net.ParseIP(hostIP)
+	}
+
+	// host ip, proto, and host port
+	hostPort, err = portallocator.RequestPort(ip, proto, hostPort)
+	if err != nil {
+		job.Error(err)
+		return engine.StatusErr
+	}
+
+	var (
+		container net.Addr
+		host      net.Addr
+	)
+
+	if proto == "tcp" {
+		host = &net.TCPAddr{IP: ip, Port: hostPort}
+		container = &net.TCPAddr{IP: network.IP, Port: containerPort}
+	} else {
+		host = &net.UDPAddr{IP: ip, Port: hostPort}
+		container = &net.UDPAddr{IP: network.IP, Port: containerPort}
+	}
+
+	if err := portmapper.Map(container, ip, hostPort); err != nil {
+		portallocator.ReleasePort(ip, proto, hostPort)
+
+		job.Error(err)
+		return engine.StatusErr
+	}
+	network.PortMappings = append(network.PortMappings, host)
+
+	out := engine.Env{}
+	out.Set("HostIP", ip.String())
+	out.SetInt("HostPort", hostPort)
+
+	if _, err := out.WriteTo(job.Stdout); err != nil {
+		job.Error(err)
+		return engine.StatusErr
+	}
+	return engine.StatusOK
+}
+
+func LinkContainers(job *engine.Job) engine.Status {
+	var (
+		action       = job.Args[0]
+		childIP      = job.Getenv("ChildIP")
+		parentIP     = job.Getenv("ParentIP")
+		ignoreErrors = job.GetenvBool("IgnoreErrors")
+		ports        = job.GetenvList("Ports")
+	)
+	split := func(p string) (string, string) {
+		parts := strings.Split(p, "/")
+		return parts[0], parts[1]
+	}
+
+	for _, p := range ports {
+		port, proto := split(p)
+		if output, err := iptables.Raw(action, "FORWARD",
+			"-i", bridgeIface, "-o", bridgeIface,
+			"-p", proto,
+			"-s", parentIP,
+			"--dport", port,
+			"-d", childIP,
+			"-j", "ACCEPT"); !ignoreErrors && err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		} else if len(output) != 0 {
+			job.Errorf("Error toggle iptables forward: %s", output)
+			return engine.StatusErr
+		}
+
+		if output, err := iptables.Raw(action, "FORWARD",
+			"-i", bridgeIface, "-o", bridgeIface,
+			"-p", proto,
+			"-s", childIP,
+			"--sport", port,
+			"-d", parentIP,
+			"-j", "ACCEPT"); !ignoreErrors && err != nil {
+			job.Error(err)
+			return engine.StatusErr
+		} else if len(output) != 0 {
+			job.Errorf("Error toggle iptables forward: %s", output)
+			return engine.StatusErr
+		}
+	}
+	return engine.StatusOK
+}
diff --git a/runtime/networkdriver/ipallocator/allocator.go b/runtime/networkdriver/ipallocator/allocator.go
new file mode 100644
index 0000000000..70a7028bbe
--- /dev/null
+++ b/runtime/networkdriver/ipallocator/allocator.go
@@ -0,0 +1,159 @@
+package ipallocator
+
+import (
+	"encoding/binary"
+	"errors"
+	"github.com/dotcloud/docker/pkg/collections"
+	"github.com/dotcloud/docker/runtime/networkdriver"
+	"net"
+	"sync"
+)
+
+type networkSet map[string]*collections.OrderedIntSet
+
+var (
+	ErrNoAvailableIPs     = errors.New("no available ip addresses on network")
+	ErrIPAlreadyAllocated = errors.New("ip already allocated")
+)
+
+var (
+	lock         = sync.Mutex{}
+	allocatedIPs = networkSet{}
+	availableIPS = networkSet{}
+)
+
+// RequestIP requests an available ip from the given network.  It
+// will return the next available ip if the ip provided is nil.  If the
+// ip provided is not nil it will validate that the provided ip is available
+// for use or return an error
+func RequestIP(address *net.IPNet, ip *net.IP) (*net.IP, error) {
+	lock.Lock()
+	defer lock.Unlock()
+
+	checkAddress(address)
+
+	if ip == nil {
+		next, err := getNextIp(address)
+		if err != nil {
+			return nil, err
+		}
+		return next, nil
+	}
+
+	if err := registerIP(address, ip); err != nil {
+		return nil, err
+	}
+	return ip, nil
+}
+
+// ReleaseIP adds the provided ip back into the pool of
+// available ips to be returned for use.
+func ReleaseIP(address *net.IPNet, ip *net.IP) error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	checkAddress(address)
+
+	var (
+		existing  = allocatedIPs[address.String()]
+		available = availableIPS[address.String()]
+		pos       = getPosition(address, ip)
+	)
+
+	existing.Remove(int(pos))
+	available.Push(int(pos))
+
+	return nil
+}
+
+// convert the ip into the position in the subnet.  Only
+// position are saved in the set
+func getPosition(address *net.IPNet, ip *net.IP) int32 {
+	var (
+		first, _ = networkdriver.NetworkRange(address)
+		base     = ipToInt(&first)
+		i        = ipToInt(ip)
+	)
+	return i - base
+}
+
+// return an available ip if one is currently available.  If not,
+// return the next available ip for the nextwork
+func getNextIp(address *net.IPNet) (*net.IP, error) {
+	var (
+		ownIP     = ipToInt(&address.IP)
+		available = availableIPS[address.String()]
+		allocated = allocatedIPs[address.String()]
+		first, _  = networkdriver.NetworkRange(address)
+		base      = ipToInt(&first)
+		size      = int(networkdriver.NetworkSize(address.Mask))
+		max       = int32(size - 2) // size -1 for the broadcast address, -1 for the gateway address
+		pos       = int32(available.Pop())
+	)
+
+	// We pop and push the position not the ip
+	if pos != 0 {
+		ip := intToIP(int32(base + pos))
+		allocated.Push(int(pos))
+
+		return ip, nil
+	}
+
+	var (
+		firstNetIP = address.IP.To4().Mask(address.Mask)
+		firstAsInt = ipToInt(&firstNetIP) + 1
+	)
+
+	pos = int32(allocated.PullBack())
+	for i := int32(0); i < max; i++ {
+		pos = pos%max + 1
+		next := int32(base + pos)
+
+		if next == ownIP || next == firstAsInt {
+			continue
+		}
+
+		if !allocated.Exists(int(pos)) {
+			ip := intToIP(next)
+			allocated.Push(int(pos))
+			return ip, nil
+		}
+	}
+	return nil, ErrNoAvailableIPs
+}
+
+func registerIP(address *net.IPNet, ip *net.IP) error {
+	var (
+		existing  = allocatedIPs[address.String()]
+		available = availableIPS[address.String()]
+		pos       = getPosition(address, ip)
+	)
+
+	if existing.Exists(int(pos)) {
+		return ErrIPAlreadyAllocated
+	}
+	available.Remove(int(pos))
+
+	return nil
+}
+
+// Converts a 4 bytes IP into a 32 bit integer
+func ipToInt(ip *net.IP) int32 {
+	return int32(binary.BigEndian.Uint32(ip.To4()))
+}
+
+// Converts 32 bit integer into a 4 bytes IP address
+func intToIP(n int32) *net.IP {
+	b := make([]byte, 4)
+	binary.BigEndian.PutUint32(b, uint32(n))
+	ip := net.IP(b)
+	return &ip
+}
+
+func checkAddress(address *net.IPNet) {
+	key := address.String()
+	if _, exists := allocatedIPs[key]; !exists {
+		allocatedIPs[key] = collections.NewOrderedIntSet()
+		availableIPS[key] = collections.NewOrderedIntSet()
+	}
+}
diff --git a/runtime/networkdriver/ipallocator/allocator_test.go b/runtime/networkdriver/ipallocator/allocator_test.go
new file mode 100644
index 0000000000..5e9fcfc983
--- /dev/null
+++ b/runtime/networkdriver/ipallocator/allocator_test.go
@@ -0,0 +1,241 @@
+package ipallocator
+
+import (
+	"fmt"
+	"net"
+	"testing"
+)
+
+func reset() {
+	allocatedIPs = networkSet{}
+	availableIPS = networkSet{}
+}
+
+func TestRequestNewIps(t *testing.T) {
+	defer reset()
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 1},
+		Mask: []byte{255, 255, 255, 0},
+	}
+
+	for i := 2; i < 10; i++ {
+		ip, err := RequestIP(network, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if expected := fmt.Sprintf("192.168.0.%d", i); ip.String() != expected {
+			t.Fatalf("Expected ip %s got %s", expected, ip.String())
+		}
+	}
+}
+
+func TestReleaseIp(t *testing.T) {
+	defer reset()
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 1},
+		Mask: []byte{255, 255, 255, 0},
+	}
+
+	ip, err := RequestIP(network, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := ReleaseIP(network, ip); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestGetReleasedIp(t *testing.T) {
+	defer reset()
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 1},
+		Mask: []byte{255, 255, 255, 0},
+	}
+
+	ip, err := RequestIP(network, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	value := ip.String()
+	if err := ReleaseIP(network, ip); err != nil {
+		t.Fatal(err)
+	}
+
+	ip, err = RequestIP(network, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if ip.String() != value {
+		t.Fatalf("Expected to receive same ip %s got %s", value, ip.String())
+	}
+}
+
+func TestRequesetSpecificIp(t *testing.T) {
+	defer reset()
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 1},
+		Mask: []byte{255, 255, 255, 0},
+	}
+
+	ip := net.ParseIP("192.168.1.5")
+
+	if _, err := RequestIP(network, &ip); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestConversion(t *testing.T) {
+	ip := net.ParseIP("127.0.0.1")
+	i := ipToInt(&ip)
+	if i == 0 {
+		t.Fatal("converted to zero")
+	}
+	conv := intToIP(i)
+	if !ip.Equal(*conv) {
+		t.Error(conv.String())
+	}
+}
+
+func TestIPAllocator(t *testing.T) {
+	expectedIPs := []net.IP{
+		0: net.IPv4(127, 0, 0, 2),
+		1: net.IPv4(127, 0, 0, 3),
+		2: net.IPv4(127, 0, 0, 4),
+		3: net.IPv4(127, 0, 0, 5),
+		4: net.IPv4(127, 0, 0, 6),
+	}
+
+	gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
+	network := &net.IPNet{IP: gwIP, Mask: n.Mask}
+	// Pool after initialisation (f = free, u = used)
+	// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
+	//  ↑
+
+	// Check that we get 5 IPs, from 127.0.0.2–127.0.0.6, in that
+	// order.
+	for i := 0; i < 5; i++ {
+		ip, err := RequestIP(network, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		assertIPEquals(t, &expectedIPs[i], ip)
+	}
+	// Before loop begin
+	// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
+	//  ↑
+
+	// After i = 0
+	// 2(u) - 3(f) - 4(f) - 5(f) - 6(f)
+	//         ↑
+
+	// After i = 1
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
+	//                ↑
+
+	// After i = 2
+	// 2(u) - 3(u) - 4(u) - 5(f) - 6(f)
+	//                       ↑
+
+	// After i = 3
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(f)
+	//                              ↑
+
+	// After i = 4
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
+	//  ↑
+
+	// Check that there are no more IPs
+	ip, err := RequestIP(network, nil)
+	if err == nil {
+		t.Fatalf("There shouldn't be any IP addresses at this point, got %s\n", ip)
+	}
+
+	// Release some IPs in non-sequential order
+	if err := ReleaseIP(network, &expectedIPs[3]); err != nil {
+		t.Fatal(err)
+	}
+	// 2(u) - 3(u) - 4(u) - 5(f) - 6(u)
+	//                       ↑
+
+	if err := ReleaseIP(network, &expectedIPs[2]); err != nil {
+		t.Fatal(err)
+	}
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(u)
+	//                       ↑
+
+	if err := ReleaseIP(network, &expectedIPs[4]); err != nil {
+		t.Fatal(err)
+	}
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
+	//                       ↑
+
+	// Make sure that IPs are reused in sequential order, starting
+	// with the first released IP
+	newIPs := make([]*net.IP, 3)
+	for i := 0; i < 3; i++ {
+		ip, err := RequestIP(network, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		newIPs[i] = ip
+	}
+	// Before loop begin
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
+	//                       ↑
+
+	// After i = 0
+	// 2(u) - 3(u) - 4(f) - 5(u) - 6(f)
+	//                              ↑
+
+	// After i = 1
+	// 2(u) - 3(u) - 4(f) - 5(u) - 6(u)
+	//                ↑
+
+	// After i = 2
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
+	//                       ↑
+
+	// Reordered these because the new set will always return the
+	// lowest ips first and not in the order that they were released
+	assertIPEquals(t, &expectedIPs[2], newIPs[0])
+	assertIPEquals(t, &expectedIPs[3], newIPs[1])
+	assertIPEquals(t, &expectedIPs[4], newIPs[2])
+
+	_, err = RequestIP(network, nil)
+	if err == nil {
+		t.Fatal("There shouldn't be any IP addresses at this point")
+	}
+}
+
+func TestAllocateFirstIP(t *testing.T) {
+	defer reset()
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 0},
+		Mask: []byte{255, 255, 255, 0},
+	}
+
+	firstIP := network.IP.To4().Mask(network.Mask)
+	first := ipToInt(&firstIP) + 1
+
+	ip, err := RequestIP(network, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	allocated := ipToInt(ip)
+
+	if allocated == first {
+		t.Fatalf("allocated ip should not equal first ip: %d == %d", first, allocated)
+	}
+}
+
+func assertIPEquals(t *testing.T, ip1, ip2 *net.IP) {
+	if !ip1.Equal(*ip2) {
+		t.Fatalf("Expected IP %s, got %s", ip1, ip2)
+	}
+}
diff --git a/runtime/networkdriver/network.go b/runtime/networkdriver/network.go
new file mode 100644
index 0000000000..8dda789d2f
--- /dev/null
+++ b/runtime/networkdriver/network.go
@@ -0,0 +1,10 @@
+package networkdriver
+
+import (
+	"errors"
+)
+
+var (
+	ErrNetworkOverlapsWithNameservers = errors.New("requested network overlaps with nameserver")
+	ErrNetworkOverlaps                = errors.New("requested network overlaps with existing network")
+)
diff --git a/runtime/networkdriver/network_test.go b/runtime/networkdriver/network_test.go
new file mode 100644
index 0000000000..6224c2dffb
--- /dev/null
+++ b/runtime/networkdriver/network_test.go
@@ -0,0 +1,190 @@
+package networkdriver
+
+import (
+	"github.com/dotcloud/docker/pkg/netlink"
+	"net"
+	"testing"
+)
+
+func TestNonOverlapingNameservers(t *testing.T) {
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 1},
+		Mask: []byte{255, 255, 255, 0},
+	}
+	nameservers := []string{
+		"127.0.0.1/32",
+	}
+
+	if err := CheckNameserverOverlaps(nameservers, network); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestOverlapingNameservers(t *testing.T) {
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 1},
+		Mask: []byte{255, 255, 255, 0},
+	}
+	nameservers := []string{
+		"192.168.0.1/32",
+	}
+
+	if err := CheckNameserverOverlaps(nameservers, network); err == nil {
+		t.Fatalf("Expected error %s got %s", ErrNetworkOverlapsWithNameservers, err)
+	}
+}
+
+func TestCheckRouteOverlaps(t *testing.T) {
+	orig := networkGetRoutesFct
+	defer func() {
+		networkGetRoutesFct = orig
+	}()
+	networkGetRoutesFct = func() ([]netlink.Route, error) {
+		routesData := []string{"10.0.2.0/32", "10.0.3.0/24", "10.0.42.0/24", "172.16.42.0/24", "192.168.142.0/24"}
+
+		routes := []netlink.Route{}
+		for _, addr := range routesData {
+			_, netX, _ := net.ParseCIDR(addr)
+			routes = append(routes, netlink.Route{IPNet: netX})
+		}
+		return routes, nil
+	}
+
+	_, netX, _ := net.ParseCIDR("172.16.0.1/24")
+	if err := CheckRouteOverlaps(netX); err != nil {
+		t.Fatal(err)
+	}
+
+	_, netX, _ = net.ParseCIDR("10.0.2.0/24")
+	if err := CheckRouteOverlaps(netX); err == nil {
+		t.Fatalf("10.0.2.0/24 and 10.0.2.0 should overlap but it doesn't")
+	}
+}
+
+func TestCheckNameserverOverlaps(t *testing.T) {
+	nameservers := []string{"10.0.2.3/32", "192.168.102.1/32"}
+
+	_, netX, _ := net.ParseCIDR("10.0.2.3/32")
+
+	if err := CheckNameserverOverlaps(nameservers, netX); err == nil {
+		t.Fatalf("%s should overlap 10.0.2.3/32 but doesn't", netX)
+	}
+
+	_, netX, _ = net.ParseCIDR("192.168.102.2/32")
+
+	if err := CheckNameserverOverlaps(nameservers, netX); err != nil {
+		t.Fatalf("%s should not overlap %v but it does", netX, nameservers)
+	}
+}
+
+func AssertOverlap(CIDRx string, CIDRy string, t *testing.T) {
+	_, netX, _ := net.ParseCIDR(CIDRx)
+	_, netY, _ := net.ParseCIDR(CIDRy)
+	if !NetworkOverlaps(netX, netY) {
+		t.Errorf("%v and %v should overlap", netX, netY)
+	}
+}
+
+func AssertNoOverlap(CIDRx string, CIDRy string, t *testing.T) {
+	_, netX, _ := net.ParseCIDR(CIDRx)
+	_, netY, _ := net.ParseCIDR(CIDRy)
+	if NetworkOverlaps(netX, netY) {
+		t.Errorf("%v and %v should not overlap", netX, netY)
+	}
+}
+
+func TestNetworkOverlaps(t *testing.T) {
+	//netY starts at same IP and ends within netX
+	AssertOverlap("172.16.0.1/24", "172.16.0.1/25", t)
+	//netY starts within netX and ends at same IP
+	AssertOverlap("172.16.0.1/24", "172.16.0.128/25", t)
+	//netY starts and ends within netX
+	AssertOverlap("172.16.0.1/24", "172.16.0.64/25", t)
+	//netY starts at same IP and ends outside of netX
+	AssertOverlap("172.16.0.1/24", "172.16.0.1/23", t)
+	//netY starts before and ends at same IP of netX
+	AssertOverlap("172.16.1.1/24", "172.16.0.1/23", t)
+	//netY starts before and ends outside of netX
+	AssertOverlap("172.16.1.1/24", "172.16.0.1/22", t)
+	//netY starts and ends before netX
+	AssertNoOverlap("172.16.1.1/25", "172.16.0.1/24", t)
+	//netX starts and ends before netY
+	AssertNoOverlap("172.16.1.1/25", "172.16.2.1/24", t)
+}
+
+func TestNetworkRange(t *testing.T) {
+	// Simple class C test
+	_, network, _ := net.ParseCIDR("192.168.0.1/24")
+	first, last := NetworkRange(network)
+	if !first.Equal(net.ParseIP("192.168.0.0")) {
+		t.Error(first.String())
+	}
+	if !last.Equal(net.ParseIP("192.168.0.255")) {
+		t.Error(last.String())
+	}
+	if size := NetworkSize(network.Mask); size != 256 {
+		t.Error(size)
+	}
+
+	// Class A test
+	_, network, _ = net.ParseCIDR("10.0.0.1/8")
+	first, last = NetworkRange(network)
+	if !first.Equal(net.ParseIP("10.0.0.0")) {
+		t.Error(first.String())
+	}
+	if !last.Equal(net.ParseIP("10.255.255.255")) {
+		t.Error(last.String())
+	}
+	if size := NetworkSize(network.Mask); size != 16777216 {
+		t.Error(size)
+	}
+
+	// Class A, random IP address
+	_, network, _ = net.ParseCIDR("10.1.2.3/8")
+	first, last = NetworkRange(network)
+	if !first.Equal(net.ParseIP("10.0.0.0")) {
+		t.Error(first.String())
+	}
+	if !last.Equal(net.ParseIP("10.255.255.255")) {
+		t.Error(last.String())
+	}
+
+	// 32bit mask
+	_, network, _ = net.ParseCIDR("10.1.2.3/32")
+	first, last = NetworkRange(network)
+	if !first.Equal(net.ParseIP("10.1.2.3")) {
+		t.Error(first.String())
+	}
+	if !last.Equal(net.ParseIP("10.1.2.3")) {
+		t.Error(last.String())
+	}
+	if size := NetworkSize(network.Mask); size != 1 {
+		t.Error(size)
+	}
+
+	// 31bit mask
+	_, network, _ = net.ParseCIDR("10.1.2.3/31")
+	first, last = NetworkRange(network)
+	if !first.Equal(net.ParseIP("10.1.2.2")) {
+		t.Error(first.String())
+	}
+	if !last.Equal(net.ParseIP("10.1.2.3")) {
+		t.Error(last.String())
+	}
+	if size := NetworkSize(network.Mask); size != 2 {
+		t.Error(size)
+	}
+
+	// 26bit mask
+	_, network, _ = net.ParseCIDR("10.1.2.3/26")
+	first, last = NetworkRange(network)
+	if !first.Equal(net.ParseIP("10.1.2.0")) {
+		t.Error(first.String())
+	}
+	if !last.Equal(net.ParseIP("10.1.2.63")) {
+		t.Error(last.String())
+	}
+	if size := NetworkSize(network.Mask); size != 64 {
+		t.Error(size)
+	}
+}
diff --git a/runtime/networkdriver/portallocator/portallocator.go b/runtime/networkdriver/portallocator/portallocator.go
new file mode 100644
index 0000000000..9ecd447116
--- /dev/null
+++ b/runtime/networkdriver/portallocator/portallocator.go
@@ -0,0 +1,188 @@
+package portallocator
+
+import (
+	"errors"
+	"github.com/dotcloud/docker/pkg/collections"
+	"net"
+	"sync"
+)
+
+const (
+	BeginPortRange = 49153
+	EndPortRange   = 65535
+)
+
+type (
+	portMappings map[string]*collections.OrderedIntSet
+	ipMapping    map[string]portMappings
+)
+
+var (
+	ErrAllPortsAllocated    = errors.New("all ports are allocated")
+	ErrPortAlreadyAllocated = errors.New("port has already been allocated")
+	ErrUnknownProtocol      = errors.New("unknown protocol")
+)
+
+var (
+	currentDynamicPort = map[string]int{
+		"tcp": BeginPortRange - 1,
+		"udp": BeginPortRange - 1,
+	}
+	defaultIP             = net.ParseIP("0.0.0.0")
+	defaultAllocatedPorts = portMappings{}
+	otherAllocatedPorts   = ipMapping{}
+	lock                  = sync.Mutex{}
+)
+
+func init() {
+	defaultAllocatedPorts["tcp"] = collections.NewOrderedIntSet()
+	defaultAllocatedPorts["udp"] = collections.NewOrderedIntSet()
+}
+
+// RequestPort returns an available port if the port is 0
+// If the provided port is not 0 then it will be checked if
+// it is available for allocation
+func RequestPort(ip net.IP, proto string, port int) (int, error) {
+	lock.Lock()
+	defer lock.Unlock()
+
+	if err := validateProtocol(proto); err != nil {
+		return 0, err
+	}
+
+	// If the user requested a specific port to be allocated
+	if port > 0 {
+		if err := registerSetPort(ip, proto, port); err != nil {
+			return 0, err
+		}
+		return port, nil
+	}
+	return registerDynamicPort(ip, proto)
+}
+
+// ReleasePort will return the provided port back into the
+// pool for reuse
+func ReleasePort(ip net.IP, proto string, port int) error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	if err := validateProtocol(proto); err != nil {
+		return err
+	}
+
+	allocated := defaultAllocatedPorts[proto]
+	allocated.Remove(port)
+
+	if !equalsDefault(ip) {
+		registerIP(ip)
+
+		// Remove the port for the specific ip address
+		allocated = otherAllocatedPorts[ip.String()][proto]
+		allocated.Remove(port)
+	}
+	return nil
+}
+
+func ReleaseAll() error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	currentDynamicPort["tcp"] = BeginPortRange - 1
+	currentDynamicPort["udp"] = BeginPortRange - 1
+
+	defaultAllocatedPorts = portMappings{}
+	defaultAllocatedPorts["tcp"] = collections.NewOrderedIntSet()
+	defaultAllocatedPorts["udp"] = collections.NewOrderedIntSet()
+
+	otherAllocatedPorts = ipMapping{}
+
+	return nil
+}
+
+func registerDynamicPort(ip net.IP, proto string) (int, error) {
+
+	if !equalsDefault(ip) {
+		registerIP(ip)
+
+		ipAllocated := otherAllocatedPorts[ip.String()][proto]
+
+		port, err := findNextPort(proto, ipAllocated)
+		if err != nil {
+			return 0, err
+		}
+		ipAllocated.Push(port)
+		return port, nil
+
+	} else {
+
+		allocated := defaultAllocatedPorts[proto]
+
+		port, err := findNextPort(proto, allocated)
+		if err != nil {
+			return 0, err
+		}
+		allocated.Push(port)
+		return port, nil
+	}
+}
+
+func registerSetPort(ip net.IP, proto string, port int) error {
+	allocated := defaultAllocatedPorts[proto]
+	if allocated.Exists(port) {
+		return ErrPortAlreadyAllocated
+	}
+
+	if !equalsDefault(ip) {
+		registerIP(ip)
+
+		ipAllocated := otherAllocatedPorts[ip.String()][proto]
+		if ipAllocated.Exists(port) {
+			return ErrPortAlreadyAllocated
+		}
+		ipAllocated.Push(port)
+	} else {
+		allocated.Push(port)
+	}
+	return nil
+}
+
+func equalsDefault(ip net.IP) bool {
+	return ip == nil || ip.Equal(defaultIP)
+}
+
+func findNextPort(proto string, allocated *collections.OrderedIntSet) (int, error) {
+	port := nextPort(proto)
+	startSearchPort := port
+	for allocated.Exists(port) {
+		port = nextPort(proto)
+		if startSearchPort == port {
+			return 0, ErrAllPortsAllocated
+		}
+	}
+	return port, nil
+}
+
+func nextPort(proto string) int {
+	c := currentDynamicPort[proto] + 1
+	if c > EndPortRange {
+		c = BeginPortRange
+	}
+	currentDynamicPort[proto] = c
+	return c
+}
+
+func registerIP(ip net.IP) {
+	if _, exists := otherAllocatedPorts[ip.String()]; !exists {
+		otherAllocatedPorts[ip.String()] = portMappings{
+			"tcp": collections.NewOrderedIntSet(),
+			"udp": collections.NewOrderedIntSet(),
+		}
+	}
+}
+
+func validateProtocol(proto string) error {
+	if _, exists := defaultAllocatedPorts[proto]; !exists {
+		return ErrUnknownProtocol
+	}
+	return nil
+}
diff --git a/runtime/networkdriver/portallocator/portallocator_test.go b/runtime/networkdriver/portallocator/portallocator_test.go
new file mode 100644
index 0000000000..5a4765ddd4
--- /dev/null
+++ b/runtime/networkdriver/portallocator/portallocator_test.go
@@ -0,0 +1,213 @@
+package portallocator
+
+import (
+	"net"
+	"testing"
+)
+
+func reset() {
+	ReleaseAll()
+}
+
+func TestRequestNewPort(t *testing.T) {
+	defer reset()
+
+	port, err := RequestPort(defaultIP, "tcp", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if expected := BeginPortRange; port != expected {
+		t.Fatalf("Expected port %d got %d", expected, port)
+	}
+}
+
+func TestRequestSpecificPort(t *testing.T) {
+	defer reset()
+
+	port, err := RequestPort(defaultIP, "tcp", 5000)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if port != 5000 {
+		t.Fatalf("Expected port 5000 got %d", port)
+	}
+}
+
+func TestReleasePort(t *testing.T) {
+	defer reset()
+
+	port, err := RequestPort(defaultIP, "tcp", 5000)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if port != 5000 {
+		t.Fatalf("Expected port 5000 got %d", port)
+	}
+
+	if err := ReleasePort(defaultIP, "tcp", 5000); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestReuseReleasedPort(t *testing.T) {
+	defer reset()
+
+	port, err := RequestPort(defaultIP, "tcp", 5000)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if port != 5000 {
+		t.Fatalf("Expected port 5000 got %d", port)
+	}
+
+	if err := ReleasePort(defaultIP, "tcp", 5000); err != nil {
+		t.Fatal(err)
+	}
+
+	port, err = RequestPort(defaultIP, "tcp", 5000)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestReleaseUnreadledPort(t *testing.T) {
+	defer reset()
+
+	port, err := RequestPort(defaultIP, "tcp", 5000)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if port != 5000 {
+		t.Fatalf("Expected port 5000 got %d", port)
+	}
+
+	port, err = RequestPort(defaultIP, "tcp", 5000)
+	if err != ErrPortAlreadyAllocated {
+		t.Fatalf("Expected error %s got %s", ErrPortAlreadyAllocated, err)
+	}
+}
+
+func TestUnknowProtocol(t *testing.T) {
+	defer reset()
+
+	if _, err := RequestPort(defaultIP, "tcpp", 0); err != ErrUnknownProtocol {
+		t.Fatalf("Expected error %s got %s", ErrUnknownProtocol, err)
+	}
+}
+
+func TestAllocateAllPorts(t *testing.T) {
+	defer reset()
+
+	for i := 0; i <= EndPortRange-BeginPortRange; i++ {
+		port, err := RequestPort(defaultIP, "tcp", 0)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if expected := BeginPortRange + i; port != expected {
+			t.Fatalf("Expected port %d got %d", expected, port)
+		}
+	}
+
+	if _, err := RequestPort(defaultIP, "tcp", 0); err != ErrAllPortsAllocated {
+		t.Fatalf("Expected error %s got %s", ErrAllPortsAllocated, err)
+	}
+
+	_, err := RequestPort(defaultIP, "udp", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// release a port in the middle and ensure we get another tcp port
+	port := BeginPortRange + 5
+	if err := ReleasePort(defaultIP, "tcp", port); err != nil {
+		t.Fatal(err)
+	}
+	newPort, err := RequestPort(defaultIP, "tcp", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if newPort != port {
+		t.Fatalf("Expected port %d got %d", port, newPort)
+	}
+}
+
+func BenchmarkAllocatePorts(b *testing.B) {
+	defer reset()
+
+	for i := 0; i < b.N; i++ {
+		for i := 0; i <= EndPortRange-BeginPortRange; i++ {
+			port, err := RequestPort(defaultIP, "tcp", 0)
+			if err != nil {
+				b.Fatal(err)
+			}
+
+			if expected := BeginPortRange + i; port != expected {
+				b.Fatalf("Expected port %d got %d", expected, port)
+			}
+		}
+		reset()
+	}
+}
+
+func TestPortAllocation(t *testing.T) {
+	defer reset()
+
+	ip := net.ParseIP("192.168.0.1")
+	ip2 := net.ParseIP("192.168.0.2")
+	if port, err := RequestPort(ip, "tcp", 80); err != nil {
+		t.Fatal(err)
+	} else if port != 80 {
+		t.Fatalf("Acquire(80) should return 80, not %d", port)
+	}
+	port, err := RequestPort(ip, "tcp", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if port <= 0 {
+		t.Fatalf("Acquire(0) should return a non-zero port")
+	}
+
+	if _, err := RequestPort(ip, "tcp", port); err == nil {
+		t.Fatalf("Acquiring a port already in use should return an error")
+	}
+
+	if newPort, err := RequestPort(ip, "tcp", 0); err != nil {
+		t.Fatal(err)
+	} else if newPort == port {
+		t.Fatalf("Acquire(0) allocated the same port twice: %d", port)
+	}
+
+	if _, err := RequestPort(ip, "tcp", 80); err == nil {
+		t.Fatalf("Acquiring a port already in use should return an error")
+	}
+	if _, err := RequestPort(ip2, "tcp", 80); err != nil {
+		t.Fatalf("It should be possible to allocate the same port on a different interface")
+	}
+	if _, err := RequestPort(ip2, "tcp", 80); err == nil {
+		t.Fatalf("Acquiring a port already in use should return an error")
+	}
+	if err := ReleasePort(ip, "tcp", 80); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := RequestPort(ip, "tcp", 80); err != nil {
+		t.Fatal(err)
+	}
+
+	port, err = RequestPort(ip, "tcp", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	port2, err := RequestPort(ip, "tcp", port+1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	port3, err := RequestPort(ip, "tcp", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if port3 == port2 {
+		t.Fatal("Requesting a dynamic port should never allocate a used port")
+	}
+}
diff --git a/runtime/networkdriver/portmapper/mapper.go b/runtime/networkdriver/portmapper/mapper.go
new file mode 100644
index 0000000000..e29959a245
--- /dev/null
+++ b/runtime/networkdriver/portmapper/mapper.go
@@ -0,0 +1,131 @@
+package portmapper
+
+import (
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/pkg/iptables"
+	"github.com/dotcloud/docker/pkg/proxy"
+	"net"
+	"sync"
+)
+
+type mapping struct {
+	proto         string
+	userlandProxy proxy.Proxy
+	host          net.Addr
+	container     net.Addr
+}
+
+var (
+	chain *iptables.Chain
+	lock  sync.Mutex
+
+	// udp:ip:port
+	currentMappings = make(map[string]*mapping)
+	newProxy        = proxy.NewProxy
+)
+
+var (
+	ErrUnknownBackendAddressType = errors.New("unknown container address type not supported")
+	ErrPortMappedForIP           = errors.New("port is already mapped to ip")
+	ErrPortNotMapped             = errors.New("port is not mapped")
+)
+
+func SetIptablesChain(c *iptables.Chain) {
+	chain = c
+}
+
+func Map(container net.Addr, hostIP net.IP, hostPort int) error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	var m *mapping
+	switch container.(type) {
+	case *net.TCPAddr:
+		m = &mapping{
+			proto:     "tcp",
+			host:      &net.TCPAddr{IP: hostIP, Port: hostPort},
+			container: container,
+		}
+	case *net.UDPAddr:
+		m = &mapping{
+			proto:     "udp",
+			host:      &net.UDPAddr{IP: hostIP, Port: hostPort},
+			container: container,
+		}
+	default:
+		return ErrUnknownBackendAddressType
+	}
+
+	key := getKey(m.host)
+	if _, exists := currentMappings[key]; exists {
+		return ErrPortMappedForIP
+	}
+
+	containerIP, containerPort := getIPAndPort(m.container)
+	if err := forward(iptables.Add, m.proto, hostIP, hostPort, containerIP.String(), containerPort); err != nil {
+		return err
+	}
+
+	p, err := newProxy(m.host, m.container)
+	if err != nil {
+		// need to undo the iptables rules before we reutrn
+		forward(iptables.Delete, m.proto, hostIP, hostPort, containerIP.String(), containerPort)
+		return err
+	}
+
+	m.userlandProxy = p
+	currentMappings[key] = m
+
+	go p.Run()
+
+	return nil
+}
+
+func Unmap(host net.Addr) error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	key := getKey(host)
+	data, exists := currentMappings[key]
+	if !exists {
+		return ErrPortNotMapped
+	}
+
+	data.userlandProxy.Close()
+	delete(currentMappings, key)
+
+	containerIP, containerPort := getIPAndPort(data.container)
+	hostIP, hostPort := getIPAndPort(data.host)
+	if err := forward(iptables.Delete, data.proto, hostIP, hostPort, containerIP.String(), containerPort); err != nil {
+		return err
+	}
+	return nil
+}
+
+func getKey(a net.Addr) string {
+	switch t := a.(type) {
+	case *net.TCPAddr:
+		return fmt.Sprintf("%s:%d/%s", t.IP.String(), t.Port, "tcp")
+	case *net.UDPAddr:
+		return fmt.Sprintf("%s:%d/%s", t.IP.String(), t.Port, "udp")
+	}
+	return ""
+}
+
+func getIPAndPort(a net.Addr) (net.IP, int) {
+	switch t := a.(type) {
+	case *net.TCPAddr:
+		return t.IP, t.Port
+	case *net.UDPAddr:
+		return t.IP, t.Port
+	}
+	return nil, 0
+}
+
+func forward(action iptables.Action, proto string, sourceIP net.IP, sourcePort int, containerIP string, containerPort int) error {
+	if chain == nil {
+		return nil
+	}
+	return chain.Forward(action, sourceIP, sourcePort, proto, containerIP, containerPort)
+}
diff --git a/runtime/networkdriver/portmapper/mapper_test.go b/runtime/networkdriver/portmapper/mapper_test.go
new file mode 100644
index 0000000000..4c09f3c651
--- /dev/null
+++ b/runtime/networkdriver/portmapper/mapper_test.go
@@ -0,0 +1,107 @@
+package portmapper
+
+import (
+	"github.com/dotcloud/docker/pkg/iptables"
+	"github.com/dotcloud/docker/pkg/proxy"
+	"net"
+	"testing"
+)
+
+func init() {
+	// override this func to mock out the proxy server
+	newProxy = proxy.NewStubProxy
+}
+
+func reset() {
+	chain = nil
+	currentMappings = make(map[string]*mapping)
+}
+
+func TestSetIptablesChain(t *testing.T) {
+	defer reset()
+
+	c := &iptables.Chain{
+		Name:   "TEST",
+		Bridge: "192.168.1.1",
+	}
+
+	if chain != nil {
+		t.Fatal("chain should be nil at init")
+	}
+
+	SetIptablesChain(c)
+	if chain == nil {
+		t.Fatal("chain should not be nil after set")
+	}
+}
+
+func TestMapPorts(t *testing.T) {
+	dstIp1 := net.ParseIP("192.168.0.1")
+	dstIp2 := net.ParseIP("192.168.0.2")
+	dstAddr1 := &net.TCPAddr{IP: dstIp1, Port: 80}
+	dstAddr2 := &net.TCPAddr{IP: dstIp2, Port: 80}
+
+	srcAddr1 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
+	srcAddr2 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
+
+	if err := Map(srcAddr1, dstIp1, 80); err != nil {
+		t.Fatalf("Failed to allocate port: %s", err)
+	}
+
+	if Map(srcAddr1, dstIp1, 80) == nil {
+		t.Fatalf("Port is in use - mapping should have failed")
+	}
+
+	if Map(srcAddr2, dstIp1, 80) == nil {
+		t.Fatalf("Port is in use - mapping should have failed")
+	}
+
+	if err := Map(srcAddr2, dstIp2, 80); err != nil {
+		t.Fatalf("Failed to allocate port: %s", err)
+	}
+
+	if Unmap(dstAddr1) != nil {
+		t.Fatalf("Failed to release port")
+	}
+
+	if Unmap(dstAddr2) != nil {
+		t.Fatalf("Failed to release port")
+	}
+
+	if Unmap(dstAddr2) == nil {
+		t.Fatalf("Port already released, but no error reported")
+	}
+}
+
+func TestGetUDPKey(t *testing.T) {
+	addr := &net.UDPAddr{IP: net.ParseIP("192.168.1.5"), Port: 53}
+
+	key := getKey(addr)
+
+	if expected := "192.168.1.5:53/udp"; key != expected {
+		t.Fatalf("expected key %s got %s", expected, key)
+	}
+}
+
+func TestGetTCPKey(t *testing.T) {
+	addr := &net.TCPAddr{IP: net.ParseIP("192.168.1.5"), Port: 80}
+
+	key := getKey(addr)
+
+	if expected := "192.168.1.5:80/tcp"; key != expected {
+		t.Fatalf("expected key %s got %s", expected, key)
+	}
+}
+
+func TestGetUDPIPAndPort(t *testing.T) {
+	addr := &net.UDPAddr{IP: net.ParseIP("192.168.1.5"), Port: 53}
+
+	ip, port := getIPAndPort(addr)
+	if expected := "192.168.1.5"; ip.String() != expected {
+		t.Fatalf("expected ip %s got %s", expected, ip)
+	}
+
+	if ep := 53; port != ep {
+		t.Fatalf("expected port %d got %d", ep, port)
+	}
+}
diff --git a/runtime/networkdriver/utils.go b/runtime/networkdriver/utils.go
new file mode 100644
index 0000000000..0a4ef70c95
--- /dev/null
+++ b/runtime/networkdriver/utils.go
@@ -0,0 +1,118 @@
+package networkdriver
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"net"
+
+	"github.com/dotcloud/docker/pkg/netlink"
+)
+
+var (
+	networkGetRoutesFct = netlink.NetworkGetRoutes
+	ErrNoDefaultRoute   = errors.New("no default route")
+)
+
+func CheckNameserverOverlaps(nameservers []string, toCheck *net.IPNet) error {
+	if len(nameservers) > 0 {
+		for _, ns := range nameservers {
+			_, nsNetwork, err := net.ParseCIDR(ns)
+			if err != nil {
+				return err
+			}
+			if NetworkOverlaps(toCheck, nsNetwork) {
+				return ErrNetworkOverlapsWithNameservers
+			}
+		}
+	}
+	return nil
+}
+
+func CheckRouteOverlaps(toCheck *net.IPNet) error {
+	networks, err := networkGetRoutesFct()
+	if err != nil {
+		return err
+	}
+
+	for _, network := range networks {
+		if network.IPNet != nil && NetworkOverlaps(toCheck, network.IPNet) {
+			return ErrNetworkOverlaps
+		}
+	}
+	return nil
+}
+
+// Detects overlap between one IPNet and another
+func NetworkOverlaps(netX *net.IPNet, netY *net.IPNet) bool {
+	if firstIP, _ := NetworkRange(netX); netY.Contains(firstIP) {
+		return true
+	}
+	if firstIP, _ := NetworkRange(netY); netX.Contains(firstIP) {
+		return true
+	}
+	return false
+}
+
+// Calculates the first and last IP addresses in an IPNet
+func NetworkRange(network *net.IPNet) (net.IP, net.IP) {
+	var (
+		netIP   = network.IP.To4()
+		firstIP = netIP.Mask(network.Mask)
+		lastIP  = net.IPv4(0, 0, 0, 0).To4()
+	)
+
+	for i := 0; i < len(lastIP); i++ {
+		lastIP[i] = netIP[i] | ^network.Mask[i]
+	}
+	return firstIP, lastIP
+}
+
+// Given a netmask, calculates the number of available hosts
+func NetworkSize(mask net.IPMask) int32 {
+	m := net.IPv4Mask(0, 0, 0, 0)
+	for i := 0; i < net.IPv4len; i++ {
+		m[i] = ^mask[i]
+	}
+	return int32(binary.BigEndian.Uint32(m)) + 1
+}
+
+// Return the IPv4 address of a network interface
+func GetIfaceAddr(name string) (net.Addr, error) {
+	iface, err := net.InterfaceByName(name)
+	if err != nil {
+		return nil, err
+	}
+	addrs, err := iface.Addrs()
+	if err != nil {
+		return nil, err
+	}
+	var addrs4 []net.Addr
+	for _, addr := range addrs {
+		ip := (addr.(*net.IPNet)).IP
+		if ip4 := ip.To4(); len(ip4) == net.IPv4len {
+			addrs4 = append(addrs4, addr)
+		}
+	}
+	switch {
+	case len(addrs4) == 0:
+		return nil, fmt.Errorf("Interface %v has no IP addresses", name)
+	case len(addrs4) > 1:
+		fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
+			name, (addrs4[0].(*net.IPNet)).IP)
+	}
+	return addrs4[0], nil
+}
+
+func GetDefaultRouteIface() (*net.Interface, error) {
+	rs, err := networkGetRoutesFct()
+	if err != nil {
+		return nil, fmt.Errorf("unable to get routes: %v", err)
+	}
+	for _, r := range rs {
+		if r.Default {
+			return r.Iface, nil
+		}
+	}
+	return nil, ErrNoDefaultRoute
+}
diff --git a/runtime/runtime.go b/runtime/runtime.go
new file mode 100644
index 0000000000..98903cfa08
--- /dev/null
+++ b/runtime/runtime.go
@@ -0,0 +1,993 @@
+package runtime
+
+import (
+	"container/list"
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/daemonconfig"
+	"github.com/dotcloud/docker/dockerversion"
+	"github.com/dotcloud/docker/engine"
+	"github.com/dotcloud/docker/graph"
+	"github.com/dotcloud/docker/image"
+	"github.com/dotcloud/docker/pkg/graphdb"
+	"github.com/dotcloud/docker/pkg/mount"
+	"github.com/dotcloud/docker/pkg/selinux"
+	"github.com/dotcloud/docker/pkg/sysinfo"
+	"github.com/dotcloud/docker/runconfig"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/dotcloud/docker/runtime/execdriver/execdrivers"
+	"github.com/dotcloud/docker/runtime/execdriver/lxc"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	_ "github.com/dotcloud/docker/runtime/graphdriver/vfs"
+	_ "github.com/dotcloud/docker/runtime/networkdriver/bridge"
+	"github.com/dotcloud/docker/runtime/networkdriver/portallocator"
+	"github.com/dotcloud/docker/utils"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+)
+
+// Set the max depth to the aufs default that most
+// kernels are compiled with
+// For more information see: http://sourceforge.net/p/aufs/aufs3-standalone/ci/aufs3.12/tree/config.mk
+const MaxImageDepth = 127
+
+var (
+	DefaultDns                = []string{"8.8.8.8", "8.8.4.4"}
+	validContainerNameChars   = `[a-zA-Z0-9_.-]`
+	validContainerNamePattern = regexp.MustCompile(`^/?` + validContainerNameChars + `+$`)
+)
+
+type Runtime struct {
+	repository     string
+	sysInitPath    string
+	containers     *list.List
+	graph          *graph.Graph
+	repositories   *graph.TagStore
+	idIndex        *utils.TruncIndex
+	sysInfo        *sysinfo.SysInfo
+	volumes        *graph.Graph
+	srv            Server
+	eng            *engine.Engine
+	config         *daemonconfig.Config
+	containerGraph *graphdb.Database
+	driver         graphdriver.Driver
+	execDriver     execdriver.Driver
+}
+
+// Mountpoints should be private to the container
+func remountPrivate(mountPoint string) error {
+	mounted, err := mount.Mounted(mountPoint)
+	if err != nil {
+		return err
+	}
+
+	if !mounted {
+		if err := mount.Mount(mountPoint, mountPoint, "none", "bind,rw"); err != nil {
+			return err
+		}
+	}
+	return mount.ForceMount("", mountPoint, "none", "private")
+}
+
+// List returns an array of all containers registered in the runtime.
+func (runtime *Runtime) List() []*Container {
+	containers := new(History)
+	for e := runtime.containers.Front(); e != nil; e = e.Next() {
+		containers.Add(e.Value.(*Container))
+	}
+	return *containers
+}
+
+func (runtime *Runtime) getContainerElement(id string) *list.Element {
+	for e := runtime.containers.Front(); e != nil; e = e.Next() {
+		container := e.Value.(*Container)
+		if container.ID == id {
+			return e
+		}
+	}
+	return nil
+}
+
+// Get looks for a container by the specified ID or name, and returns it.
+// If the container is not found, or if an error occurs, nil is returned.
+func (runtime *Runtime) Get(name string) *Container {
+	if c, _ := runtime.GetByName(name); c != nil {
+		return c
+	}
+
+	id, err := runtime.idIndex.Get(name)
+	if err != nil {
+		return nil
+	}
+
+	e := runtime.getContainerElement(id)
+	if e == nil {
+		return nil
+	}
+	return e.Value.(*Container)
+}
+
+// Exists returns a true if a container of the specified ID or name exists,
+// false otherwise.
+func (runtime *Runtime) Exists(id string) bool {
+	return runtime.Get(id) != nil
+}
+
+func (runtime *Runtime) containerRoot(id string) string {
+	return path.Join(runtime.repository, id)
+}
+
+// Load reads the contents of a container from disk
+// This is typically done at startup.
+func (runtime *Runtime) load(id string) (*Container, error) {
+	container := &Container{root: runtime.containerRoot(id)}
+	if err := container.FromDisk(); err != nil {
+		return nil, err
+	}
+	if container.ID != id {
+		return container, fmt.Errorf("Container %s is stored at %s", container.ID, id)
+	}
+	if container.State.IsRunning() {
+		container.State.SetGhost(true)
+	}
+	return container, nil
+}
+
+// Register makes a container object usable by the runtime as <container.ID>
+func (runtime *Runtime) Register(container *Container) error {
+	if container.runtime != nil || runtime.Exists(container.ID) {
+		return fmt.Errorf("Container is already loaded")
+	}
+	if err := validateID(container.ID); err != nil {
+		return err
+	}
+	if err := runtime.ensureName(container); err != nil {
+		return err
+	}
+
+	container.runtime = runtime
+
+	// Attach to stdout and stderr
+	container.stderr = utils.NewWriteBroadcaster()
+	container.stdout = utils.NewWriteBroadcaster()
+	// Attach to stdin
+	if container.Config.OpenStdin {
+		container.stdin, container.stdinPipe = io.Pipe()
+	} else {
+		container.stdinPipe = utils.NopWriteCloser(ioutil.Discard) // Silently drop stdin
+	}
+	// done
+	runtime.containers.PushBack(container)
+	runtime.idIndex.Add(container.ID)
+
+	// FIXME: if the container is supposed to be running but is not, auto restart it?
+	//        if so, then we need to restart monitor and init a new lock
+	// If the container is supposed to be running, make sure of it
+	if container.State.IsRunning() {
+		if container.State.IsGhost() {
+			utils.Debugf("killing ghost %s", container.ID)
+
+			existingPid := container.State.Pid
+			container.State.SetGhost(false)
+			container.State.SetStopped(0)
+
+			// We only have to handle this for lxc because the other drivers will ensure that
+			// no ghost processes are left when docker dies
+			if container.ExecDriver == "" || strings.Contains(container.ExecDriver, "lxc") {
+				lxc.KillLxc(container.ID, 9)
+			} else {
+				// use the current driver and ensure that the container is dead x.x
+				cmd := &execdriver.Command{
+					ID: container.ID,
+				}
+				var err error
+				cmd.Process, err = os.FindProcess(existingPid)
+				if err != nil {
+					utils.Debugf("cannot find existing process for %d", existingPid)
+				}
+				runtime.execDriver.Terminate(cmd)
+			}
+			if err := container.Unmount(); err != nil {
+				utils.Debugf("ghost unmount error %s", err)
+			}
+			if err := container.ToDisk(); err != nil {
+				utils.Debugf("saving ghost state to disk %s", err)
+			}
+		}
+
+		info := runtime.execDriver.Info(container.ID)
+		if !info.IsRunning() {
+			utils.Debugf("Container %s was supposed to be running but is not.", container.ID)
+			if runtime.config.AutoRestart {
+				utils.Debugf("Restarting")
+				if err := container.Unmount(); err != nil {
+					utils.Debugf("restart unmount error %s", err)
+				}
+
+				container.State.SetGhost(false)
+				container.State.SetStopped(0)
+				if err := container.Start(); err != nil {
+					return err
+				}
+			} else {
+				utils.Debugf("Marking as stopped")
+				container.State.SetStopped(-127)
+				if err := container.ToDisk(); err != nil {
+					return err
+				}
+			}
+		}
+	} else {
+		// When the container is not running, we still initialize the waitLock
+		// chan and close it. Receiving on nil chan blocks whereas receiving on a
+		// closed chan does not. In this case we do not want to block.
+		container.waitLock = make(chan struct{})
+		close(container.waitLock)
+	}
+	return nil
+}
+
+func (runtime *Runtime) ensureName(container *Container) error {
+	if container.Name == "" {
+		name, err := generateRandomName(runtime)
+		if err != nil {
+			name = utils.TruncateID(container.ID)
+		}
+		container.Name = name
+
+		if err := container.ToDisk(); err != nil {
+			utils.Debugf("Error saving container name %s", err)
+		}
+		if !runtime.containerGraph.Exists(name) {
+			if _, err := runtime.containerGraph.Set(name, container.ID); err != nil {
+				utils.Debugf("Setting default id - %s", err)
+			}
+		}
+	}
+	return nil
+}
+
+func (runtime *Runtime) LogToDisk(src *utils.WriteBroadcaster, dst, stream string) error {
+	log, err := os.OpenFile(dst, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0600)
+	if err != nil {
+		return err
+	}
+	src.AddWriter(log, stream)
+	return nil
+}
+
+// Destroy unregisters a container from the runtime and cleanly removes its contents from the filesystem.
+func (runtime *Runtime) Destroy(container *Container) error {
+	if container == nil {
+		return fmt.Errorf("The given container is <nil>")
+	}
+
+	element := runtime.getContainerElement(container.ID)
+	if element == nil {
+		return fmt.Errorf("Container %v not found - maybe it was already destroyed?", container.ID)
+	}
+
+	if err := container.Stop(3); err != nil {
+		return err
+	}
+
+	if err := runtime.driver.Remove(container.ID); err != nil {
+		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", runtime.driver, container.ID, err)
+	}
+
+	initID := fmt.Sprintf("%s-init", container.ID)
+	if err := runtime.driver.Remove(initID); err != nil {
+		return fmt.Errorf("Driver %s failed to remove init filesystem %s: %s", runtime.driver, initID, err)
+	}
+
+	if _, err := runtime.containerGraph.Purge(container.ID); err != nil {
+		utils.Debugf("Unable to remove container from link graph: %s", err)
+	}
+
+	// Deregister the container before removing its directory, to avoid race conditions
+	runtime.idIndex.Delete(container.ID)
+	runtime.containers.Remove(element)
+	if err := os.RemoveAll(container.root); err != nil {
+		return fmt.Errorf("Unable to remove filesystem for %v: %v", container.ID, err)
+	}
+	return nil
+}
+
+func (runtime *Runtime) restore() error {
+	if os.Getenv("DEBUG") == "" && os.Getenv("TEST") == "" {
+		fmt.Printf("Loading containers: ")
+	}
+	dir, err := ioutil.ReadDir(runtime.repository)
+	if err != nil {
+		return err
+	}
+	containers := make(map[string]*Container)
+	currentDriver := runtime.driver.String()
+
+	for _, v := range dir {
+		id := v.Name()
+		container, err := runtime.load(id)
+		if os.Getenv("DEBUG") == "" && os.Getenv("TEST") == "" {
+			fmt.Print(".")
+		}
+		if err != nil {
+			utils.Errorf("Failed to load container %v: %v", id, err)
+			continue
+		}
+
+		// Ignore the container if it does not support the current driver being used by the graph
+		if container.Driver == "" && currentDriver == "aufs" || container.Driver == currentDriver {
+			utils.Debugf("Loaded container %v", container.ID)
+			containers[container.ID] = container
+		} else {
+			utils.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
+		}
+	}
+
+	register := func(container *Container) {
+		if err := runtime.Register(container); err != nil {
+			utils.Debugf("Failed to register container %s: %s", container.ID, err)
+		}
+	}
+
+	if entities := runtime.containerGraph.List("/", -1); entities != nil {
+		for _, p := range entities.Paths() {
+			if os.Getenv("DEBUG") == "" && os.Getenv("TEST") == "" {
+				fmt.Print(".")
+			}
+			e := entities[p]
+			if container, ok := containers[e.ID()]; ok {
+				register(container)
+				delete(containers, e.ID())
+			}
+		}
+	}
+
+	// Any containers that are left over do not exist in the graph
+	for _, container := range containers {
+		// Try to set the default name for a container if it exists prior to links
+		container.Name, err = generateRandomName(runtime)
+		if err != nil {
+			container.Name = utils.TruncateID(container.ID)
+		}
+
+		if _, err := runtime.containerGraph.Set(container.Name, container.ID); err != nil {
+			utils.Debugf("Setting default id - %s", err)
+		}
+		register(container)
+	}
+
+	if os.Getenv("DEBUG") == "" && os.Getenv("TEST") == "" {
+		fmt.Printf(": done.\n")
+	}
+
+	return nil
+}
+
+// Create creates a new container from the given configuration with a given name.
+func (runtime *Runtime) Create(config *runconfig.Config, name string) (*Container, []string, error) {
+	var (
+		container *Container
+		warnings  []string
+	)
+
+	img, err := runtime.repositories.LookupImage(config.Image)
+	if err != nil {
+		return nil, nil, err
+	}
+	if err := runtime.checkImageDepth(img); err != nil {
+		return nil, nil, err
+	}
+	if warnings, err = runtime.mergeAndVerifyConfig(config, img); err != nil {
+		return nil, nil, err
+	}
+	if container, err = runtime.newContainer(name, config, img); err != nil {
+		return nil, nil, err
+	}
+	if err := runtime.createRootfs(container, img); err != nil {
+		return nil, nil, err
+	}
+	if err := container.ToDisk(); err != nil {
+		return nil, nil, err
+	}
+	if err := runtime.Register(container); err != nil {
+		return nil, nil, err
+	}
+	return container, warnings, nil
+}
+
+func (runtime *Runtime) checkImageDepth(img *image.Image) error {
+	// We add 2 layers to the depth because the container's rw and
+	// init layer add to the restriction
+	depth, err := img.Depth()
+	if err != nil {
+		return err
+	}
+	if depth+2 >= MaxImageDepth {
+		return fmt.Errorf("Cannot create container with more than %d parents", MaxImageDepth)
+	}
+	return nil
+}
+
+func (runtime *Runtime) checkDeprecatedExpose(config *runconfig.Config) bool {
+	if config != nil {
+		if config.PortSpecs != nil {
+			for _, p := range config.PortSpecs {
+				if strings.Contains(p, ":") {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
+func (runtime *Runtime) mergeAndVerifyConfig(config *runconfig.Config, img *image.Image) ([]string, error) {
+	warnings := []string{}
+	if runtime.checkDeprecatedExpose(img.Config) || runtime.checkDeprecatedExpose(config) {
+		warnings = append(warnings, "The mapping to public ports on your host via Dockerfile EXPOSE (host:port:port) has been deprecated. Use -p to publish the ports.")
+	}
+	if img.Config != nil {
+		if err := runconfig.Merge(config, img.Config); err != nil {
+			return nil, err
+		}
+	}
+	if len(config.Entrypoint) == 0 && len(config.Cmd) == 0 {
+		return nil, fmt.Errorf("No command specified")
+	}
+	return warnings, nil
+}
+
+func (runtime *Runtime) generateIdAndName(name string) (string, string, error) {
+	var (
+		err error
+		id  = utils.GenerateRandomID()
+	)
+
+	if name == "" {
+		name, err = generateRandomName(runtime)
+		if err != nil {
+			name = utils.TruncateID(id)
+		}
+	} else {
+		if !validContainerNamePattern.MatchString(name) {
+			return "", "", fmt.Errorf("Invalid container name (%s), only %s are allowed", name, validContainerNameChars)
+		}
+	}
+	if name[0] != '/' {
+		name = "/" + name
+	}
+	// Set the enitity in the graph using the default name specified
+	if _, err := runtime.containerGraph.Set(name, id); err != nil {
+		if !graphdb.IsNonUniqueNameError(err) {
+			return "", "", err
+		}
+
+		conflictingContainer, err := runtime.GetByName(name)
+		if err != nil {
+			if strings.Contains(err.Error(), "Could not find entity") {
+				return "", "", err
+			}
+
+			// Remove name and continue starting the container
+			if err := runtime.containerGraph.Delete(name); err != nil {
+				return "", "", err
+			}
+		} else {
+			nameAsKnownByUser := strings.TrimPrefix(name, "/")
+			return "", "", fmt.Errorf(
+				"Conflict, The name %s is already assigned to %s. You have to delete (or rename) that container to be able to assign %s to a container again.", nameAsKnownByUser,
+				utils.TruncateID(conflictingContainer.ID), nameAsKnownByUser)
+		}
+	}
+	return id, name, nil
+}
+
+func (runtime *Runtime) generateHostname(id string, config *runconfig.Config) {
+	// Generate default hostname
+	// FIXME: the lxc template no longer needs to set a default hostname
+	if config.Hostname == "" {
+		config.Hostname = id[:12]
+	}
+}
+
+func (runtime *Runtime) getEntrypointAndArgs(config *runconfig.Config) (string, []string) {
+	var (
+		entrypoint string
+		args       []string
+	)
+	if len(config.Entrypoint) != 0 {
+		entrypoint = config.Entrypoint[0]
+		args = append(config.Entrypoint[1:], config.Cmd...)
+	} else {
+		entrypoint = config.Cmd[0]
+		args = config.Cmd[1:]
+	}
+	return entrypoint, args
+}
+
+func (runtime *Runtime) newContainer(name string, config *runconfig.Config, img *image.Image) (*Container, error) {
+	var (
+		id  string
+		err error
+	)
+	id, name, err = runtime.generateIdAndName(name)
+	if err != nil {
+		return nil, err
+	}
+
+	runtime.generateHostname(id, config)
+	entrypoint, args := runtime.getEntrypointAndArgs(config)
+
+	container := &Container{
+		// FIXME: we should generate the ID here instead of receiving it as an argument
+		ID:              id,
+		Created:         time.Now().UTC(),
+		Path:            entrypoint,
+		Args:            args, //FIXME: de-duplicate from config
+		Config:          config,
+		hostConfig:      &runconfig.HostConfig{},
+		Image:           img.ID, // Always use the resolved image id
+		NetworkSettings: &NetworkSettings{},
+		Name:            name,
+		Driver:          runtime.driver.String(),
+		ExecDriver:      runtime.execDriver.Name(),
+	}
+	container.root = runtime.containerRoot(container.ID)
+	return container, nil
+}
+
+func (runtime *Runtime) createRootfs(container *Container, img *image.Image) error {
+	// Step 1: create the container directory.
+	// This doubles as a barrier to avoid race conditions.
+	if err := os.Mkdir(container.root, 0700); err != nil {
+		return err
+	}
+	initID := fmt.Sprintf("%s-init", container.ID)
+	if err := runtime.driver.Create(initID, img.ID, ""); err != nil {
+		return err
+	}
+	initPath, err := runtime.driver.Get(initID)
+	if err != nil {
+		return err
+	}
+	defer runtime.driver.Put(initID)
+
+	if err := graph.SetupInitLayer(initPath); err != nil {
+		return err
+	}
+
+	if err := runtime.driver.Create(container.ID, initID, ""); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Commit creates a new filesystem image from the current state of a container.
+// The image can optionally be tagged into a repository
+func (runtime *Runtime) Commit(container *Container, repository, tag, comment, author string, config *runconfig.Config) (*image.Image, error) {
+	// FIXME: freeze the container before copying it to avoid data corruption?
+	if err := container.Mount(); err != nil {
+		return nil, err
+	}
+	defer container.Unmount()
+
+	rwTar, err := container.ExportRw()
+	if err != nil {
+		return nil, err
+	}
+	defer rwTar.Close()
+
+	// Create a new image from the container's base layers + a new layer from container changes
+	var (
+		containerID, containerImage string
+		containerConfig             *runconfig.Config
+	)
+	if container != nil {
+		containerID = container.ID
+		containerImage = container.Image
+		containerConfig = container.Config
+	}
+	img, err := runtime.graph.Create(rwTar, containerID, containerImage, comment, author, containerConfig, config)
+	if err != nil {
+		return nil, err
+	}
+	// Register the image if needed
+	if repository != "" {
+		if err := runtime.repositories.Set(repository, tag, img.ID, true); err != nil {
+			return img, err
+		}
+	}
+	return img, nil
+}
+
+func GetFullContainerName(name string) (string, error) {
+	if name == "" {
+		return "", fmt.Errorf("Container name cannot be empty")
+	}
+	if name[0] != '/' {
+		name = "/" + name
+	}
+	return name, nil
+}
+
+func (runtime *Runtime) GetByName(name string) (*Container, error) {
+	fullName, err := GetFullContainerName(name)
+	if err != nil {
+		return nil, err
+	}
+	entity := runtime.containerGraph.Get(fullName)
+	if entity == nil {
+		return nil, fmt.Errorf("Could not find entity for %s", name)
+	}
+	e := runtime.getContainerElement(entity.ID())
+	if e == nil {
+		return nil, fmt.Errorf("Could not find container for entity id %s", entity.ID())
+	}
+	return e.Value.(*Container), nil
+}
+
+func (runtime *Runtime) Children(name string) (map[string]*Container, error) {
+	name, err := GetFullContainerName(name)
+	if err != nil {
+		return nil, err
+	}
+	children := make(map[string]*Container)
+
+	err = runtime.containerGraph.Walk(name, func(p string, e *graphdb.Entity) error {
+		c := runtime.Get(e.ID())
+		if c == nil {
+			return fmt.Errorf("Could not get container for name %s and id %s", e.ID(), p)
+		}
+		children[p] = c
+		return nil
+	}, 0)
+
+	if err != nil {
+		return nil, err
+	}
+	return children, nil
+}
+
+func (runtime *Runtime) RegisterLink(parent, child *Container, alias string) error {
+	fullName := path.Join(parent.Name, alias)
+	if !runtime.containerGraph.Exists(fullName) {
+		_, err := runtime.containerGraph.Set(fullName, child.ID)
+		return err
+	}
+	return nil
+}
+
+// FIXME: harmonize with NewGraph()
+func NewRuntime(config *daemonconfig.Config, eng *engine.Engine) (*Runtime, error) {
+	runtime, err := NewRuntimeFromDirectory(config, eng)
+	if err != nil {
+		return nil, err
+	}
+	return runtime, nil
+}
+
+func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*Runtime, error) {
+	if !config.EnableSelinuxSupport {
+		selinux.SetDisabled()
+	}
+
+	// Set the default driver
+	graphdriver.DefaultDriver = config.GraphDriver
+
+	// Load storage driver
+	driver, err := graphdriver.New(config.Root)
+	if err != nil {
+		return nil, err
+	}
+	utils.Debugf("Using graph driver %s", driver)
+
+	if err := remountPrivate(config.Root); err != nil {
+		return nil, err
+	}
+
+	runtimeRepo := path.Join(config.Root, "containers")
+
+	if err := os.MkdirAll(runtimeRepo, 0700); err != nil && !os.IsExist(err) {
+		return nil, err
+	}
+
+	// Migrate the container if it is aufs and aufs is enabled
+	if err = migrateIfAufs(driver, config.Root); err != nil {
+		return nil, err
+	}
+
+	utils.Debugf("Creating images graph")
+	g, err := graph.NewGraph(path.Join(config.Root, "graph"), driver)
+	if err != nil {
+		return nil, err
+	}
+
+	// We don't want to use a complex driver like aufs or devmapper
+	// for volumes, just a plain filesystem
+	volumesDriver, err := graphdriver.GetDriver("vfs", config.Root)
+	if err != nil {
+		return nil, err
+	}
+	utils.Debugf("Creating volumes graph")
+	volumes, err := graph.NewGraph(path.Join(config.Root, "volumes"), volumesDriver)
+	if err != nil {
+		return nil, err
+	}
+	utils.Debugf("Creating repository list")
+	repositories, err := graph.NewTagStore(path.Join(config.Root, "repositories-"+driver.String()), g)
+	if err != nil {
+		return nil, fmt.Errorf("Couldn't create Tag store: %s", err)
+	}
+
+	if !config.DisableNetwork {
+		job := eng.Job("init_networkdriver")
+
+		job.SetenvBool("EnableIptables", config.EnableIptables)
+		job.SetenvBool("InterContainerCommunication", config.InterContainerCommunication)
+		job.SetenvBool("EnableIpForward", config.EnableIpForward)
+		job.Setenv("BridgeIface", config.BridgeIface)
+		job.Setenv("BridgeIP", config.BridgeIP)
+		job.Setenv("DefaultBindingIP", config.DefaultIp.String())
+
+		if err := job.Run(); err != nil {
+			return nil, err
+		}
+	}
+
+	graphdbPath := path.Join(config.Root, "linkgraph.db")
+	graph, err := graphdb.NewSqliteConn(graphdbPath)
+	if err != nil {
+		return nil, err
+	}
+
+	localCopy := path.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION))
+	sysInitPath := utils.DockerInitPath(localCopy)
+	if sysInitPath == "" {
+		return nil, fmt.Errorf("Could not locate dockerinit: This usually means docker was built incorrectly. See http://docs.docker.io/en/latest/contributing/devenvironment for official build instructions.")
+	}
+
+	if sysInitPath != localCopy {
+		// When we find a suitable dockerinit binary (even if it's our local binary), we copy it into config.Root at localCopy for future use (so that the original can go away without that being a problem, for example during a package upgrade).
+		if err := os.Mkdir(path.Dir(localCopy), 0700); err != nil && !os.IsExist(err) {
+			return nil, err
+		}
+		if _, err := utils.CopyFile(sysInitPath, localCopy); err != nil {
+			return nil, err
+		}
+		if err := os.Chmod(localCopy, 0700); err != nil {
+			return nil, err
+		}
+		sysInitPath = localCopy
+	}
+
+	sysInfo := sysinfo.New(false)
+	ed, err := execdrivers.NewDriver(config.ExecDriver, config.Root, sysInitPath, sysInfo)
+	if err != nil {
+		return nil, err
+	}
+
+	runtime := &Runtime{
+		repository:     runtimeRepo,
+		containers:     list.New(),
+		graph:          g,
+		repositories:   repositories,
+		idIndex:        utils.NewTruncIndex(),
+		sysInfo:        sysInfo,
+		volumes:        volumes,
+		config:         config,
+		containerGraph: graph,
+		driver:         driver,
+		sysInitPath:    sysInitPath,
+		execDriver:     ed,
+		eng:            eng,
+	}
+
+	if err := runtime.checkLocaldns(); err != nil {
+		return nil, err
+	}
+	if err := runtime.restore(); err != nil {
+		return nil, err
+	}
+	return runtime, nil
+}
+
+func (runtime *Runtime) shutdown() error {
+	group := sync.WaitGroup{}
+	utils.Debugf("starting clean shutdown of all containers...")
+	for _, container := range runtime.List() {
+		c := container
+		if c.State.IsRunning() {
+			utils.Debugf("stopping %s", c.ID)
+			group.Add(1)
+
+			go func() {
+				defer group.Done()
+				if err := c.KillSig(15); err != nil {
+					utils.Debugf("kill 15 error for %s - %s", c.ID, err)
+				}
+				c.Wait()
+				utils.Debugf("container stopped %s", c.ID)
+			}()
+		}
+	}
+	group.Wait()
+
+	return nil
+}
+
+func (runtime *Runtime) Close() error {
+	errorsStrings := []string{}
+	if err := runtime.shutdown(); err != nil {
+		utils.Errorf("runtime.shutdown(): %s", err)
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := portallocator.ReleaseAll(); err != nil {
+		utils.Errorf("portallocator.ReleaseAll(): %s", err)
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := runtime.driver.Cleanup(); err != nil {
+		utils.Errorf("runtime.driver.Cleanup(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if err := runtime.containerGraph.Close(); err != nil {
+		utils.Errorf("runtime.containerGraph.Close(): %s", err.Error())
+		errorsStrings = append(errorsStrings, err.Error())
+	}
+	if len(errorsStrings) > 0 {
+		return fmt.Errorf("%s", strings.Join(errorsStrings, ", "))
+	}
+	return nil
+}
+
+func (runtime *Runtime) Mount(container *Container) error {
+	dir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return fmt.Errorf("Error getting container %s from driver %s: %s", container.ID, runtime.driver, err)
+	}
+	if container.basefs == "" {
+		container.basefs = dir
+	} else if container.basefs != dir {
+		return fmt.Errorf("Error: driver %s is returning inconsistent paths for container %s ('%s' then '%s')",
+			runtime.driver, container.ID, container.basefs, dir)
+	}
+	return nil
+}
+
+func (runtime *Runtime) Unmount(container *Container) error {
+	runtime.driver.Put(container.ID)
+	return nil
+}
+
+func (runtime *Runtime) Changes(container *Container) ([]archive.Change, error) {
+	if differ, ok := runtime.driver.(graphdriver.Differ); ok {
+		return differ.Changes(container.ID)
+	}
+	cDir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+	defer runtime.driver.Put(container.ID)
+	initDir, err := runtime.driver.Get(container.ID + "-init")
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container init rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+	defer runtime.driver.Put(container.ID + "-init")
+	return archive.ChangesDirs(cDir, initDir)
+}
+
+func (runtime *Runtime) Diff(container *Container) (archive.Archive, error) {
+	if differ, ok := runtime.driver.(graphdriver.Differ); ok {
+		return differ.Diff(container.ID)
+	}
+
+	changes, err := runtime.Changes(container)
+	if err != nil {
+		return nil, err
+	}
+
+	cDir, err := runtime.driver.Get(container.ID)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting container rootfs %s from driver %s: %s", container.ID, container.runtime.driver, err)
+	}
+
+	archive, err := archive.ExportChanges(cDir, changes)
+	if err != nil {
+		return nil, err
+	}
+	return utils.NewReadCloserWrapper(archive, func() error {
+		err := archive.Close()
+		runtime.driver.Put(container.ID)
+		return err
+	}), nil
+}
+
+func (runtime *Runtime) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+	return runtime.execDriver.Run(c.command, pipes, startCallback)
+}
+
+func (runtime *Runtime) Kill(c *Container, sig int) error {
+	return runtime.execDriver.Kill(c.command, sig)
+}
+
+// Nuke kills all containers then removes all content
+// from the content root, including images, volumes and
+// container filesystems.
+// Again: this will remove your entire docker runtime!
+func (runtime *Runtime) Nuke() error {
+	var wg sync.WaitGroup
+	for _, container := range runtime.List() {
+		wg.Add(1)
+		go func(c *Container) {
+			c.Kill()
+			wg.Done()
+		}(container)
+	}
+	wg.Wait()
+	runtime.Close()
+
+	return os.RemoveAll(runtime.config.Root)
+}
+
+// FIXME: this is a convenience function for integration tests
+// which need direct access to runtime.graph.
+// Once the tests switch to using engine and jobs, this method
+// can go away.
+func (runtime *Runtime) Graph() *graph.Graph {
+	return runtime.graph
+}
+
+func (runtime *Runtime) Repositories() *graph.TagStore {
+	return runtime.repositories
+}
+
+func (runtime *Runtime) Config() *daemonconfig.Config {
+	return runtime.config
+}
+
+func (runtime *Runtime) SystemConfig() *sysinfo.SysInfo {
+	return runtime.sysInfo
+}
+
+func (runtime *Runtime) SystemInitPath() string {
+	return runtime.sysInitPath
+}
+
+func (runtime *Runtime) GraphDriver() graphdriver.Driver {
+	return runtime.driver
+}
+
+func (runtime *Runtime) ExecutionDriver() execdriver.Driver {
+	return runtime.execDriver
+}
+
+func (runtime *Runtime) Volumes() *graph.Graph {
+	return runtime.volumes
+}
+
+func (runtime *Runtime) ContainerGraph() *graphdb.Database {
+	return runtime.containerGraph
+}
+
+func (runtime *Runtime) SetServer(server Server) {
+	runtime.srv = server
+}
+
+func (runtime *Runtime) checkLocaldns() error {
+	resolvConf, err := utils.GetResolvConf()
+	if err != nil {
+		return err
+	}
+	if len(runtime.config.Dns) == 0 && utils.CheckLocalDns(resolvConf) {
+		log.Printf("Local (127.0.0.1) DNS resolver found in resolv.conf and containers can't use it. Using default external servers : %v\n", DefaultDns)
+		runtime.config.Dns = DefaultDns
+	}
+	return nil
+}
diff --git a/runtime/runtime_aufs.go b/runtime/runtime_aufs.go
new file mode 100644
index 0000000000..5a32615df5
--- /dev/null
+++ b/runtime/runtime_aufs.go
@@ -0,0 +1,22 @@
+// +build !exclude_graphdriver_aufs
+
+package runtime
+
+import (
+	"github.com/dotcloud/docker/graph"
+	"github.com/dotcloud/docker/runtime/graphdriver"
+	"github.com/dotcloud/docker/runtime/graphdriver/aufs"
+	"github.com/dotcloud/docker/utils"
+)
+
+// Given the graphdriver ad, if it is aufs, then migrate it.
+// If aufs driver is not built, this func is a noop.
+func migrateIfAufs(driver graphdriver.Driver, root string) error {
+	if ad, ok := driver.(*aufs.Driver); ok {
+		utils.Debugf("Migrating existing containers")
+		if err := ad.Migrate(root, graph.SetupInitLayer); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/runtime/runtime_btrfs.go b/runtime/runtime_btrfs.go
new file mode 100644
index 0000000000..c59b103ff9
--- /dev/null
+++ b/runtime/runtime_btrfs.go
@@ -0,0 +1,7 @@
+// +build !exclude_graphdriver_btrfs
+
+package runtime
+
+import (
+	_ "github.com/dotcloud/docker/runtime/graphdriver/btrfs"
+)
diff --git a/runtime/runtime_devicemapper.go b/runtime/runtime_devicemapper.go
new file mode 100644
index 0000000000..5b418b377a
--- /dev/null
+++ b/runtime/runtime_devicemapper.go
@@ -0,0 +1,7 @@
+// +build !exclude_graphdriver_devicemapper
+
+package runtime
+
+import (
+	_ "github.com/dotcloud/docker/runtime/graphdriver/devmapper"
+)
diff --git a/runtime/runtime_no_aufs.go b/runtime/runtime_no_aufs.go
new file mode 100644
index 0000000000..05a01fe151
--- /dev/null
+++ b/runtime/runtime_no_aufs.go
@@ -0,0 +1,11 @@
+// +build exclude_graphdriver_aufs
+
+package runtime
+
+import (
+	"github.com/dotcloud/docker/runtime/graphdriver"
+)
+
+func migrateIfAufs(driver graphdriver.Driver, root string) error {
+	return nil
+}
diff --git a/runtime/server.go b/runtime/server.go
new file mode 100644
index 0000000000..a74c4d1200
--- /dev/null
+++ b/runtime/server.go
@@ -0,0 +1,10 @@
+package runtime
+
+import (
+	"github.com/dotcloud/docker/utils"
+)
+
+type Server interface {
+	LogEvent(action, id, from string) *utils.JSONMessage
+	IsRunning() bool // returns true if the server is currently in operation
+}
diff --git a/runtime/sorter.go b/runtime/sorter.go
new file mode 100644
index 0000000000..c5af772dae
--- /dev/null
+++ b/runtime/sorter.go
@@ -0,0 +1,25 @@
+package runtime
+
+import "sort"
+
+type containerSorter struct {
+	containers []*Container
+	by         func(i, j *Container) bool
+}
+
+func (s *containerSorter) Len() int {
+	return len(s.containers)
+}
+
+func (s *containerSorter) Swap(i, j int) {
+	s.containers[i], s.containers[j] = s.containers[j], s.containers[i]
+}
+
+func (s *containerSorter) Less(i, j int) bool {
+	return s.by(s.containers[i], s.containers[j])
+}
+
+func sortContainers(containers []*Container, predicate func(i, j *Container) bool) {
+	s := &containerSorter{containers, predicate}
+	sort.Sort(s)
+}
diff --git a/runtime/state.go b/runtime/state.go
new file mode 100644
index 0000000000..316b8a40f1
--- /dev/null
+++ b/runtime/state.go
@@ -0,0 +1,84 @@
+package runtime
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/utils"
+	"sync"
+	"time"
+)
+
+type State struct {
+	sync.RWMutex
+	Running    bool
+	Pid        int
+	ExitCode   int
+	StartedAt  time.Time
+	FinishedAt time.Time
+	Ghost      bool
+}
+
+// String returns a human-readable description of the state
+func (s *State) String() string {
+	s.RLock()
+	defer s.RUnlock()
+
+	if s.Running {
+		if s.Ghost {
+			return fmt.Sprintf("Ghost")
+		}
+		return fmt.Sprintf("Up %s", utils.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
+	}
+	if s.FinishedAt.IsZero() {
+		return ""
+	}
+	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, utils.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+}
+
+func (s *State) IsRunning() bool {
+	s.RLock()
+	defer s.RUnlock()
+
+	return s.Running
+}
+
+func (s *State) IsGhost() bool {
+	s.RLock()
+	defer s.RUnlock()
+
+	return s.Ghost
+}
+
+func (s *State) GetExitCode() int {
+	s.RLock()
+	defer s.RUnlock()
+
+	return s.ExitCode
+}
+
+func (s *State) SetGhost(val bool) {
+	s.Lock()
+	defer s.Unlock()
+
+	s.Ghost = val
+}
+
+func (s *State) SetRunning(pid int) {
+	s.Lock()
+	defer s.Unlock()
+
+	s.Running = true
+	s.Ghost = false
+	s.ExitCode = 0
+	s.Pid = pid
+	s.StartedAt = time.Now().UTC()
+}
+
+func (s *State) SetStopped(exitCode int) {
+	s.Lock()
+	defer s.Unlock()
+
+	s.Running = false
+	s.Pid = 0
+	s.FinishedAt = time.Now().UTC()
+	s.ExitCode = exitCode
+}
diff --git a/runtime/utils.go b/runtime/utils.go
new file mode 100644
index 0000000000..b983e67d41
--- /dev/null
+++ b/runtime/utils.go
@@ -0,0 +1,64 @@
+package runtime
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/nat"
+	"github.com/dotcloud/docker/pkg/namesgenerator"
+	"github.com/dotcloud/docker/runconfig"
+	"strings"
+)
+
+func migratePortMappings(config *runconfig.Config, hostConfig *runconfig.HostConfig) error {
+	if config.PortSpecs != nil {
+		ports, bindings, err := nat.ParsePortSpecs(config.PortSpecs)
+		if err != nil {
+			return err
+		}
+		config.PortSpecs = nil
+		if len(bindings) > 0 {
+			if hostConfig == nil {
+				hostConfig = &runconfig.HostConfig{}
+			}
+			hostConfig.PortBindings = bindings
+		}
+
+		if config.ExposedPorts == nil {
+			config.ExposedPorts = make(nat.PortSet, len(ports))
+		}
+		for k, v := range ports {
+			config.ExposedPorts[k] = v
+		}
+	}
+	return nil
+}
+
+func mergeLxcConfIntoOptions(hostConfig *runconfig.HostConfig, driverConfig map[string][]string) {
+	if hostConfig == nil {
+		return
+	}
+
+	// merge in the lxc conf options into the generic config map
+	if lxcConf := hostConfig.LxcConf; lxcConf != nil {
+		lxc := driverConfig["lxc"]
+		for _, pair := range lxcConf {
+			// because lxc conf gets the driver name lxc.XXXX we need to trim it off
+			// and let the lxc driver add it back later if needed
+			parts := strings.SplitN(pair.Key, ".", 2)
+			lxc = append(lxc, fmt.Sprintf("%s=%s", parts[1], pair.Value))
+		}
+		driverConfig["lxc"] = lxc
+	}
+}
+
+type checker struct {
+	runtime *Runtime
+}
+
+func (c *checker) Exists(name string) bool {
+	return c.runtime.containerGraph.Exists("/" + name)
+}
+
+// Generate a random and unique name
+func generateRandomName(runtime *Runtime) (string, error) {
+	return namesgenerator.GenerateRandomName(&checker{runtime})
+}
diff --git a/runtime/utils_test.go b/runtime/utils_test.go
new file mode 100644
index 0000000000..bdf3543a49
--- /dev/null
+++ b/runtime/utils_test.go
@@ -0,0 +1,29 @@
+package runtime
+
+import (
+	"testing"
+
+	"github.com/dotcloud/docker/runconfig"
+	"github.com/dotcloud/docker/utils"
+)
+
+func TestMergeLxcConfig(t *testing.T) {
+	var (
+		hostConfig = &runconfig.HostConfig{
+			LxcConf: []utils.KeyValuePair{
+				{Key: "lxc.cgroups.cpuset", Value: "1,2"},
+			},
+		}
+		driverConfig = make(map[string][]string)
+	)
+
+	mergeLxcConfIntoOptions(hostConfig, driverConfig)
+	if l := len(driverConfig["lxc"]); l > 1 {
+		t.Fatalf("expected lxc options len of 1 got %d", l)
+	}
+
+	cpuset := driverConfig["lxc"][0]
+	if expected := "cgroups.cpuset=1,2"; cpuset != expected {
+		t.Fatalf("expected %s got %s", expected, cpuset)
+	}
+}
diff --git a/runtime/volumes.go b/runtime/volumes.go
new file mode 100644
index 0000000000..004f1bb024
--- /dev/null
+++ b/runtime/volumes.go
@@ -0,0 +1,287 @@
+package runtime
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/archive"
+	"github.com/dotcloud/docker/runtime/execdriver"
+	"github.com/dotcloud/docker/utils"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+)
+
+type BindMap struct {
+	SrcPath string
+	DstPath string
+	Mode    string
+}
+
+func prepareVolumesForContainer(container *Container) error {
+	if container.Volumes == nil || len(container.Volumes) == 0 {
+		container.Volumes = make(map[string]string)
+		container.VolumesRW = make(map[string]bool)
+		if err := applyVolumesFrom(container); err != nil {
+			return err
+		}
+	}
+
+	if err := createVolumes(container); err != nil {
+		return err
+	}
+	return nil
+}
+
+func setupMountsForContainer(container *Container, envPath string) error {
+	mounts := []execdriver.Mount{
+		{container.runtime.sysInitPath, "/.dockerinit", false, true},
+		{envPath, "/.dockerenv", false, true},
+		{container.ResolvConfPath, "/etc/resolv.conf", false, true},
+	}
+
+	if container.HostnamePath != "" && container.HostsPath != "" {
+		mounts = append(mounts, execdriver.Mount{container.HostnamePath, "/etc/hostname", false, true})
+		mounts = append(mounts, execdriver.Mount{container.HostsPath, "/etc/hosts", false, true})
+	}
+
+	// Mount user specified volumes
+	// Note, these are not private because you may want propagation of (un)mounts from host
+	// volumes. For instance if you use -v /usr:/usr and the host later mounts /usr/share you
+	// want this new mount in the container
+	for r, v := range container.Volumes {
+		mounts = append(mounts, execdriver.Mount{v, r, container.VolumesRW[r], false})
+	}
+
+	container.command.Mounts = mounts
+
+	return nil
+}
+
+func applyVolumesFrom(container *Container) error {
+	volumesFrom := container.hostConfig.VolumesFrom
+	if len(volumesFrom) > 0 {
+		for _, containerSpec := range volumesFrom {
+			var (
+				mountRW   = true
+				specParts = strings.SplitN(containerSpec, ":", 2)
+			)
+
+			switch len(specParts) {
+			case 0:
+				return fmt.Errorf("Malformed volumes-from specification: %s", containerSpec)
+			case 2:
+				switch specParts[1] {
+				case "ro":
+					mountRW = false
+				case "rw": // mountRW is already true
+				default:
+					return fmt.Errorf("Malformed volumes-from specification: %s", containerSpec)
+				}
+			}
+
+			c := container.runtime.Get(specParts[0])
+			if c == nil {
+				return fmt.Errorf("Container %s not found. Impossible to mount its volumes", specParts[0])
+			}
+
+			if err := c.Mount(); err != nil {
+				return fmt.Errorf("Container %s failed to mount. Impossible to mount its volumes", specParts[0])
+			}
+			defer c.Unmount()
+
+			for volPath, id := range c.Volumes {
+				if _, exists := container.Volumes[volPath]; exists {
+					continue
+				}
+				stat, err := os.Stat(filepath.Join(c.basefs, volPath))
+				if err != nil {
+					return err
+				}
+				if err := createIfNotExists(filepath.Join(container.basefs, volPath), stat.IsDir()); err != nil {
+					return err
+				}
+				container.Volumes[volPath] = id
+				if isRW, exists := c.VolumesRW[volPath]; exists {
+					container.VolumesRW[volPath] = isRW && mountRW
+				}
+			}
+
+		}
+	}
+	return nil
+}
+
+func getBindMap(container *Container) (map[string]BindMap, error) {
+	var (
+		// Create the requested bind mounts
+		binds = make(map[string]BindMap)
+		// Define illegal container destinations
+		illegalDsts = []string{"/", "."}
+	)
+
+	for _, bind := range container.hostConfig.Binds {
+		// FIXME: factorize bind parsing in parseBind
+		var (
+			src, dst, mode string
+			arr            = strings.Split(bind, ":")
+		)
+
+		if len(arr) == 2 {
+			src = arr[0]
+			dst = arr[1]
+			mode = "rw"
+		} else if len(arr) == 3 {
+			src = arr[0]
+			dst = arr[1]
+			mode = arr[2]
+		} else {
+			return nil, fmt.Errorf("Invalid bind specification: %s", bind)
+		}
+
+		// Bail if trying to mount to an illegal destination
+		for _, illegal := range illegalDsts {
+			if dst == illegal {
+				return nil, fmt.Errorf("Illegal bind destination: %s", dst)
+			}
+		}
+
+		bindMap := BindMap{
+			SrcPath: src,
+			DstPath: dst,
+			Mode:    mode,
+		}
+		binds[filepath.Clean(dst)] = bindMap
+	}
+	return binds, nil
+}
+
+func createVolumes(container *Container) error {
+	binds, err := getBindMap(container)
+	if err != nil {
+		return err
+	}
+
+	volumesDriver := container.runtime.volumes.Driver()
+	// Create the requested volumes if they don't exist
+	for volPath := range container.Config.Volumes {
+		volPath = filepath.Clean(volPath)
+		volIsDir := true
+		// Skip existing volumes
+		if _, exists := container.Volumes[volPath]; exists {
+			continue
+		}
+		var srcPath string
+		var isBindMount bool
+		srcRW := false
+		// If an external bind is defined for this volume, use that as a source
+		if bindMap, exists := binds[volPath]; exists {
+			isBindMount = true
+			srcPath = bindMap.SrcPath
+			if !filepath.IsAbs(srcPath) {
+				return fmt.Errorf("%s must be an absolute path", srcPath)
+			}
+			if strings.ToLower(bindMap.Mode) == "rw" {
+				srcRW = true
+			}
+			if stat, err := os.Stat(bindMap.SrcPath); err != nil {
+				return err
+			} else {
+				volIsDir = stat.IsDir()
+			}
+			// Otherwise create an directory in $ROOT/volumes/ and use that
+		} else {
+
+			// Do not pass a container as the parameter for the volume creation.
+			// The graph driver using the container's information ( Image ) to
+			// create the parent.
+			c, err := container.runtime.volumes.Create(nil, "", "", "", "", nil, nil)
+			if err != nil {
+				return err
+			}
+			srcPath, err = volumesDriver.Get(c.ID)
+			if err != nil {
+				return fmt.Errorf("Driver %s failed to get volume rootfs %s: %s", volumesDriver, c.ID, err)
+			}
+			srcRW = true // RW by default
+		}
+
+		if p, err := filepath.EvalSymlinks(srcPath); err != nil {
+			return err
+		} else {
+			srcPath = p
+		}
+
+		container.Volumes[volPath] = srcPath
+		container.VolumesRW[volPath] = srcRW
+
+		// Create the mountpoint
+		volPath = filepath.Join(container.basefs, volPath)
+		rootVolPath, err := utils.FollowSymlinkInScope(volPath, container.basefs)
+		if err != nil {
+			return err
+		}
+		if err := createIfNotExists(rootVolPath, volIsDir); err != nil {
+			return err
+		}
+
+		// Do not copy or change permissions if we are mounting from the host
+		if srcRW && !isBindMount {
+			volList, err := ioutil.ReadDir(rootVolPath)
+			if err != nil {
+				return err
+			}
+			if len(volList) > 0 {
+				srcList, err := ioutil.ReadDir(srcPath)
+				if err != nil {
+					return err
+				}
+				if len(srcList) == 0 {
+					// If the source volume is empty copy files from the root into the volume
+					if err := archive.CopyWithTar(rootVolPath, srcPath); err != nil {
+						return err
+					}
+
+					var stat syscall.Stat_t
+					if err := syscall.Stat(rootVolPath, &stat); err != nil {
+						return err
+					}
+					var srcStat syscall.Stat_t
+					if err := syscall.Stat(srcPath, &srcStat); err != nil {
+						return err
+					}
+					// Change the source volume's ownership if it differs from the root
+					// files that were just copied
+					if stat.Uid != srcStat.Uid || stat.Gid != srcStat.Gid {
+						if err := os.Chown(srcPath, int(stat.Uid), int(stat.Gid)); err != nil {
+							return err
+						}
+					}
+				}
+			}
+		}
+	}
+	return nil
+}
+
+func createIfNotExists(path string, isDir bool) error {
+	if _, err := os.Stat(path); err != nil {
+		if os.IsNotExist(err) {
+			if isDir {
+				if err := os.MkdirAll(path, 0755); err != nil {
+					return err
+				}
+			} else {
+				if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+					return err
+				}
+				f, err := os.OpenFile(path, os.O_CREATE, 0755)
+				if err != nil {
+					return err
+				}
+				defer f.Close()
+			}
+		}
+	}
+	return nil
+}
author	unclejack <unclejack@users.noreply.github.com>	2014-04-09 01:56:01 +0300
committer	unclejack <unclejack@users.noreply.github.com>	2014-04-09 01:56:01 +0300
commit	e128a606e39fa63c6b4fd6e53a1d88cf00aad868 (patch)
tree	199ee7eb6678ffecd2ddad95fce794c795ad5183 /runtime
parent	143c9707a9fafc39e1d9747f528db97b2564f01e (diff)
parent	dc9c28f51d669d6b09e81c2381f800f1a33bb659 (diff)
download	docker-release-0.10.tar.gz